Java实现PDF转Word【收集整理】

首先感谢 Mgg9702 博主提供的转换依赖包处理,关于如何获得一个破解的pdf转word我这里就不追述了,有需要去看:
https://blog.csdn.net/Mgg9702/article/details/124987483?spm=1001.2014.3001.5506

我这里主要涉及到整理一个pdf转word的jar工具

1.首先用以上方法得到一个纯净的jar包

引入jar包或依赖
这里用到的是aspose-pdf,这个依赖需要单独配置仓库地址,也可以直接去官网下载jar包
pom文件详情

<dependencies>
        <dependency>
            <groupId>org.javassistgroupId>
            <artifactId>javassistartifactId>
            <version>3.20.0-GAversion>
        dependency>
        <dependency>
            <groupId>com.asposegroupId>
            <artifactId>aspose-pdfartifactId>
            <version>22.4version>
        dependency>
dependencies>
<repositories>
	<repository>
		<id>AsposeJavaAPIid>
		<name>Aspose Java APIname>
		<url>https://repository.aspose.com/repo/url>
	repository>
repositories>

破解
找到你的jar包地址,引入依赖的去maven仓库地址找到jar,将地址填入代码中的jarPath,运行Main方法会在jar包统计目录下生成破解的包
aspose-pdf-22.4.cracked.jar,替换掉原来jar包就可以了

public class PDFJarCrack {
    public static void main(String[] args) throws Exception {
        String jarPath = "jar包地址";
        crack(jarPath);
    }
    
    private static void crack(String jarName) {
        try {
            ClassPool.getDefault().insertClassPath(jarName);
            CtClass ctClass = ClassPool.getDefault().getCtClass("com.aspose.pdf.ADocument");
            CtMethod[] declaredMethods = ctClass.getDeclaredMethods();
            int num = 0;
            for (int i = 0; i < declaredMethods.length; i++) {
                if (num == 2) {
                    break;
                }
                CtMethod method = declaredMethods[i];
                CtClass[] ps = method.getParameterTypes();
                if (ps.length == 2
                        && method.getName().equals("lI")
                        && ps[0].getName().equals("com.aspose.pdf.ADocument")
                        && ps[1].getName().equals("int")) {
                    // 最多只能转换4页 处理
                    System.out.println(method.getReturnType());
                    System.out.println(ps[1].getName());
                    method.setBody("{return false;}");
                    num = 1;
                }
                if (ps.length == 0 && method.getName().equals("lt")) {
                    // 水印处理
                    method.setBody("{return true;}");
                    num = 2;
                }
            }
            File file = new File(jarName);
            ctClass.writeFile(file.getParent());
            disposeJar(jarName, file.getParent() + "/com/aspose/pdf/ADocument.class");
        } catch (NotFoundException e) {
            e.printStackTrace();
        } catch (CannotCompileException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

    }

    private static void disposeJar(String jarName, String replaceFile) {
        List<String> deletes = new ArrayList<>();
        deletes.add("META-INF/37E3C32D.SF");
        deletes.add("META-INF/37E3C32D.RSA");
        File oriFile = new File(jarName);
        if (!oriFile.exists()) {
            System.out.println("######Not Find File:" + jarName);
            return;
        }
        //将文件名命名成备份文件
        String bakJarName = jarName.substring(0, jarName.length() - 3) + "cracked.jar";
        //   File bakFile=new File(bakJarName);
        try {
            //创建文件(根据备份文件并删除部分)
            JarFile jarFile = new JarFile(jarName);
            JarOutputStream jos = new JarOutputStream(new FileOutputStream(bakJarName));
            Enumeration entries = jarFile.entries();
            while (entries.hasMoreElements()) {
                JarEntry entry = (JarEntry) entries.nextElement();
                if (!deletes.contains(entry.getName())) {
                    if (entry.getName().equals("com/aspose/pdf/ADocument.class")) {
                        System.out.println("Replace:-------" + entry.getName());
                        JarEntry jarEntry = new JarEntry(entry.getName());
                        jos.putNextEntry(jarEntry);
                        FileInputStream fin = new FileInputStream(replaceFile);
                        byte[] bytes = readStream(fin);
                        jos.write(bytes, 0, bytes.length);
                    } else {
                        jos.putNextEntry(entry);
                        byte[] bytes = readStream(jarFile.getInputStream(entry));
                        jos.write(bytes, 0, bytes.length);
                    }
                } else {
                    System.out.println("Delete:-------" + entry.getName());
                }
            }
            jos.flush();
            jos.close();
            jarFile.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    private static byte[] readStream(InputStream inStream) throws Exception {
        ByteArrayOutputStream outSteam = new ByteArrayOutputStream();
        byte[] buffer = new byte[1024];
        int len = -1;
        while ((len = inStream.read(buffer)) != -1) {
            outSteam.write(buffer, 0, len);
        }
        outSteam.close();
        inStream.close();
        return outSteam.toByteArray();
    }
}

2.创建转换jar工具

import com.aspose.pdf.Document;
import com.aspose.pdf.SaveFormat;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;

/**
 * Command-line tool that converts a PDF file to a DOCX file using Aspose.PDF.
 *
 * <p>The DOCX is written next to the input file with the same base name. After a
 * successful conversion a {@code .txt} flag file containing the elapsed time is
 * written next to it; callers can poll for that file to detect completion.
 */
public class Pdf2Word {
    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the path of the PDF file to convert
     */
    public static void main(String[] args) {
        // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length == 0 || args[0].trim().isEmpty()) {
            System.err.println("Usage: java -jar pdf-doc.jar <path-to-pdf>");
            return;
        }
        pdf2Word(args[0]);
    }

    /**
     * Converts the given PDF to DOCX and writes a {@code .txt} flag file on success.
     *
     * <p>Failures are reported on standard output and the stack trace is printed;
     * no exception is propagated to the caller and no flag file is written.
     *
     * @param pdfName path of the PDF file to convert
     */
    public static void pdf2Word(String pdfName) {
        long old = System.currentTimeMillis();
        Document doc = null;
        FileOutputStream os = null;
        try {
            String p = stripExtension(pdfName);
            String textPath = p + ".txt";
            String wordPath = p + ".docx";

            // Load the PDF before opening the output stream so that an unreadable
            // input does not leave an empty .docx file behind.
            doc = new Document(pdfName);
            os = new FileOutputStream(wordPath);
            doc.save(os, SaveFormat.DocX);
            // Close explicitly so a failed flush is treated as a failed conversion.
            os.close();
            os = null;

            long now = System.currentTimeMillis();

            String text = "Pdf 转 Word 共耗时:" + ((now - old) / 1000.0) + "秒";

            setCoverWordFlag(textPath, text);

            System.out.println(text);

        } catch (Exception e) {
            System.out.println("Pdf 转 Word 失败...");
            e.printStackTrace();
        } finally {
            // Only reached with a non-null stream when the conversion failed midway.
            if (os != null) {
                try {
                    os.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (doc != null) {
                try {
                    doc.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Returns {@code path} without its file extension. A dot that belongs to a
     * directory name is ignored; a path without an extension is returned unchanged.
     */
    private static String stripExtension(String path) {
        int dot = path.lastIndexOf('.');
        int sep = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        return dot > sep ? path.substring(0, dot) : path;
    }

    /**
     * Writes {@code val} followed by a newline to {@code fileName}, replacing any
     * existing content. Errors are printed and otherwise ignored.
     *
     * @param fileName path of the flag file to write
     * @param val      text to store in the flag file
     */
    private static void setCoverWordFlag(String fileName, String val) {
        BufferedWriter bw = null;
        try {
            // FileWriter creates the file if it is missing and uses the platform charset.
            bw = new BufferedWriter(new FileWriter(fileName));
            bw.write(val + "\n");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Guard against the writer never having been opened.
            if (bw != null) {
                try {
                    bw.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

创建测试类:

import org.apache.commons.io.FileUtils;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;

public class Pdf2WordTest {


    public static void main(String[] args) {
        try {
            Pdf2WordTest dd = new Pdf2WordTest();
            String pdfName = dd.getAppPath() + "/12345.pdf";
            final String filePdf2 = pdfName;
            String p = filePdf2.substring(0, filePdf2.lastIndexOf("."));
            String textPath = p + ".txt";

            String filePdf1 = "D:\\pdf2word\\222.pdf";
            //----清除文件---
            File oldFile = new File(pdfName);
            if (oldFile.exists()) {
                oldFile.delete();
                File oldText = new File(textPath);
                if (oldText.exists()) {
                    oldText.delete();
                }
            }
            //-------------
            FileUtils.copyFile(new File(filePdf1), new File(filePdf2));
			**//测试运行jar包**
            runCMD(filePdf2);
           /*
			**//本地测试转换**
			 new Thread(new Runnable() {
                @Override
                public void run() {
                    Pdf2Word.pdf2Word(filePdf2);
                }
            }).start();*/
            long old = System.currentTimeMillis();
            while (true) {
                if (new File(textPath).exists()) {
                    System.out.println("转换成功");
                    break;
                }
                System.out.println(1);
                long now = System.currentTimeMillis();
                if (((now - old) / 1000.0) >= 30) {
                    break;
                }
                Thread.sleep(1000);
            }
            System.out.println("结束");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    private static String appPath;

    public String getAppPath() {
        File directory = new File("");//设定为当前文件夹
        try {
            String absolutePath = directory.getAbsolutePath();
            int i = absolutePath.indexOf(File.separatorChar);
            String root = absolutePath.substring(0, i) + File.separator + "app/pdf2word";
            this.appPath = root;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return this.appPath;
    }

    private static void runCMD(String param1) throws IOException {
        //在路径后面空格加参数,多个参数依次类推
        String cmd = "java -jar D:\\development\\workspace\\pdf-doc\\out\\artifacts\\pdf_doc_jar\\pdf-doc.jar " + param1;

        System.out.println("cmd ================ " + cmd);

        Process process = null;
        BufferedReader bufferedReader = null;
        String line = "";
        process = Runtime.getRuntime().exec(cmd);

        bufferedReader = new BufferedReader(new InputStreamReader(process.getInputStream()));

        while ((line = bufferedReader.readLine()) != null) {
            System.out.println(line);
        }
    }
}

pom.xml


<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>pdf-doc</artifactId>
    <version>1.0-SNAPSHOT</version>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>6</source>
                    <target>6</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <dependencies>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.2</version>
        </dependency>
        <dependency>
            <groupId>org.javassist</groupId>
            <artifactId>javassist</artifactId>
            <version>3.20.0-GA</version>
        </dependency>
    </dependencies>
</project>

把上面步骤一得到的jar导入到项目
aspose-pdf-22.4.cracked.jar

Java实现PDF转Word【收集整理】_第1张图片
最后总结一下:IDE 自动生成的清单文件 MANIFEST.MF 用于指定可执行 jar 包的入口类(Main-Class),最好生成在 src 目录下。生成方式:先删除以前的清单文件,重新生成时选择相应的 src 目录即可,如图:
Java实现PDF转Word【收集整理】_第2张图片

你可能感兴趣的:(java,pdf,word)