使用JAVA将PDF转WORD

添加依赖


        
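        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.22</version>
        </dependency>

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>5.0.0</version>
        </dependency>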
        

代码

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Converts the text content of a PDF file into a Word (.docx) document.
 *
 * <p>Only the plain text returned by PDFBox's {@link PDFTextStripper} is
 * written to the Word document; nothing else from the PDF is read by this class.
 */
public class PdfToWordConverter {

    /**
     * Converts {@code ./input.pdf} to {@code ./output.docx}, both resolved
     * against the current working directory.
     *
     * @param args ignored
     * @throws IOException if the PDF cannot be read or the Word file cannot be written
     */
    public static void main(String[] args) throws IOException {
        File file = new File("./input.pdf");
        convertToWord(file, new File("./output.docx"));
    }

    /**
     * Extracts the text of a PDF file as a single string.
     *
     * @param file the PDF file to read
     * @return the text produced by {@link PDFTextStripper#getText}
     * @throws IOException if the file cannot be loaded or parsed
     */
    public static String extractTextFromPdf(File file) throws IOException {
        // try-with-resources guarantees the PDF is closed even when text
        // extraction throws.
        try (PDDocument document = PDDocument.load(file)) {
            PDFTextStripper stripper = new PDFTextStripper();
            return stripper.getText(document);
        }
    }

    /**
     * Writes the text of {@code pdf} into a new Word document at {@code docx},
     * creating one Word paragraph per extracted line of text.
     *
     * @param pdf  the PDF file to read
     * @param docx the .docx file to create (overwritten if it already exists)
     * @throws IOException if the PDF cannot be read or the Word file cannot be written
     */
    public static void convertToWord(File pdf, File docx) throws IOException {
        String text = extractTextFromPdf(pdf);
        // Both the in-memory document and the output stream are closed on every
        // exit path, including a failure while writing.
        try (XWPFDocument document = new XWPFDocument();
             FileOutputStream out = new FileOutputStream(docx)) {
            // A newline embedded in a single run's text is not shown as a line
            // break by Word, so each extracted line gets its own paragraph.
            for (String line : text.split("\\r?\\n|\\r")) {
                XWPFParagraph paragraph = document.createParagraph();
                XWPFRun run = paragraph.createRun();
                run.setText(line);
            }
            document.write(out);
        }
    }

}

你可能感兴趣的:(java,pdf,word)