java使用pdfbox读取PDF图片并且保存到指定路径

写这篇文章是为了记录自己踩过的坑,并给出正确的做法,避免下次忘记。

   /**
     * Extracts the image XObjects referenced directly by each page's resources and
     * writes every one of them to its own file.
     *
     * <p>Files are named {@code path + index + "." + suffix}; {@code index} starts at 0
     * and grows by one per extracted image. {@code path} is used as a raw string prefix,
     * so it has to end with a file separator when it denotes a directory. Form XObjects
     * are skipped, so images nested inside them are not extracted.
     *
     * <p>When an image reports no suffix, or ImageIO has no writer able to encode the
     * decoded image in the reported format, the image is written as PNG instead so that
     * it is not silently dropped.
     *
     * @param document the already opened PDF to scan; it is not closed by this method
     * @param path     string prefix (typically a directory ending in a separator) for the output files
     * @throws IOException if an image cannot be decoded or written
     */
    public static void getPdfImages(PDDocument document,String path) throws IOException {
        int index = 0;
        for (PDPage page : document.getPages()) {
            PDResources resources = page.getResources();
            if (resources == null) {
                // A page without a resource dictionary has nothing to extract.
                continue;
            }
            for (COSName xObjectName : resources.getXObjectNames()) {
                PDXObject xObject = resources.getXObject(xObjectName);
                if (!(xObject instanceof PDImageXObject)) {
                    continue;
                }
                PDImageXObject imageObject = (PDImageXObject) xObject;
                BufferedImage decoded = imageObject.getImage();

                String suffix = imageObject.getSuffix();
                if (suffix == null) {
                    suffix = "png";
                }

                File target = new File(path + index + "." + suffix);
                // ImageIO.write returns false (without throwing) when no writer fits.
                if (!ImageIO.write(decoded, suffix, target)) {
                    target = new File(path + index + ".png");
                    if (!ImageIO.write(decoded, "png", target)) {
                        throw new IOException("No ImageIO writer could encode image " + target);
                    }
                }
                index++;
            }
        }
    }
    /**
     * Extracts the text of a PDF and writes it, UTF-8 encoded, to a file that has the
     * same path as the source but with a {@code .doc} extension.
     *
     * <p>Note that the output is the plain text produced by {@code PDFTextStripper};
     * it is not a real Word document, only a text file carrying a {@code .doc} name.
     *
     * <p>I/O failures are not propagated: the stack trace is printed and the method returns.
     *
     * @param pdfPath path of the PDF file to convert; if its last path segment has no
     *                extension, {@code .doc} is simply appended
     */
    public static void pdfToWord(String pdfPath){
        // Strip the extension only when the last dot belongs to the file name itself,
        // not to a parent directory, and tolerate names without any dot.
        int lastDot = pdfPath.lastIndexOf('.');
        int lastSeparator = Math.max(pdfPath.lastIndexOf('/'), pdfPath.lastIndexOf('\\'));
        String basePath = lastDot > lastSeparator ? pdfPath.substring(0, lastDot) : pdfPath;
        String fileName = basePath + ".doc";

        // try-with-resources guarantees the document and the output are closed even
        // when text extraction fails part-way through.
        try (PDDocument doc = PDDocument.load(new File(pdfPath))) {
            int pagenumber = doc.getNumberOfPages();
            System.out.println(fileName);
            try (FileOutputStream fos = new FileOutputStream(fileName);
                 Writer writer = new OutputStreamWriter(fos, "UTF-8")) {
                PDFTextStripper stripper = new PDFTextStripper();
                stripper.setSortByPosition(true); // emit text in positional order
                stripper.setStartPage(1);         // first page to convert
                stripper.setEndPage(pagenumber);  // last page to convert
                stripper.writeText(doc, writer);
            }
            System.out.println("pdf转换word成功!");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
     /**
     * Resolution, in dots per inch, used when rendering PDF pages to images.
     * A larger value gives a sharper result at the cost of slower conversion.
     * NOTE(review): 1600 is very high; the pixel count grows with the square of the
     * DPI, so large pages may need a lot of memory — confirm this value is intended.
     */
    private static final Integer DPI = 1600;
    /**
     * Image format of the rendered pages; used both as the ImageIO format name
     * and as the output file extension.
     */
    private static final String IMG_TYPE = "png";
    
      /**
     * Renders every page of a PDF to an image file.
     *
     * <p>Page {@code i} (zero-based) is rendered at {@code DPI} and written to
     * {@code path + fileName + i + "." + IMG_TYPE}. {@code path} is used as a raw string
     * prefix, so it has to end with a file separator when it denotes a directory.
     * The document is closed before the method returns, whether or not rendering succeeds.
     *
     * @param pdfPath  the PDF file to render
     * @param path     string prefix (typically a directory ending in a separator) for the output files
     * @param fileName base name of the generated images; the page index is appended to it
     * @throws IOException if the PDF cannot be loaded, a page cannot be rendered,
     *                     or an image cannot be written
     */
    public static void  pdfToImage(File pdfPath,String path,String fileName) throws IOException {
        // try-with-resources releases the document even when a page fails to render.
        try (PDDocument doc = PDDocument.load(pdfPath)) {
            PDFRenderer renderer = new PDFRenderer(doc);
            int pageCount = doc.getNumberOfPages();
            for (int i = 0; i < pageCount; i++) {
                BufferedImage image = renderer.renderImageWithDPI(i, DPI);
                File target = new File(path + fileName + i + "." + IMG_TYPE);
                // ImageIO.write signals "no suitable writer" by returning false.
                if (!ImageIO.write(image, IMG_TYPE, target)) {
                    throw new IOException("No ImageIO writer available for format " + IMG_TYPE);
                }
            }
        }
    }
  /**
     * Demo entry point: extracts the embedded images of one sample PDF and converts
     * another sample PDF to text. The file paths are hard-coded examples.
     *
     * @param args unused
     * @throws Exception if the sample PDF cannot be loaded or its images cannot be written
     */
  public static void main(String[]args) throws Exception {
        // Close the document once the images are extracted instead of leaking it.
        try (PDDocument document = PDDocument.load(new File("F:\\webservice\\webservice教程.pdf"))) {
            getPdfImages(document,"E:\\upload\\");
        }
        pdfToWord("D:/个人陈述写作提纲.pdf");
    }
<dependencies>
    
    <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>pdfbox</artifactId>
        <version>2.0.23</version>
    </dependency>
</dependencies>

你可能感兴趣的:(Java,pdfbox,java)