java实现截取PDF指定页并进行图片格式转换

1、引入依赖


    org.apache.pdfbox
    pdfbox
    2.0.16


    org.apache.pdfbox
    fontbox
    2.0.16
jar包下载地址:
https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox
https://mvnrepository.com/artifact/org.apache.pdfbox/fontbox
 
2、实现DEMO
package com.dddpeter.app;

import org.apache.pdfbox.multipdf.Splitter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;

import javax.imageio.ImageIO;
import javax.imageio.stream.ImageOutputStream;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.List;
import java.util.ListIterator;

public class PDFUtils {
    public static String splitPdf(int pageNum, String source, String dest) {
        File indexFile = new File(source);
        File outFile = new File(dest);
        PDDocument document = null;
        try {
            document = PDDocument.load(indexFile);
            // document.getNumberOfPages();
            Splitter splitter = new Splitter();
            splitter.setStartPage(pageNum);
            splitter.setEndPage(pageNum);
            List pages = splitter.split(document);
            ListIterator iterator = pages.listIterator();
            while (iterator.hasNext()) {
                PDDocument pd = iterator.next();
                if (outFile.exists()) {
                    outFile.delete();
                }
                pd.save(outFile);
                pd.close();
                if (outFile.exists()) {
                    return outFile.getPath();
                }
            }
            document.close();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }
    public static void pdfFileToImage(File pdffile,String targetPath){
        try {
            FileInputStream instream = new FileInputStream(pdffile);
            InputStream byteInputStream=null;
            try {
                PDDocument doc = PDDocument.load(instream);
                PDFRenderer renderer = new PDFRenderer(doc);
                int pageCount = doc.getNumberOfPages();
                if (pageCount > 0) {
                    BufferedImage image = renderer.renderImage(0, 4.0f);
                    image.flush();
                    ByteArrayOutputStream bs = new ByteArrayOutputStream();
                    ImageOutputStream imOut;
                    imOut = ImageIO.createImageOutputStream(bs);
                    ImageIO.write(image, "png", imOut);
                    byteInputStream = new ByteArrayInputStream(bs.toByteArray());
                    byteInputStream.close();
                }
                doc.close();
            }
            catch (IOException e) {
                e.printStackTrace();
            }
            File uploadFile = new File(targetPath);
            FileOutputStream fops;
            fops = new FileOutputStream(uploadFile);
            fops.write(readInputStream(byteInputStream));
            fops.flush();
            fops.close();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static byte[] readInputStream(InputStream inStream) throws Exception {
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        byte[] buffer = new byte[1024];
        int len = 0;
        while ((len = inStream.read(buffer)) != -1) {
            outStream.write(buffer, 0, len);
        }
        inStream.close();
        return outStream.toByteArray();
    }


    public static void main(String[] args) {
        String path = splitPdf(4,"D:\\data\\11.pdf","D:\\data\\out11.pdf");
        File file =new File(path);
        //上传的是png格式的图片结尾
        String targetfile="D:\\data\\out11.png";
        pdfFileToImage(file,targetfile);

    }
}

其中指定第几页可配合https://www.cnblogs.com/xsdty/p/11463174.html

实现动态获取关键字页码,动态截取转换为自己想要的格式图片。

你可能感兴趣的:(java实现截取PDF指定页并进行图片格式转换)