Java利用PdfBox实现Pdf转图片

一、导入依赖


<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.9</version>
</dependency>

<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>fontbox</artifactId>
    <version>2.0.9</version>
</dependency>

<dependency>
    <groupId>commons-logging</groupId>
    <artifactId>commons-logging</artifactId>
    <version>1.2</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>jbig2-imageio</artifactId>
    <version>3.0.2</version>
</dependency>

二、代码实现

package com.example.pdf_box_to_images.contorller;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;

/**
 * Converts the pages of a PDF file into PNG images using Apache PDFBox.
 *
 * <p>The source document is read from {@code fileAddress/filename.pdf}. Each rendered page is
 * written to {@code fileAddress/images/filename_<pageNumber>.png}, where the page number in the
 * file name is 1-based.
 *
 * @author [email protected]
 * @version 1.0
 * @date 2021/10/15 16:19
 */
public class PdfToImages {
    /** Rendering resolution used when an explicit page range is requested. */
    private static final float RANGE_DPI = 144;

    /** Rendering resolution used when the whole document is converted. */
    private static final float FULL_DPI = 250;

    /**
     * Renders the pages in the half-open range {@code [indexOfStart, indexOfEnd)} to PNG files.
     *
     * <p>Indices are 0-based. Values outside the document are clamped to
     * {@code [0, pageCount]}, so an oversized range converts as many pages as exist instead of
     * failing inside the renderer. I/O failures are reported on standard error and not rethrown.
     *
     * @param fileAddress  directory that contains the PDF and receives the {@code images} folder
     * @param filename     PDF file name without the {@code .pdf} extension
     * @param indexOfStart 0-based index of the first page to render (inclusive)
     * @param indexOfEnd   0-based index at which rendering stops (exclusive)
     */
    public static void pdf2png(String fileAddress, String filename, int indexOfStart, int indexOfEnd) {
        renderPages(fileAddress, filename, indexOfStart, indexOfEnd, RANGE_DPI);
    }

    /**
     * Renders every page of the document to PNG files.
     *
     * <p>I/O failures are reported on standard error and not rethrown.
     *
     * @param fileAddress directory that contains the PDF and receives the {@code images} folder
     * @param filename    PDF file name without the {@code .pdf} extension
     */
    public static void pdf2png(String fileAddress, String filename) {
        // Integer.MAX_VALUE is clamped to the real page count by renderPages.
        renderPages(fileAddress, filename, 0, Integer.MAX_VALUE, FULL_DPI);
    }

    /** Shared implementation: loads the PDF, renders the clamped page range and writes the PNGs. */
    private static void renderPages(
            String fileAddress, String filename, int indexOfStart, int indexOfEnd, float dpi) {
        File file = new File(fileAddress + "/" + filename + ".pdf");
        File outputDir = new File(fileAddress + "/images");
        // try-with-resources guarantees the document is closed even when rendering fails.
        try (PDDocument doc = PDDocument.load(file)) {
            // mkdirs() returns false when the directory already exists (possibly created by a
            // concurrent caller), so only fail when it is really missing afterwards.
            if (!outputDir.mkdirs() && !outputDir.isDirectory()) {
                throw new IOException("Cannot create output directory: " + outputDir);
            }
            PDFRenderer renderer = new PDFRenderer(doc);
            int first = Math.max(indexOfStart, 0);
            int last = Math.min(indexOfEnd, doc.getNumberOfPages());
            for (int i = first; i < last; i++) {
                BufferedImage image = renderer.renderImageWithDPI(i, dpi);
                // File names use the 1-based page number.
                File target = new File(outputDir, filename + "_" + (i + 1) + ".png");
                if (!ImageIO.write(image, "PNG", target)) {
                    throw new IOException("No PNG writer available for " + target);
                }
            }
        } catch (IOException e) {
            System.err.println("Failed to convert " + file + " to images");
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: converts a sample PDF, either completely or for a fixed page range.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String fileAddress = "/home/kv/IdeaProjects/MarkVeriousTests2/PdfBoxToImages/src/main/resources";
        String filename = "Safety, Efficacy, and Pharmacokinetics of Almo";
        int indexOfStart = 0; // first page to convert (0-based)
        int indexOfEnd = -1;  // page at which to stop; -1 means convert everything
        if (indexOfEnd == -1) {
            pdf2png(fileAddress, filename);
        } else {
            pdf2png(fileAddress, filename, indexOfStart, indexOfEnd);
        }
    }
}

三、中文乱码或者图片导出空白处理

在 Linux 中部署到 Docker 或者服务器端时,可能会出现中文乱码或者图片导出一片空白的情况。这是由于系统缺少相应的字体,只需要将字体导入即可。
解决中文乱码: https://blog.csdn.net/neulily2005/article/details/106003527/

四、利用多线程pdfbox提高效率

@Test
    /**
     * Converts one PDF to PNG images by splitting its pages into contiguous chunks, one per
     * worker task, and prints the elapsed time in whole seconds.
     *
     * <p>Documents with fewer pages than available processors are converted on the calling
     * thread. Otherwise each worker gets {@code pageCount / threadNum} pages and the last worker
     * additionally takes the remainder.
     */
    public void test07() {
        int threadNum = Runtime.getRuntime().availableProcessors();
        ThreadPoolExecutor pool =
                new ThreadPoolExecutor(threadNum, threadNum * 2, 3, TimeUnit.SECONDS, new ArrayBlockingQueue<>(3));
        String filePath = "/home/kv/IdeaProjects/MarkVeriousTests2/PdfBoxToImages/src/main/resources";
        String fileName = "ijms20010146";
        long start = System.currentTimeMillis();
        try {
            // The document is opened here only to learn the page count; close it right away.
            final int pageCount;
            try (PDDocument doc = PDDocument.load(new File(filePath + "/" + fileName + ".pdf"))) {
                pageCount = doc.getNumberOfPages();
            }
            if (pageCount < threadNum) {
                VeriousTest.pdf2png(filePath, fileName, 0, pageCount);
            } else {
                CountDownLatch count = new CountDownLatch(threadNum);
                int page = pageCount / threadNum;
                for (int i = 0; i < threadNum; i++) {
                    final int from = i * page;
                    // Exactly one task per worker; the last one runs to the end of the document
                    // so the pages left over by the integer division are not lost.
                    final int to = (i == threadNum - 1) ? pageCount : from + page;
                    pool.execute(() -> {
                        try {
                            VeriousTest.pdf2png(filePath, fileName, from, to);
                        } finally {
                            // Always release the latch, otherwise a failing task blocks await().
                            count.countDown();
                        }
                    });
                }
                count.await();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the test runner can observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } finally {
            pool.shutdown();
        }
        long end = System.currentTimeMillis();
        System.out.println((end - start) / 1000);
    }

你可能感兴趣的:(java,java)