http://zhuyufufu.iteye.com/admin/blogs/2012236
PDFBox 与 PDFRender在转换时有清晰度与效率的问题,
PDFBox转换效果稍好,PDFRender更快,但是多线程操作不能大幅提高转换效率。
搜索了一下,找到了IcePDF,它是开源的,但是字体支持要收费。
拿IcePDF自带的例子展示,上代码:
package com.zas.ice.test; /* * Copyright 2006-2013 ICEsoft Technologies Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the * License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. */ import org.icepdf.core.exceptions.PDFException; import org.icepdf.core.exceptions.PDFSecurityException; import org.icepdf.core.pobjects.Document; import org.icepdf.core.pobjects.PDimension; import org.icepdf.core.pobjects.Page; import org.icepdf.core.util.GraphicsRenderingHints; import org.icepdf.ri.util.FontPropertiesManager; import org.icepdf.ri.util.PropertiesManager; import javax.imageio.ImageIO; import java.awt.*; import java.awt.image.BufferedImage; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.ResourceBundle; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; /** * ThePageCapture
class is an example of how to save page * captures to disk. A file specified at the command line is opened and every * page in the document is captured as an image and saved to disk as a * PNG graphic file. * * @since 5.0 */ public class PageCapture { static String outputFilePath = "D:\\pdf\\Linux命令行技术大全222222\\"; public static void main(String[] args) { // Get a file from the command line to open String filePath = "D:\\pdf\\面向对象软件构造(第二版)中英对照版.pdf"; // read/store the font cache. ResourceBundle messageBundle = ResourceBundle.getBundle( PropertiesManager.DEFAULT_MESSAGE_BUNDLE); PropertiesManager properties = new PropertiesManager(System.getProperties(), ResourceBundle.getBundle(PropertiesManager.DEFAULT_MESSAGE_BUNDLE)); new FontPropertiesManager(properties, System.getProperties(), messageBundle); // start the capture PageCapture pageCapture = new PageCapture(); pageCapture.capturePages(filePath); } public void capturePages(String filePath) { long beginTime = System.nanoTime(); // open the url Document document = new Document(); // setup two threads to handle image extraction. ExecutorService executorService = Executors.newFixedThreadPool(100); try { document.setFile(filePath); // create a list of callables. 
int pages = document.getNumberOfPages(); java.util.List> callables = new ArrayList >(pages); for (int i = 0; i <= pages; i++) { callables.add(new CapturePage(document, i)); } executorService.invokeAll(callables); executorService.submit(new DocumentCloser(document)).get(); } catch (InterruptedException e) { System.out.println("Error parsing PDF document " + e); } catch (ExecutionException e) { System.out.println("Error parsing PDF document " + e); } catch (PDFException ex) { System.out.println("Error parsing PDF document " + ex); } catch (PDFSecurityException ex) { System.out.println("Error encryption not supported " + ex); } catch (FileNotFoundException ex) { System.out.println("Error file not found " + ex); } catch (IOException ex) { System.out.println("Error handling PDF document " + ex); } executorService.shutdown(); long endTime = System.nanoTime(); System.out.println("耗时: " + (endTime - beginTime) / 1000000000 + " 秒" ); } /** * Captures images found in a page parse to file. */ public class CapturePage implements Callable { private Document document; private int pageNumber; private float scale = 1f; private float rotation = 0f; private CapturePage(Document document, int pageNumber) { this.document = document; this.pageNumber = pageNumber; } public Void call() { Page page = document.getPageTree().getPage(pageNumber); page.init(); PDimension sz = page.getSize(Page.BOUNDARY_CROPBOX, rotation, scale); int pageWidth = (int) sz.getWidth(); int pageHeight = (int) sz.getHeight(); BufferedImage image = new BufferedImage(pageWidth, pageHeight, BufferedImage.TYPE_INT_RGB); Graphics g = image.createGraphics(); page.paint(g, GraphicsRenderingHints.PRINT, Page.BOUNDARY_CROPBOX, rotation, scale); g.dispose(); // capture the page image to file try { System.out.println("Capturing page " + pageNumber); File file = new File(outputFilePath + "imageCapture_" + pageNumber + ".png"); ImageIO.write(image, "png", file); } catch (Throwable e) { e.printStackTrace(); } image.flush(); 
return null; } } /** * Disposes the document. */ public class DocumentCloser implements Callable { private Document document; private DocumentCloser(Document document) { this.document = document; } public Void call() { if (document != null) { document.dispose(); System.out.println("Document disposed"); } return null; } } }
代码用到了JDK的线程池,也用到了Callable任务,算是长见识了。
在转换的效果上比PDFRender略好,与PDFBox差不远;在转换的效率上,比PDFBox好很多,比PDFRender略差。
资料包是从一位Iteye用户那里下的,但是记不住他的链接了。
IcePdf官网 http://www.icesoft.org/java/home.jsf