针对 PDF(.pdf)、Word(.docx、.doc)、PPT(.pptx)文件的按页转图片实现策略

对于大文件来说,如果必须加载完整个文件才能预览,用户体验会非常差。针对这个问题,博主对 .pdf、.docx、.doc、.pptx 文件做了按页分割(每一页转成一张图片);对 Excel 文件的支持后续会持续更新。

上源码

import com.aspose.words.Document;
import com.aspose.words.ImageSaveOptions;
import com.aspose.words.SaveFormat;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.xslf.usermodel.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageWriter;
import javax.imageio.stream.ImageOutputStream;
import java.awt.*;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

/**
 * 

Description

*

office 相关工具类

* ====================== * ====================== * * @Author created by lgy * @Date 2019/12/6 */
public class OfficeUtil { private static final Logger log = LoggerFactory.getLogger(OfficeUtil.class); // public static void main(String[] args) { //// doPPTtoImage("D:\\1.pptx", "D:\\ppt", "test", "jpg"); // doWordToImage("D:\\1.docx", "D:\\ppt", "word", "jpg"); //// doPdfToImage("D:\\1.pdf", "D:\\ppt", "pdf", "jpg"); // } /** * PPT转图片(目前仅支持 .pptx) * * @param pptPath 待转图片ppt的路径 * @param path 存放路径 * @param picName 图片前缀名称 如 a 生成后为a_1,a_2 ... * @param picType 转成图片的类型,无点 如 jpg bmp png ... * @return true/false */ public static boolean doPPTtoImage(String pptPath, String path, String picName, String picType) { log.info("PPT转Image 》》》》》》 待转文件pptpath = " + pptPath + " 存放路径imgPath = " + path + " 命名(pickName_i)picName = " + picName + " 图片类型picType = " + picType); try { Long startTime = new Date().getTime(); File file = new File(pptPath); boolean isppt = checkPPTFile(file); if (!isppt) { return false; } File pathFile = new File(path); if (!pathFile.exists()) { pathFile.mkdirs(); } FileInputStream is = new FileInputStream(file); XMLSlideShow xmlSlideShow = new XMLSlideShow(is); List<XSLFSlide> xslfSlides = xmlSlideShow.getSlides(); Dimension pageSize = xmlSlideShow.getPageSize(); is.close(); for (int i = 0; i < xslfSlides.size(); i++) { setFont(xslfSlides.get(i)); BufferedImage img = new BufferedImage(pageSize.width, pageSize.height, BufferedImage.TYPE_INT_RGB); Graphics2D graphics = img.createGraphics(); graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); graphics.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY); graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); graphics.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS, RenderingHints.VALUE_FRACTIONALMETRICS_ON); graphics.setPaint(Color.white); graphics.fill(new Rectangle2D.Float(0, 0, pageSize.width, pageSize.height)); xslfSlides.get(i).draw(graphics); File op_img_file = new File(path + 
"/" + picName + "_" + (i + 1) + "." + picType); log.info("imgFile:" + op_img_file); FileOutputStream out = new FileOutputStream(op_img_file); ImageIO.write(img, picType, out); out.close(); } Long endTime = new Date().getTime(); log.info("PPT转图片花费时间》》》》》》" + (endTime - startTime) + "毫秒"); return true; } catch (Exception e) { log.error(e.getMessage()); } return false; } /** * 支持doc docx文件格式 * * @param wordPath 待转文件world文件路径 * @param imgPath 存储图片路径 * @param imageName 图片前缀名称 如 a 生成后为a_1,a_2 ... * @param picType 转成图片的类型,无点 如 jpg bmp png ... * @return */ public static boolean doWordToImage(String wordPath, String imgPath, String imageName, String picType) { log.info("doc / docx 转Image 》》》》》》 待转文件wordPath = " + wordPath + " 存放路径imgPath = " + imgPath + " 命名(pickName_i)picName = " + imageName + " 图片类型picType = " + picType); Long startTime = new Date().getTime(); // licence 授权 ReadConfigFile.getLicense(); try { Document doc = new Document(wordPath); ImageSaveOptions img = new ImageSaveOptions(SaveFormat.JPEG); // 设置图片比例以及质量 img.setScale(3.0F); img.setJpegQuality(100); // 如果文件路径不存在,则创建 File file = new File(imgPath); if (!file.exists() && !file.isDirectory()) { file.mkdirs(); } System.out.println(doc.getPageCount()); // 将图片保存到文件夹下面 for (int i = 0; i < doc.getPageCount(); i++) { File imgFile = new File(imgPath + "/" + imageName + "_" + i + "." + picType); System.out.println(imgFile); FileOutputStream fileOS = new FileOutputStream(imgFile); img.setPageIndex(i); doc.save(fileOS, img); } Long endTime = new Date().getTime(); log.info("Word转图片花费时间》》》》》》" + (endTime - startTime) + "毫秒"); return true; } catch (Exception e) { log.error(e.getMessage()); } return false; } /** * pdf 转图片 * * @param fileUrl 待转文件pdf文件路径 * @param imagePath 存储图片路径 * @param imageName 图片前缀名称 如 a 生成后为a_1,a_2 ... * @param imgType 转成图片的类型,无点 如 jpg bmp png ... 
* @return */ public static boolean doPdfToImage(String fileUrl, String imagePath, String imageName, String imgType) { PDDocument pdDocument = null; /* dpi越大转换后越清晰,相对转换速度越慢 */ int dpi = 180; try { pdDocument = getPDDocument(fileUrl); PDFRenderer renderer = new PDFRenderer(pdDocument); int pages = pdDocument.getNumberOfPages(); OutputStream sos = null; for (int i = 0; i < pages; i++) { BufferedImage image = renderer.renderImageWithDPI(i, dpi); File file = new File(imagePath + "/" + imageName + "_" + i + "." + imgType); System.out.println(file); sos = new FileOutputStream(file); ImageIO.write(image, imgType, sos); sos.close(); } return true; } catch (Exception e) { log.error(e.getMessage()); } finally { if (pdDocument != null) { try { pdDocument.close(); } catch (IOException e) { log.error(e.getMessage()); } } } return false; } /** * 设置PPTX字体 * * @param slide */ private static void setFont(XSLFSlide slide) { for (XSLFShape shape : slide.getShapes()) { if (shape instanceof XSLFTextShape) { XSLFTextShape txtshape = (XSLFTextShape) shape; for (XSLFTextParagraph paragraph : txtshape.getTextParagraphs()) { List<XSLFTextRun> truns = paragraph.getTextRuns(); for (XSLFTextRun trun : truns) { trun.setFontFamily("宋体"); } } } } } /*检测PPT文件*/ private static boolean checkPPTFile(File file) { int pos = file.getName().lastIndexOf("."); String extName = ""; if (pos >= 0) { extName = file.getName().substring(pos); } if (".pptx".equalsIgnoreCase(extName)) { return true; } return false; } /** * 通过PDFbox获取文章总页数 * * @param filePath:文件路径 * @return * @throws IOException */ public static int getNumberOfPages(String filePath) throws IOException { PDDocument pdDocument = PDDocument.load(new File(filePath)); int pages = pdDocument.getNumberOfPages(); pdDocument.close(); return pages; } /** * 通过PDFbox生成文件的缩略图 * * @param filePath:文件路径 * @param outPath:输出图片路径 * @throws IOException */ public static void getThumbnails(String filePath, String outPath) throws IOException { PDDocument pdDocument = 
null; BufferedImage imgTemp = null; ImageOutputStream imageout = null; try { // 利用PdfBox生成图像 pdDocument = PDDocument.load(new File(filePath)); PDFRenderer renderer = new PDFRenderer(pdDocument); // 构造图片 imgTemp = renderer.renderImageWithDPI(0, 30, ImageType.RGB); // 设置图片格式 Iterator<ImageWriter> it = ImageIO.getImageWritersBySuffix("png"); // 将文件写出 ImageWriter writer = it.next(); imageout = ImageIO.createImageOutputStream(new FileOutputStream(outPath)); writer.setOutput(imageout); writer.write(new IIOImage(imgTemp, null, null)); } catch (Exception e) { log.error(e.getMessage()); } finally { imgTemp.flush(); imageout.flush(); imageout.close(); pdDocument.close(); } } /** * 装在pdf对象. * * @param fileUrl * @return */ private static PDDocument getPDDocument(String fileUrl) { FileInputStream inputStream = null; PDDocument pdf = null; try { File file = new File(fileUrl); inputStream = new FileInputStream(file); pdf = PDDocument.load(inputStream); return pdf; } catch (Exception e) { log.error(e.getMessage()); } finally { try { inputStream.close(); } catch (IOException e) { e.printStackTrace(); } } return null; } }

你可能感兴趣的:(文件转图片,java,poi)