对于大文件来说,如果必须等整个文件加载完成后才能预览,用户体验会非常差。针对这个问题,博主对 .pdf、.docx、.doc、.pptx 文件做了按页分割(逐页转换为图片);Excel 文件的支持后续会持续更新。
上源码
import com.aspose.words.Document;
import com.aspose.words.ImageSaveOptions;
import com.aspose.words.SaveFormat;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.xslf.usermodel.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageWriter;
import javax.imageio.stream.ImageOutputStream;
import java.awt.*;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
/**
 * Utility methods that split office documents into one image per page, so a viewer can
 * load a large file page by page instead of all at once.
 *
 * <p>Supported inputs: {@code .pptx} (via Apache POI), {@code .doc}/{@code .docx}
 * (via Aspose.Words) and {@code .pdf} (via Apache PDFBox).
 *
 * <p>Usage example:
 * <pre>
 * OfficeUtil.doPPTtoImage("D:/1.pptx", "D:/ppt", "test", "jpg");
 * OfficeUtil.doWordToImage("D:/1.docx", "D:/ppt", "word", "jpg");
 * OfficeUtil.doPdfToImage("D:/1.pdf", "D:/ppt", "pdf", "jpg");
 * </pre>
 *
 * @author lgy (created 2019/12/6)
 */
public class OfficeUtil {

    private static final Logger log = LoggerFactory.getLogger(OfficeUtil.class);

    /** Font family (SimSun) forced onto every PPTX text run before rendering. */
    private static final String PPT_FONT_FAMILY = "宋体";

    /** Resolution for rasterizing PDF pages; a higher DPI is sharper but slower to render. */
    private static final int PDF_RENDER_DPI = 180;

    /** Resolution for the first-page PDF thumbnail. */
    private static final int PDF_THUMBNAIL_DPI = 30;

    /** Scale factor applied when rendering Word pages. */
    private static final float WORD_RENDER_SCALE = 3.0F;

    /** JPEG quality used when rendering Word pages as JPEG. */
    private static final int WORD_JPEG_QUALITY = 100;

    /**
     * Renders every slide of a {@code .pptx} presentation to its own image file.
     *
     * <p>Output files are named {@code <picName>_<n>.<picType>} where {@code n} starts at 1.
     * The output directory is created when it does not exist.
     *
     * @param pptPath path of the presentation to convert (only {@code .pptx} is accepted)
     * @param path    directory that receives the generated images
     * @param picName file-name prefix of the generated images
     * @param picType image format without a leading dot, e.g. {@code jpg}, {@code bmp}, {@code png}
     * @return {@code true} when every slide was written; {@code false} when the input is not a
     *         {@code .pptx} file or any error occurred (the error is logged)
     */
    public static boolean doPPTtoImage(String pptPath, String path, String picName, String picType) {
        log.info("PPT转Image 》》》》》》 待转文件pptpath = " + pptPath + " 存放路径imgPath = " + path + " 命名(pickName_i)picName = " + picName + " 图片类型picType = " + picType);
        long startTime = System.currentTimeMillis();
        try {
            File pptFile = new File(pptPath);
            if (!checkPPTFile(pptFile)) {
                log.warn("Not a .pptx file, skipping conversion: {}", pptPath);
                return false;
            }
            ensureDirectory(path);
            // Both the raw stream and the slide show are released even if rendering fails.
            try (InputStream in = new FileInputStream(pptFile);
                 XMLSlideShow slideShow = new XMLSlideShow(in)) {
                Dimension pageSize = slideShow.getPageSize();
                List<XSLFSlide> slides = slideShow.getSlides();
                for (int i = 0; i < slides.size(); i++) {
                    BufferedImage image = renderSlide(slides.get(i), pageSize);
                    File imageFile = new File(path, picName + "_" + (i + 1) + "." + picType);
                    log.info("imgFile:{}", imageFile);
                    writeImage(image, picType, imageFile);
                }
            }
            log.info("PPT转图片花费时间》》》》》》{}毫秒", System.currentTimeMillis() - startTime);
            return true;
        } catch (Exception e) {
            log.error("Failed to convert PPTX to images: " + pptPath, e);
            return false;
        }
    }

    /**
     * Renders every page of a {@code .doc}/{@code .docx} document to its own image file.
     *
     * <p>Output files are named {@code <imageName>_<i>.<picType>} where {@code i} starts at 0
     * (note: unlike {@link #doPPTtoImage}, which starts at 1). The encoder follows
     * {@code picType}: {@code png} and {@code bmp} are encoded as such, anything else is
     * encoded as JPEG. The output directory is created when it does not exist.
     *
     * @param wordPath  path of the Word document to convert
     * @param imgPath   directory that receives the generated images
     * @param imageName file-name prefix of the generated images
     * @param picType   image format without a leading dot, e.g. {@code jpg}, {@code bmp}, {@code png}
     * @return {@code true} when every page was written; {@code false} on any error (logged)
     */
    public static boolean doWordToImage(String wordPath, String imgPath, String imageName, String picType) {
        log.info("doc / docx 转Image 》》》》》》 待转文件wordPath = " + wordPath + " 存放路径imgPath = " + imgPath + " 命名(pickName_i)picName = " + imageName + " 图片类型picType = " + picType);
        long startTime = System.currentTimeMillis();
        // License authorization. NOTE(review): presumably installs the Aspose license so the
        // output is not watermarked - confirm against ReadConfigFile.
        ReadConfigFile.getLicense();
        try {
            Document doc = new Document(wordPath);

            // Pick the encoder that matches the requested extension; previously JPEG bytes
            // were written regardless of picType.
            ImageSaveOptions options;
            if ("png".equalsIgnoreCase(picType)) {
                options = new ImageSaveOptions(SaveFormat.PNG);
            } else if ("bmp".equalsIgnoreCase(picType)) {
                options = new ImageSaveOptions(SaveFormat.BMP);
            } else {
                options = new ImageSaveOptions(SaveFormat.JPEG);
            }
            options.setScale(WORD_RENDER_SCALE);
            options.setJpegQuality(WORD_JPEG_QUALITY);

            ensureDirectory(imgPath);

            int pageCount = doc.getPageCount();
            log.info("Word page count: {}", pageCount);
            for (int i = 0; i < pageCount; i++) {
                File imageFile = new File(imgPath, imageName + "_" + i + "." + picType);
                log.info("imgFile:{}", imageFile);
                options.setPageIndex(i);
                // Close each page's stream promptly; the original leaked one handle per page.
                try (OutputStream out = new FileOutputStream(imageFile)) {
                    doc.save(out, options);
                }
            }
            log.info("Word转图片花费时间》》》》》》{}毫秒", System.currentTimeMillis() - startTime);
            return true;
        } catch (Exception e) {
            log.error("Failed to convert Word document to images: " + wordPath, e);
            return false;
        }
    }

    /**
     * Renders every page of a PDF to its own image file at {@value #PDF_RENDER_DPI} DPI.
     *
     * <p>Output files are named {@code <imageName>_<i>.<imgType>} where {@code i} starts at 0
     * (note: unlike {@link #doPPTtoImage}, which starts at 1). The output directory is created
     * when it does not exist.
     *
     * @param fileUrl   path of the PDF file to convert
     * @param imagePath directory that receives the generated images
     * @param imageName file-name prefix of the generated images
     * @param imgType   image format without a leading dot, e.g. {@code jpg}, {@code bmp}, {@code png}
     * @return {@code true} when every page was written; {@code false} when the PDF could not be
     *         loaded or any error occurred (the error is logged)
     */
    public static boolean doPdfToImage(String fileUrl, String imagePath, String imageName, String imgType) {
        // A null resource is permitted by try-with-resources and is simply not closed.
        try (PDDocument document = getPDDocument(fileUrl)) {
            if (document == null) {
                // getPDDocument has already logged the cause.
                return false;
            }
            ensureDirectory(imagePath);
            PDFRenderer renderer = new PDFRenderer(document);
            int pages = document.getNumberOfPages();
            for (int i = 0; i < pages; i++) {
                BufferedImage image = renderer.renderImageWithDPI(i, PDF_RENDER_DPI);
                File imageFile = new File(imagePath, imageName + "_" + i + "." + imgType);
                log.info("imgFile:{}", imageFile);
                writeImage(image, imgType, imageFile);
            }
            return true;
        } catch (Exception e) {
            log.error("Failed to convert PDF to images: " + fileUrl, e);
            return false;
        }
    }

    /**
     * Draws one slide onto a white, page-sized RGB image.
     *
     * @param slide    slide to render; its text runs are re-fonted first (see {@link #setFont})
     * @param pageSize slide dimensions of the presentation
     * @return the rendered slide
     */
    private static BufferedImage renderSlide(XSLFSlide slide, Dimension pageSize) {
        setFont(slide);
        BufferedImage image = new BufferedImage(pageSize.width, pageSize.height, BufferedImage.TYPE_INT_RGB);
        Graphics2D graphics = image.createGraphics();
        try {
            graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            graphics.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
            graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
            graphics.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS, RenderingHints.VALUE_FRACTIONALMETRICS_ON);
            // Paint a white background first; a fresh RGB image is otherwise black.
            graphics.setPaint(Color.white);
            graphics.fill(new Rectangle2D.Float(0, 0, pageSize.width, pageSize.height));
            slide.draw(graphics);
        } finally {
            graphics.dispose();
        }
        return image;
    }

    /**
     * Forces the font family of every text run in the slide's top-level text shapes to
     * {@link #PPT_FONT_FAMILY}. Shapes that are not {@link XSLFTextShape} are left untouched.
     *
     * <p>NOTE(review): presumably done so CJK text renders on hosts that lack the fonts used in
     * the presentation - confirm.
     *
     * @param slide slide whose text runs are modified in place
     */
    private static void setFont(XSLFSlide slide) {
        for (XSLFShape shape : slide.getShapes()) {
            if (!(shape instanceof XSLFTextShape)) {
                continue;
            }
            for (XSLFTextParagraph paragraph : ((XSLFTextShape) shape).getTextParagraphs()) {
                for (XSLFTextRun run : paragraph.getTextRuns()) {
                    run.setFontFamily(PPT_FONT_FAMILY);
                }
            }
        }
    }

    /**
     * Checks, by file name only, whether the file has a {@code .pptx} extension
     * (case-insensitive). The file content is not inspected.
     *
     * @param file file to check
     * @return {@code true} when the name ends with {@code .pptx}
     */
    private static boolean checkPPTFile(File file) {
        String name = file.getName();
        int dot = name.lastIndexOf('.');
        return dot >= 0 && ".pptx".equalsIgnoreCase(name.substring(dot));
    }

    /**
     * Returns the number of pages of a PDF, using PDFBox.
     *
     * @param filePath path of the PDF file
     * @return the page count
     * @throws IOException if the file cannot be read or parsed
     */
    public static int getNumberOfPages(String filePath) throws IOException {
        try (PDDocument document = PDDocument.load(new File(filePath))) {
            return document.getNumberOfPages();
        }
    }

    /**
     * Writes a PNG thumbnail of the first page of a PDF, rendered at
     * {@value #PDF_THUMBNAIL_DPI} DPI.
     *
     * <p>Failures are logged rather than rethrown; the {@code throws} clause is kept so that
     * existing callers continue to compile.
     *
     * @param filePath path of the PDF file
     * @param outPath  path of the PNG file to write
     * @throws IOException declared for backward compatibility
     */
    public static void getThumbnails(String filePath, String outPath) throws IOException {
        Iterator<ImageWriter> writers = ImageIO.getImageWritersBySuffix("png");
        if (!writers.hasNext()) {
            log.error("No PNG ImageWriter available; cannot create thumbnail for {}", filePath);
            return;
        }
        ImageWriter writer = writers.next();
        try (PDDocument document = PDDocument.load(new File(filePath))) {
            // Render before opening the output so a render failure does not leave an empty file.
            BufferedImage thumbnail = new PDFRenderer(document)
                    .renderImageWithDPI(0, PDF_THUMBNAIL_DPI, ImageType.RGB);
            // Closing an ImageOutputStream does not close its destination stream, so the
            // FileOutputStream is managed as a separate resource.
            try (OutputStream fileOut = new FileOutputStream(outPath);
                 ImageOutputStream imageOut = ImageIO.createImageOutputStream(fileOut)) {
                if (imageOut == null) {
                    throw new IOException("Unable to create an ImageOutputStream for " + outPath);
                }
                writer.setOutput(imageOut);
                writer.write(new IIOImage(thumbnail, null, null));
                imageOut.flush();
            } finally {
                thumbnail.flush();
            }
        } catch (Exception e) {
            log.error("Failed to create thumbnail for " + filePath, e);
        } finally {
            writer.dispose();
        }
    }

    /**
     * Loads a PDF document from a file path. The file stream is closed as soon as loading
     * finishes, before the document is returned.
     *
     * @param fileUrl path of the PDF file
     * @return the loaded document, which the caller must close, or {@code null} when loading
     *         failed (the failure is logged)
     */
    private static PDDocument getPDDocument(String fileUrl) {
        try (InputStream in = new FileInputStream(new File(fileUrl))) {
            return PDDocument.load(in);
        } catch (Exception e) {
            log.error("Failed to load PDF: " + fileUrl, e);
            return null;
        }
    }

    /**
     * Makes sure the given directory exists, creating it (and missing parents) if needed.
     *
     * @param dirPath directory path
     * @throws IOException if the directory does not exist and cannot be created
     */
    private static void ensureDirectory(String dirPath) throws IOException {
        File dir = new File(dirPath);
        // The second isDirectory() check tolerates another thread creating it concurrently.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Unable to create output directory: " + dir);
        }
    }

    /**
     * Encodes an image to a file and always closes the output stream.
     *
     * @param image  image to encode
     * @param format informal format name understood by {@link ImageIO}, e.g. {@code jpg}
     * @param target file to write
     * @throws IOException if writing fails or no {@link ImageIO} writer supports {@code format}
     */
    private static void writeImage(BufferedImage image, String format, File target) throws IOException {
        boolean written;
        try (OutputStream out = new FileOutputStream(target)) {
            // ImageIO.write returns false (without throwing) when no suitable writer exists.
            written = ImageIO.write(image, format, out);
        }
        if (!written) {
            // Best-effort removal of the empty file left behind by the failed write.
            if (!target.delete()) {
                log.warn("Could not delete empty image file: {}", target);
            }
            throw new IOException("No ImageIO writer available for format '" + format + "'");
        }
    }
}