引用借鉴博客地址:
https://blog.csdn.net/yjclsx/article/details/51441632
https://blog.csdn.net/qq_36903131/article/details/82529676
直接上代码了:
package com.allen.utils;
import java.awt.Color;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFPictureData;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
import org.apache.poi.xslf.usermodel.XSLFTextRun;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.w3c.dom.Document;
/*通过poi实现word、excel、ppt转html
*
* poi 版本用的org.apache.poi 4.1.0
* 包名分别为:
* poi
* poi-ooxml
* poi-ooxml-schemas
* poi-scratchpad
*
*/
public class WordExcelPptToHtml {
// 适用格式为*.doc,即Word 97-2003文档
public static void WordToHtml(String filePath,String fileName){
InputStream input;
try {
input = new FileInputStream(filePath + fileName);
HWPFDocument wordDocument = new HWPFDocument(input);
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
wordToHtmlConverter.setPicturesManager(
new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType,
String suggestedName, float widthInches, float heightInches) {
return suggestedName;
}
}
);
wordToHtmlConverter.processDocument(wordDocument);
List pics = wordDocument.getPicturesTable().getAllPictures();
if (pics != null) {
for (int i = 0; i < pics.size(); i++) {
Picture pic = (Picture) pics.get(i);
try {
pic.writeImageContent(new FileOutputStream(filePath+ pic.suggestFullFileName()));
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
}
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(outStream);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
outStream.close();
String content = new String(outStream.toByteArray());
FileUtils.writeStringToFile(new File(filePath, "WorldToHtml.html"), content, "utf-8");
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
// 适用格式 为 *.xls,即EXCEL 97-2003文档, 格式为.xlsx 的不行
public static void ExcelToHtml(String filePath,String fileName){
InputStream input;
try {
input = new FileInputStream(filePath+fileName);
HSSFWorkbook excelBook=new HSSFWorkbook(input);
ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter (DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument() );
excelToHtmlConverter.processWorkbook(excelBook);
List pics = excelBook.getAllPictures();
if (pics != null) {
for (int i = 0; i < pics.size(); i++) {
Picture pic = (Picture) pics.get (i);
try {
pic.writeImageContent (new FileOutputStream (filePath + pic.suggestFullFileName() ) );
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
}
Document htmlDocument =excelToHtmlConverter.getDocument();
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource (htmlDocument);
StreamResult streamResult = new StreamResult (outStream);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty (OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty (OutputKeys.INDENT, "yes");
serializer.setOutputProperty (OutputKeys.METHOD, "html");
serializer.transform (domSource, streamResult);
outStream.close();
String content = new String (outStream.toByteArray() );
FileUtils.writeStringToFile(new File (filePath, "ExcelToHtml.html"), content, "utf-8");
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
// 适用PPT格式为 *.pptx ,ppt转图片插入html 即可
public static boolean PPTtoImage(String filePath,String fileName){
File file = new File(filePath+fileName);
boolean isppt = checkFile(file);
if (!isppt) {
System.out.println("The image you specify don't exit!");
return false;
}
XMLSlideShow ppt = null;
try {
ppt = new XMLSlideShow(new FileInputStream(filePath+fileName));
Dimension pgsize = ppt.getPageSize();
List slides = ppt.getSlides();
// 遍历幻灯片
for (XSLFSlide slide : slides) {
// for(int i=0;i shapes = slide.getShapes();
// 遍历图形
for (XSLFShape shape : shapes) {
// 判断该图形类是否是文本框类
if (shape instanceof XSLFTextShape) {
// 将图像类强制装换成文本框类
XSLFTextShape ts = (XSLFTextShape) shape;
// 获取文本框内的文字
String str = ts.getText();
System.out.println(str);
// 若想对文本框内的文字进行更改,还需要进行如下步骤
List textParagraphs = ts.getTextParagraphs();
for (XSLFTextParagraph tp : textParagraphs) {
List textRuns = tp.getTextRuns();
for (XSLFTextRun r : textRuns) {
if ("fuck you".equals(r.getRawText())) {
// 对匹配到的字符串进行更改
r.setText("I love you");
// 设置字体颜色
r.setFontColor(Color.RED);
}
}
}
}
}
BufferedImage img = new BufferedImage(pgsize.width,pgsize.height, BufferedImage.TYPE_INT_RGB);
Graphics2D graphics = img.createGraphics();
graphics.setPaint(Color.BLUE);
graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));
slide.draw(graphics);
// 这里设置图片的存放路径和图片的格式(jpeg,png,bmp等等),注意生成文件路径
FileOutputStream out = new FileOutputStream(filePath+"ppt_"+ new Date().getTime() + ".jpeg");
javax.imageio.ImageIO.write(img, "jpeg", out);
out.close();
}
System.out.println("success..........");
return true;
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
return false;
}
// function 检查文件是否为PPT
public static boolean checkFile(File file) {
boolean isppt = false;
String filename = file.getName();
String suffixname = null;
if (filename != null && filename.indexOf(".") != -1) {
suffixname = filename.substring(filename.indexOf("."));
if (suffixname.equals(".pptx")) {
isppt = true;
}
return isppt;
} else {
return isppt;
}
}
public static void main(String[] args) throws Throwable {
final String filePath = "G:\\";
final String worldFileName = "Word_test.doc";
// WordToHtml( filePath, worldFileName);
String excelFileName="123.xls";
// ExcelToHtml(filePath, excelFileName);
PPTtoImage(filePath,"testPPT.pptx");
}
}
pom 引用包:
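<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.1.0</version>
</dependency>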