项目后端使用maven,前端使用了富文本编辑器。目前从html转换的word为doc格式,也能将图片进行处理。网上的一些例子好多都是图片无法解析,这个地方千万要注意对图片的路径进行解析,src必须是全路径 域名+图片的正式路径,如果您的需求对图片的大小有限制的话也要修改img的style属性,如果修改完没有效果将style去掉直接加上宽和高。
一.添加maven依赖
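<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.3</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>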
二.word转换为html
public static String docToHtml() throws Exception {
File path = new File(ResourceUtils.getURL(“classpath:”).getPath());
String imagePathStr = path.getAbsolutePath() + “\static\image\”;
String sourceFileName = path.getAbsolutePath() + “\static\test.doc”;
String targetFileName = path.getAbsolutePath() + “\static\test2.html”;
File file = new File(imagePathStr);
if(!file.exists()) {
file.mkdirs();
}
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(sourceFileName));
org.w3c.dom.Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
//保存图片,并返回图片的相对路径
wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
try (FileOutputStream out = new FileOutputStream(imagePathStr + name)) {
out.write(content);
} catch (Exception e) {
e.printStackTrace();
}
return “image/” + name;
});
wordToHtmlConverter.processDocument(wordDocument);
org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(new File(targetFileName));
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, “utf-8”);
serializer.setOutputProperty(OutputKeys.INDENT, “yes”);
serializer.setOutputProperty(OutputKeys.METHOD, “html”);
serializer.transform(domSource, streamResult);
return targetFileName;
docx格式转换为html
public static String docxToHtml() throws Exception {
File path = new File(ResourceUtils.getURL(“classpath:”).getPath());
String imagePath = path.getAbsolutePath() + “\static\image”;
String sourceFileName = path.getAbsolutePath() + “\static\test.docx”;
String targetFileName = path.getAbsolutePath() + “\static\test.html”;
OutputStreamWriter outputStreamWriter = null;
try {
XWPFDocument document = new XWPFDocument(new FileInputStream(sourceFileName));
XHTMLOptions options = XHTMLOptions.create();
// 存放图片的文件夹
options.setExtractor(new FileImageExtractor(new File(imagePath)));
// html中图片的路径
options.URIResolver(new BasicURIResolver("image"));
outputStreamWriter = new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8");
XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
xhtmlConverter.convert(document, outputStreamWriter, options);
} finally {
if (outputStreamWriter != null) {
outputStreamWriter.close();
}
}
return targetFileName;
}
三.html转换为word**(亲测可用)**
实现的思路:首先将需要的数据拼接成html代码,因为富文本编辑器最终生成的也是前端代码,所以这个位置比较好处理。如果富文本编辑器中有图片的话,需要对图片的路径和大小进行处理;网上很多帖子取不到图片或者图片大小不能保证,基本上都是这个地方的问题。
下边直接上代码:
package com.bupticet.education.lab.utils;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.derby.tools.sysinfo;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.struts2.ServletActionContext;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.util.ResourceUtils;
import org.springframework.web.util.HtmlUtils;
public class Snippet {
public String writeWordFile(String content,String url) {
String path = ServletActionContext.getServletContext().getRealPath("/userfiles/image/");
Map
if (!"".equals(path)) {
File fileDir = new File(path);
if (!fileDir.exists()) {
fileDir.mkdirs();
}
content = HtmlUtils.htmlUnescape(content);
List> imgs = getImgStr(content);
int count = 0;
try {
// 生成doc格式的word文档,需要手动改为docx
byte by[] = content.getBytes("UTF-8");
ByteArrayInputStream bais = new ByteArrayInputStream(by);
POIFSFileSystem poifs = new POIFSFileSystem();
DirectoryEntry directory = poifs.getRoot();
DocumentEntry documentEntry = directory.createDocument("WordDocument", bais);
FileOutputStream ostream = new FileOutputStream(url+".doc");
poifs.writeFilesystem(ostream);
bais.close();
ostream.close();
} catch (Exception e) {
e.printStackTrace();
}
}
return "success";
}
//获取html中的图片元素信息
public List> getImgStr(String htmlStr) {
List> pics = new ArrayList>();
Document doc = Jsoup.parse(htmlStr);
Elements imgs = doc.select("img");
for (Element img : imgs) {
HashMap map = new HashMap();
if(!"".equals(img.attr("width"))) {
map.put("width", img.attr("width").substring(0, img.attr("width").length() - 2));
}
if(!"".equals(img.attr("height"))) {
map.put("height", img.attr("height").substring(0, img.attr("height").length() - 2));
}
map.put("img", img.toString().substring(0, img.toString().length() - 1) + "/>");
map.put("img1", img.toString());
map.put("src", img.attr("src"));
pics.add(map);
}
return pics;
}
/**
* 解析html文件
* @param file
* @return
*/
public String readHtml(File file){
String body = "";
try {
FileInputStream iStream = new FileInputStream(file);
Reader reader = new InputStreamReader(iStream);
BufferedReader htmlReader = new BufferedReader(reader);
String line;
boolean found = false;
while (!found && (line = htmlReader.readLine()) != null) {
if (line.toLowerCase().indexOf("的前面可能存在空格
found = true;
}
}
found = false;
while (!found && (line = htmlReader.readLine()) != null) {
if (line.toLowerCase().indexOf("元素,则分行进行替代
String[] splitLines = line.split("元素
* @return 文件名
*/
/**
 * Extracts the file name (last path segment) from the first {@code src=} attribute found in a
 * line of HTML.
 *
 * <p>The attribute value may be wrapped in double quotes, single quotes, or be unquoted; both
 * forward slashes and backslashes are treated as path separators.
 *
 * @param htmlLine a line of HTML, may be {@code null}
 * @return the file name, or an empty string when there is no {@code src} attribute or its
 *         value contains no file name
 */
public static String extractFilename(String htmlLine) {
    if (htmlLine == null) {
        return "";
    }
    int srcIndex = indexOfSrcAttribute(htmlLine);
    if (srcIndex == -1) { // no image on this line
        return "";
    }
    String htmlSrc = htmlLine.substring(srcIndex + 4);

    // Tolerate whitespace between '=' and the value
    int start = 0;
    while (start < htmlSrc.length() && Character.isWhitespace(htmlSrc.charAt(start))) {
        start++;
    }
    if (start == htmlSrc.length()) {
        return "";
    }

    String path;
    char first = htmlSrc.charAt(start);
    if (first == '"' || first == '\'') {
        // Quoted value: everything up to the matching quote (or end of line if it is missing)
        int end = htmlSrc.indexOf(first, start + 1);
        path = end == -1 ? htmlSrc.substring(start + 1) : htmlSrc.substring(start + 1, end);
    } else {
        // Unquoted value: ends at whitespace or at the end of the tag
        int end = start;
        while (end < htmlSrc.length()
                && !Character.isWhitespace(htmlSrc.charAt(end))
                && htmlSrc.charAt(end) != '>') {
            end++;
        }
        path = htmlSrc.substring(start, end);
    }

    String[] segments = path.split("[/\\\\]"); // forward slash or backslash
    if (segments.length == 0) { // path consisted of separators only
        return "";
    }
    return segments[segments.length - 1];
}

/**
 * Finds {@code src=} case-insensitively in the original string. Searching a lower-cased copy
 * is unsafe because lower-casing can change the string length and shift the index.
 */
private static int indexOfSrcAttribute(String text) {
    for (int i = 0; i + 4 <= text.length(); i++) {
        if (text.regionMatches(true, i, "src=", 0, 4)) {
            return i;
        }
    }
    return -1;
}
/* public static void main(String[] args) {
String path = "C:\\Users\\Administrator\\Desktop\\图片\\xxx.html";
File file = new File(path);
//body
String readHtml = readHtml(file);
System.out.println(readHtml+"--------------------------------");
String writeWordFile = writeWordFile(readHtml);
System.out.println(writeWordFile);
}*/
}
2.工具类
package com.bupticet.education.lab.utils;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
/**
适用于word 2007
*/
public class OfficeUtil {
/**
根据指定的参数值、模板,生成 word 文档
@param param 需要替换的变量
@param template 模板
*/
public static CustomXWPFDocument generateWord(Map
CustomXWPFDocument doc = null;
try {
OPCPackage pack = POIXMLDocument.openPackage(template);
doc = new CustomXWPFDocument(pack);
if (param != null && param.size() > 0) {
//处理段落
List paragraphList = doc.getParagraphs();
processParagraphs(paragraphList, param, doc);
//处理表格
Iterator it = doc.getTablesIterator();
while (it.hasNext()) {
XWPFTable table = it.next();
List rows = table.getRows();
for (XWPFTableRow row : rows) {
List cells = row.getTableCells();
for (XWPFTableCell cell : cells) {
List paragraphListTable = cell.getParagraphs();
processParagraphs(paragraphListTable, param, doc);
}
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
return doc;
}
/**
处理段落
@param paragraphList
/
public static void processParagraphs(List paragraphList,Map
if(paragraphList != null && paragraphList.size() > 0){
for(XWPFParagraph paragraph:paragraphList){
//poi转换过来的行间距过大,需要手动调整
if(paragraph.getSpacingBefore() >= 1000 || paragraph.getSpacingAfter() > 1000) {
paragraph.setSpacingBefore(0);
paragraph.setSpacingAfter(0);
}
//设置word中左右间距
paragraph.setIndentationLeft(0);
paragraph.setIndentationRight(0);
List runs = paragraph.getRuns();
//加了图片,修改了paragraph的runs的size,所以循环不能使用runs
List allRuns = new ArrayList(runs);
for (XWPFRun run : allRuns) {
String text = run.getText(0);
if(text != null){
boolean isSetText = false;
for (Entry
String key = entry.getKey();
if(text.indexOf(key) != -1){
isSetText = true;
Object value = entry.getValue();
if (value instanceof String) {//文本替换
text = text.replace(key, value.toString());
} else if (value instanceof Map) {//图片替换
text = text.replace(key, “”);
Map pic = (Map)value;
int width = Integer.parseInt(pic.get(“width”).toString());
int height = Integer.parseInt(pic.get(“height”).toString());
int picType = getPictureType(pic.get(“type”).toString());
byte[] byteArray = (byte[]) pic.get(“content”);
ByteArrayInputStream byteInputStream = new ByteArrayInputStream(byteArray);
try {
String blipId = doc.addPictureData(byteInputStream,picType);
doc.createPicture(blipId,doc.getNextPicNameNumber(picType), width, height,paragraph);
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
if(isSetText){
run.setText(text,0);
}
}
}
}
}
}
/
根据图片类型,取得对应的图片类型代码
@param picType
@return int
/
private static int getPictureType(String picType){
int res = CustomXWPFDocument.PICTURE_TYPE_PICT;
if(picType != null){
if(picType.equalsIgnoreCase(“png”)){
res = CustomXWPFDocument.PICTURE_TYPE_PNG;
}else if(picType.equalsIgnoreCase(“dib”)){
res = CustomXWPFDocument.PICTURE_TYPE_DIB;
}else if(picType.equalsIgnoreCase(“emf”)){
res = CustomXWPFDocument.PICTURE_TYPE_EMF;
}else if(picType.equalsIgnoreCase(“jpg”) || picType.equalsIgnoreCase(“jpeg”)){
res = CustomXWPFDocument.PICTURE_TYPE_JPEG;
}else if(picType.equalsIgnoreCase(“wmf”)){
res = CustomXWPFDocument.PICTURE_TYPE_WMF;
}
}
return res;
}
/*
将输入流中的数据写入字节数组
@param in
@return
*/
public static byte[] inputStream2ByteArray(InputStream in,boolean isClose){
byte[] byteArray = null;
try {
int total = in.available();
byteArray = new byte[total];
in.read(byteArray);
} catch (IOException e) {
e.printStackTrace();
}finally{
if(isClose){
try {
in.close();
} catch (Exception e2) {
System.out.println(“关闭流失败”);
}
}
}
return byteArray;
}
}
3.工具类
package com.bupticet.education.lab.utils;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlToken;
import org.openxmlformats.schemas.drawingml.x2006.main.CTNonVisualDrawingProps;
import org.openxmlformats.schemas.drawingml.x2006.main.CTPositiveSize2D;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline;
/**
自定义 XWPFDocument,并重写 createPicture()方法
*/
public class CustomXWPFDocument extends XWPFDocument {
public CustomXWPFDocument(InputStream in) throws IOException {
super(in);
}
public CustomXWPFDocument() {
super();
}
public CustomXWPFDocument(OPCPackage pkg) throws IOException {
super(pkg);
}
/**
@param ind
@param width 宽
@param height 高
@param paragraph 段落
*/
public void createPicture(String blipId, int ind, int width, int height,XWPFParagraph paragraph) {
final int EMU = 9525;
width *= EMU;
height *= EMU;
CTInline inline = paragraph.createRun().getCTR().addNewDrawing().addNewInline();
String picXml = “”
+ “
+ "
+ "
+ " pic:nvPicPr” + "
+ “” name=“Generated”/>”
+ " pic:cNvPicPr/"
+ " "
+ " pic:blipFill"
+ "
+ “” xmlns:r=“http://schemas.openxmlformats.org/officeDocument/2006/relationships"/>"
+ "
+ "
+ "
+ " "
+ " pic:spPr"
+ "
+ "
+ "
+ “” cy=""
+ height
+ “”/>"
+ "
+ "
+ "
+ "
+ " "
+ "
+ "
inline.addNewGraphic().addNewGraphicData();
XmlToken xmlToken = null;
try {
xmlToken = XmlToken.Factory.parse(picXml);
} catch (XmlException xe) {
xe.printStackTrace();
}
inline.set(xmlToken);
inline.setDistT(0);
inline.setDistB(0);
inline.setDistL(0);
inline.setDistR(0);
CTPositiveSize2D extent = inline.addNewExtent();
extent.setCx(width);
extent.setCy(height);
CTNonVisualDrawingProps docPr = inline.addNewDocPr();
docPr.setId(ind);
docPr.setName(“图片” + ind);
docPr.setDescr(“测试”);
}
}
4.调用
调用时将HTML代码进行拼接,对图片属性进行修改