maven
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.13.1</version>
</dependency>
代码
工具类
import com.example.contant.FontSizeConstant;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletResponse;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
@Slf4j
public class RichTextParser {
/** Tag names that getAll accepts when collecting the elements to render. */
private static final List<String> tagList = Arrays.asList("p", "h1", "h2", "h3", "h4", "h5", "h6", "ol", "ul");
/** Tag names that doRenderingSpecialLabels checks before applying run-level formatting. */
private static final List<String> specialTagList = Arrays.asList("strong", "em", "u", "s", "blockquote", "pre",
"ol", "li", "ul", "sub", "sup");
/** Tag names that parseToDocx routes to createListTag instead of rendering as a plain paragraph. */
private static final List<String> listTag = Arrays.asList("ol", "ul");
/**
 * Converts a rich-text HTML fragment into a Word document and streams it to the HTTP response
 * as an attachment named {@code result.docx}.
 *
 * <p>Only the elements selected by {@code getAll} are rendered. List elements are delegated to
 * {@link #createListTag}; every other element becomes one Word paragraph containing one run per
 * child node, so text that sits next to inline tags is kept.
 *
 * @param inputHtml HTML to convert; must not be {@code null}
 * @param response  servlet response that receives the headers and the document bytes
 * @throws IOException if the document cannot be written to the response
 */
public static void parseToDocx(String inputHtml, HttpServletResponse response) throws IOException {
    log.info("转换前:{}",inputHtml);
    // NOTE(review): this turns every double quote into a single quote, including quotes inside
    // visible text and attribute values — confirm the upstream reason for it still applies.
    inputHtml = inputHtml.replace('"', '\'');
    log.info("转换后:{}",inputHtml);
    // The payload is OOXML (.docx); "application/msword" is the type of the legacy binary .doc format.
    response.setContentType("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
    response.setHeader("Content-Disposition", "attachment; filename=result.docx");
    Document doc = Jsoup.parse(inputHtml);
    Elements blocks = getAll(new Elements(), doc);
    // try-with-resources releases the document and the stream even when rendering or writing fails.
    try (XWPFDocument document = new XWPFDocument()) {
        for (Element block : blocks) {
            if (listTag.contains(block.tagName())) {
                // Lists create their own paragraphs; creating one here would leave an empty line.
                createListTag(document, block);
            } else {
                renderInlineContent(document.createParagraph(), block);
            }
        }
        try (OutputStream outputStream = response.getOutputStream()) {
            document.write(outputStream);
        }
    }
}

/** Adds one run to {@code target} for each text node or child element of {@code block}. */
private static void renderInlineContent(XWPFParagraph target, Element block) {
    for (Node node : block.childNodes()) {
        if (node instanceof TextNode) {
            String text = ((TextNode) node).text();
            if (text.isEmpty()) {
                continue;
            }
            XWPFRun run = target.createRun();
            run.setText(text);
            applyInlineStyle(block, run);
            // Heading size comes from the block element, not from whatever is nested inside it.
            processFontsize(block, run);
        } else if (node instanceof Element) {
            Element inline = (Element) node;
            XWPFRun run = target.createRun();
            run.setText(inline.text());
            // Block style first so that the inline element's own declarations win.
            applyInlineStyle(block, run);
            applyInlineStyle(inline, run);
            processFontsize(block, run);
            processSpecialLabels(inline, run);
        }
    }
}

/** Applies the declarations found in {@code source}'s {@code style} attribute to {@code run}. */
private static void applyInlineStyle(Element source, XWPFRun run) {
    String style = source.attr("style");
    if (style != null && !style.isEmpty()) {
        processCss(style.split(";"), run);
    }
}
/**
 * Renders the direct children of a list element as numbered Word paragraphs, one per child.
 *
 * <p>All items of the list share a single numbering instance. Both {@code ol} and {@code ul}
 * currently reference abstract numbering id 1.
 *
 * @param document  document that receives the list paragraphs
 * @param paragraph the {@code ol}/{@code ul} element whose children are the list items
 */
private static void createListTag(XWPFDocument document, Element paragraph) {
    Elements items = paragraph.children();
    if (items.isEmpty()) {
        return;
    }
    XWPFNumbering numbering = document.createNumbering();
    // One numbering instance per HTML list rather than one per item.
    // NOTE(review): abstract numbering id 1 is referenced but never defined in this method —
    // confirm it is provided elsewhere, otherwise Word may not show list markers.
    // TODO: use a different abstract numbering for "ul" (bullets) than for "ol" (numbers).
    BigInteger numId = numbering.addNum(BigInteger.ONE);
    for (Element item : items) {
        XWPFParagraph itemParagraph = document.createParagraph();
        itemParagraph.createRun().setText(item.text());
        itemParagraph.setNumID(numId);
    }
}
/**
 * Returns the elements that should each be rendered as a separate run.
 *
 * @param paragraph element to split
 * @return a new mutable list holding the child elements of {@code paragraph}, or
 *         {@code paragraph} itself when it has no child elements
 */
private static List<Element> separateRendering(Element paragraph) {
    Elements children = paragraph.children();
    // Plain ArrayList instead of double-brace initialization, which defines an anonymous subclass.
    if (children.isEmpty()) {
        List<Element> self = new ArrayList<>(1);
        self.add(paragraph);
        return self;
    }
    return new ArrayList<>(children);
}
/**
 * Collects the direct children of the document body whose tag name is listed in {@code tagList}.
 *
 * @param elements collection that receives the matches; it is also the return value
 * @param doc      parsed HTML document
 * @return {@code elements}, with the matching children appended in document order
 */
private static Elements getAll(Elements elements, Document doc) {
    // Ask jsoup for the body instead of assuming it is the second child of the first root element.
    Element body = doc.body();
    if (body == null) {
        return elements;
    }
    for (Element child : body.children()) {
        if (tagList.contains(child.tagName())) {
            elements.add(child);
        }
    }
    return elements;
}
/**
 * Applies the supported CSS declarations to a run. Only {@code color} is supported; it sets the
 * run's text colour.
 *
 * <p>{@code background-color} is deliberately ignored: it used to be written as the text colour,
 * which overwrote the real {@code color} and tinted the text with its own background.
 * TODO: map {@code background-color} to run shading.
 *
 * @param styleAttrs declarations of a {@code style} attribute, already split on {@code ;}
 * @param run        run to style
 */
private static void processCss(String[] styleAttrs, XWPFRun run) {
    for (String styleAttr : styleAttrs) {
        // Split on the first colon only, so the value part is kept intact.
        String[] attrParts = styleAttr.split(":", 2);
        if (attrParts.length != 2) {
            continue;
        }
        String attrName = attrParts[0].trim();
        String attrValue = attrParts[1].trim();
        if (!"color".equalsIgnoreCase(attrName)) {
            continue;
        }
        try {
            run.setColor(getHexColor(attrValue));
        } catch (IllegalArgumentException | IndexOutOfBoundsException e) {
            // One colour that cannot be parsed should not abort the whole export.
            log.warn("Skipping unsupported value '{}' for CSS property '{}'", attrValue, attrName);
        }
    }
}
/**
 * Converts a CSS colour value into the six-digit lowercase hex form without a leading {@code #}.
 *
 * <p>Accepted inputs are {@code #rrggbb}, {@code #rgb}, and any value whose first three
 * comma-separated numbers are the red, green and blue channels, such as {@code rgb(255, 0, 0)}
 * or {@code rgba(255, 0, 0, 0.5)}. Channel values above 255 are clamped to 255.
 *
 * @param rgbString CSS colour value
 * @return the colour as {@code rrggbb}
 * @throws IllegalArgumentException if the value is {@code null} or not in a supported form
 */
private static String getHexColor(String rgbString) {
    if (rgbString == null) {
        throw new IllegalArgumentException("Color value must not be null");
    }
    String value = rgbString.trim();
    if (value.startsWith("#")) {
        String hex = value.substring(1);
        if (hex.matches("[0-9a-fA-F]{3}")) {
            // Expand the shorthand: #abc means #aabbcc.
            hex = "" + hex.charAt(0) + hex.charAt(0)
                    + hex.charAt(1) + hex.charAt(1)
                    + hex.charAt(2) + hex.charAt(2);
        } else if (!hex.matches("[0-9a-fA-F]{6}")) {
            throw new IllegalArgumentException("Unsupported hex color: " + rgbString);
        }
        return String.format("%06x", Integer.parseInt(hex, 16));
    }
    // Keep only digits and commas, so "rgb(1, 2, 3)" becomes "1,2,3".
    String[] channels = value.replaceAll("[^\\d,]", "").split(",");
    if (channels.length < 3) {
        throw new IllegalArgumentException("Unsupported color value: " + rgbString);
    }
    StringBuilder result = new StringBuilder(6);
    for (int i = 0; i < 3; i++) {
        // Integer.parseInt rejects an empty channel with NumberFormatException,
        // which is an IllegalArgumentException.
        int channel = Math.min(255, Integer.parseInt(channels[i]));
        result.append(String.format("%02x", channel));
    }
    return result.toString();
}
/**
 * Left-pads a single-character hex string with {@code 0}; any other length is returned unchanged.
 *
 * @param hexValue hex digits of one colour channel
 * @return {@code hexValue}, prefixed with {@code 0} when it is exactly one character long
 */
private static String padZero(String hexValue) {
    if (hexValue.length() != 1) {
        return hexValue;
    }
    return "0" + hexValue;
}
/**
 * Applies the formatting of {@code paragraph} and of every element nested inside it to
 * {@code run}.
 *
 * <p>The element's own tag is always applied. Previously it was skipped whenever the element
 * had children, so {@code <strong><em>x</em></strong>} came out italic but not bold.
 *
 * @param paragraph element whose tag, and whose descendants' tags, drive the formatting
 * @param run       run that receives the formatting
 */
private static void processSpecialLabels(Element paragraph, XWPFRun run) {
    doRenderingSpecialLabels(paragraph, run);
    for (Element child : paragraph.children()) {
        processSpecialLabels(child, run);
    }
}
/**
 * Sets the run's font size from {@link FontSizeConstant} when the element is a heading
 * ({@code h1} to {@code h6}); any other tag leaves the run untouched.
 *
 * @param paragraph element whose tag name selects the size
 * @param run       run to resize
 */
private static void processFontsize(Element paragraph, XWPFRun run) {
    final String tag = paragraph.tagName();
    if ("h1".equals(tag)) {
        run.setFontSize(FontSizeConstant.H1_SIZE);
    } else if ("h2".equals(tag)) {
        run.setFontSize(FontSizeConstant.H2_SIZE);
    } else if ("h3".equals(tag)) {
        run.setFontSize(FontSizeConstant.H3_SIZE);
    } else if ("h4".equals(tag)) {
        run.setFontSize(FontSizeConstant.H4_SIZE);
    } else if ("h5".equals(tag)) {
        run.setFontSize(FontSizeConstant.H5_SIZE);
    } else if ("h6".equals(tag)) {
        run.setFontSize(FontSizeConstant.H6_SIZE);
    }
}
/**
 * Applies the run formatting that corresponds to a single tag.
 *
 * <p>Mapping: {@code strong} to bold, {@code em} to italic, {@code u} to single underline,
 * {@code s} to strike-through, {@code blockquote} to the imprinted text effect, {@code sub} to
 * subscript and {@code sup} to superscript. Tags outside {@code specialTagList}, and listed tags
 * without a case below, leave the run unchanged.
 *
 * @param tag element whose tag name selects the formatting
 * @param run run to format
 */
private static void doRenderingSpecialLabels(Element tag, XWPFRun run) {
    if (!specialTagList.contains(tag.tagName())) {
        return;
    }
    switch (tag.tagName()) {
        case "strong":
            run.setBold(true);
            break;
        case "em":
            run.setItalic(true);
            break;
        case "u":
            run.setUnderline(UnderlinePatterns.SINGLE);
            break;
        case "s":
            run.setStrikeThrough(true);
            break;
        case "blockquote":
            run.setImprinted(true);
            break;
        case "pre":
            // No dedicated formatting for preformatted text yet.
            break;
        case "sub":
            // Previously delegated to an empty stub, so subscripts were rendered as normal text.
            run.setSubscript(VerticalAlign.SUBSCRIPT);
            break;
        case "sup":
            // Previously delegated to an empty stub, so superscripts were rendered as normal text.
            run.setSubscript(VerticalAlign.SUPERSCRIPT);
            break;
        default:
            break;
    }
}
/** Placeholder for subscript rendering; it takes no arguments and currently does nothing. */
private static void doGenerateSub() {
// TODO: render the base (subscript)
}
/** Placeholder for superscript rendering; it takes no arguments and currently does nothing. */
private static void doGenerateSup() {
// TODO: render the exponent (superscript)
}
常量类
/**
 * Font sizes for the HTML heading levels h1 to h6. RichTextParser.processFontsize passes these
 * values to XWPFRun.setFontSize; the sizes decrease from h1 to h6.
 */
public interface FontSizeConstant {
// Font size applied to runs rendered from an h1 element.
int H1_SIZE = 14;
// Font size applied to runs rendered from an h2 element.
int H2_SIZE = 12;
// Font size applied to runs rendered from an h3 element.
int H3_SIZE = 11;
// Font size applied to runs rendered from an h4 element.
int H4_SIZE = 10;
// Font size applied to runs rendered from an h5 element.
int H5_SIZE = 9;
// Font size applied to runs rendered from an h6 element.
int H6_SIZE = 8;
}