就文档内容而言,目前支持导出标题、目录、段落、图片、超链接和表格(表格中可以包含图片)。
就文档格式而言,目前代码支持完全按照公文格式导出 docx 文档;不清楚公文格式的同学请看下图。
<!-- poi start -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-full</artifactId>
<version>5.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-examples</artifactId>
<version>4.0.0</version>
</dependency>
<!-- poi end -->
<!-- 处理html依赖 start -->
<dependency>
<groupId>cn.wanghaomiao</groupId>
<artifactId>JsoupXpath</artifactId>
<version>2.3.2</version>
</dependency>
<!-- 处理html依赖 end -->
<!-- 工具包 start -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.12.0</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.47</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.13</version>
</dependency>
<!-- 工具包 end -->
<!-- lombok start -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.20</version>
</dependency>
<!-- lombok end -->
/**
 * Converts between Arabic numbers and Chinese numerals.
 *
 * @author 明快de玄米61
 * @date 2022/9/13 15:40
 */
public class ChineseNumToArabicNumUtil {
    /** Chinese digits; the array index of each character is its numeric value. */
    static char[] cnArr = new char [] {'零','一','二','三','四','五','六','七','八','九'};
    /** Chinese unit characters; index-aligned with {@link #UNIT_VALUES}. */
    static char[] chArr = new char [] {'零','十','百','千','万','亿'};
    static String allChineseNum = "零一二三四五六七八九十百千万亿";
    static String allArabicNum = "0123456789";
    static String num1 = "一二三四五六七八九";
    static String num2 = "十百千万亿";
    static String zero = "零";

    /** Multiplier for each entry of {@link #chArr}. */
    private static final int[] UNIT_VALUES = {1, 10, 100, 1000, 10000, 100000000};
    /** Suffix appended after each four-digit section, lowest section first. */
    private static final String[] SECTION_UNITS = {"", "万", "亿"};
    /** Units of the three leading digits inside a four-digit section. */
    private static final char[] DIGIT_UNITS = {'千', '百', '十'};

    /**
     * Parses a Chinese numeral such as "一百零一" or "十二万" into an int.
     *
     * <p>Characters that are not Chinese numerals are ignored. The earlier implementation
     * mis-parsed a multi-character group in front of 万/亿 ("十二万" became 20010) and
     * returned 1 for input without any numeral; both now yield the expected value
     * (120000 and 0 respectively). Values beyond the int range overflow silently.
     *
     * @param chineseNum text made of Chinese numerals; {@code null} is treated as empty
     * @return the parsed value, or 0 when the input holds no numeral
     */
    public static int chineseNumToArabicNum(String chineseNum) {
        if (chineseNum == null) {
            return 0;
        }
        int total = 0;              // value already closed off by 万 / 亿
        int section = 0;            // value accumulated from 十 / 百 / 千 since the last 万 / 亿
        int digit = 0;              // most recent digit not yet attached to a unit
        boolean hasDigit = false;   // a digit is waiting for its unit
        boolean hasSection = false; // something was read since the last 万 / 亿
        for (int i = 0; i < chineseNum.length(); i++) {
            char c = chineseNum.charAt(i);
            int value = digitValue(c);
            if (value >= 0) {
                digit = value;
                hasDigit = true;
                hasSection = true;
                continue;
            }
            switch (c) {
                case '十':
                case '百':
                case '千':
                    // A bare unit ("十", "百") counts as one of that unit.
                    section += (hasDigit ? digit : 1) * smallUnitValue(c);
                    digit = 0;
                    hasDigit = false;
                    hasSection = true;
                    break;
                case '万':
                    total += (hasSection ? section + digit : 1) * 10000;
                    section = 0;
                    digit = 0;
                    hasDigit = false;
                    hasSection = false;
                    break;
                case '亿':
                    total = ((hasSection || total != 0) ? total + section + digit : 1) * 100000000;
                    section = 0;
                    digit = 0;
                    hasDigit = false;
                    hasSection = false;
                    break;
                default:
                    // Not a numeral: skip it.
                    break;
            }
        }
        return total + section + digit;
    }

    /** Returns the value 0-9 of a Chinese digit, or -1 when {@code c} is not one. */
    private static int digitValue(char c) {
        for (int i = 0; i < cnArr.length; i++) {
            if (cnArr[i] == c) {
                return i;
            }
        }
        return -1;
    }

    /** Returns the multiplier of 十 / 百 / 千. */
    private static int smallUnitValue(char c) {
        if (c == '十') {
            return 10;
        }
        return c == '百' ? 100 : 1000;
    }

    /**
     * Replaces the Chinese numerals embedded in a string with Arabic digits and leaves every
     * other character untouched, e.g. "第二十一条" becomes "第21条".
     *
     * <p>The statement order of this method is significant; the logic is kept exactly as it
     * was, only the repeated unit switch is replaced by a lookup in {@link #UNIT_VALUES}.
     *
     * @param chineseNum text that may contain Chinese numerals
     * @return the text with numerals converted
     */
    public static String chineseNumToArabicNumTwo(String chineseNum) {
        StringBuilder resultStr = new StringBuilder();
        int tempresult = 0;
        int temp = 1; // value of the unit group being built, e.g. 十万
        int count = 0; // non-zero once a unit character was seen for the current group
        // When set, temp, count and tempresult are reset at the start of the next iteration.
        boolean setInitial = false;
        // Guards against appending the same number twice when it ends with a unit character.
        boolean isAdd = false;
        boolean num1flag = false;
        boolean num2flag = false;
        for (int i = 0; i < chineseNum.length(); i++) {
            if (setInitial) {
                tempresult = 0;
                temp = 1;
                count = 0;
                setInitial = false;
            }
            boolean b = true; // stays true when the character is not a digit (may be a unit)
            char c = chineseNum.charAt(i);
            if (allChineseNum.indexOf(c) >= 0) {
                // Two consecutive non-zero digits: emit them digit by digit.
                if (i < chineseNum.length() - 1 && num1.indexOf(c) >= 0 && num1.indexOf(chineseNum.charAt(i+1)) >= 0) {
                    num1flag = true;
                }
                for (int j = 0; j < cnArr.length; j++) {
                    if (c == cnArr[j]) {
                        if (0 != count) { // flush the previous unit group before starting the next
                            tempresult += temp;
                            temp = 1;
                            count = 0;
                        }
                        if (!isAdd && (i == chineseNum.length() - 1
                                || allChineseNum.indexOf(chineseNum.charAt(i+1)) < 0)) {
                            tempresult += j;
                            setInitial = true;
                            resultStr.append(tempresult);
                            isAdd = true;
                        }
                        // The array index is the digit's value.
                        temp = j;
                        b = false;
                        break;
                    }
                }
                if (num1flag) {
                    resultStr.append(temp);
                    num1flag = false;
                    setInitial = true;
                    continue;
                }
                boolean test = (i < chineseNum.length() - 1 && zero.indexOf(chineseNum.charAt(i+1)) >= 0 )
                        || (i >0 && zero.indexOf(chineseNum.charAt(i-1)) >= 0);
                if (i < chineseNum.length() - 1 && zero.indexOf(c) >= 0 && test ) {
                    num2flag = true;
                }
                if (b) { // unit character: 十 百 千 万 亿
                    for (int j = 0; j < chArr.length; j++) {
                        if (c == chArr[j]) {
                            temp *= UNIT_VALUES[j];
                            count++;
                        }
                    }
                }
                if (num2flag) {
                    resultStr.append(temp);
                    num2flag = false;
                    setInitial = true;
                    continue;
                }
                if (!isAdd && (i == chineseNum.length() - 1
                        || allChineseNum.indexOf(chineseNum.charAt(i+1)) < 0)) {
                    tempresult += temp;
                    setInitial = true;
                    resultStr.append(tempresult);
                    isAdd = true;
                }
            } else {
                isAdd = false;
                resultStr.append(c);
            }
        }
        return resultStr.toString();
    }

    /**
     * Converts a non-negative int into Chinese numerals, e.g. 21 becomes "二十一" and
     * 101 becomes "一百零一".
     *
     * <p>Compared with the earlier implementation: negative input no longer recurses without
     * end, an inner ten keeps its leading "一" (110 is "一百一十", not "一百十"), and values
     * of 100000 and above are converted instead of yielding an empty string.
     *
     * @param intInput the number to convert; must not be negative
     * @return the Chinese numeral; the empty string for 0 (kept for backward compatibility)
     * @throws IllegalArgumentException if {@code intInput} is negative
     */
    public static String arabicNumToChineseNum(int intInput) {
        if (intInput < 0) {
            throw new IllegalArgumentException("intInput must not be negative: " + intInput);
        }
        if (intInput == 0) {
            return "";
        }
        // Four-digit sections, lowest first: ones, 万, 亿.
        int[] sections = {intInput % 10000, intInput / 10000 % 10000, intInput / 100000000};
        StringBuilder sb = new StringBuilder();
        boolean skippedZeroSection = false;
        for (int idx = sections.length - 1; idx >= 0; idx--) {
            int section = sections[idx];
            if (section == 0) {
                if (sb.length() > 0) {
                    skippedZeroSection = true;
                }
                continue;
            }
            // A gap between sections (empty section or missing thousands digit) is read as 零.
            if (sb.length() > 0 && (skippedZeroSection || section < 1000)) {
                sb.append('零');
            }
            appendSection(sb, section);
            sb.append(SECTION_UNITS[idx]);
            skippedZeroSection = false;
        }
        // A leading "一十" is conventionally shortened to "十" (10 -> "十", 15 -> "十五").
        if (sb.length() >= 2 && sb.charAt(0) == '一' && sb.charAt(1) == '十') {
            sb.deleteCharAt(0);
        }
        return sb.toString();
    }

    /** Appends the numeral for a value in the range 1..9999, without any 万 / 亿 suffix. */
    private static void appendSection(StringBuilder sb, int section) {
        int divisor = 1000;
        boolean started = false;
        boolean zeroPending = false;
        for (int k = 0; k < 4; k++) {
            int d = section / divisor % 10;
            if (d == 0) {
                if (started) {
                    zeroPending = true;
                }
            } else {
                if (zeroPending) {
                    sb.append('零');
                    zeroPending = false;
                }
                sb.append(cnArr[d]);
                if (k < DIGIT_UNITS.length) {
                    sb.append(DIGIT_UNITS[k]);
                }
                started = true;
            }
            divisor /= 10;
        }
    }

    /**
     * Tells whether every character of the string is a Chinese numeral.
     *
     * @param chineseStr the text to check
     * @return {@code false} for {@code null} or when any other character is present;
     *         {@code true} otherwise (including the empty string)
     */
    public static boolean isChineseNum(String chineseStr) {
        if (chineseStr == null) {
            return false;
        }
        for (int i = 0; i < chineseStr.length(); i++) {
            if (allChineseNum.indexOf(chineseStr.charAt(i)) < 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether the string consists of one or more ASCII digits.
     *
     * @param str the text to check
     * @return {@code true} only for a non-null, non-empty string of digits 0-9
     */
    public static boolean isNum(String str) {
        return str != null && str.matches("[0-9]+");
    }
}
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
/**
 * HttpClient for HTTPS requests that accepts every server certificate and every host name.
 *
 * <p>WARNING: certificate and host-name verification are both switched off, so connections
 * made with this client offer no protection against man-in-the-middle attacks. Use it only
 * for endpoints where that risk is acceptable.
 */
public class DefaultSSLUtils extends DefaultHttpClient {
    /**
     * Creates the client and registers a trust-all socket factory for the "https" scheme on
     * port 443.
     *
     * @throws Exception if the TLS context cannot be created or initialised
     */
    public DefaultSSLUtils() throws Exception {
        super();
        SSLContext ctx = SSLContext.getInstance("TLS");
        X509TrustManager tm = new X509TrustManager() {
            @Override
            public void checkClientTrusted(X509Certificate[] chain,
                                           String authType) throws CertificateException {
                // Deliberately empty: every client certificate is accepted.
            }

            @Override
            public void checkServerTrusted(X509Certificate[] chain,
                                           String authType) throws CertificateException {
                // Deliberately empty: every server certificate is accepted.
            }

            @Override
            public X509Certificate[] getAcceptedIssuers() {
                // The interface contract requires a non-null (possibly empty) array.
                return new X509Certificate[0];
            }
        };
        ctx.init(null, new TrustManager[]{tm}, null);
        SSLSocketFactory ssf = new SSLSocketFactory(ctx, SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
        ClientConnectionManager ccm = this.getConnectionManager();
        SchemeRegistry sr = ccm.getSchemeRegistry();
        sr.register(new Scheme("https", 443, ssf));
    }
}
import org.apache.commons.lang3.StringUtils;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.UUID;
/**
 * File helpers for creating and removing temporary files.
 *
 * @author 明快de玄米61
 * @date 2022/6/28 16:04
 */
public class FileUtil {
    /**
     * Creates an empty file named {@code fileName} inside a fresh directory
     * {@code <java.io.tmpdir>/<yyyyMMdd>/<uuid>}.
     *
     * <p>The name must be a plain file name containing a dot. Names that carry a path
     * component (for example "../a.txt") are rejected, because they would place the file
     * outside its private directory and a later {@link #deleteParentFile(File...)} would then
     * remove a directory shared with other temporary files.
     *
     * @author 明快de玄米61
     * @date 2022/6/30 16:44
     * @param fileName name of the file to create, including its extension
     * @return the created file, or {@code null} when the name is invalid or creation fails
     **/
    public static File createTempFile(String fileName) {
        try {
            // Validate the argument.
            if (isBlank(fileName) || fileName.lastIndexOf(".") < 0) {
                return null;
            }
            if (".".equals(fileName) || "..".equals(fileName)
                    || !fileName.equals(new File(fileName).getName())) {
                return null;
            }
            // Build the directory, e.g.
            // C:\Users\Administrator\AppData\Local\Temp\20220630\0f774122c38b423793cc1c121611c142
            String tmpdirPath = System.getProperty("java.io.tmpdir");
            String dateStr = new SimpleDateFormat("yyyyMMdd").format(new Date());
            String uuId = UUID.randomUUID().toString().replaceAll("-", "");
            File dir = new File(String.join(File.separator, tmpdirPath, dateStr, uuId));
            if (!dir.mkdirs() && !dir.isDirectory()) {
                throw new IllegalStateException("cannot create directory " + dir.getAbsolutePath());
            }
            // Create the file itself.
            File file = new File(dir, fileName);
            file.createNewFile();
            return file;
        } catch (Exception e) {
            System.out.println("》》》创建临时文件失败,临时文件名称:" + fileName);
            e.printStackTrace();
        }
        return null;
    }

    /** Returns true for null, empty, or whitespace-only text. */
    private static boolean isBlank(String text) {
        if (text == null) {
            return true;
        }
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Deletes the parent directory (with everything in it) of each given file or directory.
     * Entries that are {@code null} or do not exist are skipped.
     *
     * @param sourceFiles files or directories whose parents are to be removed
     */
    public static void deleteParentFile(File... sourceFiles) {
        if (sourceFiles == null || sourceFiles.length == 0) {
            return;
        }
        // Collect the parents first, then delete them in one pass.
        List<File> parentFiles = new ArrayList<>(sourceFiles.length);
        for (File sourceFile : sourceFiles) {
            if (sourceFile != null && sourceFile.exists()) {
                parentFiles.add(sourceFile.getParentFile());
            }
        }
        deleteFile(parentFiles.toArray(new File[0]));
    }

    /**
     * Deletes the given files; directories are emptied recursively before being removed.
     * Entries that are {@code null} or do not exist are skipped, and a failure on one entry
     * is printed without stopping the remaining deletions.
     *
     * @author 明快de玄米61
     * @date 2022/6/28 16:04
     * @param sourceFiles files or directories to delete
     **/
    public static void deleteFile(File... sourceFiles) {
        if (sourceFiles == null || sourceFiles.length == 0) {
            return;
        }
        for (File sourceFile : sourceFiles) {
            try {
                if (sourceFile == null || !sourceFile.exists()) {
                    continue;
                }
                if (sourceFile.isDirectory()) {
                    // listFiles() returns null when the directory cannot be read.
                    File[] childrenFile = sourceFile.listFiles();
                    if (childrenFile != null) {
                        for (File childFile : childrenFile) {
                            deleteFile(childFile);
                        }
                    }
                }
                // Remove the file, or the now-empty directory.
                sourceFile.delete();
            } catch (Exception e) {
                System.out.println("》》》删除文件/目录报错,其中文件/目录全路径:" + sourceFile.getAbsolutePath());
                e.printStackTrace();
            }
        }
    }
}
import org.apache.commons.lang3.StringUtils;
/**
 * Lenient conversions from arbitrary objects to String and Integer.
 *
 * @author 明快de玄米61
 * @date 2022/7/22 13:21
 */
public class StringUtil {
    /**
     * Returns the string form of a value.
     *
     * @param value        value to convert
     * @param defaultValue result when {@code value} is {@code null}
     * @return {@code value.toString()}, or {@code defaultValue} for a {@code null} value
     */
    public static String toStr(Object value, String defaultValue) {
        // String.toString() returns the same instance, so strings need no special case.
        return value == null ? defaultValue : value.toString();
    }

    /**
     * Converts a value to an Integer without ever throwing.
     *
     * @param value        value to convert; numbers are narrowed with {@code intValue()},
     *                     anything else is parsed from its trimmed string form
     * @param defaultValue result when {@code value} is {@code null}, empty or not parseable
     * @return the converted number or {@code defaultValue}
     */
    public static Integer toInt(Object value, Integer defaultValue) {
        if (value == null) {
            return defaultValue;
        }
        if (value instanceof Integer) {
            return (Integer) value;
        }
        if (value instanceof Number) {
            return ((Number) value).intValue();
        }
        final String text = value.toString();
        if (StringUtils.isEmpty(text)) {
            return defaultValue;
        }
        try {
            return Integer.parseInt(text.trim());
        } catch (Exception e) {
            // Not a number: fall back to the default instead of failing.
            return defaultValue;
        }
    }
}
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import java.io.*;
import java.util.UUID;
/**
 * Downloads remote images into temporary files.
 */
public class ImageUtil {
    /**
     * Downloads the resource at {@code imgUrl} into a new temporary ".jpg" file.
     *
     * <p>Download errors are printed and not propagated; in that case the returned file may
     * be empty or partially written. The caller is responsible for deleting the file.
     *
     * @param imgUrl address of the image
     * @return the temporary file, or {@code null} when the temporary file could not be created
     */
    public static File getImgFile(String imgUrl) {
        // Target file for the download.
        File imgFile = FileUtil.createTempFile(UUID.randomUUID().toString().replaceAll("-", "") + ".jpg");
        if (imgFile == null) {
            // Without a target there is nothing to write to; skip the request entirely.
            return null;
        }
        // try-with-resources closes in reverse order: streams, then response, then client.
        try (CloseableHttpClient client = new DefaultSSLUtils()) {
            HttpGet httpGet = new HttpGet(new URIBuilder(imgUrl).build());
            RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(10000).setConnectionRequestTimeout(10000).setSocketTimeout(10000).build();
            httpGet.setConfig(requestConfig);
            try (CloseableHttpResponse response = client.execute(httpGet);
                 InputStream inputStream = response.getEntity().getContent();
                 OutputStream out = new FileOutputStream(imgFile)) {
                // Copy the response body to the file.
                byte[] buffer = new byte[1024];
                int len;
                while ((len = inputStream.read(buffer)) != -1) {
                    out.write(buffer, 0, len);
                }
                out.flush();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return imgFile;
    }
}
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.poi.xwpf.usermodel.XWPFTableCell.XWPFVertAlign;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.impl.xb.xmlschema.SpaceAttribute;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class WordUtil {
// Units start
// NOTE(review): no comment in the original; presumably the unit length of one line — confirm against its usages
private static final int PER_LINE = 100;
// Unit length of one character
private static final int PER_CHART = 100;
// 1 cm ≈ 567
private static final int PER_CM = 567;
// Unit length of one point
private static final int PER_POUND = 20;
// Unit length of single line spacing
private static final int ONE_LINE = 240;
// Units end
// Page margins start
// Top page margin (unit: cm)
private static final double TOP = 3.7;
// Bottom page margin (unit: cm)
private static final double BOTTOM = 3.5;
// Left page margin (unit: cm)
private static final double LEFT = 2.8;
// Right page margin (unit: cm)
private static final double RIGHT = 2.6;
// Page margins end
// Page-number style start
// Font family
private static final String PAGE_FONT_FAMILY = "宋体";
// Font size
private static final Integer PAGE_FONT_SIZE = 14;
// Page-number style end
// Body-text style start
// Font family
private static final String PARA_FONT_FAMILY = "仿宋_GB2312";
// Font size
private static final Integer PARA_FONT_SIZE = 16;
// Line spacing (unit: points)
private static final double PARA_ROW_SPACE = 28.95;
// Body-text style end
// Highest heading level
private static final Integer MAX_HEADING_LEVEL = 9;
// Prefix of the heading style identifiers
private static final String HEADING_STYLE_FRONT_SIGN = "标题";
// Maximum width of an image in the body text
private static final Integer MAX_PAGE_IMG_WIDTH = 350;
// Maximum width of an image inside a table
private static final Integer MAX_TABLE_IMG_WIDTH = 200;
// Maximum table width (unit: cm)
private static final double TABLE_WIDTH = 16.19;
// Cell margin (unit: points)
private static final double CELL_MARGIN = 5.67;
/** Heading styles; the entry at index (level - 1) is the style of that heading level. **/
private static List<HeadingStyle> headingStyleList = new ArrayList<>();
/** Registers one heading style per level, from level 1 up to MAX_HEADING_LEVEL. **/
static {
    // Levels 1 to 4 follow the official-document format.
    headingStyleList.add(new HeadingStyle(16, "黑体", true));
    headingStyleList.add(new HeadingStyle(16, "楷体", true));
    headingStyleList.add(new HeadingStyle(16, "仿宋", true));
    headingStyleList.add(new HeadingStyle(16, "仿宋", true));
    // The official-document format does not define deeper levels, so they share a default style.
    for (int level = 5; level <= MAX_HEADING_LEVEL; level++) {
        headingStyleList.add(new HeadingStyle(16, "仿宋", true));
    }
}
/**
 * Writes the document to the given docx file.
 *
 * <p>An I/O failure is printed and not propagated. The output stream is always closed,
 * so the file is not left locked or partially flushed.
 *
 * @author 明快de玄米61
 * @date 2022/9/13 16:37
 * @param document XWPFDocument to serialise
 * @param docxFile target docx file
 **/
public static void generateDocxFile(XWPFDocument document, File docxFile) {
    try (OutputStream out = new FileOutputStream(docxFile)) {
        document.write(out);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Creates an empty document with page margins, footer and heading styles already set up.
 *
 * @author 明快de玄米61
 * @date 2022/9/13 10:40
 * @return the prepared document
 **/
public static XWPFDocument initXWPFDocument() {
    final XWPFDocument doc = new XWPFDocument();
    initPageMargin(doc);                                                       // page margins
    initFooter(doc, PAGE_FONT_FAMILY, PAGE_FONT_SIZE, "000000", null, " ");    // footer
    initHeadingStyle(doc);                                                     // heading levels
    return doc;
}
/**
 * Appends the centred, bold document title as a new paragraph.
 *
 * @author 明快de玄米61
 * @date 2022/9/13 15:23
 * @param document XWPFDocument to append to
 * @param text title text
 **/
public static void dealDocxTitle(XWPFDocument document, String text) {
    final XWPFParagraph titleParagraph = document.createParagraph();
    titleParagraph.setSpacingAfterLines(100);
    titleParagraph.setAlignment(ParagraphAlignment.CENTER);
    titleParagraph.setVerticalAlignment(TextAlignment.TOP);

    final XWPFRun titleRun = titleParagraph.createRun();
    titleRun.setBold(true);
    titleRun.setFontSize(22);
    titleRun.setFontFamily("方正小标宋简体");
    titleRun.setText(text);
}
/**
 * Appends a heading paragraph of the given level.
 *
 * <p>The level is clamped to the range of defined heading styles: a level above the
 * maximum uses the last style, and a level below 1 uses the first one instead of failing
 * with an IndexOutOfBoundsException.
 *
 * @author 明快de玄米61
 * @date 2022/9/13 15:18
 * @param document XWPFDocument to append to
 * @param level heading level, starting at 1
 * @param sort position of the heading among its siblings
 * @param text heading text
 * @param odfDealFlag whether to add the official-document numbering prefix
 **/
public static void dealHeading(XWPFDocument document, int level, int sort, String text, boolean odfDealFlag) {
    // Clamp into 1..number of defined styles.
    level = Math.max(1, Math.min(level, headingStyleList.size()));
    HeadingStyle headingStyle = headingStyleList.get(level - 1);
    XWPFParagraph paragraph = document.createParagraph();
    paragraph.setStyle(getHeadingStyle(level));
    // Spacing before and after the heading.
    setPSpacing(paragraph, 1, 1);
    XWPFRun xwpfRun = paragraph.createRun();
    xwpfRun.setBold(headingStyle.isBold());
    xwpfRun.setFontSize((int) (headingStyle.getFontSize()));
    xwpfRun.setFontFamily(headingStyle.getFontFamily());
    xwpfRun.setText(getHeadingTextByODF(level, sort, text, odfDealFlag));
}
/**
 * Adds the numbering prefix of the official document format (ODF) to a heading text.
 *
 * <p>Level 1 uses "一、", level 2 "(一)", level 3 "1." and level 4 "(1)". Deeper levels
 * are returned unchanged, as is a text that already starts with its prefix.
 *
 * @author 明快de玄米61
 * @date 2022/9/13 15:59
 * @param level heading level
 * @param sort position of the heading among its siblings
 * @param text original heading text
 * @param odfDealFlag whether the prefix should be added at all
 * @return the heading text, prefixed where applicable
 **/
private static String getHeadingTextByODF(int level, int sort, String text, boolean odfDealFlag) {
    if (!odfDealFlag) {
        return text;
    }
    final String prefix;
    if (level == 1) {
        prefix = ChineseNumToArabicNumUtil.arabicNumToChineseNum(sort) + "、";
    } else if (level == 2) {
        prefix = "(" + ChineseNumToArabicNumUtil.arabicNumToChineseNum(sort) + ")";
    } else if (level == 3) {
        prefix = sort + ".";
    } else if (level == 4) {
        prefix = "(" + sort + ")";
    } else {
        // Levels other than 1 to 4 carry no prefix.
        return text;
    }
    // Avoid doubling a prefix the text already has.
    if (text.startsWith(prefix)) {
        return text;
    }
    return prefix + text;
}
/**
 * Builds the style identifier of a heading level: the heading prefix followed by the level.
 *
 * @author 明快de玄米61
 * @date 2022/9/13 15:18
 * @param level heading level
 * @return style identifier for that level
 **/
private static String getHeadingStyle(int level) {
    final StringBuilder styleId = new StringBuilder();
    styleId.append(HEADING_STYLE_FRONT_SIGN).append(level);
    return styleId.toString();
}
/**
 * Converts an HTML fragment into document content: the html is split into pieces and each
 * piece is written as a table, heading, image, hyperlink or plain-text paragraph.
 *
 * NOTE(review): every regex literal and the table-wrapping string literals in this method are
 * empty, whitespace-only or cut in this copy of the file (one literal even spans two lines,
 * which is not valid Java). They look like HTML-tag patterns that were lost when the source
 * was extracted — restore them from the original before relying on this method.
 *
 * @author 明快de玄米61
 * @date 2022/9/13 11:45
 * @param document XWPFDocument that receives the content
 * @param html HTML fragment; nothing happens when it is empty
 **/
public static void dealHtmlContent(XWPFDocument document, String html) {
// Nothing to do for empty html
if (StringUtils.isEmpty(html)) {
return;
}
// Strip special characters
html = dealSpecialCharacters(html);
// Split into pieces; this also covers text that is not wrapped in a p tag (copying a single sentence into another input box produces no p tag)
List<String> extractResultList = getExtractResultList(html);
// Process each piece
for (String content : extractResultList) {
// NOTE(review): as written, an empty pattern matches any input, so the table branch below is always taken
Pattern tablePattern = Pattern.compile("" );
Pattern hPattern = Pattern.compile("" );
Pattern imgPattern = Pattern.compile(" ");
Pattern aPattern = Pattern.compile("" );
// Table (reuses the table-handling code of collector 1.0)
if (tablePattern.matcher(content).find()) {
// Extract the tbody so the collector 1.0 table export code can be reused
Pattern pattern = Pattern.compile("" );
Matcher matcher = pattern.matcher(content);
String tbody = null;
while (matcher.find()) {
tbody = matcher.group();
}
// Wrap the data the way collector 1.0 expects
String table = "" + tbody + "
";
table = table.replace(", ").replace(", " ).replace(", ").replace(", " );
table = table.replace(" , " ).replace(", " );
Document word = Jsoup.parse(table);
// Table-handling utility code from collector 1.0
parseTableElement(word, document);
}
// Heading
else if (hPattern.matcher(content).find()) {
XWPFParagraph paragraph = createP(document);
// Alignment and indentation
setHAttr(paragraph, content);
// Line spacing
setPRowSpacing(paragraph, PARA_ROW_SPACE);
dealHText(paragraph, content);
}
// Image
else if (imgPattern.matcher(content).find()) {
XWPFParagraph paragraph = createP(document);
// Alignment and indentation
setPAttr(paragraph, content);
// Spacing before and after the paragraph
setPSpacing(paragraph, 1, 1);
dealImg(paragraph, content, PARA_FONT_FAMILY, PARA_FONT_SIZE);
}
// Hyperlink
else if (aPattern.matcher(content).find()) {
XWPFParagraph paragraph = createP(document);
// Alignment and indentation
setPAttr(paragraph, content);
// Line spacing
setPRowSpacing(paragraph, PARA_ROW_SPACE);
dealLink(paragraph, content, PARA_FONT_FAMILY, PARA_FONT_SIZE);
}
// Plain text
else {
XWPFParagraph paragraph = createP(document);
// Alignment and indentation
setPAttr(paragraph, content);
// Line spacing
setPRowSpacing(paragraph, PARA_ROW_SPACE);
String text = Jsoup.parse(content).text();
dealPText(paragraph, text, PARA_FONT_FAMILY, PARA_FONT_SIZE);
}
}
}
/**
 * Removes carriage returns, line feeds and backspace characters from the html.
 *
 * <p>The earlier character class was written as "[\r|\n|\b]"; inside a character class the
 * "|" is a literal, so every pipe character in the content was deleted as well.
 *
 * @param html html source
 * @return html without those control characters
 */
private static String dealSpecialCharacters(String html) {
    return html.replaceAll("[\r\n\b]", "");
}
/**
 * Writes the content of an html heading into the paragraph, choosing the font by heading level.
 *
 * <p>Levels 1 and 2 use 黑体 and 楷体, levels 3 to 6 use 仿宋; any other or unknown level
 * falls back to the body-text font. The font size is 16 for levels 1 to 6 (larger per-level
 * sizes were dropped because the headings came out too big) and the body-text size otherwise.
 *
 * @param paragraph target paragraph
 * @param content html of the heading
 */
private static void dealHText(XWPFParagraph paragraph, String content) {
    final Integer hNum = getHNum(content);
    final boolean knownLevel = hNum != null && hNum >= 1 && hNum <= 6;

    String family = PARA_FONT_FAMILY;
    if (knownLevel) {
        switch (hNum) {
            case 1:
                family = "黑体";
                break;
            case 2:
                family = "楷体";
                break;
            default:
                family = "仿宋";
                break;
        }
    }
    final Integer fontSize = knownLevel ? Integer.valueOf(16) : PARA_FONT_SIZE;

    // A heading may contain links, so it is written through the link handler.
    dealLink(paragraph, content, family, fontSize);
}
/**
 * Reads the inline style attribute of a tag and returns its declarations as a map.
 *
 * <p>Declarations without a property name or without a ":" are skipped (they used to raise
 * an ArrayIndexOutOfBoundsException), and a value keeps everything after the first ":" so
 * values such as "url(http://...)" are no longer cut off.
 *
 * <p>Despite the method name, when several tags match, the style of the last match is used;
 * this is unchanged from the earlier behaviour.
 *
 * @param content html to search
 * @param labelName tag name, inserted into the search regex as-is (may itself be a regex)
 * @return property name to value, both trimmed; empty when no style attribute is found
 */
private static Map<String, String> getFirstLabelStyle(String content, String labelName) {
    Pattern pattern = Pattern.compile(String.format("<%s.*?style=\"(.*?)\".*?>", labelName));
    Matcher matcher = pattern.matcher(content);
    String style = null;
    while (matcher.find()) {
        style = matcher.group(1).trim();
    }
    Map<String, String> attrMap = new HashMap<>();
    if (style == null) {
        return attrMap;
    }
    for (String declaration : style.split(";")) {
        int colon = declaration.indexOf(':');
        if (colon <= 0) {
            // Blank segment (e.g. after a trailing ";") or a declaration without a name.
            continue;
        }
        String key = declaration.substring(0, colon).trim();
        if (!key.isEmpty()) {
            attrMap.put(key, declaration.substring(colon + 1).trim());
        }
    }
    return attrMap;
}
/**
 * Applies the inline style of the p tag in {@code content} to the paragraph.
 *
 * @param paragraph target paragraph
 * @param content html containing the p tag
 */
private static void setPAttr(XWPFParagraph paragraph, String content) {
    setPStyle(paragraph, getFirstLabelStyle(content, "p"));
}
/**
 * Applies the inline style of the heading tag (h1 to h9) in {@code content} to the paragraph.
 *
 * @param paragraph target paragraph
 * @param content html containing the heading tag
 */
private static void setHAttr(XWPFParagraph paragraph, String content) {
    final String headingTagRegex = "h[1-9]{1}";
    setPStyle(paragraph, getFirstLabelStyle(content, headingTagRegex));
}
/**
 * Returns the number captured by group 1 of the first regex match in {@code content},
 * or null when nothing matches.
 *
 * NOTE(review): the pattern literal is cut off in this copy of the file (the line holds an
 * unterminated string). Presumably it matched an opening heading tag and captured its level —
 * restore it from the original source.
 */
private static Integer getHNum(String content) {
Pattern pattern = Pattern.compile(");
Matcher matcher = pattern.matcher(content);
Integer num = null;
// Only the first match is used
while (matcher.find()) {
num = Integer.valueOf(matcher.group(1));
break;
}
return num;
}
/**
 * Applies the "text-align" and "text-indent" style properties to a paragraph.
 *
 * <p>A missing text-align means left alignment; an unrecognised value leaves the alignment
 * untouched. text-indent is honoured only when given in "em"; fractional values are rounded
 * and a value that is not a number is ignored instead of aborting the export with a
 * NumberFormatException.
 *
 * @param paragraph target paragraph
 * @param attrMap style properties, keyed by property name
 */
private static void setPStyle(XWPFParagraph paragraph, Map<String, String> attrMap) {
    String align = attrMap.get("text-align");
    String indent = attrMap.get("text-indent");
    // Alignment
    if (align != null) {
        // Locale.ROOT keeps the comparison stable under locales with special casing rules.
        switch (align.toLowerCase(Locale.ROOT)) {
            case "left":
                paragraph.setAlignment(ParagraphAlignment.LEFT);
                break;
            case "center":
                paragraph.setAlignment(ParagraphAlignment.CENTER);
                break;
            case "right":
                paragraph.setAlignment(ParagraphAlignment.RIGHT);
                break;
            case "justify":
                paragraph.setAlignment(ParagraphAlignment.BOTH);
                break;
            default:
                break;
        }
    } else {
        paragraph.setAlignment(ParagraphAlignment.LEFT);
    }
    // Indentation
    if (indent != null && indent.contains("em")) {
        try {
            double chars = Double.parseDouble(indent.replace("em", "").trim());
            if (!Double.isNaN(chars) && !Double.isInfinite(chars)) {
                setTextIndent(paragraph, (int) Math.round(chars));
            }
        } catch (NumberFormatException ignored) {
            // Unparseable indent: leave the paragraph without a first-line indent.
        }
    }
}
/**
 * Appends a new, empty body paragraph to the document.
 *
 * @param document XWPFDocument to append to
 * @return the new paragraph
 */
private static XWPFParagraph createP(XWPFDocument document) {
    return document.createParagraph();
}
/**
 * Downloads the image at {@code src} and inserts it into the paragraph; the temporary
 * download directory is removed afterwards, whether or not the insertion succeeded.
 *
 * @param paragraph target paragraph
 * @param src image address
 * @param width requested image width, may be null
 */
private static void dealPImg(XWPFParagraph paragraph, String src, Integer width) {
    final File downloaded = ImageUtil.getImgFile(src);
    try {
        final String imagePath = downloaded.getAbsolutePath();
        writeImage(paragraph, imagePath, width);
    } catch (Exception e) {
        // A failed image must not abort the export; report it and carry on.
        e.printStackTrace();
    } finally {
        FileUtil.deleteParentFile(downloaded);
    }
}
/**
 * Appends a text run to the paragraph.
 *
 * @param paragraph target paragraph
 * @param text text to write; it is passed through reverseEscapeChar first
 * @param family font family, the body-text font when null
 * @param fontSize font size, 16 when null
 */
private static void dealPText(XWPFParagraph paragraph, String text, String family, Integer fontSize) {
    final String effectiveFamily = StringUtil.toStr(family, PARA_FONT_FAMILY);
    final Integer effectiveSize = StringUtil.toInt(fontSize, 16);

    final XWPFRun run = paragraph.createRun();
    setTextFontFamilyAndFontSize(run, effectiveFamily, effectiveSize);
    run.setText(reverseEscapeChar(text));
}
/**
 * Appends a hyperlink (blue, underlined) to the paragraph.
 *
 * <p>A null font size falls back to 16 points, the same default the plain-text runs use;
 * previously a null size raised a NullPointerException before the intended default applied.
 *
 * @param paragraph target paragraph
 * @param html html of the link; its text content becomes the visible link text
 * @param href link target
 * @param family font family, the body-text font when null
 * @param fontSize font size in points, 16 when null
 */
private static void dealPLink(XWPFParagraph paragraph, String html, String href, String family, Integer fontSize) {
    String name = Jsoup.parse(html).text();
    // Register the target as an external relationship of the document part.
    String id = paragraph
            .getDocument()
            .getPackagePart()
            .addExternalRelationship(href,
                    XWPFRelation.HYPERLINK.getRelation()).getId();
    CTHyperlink cLink = paragraph.getCTP().addNewHyperlink();
    cLink.setId(id);
    // Link text
    CTText ctText1 = CTText.Factory.newInstance();
    ctText1.setStringValue(name);
    CTR ctr = CTR.Factory.newInstance();
    CTRPr rpr = ctr.addNewRPr();
    // Hyperlink look: blue and underlined
    CTColor color = CTColor.Factory.newInstance();
    color.setVal("0000FF");
    rpr.setColor(color);
    rpr.addNewU().setVal(STUnderline.SINGLE);
    // Font family
    String fontFamily = StringUtil.toStr(family, PARA_FONT_FAMILY);
    CTFonts fonts = rpr.isSetRFonts() ? rpr.getRFonts() : rpr.addNewRFonts();
    fonts.setAscii(fontFamily);
    fonts.setEastAsia(fontFamily);
    fonts.setHAnsi(fontFamily);
    // Font size; the sz element is expressed in half-points, hence the factor 2.
    int points = StringUtil.toInt(fontSize, 16);
    CTHpsMeasure sz = rpr.isSetSz() ? rpr.getSz() : rpr.addNewSz();
    sz.setVal(BigInteger.valueOf(points * 2L));
    ctr.setTArray(new CTText[] { ctText1 });
    // Put the run into the hyperlink element.
    cLink.setRArray(new CTR[] { ctr });
}
/**
 * Writes html that may contain hyperlinks: each link becomes a hyperlink and the html around
 * and between the links becomes plain text runs.
 *
 * <p>Fixes compared with the earlier version: the text between the first and the second link
 * was skipped, and the text between later links lost its first character.
 *
 * <p>NOTE(review): the regex literal below is garbled in this copy of the file (as written it
 * is not a valid pattern) and is reproduced unchanged. The code expects group 2 to be the
 * html of the link and group 3 its href — restore the pattern from the original source.
 *
 * @param paragraph target paragraph
 * @param html html to write
 * @param family font family
 * @param fontSize font size
 */
private static void dealLink(XWPFParagraph paragraph, String html, String family, Integer fontSize) {
    List<LinkInfo> linkList = new ArrayList<>();
    Pattern pattern = Pattern.compile("((.*)" );
    Matcher matcher = pattern.matcher(html);
    while (matcher.find()) {
        linkList.add(new LinkInfo(matcher.start(), matcher.end(), matcher.group(2), matcher.group(3)));
    }
    if (linkList.isEmpty()) {
        // No link at all: the whole html is plain text.
        dealPText(paragraph, Jsoup.parse(html).text(), family, fontSize);
        return;
    }
    // Position in html up to which everything has been written.
    int cursor = 0;
    for (LinkInfo link : linkList) {
        // Text in front of this link (head of the html, or the gap after the previous link).
        if (link.getStart() > cursor) {
            String text = Jsoup.parse(html.substring(cursor, link.getStart())).text();
            dealPText(paragraph, text, family, fontSize);
        }
        // The link itself.
        dealPLink(paragraph, link.getHtml(), link.getHref(), family, fontSize);
        cursor = link.getEnd();
    }
    // Text after the last link.
    if (cursor < html.length()) {
        String text = Jsoup.parse(html.substring(cursor)).text();
        dealPText(paragraph, text, family, fontSize);
    }
}
/**
 * Writes html that contains an image: for every regex match, group 1 is written as text/links,
 * group 2 is treated as the img tag (its src and width attributes are read and the image is
 * inserted), and group 3 is written as text/links.
 *
 * NOTE(review): the middle group of the pattern literal is a single blank in this copy of the
 * file; presumably it matched an img tag and was lost when the source was extracted — restore
 * it from the original.
 * NOTE(review): Integer.valueOf(width) throws NumberFormatException for a width that is not a
 * plain integer (e.g. "100px").
 */
private static void dealImg(XWPFParagraph paragraph, String html, String fontFamily, int fontSize) {
Pattern pattern = Pattern.compile("(.*?)( )(.*?)" );
Matcher matcher = pattern.matcher(html);
while (matcher.find()) {
// Content in front of the image
dealLink(paragraph, matcher.group(1), fontFamily, fontSize);
String src = getAttrByImg(matcher.group(2), "src");
String width = getAttrByImg(matcher.group(2), "width");
// A missing width is passed on as null
dealPImg(paragraph, src, StringUtils.isEmpty(width) ? null : Integer.valueOf(width));
// Content behind the image
dealLink(paragraph, matcher.group(3), fontFamily, fontSize);
}
}
/**
 * Reads an attribute of the first img element found in the html.
 *
 * @param html html expected to contain an img tag
 * @param attrName name of the attribute to read
 * @return the attribute value (empty string when the img has no such attribute), or
 *         {@code null} when the html is {@code null} or contains no img element
 */
private static String getAttrByImg(String html, String attrName) {
    if (html == null) {
        return null;
    }
    Elements imgs = Jsoup.parse(html).getElementsByTag("img");
    if (imgs.isEmpty()) {
        // No img element: report "no value" instead of failing with IndexOutOfBoundsException.
        return null;
    }
    return imgs.get(0).attr(attrName);
}
/**
 * Splits the html into an ordered list of pieces: every regex match (a table, judging by the
 * variable names) becomes one piece, and the html before, between and after the matches is
 * split further by dealPAndHLabel.
 *
 * NOTE(review): the pattern literal is empty in this copy of the file; presumably it matched a
 * whole table element and was lost when the source was extracted — restore it from the original.
 *
 * @author 明快de玄米61
 * @date 2022/9/9 9:45
 * @param html html source
 * @return extracted pieces in document order
 **/
private static List<String> getExtractResultList(String html) {
List<String> result = new ArrayList<>();
// Collect the tables with their positions
List<TableInfo> tableList=new ArrayList<TableInfo>();
Pattern pt=Pattern.compile("" );
Matcher mt=pt.matcher(html);
while(mt.find()) {
tableList.add(new TableInfo(mt.start(), mt.end(), mt.group()));
}
if (tableList.size() > 0) {
for (int i = 0; i < tableList.size(); i++) {
// Current table
TableInfo current = tableList.get(i);
// Content in front of the first table
if (i == 0 && current.getStart() > 0) {
dealPAndHLabel(html.substring(0, current.getStart()), result);
}
// Handle two tables that follow each other directly
dealTableConnect(result);
// Fill blank cells, which otherwise make the pdf conversion fail
String currentHtml = dealTableTdBlank(current.getHtml());
// The table itself
result.add(currentHtml);
// Content between this table and the next one (not used for the last table)
if (i < tableList.size() - 1) {
// Next table
TableInfo next = tableList.get(i+1);
if (current.getEnd() < next.getStart()) {
dealPAndHLabel(html.substring(current.getEnd(), next.getStart()), result);
}
}
// Content after the last table
if (i == tableList.size() - 1 && current.getEnd() < html.length()) {
dealPAndHLabel(html.substring(current.getEnd()), result);
}
}
} else {
// No table: the whole html goes through the p/h splitting
dealPAndHLabel(html, result);
}
return result;
}
/**
 * Rebuilds the html so that the parts outside the regex matches are passed through
 * usePWrapImg while the matched parts are copied unchanged.
 *
 * NOTE(review): the regex literal is empty in this copy of the file; judging by the method
 * name it presumably matched elements that already wrap their images (such as p tags) —
 * restore it from the original source.
 *
 * @param html html source
 * @return rebuilt html
 */
private static String dealImgNotWrapByP(String html) {
List<LabelInfo> labelInfos = new ArrayList<>();
String regex = "" ;
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(html);
while (matcher.find()) {
labelInfos.add(new LabelInfo(matcher.start(), matcher.end(), matcher.group()));
}
if (labelInfos.size() > 0) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < labelInfos.size(); i++) {
// Current match
LabelInfo current = labelInfos.get(i);
// Content in front of the first match
if (i == 0 && current.getStart() > 0) {
usePWrapImg(sb, html.substring(0, current.getStart()));
}
// The match itself is copied unchanged
sb.append(current.getHtml());
// Content between this match and the next one (not used for the last match)
if (i < labelInfos.size() - 1) {
// Next match
LabelInfo next = labelInfos.get(i + 1);
if (current.getEnd() < next.getStart()) {
usePWrapImg(sb, html.substring(current.getEnd(), next.getStart()));
}
}
// Content after the last match
if (i == labelInfos.size() - 1 && current.getEnd() < html.length()) {
usePWrapImg(sb, html.substring(current.getEnd()));
}
}
html = sb.toString();
} else {
// No match: the whole html is passed through usePWrapImg
StringBuilder sb = new StringBuilder();
usePWrapImg(sb, html);
html = sb.toString();
}
return html;
}
/**
 * Appends the html to sb after replacing every regex match with a string built around the
 * match itself ($0).
 *
 * NOTE(review): both literals are damaged in this copy of the file (the pattern is a single
 * blank and the replacement string spans two lines, which is not valid Java). Judging by the
 * method name they presumably matched an img tag and wrapped it in a p tag — restore them
 * from the original source.
 */
private static void usePWrapImg(StringBuilder sb, String html) {
Pattern pattern = Pattern.compile(" ");
String s = pattern.matcher(html).replaceAll("$0
");
sb.append(s);
}
/**
 * Returns the html surrounded by a prefix and a suffix when it contains visible text, and
 * null when its text content is empty.
 *
 * NOTE(review): the prefix and suffix literals are empty in this copy of the file, so the html
 * is returned unchanged. Judging by the method name they were presumably an opening and a
 * closing p tag — restore them from the original source.
 *
 * @author 明快de玄米61
 * @date 2022/9/16 1:13
 * @param html html fragment
 * @return the wrapped html, or null when the fragment has no text
 **/
private static String usePWrapLabel(String html) {
boolean empty = StringUtils.isEmpty(Jsoup.parse(html).text());
if (!empty) {
return ""
+ html + "";
}
return null;
}
/**
 * Inserts a blank after group 1 of every regex match whose text content is empty, and
 * returns the html with those matches replaced.
 *
 * NOTE(review): the pattern literal is garbled in this copy of the file (group 1 is empty);
 * presumably it matched a table cell with the opening td tag as group 1 — restore it from the
 * original source.
 * NOTE(review): replaceFirst and replaceAll treat their first argument as a regex, so html
 * containing regex metacharacters may be replaced incorrectly or raise an exception.
 *
 * @param html table html
 * @return html with blank cells filled
 */
private static String dealTableTdBlank(String html) {
Pattern pattern = Pattern.compile("().*?" );
Matcher matcher = pattern.matcher(html);
// Original match -> replacement
Map<String, String> map = new HashMap<>();
while (matcher.find()) {
String oldTd = matcher.group();
// Only matches without any text are changed
if (StringUtils.isEmpty(Jsoup.parse(oldTd).text())) {
String group1 = matcher.group(1);
String newTd = oldTd.replaceFirst(group1, group1 + " ");
map.put(oldTd, newTd);
}
}
for (Map.Entry<String, String> entry : map.entrySet()) {
String key = entry.getKey();
String value = entry.getValue();
html = html.replaceAll(key, value);
}
return html;
}
/**
 * Appends a separator entry to the result list when its last entry matches a regex.
 *
 * NOTE(review): both string literals are empty in this copy of the file. Judging by the call
 * site (handling two tables that follow each other directly), the regex presumably matched a
 * table and the added entry was presumably an empty paragraph — restore them from the
 * original source.
 *
 * @param result pieces extracted so far
 */
private static void dealTableConnect(List<String> result) {
if (result.size() > 0) {
String last = result.get(result.size() - 1);
boolean matches = last.matches("" );
if (matches) {
result.add("");
}
}
}
/**
 * Splits html into pieces and appends them to the result list: every regex match becomes one
 * piece, and the html before, between and after the matches is added through usePWrapLabel
 * when it contains text.
 *
 * NOTE(review): the pattern literal is garbled in this copy of the file (an empty alternation).
 * Judging by the original comments it presumably matched whole p and h elements — restore it
 * from the original source.
 *
 * @author 明快de玄米61
 * @date 2022/9/9 1:11
 * @param html html fragment
 * @param result list that receives the pieces
 **/
private static void dealPAndHLabel(String html, List<String> result) {
// Handle img tags that are not wrapped in a p tag
html = dealImgNotWrapByP(html);
// Collect the p and h tags
List<LabelInfo> labelInfos = new ArrayList<>();
Pattern pattern = Pattern.compile("(|)"
, Pattern.CASE_INSENSITIVE);
Matcher matcher = pattern.matcher(html);
while (matcher.find()) {
labelInfos.add(new LabelInfo(matcher.start(), matcher.end(), matcher.group()));
}
if (labelInfos.size() > 0) {
for (int i = 0; i < labelInfos.size(); i++) {
// Current match
LabelInfo current = labelInfos.get(i);
// Content in front of the first match
if (i == 0 && current.getStart() > 0) {
String s = usePWrapLabel(html.substring(0, current.getStart()));
if (s != null) {
result.add(s);
}
}
// The match itself
result.add(current.getHtml());
// Content between this match and the next one (not used for the last match)
if (i < labelInfos.size() - 1) {
// Next match
LabelInfo next = labelInfos.get(i + 1);
if (current.getEnd() < next.getStart()) {
String s = usePWrapLabel(html.substring(current.getEnd(), next.getStart()));
if (s != null) {
result.add(s);
}
}
}
// Content after the last match
if (i == labelInfos.size() - 1 && current.getEnd() < html.length()) {
String s = usePWrapLabel(html.substring(current.getEnd()));
if (s != null) {
result.add(s);
}
}
}
} else {
// No match: the whole html is one piece, if it has any text
String s = usePWrapLabel(html);
if (s != null) {
result.add(s);
}
}
}
/**
 * Reverses escaping of special characters by applying four regex replacements in sequence.
 * NOTE(review): as written each replacement maps a character to itself, so the method is a no-op;
 * the search strings were presumably HTML entities (nbsp, lt, gt, amp) that were decoded during
 * extraction - restore them from the original source.
 *
 * @author 明快de玄米61
 * @date 2022/9/9 1:11
 * @param text text that may contain escaped characters
 * @return the text after the four replacements
 **/
private static String reverseEscapeChar(String text) {
return text.replaceAll(" ", " ").replaceAll("<", "<").replaceAll(">", ">").replaceAll("&", "&");
}
/**
 * Sets the first-line indent of a paragraph.
 * The value written is {@code num * PER_CHART}, truncated to a whole number.
 *
 * @author 明快de玄米61
 * @date 2022/9/9 9:47
 * @param paragraph paragraph to modify
 * @param num       indent amount, multiplied by {@code PER_CHART} before being written
 **/
private static void setTextIndent(XWPFParagraph paragraph, int num) {
    CTPPr props = getPrOfParagraph(paragraph);
    // Reuse the indent element when the paragraph already has one.
    CTInd indent = props.getInd();
    if (indent == null) {
        indent = props.addNewInd();
    }
    long firstLineChars = (long) (num * PER_CHART);
    indent.setFirstLineChars(BigInteger.valueOf(firstLineChars));
}
/**
 * Returns the paragraph-properties element of a paragraph, creating it when it is missing.
 *
 * @param paragraph paragraph whose properties are requested
 * @return the existing or newly added properties element
 */
private static CTPPr getPrOfParagraph(XWPFParagraph paragraph) {
    CTP ctp = paragraph.getCTP();
    if (ctp.isSetPPr()) {
        return ctp.getPPr();
    }
    return ctp.addNewPPr();
}
/**
 * Sets an exact line height on a paragraph and clears the spacing before and after it.
 *
 * @param titleParagraph paragraph to modify
 * @param rowSpace       line height; multiplied by {@code PER_POUND} and truncated to an int
 */
private static void setPRowSpacing(XWPFParagraph titleParagraph, double rowSpace) {
    CTP ctp = titleParagraph.getCTP();
    CTPPr props;
    if (ctp.isSetPPr()) {
        props = ctp.getPPr();
    } else {
        props = ctp.addNewPPr();
    }
    CTSpacing spacing;
    if (props.isSetSpacing()) {
        spacing = props.getSpacing();
    } else {
        spacing = props.addNewSpacing();
    }
    spacing.setAfter(BigInteger.ZERO);
    spacing.setBefore(BigInteger.ZERO);
    // EXACT makes the line value below a fixed height.
    spacing.setLineRule(STLineSpacingRule.EXACT);
    int lineValue = (int) (rowSpace * PER_POUND);
    spacing.setLine(BigInteger.valueOf(lineValue));
}
/**
 * Sets the spacing before and after a paragraph and switches its line rule to EXACT.
 * Both amounts are multiplied by {@code ONE_LINE} before being written.
 * Earlier revisions wrote {@code before} into the "after" slot and {@code after} into the "before"
 * slot; the values are now written to the slot their parameter name says. Call sites that passed
 * their arguments crossed to compensate must be swapped back.
 *
 * @param paragraph paragraph to modify
 * @param before    spacing before the paragraph, in multiples of {@code ONE_LINE}
 * @param after     spacing after the paragraph, in multiples of {@code ONE_LINE}
 */
private static void setPSpacing(XWPFParagraph paragraph, Integer before, Integer after) {
    CTP ctp = paragraph.getCTP();
    CTPPr ppr = ctp.isSetPPr() ? ctp.getPPr() : ctp.addNewPPr();
    CTSpacing spacing = ppr.isSetSpacing() ? ppr.getSpacing() : ppr.addNewSpacing();
    spacing.setBefore(BigInteger.valueOf(before * ONE_LINE));
    spacing.setAfter(BigInteger.valueOf(after * ONE_LINE));
    // Line rule is EXACT, matching the other spacing helpers of this class.
    spacing.setLineRule(STLineSpacingRule.EXACT);
}
/**
 * Applies a font family and a font size to a run.
 *
 * @param xwpfRun    run to modify
 * @param fontFamily font family name
 * @param fontSize   font size passed to {@code XWPFRun.setFontSize}
 */
private static void setTextFontFamilyAndFontSize(XWPFRun xwpfRun, String fontFamily, int fontSize) {
    xwpfRun.setFontSize(fontSize);
    xwpfRun.setFontFamily(fontFamily);
}
/**
 * Creates the default footer containing a right-aligned page number.
 * The footer paragraph consists of an optional prefix run, a PAGE field (begin / instruction /
 * separate / end runs) and an optional suffix run. The field runs are built directly on the
 * underlying CTP so that all four font slots (ascii, cs, eastAsia, hAnsi) and both size slots
 * can be set explicitly.
 * Each field run carries exactly one run-properties element with one fonts element; the previous
 * implementation called {@code addNewRPr()} / {@code addNewRFonts()} once per property and thereby
 * emitted several rPr elements per run. The paragraph properties created by the alignment setters
 * are reused instead of adding a second pPr / jc element.
 *
 * @author 明快de玄米61
 * @date 2022/9/13 11:30
 * @param document   XWPFDocument that receives the footer
 * @param fontFamily font family used for prefix, page number and suffix
 * @param fontSize   font size; the field runs store it doubled (sz / szCs)
 * @param color      colour for prefix and suffix, see {@code setStyle}
 * @param prefix     text in front of the page number, may be empty
 * @param suffix     text behind the page number, may be empty
 **/
private static void initFooter(XWPFDocument document, String fontFamily, int fontSize, String color, String prefix, String suffix) {
    // Paragraph that will hold the page number.
    CTP pageNo = CTP.Factory.newInstance();
    XWPFParagraph paragraph = new XWPFParagraph(pageNo, document);
    // Right aligned horizontally, centred vertically.
    paragraph.setAlignment(ParagraphAlignment.RIGHT);
    paragraph.setVerticalAlignment(TextAlignment.CENTER);

    // Prefix.
    setStyle(paragraph.createRun(), fontFamily, fontSize, false, color, prefix);

    // Paragraph properties: the setters above may already have created pPr / jc, so reuse them.
    CTPPr pPr = pageNo.isSetPPr() ? pageNo.getPPr() : pageNo.addNewPPr();
    (pPr.isSetPStyle() ? pPr.getPStyle() : pPr.addNewPStyle()).setVal("style21");
    (pPr.isSetJc() ? pPr.getJc() : pPr.addNewJc()).setVal(STJc.RIGHT);

    // PAGE field.
    int doubleFontSize = fontSize * 2;
    newFooterFieldRun(pageNo, fontFamily, doubleFontSize).addNewFldChar().setFldCharType(STFldCharType.BEGIN);
    newFooterFieldRun(pageNo, fontFamily, doubleFontSize).addNewInstrText().setStringValue("PAGE \\* MERGEFORMAT");
    newFooterFieldRun(pageNo, fontFamily, doubleFontSize).addNewFldChar().setFldCharType(STFldCharType.SEPARATE);
    CTR end = newFooterFieldRun(pageNo, fontFamily, doubleFontSize);
    CTRPr endRPr = end.getRPr();
    endRPr.addNewNoProof();
    endRPr.addNewLang().setVal("zh-CN");
    end.addNewFldChar().setFldCharType(STFldCharType.END);

    // Suffix.
    setStyle(paragraph.createRun(), fontFamily, fontSize, false, color, suffix);

    // Register the paragraph as the default footer of the (single) section.
    CTSectPr sectPr = document.getDocument().getBody().isSetSectPr() ? document.getDocument().getBody().getSectPr() : document.getDocument().getBody().addNewSectPr();
    XWPFHeaderFooterPolicy policy = new XWPFHeaderFooterPolicy(document, sectPr);
    policy.createFooter(STHdrFtr.DEFAULT, new XWPFParagraph[] { paragraph });
}

/** Appends a run to {@code paragraph} whose single rPr sets all font slots and both size slots. */
private static CTR newFooterFieldRun(CTP paragraph, String fontFamily, int doubleFontSize) {
    CTR run = paragraph.addNewR();
    CTRPr rPr = run.addNewRPr();
    org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFonts fonts = rPr.addNewRFonts();
    fonts.setAscii(fontFamily);
    fonts.setCs(fontFamily);
    fonts.setEastAsia(fontFamily);
    fonts.setHAnsi(fontFamily);
    rPr.addNewSz().setVal(BigInteger.valueOf(doubleFontSize));
    rPr.addNewSzCs().setVal(BigInteger.valueOf(doubleFontSize));
    return run;
}
/**
 * Applies bold flag, font family, font size, optional text and colour to a run.
 *
 * @param run        run to modify
 * @param fontFamily font family name
 * @param fontSize   font size
 * @param bold       whether the run is bold
 * @param color      colour value; {@code "000000"} is used when it is empty
 * @param text       text to set; skipped when it is empty
 */
private static void setStyle(XWPFRun run, String fontFamily, int fontSize, boolean bold, String color, String text) {
    String effectiveColor = color;
    if (StringUtils.isEmpty(color)) {
        effectiveColor = "000000";
    }
    boolean hasText = !StringUtils.isEmpty(text);

    run.setBold(bold);
    run.setFontFamily(fontFamily);
    run.setFontSize(fontSize);
    if (hasText) {
        run.setText(text);
    }
    run.setColor(effectiveColor);
}
/**
 * Registers one heading style per level, from level 1 up to {@code MAX_HEADING_LEVEL}.
 * The style id of each level comes from {@code getHeadingStyle}.
 *
 * @author 明快de玄米61
 * @date 2022/9/13 11:01
 * @param document XWPFDocument that receives the styles
 **/
private static void initHeadingStyle(XWPFDocument document) {
    int level = 1;
    while (level <= MAX_HEADING_LEVEL) {
        String styleId = getHeadingStyle(level);
        createHeadingStyle(document, styleId, level);
        level++;
    }
}
/**
 * Initialises the page margins from the constants {@code TOP}, {@code BOTTOM}, {@code LEFT} and
 * {@code RIGHT}, each multiplied by {@code PER_CM} and rounded.
 * An existing section-properties / page-margin element is reused; a new one is only added when
 * the document has none, so that the body never ends up with two section-properties elements
 * (the same approach the header and footer helpers of this class take).
 *
 * @author 明快de玄米61
 * @date 2022/9/13 11:01
 * @param document XWPFDocument whose margins are set
 **/
private static void initPageMargin(XWPFDocument document) {
    CTSectPr sectPr = document.getDocument().getBody().isSetSectPr() ? document.getDocument().getBody().getSectPr() : document.getDocument().getBody().addNewSectPr();
    CTPageMar ctpagemar = sectPr.isSetPgMar() ? sectPr.getPgMar() : sectPr.addNewPgMar();
    ctpagemar.setTop(BigInteger.valueOf(Math.round(TOP * PER_CM)));
    ctpagemar.setBottom(BigInteger.valueOf(Math.round(BOTTOM * PER_CM)));
    ctpagemar.setLeft(BigInteger.valueOf(Math.round(LEFT * PER_CM)));
    ctpagemar.setRight(BigInteger.valueOf(Math.round(RIGHT * PER_CM)));
}
/**
 * Builds a paragraph style for one heading level and adds it to the document's styles.
 * The style id doubles as the style name; the heading level is used both as UI priority and
 * as outline level.
 *
 * @param doc          document that receives the style
 * @param strStyleId   id (and name) of the style
 * @param headingLevel heading level written as UI priority and outline level
 */
private static void createHeadingStyle(XWPFDocument doc, String strStyleId, int headingLevel) {
    // Building blocks shared by several properties below.
    CTDecimalNumber level = CTDecimalNumber.Factory.newInstance();
    level.setVal(BigInteger.valueOf(headingLevel));
    CTString name = CTString.Factory.newInstance();
    name.setVal(strStyleId);
    CTOnOff flag = CTOnOff.Factory.newInstance();
    // The outline level is what marks the style as a heading of the given level.
    CTPPr paragraphProps = CTPPr.Factory.newInstance();
    paragraphProps.setOutlineLvl(level);

    CTStyle ctStyle = CTStyle.Factory.newInstance();
    ctStyle.setStyleId(strStyleId);
    ctStyle.setName(name);
    // Lower numbers are more prominent in the format bar.
    ctStyle.setUiPriority(level);
    ctStyle.setUnhideWhenUsed(flag);
    // Makes the style show up in the format bar.
    ctStyle.setQFormat(flag);
    ctStyle.setPPr(paragraphProps);

    XWPFStyle style = new XWPFStyle(ctStyle);
    style.setType(STStyleType.PARAGRAPH);
    doc.createStyles().addStyle(style);
}
/**
 * Converts an HTML table element into a table of the Word document.
 * The table is first normalised by {@code simplifyTable}; the column count is taken from the td
 * cells of the first row, or from its th cells when it has no td. Cells covered by a colspan or
 * rowspan of an earlier cell are remembered in a marker grid and receive a CONTINUE merge flag
 * without content; every other cell receives RESTART merge flags and its content via
 * {@code parseingCell}.
 *
 * @param child    element containing the table
 * @param document document that receives the table
 */
private static void parseTableElement(Element child, XWPFDocument document){
    // Pad merged rows/columns first, then work on the padded table.
    child = simplifyTable(child);
    Elements trList = child.select("tr");
    Elements thList = trList.first().select("th");
    Elements tdList = trList.get(0).getElementsByTag("td");
    int rowTotal = trList.size();
    int colTotal = tdList.isEmpty() ? thList.size() : tdList.size();
    XWPFTable table = document.createTable(rowTotal, colTotal);
    // Marker grid: "mergeCol" / "mergeRow" flags for cells covered by a span.
    Map<String,Boolean>[][] array = new Map[rowTotal][colTotal];

    // Fixed (non automatic) table width.
    CTTblPr tablePr = table.getCTTbl().addNewTblPr();
    CTTblWidth width = tablePr.addNewTblW();
    width.setW(BigInteger.valueOf((int)(TABLE_WIDTH * PER_CM)));
    width.setType(STTblWidth.DXA);

    for (int row = 0; row < rowTotal; row++) {
        Element trElement = trList.get(row);
        Elements tds = trElement.getElementsByTag("td");
        if (tds.isEmpty()) {
            tds = trElement.getElementsByTag("th");
        }
        for (int col = 0; col < tds.size(); col++) {
            Element colElement = tds.get(col);
            String colspan = colElement.attr("colspan");
            String rowspan = colElement.attr("rowspan");
            boolean hasColspan = !StringUtils.isEmpty(colspan);
            boolean hasRowspan = !StringUtils.isEmpty(rowspan);

            // Mark the cells to the right that this cell spans.
            if (hasColspan) {
                int colCount = Integer.parseInt(colspan);
                for (int offset = 1; offset < colCount; offset++) {
                    try {
                        array[row][col + offset] = new HashMap<String, Boolean>();
                        array[row][col + offset].put("mergeCol", true);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
            // Mark the cells below that this cell spans.
            if (hasRowspan) {
                int rowCount = Integer.parseInt(rowspan);
                for (int offset = 1; offset < rowCount; offset++) {
                    array[row + offset][col] = new HashMap<String, Boolean>();
                    array[row + offset][col].put("mergeRow", true);
                }
            }

            XWPFTableCell tableCell = table.getRow(row).getCell(col);
            // Cell margins (added by 明快de玄米61, 2022/9/16).
            setTableCellMar(tableCell, CELL_MARGIN, CELL_MARGIN, CELL_MARGIN, CELL_MARGIN);

            // Horizontal merge: only a span-less cell outside the first column can be a continuation.
            if (!hasColspan && col != 0 && isMergeMarked(array[row][col], "mergeCol")) {
                applyHorizontalMerge(tableCell, STMerge.CONTINUE);
                continue;
            }
            applyHorizontalMerge(tableCell, STMerge.RESTART);

            // Vertical merge: a span-less cell that was marked from above is a continuation.
            if (!hasRowspan && isMergeMarked(array[row][col], "mergeRow")) {
                applyVerticalMerge(tableCell, STMerge.CONTINUE);
                continue;
            }
            applyVerticalMerge(tableCell, STMerge.RESTART);

            tableCell.removeParagraph(0);
            tableCell.setVerticalAlignment(XWPFVertAlign.CENTER);
            parseingCell(tableCell, colElement);
        }
    }
}

/** Tells whether the marker map exists and holds {@code true} under the given key. */
private static boolean isMergeMarked(Map<String, Boolean> marker, String key) {
    return marker != null && Boolean.TRUE.equals(marker.get(key));
}

/** Sets the horizontal merge value of a cell, creating tcPr / hMerge when missing. */
private static void applyHorizontalMerge(XWPFTableCell cell, STMerge.Enum value) {
    CTTc ctTc = cell.getCTTc();
    CTTcPr tcPr = ctTc.getTcPr();
    if (tcPr == null) {
        tcPr = ctTc.addNewTcPr();
    }
    if (tcPr.getHMerge() == null) {
        tcPr.addNewHMerge().setVal(value);
    } else {
        tcPr.getHMerge().setVal(value);
    }
}

/** Sets the vertical merge value of a cell, creating tcPr / vMerge when missing. */
private static void applyVerticalMerge(XWPFTableCell cell, STMerge.Enum value) {
    CTTc ctTc = cell.getCTTc();
    CTTcPr tcPr = ctTc.getTcPr();
    if (tcPr == null) {
        tcPr = ctTc.addNewTcPr();
    }
    if (tcPr.getVMerge() == null) {
        tcPr.addNewVMerge().setVal(value);
    } else {
        tcPr.getVMerge().setVal(value);
    }
}
/**
 * Sets the margins of a table cell. A negative value leaves the corresponding side untouched.
 * Each value is multiplied by {@code PER_POUND} and rounded before it is written.
 *
 * @param cell   cell to modify
 * @param top    top margin
 * @param bottom bottom margin
 * @param left   left margin
 * @param right  right margin
 */
private static void setTableCellMar(XWPFTableCell cell, double top, double bottom, double left, double right) {
    CTTc ctTc = cell.getCTTc();
    CTTcPr cellProps = ctTc.isSetTcPr() ? ctTc.getTcPr() : ctTc.addNewTcPr();
    CTTcMar margins;
    if (cellProps.isSetTcMar()) {
        margins = cellProps.getTcMar();
    } else {
        margins = cellProps.addNewTcMar();
    }
    if (left >= 0) {
        BigInteger value = BigInteger.valueOf(Math.round(left * PER_POUND));
        if (margins.isSetLeft()) {
            margins.getLeft().setW(value);
        } else {
            margins.addNewLeft().setW(value);
        }
    }
    if (top >= 0) {
        BigInteger value = BigInteger.valueOf(Math.round(top * PER_POUND));
        if (margins.isSetTop()) {
            margins.getTop().setW(value);
        } else {
            margins.addNewTop().setW(value);
        }
    }
    if (right >= 0) {
        BigInteger value = BigInteger.valueOf(Math.round(right * PER_POUND));
        if (margins.isSetRight()) {
            margins.getRight().setW(value);
        } else {
            margins.addNewRight().setW(value);
        }
    }
    if (bottom >= 0) {
        BigInteger value = BigInteger.valueOf(Math.round(bottom * PER_POUND));
        if (margins.isSetBottom()) {
            margins.getBottom().setW(value);
        } else {
            margins.addNewBottom().setW(value);
        }
    }
}
/**
 * Returns the cell-properties element of a table cell, creating it when it is missing.
 *
 * @param cell table cell
 * @return the existing or newly added cell properties
 */
private static CTTcPr getCttcpr(XWPFTableCell cell){
    CTTc ctTc = cell.getCTTc();
    if (ctTc.isSetTcPr()) {
        return ctTc.getTcPr();
    }
    return ctTc.addNewTcPr();
}
/**
 * Walks the element tree of a cell and hands content to {@code parseingCellImg}.
 * Children that have children of their own are processed recursively, leaf children are written
 * directly; afterwards the element itself is written when {@code hasText()} reports text.
 *
 * @param tableCell  target cell
 * @param colElement element whose content is written into the cell
 */
private static void parseingCell(XWPFTableCell tableCell, Element colElement){
    for (Element element : colElement.children()) {
        if (element.children().isEmpty()) {
            parseingCellImg(tableCell, element);
        } else {
            parseingCell(tableCell, element);
        }
    }
    if (colElement.hasText()) {
        parseingCellImg(tableCell, colElement);
    }
}
/**
 * Pads an HTML table so that cells covered by a colspan/rowspan exist as real cells.
 * Pass 1 (colspan): for every td with a colspan, clones of that td (without the colspan attribute)
 * are inserted after it, colspan - 1 times. The class attribute of every tr and td is removed.
 * Pass 2 (rowspan): column by column, for every td with a rowspan, clones of that td (without the
 * rowspan attribute) are inserted at the same child index into the following rows, rowspan - 1 times.
 * The original cells keep their colspan/rowspan attributes.
 *
 * @param child element containing the table
 * @return the first element with tag "table" found under {@code child}
 */
private static Element simplifyTable(Element child) {
Elements trElements = child.select("tr");
if (trElements != null) {
Iterator<Element> eleIterator = trElements.iterator();
// NOTE(review): rowNum is only incremented, never read.
Integer rowNum = 0;
// colspan handling
while (eleIterator.hasNext()) {
rowNum++;
Element trElement = eleIterator.next();
// drop the class attribute
trElement.removeAttr("class");
// NOTE(review): only td cells are expanded here; th cells with a colspan are not touched.
Elements tdElements = trElement.select("td");
// Work on a snapshot so that cells inserted below are not visited by this loop.
List<Element> tdEleList = covertElements2List(tdElements);
for (int i = 0; i < tdEleList.size(); i++) {
Element curTdElement = tdEleList.get(i);
// drop the class attribute
curTdElement.removeAttr("class");
Element ele = curTdElement.clone();
String colspanValStr = curTdElement.attr("colspan");
if (!StringUtils.isEmpty(colspanValStr)) {
// The inserted copies must not carry the colspan themselves.
ele.removeAttr("colspan");
Integer colspanVal = Integer.parseInt(colspanValStr);
for (int k = 0; k < colspanVal - 1; k++) {
curTdElement.after(ele.outerHtml());
}
}
}
}
// rowspan handling
List<Element> trEleList = covertElements2List(trElements);
Element firstTrEle = trElements.first();
// The first row (td cells, or th cells when it has no td) defines the number of columns.
Elements tdElements = firstTrEle.select("td");
if(tdElements.isEmpty()) {
tdElements=firstTrEle.select("th");
}
Integer tdCount = tdElements.size();
for (int i = 0; i < tdElements.size(); i++) { // visit all cells of this column
for (Element trElement : trEleList) {
// Re-select on every visit so that cells inserted by earlier iterations are seen.
List<Element> tdElementList = covertElements2List(trElement.select("td"));
// Rows that have no cell at index i are skipped (the exception is used as a bounds check).
try {
tdElementList.get(i);
} catch (Exception e) {
continue;
}
Node curTdNode = tdElementList.get(i);
Node cNode = curTdNode.clone();
String rowspanValStr = curTdNode.attr("rowspan");
if (!StringUtils.isEmpty(rowspanValStr)) {
// The inserted copies must not carry the rowspan themselves.
cNode.removeAttr("rowspan");
Element nextTrElement = trElement.nextElementSibling();
Integer rowspanVal = Integer.parseInt(rowspanValStr);
for (int j = 0; j < rowspanVal - 1; j++) {
Node tempNode = cNode.clone();
List<Node> nodeList = new ArrayList<Node>();
nodeList.add(tempNode);
// Advance to the next row for the second and later copies.
// NOTE(review): nextTrElement is not checked for null; a rowspan reaching past the
// last row would presumably fail here - confirm.
if (j > 0) {
nextTrElement = nextTrElement.nextElementSibling();
}
Integer indexNum = i ;
// NOTE(review): redundant, indexNum already equals i.
if (i == 0)
{
indexNum = 0;
}
// NOTE(review): == compares two Integer objects by reference here.
if (indexNum == tdCount) {
nextTrElement.appendChild(tempNode);
}else {
nextTrElement.insertChildren(indexNum, nodeList);
}
}
}
}
}
}
Element tableEle = child.getElementsByTag("table").first();
return tableEle;
}
/**
 * Copies the elements of a Jsoup {@code Elements} collection into a new list, keeping their order.
 *
 * @param curElements elements to copy
 * @return a new, independent list holding the same element references
 */
private static List<Element> covertElements2List(Elements curElements) {
    List<Element> copy = new ArrayList<Element>();
    for (Element element : curElements) {
        copy.add(element);
    }
    return copy;
}
/**
 * Writes one element into a table cell: either as a centred picture or as a left-aligned text run.
 * Picture branch: the file is obtained through {@code ImageUtil.getImgFile(src)}, the display size
 * is derived from the width/height attributes and the decoded image, the picture is added to a new
 * paragraph, and the file is finally handed to {@code FileUtil.deleteParentFile}.
 * Text branch: the element text is passed through {@code reverseEscapeChar} and written with
 * {@code PARA_FONT_FAMILY} / {@code PARA_FONT_SIZE}.
 * NOTE(review): the condition that selects the picture branch is mangled (broken string literals);
 * it presumably tested for an img tag prefix - restore it from the original source.
 *
 * @param tableCell target cell
 * @param element   element to write
 */
private static void parseingCellImg(XWPFTableCell tableCell, Element element) {
if((element.toString().startsWith(")||element.toString().startsWith("))) {
String src=element.attr("src");
int res=getPictureType(src);
String width=element.attr("width");
String height=element.attr("height");
XWPFParagraph paragraph = tableCell.addParagraph();
paragraph.setAlignment(ParagraphAlignment.CENTER);
XWPFRun run = paragraph.createRun();
// add start by 明快de玄米61 time 2022/9/8 reason image download
File imgFile = null;
try {
imgFile = ImageUtil.getImgFile(src);
// NOTE(review): this stream is closed only after a successful addPicture call below;
// it stays open when an exception is thrown before that.
FileInputStream iss = new FileInputStream(imgFile);
BufferedImage image = getImgByFilePath(imgFile.getAbsolutePath());
if (image == null) {
// Image could not be decoded: fall back to the width attribute or the maximum width.
width = StringUtils.isEmpty(width) ? String.valueOf(MAX_TABLE_IMG_WIDTH) : width;
// NOTE(review): tests and uses width, not height, so height always ends up equal to width.
height = StringUtils.isEmpty(width) ? String.valueOf(MAX_TABLE_IMG_WIDTH) : width;
} else {
width = StringUtils.isEmpty(width) ? String.valueOf(image.getWidth()) : width;
BigDecimal originalWidth = new BigDecimal(image.getWidth());
BigDecimal originalHeight = new BigDecimal(image.getHeight());
// Without a height attribute the height follows the image's aspect ratio.
height = StringUtils.isBlank(height) ? originalHeight.multiply(new BigDecimal(width).divide(originalWidth, 10, BigDecimal.ROUND_HALF_UP)).toBigInteger().toString() : height;
// Too wide for a table cell: scale width and height by the same factor.
if (Integer.valueOf(width) >= MAX_TABLE_IMG_WIDTH) {
BigDecimal widthBigDecimal = new BigDecimal(width);
BigDecimal heightBigDecimal = new BigDecimal(height);
BigDecimal divide = new BigDecimal(MAX_TABLE_IMG_WIDTH).divide(widthBigDecimal, 10, BigDecimal.ROUND_HALF_UP);
width = widthBigDecimal.multiply(divide).toBigInteger().toString();
height = heightBigDecimal.multiply(divide).toBigInteger().toString();
}
}
// add end by 明快de玄米61 time 2022/9/8 reason image download
// delete start by 明快de玄米61 time 2022/9/8 reason the image is a hyperlink this time
// FileInputStream iss=new FileInputStream(src);
// delete start by 明快de玄米61 time 2022/9/8 reason the image is a hyperlink this time
try {
run.addPicture(iss, res, "", Units.toEMU(Double.valueOf(width)), Units.toEMU(Double.valueOf(height)));
// r9.setTextPosition(28);
iss.close();
} catch (NumberFormatException e1) {
e1.printStackTrace();
} catch (InvalidFormatException e1) {
e1.printStackTrace();
} catch (IOException e1) {
e1.printStackTrace();
}
} catch (Exception e) {
e.printStackTrace();
} finally {
// Clean up the file obtained for this picture.
if (imgFile != null) {
FileUtil.deleteParentFile(imgFile);
}
}
}
else {
XWPFParagraph paragraph = tableCell.addParagraph();
paragraph.setAlignment(ParagraphAlignment.LEFT);
XWPFRun run = paragraph.createRun();
run.setText(reverseEscapeChar(element.text()));
run.setFontFamily(PARA_FONT_FAMILY);
run.setFontSize(PARA_FONT_SIZE);
}
}
/**
 * Reads an image from a file path.
 * The file stream is always closed before the method returns, so the file can be deleted or
 * replaced by the caller afterwards. Failures are reported via {@code printStackTrace} and
 * turned into a {@code null} result.
 *
 * @param path path of the image file
 * @return the decoded image, or {@code null} when the file cannot be read or decoded
 */
private static BufferedImage getImgByFilePath(String path) {
    try (FileInputStream fis = new FileInputStream(path)) {
        return ImageIO.read(fis);
    } catch (Exception e) {
        // Best effort: callers treat null as "size unknown".
        e.printStackTrace();
        return null;
    }
}
/**
 * Derives the POI picture type constant from an image source string.
 * The source is searched (case-sensitively) for "png", "jpg", "jpeg" and "gif", in that order;
 * anything else is treated as JPEG. GIF sources map to {@code PICTURE_TYPE_GIF}; they used to
 * fall through to {@code PICTURE_TYPE_PICT} because no branch handled them.
 *
 * @param src image path or URL
 * @return one of the {@code XWPFDocument.PICTURE_TYPE_*} constants
 */
private static int getPictureType(String src){
    if (src.contains("png")) {
        return XWPFDocument.PICTURE_TYPE_PNG;
    }
    if (src.contains("jpg") || src.contains("jpeg")) {
        return XWPFDocument.PICTURE_TYPE_JPEG;
    }
    if (src.contains("gif")) {
        return XWPFDocument.PICTURE_TYPE_GIF;
    }
    // Unknown sources are treated as JPEG.
    return XWPFDocument.PICTURE_TYPE_JPEG;
}
/**
 * Creates the default header of the document with a single line of text.
 * The text and the space-preserve attribute are applied to the same text element; the previous
 * code put the attribute on a second, empty run, so whitespace of the header text itself was not
 * preserved.
 *
 * @param docx document that receives the header
 * @param text header text
 * @throws IOException  declared for callers; propagated from header creation
 * @throws XmlException declared for callers; propagated from header creation
 */
private static void createDefaultHeader(XWPFDocument docx, String text) throws IOException, XmlException{
    CTP ctp = CTP.Factory.newInstance();
    XWPFParagraph paragraph = new XWPFParagraph(ctp, docx);
    org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText headerText = ctp.addNewR().addNewT();
    headerText.setStringValue(text);
    headerText.setSpace(SpaceAttribute.Space.PRESERVE);
    // Reuse the section properties when the body already has them.
    CTSectPr sectPr = docx.getDocument().getBody().isSetSectPr() ? docx.getDocument().getBody().getSectPr() : docx.getDocument().getBody().addNewSectPr();
    XWPFHeaderFooterPolicy policy = new XWPFHeaderFooterPolicy(docx, sectPr);
    XWPFHeader header = policy.createHeader(STHdrFtr.DEFAULT, new XWPFParagraph[] { paragraph });
    header.setXWPFDocument(docx);
}
/**
 * Sets the page margins of the document from decimal strings. Blank values leave the
 * corresponding side untouched.
 * An existing section-properties / page-margin element is reused; a new one is only added when
 * the document has none, so that repeated calls (or calls after the header/footer helpers) do not
 * leave several section-properties elements in the body.
 *
 * @param document document to modify
 * @param left     left margin as an integer string, or blank
 * @param top      top margin as an integer string, or blank
 * @param right    right margin as an integer string, or blank
 * @param bottom   bottom margin as an integer string, or blank
 * @throws NumberFormatException when a non-blank value is not a valid integer
 */
private static void setDocumentMargin(XWPFDocument document, String left, String top, String right, String bottom) {
    CTSectPr sectPr = document.getDocument().getBody().isSetSectPr() ? document.getDocument().getBody().getSectPr() : document.getDocument().getBody().addNewSectPr();
    CTPageMar ctpagemar = sectPr.isSetPgMar() ? sectPr.getPgMar() : sectPr.addNewPgMar();
    if (StringUtils.isNotBlank(left)) {
        ctpagemar.setLeft(new BigInteger(left));
    }
    if (StringUtils.isNotBlank(top)) {
        ctpagemar.setTop(new BigInteger(top));
    }
    if (StringUtils.isNotBlank(right)) {
        ctpagemar.setRight(new BigInteger(right));
    }
    if (StringUtils.isNotBlank(bottom)) {
        ctpagemar.setBottom(new BigInteger(bottom));
    }
}
/**
 * Adds a custom paragraph style for a heading level to the document.
 * The style id is also used as the style name; the heading level is written as UI priority and
 * as outline level.
 *
 * @param docxDocument document that receives the style
 * @param strStyleId   id (and name) of the style
 * @param headingLevel heading level written as UI priority and outline level
 */
private static void addCustomHeadingStyle(XWPFDocument docxDocument, String strStyleId, int headingLevel) {
    CTString displayName = CTString.Factory.newInstance();
    displayName.setVal(strStyleId);
    CTDecimalNumber level = CTDecimalNumber.Factory.newInstance();
    level.setVal(BigInteger.valueOf(headingLevel));
    CTOnOff flag = CTOnOff.Factory.newInstance();
    CTPPr outlineProps = CTPPr.Factory.newInstance();
    outlineProps.setOutlineLvl(level);

    CTStyle ctStyle = CTStyle.Factory.newInstance();
    ctStyle.setStyleId(strStyleId);
    ctStyle.setName(displayName);
    ctStyle.setUiPriority(level);
    ctStyle.setUnhideWhenUsed(flag);
    ctStyle.setQFormat(flag);
    ctStyle.setPPr(outlineProps);

    XWPFStyle style = new XWPFStyle(ctStyle);
    XWPFStyles styles = docxDocument.createStyles();
    style.setType(STStyleType.PARAGRAPH);
    styles.addStyle(style);
}
/**
 * Prepares a paragraph that holds a picture: spacing before and after is set to zero and the
 * line rule is switched to AUTO.
 *
 * @param paragraph paragraph to modify
 */
private static void setImgSpacing(XWPFParagraph paragraph) {
    CTP ctp = paragraph.getCTP();
    CTPPr props = ctp.getPPr();
    if (props == null) {
        props = ctp.addNewPPr();
    }
    CTSpacing spacing;
    if (props.isSetSpacing()) {
        spacing = props.getSpacing();
    } else {
        spacing = props.addNewSpacing();
    }
    spacing.setAfter(BigInteger.ZERO);
    spacing.setBefore(BigInteger.ZERO);
    spacing.setLineRule(STLineSpacingRule.AUTO);
}
/**
 * Removes the characters {@code * \ : ? < > / " |} from a string.
 * {@code null} and the empty string are returned unchanged.
 *
 * @param str input string, may be {@code null}
 * @return the string without the listed characters
 */
private static String getStringNoBlank(String str) {
    if (str == null || "".equals(str)) {
        return str;
    }
    Pattern forbidden = Pattern.compile("\\*|\\\\|\\:|\\?|\\<|\\>|\\/|\"|\\|");
    return forbidden.matcher(str).replaceAll("");
}
/**
 * Computes a display length for a string: every character in the range U+0391..U+FFE5
 * counts as 1, every other character counts as 0.5.
 *
 * @param value string to measure
 * @return the weighted length
 */
private static double length(String value) {
    final String wideChar = "[\u0391-\uFFE5]";
    double total = 0;
    for (char c : value.toCharArray()) {
        // Wide (e.g. Chinese) characters weigh 1, everything else half of that.
        total += String.valueOf(c).matches(wideChar) ? 1 : 0.5;
    }
    return total;
}
/**
 * Splits content at every regex match. The result alternates between the text in front of a match
 * and the match itself (after a second regex clean-up), and ends with the text behind the last match.
 * Slices are taken from the matcher's own offsets. The previous implementation located each match
 * with {@code content.indexOf}, which returns the first occurrence and therefore produced wrong
 * slices - or a {@code StringIndexOutOfBoundsException} - as soon as the same match text appeared twice.
 * NOTE(review): both regex literals look mangled (presumably they referred to heading tags) - confirm.
 *
 * @param content content to split
 * @return alternating list of in-between text and matches; never empty
 */
private static List<String> dealHElement(String content){
    Pattern p = Pattern.compile(".*? ");
    Matcher m = p.matcher(content);
    List<String> realcontents = new ArrayList<String>();
    int begin = 0;
    while (m.find()) {
        realcontents.add(content.substring(begin, m.start()));
        realcontents.add(m.group().replaceAll("| ", ""));
        begin = m.end();
    }
    realcontents.add(content.substring(begin));
    return realcontents;
}
/**
 * Inserts a picture into a paragraph.
 * The picture type is derived from the path via {@code getPictureType}. The display width is the
 * given width or, when it is {@code null}, the width of the decoded image; the height follows the
 * image's aspect ratio. Pictures at least {@code MAX_PAGE_IMG_WIDTH} wide are scaled down to that
 * width. When the image cannot be decoded, a square of the given width (or of
 * {@code MAX_PAGE_IMG_WIDTH}) is used.
 * The file stream handed to POI is closed before the method returns.
 *
 * @param paragraph paragraph that receives the picture
 * @param imagePath path of the image file
 * @param width     requested width, or {@code null} to use the image's own width
 * @throws Exception when the file cannot be opened or POI rejects the picture
 */
private static void writeImage(XWPFParagraph paragraph, String imagePath, Integer width) throws Exception {
    XWPFRun run = paragraph.createRun();
    BufferedImage image = getImgByFilePath(imagePath);
    int res = getPictureType(imagePath);
    int height;
    if (image == null) {
        // Size unknown: fall back to a square.
        width = width == null ? MAX_PAGE_IMG_WIDTH : width;
        height = width;
    } else {
        width = width == null ? image.getWidth() : width;
        BigDecimal originalWidth = new BigDecimal(image.getWidth());
        BigDecimal originalHeight = new BigDecimal(image.getHeight());
        height = originalHeight.multiply(new BigDecimal(width).divide(originalWidth, 10, BigDecimal.ROUND_HALF_UP)).toBigInteger().intValue();
        if (width >= MAX_PAGE_IMG_WIDTH) {
            // Too wide for the page: scale width and height by the same factor.
            BigDecimal widthBigDecimal = new BigDecimal(width);
            BigDecimal heightBigDecimal = new BigDecimal(height);
            BigDecimal divide = new BigDecimal(MAX_PAGE_IMG_WIDTH).divide(widthBigDecimal, 10, BigDecimal.ROUND_HALF_UP);
            width = widthBigDecimal.multiply(divide).toBigInteger().intValue();
            height = heightBigDecimal.multiply(divide).toBigInteger().intValue();
        }
    }
    try (FileInputStream pictureStream = new FileInputStream(imagePath)) {
        run.addPicture(pictureStream, res, "",
                Units.toEMU(width), Units.toEMU(height));
    }
}
/**
 * Mutable holder for a start offset, an end offset and an HTML fragment
 * (going by its name, presumably one table located inside a larger HTML string).
 */
static class TableInfo {
    /** Start offset of the fragment. */
    private Integer start;
    /** End offset of the fragment. */
    private Integer end;
    /** The fragment itself. */
    private String html;

    public TableInfo(Integer start, Integer end, String html) {
        this.html = html;
        this.end = end;
        this.start = start;
    }

    public Integer getStart() {
        return this.start;
    }

    public Integer getEnd() {
        return this.end;
    }

    public String getHtml() {
        return this.html;
    }

    public void setStart(Integer start) {
        this.start = start;
    }

    public void setEnd(Integer end) {
        this.end = end;
    }

    public void setHtml(String html) {
        this.html = html;
    }
}
/**
 * Mutable holder for a start offset, an end offset, a link target and an HTML fragment
 * (going by its name, presumably one hyperlink located inside a larger HTML string).
 */
static class LinkInfo {
    /** Start offset of the fragment. */
    private Integer start;
    /** End offset of the fragment. */
    private Integer end;
    /** Link target. */
    private String href;
    /** The fragment itself. */
    private String html;

    public LinkInfo(Integer start, Integer end, String href, String html) {
        this.html = html;
        this.href = href;
        this.end = end;
        this.start = start;
    }

    public Integer getStart() {
        return this.start;
    }

    public Integer getEnd() {
        return this.end;
    }

    public String getHref() {
        return this.href;
    }

    public String getHtml() {
        return this.html;
    }

    public void setStart(Integer start) {
        this.start = start;
    }

    public void setEnd(Integer end) {
        this.end = end;
    }

    public void setHref(String href) {
        this.href = href;
    }

    public void setHtml(String html) {
        this.html = html;
    }
}
/**
 * Heading style: font size, font family and bold flag.
 *
 * @author 明快de玄米61
 * @date 2022/9/13 10:36
 **/
static class HeadingStyle implements Serializable {
    private static final long serialVersionUID = 1L;

    /** Font size. */
    private Integer fontSize;
    /** Font family. */
    private String fontFamily;
    /** Bold flag. */
    private boolean bold;

    HeadingStyle(Integer fontSize, String fontFamily, boolean bold) {
        this.bold = bold;
        this.fontFamily = fontFamily;
        this.fontSize = fontSize;
    }

    public Integer getFontSize() {
        return this.fontSize;
    }

    public String getFontFamily() {
        return this.fontFamily;
    }

    public boolean isBold() {
        return this.bold;
    }

    public void setFontSize(Integer fontSize) {
        this.fontSize = fontSize;
    }

    public void setFontFamily(String fontFamily) {
        this.fontFamily = fontFamily;
    }

    public void setBold(boolean bold) {
        this.bold = bold;
    }
}
/**
 * Mutable holder for one regex match inside an HTML string: start offset, end offset and the
 * matched text.
 */
static class LabelInfo {
    /** Start offset of the match. */
    private Integer start;
    /** End offset of the match. */
    private Integer end;
    /** Matched markup. */
    private String html;

    public LabelInfo(Integer start, Integer end, String html) {
        this.html = html;
        this.end = end;
        this.start = start;
    }

    public Integer getStart() {
        return this.start;
    }

    public Integer getEnd() {
        return this.end;
    }

    public String getHtml() {
        return this.html;
    }

    public void setStart(Integer start) {
        this.start = start;
    }

    public void setEnd(Integer end) {
        this.end = end;
    }

    public void setHtml(String html) {
        this.html = html;
    }
}
}
3、测试类1
3.1、说明
大家先看一张图,如下:
左侧目录树是使用思维导图工具生成的,如下:
点击每个目录都可以往右侧编辑器中输入内容,可以添加文字、图片、表格、超链接等,我们的任务就是把目录当做文档目录,然后导出word文档
3.2、代码
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.TypeReference;
import com.alibaba.fastjson.parser.Feature;
import com.atguigu.demo.util.FileUtil;
import com.atguigu.demo.util.WordUtil;
import lombok.Data;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import java.io.File;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
public class Test1 {
/**
 * Demo entry point: parses a hard-coded JSON list of directory entries, assembles the directory
 * tree, renders it into an XWPFDocument and writes the document to a temp file, whose location is
 * printed at the end (also when the export failed).
 * NOTE(review): the JSON literal below spans several physical lines, which is not valid inside a
 * Java string literal; the directoryBody values presumably contained HTML markup that was lost
 * during extraction - restore the literal from the original source.
 *
 * @param args unused
 */
public static void main(String[] args) {
// Assume viewList is data queried from a database: a flat list in which every element is a directory without children, so the directory tree has to be assembled next
String str = "[{\"directoryBody\":\"\",\"directoryName\":\"坦克\",\"id\":\"0846d84214864e2096c04827735e1487\",\"parentId\":\"-1\",\"rId\":\"0846d84214864e2096c04827735e1487\",\"seq\":0,\"type\":0},{\"directoryBody\":\"超轻型坦克—10吨以下
轻型坦克—20吨以下
中型坦克—20吨至40吨
重型坦克—40吨至80吨
\",\"directoryName\":\"坦克级别\",\"id\":\"cnj7li9xtqtccnj7li9xwu80cnj7li9x\",\"parentId\":\"cnj7ldsro83kcnj7ldsrjd34cnj7ldsr\",\"rId\":\"0846d84214864e2096c04827735e1487\",\"seq\":1,\"type\":2},{\"directoryBody\":\"坦克一词是英文“tank”的音译,原意为贮存液体或气体的容器。参战前,为保密而取用此名,并一直沿用至今。
1915年,英国政府根据E.D.A.斯文顿的建议,利用汽车、拖拉机、枪炮制造和冶金技术,试制出坦克样车。
1916年研制成功Ⅰ型坦克。
第一次世界大战期间,英国、法国和德国共制造了近万辆坦克,主要有英国的Ⅳ型和Ⅴ型、法国的“圣沙蒙”和“雷诺”FT-17及德国的A7V等。两次世界大战之间,轻型坦克盛行,主要有:英国的“马蒂尔达”步兵坦克和“十字军”巡洋坦克,法国的“雷诺”R-35轻型坦克和“索玛”S-35中型坦克,苏联的T-26轻型坦克和T-28中型坦克,德国的PzKpfwⅡ轻型坦克和PzKpfwⅣ中型坦克等。
\",\"directoryName\":\"发展历史\",\"id\":\"cnj5w7qrxvy8cnj5w7qs2y9scnj5w7qs\",\"parentId\":\"0846d84214864e2096c04827735e1487\",\"rId\":\"0846d84214864e2096c04827735e1487\",\"seq\":1,\"type\":1},{\"directoryBody\":\"1960年代开始,由于中型坦克的火力和装甲防护已经达到或超过了以往重型坦克的水平,同时克服了重型坦克越野性差的弱点,从而形成了一种具有现代特征的的单一战斗坦克,即“主战坦克”或“主力战车”,成为各国家装甲部队的主力(Main battle tank,MBT)。\",\"directoryName\":\"主战坦克\",\"id\":\"cnj7llmxh6o0cnj7llmxgm4gcnj7llmx\",\"parentId\":\"cnj7ldsro83kcnj7ldsrjd34cnj7ldsr\",\"rId\":\"0846d84214864e2096c04827735e1487\",\"seq\":2,\"type\":2},{\"directoryBody\":\"尽管美国高官多次声称要在南海永久性部署海岸警卫队力量,然而由于财政因素制约,未来美国不太可能将海岸警卫队大规模部署在南海。
\",\"directoryName\":\"分类划分\",\"id\":\"cnj5w9f1r9c0cnj5w9f241kwcnj5w9f1\",\"parentId\":\"0846d84214864e2096c04827735e1487\",\"rId\":\"0846d84214864e2096c04827735e1487\",\"seq\":2,\"type\":1},{\"directoryBody\":\"因为侦察坦克的数量需求较小,所以常倾向使用步兵战车相似的底盤,如俄国的BMP-1和法国AMX-13,美国的M3布雷德利骑兵战车。\",\"directoryName\":\"侦察坦克\",\"id\":\"cnj7loh4x0cgcnj7loh54lj4cnj7loh5\",\"parentId\":\"cnj7ldsro83kcnj7ldsrjd34cnj7ldsr\",\"rId\":\"0846d84214864e2096c04827735e1487\",\"seq\":3,\"type\":2},{\"directoryBody\":\"中文名字 坦克 英文名 Tank 首次出场 索姆河战役 相关战争 第一次世界大战 发明国 英国 主要任务 陆上作战
\",\"directoryName\":\"装置组成\",\"id\":\"cnj5wb8i9qm8cnj5wb8ig6bkcnj5wb8i\",\"parentId\":\"0846d84214864e2096c04827735e1487\",\"rId\":\"0846d84214864e2096c04827735e1487\",\"seq\":3,\"type\":1},{\"directoryBody\":\"特种坦克意指装有特殊装备,负担专门任务的坦克。\",\"directoryName\":\"特种坦克\",\"id\":\"cnj7lplxwyyocnj7lplxwxs0cnj7lply\",\"parentId\":\"cnj7ldsro83kcnj7ldsrjd34cnj7ldsr\",\"rId\":\"0846d84214864e2096c04827735e1487\",\"seq\":4,\"type\":2},{\"directoryBody\":\"南海是世界著名的热带大陆边缘海之一,以闽粤沿海省界到诏安的宫古半岛经台湾浅滩到台湾岛南端的鹅銮鼻的连线与东海相接。整个南海几乎被大陆、半岛和岛屿所包围,南海与南海诸岛自古以来就是我国的海疆边防。
\",\"directoryName\":\"展望未来\",\"id\":\"cnj5wckz9534cnj5wckzc9hccnj5wckz\",\"parentId\":\"0846d84214864e2096c04827735e1487\",\"rId\":\"0846d84214864e2096c04827735e1487\",\"seq\":4,\"type\":1},{\"directoryName\":\"类型\",\"id\":\"cnj7ldsro83kcnj7ldsrjd34cnj7ldsr\",\"parentId\":\"0846d84214864e2096c04827735e1487\",\"rId\":\"0846d84214864e2096c04827735e1487\",\"seq\":5,\"type\":1}]";
List<TreeView> viewList = JSONObject.parseObject(str, new TypeReference<List<TreeView>>() {
}, Feature.OrderedField);
// Assemble the directory tree. Each directory holds rich text in its directoryBody field and its name in directoryName (the article shows the resulting tree in a figure)
List<TreeView> treeList = createTreeViewTree("-1", viewList);
// Create an empty Word file as the export target; the file name is "坦克.docx"
String name = "坦克";
File file = FileUtil.createTempFile(name + ".docx");
try {
// Passing true as the second argument enables the official-document format
XWPFDocument document = getXWPFDocument(treeList, true);
WordUtil.generateDocxFile(document, file);
} catch (Exception e) {
e.printStackTrace();
} finally {
System.out.println("word文档位置:" + file.getAbsolutePath());
}
}
/**
 * Builds the directory tree below the given parent id from a flat node list.
 *
 * <p>Each node whose parent id equals {@code parentId} is added to the result in list order,
 * after its own children have been resolved recursively from the same flat list.
 *
 * @param parentId id whose direct children are collected; must not be null
 * @param viewList flat list containing every directory node
 * @return the direct children of {@code parentId}, each with its children set; empty when no
 *         node references {@code parentId}
 * @author 明快de玄米61
 * @date 2022/10/12 0:02
 */
private static List<TreeView> createTreeViewTree(String parentId, List<TreeView> viewList) {
    List<TreeView> siblings = new ArrayList<>();
    for (TreeView candidate : viewList) {
        // Only nodes that hang directly under parentId belong to this level.
        if (!parentId.equals(candidate.getParentId())) {
            continue;
        }
        List<TreeView> subtree = createTreeViewTree(candidate.getId(), viewList);
        candidate.setChildren(subtree);
        siblings.add(candidate);
    }
    return siblings;
}
/**
 * Creates a new Word document and renders the given directory tree into it.
 *
 * @param treeList    directory tree to render
 * @param odfDealFlag whether the official-document format is enabled
 * @return the populated document
 * @author 明快de玄米61
 * @date 2022/10/12 0:01
 */
public static XWPFDocument getXWPFDocument(List<TreeView> treeList, boolean odfDealFlag) {
    // Start from the document prepared by WordUtil, then append every tree node to it.
    final XWPFDocument doc = WordUtil.initXWPFDocument();
    dealDirectoryViewTree(doc, treeList, odfDealFlag);
    return doc;
}
/**
 * Writes every node of one tree level, followed by its descendants, into the document.
 *
 * <p>A node of type 0 is written as the document title; any other type is written as a heading
 * numbered by its 1-based position among its siblings. The node body is then written as HTML
 * content, and finally the node's children are processed recursively.
 *
 * @param document    document being filled
 * @param treeList    nodes of the current level, in output order
 * @param odfDealFlag whether the official-document format is enabled; forwarded to the heading writer
 */
private static void dealDirectoryViewTree(XWPFDocument document, List<TreeView> treeList, boolean odfDealFlag) {
    int ordinal = 0;
    for (TreeView node : treeList) {
        ordinal++;
        String rawName = node.getDirectoryName();
        // An empty name is replaced by a single space so a heading is still emitted.
        String heading = StringUtils.isEmpty(rawName) ? " " : rawName;
        Integer nodeType = node.getType();

        if (nodeType != 0) {
            // Body heading
            WordUtil.dealHeading(document, nodeType, ordinal, heading, odfDealFlag);
        } else {
            // Centered document title
            WordUtil.dealDocxTitle(document, heading);
        }

        // Body text
        WordUtil.dealHtmlContent(document, node.getDirectoryBody());

        // Child nodes
        List<TreeView> descendants = node.getChildren();
        if (descendants == null || descendants.isEmpty()) {
            continue;
        }
        dealDirectoryViewTree(document, descendants, odfDealFlag);
    }
}
}
/**
 * One node of the directory tree that is exported to the Word document.
 *
 * <p>Nodes are parsed from JSON as a flat list and linked into a tree through
 * {@code id} / {@code parentId}; accessors are generated by Lombok's {@code @Data}.
 */
@Data
class TreeView implements Serializable {
private static final long serialVersionUID = 1L;
// Category id; child nodes refer to it through their parentId
private String id;
// Directory name, used as the title/heading text
private String directoryName;
// Directory detail: the rich-text body written below the heading
private String directoryBody;
// Directory detail data
private String bodyData;
// Id of the parent node
private String parentId;
// Node type: 0 is written as the document title, other values as a body heading
private Integer type;
// Sort number
private Integer seq;
// Sentences
private String sentences;
// Relationship-graph data
private String statisticalBody;
// Child nodes
private List<TreeView> children;
}
3.3、输出结果
word文档位置:C:\Users\mingming\AppData\Local\Temp\20221012\abb87b42e45e44dfb7e8645697a7d05c\坦克.docx
3.4、测试效果
4、测试类2
4.1、说明
大家也先来看一张图片,如下:
我们系统把docx文档传到第三方分析平台,第三方分析平台通过接口返回一串字符串,也就是测试代码中str的那种结构。我们需要将接口结果导出为word文档;按照项目经理的要求,这种方式导出的目录不用加公文格式标题,但其他地方的格式仍需符合公文格式要求。
4.2、代码
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.atguigu.demo.util.FileUtil;
import com.atguigu.demo.util.WordUtil;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import java.io.File;
public class Test2 {
/**
 * Demo entry point: parses the sample analysis result below, renders every entry of its
 * "results" array into a new document, writes the document to a temporary docx file and
 * prints that file's location.
 */
public static void main(String[] args) {
// Tree-shaped JSON string. Titles, directories, paragraphs, images and tables are marked in different ways; they have to be assembled into a structure similar to the one in test class 1 before the document is exported.
// NOTE(review): the literal spans several physical lines and the table "html" values contain no markup — this looks like damage from extracting the article; confirm against the original source.
String str = "{\"results\":[{\"entities\":[{\"text\":\"美国\",\"offset\":0,\"length\":2,\"ne\":\"country\"},{\"text\":\"亚太\",\"offset\":4,\"length\":2,\"ne\":\"loc\"}],\"relation\":[],\"attribute\":[],\"id\":21148,\"index\":0,\"para_index\":0,\"caption\":\"美国重返亚太\",\"level\":\"1\",\"paragraphs\":[{\"resource_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p0\",\"directory_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p0\",\"content\":\"美国重返亚太\",\"index\":0,\"type\":\"paragraph\",\"is_title\":true,\"entities\":[{\"text\":\"美国\",\"offset\":0,\"length\":2,\"ne\":\"country\"},{\"text\":\"亚太\",\"offset\":4,\"length\":2,\"ne\":\"loc\"}],\"relation\":[],\"attribute\":[]},{\"resource_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_t0\",\"html\":\"测试用例名称 地址栏隐藏 测试用例标识 DZLYC_01 测试用例对应需求文档章节 《某网站系统功能规格表》3.4节。 用例描述 支持打开的chrome窗口不显示地址栏 测试人员 测试时间 测试人员 测试时间
\",\"index\":1,\"type\":\"table\"}],\"children\":[{\"entities\":[{\"text\":\"日本\",\"offset\":2,\"length\":2,\"ne\":\"country\"}],\"event\":[],\"relation\":[],\"attribute\":[],\"id\":21149,\"index\":0,\"para_index\":0,\"caption\":\"1、日本贼心不死\",\"level\":\"2\",\"paragraphs\":[{\"resource_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p2\",\"directory_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p2\",\"content\":\"1、日本贼心不死\",\"index\":2,\"type\":\"paragraph\",\"is_title\":true,\"entities\":[{\"text\":\"日本\",\"offset\":2,\"length\":2,\"ne\":\"country\"}],\"event\":[],\"relation\":[],\"attribute\":[]},{\"resource_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_t1\",\"html\":\"测试用例名称 地址栏隐藏 测试用例标识 DZLYC_01 测试用例对应需求文档章节 《某网站系统功能规格表》3.4节。 用例描述 支持打开的chrome窗口不显示地址栏 测试人员 测试时间 测试人员 测试时间
\",\"index\":3,\"type\":\"table\"}],\"hasChild\":false},{\"entities\":[{\"text\":\"中国\",\"offset\":2,\"length\":2,\"ne\":\"country\"}],\"event\":[],\"relation\":[],\"attribute\":[],\"id\":21150,\"index\":1,\"para_index\":1,\"caption\":\"2、中国梦,我的梦\",\"level\":\"2\",\"paragraphs\":[{\"resource_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p4\",\"directory_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p4\",\"content\":\"2、中国梦,我的梦\",\"index\":4,\"type\":\"paragraph\",\"is_title\":true,\"entities\":[{\"text\":\"中国\",\"offset\":2,\"length\":2,\"ne\":\"country\"}],\"event\":[],\"relation\":[],\"attribute\":[]},{\"resource_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_i0\",\"path\":\"https://img-blog.csdnimg.cn/7e0bd747ac00471cbe916b558e6277eb.jpeg\",\"index\":5,\"type\":\"image\"}],\"hasChild\":false},{\"entities\":[],\"event\":[],\"relation\":[],\"attribute\":[],\"id\":21151,\"index\":2,\"para_index\":2,\"caption\":\"3、强国之路\",\"level\":\"2\",\"paragraphs\":[{\"resource_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p6\",\"directory_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p6\",\"content\":\"3、强国之路\",\"index\":6,\"type\":\"paragraph\",\"is_title\":true,\"entities\":[],\"event\":[],\"relation\":[],\"attribute\":[]},{\"resource_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p7\",\"directory_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p6\",\"content\":\"“第29届奥运会的圣火虽然熄灭了,但不代表奥运精神熄灭了,也不代表奥运会结束了,更不代表中国奥运就此结束了,让我们继续带着奥运精神迎接下一届奥运会,在鸟巢里\",\"index\":7,\"type\":\"paragraph\",\"is_title\":false,\"entities\":[{\"text\":\"中国\",\"offset\":1,\"length\":2,\"ne\":\"country\"},{\"text\":\"XXX\",\"offset\":28,\"length\":3,\"ne\":\"per\"},{\"text\":\"XXX\",\"offset\":31,\"length\":3,\"ne\":\"job\"},{\"text\":\"勇毅\",\"offset\":171,\"length\":2,\"ne\":\"aircraft\"}],\"attribute\":[]}],\"hasChild\":false}],\"hasChild\":true},{\"entities\":[{\"text\":\"中国\",\"offset\":3,\"length\":2,\"ne\":\"country\"}],\"event\":[],\"relation\":[],\"attribute\":[],\"id\":21152,\"index\":1,\"para_index\":1,\"caption\":\"腾飞的中国\",\"level\":\"1\",\"para
graphs\":[{\"resource_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p8\",\"directory_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p8\",\"content\":\"腾飞的中国\",\"index\":8,\"type\":\"paragraph\",\"is_title\":true,\"entities\":[{\"text\":\"中国\",\"offset\":3,\"length\":2,\"ne\":\"country\"}],\"event\":[],\"relation\":[],\"attribute\":[]},{\"resource_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p9\",\"directory_id\":\"b1b5a97aa6026e8d1ebe63f63b4f7cb4_p8\",\"content\":\"中国是多么繁华昌盛,科技发达! 中国是威武的雄师,屹立在疆土辽阔的东方!中国是勇猛的雄鹰,翱翔在蔚蓝的天空!中国是挺拔的青松\",\"index\":9,\"type\":\"paragraph\",\"is_title\":false,\"entities\":[{\"text\":\"这十年\",\"offset\":2,\"length\":3,\"ne\":\"time\"},{\"text\":\"这十年\",\"offset\":79,\"length\":3,\"ne\":\"time\"},{\"text\":\"涉滩\",\"offset\":84,\"length\":2,\"ne\":\"island\"},{\"text\":\"中国\",\"offset\":101,\"length\":2,\"ne\":\"country\"},{\"text\":\"XXX\",\"offset\":215,\"length\":3,\"ne\":\"per\"},{\"text\":\"中国\",\"offset\":224,\"length\":3,\"ne\":\"org\"},{\"text\":\"中国\",\"offset\":249,\"length\":2,\"ne\":\"country\"},{\"text\":\"海天\",\"offset\":266,\"length\":2,\"ne\":\"per\"}],\"attribute\":[]}],\"hasChild\":false}]}";
// Initialise the XWPFDocument
XWPFDocument document = WordUtil.initXWPFDocument();
// Write the document title
String name = "测试目录、文字、表格、表格中带图片、图片";
WordUtil.dealDocxTitle(document, name);
// Parse the data
JSONObject jsonObject = JSONObject.parseObject(str);
JSONArray results = jsonObject.getJSONArray("results");
if (results != null && results.size() > 0) {
for (Object result : results) {
JSONObject resultJsonObj = JSONObject.parseObject(result.toString());
// Passing false as the last argument leaves the official-document format off
dealSuCaiDirectoryViewTree(document, resultJsonObj, false);
}
}
// Create an empty Word file as the export target; its file name is the title above plus ".docx"
File file = FileUtil.createTempFile(name + ".docx");
try {
// Write the document content into the docx file
WordUtil.generateDocxFile(document, file);
} catch (Exception e) {
e.printStackTrace();
} finally {
// NOTE(review): runs on failure as well, so the path is printed even when generation threw
System.out.println("word文档位置:" + file.getAbsolutePath());
}
}
/**
 * Renders one directory node of the analysis result, followed by its children, into the document.
 *
 * <p>The node's {@code caption} is written as a heading at the node's {@code level}, numbered by
 * its 0-based {@code index} plus one. The node's non-title {@code paragraphs} are converted, in
 * order, into one HTML fragment that is written below the heading. Children are processed
 * recursively when {@code hasChild} is true.
 *
 * @param document    document being filled
 * @param jsonObject  node to render; {@code level} and {@code index} are required, while
 *                    {@code paragraphs} and {@code children} may be absent
 * @param odfDealFlag whether the official-document format is enabled; forwarded to the heading writer
 */
private static void dealSuCaiDirectoryViewTree(XWPFDocument document, JSONObject jsonObject, boolean odfDealFlag) {
    // 1. The node itself
    // 1.1 Heading: directory level, 0-based position among siblings, and title text
    int level = jsonObject.getInteger("level");
    int index = jsonObject.getInteger("index");
    String caption = jsonObject.getString("caption");
    WordUtil.dealHeading(document, level, index + 1, caption, odfDealFlag);

    // 1.2 Body: a node without a "paragraphs" array simply has no body
    String bodyHtml = buildParagraphsHtml(jsonObject.getJSONArray("paragraphs"));
    if (!bodyHtml.isEmpty()) {
        WordUtil.dealHtmlContent(document, bodyHtml);
    }

    // 2. Children
    JSONArray children = jsonObject.getJSONArray("children");
    if (jsonObject.getBooleanValue("hasChild") && children != null) {
        for (int i = 0; i < children.size(); i++) {
            // Read the element directly instead of serialising and re-parsing it.
            dealSuCaiDirectoryViewTree(document, children.getJSONObject(i), odfDealFlag);
        }
    }
}

/**
 * Converts the non-title entries of a {@code paragraphs} array into one HTML fragment.
 *
 * <p>Entry types are {@code paragraph} (text in {@code content}), {@code image} (URL in
 * {@code path}) and {@code table} (markup in {@code html}). Title entries are skipped because the
 * caller has already written the heading; entries of an unknown type or without a payload are
 * skipped as well.
 *
 * @param paragraphs entries of one node, may be null
 * @return the concatenated HTML, empty when there is nothing to render
 */
private static String buildParagraphsHtml(JSONArray paragraphs) {
    if (paragraphs == null) {
        return "";
    }
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < paragraphs.size(); i++) {
        JSONObject entry = paragraphs.getJSONObject(i);
        if (entry == null || entry.getBooleanValue("is_title")) {
            continue;
        }
        String type = entry.getString("type");
        if ("paragraph".equals(type)) {
            String content = entry.getString("content");
            if (content != null) {
                sb.append(wrapTextByP(content));
            }
        } else if ("image".equals(type)) {
            String path = entry.getString("path");
            if (path != null) {
                sb.append(wrapImgByP(path));
            }
        } else if ("table".equals(type)) {
            String html = entry.getString("html");
            if (html != null) {
                sb.append(dealTableContent(html));
            }
        }
    }
    return sb.toString();
}
/**
 * Makes sure the rows of a table fragment are wrapped in a {@code <tbody>} element.
 *
 * <p>When the fragment contains no {@code <tbody>} at all, the content between each
 * {@code <table ...>} and the next {@code </table>} is wrapped in one. Fragments that already
 * contain a {@code <tbody>} or contain no table are returned unchanged. Nested tables are not
 * supported: the content is matched up to the first closing tag.
 *
 * @param html table markup, may be null
 * @return the markup with {@code <tbody>} added where needed; an empty string for null input
 */
private static String dealTableContent(String html) {
    if (html == null || html.isEmpty()) {
        return "";
    }
    // (?is): case-insensitive, and '.' also matches the line breaks inside the markup
    if (html.matches("(?is).*<tbody[\\s>].*")) {
        return html;
    }
    return html.replaceAll("(?is)(<table\\b[^>]*>)(.*?)(</table\\s*>)", "$1<tbody>$2</tbody>$3");
}
/**
 * Wraps an image URL in a paragraph that contains a single {@code <img>} element.
 *
 * @param path image URL, inserted as-is into the {@code src} attribute
 * @return the HTML fragment for the image
 */
private static String wrapImgByP(String path) {
    // The template was empty, so every image was dropped from the generated HTML.
    return String.format("<p><img src=\"%s\"/></p>", path);
}
/**
 * Wraps paragraph text in a {@code <p>} element.
 *
 * @param content paragraph text, inserted as-is
 * @return the HTML fragment for the paragraph
 */
private static String wrapTextByP(String content) {
    // The opening and closing tags were empty, so the text was returned unwrapped.
    return "<p>" + content + "</p>";
}
}
4.3、输出结果
word文档位置:C:\Users\mingming\AppData\Local\Temp\20221012\9d24885d9bd4490baab89d3edfe06174\测试目录、文字、表格、表格中带图片、图片.docx
4.4、测试结果
四、图片(测试代码中所用图片地址)
你可能感兴趣的:(java学习之路,apache,java,开发语言)