1.配置依赖
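<dependency>
    <groupId>e-iceblue</groupId>
    <artifactId>spire.doc.free</artifactId>
    <version>3.9.0</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.14.2</version>
</dependency>
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.13</version>
</dependency>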
因e-iceblue在阿里云maven仓库中找不到,需配置特定仓库
<repositories>
    <repository>
        <id>com.e-iceblue</id>
        <url>https://repo.e-iceblue.cn/repository/maven-public/</url>
    </repository>
</repositories>
2.代码demo
import com.spire.doc.Document;
import com.spire.doc.FileFormat;
import com.spire.doc.Section;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.util.Base64;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
public class TestHtml2Doc {
public static void main(String[] args) {
long start = System.currentTimeMillis();
String str="";
try {
//富文本内容在本地文件中,根据具体情况改成从数据库或网络中获取资源
FileInputStream in=new FileInputStream("D:\\oo.txt");
// size 为字串的长度 ,这里一次性读完
int size=in.available();
byte[] buffer=new byte[size];
in.read(buffer);
in.close();
str=new String(buffer,"utf-8");
} catch (IOException e) {
e.printStackTrace();
}
org.jsoup.nodes.Document document = Jsoup.parse(str);
Element body = document.body();
// 获取body里面的所有子元素
Elements links = body.children();
try {
for (Element link : links) {
Elements img = link.select("img");
String src = img.attr("src");
if(StringUtils.isNotBlank(src) ) {
// 处理图片 因有些图片资源带有防盗链,需加上Referer
Map map=new HashMap() ;
map.put("Referer","http://aa.com.cn");
String string=doGetHeader(src,map);
img.attr("src","data:image/png;base64,"+string);
System.out.println("src = " + src);
System.out.println("Basestring = " + string);
}
}
} catch (Exception e) {
System.out.println("转换url图片报错了:"+e.getMessage());
}
String html = document.html();
//样式调整
html=html.replace("class=\"ql-align-center\"","style=\"text-align:center\"").replace("
\n" +
" ","
\n" +
" ");
//System.out.println("html = " + html);
exportWord(html,"590");
long end = System.currentTimeMillis();
System.out.println("(end-start) = " + (end-start)/1000);
}
public static void exportWord( String content, String fileName) {
try {
HttpServletRequest request=null;
HttpServletResponse response=null;
//新建Document对象
Document document = new Document();
//添加section
Section sec = document.addSection();
ByteArrayOutputStream os = new ByteArrayOutputStream();
//添加段落并写入HTML文本
sec.addParagraph().appendHTML(content);
document.saveToStream(os, FileFormat.Docx);
InputStream input = new ByteArrayInputStream(os.toByteArray());
//输出文件
FileOutputStream no=new FileOutputStream("D:\\"+fileName+".docx");
int len =-1;
byte []by = new byte[1024];
while((len = input.read(by))!=-1) {
no.write(by,0,len);
}
no.flush();
no.close();
input.close();
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* @Description HTTP header GET请求图片地址 返回base64
* @param url 地址
* @param
*/
public static String doGetHeader(String url, Map headers) {
CloseableHttpResponse response = null;
try (CloseableHttpClient client = HttpClients.createDefault()){
HttpGet httpGet = new HttpGet(url);
Iterator> iterator = headers.entrySet().iterator();
//设置header信息
while(iterator.hasNext()) {
Map.Entry entry = iterator.next();
httpGet.setHeader(entry.getKey(), entry.getValue());
}
RequestConfig config = RequestConfig.custom().setConnectTimeout(60000)
.setConnectionRequestTimeout(60000)
.setSocketTimeout(60000)
.build();
httpGet.setConfig(config);
response = client.execute(httpGet);
HttpEntity entity = response.getEntity();
ByteArrayOutputStream out = new ByteArrayOutputStream();
entity.writeTo(out);
// ByteArrayOutputStream编码成base64字符串
String result = new String(Base64.getEncoder().encode(out.toByteArray()));
return result;
}catch(Exception e) {
System.out.println("httpClient请求图片url报错 " + e.getMessage());;
return null;
}finally {
try {
if(response != null) {
response.close();
}
}catch(Exception e) {
System.out.println("关闭响应流报错:"+e.getMessage());
}
}
}
}