java 富文本html 转 word(带图片处理)


1.配置依赖

<dependency>
   <groupId>e-iceblue</groupId>
   <artifactId>spire.doc.free</artifactId>
   <version>3.9.0</version>
</dependency>


<dependency>
   <groupId>org.jsoup</groupId>
   <artifactId>jsoup</artifactId>
   <version>1.14.2</version>
</dependency>

		
<dependency>
   <groupId>org.apache.httpcomponents</groupId>
   <artifactId>httpclient</artifactId>
   <version>4.5.13</version>
</dependency>
		
因e-iceblue在阿里云maven仓库中找不到,需配置特定仓库


<repositories>
   <repository>
      <id>com.e-iceblue</id>
      <url>https://repo.e-iceblue.cn/repository/maven-public/</url>
   </repository>
</repositories>

2.代码demo
import com.spire.doc.Document;
import com.spire.doc.FileFormat;
import com.spire.doc.Section;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class TestHtml2Doc {
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        String str="";
        try {
			//富文本内容在本地文件中,根据具体情况改成从数据库或网络中获取资源
            FileInputStream in=new FileInputStream("D:\\oo.txt");
            // size 为字串的长度 ,这里一次性读完
            int size=in.available();
            byte[] buffer=new byte[size];
            in.read(buffer);
            in.close();
            str=new String(buffer,"utf-8");
        } catch (IOException e) {
            e.printStackTrace();
        }
        org.jsoup.nodes.Document document = Jsoup.parse(str);

        Element body = document.body();
        // 获取body里面的所有子元素
        Elements links = body.children();
        try {
            for (Element link : links) {
                Elements img = link.select("img");
                String src = img.attr("src");
                if(StringUtils.isNotBlank(src) ) {
                    // 处理图片 因有些图片资源带有防盗链,需加上Referer
                    Map map=new HashMap() ;
                    map.put("Referer","http://aa.com.cn");

                    String string=doGetHeader(src,map);
                    img.attr("src","data:image/png;base64,"+string);
                    System.out.println("src = " + src);
                    System.out.println("Basestring = " + string);

                }
            }
        } catch (Exception e) {
            System.out.println("转换url图片报错了:"+e.getMessage());
        }

        String html = document.html();
		//样式调整
        html=html.replace("class=\"ql-align-center\"","style=\"text-align:center\"").replace("

\n" + "

","

\n" + "

  "); //System.out.println("html = " + html); exportWord(html,"590"); long end = System.currentTimeMillis(); System.out.println("(end-start) = " + (end-start)/1000); } public static void exportWord( String content, String fileName) { try { HttpServletRequest request=null; HttpServletResponse response=null; //新建Document对象 Document document = new Document(); //添加section Section sec = document.addSection(); ByteArrayOutputStream os = new ByteArrayOutputStream(); //添加段落并写入HTML文本 sec.addParagraph().appendHTML(content); document.saveToStream(os, FileFormat.Docx); InputStream input = new ByteArrayInputStream(os.toByteArray()); //输出文件 FileOutputStream no=new FileOutputStream("D:\\"+fileName+".docx"); int len =-1; byte []by = new byte[1024]; while((len = input.read(by))!=-1) { no.write(by,0,len); } no.flush(); no.close(); input.close(); } catch (Exception e) { e.printStackTrace(); } } /** * @Description HTTP header GET请求图片地址 返回base64 * @param url 地址 * @param */ public static String doGetHeader(String url, Map headers) { CloseableHttpResponse response = null; try (CloseableHttpClient client = HttpClients.createDefault()){ HttpGet httpGet = new HttpGet(url); Iterator> iterator = headers.entrySet().iterator(); //设置header信息 while(iterator.hasNext()) { Map.Entry entry = iterator.next(); httpGet.setHeader(entry.getKey(), entry.getValue()); } RequestConfig config = RequestConfig.custom().setConnectTimeout(60000) .setConnectionRequestTimeout(60000) .setSocketTimeout(60000) .build(); httpGet.setConfig(config); response = client.execute(httpGet); HttpEntity entity = response.getEntity(); ByteArrayOutputStream out = new ByteArrayOutputStream(); entity.writeTo(out); // ByteArrayOutputStream编码成base64字符串 String result = new String(Base64.getEncoder().encode(out.toByteArray())); return result; }catch(Exception e) { System.out.println("httpClient请求图片url报错 " + e.getMessage());; return null; }finally { try { if(response != null) { response.close(); } }catch(Exception e) { 
System.out.println("关闭响应流报错:"+e.getMessage()); } } } }

你可能感兴趣的:(工具,java,word,富文本)