POI实现html和word的相互转换,可以实现图片转换

项目后端使用maven,前端使用了富文本编辑器。目前从html转换的word为doc格式,也能将图片进行处理。网上的一些例子好多都是图片无法解析,这个地方千万要注意对图片的路径进行解析,src必须是全路径 域名+图片的正式路径,如果您的需求对图片的大小有限制的话也要修改img的style属性,如果修改完没有效果将style去掉直接加上宽和高。
一.添加maven依赖

	
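<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>1.0.6</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.3</version>
</dependency>

<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>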
	

二.word转换为html
public static String docToHtml() throws Exception {
File path = new File(ResourceUtils.getURL(“classpath:”).getPath());
String imagePathStr = path.getAbsolutePath() + “\static\image\”;
String sourceFileName = path.getAbsolutePath() + “\static\test.doc”;
String targetFileName = path.getAbsolutePath() + “\static\test2.html”;
File file = new File(imagePathStr);
if(!file.exists()) {
file.mkdirs();
}
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(sourceFileName));
org.w3c.dom.Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
//保存图片,并返回图片的相对路径
wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
try (FileOutputStream out = new FileOutputStream(imagePathStr + name)) {
out.write(content);
} catch (Exception e) {
e.printStackTrace();
}
return “image/” + name;
});
wordToHtmlConverter.processDocument(wordDocument);
org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(new File(targetFileName));
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, “utf-8”);
serializer.setOutputProperty(OutputKeys.INDENT, “yes”);
serializer.setOutputProperty(OutputKeys.METHOD, “html”);
serializer.transform(domSource, streamResult);
return targetFileName;
docx格式转换为html
public static String docxToHtml() throws Exception {
File path = new File(ResourceUtils.getURL(“classpath:”).getPath());
String imagePath = path.getAbsolutePath() + “\static\image”;
String sourceFileName = path.getAbsolutePath() + “\static\test.docx”;
String targetFileName = path.getAbsolutePath() + “\static\test.html”;

OutputStreamWriter outputStreamWriter = null;
try {
    XWPFDocument document = new XWPFDocument(new FileInputStream(sourceFileName));
    XHTMLOptions options = XHTMLOptions.create();
    // 存放图片的文件夹
    options.setExtractor(new FileImageExtractor(new File(imagePath)));
    // html中图片的路径
    options.URIResolver(new BasicURIResolver("image"));
    outputStreamWriter = new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8");
    XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
    xhtmlConverter.convert(document, outputStreamWriter, options);
} finally {
    if (outputStreamWriter != null) {
        outputStreamWriter.close();
    }
}
return targetFileName;

}
三.html转换为word**(亲测可用)**
实现的思路:首先将需要的数据拼接成html代码,因为富文本编辑器最终也是生成的前端代码,所以这个位置比较好处理。如果富文本编辑器中有图片的话,需要对图片的路径和大小进行处理;网上很多的帖子取不到图片或者图片大小不能保证,基本上都是这个地方的问题。
下边直接上代码:
package com.bupticet.education.lab.utils;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.derby.tools.sysinfo;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.struts2.ServletActionContext;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.util.ResourceUtils;
import org.springframework.web.util.HtmlUtils;

public class Snippet {
/**
 * Writes HTML content into a Word-readable {@code .doc} file.
 *
 * <p>The (HTML-unescaped) content is stored as the {@code WordDocument} entry of a POIFS
 * container and saved as {@code url + ".doc"}. The image upload directory
 * {@code /userfiles/image/} is created if it does not exist yet.
 *
 * @param content HTML markup to store; HTML entities are unescaped before writing
 * @param url     target file path without the {@code .doc} extension
 * @return {@code "success"} if the file was written, {@code "error"} otherwise
 */
public String writeWordFile(String content, String url) {
    String path = ServletActionContext.getServletContext().getRealPath("/userfiles/image/");
    // getRealPath returns null when the virtual path cannot be mapped to a real one.
    if (path == null || "".equals(path) || content == null) {
        return "error";
    }

    File fileDir = new File(path);
    if (!fileDir.exists()) {
        fileDir.mkdirs();
    }

    String html = HtmlUtils.htmlUnescape(content);
    // Produces a .doc file; rename manually if a .docx extension is required.
    try (ByteArrayInputStream bais = new ByteArrayInputStream(html.getBytes("UTF-8"));
         FileOutputStream ostream = new FileOutputStream(url + ".doc")) {
        POIFSFileSystem poifs = new POIFSFileSystem();
        DirectoryEntry directory = poifs.getRoot();
        directory.createDocument("WordDocument", bais);
        poifs.writeFilesystem(ostream);
        return "success";
    } catch (Exception e) {
        // Keep the non-throwing contract, but report the failure through the return value.
        e.printStackTrace();
        return "error";
    }
}

    //获取html中的图片元素信息
    public  List> getImgStr(String htmlStr) {
        List> pics = new ArrayList>();

        Document doc = Jsoup.parse(htmlStr);
        Elements imgs = doc.select("img");
        for (Element img : imgs) {
            HashMap map = new HashMap();
            if(!"".equals(img.attr("width"))) {
                map.put("width", img.attr("width").substring(0, img.attr("width").length() - 2));
            }
            if(!"".equals(img.attr("height"))) {
                map.put("height", img.attr("height").substring(0, img.attr("height").length() - 2));
            }
            map.put("img", img.toString().substring(0, img.toString().length() - 1) + "/>");
            map.put("img1", img.toString());
            map.put("src", img.attr("src"));
            pics.add(map);
        }
        return pics;
    }
    
    
    
    
    
    /**
     * 解析html文件
     * @param file
     * @return
     */
    public   String readHtml(File file){
        String body = "";
        try {
            FileInputStream iStream = new FileInputStream(file);
            Reader reader = new InputStreamReader(iStream);
            BufferedReader htmlReader = new BufferedReader(reader);
                        
            String line;
            boolean found = false;
            while (!found && (line = htmlReader.readLine()) != null) {
                if (line.toLowerCase().indexOf("的前面可能存在空格
                    found = true;
                }
            }
            
            found = false;
            while (!found && (line = htmlReader.readLine()) != null) {
                if (line.toLowerCase().indexOf("元素,则分行进行替代
                        String[] splitLines = line.split("元素
     * @return 文件名
     */
    /**
     * Extracts the file name referenced by the first {@code src=} attribute in a line of HTML.
     *
     * <p>The attribute value may be wrapped in double quotes, single quotes, or be unquoted.
     * Both forward slashes and backslashes are treated as path separators, and trailing
     * separators are ignored.
     *
     * @param htmlLine a line of HTML, may be {@code null}
     * @return the last path segment of the {@code src} value, or an empty string when the
     *         line is {@code null}, has no {@code src=} attribute, or the value is empty
     */
    public static String extractFilename(String htmlLine) {
        if (htmlLine == null) {
            return "";
        }
        int srcIndex = htmlLine.toLowerCase().indexOf("src=");
        if (srcIndex == -1) { // no image reference on this line
            return "";
        }
        String htmlSrc = htmlLine.substring(srcIndex + 4).trim();
        if (htmlSrc.isEmpty()) { // "src=" was the last thing on the line
            return "";
        }

        String path;
        char first = htmlSrc.charAt(0);
        if (first == '"' || first == '\'') {
            // Quoted value: take everything up to the matching quote (or the rest of the
            // line when the closing quote is missing).
            int closing = htmlSrc.indexOf(first, 1);
            path = closing == -1 ? htmlSrc.substring(1) : htmlSrc.substring(1, closing);
        } else {
            // Unquoted value: ends at the first whitespace or at the end of the tag.
            int stop = 0;
            while (stop < htmlSrc.length()
                    && !Character.isWhitespace(htmlSrc.charAt(stop))
                    && htmlSrc.charAt(stop) != '>') {
                stop++;
            }
            path = htmlSrc.substring(0, stop);
        }

        // Ignore trailing separators, then cut out the last path segment.
        int end = path.length();
        while (end > 0 && (path.charAt(end - 1) == '/' || path.charAt(end - 1) == '\\')) {
            end--;
        }
        int start = end;
        while (start > 0 && path.charAt(start - 1) != '/' && path.charAt(start - 1) != '\\') {
            start--;
        }
        return path.substring(start, end);
    }
    
    
    
   /* public static void main(String[] args) {
    	String path = "C:\\Users\\Administrator\\Desktop\\图片\\xxx.html";
    	File file = new File(path);
    	//body
    	String readHtml = readHtml(file);
    	System.out.println(readHtml+"--------------------------------");
    	String writeWordFile = writeWordFile(readHtml);
    	System.out.println(writeWordFile);
	}*/

}
2.工具类
package com.bupticet.education.lab.utils;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.poi.POIXMLDocument;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;

/**

  • 适用于word 2007
    */
    public class OfficeUtil {

    /**

    • 根据指定的参数值、模板,生成 word 文档

    • @param param 需要替换的变量

    • @param template 模板
      */
      public static CustomXWPFDocument generateWord(Map param, String template) {
      CustomXWPFDocument doc = null;
      try {
      OPCPackage pack = POIXMLDocument.openPackage(template);
      doc = new CustomXWPFDocument(pack);
      if (param != null && param.size() > 0) {

           //处理段落  
           List paragraphList = doc.getParagraphs();  
           processParagraphs(paragraphList, param, doc);  
      
           //处理表格  
           Iterator it = doc.getTablesIterator();  
           while (it.hasNext()) {
               XWPFTable table = it.next();  
               List rows = table.getRows();  
               for (XWPFTableRow row : rows) {  
                   List cells = row.getTableCells();  
                   for (XWPFTableCell cell : cells) {  
                       List paragraphListTable =  cell.getParagraphs();  
                       processParagraphs(paragraphListTable, param, doc);  
                   }  
               }  
           }  
       }  
      

      } catch (Exception e) {
      e.printStackTrace();
      }
      return doc;
      }
      /**

    • 处理段落

    • @param paragraphList
      /
      public static void processParagraphs(List paragraphList,Map param,CustomXWPFDocument doc){
      if(paragraphList != null && paragraphList.size() > 0){
      for(XWPFParagraph paragraph:paragraphList){
      //poi转换过来的行间距过大,需要手动调整
      if(paragraph.getSpacingBefore() >= 1000 || paragraph.getSpacingAfter() > 1000) {
      paragraph.setSpacingBefore(0);
      paragraph.setSpacingAfter(0);
      }
      //设置word中左右间距
      paragraph.setIndentationLeft(0);
      paragraph.setIndentationRight(0);
      List runs = paragraph.getRuns();
      //加了图片,修改了paragraph的runs的size,所以循环不能使用runs
      List allRuns = new ArrayList(runs);
      for (XWPFRun run : allRuns) {
      String text = run.getText(0);
      if(text != null){
      boolean isSetText = false;
      for (Entry entry : param.entrySet()) {
      String key = entry.getKey();
      if(text.indexOf(key) != -1){
      isSetText = true;
      Object value = entry.getValue();
      if (value instanceof String) {//文本替换
      text = text.replace(key, value.toString());
      } else if (value instanceof Map) {//图片替换
      text = text.replace(key, “”);
      Map pic = (Map)value;
      int width = Integer.parseInt(pic.get(“width”).toString());
      int height = Integer.parseInt(pic.get(“height”).toString());
      int picType = getPictureType(pic.get(“type”).toString());
      byte[] byteArray = (byte[]) pic.get(“content”);
      ByteArrayInputStream byteInputStream = new ByteArrayInputStream(byteArray);
      try {
      String blipId = doc.addPictureData(byteInputStream,picType);
      doc.createPicture(blipId,doc.getNextPicNameNumber(picType), width, height,paragraph);
      } catch (Exception e) {
      e.printStackTrace();
      }
      }
      }
      }
      if(isSetText){
      run.setText(text,0);
      }
      }
      }
      }
      }
      }
      /
      *

    • 根据图片类型,取得对应的图片类型代码

    • @param picType

    • @return int
      /
      private static int getPictureType(String picType){
      int res = CustomXWPFDocument.PICTURE_TYPE_PICT;
      if(picType != null){
      if(picType.equalsIgnoreCase(“png”)){
      res = CustomXWPFDocument.PICTURE_TYPE_PNG;
      }else if(picType.equalsIgnoreCase(“dib”)){
      res = CustomXWPFDocument.PICTURE_TYPE_DIB;
      }else if(picType.equalsIgnoreCase(“emf”)){
      res = CustomXWPFDocument.PICTURE_TYPE_EMF;
      }else if(picType.equalsIgnoreCase(“jpg”) || picType.equalsIgnoreCase(“jpeg”)){
      res = CustomXWPFDocument.PICTURE_TYPE_JPEG;
      }else if(picType.equalsIgnoreCase(“wmf”)){
      res = CustomXWPFDocument.PICTURE_TYPE_WMF;
      }
      }
      return res;
      }
      /
      *

    • 将输入流中的数据写入字节数组

    • @param in

    • @return
      */
      public static byte[] inputStream2ByteArray(InputStream in,boolean isClose){
      byte[] byteArray = null;
      try {
      int total = in.available();
      byteArray = new byte[total];
      in.read(byteArray);
      } catch (IOException e) {
      e.printStackTrace();
      }finally{
      if(isClose){
      try {
      in.close();
      } catch (Exception e2) {
      System.out.println(“关闭流失败”);
      }
      }
      }
      return byteArray;
      }
      }
      3.工具类

package com.bupticet.education.lab.utils;

import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlToken;
import org.openxmlformats.schemas.drawingml.x2006.main.CTNonVisualDrawingProps;
import org.openxmlformats.schemas.drawingml.x2006.main.CTPositiveSize2D;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline;

/**

  • 自定义 XWPFDocument,并重写 createPicture()方法
    */
    public class CustomXWPFDocument extends XWPFDocument {
    public CustomXWPFDocument(InputStream in) throws IOException {
    super(in);
    }

    public CustomXWPFDocument() {
    super();
    }

    public CustomXWPFDocument(OPCPackage pkg) throws IOException {
    super(pkg);
    }

    /**

    • @param ind

    • @param width 宽

    • @param height 高

    • @param paragraph 段落
      */
      public void createPicture(String blipId, int ind, int width, int height,XWPFParagraph paragraph) {
      final int EMU = 9525;
      width *= EMU;
      height *= EMU;
      CTInline inline = paragraph.createRun().getCTR().addNewDrawing().addNewInline();
      String picXml = “”
      + “"
      + " "
      + " "
      + " pic:nvPicPr” + " + ind
      + “” name=“Generated”/>”
      + " pic:cNvPicPr/"
      + " "
      + " pic:blipFill"
      + " + blipId
      + “” xmlns:r=“http://schemas.openxmlformats.org/officeDocument/2006/relationships"/>"
      + "
      + " "
      + "
      "
      + " "
      + " pic:spPr"
      + " "
      + " "
      + " + width
      + “” cy=""
      + height
      + “”/>"
      + "
      "
      + " "
      + " "
      + "
      "
      + " "
      + "
      "
      + "
      " + “
      ”;

      inline.addNewGraphic().addNewGraphicData();
      XmlToken xmlToken = null;
      try {
      xmlToken = XmlToken.Factory.parse(picXml);
      } catch (XmlException xe) {
      xe.printStackTrace();
      }
      inline.set(xmlToken);

      inline.setDistT(0);
      inline.setDistB(0);
      inline.setDistL(0);
      inline.setDistR(0);

      CTPositiveSize2D extent = inline.addNewExtent();
      extent.setCx(width);
      extent.setCy(height);

      CTNonVisualDrawingProps docPr = inline.addNewDocPr();
      docPr.setId(ind);
      docPr.setName(“图片” + ind);
      docPr.setDescr(“测试”);
      }
      }

4.调用
调用时将HTML代码进行拼接,对图片属性进行修改

你可能感兴趣的:(后端)