POI将word转化为html

参考资料

1.POI包依赖:https://poi.apache.org/components/index.html
2.包版本问题:https://bbs.csdn.net/topics/392208805 (本文并没有使用其中提到的3.9版本,实际使用的是3.13版本)
最开始使用的是3.17版本,但在转换成html时出现错误:java.lang.NoSuchMethodError;使用3.9版本也出现了类似的问题:
java.lang.NoSuchMethodError: org.apache.poi.POIXMLDocumentPart.getPackageRelationship()
3.整体的参考:http://www.cnblogs.com/always-online/p/4800131.html
4.在发布到服务器上时,图片路径(即<img>标签的src)的问题:
https://blog.csdn.net/B888888888888/article/details/78113527?locationNum=7&fps=1

相关说明:3.17版本在excel设置样式时API发生了变化
pom.xml—对应包


<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-math3</artifactId>
    <version>3.6.1</version>
</dependency>
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>commons-codec</groupId>
    <artifactId>commons-codec</artifactId>
    <version>1.10</version>
</dependency>
<dependency>
    <groupId>commons-logging</groupId>
    <artifactId>commons-logging</artifactId>
    <version>1.2</version>
</dependency>

<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-collections4</artifactId>
    <version>4.0</version>
</dependency>

<dependency>
    <groupId>com.github.virtuald</groupId>
    <artifactId>curvesapi</artifactId>
    <version>1.04</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.13</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.13</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.13</version>
</dependency>


<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.13</version>
</dependency>


<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-examples</artifactId>
    <version>3.13</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-excelant</artifactId>
    <version>3.13</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>1.0.6</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.6</version>
</dependency>

代码如下:

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

/**
 * Static helpers that convert Word documents to HTML files on disk.
 *
 * <p>{@link #docToHtml} handles the legacy binary {@code .doc} format via POI's
 * {@code WordToHtmlConverter}; {@link #docxToHtml} handles {@code .docx} via the
 * xdocreport {@code XHTMLConverter}. Neither method closes the caller-supplied
 * input stream; the caller remains responsible for it.
 */
public class WordToHtml {
    /**
     * Converts a {@code .doc} document to an HTML file.
     *
     * <p>Every embedded picture is written to {@code imageAbsolutePath + suggestedName}
     * and referenced from the generated HTML as {@code webImagePath + suggestedName}.
     * Both prefixes are concatenated as-is, so each must already end with the
     * appropriate separator.
     *
     * <p>Saving a picture is best-effort: if a picture cannot be written, the stack
     * trace is printed and the conversion continues, leaving an {@code img} reference
     * to a file that may be missing or incomplete.
     *
     * @param ins               stream containing the {@code .doc} document; not closed by this method
     * @param imageAbsolutePath local path prefix under which extracted pictures are stored
     * @param webImagePath      prefix used for the {@code src} attribute of {@code img} tags
     *                          in the generated HTML, e.g. {@code "/csdn/"}; this is the address
     *                          the page is served from, not the local storage path
     * @param htmlPath          local path of the HTML file to write
     * @throws IOException                  if the document cannot be read or the HTML file cannot be written
     * @throws ParserConfigurationException if no DOM document builder can be created
     * @throws TransformerException         if serializing the DOM to HTML fails
     */
    public static void docToHtml(InputStream ins,String imageAbsolutePath,String webImagePath, String htmlPath) throws IOException, ParserConfigurationException, TransformerException{
        HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc(ins);

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        // Decide where extracted pictures are stored and how the HTML refers to them.
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {

            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                File file = new File(imageAbsolutePath + suggestedName);
                // try-with-resources guarantees the stream is released even when write() fails.
                try (OutputStream pictureOut = new FileOutputStream(file)) {
                    pictureOut.write(content);
                } catch (IOException e) {
                    // Best-effort: a picture that cannot be saved must not abort the whole conversion.
                    // FileNotFoundException is an IOException, so it is handled here as well.
                    e.printStackTrace();
                }
                // The returned value becomes the img src; it must be the web-facing path,
                // not the local absolute path the file was stored under.
                return webImagePath + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);
        org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();

        // Configure the serializer before opening the output file so that a
        // configuration failure does not leave an open file handle behind.
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");

        // The output stream is closed on both the success and the failure path.
        try (OutputStream out = new FileOutputStream(new File(htmlPath))) {
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
        }
    }

    /**
     * Converts a {@code .docx} document to an HTML fragment file.
     *
     * @param ins          stream containing the {@code .docx} document; not closed by this method
     * @param imageUrl     local directory handed to the image extractor for storing pictures
     * @param webImagePath base used to resolve the {@code src} of {@code img} tags in the HTML.
     *                     NOTE(review): the original author notes that a {@code word/media}
     *                     sub-folder is created automatically when pictures are extracted;
     *                     confirm against the xdocreport version in use
     * @param fileUrl      local path of the HTML file to write
     * @throws IOException if the document cannot be read or the HTML file cannot be written
     */
    public static void docxToHtml(InputStream ins,String imageUrl,String webImagePath,String fileUrl) throws IOException{

        // 1: load the document into an XWPFDocument
        XWPFDocument document = new XWPFDocument(ins);

        // 2: configure where pictures are extracted to and how they are referenced
        File imgFile = new File(imageUrl);
        XHTMLOptions options = XHTMLOptions.create();
        options.setExtractor(new FileImageExtractor(imgFile));
        // BasicURIResolver is used for relative paths; per the original author's note,
        // FileURIResolver can be used instead for absolute paths.
        options.URIResolver(new BasicURIResolver(webImagePath));
        options.setIgnoreStylesIfUnused(false);
        options.setFragment(true);

        // 3: convert the XWPFDocument to XHTML; the original never closed this stream.
        try (OutputStream out = new FileOutputStream(new File(fileUrl))) {
            XHTMLConverter.getInstance().convert(document, out, options);
        }
    }
}

你可能感兴趣的:(Java)