将word转化为html,并读取html内容

/**
     * Converts a binary Word ({@code .doc}) file into an HTML file.
     *
     * <p>The document is loaded and converted to a DOM tree first; the target file is
     * only opened once that has succeeded. The DOM is then serialized as indented,
     * UTF-8 encoded HTML.
     *
     * @param sourceFileName path of the source Word file
     * @param htmlPath       path of the HTML file to write
     * @return {@code true} when the conversion finishes; every failure is reported
     *         through an exception instead of a {@code false} return
     * @throws Exception if the source cannot be read or converted, or the target
     *                   cannot be written
     */
    public static boolean docToHtml(String sourceFileName, String htmlPath) throws Exception {
        HWPFDocumentCore wordDocument;
        // try-with-resources: the source stream is released even when loading fails.
        try (FileInputStream in = new FileInputStream(sourceFileName)) {
            wordDocument = WordToHtmlUtils.loadDoc(in);
        }

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();

        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");

        // The output stream is opened last and closed automatically, so a failing
        // transform no longer leaks the file handle.
        try (OutputStream out = new FileOutputStream(new File(htmlPath))) {
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
        }
        return true;
    }

    /**
     * Converts an OOXML Word ({@code .docx}) file into an HTML file.
     *
     * <p>Any exception raised while reading, converting or writing is printed to
     * standard error and reported as a {@code false} return value. The
     * {@code throws} clause is kept only so existing callers continue to compile.
     *
     * @param sourceFileName path of the source Word file
     * @param fileUrl        path of the HTML file to write
     * @return {@code true} if the HTML file was written, {@code false} if any step failed
     * @throws Exception declared for backward compatibility; failures inside the
     *                   conversion are caught and turned into {@code false}
     */
    public static boolean docxToHtml(String sourceFileName, String fileUrl) throws Exception {
        File wordFile = new File(sourceFileName).getAbsoluteFile();
        File htmlFile = new File(fileUrl);
        // try-with-resources closes the input stream and the document on every path,
        // including when the constructor or the converter throws.
        try (InputStream inputStream = new FileInputStream(wordFile);
             XWPFDocument document = new XWPFDocument(inputStream)) {
            XHTMLOptions options = XHTMLOptions.create();
            // Optional: set the directory and folder name where extracted images are saved.
            // options.setImageManager(new ImageManager(wordFile.getParentFile(), "PoiImages"));

            // The target file is opened only after the document has loaded successfully.
            try (OutputStream outputStream = new FileOutputStream(htmlFile)) {
                XHTMLConverter.getInstance().convert(document, outputStream, options);
            }
        } catch (Exception e) {
            // Existing contract: report the failure and signal it through the return value.
            e.printStackTrace();
            return false;
        }
        return true;
    }


依赖
  
        
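    <dependencies>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.2</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>

        <dependency>
            <groupId>commons-codec</groupId>
            <artifactId>commons-codec</artifactId>
            <version>1.10</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-collections4</artifactId>
            <version>4.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-examples</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-excelant</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.xmlbeans</groupId>
            <artifactId>xmlbeans</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>ooxml-schemas</artifactId>
            <version>1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.core</artifactId>
            <version>2.0.1</version>
        </dependency>
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
            <version>2.0.1</version>
        </dependency>

        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.5</version>
        </dependency>
    </dependencies>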

 

你可能感兴趣的:(java)