1. pom.xml 主要依赖
<!-- Apache POI scratchpad module; it ships the org.apache.poi.hwpf classes used for .doc files. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.10-FINAL</version>
</dependency>
2.工具类
package com.exam.main; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.converter.PicturesManager; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.hwpf.usermodel.PictureType; import org.w3c.dom.Document; import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import java.io.*; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.List; /** * Created by xin on 14/11/6. */ public class Main { private static void trySetSAXFeature(DocumentBuilderFactory documentBuilderFactory, String feature, boolean enabled) { try { documentBuilderFactory.setFeature(feature, enabled); } catch (Exception e) { e.printStackTrace(); } catch (AbstractMethodError ame) { ame.printStackTrace(); } } public static DocumentBuilderFactory getDocumentBuilderFactory() { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setExpandEntityReferences(false); trySetSAXFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true); trySetSAXFeature(factory, "http://xml.org/sax/features/external-general-entities", false); trySetSAXFeature(factory, "http://xml.org/sax/features/external-parameter-entities", false); trySetSAXFeature(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false); trySetSAXFeature(factory, "http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); return factory; } /** * 获取文件名(不含文件扩展名) * @param sourceFile 输入文件 * @return 返回的文件名 */ public static String getFileNameWithoutExtension(File sourceFile){ String filename=sourceFile.getName(); return filename.substring(0,filename.lastIndexOf('.')); } /** * doc转html(只支持doc格式,如果是docx,就要看XWPF).html文件与图片(如果有)都保存在doc文件所在目录. 
* @param docFile 输入的doc文件 * @param savePic 是否保存图片 * @throws Exception */ public static void docToHtml(final File docFile, boolean savePic) throws Exception { HWPFDocument hwpfDocument = new HWPFDocument(new FileInputStream(docFile)); Document newDocument = getDocumentBuilderFactory().newDocumentBuilder().newDocument(); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(newDocument); if (savePic) { List<Picture> pics=hwpfDocument.getPicturesTable().getAllPictures(); if(pics!=null&&pics.size()>0){ for(int i=0;i<pics.size();i++){ Picture pic = pics.get(i); pic.writeImageContent(new FileOutputStream(docFile.getParent()+"/"+pic.suggestFullFileName())); } } wordToHtmlConverter.setPicturesManager(new PicturesManager() { public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) { return suggestedName; } }); } wordToHtmlConverter.processDocument(hwpfDocument); StringWriter stringWriter = new StringWriter(); Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); transformer.setOutputProperty(OutputKeys.METHOD, "html"); transformer.transform(new DOMSource(wordToHtmlConverter.getDocument()), new StreamResult(stringWriter)); FileChannel fileChannel = new FileOutputStream(docFile.getParent()+"/"+getFileNameWithoutExtension(docFile)+".html").getChannel(); fileChannel.write(ByteBuffer.wrap(stringWriter.toString().getBytes())); fileChannel.close(); } public static void main(String[] args) throws Exception { File file=new File("C:\\Users\\xin\\Desktop\\werwr\\1.doc"); docToHtml(file, true); } }