总结:当时业务需要支持将doc和docx格式转换为html,为此参考了很多代码,也遇到了不少与jar包相关的问题。
归根结底,要么是jar包没有引全,要么是版本太低,主要原因是当时公司没有使用maven管理jar包。
所需的jar包,百度一下都能找到。
doc转html
private CLS_VO_Result docToHtml(CLS_VO_File voFile) throws Exception{ CLS_VO_Result result = new CLS_VO_Result(); String fileName = voFile.getFileName(); // 目标文件,即带后缀的文件 String path = voFile.getBasePath(); // 要放进去的地址,将转换成的html放入的地址 String source = voFile.getSourcePath();// 源文件 word存在的地址 if (new File(path, fileName + ".html").exists()) { // 如果文件已存在 result.setRet(CLS_Easy7_Error.ERROR_OK); }else{ try{ if(!new File(source, fileName).exists()){ //如果原文件不存在 result.setRet(CLS_Easy7_Error.ERROR_PARAM); }else{ InputStream input = new FileInputStream(source + fileName); HWPFDocument wordDocument = new HWPFDocument(input); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); wordToHtmlConverter.setPicturesManager(new PicturesManager() { @Override public String savePicture(byte[] content, PictureType pictureType,String suggestedName, float widthInches, float heightInches) { return suggestedName; } } ); wordToHtmlConverter.processDocument(wordDocument); List pics = wordDocument.getPicturesTable().getAllPictures(); if (pics != null) { for (int i = 0; i < pics.size(); i++) { Picture pic = (Picture) pics.get(i); try { pic.writeImageContent(new FileOutputStream(path + pic.suggestFullFileName())); } catch (FileNotFoundException e) { e.printStackTrace(); } } } Document htmlDocument = wordToHtmlConverter.getDocument(); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(outStream); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); outStream.close(); String content = new String(outStream.toByteArray()); 
FileUtils.writeStringToFile(new File(path, fileName + ".html"), content, "utf-8"); result.setRet(CLS_Easy7_Error.ERROR_OK); } }catch(Exception e){ result.setRet(CLS_Easy7_Error.DB_ERROR_EXCEPTION); e.printStackTrace(); } } return result; }
docx转html
private CLS_VO_Result docxToHtml(CLS_VO_File voFile) throws Exception{ CLS_VO_Result result = new CLS_VO_Result();
String fileName = voFile.getFileName(); // 目标文件,即带后缀的文件 String path = voFile.getBasePath(); // 要放进去的地址,将转换成的html放入的地址 String source = voFile.getSourcePath();// 源文件 word存在的地址