java实现在线预览----poi操作word转html及03、07版本兼容问题

总结:当时业务需要支持将doc和docx格式转换为html,为此参考了不少代码,也遇到了很多与jar包相关的问题。

归根结底,问题都出在jar包上:要么没引全,要么版本太低,主要是因为当时公司没有使用maven管理jar包。

所需的jar包,百度一下都能出来

doc转html

	private CLS_VO_Result docToHtml(CLS_VO_File voFile) throws Exception{
		CLS_VO_Result result = new CLS_VO_Result();
		String fileName = voFile.getFileName(); // 目标文件,即带后缀的文件
		String path = voFile.getBasePath(); // 要放进去的地址,将转换成的html放入的地址
		String source = voFile.getSourcePath();// 源文件 word存在的地址
		if (new File(path, fileName + ".html").exists()) { // 如果文件已存在
			result.setRet(CLS_Easy7_Error.ERROR_OK);
		}else{
			try{
				if(!new File(source,  fileName).exists()){ //如果原文件不存在
					result.setRet(CLS_Easy7_Error.ERROR_PARAM);
				}else{
					InputStream input = new FileInputStream(source + fileName);
					HWPFDocument wordDocument = new HWPFDocument(input);
					WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
						DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
						wordToHtmlConverter.setPicturesManager(new PicturesManager() {
							@Override
							public String savePicture(byte[] content, PictureType pictureType,String suggestedName, float widthInches, float heightInches) {
								return suggestedName;
							}
						}
					);
					wordToHtmlConverter.processDocument(wordDocument);
					List pics = wordDocument.getPicturesTable().getAllPictures();
					if (pics != null) {
						for (int i = 0; i < pics.size(); i++) {
							Picture pic = (Picture) pics.get(i);
							try {
								pic.writeImageContent(new FileOutputStream(path + pic.suggestFullFileName()));
							} catch (FileNotFoundException e) {
								e.printStackTrace();
							}
						}
					 }
					 Document htmlDocument = wordToHtmlConverter.getDocument();
					 ByteArrayOutputStream outStream = new ByteArrayOutputStream();
					 DOMSource domSource = new DOMSource(htmlDocument);
					 StreamResult streamResult = new StreamResult(outStream);
					 TransformerFactory tf = TransformerFactory.newInstance();
					 Transformer serializer = tf.newTransformer();
					 serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
					 serializer.setOutputProperty(OutputKeys.INDENT, "yes");
					 serializer.setOutputProperty(OutputKeys.METHOD, "html");
					 serializer.transform(domSource, streamResult);
					 outStream.close();
					 String content = new String(outStream.toByteArray());
					 FileUtils.writeStringToFile(new File(path,  fileName + ".html"), content, "utf-8");
					 result.setRet(CLS_Easy7_Error.ERROR_OK);
				}
			}catch(Exception e){
				result.setRet(CLS_Easy7_Error.DB_ERROR_EXCEPTION);
				e.printStackTrace();
			}
		}
		return result;
	}
docx转html
private CLS_VO_Result docxToHtml(CLS_VO_File voFile) throws Exception{
		CLS_VO_Result result = new CLS_VO_Result();
		
String fileName = voFile.getFileName(); // 目标文件,即带后缀的文件
		String path = voFile.getBasePath(); // 要放进去的地址,将转换成的html放入的地址
		String source = voFile.getSourcePath();// 源文件 word存在的地址

if (new File(path, fileName + ".html").exists()) { // 如果文件已存在
result.setRet(CLS_Easy7_Error.ERROR_OK);
} else {
try {
if (!new File(source, fileName).exists()) { // 如果原文件不存在
result.setRet(CLS_Easy7_Error.ERROR_PARAM);
} else {
// 读取文档内容
InputStream in = new FileInputStream(source + fileName);
XWPFDocument document = new XWPFDocument(in);
File imageFolderFile = new File(path);
// 加载html页面时图片路径
XHTMLOptions options = XHTMLOptions.create().URIResolver(new BasicURIResolver("./"));
// 图片保存文件夹路径
options.setExtractor(new FileImageExtractor(imageFolderFile));
OutputStream out = new FileOutputStream(new File(path,fileName + ".html"));
XHTMLConverter.getInstance().convert(document, out, options);
out.close();
result.setRet(CLS_Easy7_Error.ERROR_OK);
}
}catch (Exception e) {
result.setRet(CLS_Easy7_Error.DB_ERROR_EXCEPTION);
e.printStackTrace();
}
}
return result;

 
 

你可能感兴趣的:(java)