poi将word2007转换成html

1:导入jar包依赖

       
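<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.10.1</version>
</dependency>
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.6</version>
</dependency>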
              

2:创建工具类,便于后面直接调用即可

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

/**
 * Utility for converting a Word 2007+ document to an XHTML file using
 * Apache POI's {@code XWPFDocument} and the XDocReport {@code XHTMLConverter}.
 */
public class WordtoHtml07 {

    /**
     * Converts the Word document at {@code fileName} to XHTML written to {@code htmFile}.
     * Images are extracted through a {@code FileImageExtractor} rooted at {@code imageFile},
     * and image URIs in the output are resolved against the same location.
     *
     * <p>If the source file does not exist, or its name does not end with an accepted
     * extension, a message is printed to standard output and nothing is converted.
     *
     * @param fileName  path of the source Word document
     * @param imageFile directory handed to the image extractor and URI resolver
     * @param htmFile   path of the XHTML file to write
     * @throws IOException if reading the document or writing the output fails
     */
    public static void word07ToHtml(String fileName, String imageFile, String htmFile) throws IOException {
        File source = new File(fileName);
        if (!source.exists()) {
            System.out.println("sorry file does not exists");
            return;
        }

        // NOTE(review): XWPFDocument targets the OOXML (.docx) format; a genuine binary
        // .doc file will presumably fail to load. ".doc" is still accepted here to stay
        // backward-compatible with existing callers - confirm whether it should be rejected.
        String name = source.getName();
        if (!hasExtension(name, ".docx") && !hasExtension(name, ".doc")) {
            System.out.println("Enter only MS Office 2007+ files");
            return;
        }

        // try-with-resources guarantees both streams are closed (and the output flushed)
        // even when loading or conversion throws.
        try (InputStream in = new FileInputStream(source)) {
            // 1: load the document into an XWPFDocument
            XWPFDocument document = new XWPFDocument(in);

            // 2: extract images into the given folder and resolve image URIs against it
            File imgFile = new File(imageFile);
            XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imgFile));
            options.setExtractor(new FileImageExtractor(imgFile));

            // 3: convert the XWPFDocument to XHTML; the output file is only opened once
            // the document has loaded, so a failed load does not create or truncate it
            try (OutputStream out = new FileOutputStream(new File(htmFile))) {
                XHTMLConverter.getInstance().convert(document, out, options);
            }
        }
    }

    /** Returns true when {@code name} ends with {@code extension}, ignoring case. */
    private static boolean hasExtension(String name, String extension) {
        // regionMatches returns false for a negative offset, so short names are handled.
        return name.regionMatches(true, name.length() - extension.length(),
                extension, 0, extension.length());
    }

    /** Demo entry point: converts a sample document using hard-coded paths. */
    public static void main(String[] args) {
        try {
            word07ToHtml("F:/51/1.doc", "F:/51/media", "F:/51/1.htm");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

附:以上只是简单地实现了 Word 转换成 HTML,欢迎各路大神补充在项目中遇到的其他需求,完善工具类的封装,谢谢。

你可能感兴趣的:(poi将word2007转换成html)