POI读取word表格内容

使用 POI 读取 Word 表格内容(.doc 与 .docx)

jar依赖

 
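<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.9</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.9</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.9</version>
</dependency>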
        

注意:以上几个 POI 依赖的版本必须保持一致,避免包冲突;
版本不一致时,运行期可能出现类似 `java.lang.VerifyError: Bad type on operand stack` 的错误。

读取.docx 表格内容

public void readWork2007(File file) {
        long start = System.currentTimeMillis ();
        try (FileInputStream fin = new FileInputStream (file.getAbsolutePath ())) {
                System.out.println ("这是.docx文件,开始解析-----");
                //word 2007 图片不会被读取, 表格中的数据会被放在字符串的最后
                //得到word文档的信息
                XWPFDocument xwpf = new XWPFDocument (fin);
                //得到word中的表格
                Iterator it = xwpf.getTablesIterator ();
                // 设置需要读取的表格  set是设置需要读取的第几个表格,total是文件中表格的总数
                while (it.hasNext ()) {
                    XWPFTable table = it.next ();
                    //读取每一行数据
                    for (int i = 0; i < table.getNumberOfRows (); i++) {
                        XWPFTableRow row = table.getRow (i);
                        //读取每一列数据
                        List cells = row.getTableCells ();
                        for (XWPFTableCell cell : cells) {
                            //输出当前的单元格的数据

                            System.out.print (cell.getText () + "\t");
                        }
                        System.out.println ();
                    }

             …… 省略   

         } 

读取.doc 表格内容

public void readWord2003(File file){
   try (FileInputStream fin = new FileInputStream (file.getAbsolutePath ())) {
				 System.out.println ("这是.doc文件,------开始解析---------");
                POIFSFileSystem pfs = new POIFSFileSystem (fin);
                HWPFDocument hwpf = new HWPFDocument (pfs);
                //得到文档的读取范围
                Range range = hwpf.getRange ();
                TableIterator tableIterator = new TableIterator (range);
                List wordText= analysis (tableIterator);
                wordText.forEach (aa -> System.out.println (aa.toString ()));
     }
 ……省略
}

 /**
  * Collects the trimmed text of the 1st, 3rd and 5th cell of every row except the first
  * row of each table.
  *
  * <p>The result is a flat list: three entries per visited row, in table and row order.
  *
  * <p>NOTE(review): row index 0 is skipped, presumably because it is a header row —
  * confirm. Rows with fewer than five cells are not guarded against here — confirm that
  * all input tables have at least five columns.
  *
  * @param tablesIterator iterator over the tables of a .doc document
  * @return the trimmed cell texts, three per visited row; empty if there are no tables
  */
 private List<String> analysis(TableIterator tablesIterator) {
     // Zero-based indices of the cells to extract (columns 1, 3 and 5).
     final int[] wantedColumns = {0, 2, 4};
     List<String> wordTexts = new ArrayList<>();
     while (tablesIterator.hasNext()) {
         Table table = tablesIterator.next();
         // Start at 1: the first row of each table is intentionally not read.
         for (int i = 1; i < table.numRows(); i++) {
             TableRow row = table.getRow(i);
             for (int column : wantedColumns) {
                 // trim() (the original called the non-existent tream()) strips
                 // leading/trailing whitespace and control characters.
                 wordTexts.add(row.getCell(column).getText().trim());
             }
         }
     }
     return wordTexts;
 }

你可能感兴趣的:(java)