使用 POI 读取 Word(doc、docx)中的表格,并针对自己表格中的合并单元格进行处理

image.png
package com.zt.haide.util.easyWord;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.*;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.usermodel.*;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

public class ReadWordTable {
    public static void main(String[] args) {
        String path = "C:\\Users\\Administrator\\Desktop\\lzq1.doc";
        List> lists = readWord(path);
        lists.forEach(list->{
            System.out.println("-------");
            list.forEach(str-> System.out.println(str));});
    }


    /**
     * 读word的表格
     * @param path
     * @return
     */
    public static List> readWord(String path){
        List> list = new ArrayList<>();
        try {
            File file = new File(path);
            HashMap map = new HashMap<>();
            map.put("str1", "");
            map.put("str2", "");
            map.put("str3", "");
            map.put("str4", "");
            InputStream is = new FileInputStream(file);
            if(path.toLowerCase().endsWith("docx")) {
            XWPFDocument docx = new XWPFDocument(is);
                List tables = docx.getTables();
                //遍历word内的表格
                for (int i = 0; i < 1; i++) {
                    XWPFTable table = tables.get(i);
                    //读取每一行数据

                    for (int j = 0; j < table.getNumberOfRows(); j++) {
                        if (j >= 1) {

                            List stringList = new ArrayList<>();
                            //读取表格的某一行的所有数据
                            XWPFTableRow row = table.getRow(j);
                            //读取每一列的数据
                            List cells = row.getTableCells();
                            String text = "";
                            for (int k = 0; k < 4; k++) {
                                XWPFTableCell cell = cells.get(k);
                                List paragraphs1 = cell.getParagraphs();
                                text = "";
                                if (paragraphs1.size() <= 1) {
                                    //一格一条数据
                                    text = cell.getText();
                                } else if (paragraphs1.size() > 1) {
                                    //一格多条数据
                                    //表格内含有多个人的时候处理
                                    text = "";
                                    for (int h = 0; h < paragraphs1.size(); h++) {
                                        XWPFParagraph xwpfParagraph = paragraphs1.get(h);
                                        List runs = xwpfParagraph.getRuns();
//                                    if (h > 0){
//                                        text += ",";
//                                    }
                                        if (runs.size() > 1) {
                                            for (int b = 0; b < runs.size(); b++) {
                                                XWPFRun xwpfRun = runs.get(b);
                                                text += xwpfRun.getText(0);
                                            }
                                        } else {
                                            for (XWPFRun run : runs) {
                                                text += run.getText(0);
                                            }
                                        }
                                    }
                                }

                                if (("").equals(text)) {
                                    String s = map.get("str" + k);
                                    text = s;
                                }
                                map.put("str" + k, text);
                                stringList.add(text);
                            }
                            list.add(stringList);
                        }
                    }
                }
            }else {

                // 处理doc格式 即office2003版本
                POIFSFileSystem pfs = new POIFSFileSystem(is);
                HWPFDocument hwpf = new HWPFDocument(pfs);
                Range range = hwpf.getRange();//得到文档的读取范围
                TableIterator it = new TableIterator(range);
                // 迭代文档中的表格
                // 如果有多个表格只读取需要的一个 set是设置需要读取的第几个表格,total是文件中表格的总数
                int set = 1, total = 4;
                int num = set;
                for (int i = 0; i < set - 1; i++) {
                    it.hasNext();
                    it.next();
                }
                while (it.hasNext()) {
                    Table tb = (Table) it.next();
                    //迭代行,默认从0开始,可以依据需要设置i的值,改变起始行数,也可设置读取到那行,只需修改循环的判断条件即可
                    for (int i = 1; i < tb.numRows(); i++) {
                        ArrayList strings = new ArrayList<>();
                        TableRow tr = tb.getRow(i);
                        //迭代列,默认从0开始
                        for (int j = 0; j < 4; j++) {
                            TableCell td = tr.getCell(j);//取得单元格
                            //取得单元格的内容
                            String text="";
                            for (int k = 0; k < td.numParagraphs(); k++) {
                                Paragraph para = td.getParagraph(k);
                                String s = para.text();
                                //去除后面的特殊符号
                                if (null != s && !"".equals(s)) {
                                    s = s.substring(0, s.length() - 1);
                                }
                                text+=s;
                            }
                            if (("").equals(text)) {
                                String s = map.get("str" + j);
                                text = s;
                            }
                            map.put("str" + j, text);
                            strings.add(text);
                        }
                        list.add(strings);
                    }
                    // 过滤多余的表格
                    while (num < total) {
                        it.hasNext();
                        it.next();
                        num += 1;
                    }

                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return list;
    }

    /**
     * 获取文档中备注(读取Word段落的内容)
     * @param paragraphs
     * @return
     */
    public static String getNotes(List paragraphs){
        XWPFParagraph para = paragraphs.get(3);
        String notes = "";
        List runList = para.getRuns();
        if (runList.size() > 0) {
            for (XWPFRun r : runList) {
                notes += r.getText(0);
            }
            System.out.println("备注*:" + notes);
        }else {
            System.out.println("备注*:" + notes);
        }
        return notes;
    }
}

你可能感兴趣的:(poi读取word(doc,docx)表格,针对自己的表格合并单元格)