EXCEL POI 合并单元格解析

EXCEL POI 合并单元格解析

1、EXCEL2007的合并单元格的XML格式


		
		

mergeCells 里面包含了所有合并单元格的情况,需要读取到mergeCell就可以知道 该Sheet的所有单元格的合并情况

2 、SAX解析实现

import org.apache.commons.lang3.StringUtils;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;


/**
 * 类名称:CellMergeParser
 * 类描述:
 */
public class CellMergeParser {
    private XSSFReader xssfReader;

    List merges = new ArrayList<>();
    InputStream is;

    /**
     * 构造方法
     */
    public CellMergeParser(String filename) throws Exception {
        if (StringUtils.isEmpty(filename))
            throw new Exception("文件名不能空");
        is = new FileInputStream(new File(filename));
        OPCPackage pkg = OPCPackage.open(is);
        init(pkg);

    }

    public CellMergeParser(InputStream is) throws Exception {
        if (null == is)
            throw new Exception("流不能空");
        OPCPackage pkg = OPCPackage.open(is);
        init(pkg);
    }

    private void init(OPCPackage pkg) throws IOException, OpenXML4JException {
        xssfReader = new XSSFReader(pkg);
    }

    /**
     * 获取sheet
     *
     * @throws Exception
     */
    public List process(int sheetIndex) throws Exception {
        SharedStringsTable sst = null;
        try {
            sst = xssfReader.getSharedStringsTable();
            XMLReader parser = fetchSheetParser(sst);
            Iterator it = xssfReader.getSheetsData();
            int index = 0;
            while (it.hasNext()) {
                index++;
                if (index == sheetIndex) {
                    InputStream sheet = it.next();
                    InputSource sheetSource = new InputSource(sheet);
                    parser.parse(sheetSource);
                    sheet.close();


                    return merges;
                }
            }
            throw new IllegalArgumentException("sheet index not exist");
        } finally {
            try {
                is.close();
            } catch (Exception e) {
                e.printStackTrace();
            }

        }

    }

    /**
     * 加载sax 解析器
     *
     * @param sst
     * @return
     * @throws SAXException
     */
    private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        ContentHandler handler = new PagingHandler(sst);
        parser.setContentHandler(handler);
        return parser;
    }

    /**
     * See org.xml.sax.helpers.DefaultHandler javadocs
     */
    private class PagingHandler extends DefaultHandler {
        private SharedStringsTable sst;

        private PagingHandler(SharedStringsTable sst) {
            this.sst = sst;
        }

        /**
         * 开始元素
         */
        @Override
        public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
            //获取所有的 cell
            if ("mergeCell".equalsIgnoreCase(name)) {
                String value = attributes.getValue("ref");
                merges.add(value);
            }
        }

        /**
         * 获取value
         */
        @Override
        public void endElement(String uri, String localName, String name) throws SAXException {
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
        }

    }

    /**
     * 转换成 merge bean
     *
     * @return
     */
    public List convertBean() {
        List mergeBeans = new ArrayList<>();

        merges.forEach(v -> {
            MergeBean mergeBean = RowColUtil.calcMerge(v);
            mergeBeans.add(mergeBean);

        });

        return mergeBeans;
    }

}

通过对EXCEL的格式理解,使用XML的SAX解析方案 解析出所有的CELLMerge元素,然后转化为下标范围

import org.apache.poi.ss.util.CellReference;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A1:A2
 *
 * @author dean 
 * @version 1.0.0
 * @date 2020/01/08 17:10
 * @since 1.0.0
 */
public class RowColUtil {
    /**
     * 正则匹配 mergeBean
     * @param group
     * @return
     */
    public  static  MergeBean calcMerge(String group){
        group = group.toLowerCase();
        String[] rw = group.split(":");

        Pattern p = Pattern.compile("[a-z]+|\\d+");
        Matcher m = p.matcher(rw[0]);
        ArrayList start = new ArrayList<>();
        while (m.find()) {
            start.add(m.group());
        }

        m = p.matcher(rw[1]);
        ArrayList end = new ArrayList<>();
        while (m.find()) {
            end.add(m.group());
        }

        int firstCol = CellReference.convertColStringToIndex(start.get(0));
        int firstRow = Integer.parseInt(start.get(1));

        int endCol = CellReference.convertColStringToIndex(end.get(0));
        int endRow = Integer.parseInt(end.get(1));

        MergeBean mergeBean = new MergeBean();
        mergeBean.setFirstColumn(firstCol);
        mergeBean.setFirstRow(firstRow);
        mergeBean.setLastColumn(endCol);
        mergeBean.setLastRow(endRow);

        return mergeBean;
    }
}

import lombok.Getter;
import lombok.Setter;
import lombok.ToString;

/**
 * Merge group组
 * [A1:A2, B3:B4]
 *  row 从1 开始
 * @author dean 
 * @version 1.0.0
 * @date 2020/01/08 17:06
 * @since 1.0.0
 */
@Getter
@Setter
@ToString
public class MergeBean {
    int firstRow;
    int firstColumn;
    int lastRow;
    int lastColumn;

    /**
     * 是否包含
     * @param row
     * @param column
     * @return
     */
    public boolean isMergeGroup(int row,int column){
        if (row >= firstRow && row <= lastRow) {
            if (column >= firstColumn && column <= lastColumn) {
                return true;
            }
        }
        return false;
    }

    /**
     * 生成唯一值
     * @return
     */
    public String createdKey(){
        return firstColumn+"-"+firstRow;
    }
}

import lombok.Getter;
import lombok.Setter;
import org.apache.poi.ss.util.CellReference;

import javax.ws.rs.GET;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;

/**
 * 分组缓存
 *
 * @author dean 
 * @version 1.0.0
 * @date 2020/01/08 17:22
 * @since 1.0.0
 */
@Getter
@Setter
public class MergeDataCacheManage {

    /**
     * 默认后缀
     */
    public static final String POSTFIX = "_MG";

    private List mergeBeans;
    private ConcurrentHashMap groups = new ConcurrentHashMap<>();
    private ConcurrentHashMap values = new ConcurrentHashMap<>();

    private int mergeMax = 0;
    private String postfix;

    /**
     * 创建merge cache
     *
     * @param mergeBeans
     */
    public MergeDataCacheManage(List mergeBeans) {

        this.mergeBeans = mergeBeans;
        if (mergeBeans != null) {
            mergeBeans.forEach(mb -> {
                if (mb.getLastColumn() >= mergeMax) {
                    mergeMax = mb.getLastColumn();
                }
            });
        }
        postfix = POSTFIX;
    }


    /**
     * row 从第1开始
     * col 从0 开始
     * @param data
     * @param row
     */
    public synchronized  HashMap caclGroup(HashMap data, int row) {
        if (mergeMax == 0) {
            return null;
        }
        HashMap postFixMap = null;
        for (int i = 0; i <= mergeMax; i++) {
            MergeBean mb = existGroup(row, i);
            if (mb != null) {
                if (postFixMap == null) {
                    postFixMap = new HashMap<>();
                }
                caclPostFix(postFixMap,data,mb,row,i);
            }
        }
        return postFixMap;
    }


    /**
     * 分组 + 赋值
     * @param postFixMap
     * @param data
     * @param mb
     * @param row
     * @param col
     */
    public synchronized void caclPostFix(HashMap postFixMap, HashMap data, MergeBean mb, int row, int col) {
        String key = mb.createdKey();
        if (groups.get(key) == null) {
            groups.put(key, UUID.random());
            if (data.get(col)!=null) {
                values.put(key, data.get(col));
            }
        }
        data.put(col,values.get(key));

        String colName = CellReference.convertNumToColString(col);
        postFixMap.put(colName+postfix,groups.get(key));
    }


    /**
     * 是否存在组
     *
     * @param row
     * @param col
     * @return
     */
    public MergeBean existGroup(int row, int col) {

        for (MergeBean mb : mergeBeans) {
            if (mb.isMergeGroup(row, col)) {
                return mb;
            }
        }
        return null;
    }

}

对解析出来的结果进行计算。

测试

import java.util.List;

/**
 * TODO
 *
 * @author dean 
 * @version 1.0.0
 * @date 2020/01/08 10:14
 * @since 1.0.0
 */
public class SaxTest {

    public static void main(String[] args) throws Exception {
        String filepath = "C:\\Users\\tanping.GLOBALE\\Desktop\\demo.xlsx";
        CellMergeParser reader = new CellMergeParser(filepath);
        List result = reader.process(1);

        System.out.println(result);

        result.forEach(v->{
            MergeBean mergeBean = RowColUtil.calcMerge(v);
            System.out.println(mergeBean);
        });
    }
}

你可能感兴趣的:(xmll,java)