Poi使用sax解析xlsx

本文参考了
https://blog.csdn.net/sai739295732/article/details/68489403

https://blog.csdn.net/rainyspring4540/article/details/50747122
两篇文章,感谢这两篇文章的作者

maven



    org.apache.poi
    poi
    3.10-FINAL
 

    org.apache.poi
    poi-ooxml
    3.10-FINAL



    xerces
    xercesImpl
    2.9.1

解析工具代码

package com.cignacmb.sms.common.util;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

public class SaxExcelUtils {
    private static List> dataListT;
    private final int startRow;
    private final int endRow;
    private int currentRow = 0;
    private final String filename;
    private static Map map;
    static char[] strChar ;
    /**
     * 构造方法
     */
    public SaxExcelUtils(String filename,int startRow,int endRow) throws Exception{
        dataListT = new ArrayList<>();
        if(StringUtils.isEmpty(filename)) throw new Exception("文件名不能空");
        this.filename = filename;
        this.startRow = startRow;
        this.endRow = endRow+1;
        processSheet();
    }
    /**
     * 指定获取第一个sheet
     * @param filename
     * @throws Exception
     */
    private void processSheet() throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader( pkg );
        SharedStringsTable sst = r.getSharedStringsTable();
        XMLReader parser = fetchSheetParser(sst);
        Iterator it = r.getSheetsData();
        while(it.hasNext()){
            map = null;
            InputStream sheet1 = it.next();
            InputSource sheetSource = new InputSource(sheet1);
            parser.parse(sheetSource);
            sheet1.close();
        }
    }
    /**
     * 加载sax 解析器
     * @param sst
     * @return
     * @throws SAXException
     */
    private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser =
                XMLReaderFactory.createXMLReader(
                        "org.apache.xerces.parsers.SAXParser"
                );
        ContentHandler handler = new PagingHandler(sst);
        parser.setContentHandler(handler);
        return parser;
    }

    /**
     * See org.xml.sax.helpers.DefaultHandler javadocs
     */
    private  class PagingHandler extends DefaultHandler {
        private SharedStringsTable sst;
        private String lastContents;
        private boolean nextIsString;
        private String index = null;
        private PagingHandler(SharedStringsTable sst) {
            this.sst = sst;
        }
        /**
         * 开始元素 (获取key 值)
         */
        @Override
        public void startElement(String uri, String localName, String name,
                                 Attributes attributes) throws SAXException {
            if(name.equals("c")) {
                index = attributes.getValue("r");
                //判断是否是新的一行
                if(Pattern.compile("^A[0-9]+$").matcher(index).find()){
                    if(map!=null&&isAccess()&&!map.isEmpty()){
                        dataListT.add(map);
                    }
                    map = new LinkedHashMap<>();
                    currentRow++;
                }
                if(isAccess()){
                    String cellType = attributes.getValue("t");
                    if(cellType != null && cellType.equals("s")) {
                        nextIsString = true;
                    } else {
                        nextIsString = false;
                    }
                }
            }
            lastContents = "";
        }
        /**
         * 获取value
         */
        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException {
            if(isAccess()){
                if(nextIsString) {
                    int idx = Integer.parseInt(lastContents);
                    lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                    nextIsString = false;
                }
                if(name.equals("v")) {
                    map.put(index, lastContents);
                }
            }

        }
        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            if(isAccess()){
                lastContents += new String(ch, start, length);
            }
        }
        @Override
        public void endDocument ()throws SAXException{
            if(map!=null&&isAccess()&&!map.isEmpty()){
                dataListT.add(map);
            }
        }

    }
    private boolean isAccess(){
        if(currentRow>=startRow&&startRow<=endRow){
            return true;
        }
        return false;
    }
    /**
     * 获取数据 并且填补字段值为空的数据
     * @return
     * @throws Exception
     */
    public List> getMyDataList() throws Exception{
        List> list = dataListT.subList(startRow, dataListT.size());
        if(!list.isEmpty()){
            Map map = dataListT.get(0);
            List com = data("A",map.size()-1);
            for(int i=0;i returnMap = list.get(i);
                for(String str:com){
                    boolean flag = true;
                    for(Entry entry:returnMap.entrySet()){
                        if(entry.getKey().contains(str)){
                            //有
                            flag = false;
                            break;
                        }
                    }
                    if(flag){
                        //没有
                        returnMap.put(str+(i+2), null);
                    }
                }
            }
        }
        return list;
    }


    /**
     * 封装数据
     * @param str
     * @param counts
     * @return
     */
    public static List data(String str,int counts){
        List list = new ArrayList<>();
        list.add(str);
        for(int i=0;i=0){
            int sc = (int)strChar[(strChar.length-1)-index];
            if(sc- 25 >= aint){
                jinwei(index+1);
                strChar[(strChar.length-1)-index] = a;
            }else{
                strChar[strChar.length-1-index] = (char)(sc+1);
            }
        }else{
            strChar[(strChar.length-1)-index+1] = a;
            StringBuilder str = new StringBuilder();
            str.append('A');
            str.append(strChar);
            strChar = str.toString().toCharArray();
        }
    }
}

快速获取总行数

package com.cignacmb.sms.common.util;


import java.io.InputStream;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;


/**
 * XSSF and SAX (Event API) basic example.
 * See {@link XLSX2CSV} for a fuller example of doing
 *  XSLX processing with the XSSF Event code.
 */
public class MaxRowExcelUtil {
    //new add
    public long maxRow = 0;//记录总行数

    private String filename = null;
    public MaxRowExcelUtil(String filename) throws Exception{
        if(StringUtils.isBlank(filename)) throw new Exception("文件名不能空");
        this.filename = filename;
        processFirstSheet();
    }
    /**
     * 指定获取第一个sheet
     * @param filename
     * @throws Exception
     */
    private void processFirstSheet() throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader( pkg );
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);

        // To look up the Sheet Name / Sheet Order / rID,
        //  you need to process the core Workbook stream.
        // Normally it's of the form rId# or rSheet#
        InputStream sheet2 = r.getSheet("rId1");
        InputSource sheetSource = new InputSource(sheet2);
        parser.parse(sheetSource);
        sheet2.close();
    }

    private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser =
                XMLReaderFactory.createXMLReader(
                        "org.apache.xerces.parsers.SAXParser"
                );
        ContentHandler handler = new MaxRowHandler();
        parser.setContentHandler(handler);
        return parser;
    }

    /**
     * See org.xml.sax.helpers.DefaultHandler javadocs
     */
    private  class MaxRowHandler extends DefaultHandler {

        @Override
        public void startElement(String uri, String localName, String name,
                                 Attributes attributes) throws SAXException {
            // c => cell
            if(name.equals("c")) {
                String index = attributes.getValue("r");
                if(Pattern.compile("A[0-9]+$").matcher(index).find()){
                    maxRow++;
                }
            }
        }
    }
}

你可能感兴趣的:(Poi使用sax解析xlsx)