高效读取大数据量Excel 2007文件的解决方案

网上这方面资料不少,但或多或少都有问题。目前github上有一个开源工具EasyExcel,但是其依赖比较复杂,而公司的项目用的还是比较原始的lib包的方式,没有使用包管理工具,于是打算使用poi自己做一个。

1.环境

基于poi3.12版本开发

2.原理

Excel数据主要基于xml保存,而poi则是通过操作xml来对Excel文件进行操作。在进行读取时有Event model和User model两种模式,顾名思义,前者基于事件读取,每读取一个xml元素(Excel数据)就调用用户自定义的处理逻辑对数据进行处理。而后者在事件模式的基础上进行了封装,将所有数据写入内存中构造出对应的sheet、row、cell模型,这种方式更便于用户操作,但性能较差,而且在Excel文件较大时会内存溢出。

3.实现

poi自带一个基于Event model的读取器XSSFReader和对应的xml解析类XSSFSheetXMLHandler,但功能非常简陋,我们主要基于这两个类进行开发,首先扩展XSSFSheetXMLHandler类,增加一个数据读取完毕的回调。

public class LocalXSSFSheetXmlHandler extends XSSFSheetXMLHandler {
        public LocalXSSFSheetXmlHandler(StylesTable styles, CommentsTable comments, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetContentsHandler, DataFormatter dataFormatter, boolean formulasNotResults) {
            super(styles, comments, strings, sheetContentsHandler, dataFormatter, formulasNotResults);
        }

        public LocalXSSFSheetXmlHandler(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetContentsHandler, DataFormatter dataFormatter, boolean formulasNotResults) {
            super(styles, strings, sheetContentsHandler, dataFormatter, formulasNotResults);
        }

        public LocalXSSFSheetXmlHandler(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetContentsHandler, boolean formulasNotResults) {
            super(styles, strings, sheetContentsHandler, formulasNotResults);
        }

        private Procedure procedure;

        public LocalXSSFSheetXmlHandler handleEnd(Procedure procedure){
            this.procedure = procedure;
            return this;
        }
        public void endDocument ()
                throws SAXException
        {
            procedure.run();
        }
    }
package com.apex.bss.mod.util.excle;

/**
 * A callback that takes no arguments and returns no result.
 *
 * <p>Declared as a functional interface so it can be supplied as a lambda or
 * method reference.
 *
 * Created by Feng
 */
@FunctionalInterface
public interface Procedure {
    /** Executes the callback. */
    void run();
}

我们扩展了XSSFSheetXMLHandler类,并且重写了endDocument方法。在原本的类中,该方法没有任何操作;在扩展类中,我们调用Procedure.run方法来处理Excel读取完毕时的业务逻辑。
poi自带的事件处理接口是XSSFSheetXMLHandler.SheetContentsHandler,代码如下

// Event callback interface of POI's XSSFSheetXMLHandler, quoted here for reference.
// NOTE(review): the var1/var2/var3 names look like decompiler output; the parameter
// meanings below are taken from the SheetHandler implementation in this article.
public interface SheetContentsHandler {
        // Row start event; var1 is the row number (the implementation forwards the same
        // number it receives in endRow to ExcleHandler.handleRow).
        void startRow(int var1);

        // Row end event; var1 is the row number.
        void endRow(int var1);

        // Cell event; var1 is the cell reference, var2 the formatted value,
        // var3 the cell comment.
        void cell(String var1, String var2, XSSFComment var3);

        // Header/footer event. NOTE(review): parameter meanings are not shown in this
        // file — confirm against the POI documentation.
        void headerFooter(String var1, boolean var2, String var3);
    }

可以看到数据是基于cell被处理的,我们要在startRow和endRow方法中自己构建行数据

 /**
  * Collects the cell events of one row into a list and hands each completed row to an
  * {@link ExcleHandler}; also forwards the end-of-document notification to it.
  *
  * <p>Cells that are absent from the sheet XML are filled with empty strings so that a
  * value's position in the row list matches its column (column A is position 0).
  * Only gaps before a present cell are filled; cells missing after the last present
  * cell of a row are not appended.
  */
 public class SheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler, Procedure {
     /** Values of the row currently being read; the same list instance is reused for every row. */
     protected List<String> row = new LinkedList<>();

     private final ExcleHandler excleHandler;

     /** 1-based column index of the last cell seen in the current row; 0 when none yet. */
     private int preIndex = 0;

     /**
      * @param excleHandler receiver of completed rows and of the end-of-document event
      */
     public SheetHandler(ExcleHandler excleHandler) {
         this.excleHandler = excleHandler;
     }

     /** End-of-document callback: tells the row handler that reading is over. */
     @Override
     public void run() {
         excleHandler.over();
     }

     /** Starts a new row: empties the row buffer and forgets the previous row's last column. */
     @Override
     public void startRow(int i) {
         row.clear();
         // Without this reset the gap computation in cell(...) would compare against the
         // last column of the PREVIOUS row and skip (or miscount) leading empty cells.
         preIndex = 0;
     }

     /** Ends the row: passes the buffered values and the row number to the row handler. */
     @Override
     public void endRow(int i) {
         excleHandler.handleRow(row, i);
     }

     /**
      * Appends a cell value, first padding with "" for every column skipped since the
      * previous cell of this row.
      *
      * @param cellReference  cell reference such as "C7"; when {@code null} the cell is
      *                       assumed to follow the previous one directly
      * @param formattedValue formatted cell value
      * @param comment        cell comment (unused)
      */
     @Override
     public void cell(String cellReference, String formattedValue, XSSFComment comment) {
         int index = cellReference == null ? preIndex + 1 : excleCelNumToIndex(cellReference);
         for (int missing = preIndex + 1; missing < index; missing++) {
             row.add("");
         }
         row.add(formattedValue);
         preIndex = index;
     }

     @Override
     public void headerFooter(String s, boolean b, String s1) {
         // Headers and footers are not part of the row data; intentionally ignored.
     }

     /**
      * Converts the column letters of a cell reference to a 1-based column index
      * ("A" -> 1, "Z" -> 26, "AA" -> 27). Characters other than letters A-Z (such as the
      * row digits) are ignored.
      */
     private int excleCelNumToIndex(String celNum) {
         int result = 0;
         for (int i = 0; i < celNum.length(); i++) {
             char ch = Character.toUpperCase(celNum.charAt(i));
             if (ch >= 'A' && ch <= 'Z') {
                 // Base-26 accumulation in integer arithmetic, left to right.
                 result = result * 26 + (ch - 'A' + 1);
             }
         }
         return result;
     }
 }

实现了一个事件处理接口,将数据封装成行数据,并且将具体的处理逻辑委托给ExcleHandler接口,此接口基于行来处理excle数据

/**
 * Row-level callback used when reading a sheet: receives each row as it is completed
 * and a final notification when reading has finished.
 */
public interface ExcleHandler {
    /**
     * Receives one row.
     *
     * @param row       the row's data
     * @param rowNumber the row number reported by the reader
     */
    void handleRow(Object row, int rowNumber);

    /** Signals that no more rows will be delivered. */
    void over();
}

需要注意的是,poi提供给我们的读取工具会跳过某些没有编辑过的单元格,我们需要在构建行的时候自己补全缺失的数据。
然后自定义一个EventExcleReader用于读取Excel

package com.apex.bss.mod.util.excle;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Event-model reader for .xlsx workbooks: streams the first sheet through a SAX parser
 * and reports rows to an {@link ExcleHandler} instead of building the whole workbook in
 * memory.
 *
 * <p>An instance is single-use: {@link #read()} closes the underlying package.
 *
 * Created by Feng
 */
public class EventExcleReader {
    private final OPCPackage opcPackage;
    private final SheetHandler handler;
    private final ExcleTypeEnum excleTypeEnum;

    /**
     * Opens the workbook at the given path read-only.
     *
     * @param filename  path of the .xlsx file
     * @param excleType declared file type; XLS is rejected
     * @param handler   receiver of the rows and of the end-of-document event
     * @throws IllegalAccessException if {@code excleType} is XLS
     * @throws InvalidFormatException if the file is not a valid OPC package
     */
    public EventExcleReader(String filename, ExcleTypeEnum excleType, ExcleHandler handler) throws IllegalAccessException, InvalidFormatException {
        requireSupported(excleType);
        this.excleTypeEnum = excleType;
        this.opcPackage = OPCPackage.open(filename, PackageAccess.READ);
        this.handler = new SheetHandler(handler);
    }

    /**
     * Opens the workbook from a stream. The stream itself is not closed by this class.
     *
     * @param inputStream stream with the .xlsx content
     * @param excleType   declared file type; XLS is rejected
     * @param handler     receiver of the rows and of the end-of-document event
     * @throws IllegalAccessException if {@code excleType} is XLS
     * @throws InvalidFormatException if the content is not a valid OPC package
     * @throws IOException            if the stream cannot be read
     */
    public EventExcleReader(InputStream inputStream, ExcleTypeEnum excleType, ExcleHandler handler) throws IllegalAccessException, InvalidFormatException, IOException {
        requireSupported(excleType);
        this.excleTypeEnum = excleType;
        this.opcPackage = OPCPackage.open(inputStream);
        this.handler = new SheetHandler(handler);
    }

    /** Rejects the legacy binary format, which this reader cannot parse. */
    private static void requireSupported(ExcleTypeEnum excleType) throws IllegalAccessException {
        if (excleType == ExcleTypeEnum.XLS) {
            throw new IllegalAccessException("暂不支持XLS文件");
        }
    }

    /**
     * Parses the first sheet of the workbook, delivering its rows to the handler, then
     * closes the sheet stream and the package.
     *
     * @throws RuntimeException wrapping any failure raised while reading or closing
     */
    public void read() {
        // try-with-resources closes the sheet stream first and the package second, closes
        // the package even when closing the stream fails, and keeps the primary exception
        // (close failures are attached to it as suppressed exceptions).
        try (OPCPackage pkg = opcPackage) {
            XSSFReader xssfReader = new XSSFReader(pkg);
            StylesTable styles = xssfReader.getStylesTable();
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
            java.util.Iterator<InputStream> sheets = xssfReader.getSheetsData();
            if (!sheets.hasNext()) {
                throw new IllegalStateException("Excel文件中没有工作表");
            }
            try (InputStream sheetInputStream = sheets.next()) {
                processSheet(styles, strings, sheetInputStream);
            }
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /** Runs the SAX parse of one sheet stream with the row-collecting handler attached. */
    private void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws SAXException, ParserConfigurationException, IOException {
        XMLReader sheetParser = SAXHelper.newXMLReader();
        // The same SheetHandler serves as the cell/row event sink and as the
        // end-of-document callback.
        LocalXSSFSheetXmlHandler contentHandler =
                new LocalXSSFSheetXmlHandler(styles, strings, handler, false).handleEnd(handler);
        sheetParser.setContentHandler(contentHandler);
        sheetParser.parse(new InputSource(sheetInputStream));
    }
}

然后根据具体业务实现ExcleHandler接口处理业务逻辑即可,这里我们还可以进一步对其进行封装实现数据的分段处理

public class ExcleUtil {
    public static void readExcleByReduce(InputStream inputStream, int reduce, ExcleTypeEnum excleType, BiConsumer>,LinkedList> consumer) throws IllegalAccessException, IOException, InvalidFormatException {
        new EventExcleReader(inputStream, excleType, new ReduceExcleHandler(consumer).reduceBy(reduce)).read();

    }
    public static void readExcle(InputStream inputStream,ExcleTypeEnum excleType, BiConsumer>,LinkedList> consumer) throws IllegalAccessException, IOException, InvalidFormatException {
        new EventExcleReader(inputStream, excleType, new ReduceExcleHandler(consumer)).read();
    }

    /**
     * 分段读取类
     */
    private static class ReduceExcleHandler implements ExcleHandler{

        private int reduce = 1;

        BiConsumer>,LinkedList> consumer;

        private LinkedList> datas = new LinkedList<>();

        private LinkedList head;

        public ReduceExcleHandler(BiConsumer>,LinkedList> consumer){
            this.consumer = consumer;
        }

        private ReduceExcleHandler reduceBy(int reduce){
            this.reduce = reduce;
            return this;
        }

        @Override
        public void handleRow(Object row, int rowNumber) {
            if(rowNumber <= 0){
                head = new LinkedList<>((LinkedList) row);
            }else {
                if (datas.size() >= reduce) {
                    consumer.accept(datas, head);
                    datas.clear();
                }
                datas.add(new LinkedList<>((LinkedList) row));
            }
        }

        @Override
        public void over() {
            consumer.accept(datas,head);
        }
    }
}

在上述代码中我们实现了ReduceExcleHandler,将Excel数据以reduce为间隔进行分割,分批次处理,下面是一个简单的上传并导入Excel到指定表的示例

package com.apex.bss.mod.util.excle;

import net.sf.hibernate.mapping.Collection;
import net.spy.memcached.util.StringUtils;
import plugins.bean.DBUtils;
import plugins.bean.DatabaseUtils;
import sun.awt.image.ImageWatched;

import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.LinkedList;
import java.util.List;
import java.util.function.BiConsumer;

/**
 * Created by Feng
 */
public class OracleReduceConsumer implements BiConsumer>,LinkedList> {
    private int successCount = 0;

    private int failCount = 0;

    private String insertSql;

    private static DatabaseUtils databaseUtils = new DBUtils();

    private final String tableName;

    private LinkedList columns = new LinkedList<>();

    private boolean verifyPass = false;

    public OracleReduceConsumer(String tableName) throws IllegalAccessException {
        if(com.apex.bss.foundation.utils.StringUtils.isEmpty(tableName)){
            throw new IllegalAccessException("表不能为空");
        }
        try {
            ResultSet resultSet = databaseUtils.getConnection().prepareStatement("select * from " + tableName).executeQuery();
            ResultSetMetaData rsmd = resultSet.getMetaData();
            int columnCount = rsmd.getColumnCount();
            for(int i=1;i<=columnCount;i++){
                columns.add(rsmd.getColumnName(i));
            }
        }catch (Exception e){
            throw new IllegalAccessException("查询表头异常:"+e.getMessage());
        }
        this.tableName = tableName;
    }

    public int getSuccessCount() {
        return successCount;
    }

    public int getFailCount() {
        return failCount;
    }

    @Override
    public void accept(LinkedList> linkedLists, LinkedList strings) {
        if(!verifyPass){
            boolean verifyResult = verify(this.columns,strings);
            if(verifyResult){
                verifyPass = true;
            }else{
                throw new RuntimeException("表头验证错误");
            }
        }
        if(insertSql == null){
            insertSql = "INSERT INTO " + tableName +"(" + StringUtils.join(strings,",") + ") VALUES(" + generatPlaceHolder(strings.size()) + ")";
        }
        for(LinkedList data:linkedLists){
            try {
                databaseUtils.executeUpdate(insertSql,generateParam(data,strings.size()));
                successCount += 1;
            }catch (Exception e){
                failCount += 1;
            }
        }
    }

    private String generatPlaceHolder(int length){
        StringBuilder result = new StringBuilder();
        for(int i = 0;i < length;i++){
            if(i == length - 1){
                result.append("?");
            }else {
                result.append("?,");
            }
        }
        return result.toString();
    }

    private String[] generateParam(LinkedList strings,int headSize){
        for(int i = strings.size();i < headSize;i++){
            strings.add("");
        }
        return strings.toArray(new String[]{});
    }

    private boolean verify(LinkedList head,LinkedList columns){
        if(head.size() != columns.size()){
            return false;
        }
        for(int i = 0;i < head.size();i++){
            if(!head.get(i).equalsIgnoreCase(columns.get(i))){
                return false;
            }
        }
        return true;
    }
}

	/**
     * Imports an uploaded .xlsx file into the table named by the "tableName" request
     * parameter and reports the outcome as JSON.
     *
     * @param request  request carrying the "tableName" parameter
     * @param response unused
     * @param file     uploaded workbook
     * @return JSON with "code" (1 on success, -1 on failure) and "msg"
     */
    @RequestMapping("importExcle")
    @ResponseBody
    public String importExcle(HttpServletRequest request, HttpServletResponse response, @RequestParam(value = "file") MultipartFile file) throws IOException, InvalidFormatException, IllegalAccessException, SQLException {
        String tableName = request.getParameter("tableName");
        HashMap<String, Object> result = new HashMap<>();
        try {
            if (StringUtils.isEmpty(tableName)) {
                throw new IllegalAccessException("表名不能为空");
            }
            OracleReduceConsumer oracleReduceConsumer = new OracleReduceConsumer(tableName);
            // The reader closes the package it builds but not the stream it was given,
            // so the upload stream is closed here.
            try (java.io.InputStream uploadStream = file.getInputStream()) {
                ExcleUtil.readExcleByReduce(uploadStream, 2, ExcleTypeEnum.XLSL, oracleReduceConsumer);
            }
            result.put("code", 1);
            result.put("msg", "成功导入" + oracleReduceConsumer.getSuccessCount() + "条数据,失败" + oracleReduceConsumer.getFailCount() + "条数据");
        } catch (Exception e) {
            result.put("code", -1);
            // Some exceptions carry no message; fall back to the exception's description
            // so the client never receives a null "msg".
            result.put("msg", e.getMessage() != null ? e.getMessage() : e.toString());
        }
        return StringUtils.toJson(result);
    }

你可能感兴趣的:(编程语言_JAVA)