POI解析百万行excel的实现

poi 的usermodel api读取大数据量excel会导致OOM,可以使用eventmodel api来处理这种excel.

少量的行数excel可以用

XSSFWorkbook wb = new XSSFWorkbook(inputStream);  
         XSSFSheet sheet = wb.getSheetAt(0);  
         Iterator iter = sheet.iterator();  
         boolean isfirstline = true;  
         while (iter.hasNext()) {  
             Row row = iter.next();  
             if (isfirstline) { // 忽略上传文件第一行的标题栏  
                 isfirstline = false;  
                 continue;  
             }  
              //解析excel,每行有11列,然后对每列解析出来之后调用后端服务把数据保存到数据库中,  
             }  
         }  

但是大于几十万行的时候就会出现 out of memory的错误。很简单

XSSFWorkbook wb = new XSSFWorkbook(inputStream);  

导致了内存溢出。
解决方法就是可以使用eventmodel api来处理这种excel.

    new ExcelEventUserModelParse(filePath)
                    .setHandler(new ExcelEventUserModelParse.SimpleSheetContentsHandler() {
                        @Override
                        public void endRow(int rowNum) {
                            // System.out.println(row.toString());
                            if (!isEmpty.getIsEmpty()) {
                                table.add(row);
                            } else {
                                isnull.setIsEmpty(isEmpty.getIsEmpty());
                                isnull.setRowNum(isEmpty.getRowNum());
                            }
                        }
                    }).parse();

table就是解析出来的结果

public class ExcelEventUserModelParse {
    private Logger logger = LoggerFactory.getLogger(ExcelEventUserModelParse.class);
    private String filename;
    private SheetContentsHandler handler;

    public ExcelEventUserModelParse(String filename) {
        this.filename = filename;
    }

    public ExcelEventUserModelParse setHandler(SheetContentsHandler handler) {
        this.handler = handler;
        return this;
    }

    public void parse() {
        OPCPackage pkg = null;
        InputStream sheetInputStream = null;
        try {
            pkg = OPCPackage.open(filename, PackageAccess.READ);
            XSSFReader xssfReader = new XSSFReader(pkg);
            StylesTable styles = xssfReader.getStylesTable();
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
            sheetInputStream = xssfReader.getSheetsData().next();
            processSheet(styles, strings, sheetInputStream);
        } catch (Exception e) {
            logger.error(e.getMessage());
            throw new RuntimeException(e.getMessage(), e);
        } finally {
            if (sheetInputStream != null) {
                try {
                    sheetInputStream.close();
                } catch (IOException e) {
                    logger.error(e.getMessage());
                    throw new RuntimeException(e.getMessage(), e);
                }
            }
            if (pkg != null) {
                try {
                    pkg.close();
                } catch (IOException e) {
                    logger.error(e.getMessage());
                    throw new RuntimeException(e.getMessage(), e);
                }
            }
        }
    }

    private void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream)
            throws SAXException, ParserConfigurationException, IOException {
        XMLReader sheetParser = SAXHelper.newXMLReader();

        if (handler != null) {
            sheetParser.setContentHandler(new XSSFSheetXMLHandler(styles, strings, handler, false));
        } else {
            sheetParser.setContentHandler(
                    new XSSFSheetXMLHandler(styles, strings, new SimpleSheetContentsHandler(), false));
        }

        sheetParser.parse(new InputSource(sheetInputStream));
    }

    public static class SimpleSheetContentsHandler implements SheetContentsHandler {
        private Logger logger = LoggerFactory.getLogger(SimpleSheetContentsHandler.class);
        protected ImportDecryptModel row = new ImportDecryptModel();
        protected int a = -1;
        protected JudgementModel isEmpty = new JudgementModel();

        @Override
        public void startRow(int rowNum) {
            if (rowNum == a + 1) {
                a = rowNum;
            } else {
                isEmpty.setIsEmpty(true);
                logger.error("第" + rowNum + "行数据为空");
                a = rowNum;
                isEmpty.setRowNum(rowNum);
            }
            row = new ImportDecryptModel();
        }

        @Override
        public void endRow(int rowNum) {
            logger.error(rowNum + " : " + row);
        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            if (isEmpty != null && !isEmpty.getIsEmpty()) {
                switch (cellReference.substring(0, 1)) {
                case "A":
                    row.setUserId(formattedValue);
                    break;
                case "B":
                    row.setUid(formattedValue);
                    break;
                case "C":
                    row.setUserName(formattedValue);
                    break;
                case "D":
                    row.setIdCard(formattedValue);
                    break;
                case "E":
                    row.setMobileNo(formattedValue);
                    break;
                case "F":
                    row.setBankCard(formattedValue);
                    break;
                default:
                    break;
                }
            }
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {

        }
    }

解析的工作全部在cell()里面,速度大约100万条记录20s左右。关键不会内存溢出

你可能感兴趣的:(eclispe,j2ee)