网络上能搜到的大数据量Excel导入代码,大多通过继承POI原生的DefaultHandler,重写其startElement、endElement、characters方法来完成解析;而POI已经将这些逻辑封装在XSSFSheetXMLHandler中,只需实现它暴露的接口SheetContentsHandler即可。
使用SheetContentsHandler的例子可以参考官方的XLSX2CSV。
本例实现该接口:
package cn.skio.venus.api;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import javax.xml.parsers.ParserConfigurationException;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
/**
* @autor jasmine
*/
public class ExcelEventParser {
private String fileName;
private SimpleSheetContentsHandler handler;
// 测试使用对比使用SAX和UserModel模式选择(实际使用不需要)
private Integer saxInterupt;
private void setHandler(SimpleSheetContentsHandler handler) {
this.handler = handler;
}
// 放置读取数据
protected List> table = new ArrayList<>();
public ExcelEventParser(String filename, Integer saxInterupt){
this.fileName = filename;
this.saxInterupt = saxInterupt;
}
public List> parse() {
OPCPackage opcPackage = null;
InputStream inputStream = null;
try {
FileInputStream fileStream = new FileInputStream(fileName);
opcPackage = OPCPackage.open(fileStream);
XSSFReader xssfReader = new XSSFReader(opcPackage);
StylesTable styles = xssfReader.getStylesTable();
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(opcPackage);
inputStream = xssfReader.getSheetsData().next();
processSheet(styles, strings, inputStream);
} catch (Exception e) {
e.printStackTrace();
} finally {
if (inputStream != null) {
try {
inputStream.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (opcPackage != null) {
try {
opcPackage.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return table;
}
// 确定XMLReader解析器,使用SAX模式解析xml文件
private void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws SAXException, ParserConfigurationException, IOException {
XMLReader sheetParser = SAXHelper.newXMLReader();
if (handler == null) {
setHandler(new SimpleSheetContentsHandler());
}
sheetParser.setContentHandler(new XSSFSheetXMLHandler(styles, strings, handler, false));
try {
sheetParser.parse(new InputSource(sheetInputStream));
} catch (RuntimeException e) {
System.out.println("---> 遇到空行读取文件结束!");
}
}
// 实现SheetContentsHandler
public class SimpleSheetContentsHandler implements SheetContentsHandler{
protected List row;
@Override
public void startRow(int rowNum) {
row = new LinkedList<>();
}
@Override
public void endRow(int rowNum) {
// 判断是否使用异常作为文件读取结束(有些Excel文件格式特殊,导致很多空行,浪费内存)
if (saxInterupt == 1) {
if (row.isEmpty()) {
throw new RuntimeException("Excel文件读取完毕");
}
}
// 添加数据到list集合
table.add(row);
}
/**
* 所有单元格数据转换为string类型,需要自己做数据类型处理
* @param cellReference 单元格索引
* @param formattedValue 单元格内容(全部被POI格式化为字符串)
* @param comment
*/
@Override
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
row.add(formattedValue);
}
@Override
public void headerFooter(String text, boolean isHeader, String tagName) {
}
}
}
经测试发现,使用SAX模式(抛弃了样式等,只关注数据)仅消耗很少内存,效率高;而使用普通Workbook读取数据(测试文件为5.2MB、含大量空行)内存消耗 > 1GB(此时线上系统OOM概率非常大)。
导出数据的话瓶颈主要在于数据写入Excel文件,代码(同样的74273条数据导出)如下:
// Export with SXSSFWorkbook (streaming mode): fast for large data sets.
@GetMapping("/outExcel")
public void outPutExcel(HttpServletResponse response) throws Exception {
    // Keep at most 100 rows in memory; rows beyond that window are flushed to disk.
    SXSSFWorkbook wb = new SXSSFWorkbook(100);
    try {
        Sheet sh = wb.createSheet();
        List<Tmp> tmps = tmpDao.findAll();
        log.info("---> 数据量:{}", tmps.size());
        for (int rowNum = 0; rowNum < tmps.size(); rowNum++) {
            Row row = sh.createRow(rowNum);
            Tmp tmp = tmps.get(rowNum);
            row.createCell(0).setCellValue(tmp.getSource());
            row.createCell(1).setCellValue(tmp.getName());
            row.createCell(2).setCellValue(tmp.getPhone());
            row.createCell(3).setCellValue(tmp.getCity());
        }
        String fileName = "sxssf.xlsx";
        response.setContentType("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
        response.setHeader("Content-Disposition", "attachment;filename=" + URLEncoder.encode(fileName, "UTF-8"));
        wb.write(response.getOutputStream());
    } finally {
        // Rows flushed out of the in-memory window live in temporary files; dispose()
        // deletes them. Running this in finally also covers a failed export.
        wb.dispose();
        wb.close();
    }
}
// Export with XSSFWorkbook (whole workbook held in memory): inefficient for large data sets.
@GetMapping("/outExcel2")
public void outPutExcel2(HttpServletResponse response) throws Exception {
    // try-with-resources closes the workbook even when building or writing it fails.
    try (XSSFWorkbook wb = new XSSFWorkbook()) {
        Sheet sh = wb.createSheet();
        List<Tmp> tmps = tmpDao.findAll();
        log.info("---> 数据量:{}", tmps.size());
        for (int rowNum = 0; rowNum < tmps.size(); rowNum++) {
            Row row = sh.createRow(rowNum);
            Tmp tmp = tmps.get(rowNum);
            row.createCell(0).setCellValue(tmp.getSource());
            row.createCell(1).setCellValue(tmp.getName());
            row.createCell(2).setCellValue(tmp.getPhone());
            row.createCell(3).setCellValue(tmp.getCity());
        }
        String fileName = "sxssf.xlsx";
        response.setContentType("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
        // URL-encode the file name as UTF-8, matching the /outExcel endpoint.
        response.setHeader("Content-Disposition", "attachment;filename=" + URLEncoder.encode(fileName, "UTF-8"));
        wb.write(response.getOutputStream());
    }
}
效率对比:
| 对象 | 耗时 |
|---|---|
| SXSSFWorkbook | |
| XSSFWorkbook | |
参考链接:
[1]: https://blog.csdn.net/Holmofy/article/details/82532311
[2]: https://blog.csdn.net/daiyutage/article/details/53010491
[3]: https://www.cnblogs.com/yfrs/p/5689347.html
[4]: easyexcel