项目中经常会用到数据的导入和导出,通常导出就是查询数据库数据后使用apache POI写入到excel中。那么如何解决大数据的导入和导出呢?
WorkBook接口实现类 | 介绍 | 优点 | 缺点 |
---|---|---|---|
HSSFWorkbook | 可以操作Excel2003的版本后缀为.xls的文件 | 内存不会溢出 | 最多导出65535行 |
XSSFWorkbook | 操作Excel2003~2007之间的版本,拓展名为.xlsx | 容易内存溢出 | 最多1048576行,16384列 |
SXSSFWorkbook | 操作Excel2007之后的文件 | 内存中数据达到一定程度这些数据会被持久化到硬盘中存储起来,内存中存储的都是最新的数据;支持超大型Excel文件的创建 | 同一时间点只能访问一定数量的数据,如clone、公式求值、改动表头等不再被支持 |
怎么选择和使用呢?
public static final int sheetDataRows = 1000000;
public static final int writeDataRows = 200000;
public static final int oneSheetWriteCount = sheetDataRows/writeDataRows;
public Page checkPage(int pageNum, int pageSize){
Page res = new Page<>();
res.setCurrent(pageNum);
res.setSize(pageSize);
return res;
}
@Test
public void fastExport(){
try {
FileOutputStream os = new FileOutputStream(new File("文件导出.xlsx"));
ExcelWriter writer = new ExcelWriter(os, ExcelTypeEnum.XLSX);
Table table = new Table(1);
List<List<String>> titles = new ArrayList<>();
titles.add(Arrays.asList("id"));
titles.add(Arrays.asList("title"));
titles.add(Arrays.asList("tag"));
table.setHead(titles);
int totalCount = demoService.count();
int sheehtNum = totalCount%sheetDataRows==0? (totalCount/sheetDataRows)
: (totalCount/sheetDataRows+1);
int lastSheetWriteCount = totalCount%sheetDataRows == 0? (oneSheetWriteCount)
: (totalCount%sheetDataRows%writeDataRows==0? totalCount%sheetDataRows/writeDataRows: totalCount%sheetDataRows/writeDataRows+1);
List<List<String>> dataList = new ArrayList<>();
int count = 0;
for(int i=0; i<sheehtNum; i++){
Sheet sheet = new Sheet(i,0);
sheet.setSheetName("测试sheet" + i);
for(int j=0; j<(i==sheehtNum-1? oneSheetWriteCount:lastSheetWriteCount); j++){
dataList.clear();
Page<Demo> page = checkPage(j+1+i*oneSheetWriteCount,writeDataRows);
IPage<Demo> page1 = demoService.page(page);
page1.getRecords().stream().forEach(item ->{
dataList.add(Arrays.asList(item.getId(),item.getTitle(),item.getTag()));
});
writer.write0(dataList,sheet,table);
log.info("count:{}",count);
count++;
}
}
writer.finish();
os.flush();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException exception) {
exception.printStackTrace();
}
}
public class ActResultService {
public void saveData(List<Map<Integer,String>> dataList){
HashMap<String, Object> result = new HashMap<>();
//结果集中数据为0时,结束方法.进行下一次调用
if (dataList.size() == 0) {
return;
}
//JDBC分批插入+事务操作完成对10w数据的插入
Connection conn = null;
PreparedStatement ps = null;
try {
long startTime = System.currentTimeMillis();
System.out.println(dataList.size() + "条,开始导入到数据库时间:" + startTime + "ms");
conn = JDBCDruidUtil.getConnection();
//控制事务:默认不提交
conn.setAutoCommit(false);
String sql = "insert into demo(id, title, tag) VALUES ";
sql += "(?,?,?)";
ps = conn.prepareStatement(sql);
//循环结果集:这里循环不支持"烂布袋"表达式
for (int i = 0; i < dataList.size(); i++) {
Map<Integer, String> item = dataList.get(i);
ps.setString(1, item.get(0));
ps.setString(2, item.get(1));
ps.setString(3, item.get(2));
//将一组参数添加到此 PreparedStatement 对象的批处理命令中。
ps.addBatch();
}
//执行批处理
ps.executeBatch();
//手动提交事务
conn.commit();
long endTime = System.currentTimeMillis();
System.out.println(dataList.size() + "条,结束导入到数据库时间:" + endTime + "ms");
System.out.println(dataList.size() + "条,导入用时:" + (endTime - startTime) + "ms");
result.put("success", "1111");
} catch (Exception e) {
result.put("exception", "0000");
e.printStackTrace();
} finally {
//关连接
JDBCDruidUtil.close(conn, ps);
}
}
}
public class EasyExceGeneralDatalListener extends AnalysisEventListener<Map<Integer,String>> {
private ActResultService actResultService;
List<Map<Integer,String>> dataList = new ArrayList<>();
public EasyExceGeneralDatalListener(){}
public EasyExceGeneralDatalListener(ActResultService actResultService){
this.actResultService = actResultService;
}
@Override
public void invoke(Map<Integer,String> data, AnalysisContext analysisContext) {
dataList.add(data);
if(dataList.size() >= 100000){
actResultService.saveData(dataList);
dataList.clear();
}
}
@Override
public void doAfterAllAnalysed(AnalysisContext analysisContext) {
actResultService.saveData(dataList);
dataList.clear();
}
}
@Autowired
ActResultService actResultService;
@Test
public void importDataFromExcel(){
String filaName = "/Users/*****/Documents/myproject/workspace/文件导出.xlsx";
long start = System.currentTimeMillis();
log.info("读取excel开始!");
EasyExcel.read(filaName, new EasyExceGeneralDatalListener(actResultService)).doReadAll();
log.info("数据导入结束,耗时{}毫秒", (System.currentTimeMillis()-start));
}