现在的任务是把客户数据迁移到数据库中。由于客户提供的数据都存储在excel文件中,有些文件数据量还很大,在usermodel模式下读取时经常内存溢出,于是只能通过PL/SQL工具直接往数据库中复制粘贴,或者改用eventusermodel模式读取。直接复制倒是简单,但是速度太慢,一次复制得太多同样会内存溢出。
usermodel模式对excel操作前需要将文件全部装入内存,对较大文件来说内存开销很大。但是其使用简单。
eventusermodel模式采用事件模型,对文件边读取边处理,内存消耗较低,效率高,因为不用等待文件全部装入内存。但使用较复杂。
读取excel采用的API为Apache POI 3.6,使用前需先下载该包;若运行时提示缺少其他依赖包,请一并下载相应的依赖包。
下面展示的是excel2003及其之前版本的大文件读取方法。
抽象类 HxlsAbstract:
- package com.gaosheng.util.xls;
- import java.io.FileInputStream;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.io.PrintStream;
- import java.sql.SQLException;
- import java.util.ArrayList;
- import java.util.List;
-
- import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
- import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
- import org.apache.poi.hssf.eventusermodel.HSSFListener;
- import org.apache.poi.hssf.eventusermodel.HSSFRequest;
- import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
- import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
- import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
- import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
- import org.apache.poi.hssf.model.HSSFFormulaParser;
- import org.apache.poi.hssf.record.BOFRecord;
- import org.apache.poi.hssf.record.BlankRecord;
- import org.apache.poi.hssf.record.BoolErrRecord;
- import org.apache.poi.hssf.record.BoundSheetRecord;
- import org.apache.poi.hssf.record.FormulaRecord;
- import org.apache.poi.hssf.record.LabelRecord;
- import org.apache.poi.hssf.record.LabelSSTRecord;
- import org.apache.poi.hssf.record.NoteRecord;
- import org.apache.poi.hssf.record.NumberRecord;
- import org.apache.poi.hssf.record.RKRecord;
- import org.apache.poi.hssf.record.Record;
- import org.apache.poi.hssf.record.SSTRecord;
- import org.apache.poi.hssf.record.StringRecord;
- import org.apache.poi.hssf.usermodel.HSSFWorkbook;
- import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-
- public abstract class HxlsAbstract implements HSSFListener {
- private int minColumns;
- private POIFSFileSystem fs;
- private PrintStream output;
-
- private int lastRowNumber;
- private int lastColumnNumber;
-
-
- private boolean outputFormulaValues = true;
-
-
- private SheetRecordCollectingListener workbookBuildingListener;
- private HSSFWorkbook stubWorkbook;
-
-
- private SSTRecord sstRecord;
- private FormatTrackingHSSFListener formatListener;
-
-
- private int sheetIndex = -1;
- private BoundSheetRecord[] orderedBSRs;
- @SuppressWarnings("unchecked")
- private ArrayList boundSheetRecords = new ArrayList();
-
-
- private int nextRow;
- private int nextColumn;
- private boolean outputNextStringRecord;
-
- private int curRow;
- private List<String> rowlist;
- @SuppressWarnings( "unused")
- private String sheetName;
-
- public HxlsAbstract(POIFSFileSystem fs)
- throws SQLException {
- this.fs = fs;
- this.output = System.out;
- this.minColumns = -1;
- this.curRow = 0;
- this.rowlist = new ArrayList<String>();
- }
-
- public HxlsAbstract(String filename) throws IOException,
- FileNotFoundException, SQLException {
- this(new POIFSFileSystem(new FileInputStream(filename)));
- }
-
-
-
-
-
- public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException;
-
-
-
-
- public void process() throws IOException {
- MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
- this);
- formatListener = new FormatTrackingHSSFListener(listener);
-
- HSSFEventFactory factory = new HSSFEventFactory();
- HSSFRequest request = new HSSFRequest();
-
- if (outputFormulaValues) {
- request.addListenerForAllRecords(formatListener);
- } else {
- workbookBuildingListener = new SheetRecordCollectingListener(
- formatListener);
- request.addListenerForAllRecords(workbookBuildingListener);
- }
-
- factory.processWorkbookEvents(request, fs);
- }
-
-
-
-
- @SuppressWarnings("unchecked")
- public void processRecord(Record record) {
- int thisRow = -1;
- int thisColumn = -1;
- String thisStr = null;
- String value = null;
-
- switch (record.getSid()) {
- case BoundSheetRecord.sid:
- boundSheetRecords.add(record);
- break;
- case BOFRecord.sid:
- BOFRecord br = (BOFRecord) record;
- if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
-
- if (workbookBuildingListener != null && stubWorkbook == null) {
- stubWorkbook = workbookBuildingListener
- .getStubHSSFWorkbook();
- }
-
-
-
-
- sheetIndex++;
- if (orderedBSRs == null) {
- orderedBSRs = BoundSheetRecord
- .orderByBofPosition(boundSheetRecords);
- }
- sheetName = orderedBSRs[sheetIndex].getSheetname();
- }
- break;
-
- case SSTRecord.sid:
- sstRecord = (SSTRecord) record;
- break;
-
- case BlankRecord.sid:
- BlankRecord brec = (BlankRecord) record;
-
- thisRow = brec.getRow();
- thisColumn = brec.getColumn();
- thisStr = "";
- break;
- case BoolErrRecord.sid:
- BoolErrRecord berec = (BoolErrRecord) record;
-
- thisRow = berec.getRow();
- thisColumn = berec.getColumn();
- thisStr = "";
- break;
-
- case FormulaRecord.sid:
- FormulaRecord frec = (FormulaRecord) record;
-
- thisRow = frec.getRow();
- thisColumn = frec.getColumn();
-
- if (outputFormulaValues) {
- if (Double.isNaN(frec.getValue())) {
-
-
- outputNextStringRecord = true;
- nextRow = frec.getRow();
- nextColumn = frec.getColumn();
- } else {
- thisStr = formatListener.formatNumberDateCell(frec);
- }
- } else {
- thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook,
- frec.getParsedExpression()) + '"';
- }
- break;
- case StringRecord.sid:
- if (outputNextStringRecord) {
-
- StringRecord srec = (StringRecord) record;
- thisStr = srec.getString();
- thisRow = nextRow;
- thisColumn = nextColumn;
- outputNextStringRecord = false;
- }
- break;
-
- case LabelRecord.sid:
- LabelRecord lrec = (LabelRecord) record;
-
- curRow = thisRow = lrec.getRow();
- thisColumn = lrec.getColumn();
- value = lrec.getValue().trim();
- value = value.equals("")?" ":value;
- this.rowlist.add(thisColumn, value);
- break;
- case LabelSSTRecord.sid:
- LabelSSTRecord lsrec = (LabelSSTRecord) record;
-
- curRow = thisRow = lsrec.getRow();
- thisColumn = lsrec.getColumn();
- if (sstRecord == null) {
- rowlist.add(thisColumn, " ");
- } else {
- value = sstRecord
- .getString(lsrec.getSSTIndex()).toString().trim();
- value = value.equals("")?" ":value;
- rowlist.add(thisColumn,value);
- }
- break;
- case NoteRecord.sid:
- NoteRecord nrec = (NoteRecord) record;
-
- thisRow = nrec.getRow();
- thisColumn = nrec.getColumn();
-
- thisStr = '"' + "(TODO)" + '"';
- break;
- case NumberRecord.sid:
- NumberRecord numrec = (NumberRecord) record;
-
- curRow = thisRow = numrec.getRow();
- thisColumn = numrec.getColumn();
- value = formatListener.formatNumberDateCell(numrec).trim();
- value = value.equals("")?" ":value;
-
- rowlist.add(thisColumn, value);
- break;
- case RKRecord.sid:
- RKRecord rkrec = (RKRecord) record;
-
- thisRow = rkrec.getRow();
- thisColumn = rkrec.getColumn();
- thisStr = '"' + "(TODO)" + '"';
- break;
- default:
- break;
- }
-
-
- if (thisRow != -1 && thisRow != lastRowNumber) {
- lastColumnNumber = -1;
- }
-
-
- if (record instanceof MissingCellDummyRecord) {
- MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
- curRow = thisRow = mc.getRow();
- thisColumn = mc.getColumn();
- rowlist.add(thisColumn," ");
- }
-
-
- if (thisStr != null) {
- if (thisColumn > 0) {
- output.print(',');
- }
- output.print(thisStr);
- }
-
-
- if (thisRow > -1)
- lastRowNumber = thisRow;
- if (thisColumn > -1)
- lastColumnNumber = thisColumn;
-
-
- if (record instanceof LastCellOfRowDummyRecord) {
- if (minColumns > 0) {
-
- if (lastColumnNumber == -1) {
- lastColumnNumber = 0;
- }
- }
-
- lastColumnNumber = -1;
- try {
- optRows(sheetIndex,curRow, rowlist);
- } catch (SQLException e) {
- e.printStackTrace();
- }
- rowlist.clear();
- }
- }
- }
package com.gaosheng.util.xls;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
 * Streaming reader for Excel 97-2003 (.xls) workbooks built on the POI HSSF
 * event API.
 *
 * <p>Records are handled one at a time as POI reports them. The cells of the
 * current row are collected into a list of strings indexed by column, and
 * {@link #optRows(int, int, List)} is invoked once per row when POI signals
 * the end of that row.
 *
 * <p>Every cell value is trimmed; empty values and cells without a value are
 * stored as a single space ({@code " "}) so that the list index always equals
 * the column index.
 */
public abstract class HxlsAbstract implements HSSFListener {

    /** Placeholder stored for blank, missing or empty cells. */
    private static final String EMPTY_CELL = " ";

    private POIFSFileSystem fs;

    /** Should we output the cached formula value ({@code true}) or the formula text? */
    private boolean outputFormulaValues = true;

    /** Only needed when formula text is requested (for parsing formulas). */
    private SheetRecordCollectingListener workbookBuildingListener;
    private HSSFWorkbook stubWorkbook;

    // Records we pick up as we process
    private SSTRecord sstRecord;
    private FormatTrackingHSSFListener formatListener;

    /** So we know which sheet we're on; incremented on every worksheet BOF record. */
    private int sheetIndex = -1;
    private BoundSheetRecord[] orderedBSRs;
    private List<BoundSheetRecord> boundSheetRecords = new ArrayList<BoundSheetRecord>();

    // For handling formulas with string results: the result arrives in the
    // StringRecord that follows the FormulaRecord, so the column is remembered.
    private int nextColumn;
    private boolean outputNextStringRecord;

    /** Cells of the row currently being read, indexed by column. */
    private List<String> rowlist;
    @SuppressWarnings("unused")
    private String sheetName;

    /**
     * Creates a reader over an already opened POI file system.
     *
     * @param fs the POIFS file system containing the workbook
     * @throws SQLException never thrown here; declared so that subclasses
     *         which open database resources in their constructors can chain
     */
    public HxlsAbstract(POIFSFileSystem fs)
            throws SQLException {
        this.fs = fs;
        this.rowlist = new ArrayList<String>();
    }

    /**
     * Creates a reader for the workbook stored in the given file.
     *
     * @param filename path of the .xls file
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if the file cannot be read as a POIFS file system
     * @throws SQLException see {@link #HxlsAbstract(POIFSFileSystem)}
     */
    public HxlsAbstract(String filename) throws IOException,
            FileNotFoundException, SQLException {
        this(new POIFSFileSystem(new FileInputStream(filename)));
    }

    /**
     * Row callback, invoked once for every row end reported by POI.
     *
     * @param sheetIndex zero-based index of the sheet being read
     * @param curRow zero-based index of the finished row
     * @param rowlist the row's cell values as strings, indexed by column; the
     *        list is cleared and reused after the call returns, so copy it if
     *        it has to be kept
     * @throws SQLException if the implementation fails to store the row
     */
    public abstract void optRows(int sheetIndex, int curRow, List<String> rowlist) throws SQLException;

    /**
     * Walks through the whole workbook, feeding every record to
     * {@link #processRecord(Record)}.
     *
     * @throws IOException if the workbook stream cannot be read
     */
    public void process() throws IOException {
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
                this);
        formatListener = new FormatTrackingHSSFListener(listener);

        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();

        if (outputFormulaValues) {
            request.addListenerForAllRecords(formatListener);
        } else {
            workbookBuildingListener = new SheetRecordCollectingListener(
                    formatListener);
            request.addListenerForAllRecords(workbookBuildingListener);
        }

        factory.processWorkbookEvents(request, fs);
    }

    /**
     * {@link HSSFListener} callback: handles a single record.
     *
     * <p>Every cell-bearing record stores its value in the current row list at
     * the cell's column. Previously only label, number and missing-cell records
     * did so while the others were printed to standard output, which made a
     * later {@code List.add(index, value)} fail with
     * {@code IndexOutOfBoundsException} as soon as a row contained a blank,
     * boolean or formula cell followed by another cell.
     *
     * @param record the record reported by POI
     */
    public void processRecord(Record record) {
        // Dummy records are synthesized by MissingRecordAwareHSSFListener.
        if (record instanceof MissingCellDummyRecord) {
            setCell(((MissingCellDummyRecord) record).getColumn(), EMPTY_CELL);
            return;
        }
        if (record instanceof LastCellOfRowDummyRecord) {
            // The dummy record carries the row index itself, so the index is
            // right even for rows that only hold formula or blank cells.
            finishRow(((LastCellOfRowDummyRecord) record).getRow());
            return;
        }

        switch (record.getSid()) {
        case BoundSheetRecord.sid:
            boundSheetRecords.add((BoundSheetRecord) record);
            break;
        case BOFRecord.sid:
            BOFRecord br = (BOFRecord) record;
            if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
                // Create sub workbook if required
                if (workbookBuildingListener != null && stubWorkbook == null) {
                    stubWorkbook = workbookBuildingListener
                            .getStubHSSFWorkbook();
                }
                // Works by ordering the BSRs by the location of
                // their BOFRecords, and then knowing that we
                // process BOFRecords in byte offset order
                sheetIndex++;
                if (orderedBSRs == null) {
                    orderedBSRs = BoundSheetRecord
                            .orderByBofPosition(boundSheetRecords);
                }
                sheetName = orderedBSRs[sheetIndex].getSheetname();
            }
            break;
        case SSTRecord.sid:
            sstRecord = (SSTRecord) record;
            break;
        case BlankRecord.sid:
            setCell(((BlankRecord) record).getColumn(), EMPTY_CELL);
            break;
        case BoolErrRecord.sid:
            BoolErrRecord berec = (BoolErrRecord) record;
            // Boolean cells yield "true"/"false"; error cells stay empty.
            setCell(berec.getColumn(), berec.isBoolean()
                    ? String.valueOf(berec.getBooleanValue())
                    : EMPTY_CELL);
            break;
        case FormulaRecord.sid:
            FormulaRecord frec = (FormulaRecord) record;
            if (outputFormulaValues) {
                if (Double.isNaN(frec.getValue())) {
                    // Formula result is a string
                    // This is stored in the next record
                    outputNextStringRecord = true;
                    nextColumn = frec.getColumn();
                } else {
                    setCell(frec.getColumn(),
                            normalize(formatListener.formatNumberDateCell(frec)));
                }
            } else {
                setCell(frec.getColumn(), '"' + HSSFFormulaParser.toFormulaString(
                        stubWorkbook, frec.getParsedExpression()) + '"');
            }
            break;
        case StringRecord.sid:
            if (outputNextStringRecord) {
                // String result of the preceding formula
                StringRecord srec = (StringRecord) record;
                setCell(nextColumn, normalize(srec.getString()));
                outputNextStringRecord = false;
            }
            break;
        case LabelRecord.sid:
            LabelRecord lrec = (LabelRecord) record;
            setCell(lrec.getColumn(), normalize(lrec.getValue()));
            break;
        case LabelSSTRecord.sid:
            LabelSSTRecord lsrec = (LabelSSTRecord) record;
            if (sstRecord == null) {
                setCell(lsrec.getColumn(), EMPTY_CELL);
            } else {
                setCell(lsrec.getColumn(), normalize(
                        sstRecord.getString(lsrec.getSSTIndex()).toString()));
            }
            break;
        case NumberRecord.sid:
            NumberRecord numrec = (NumberRecord) record;
            setCell(numrec.getColumn(),
                    normalize(formatListener.formatNumberDateCell(numrec)));
            break;
        case RKRecord.sid:
            RKRecord rkrec = (RKRecord) record;
            // NOTE(review): stored unformatted; the event API is expected to
            // deliver RK cells as NumberRecords, so this is a fallback only.
            setCell(rkrec.getColumn(), String.valueOf(rkrec.getRKNumber()));
            break;
        case NoteRecord.sid:
            // Cell comments are not cell values, so they are not added to the row.
            break;
        default:
            break;
        }
    }

    /** Trims a cell value and replaces a null or empty result with the placeholder. */
    private static String normalize(String raw) {
        if (raw == null) {
            return EMPTY_CELL;
        }
        String trimmed = raw.trim();
        return trimmed.equals("") ? EMPTY_CELL : trimmed;
    }

    /** Stores a value at the given column, padding any skipped columns with the placeholder. */
    private void setCell(int column, String value) {
        while (rowlist.size() < column) {
            rowlist.add(EMPTY_CELL);
        }
        if (column < rowlist.size()) {
            rowlist.set(column, value);
        } else {
            rowlist.add(value);
        }
    }

    /** Hands the collected row to {@link #optRows(int, int, List)} and resets the row buffer. */
    private void finishRow(int row) {
        try {
            optRows(sheetIndex, row, rowlist);
        } catch (SQLException e) {
            // processRecord cannot declare SQLException (HSSFListener contract);
            // keep the original best-effort behaviour and continue with the next row.
            System.err.println("optRows failed for sheet " + sheetIndex + ", row " + row);
            e.printStackTrace();
        } finally {
            rowlist.clear();
        }
    }
}
继承类: HxlsBig,作用:将excel中数据转储到数据库临时表中,实现optRows方法
- package com.gaosheng.util.examples.xls;
- import java.io.FileInputStream;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.io.PrintStream;
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.PreparedStatement;
- import java.sql.SQLException;
- import java.sql.Statement;
- import java.util.List;
- import java.util.Properties;
-
- import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-
- import com.gaosheng.util.xls.HxlsAbstract;
-
-
- public class HxlsBig extends HxlsAbstract{
-
- public static void main(String[] args) throws Exception {
-
- HxlsBig xls2csv = new HxlsBig("E:/up.xls","hxls_temp");
- xls2csv.process();
- xls2csv.close();
- }
-
- public HxlsBig(POIFSFileSystem fs, PrintStream output,String tableName)
- throws SQLException {
- super(fs);
- this.conn = getNew_Conn();
- this.statement = conn.createStatement();
- this.tableName = tableName;
- }
-
- public HxlsBig(String filename,String tableName) throws IOException,
- FileNotFoundException, SQLException {
- this(new POIFSFileSystem(new FileInputStream(filename)), System.out,tableName);
- }
-
- private Connection conn = null;
- private Statement statement = null;
- private PreparedStatement newStatement = null;
-
- private String tableName = "temp_table";
- private boolean create = true;
-
-
- public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
- if (curRow == 0 && sheetIndex == 0 ) {
- StringBuffer preSql = new StringBuffer("insert into " + tableName
- + " values(");
- StringBuffer table = new StringBuffer("create table " + tableName
- + "(");
- int c = rowlist.size();
- for (int i = 0; i < c; i++) {
- preSql.append("?,");
- table.append(rowlist.get(i));
- table.append(" varchar2(100) ,");
- }
-
- table.deleteCharAt(table.length() - 1);
- preSql.deleteCharAt(preSql.length() - 1);
- table.append(")");
- preSql.append(")");
- if (create) {
- statement = conn.createStatement();
- try{
- statement.execute("drop table "+tableName);
- }catch(Exception e){
-
- }finally{
- System.out.println("表 "+tableName+" 删除成功");
- }
</d
分享到:
评论