前端时间写了注解方式Excel的读取和写入,它是根据注解完成Excel的操作,虽说支持大数据,但对于超大数据就无能为力了,因为它的读写期间都是将所有数据放入系统内存的,除非你有超大的内存。
因项目需要对超大数据的Excel读写操作,于是网上找了个超大数据的读写代码,这个不需要太大内存。并对此进行了简单的修改。
原理如下:
Excel超大数据读取:抽象Excel2007读取器,excel2007的底层数据结构是xml文件,采用SAX的事件驱动的方法解析 xml,需要继承DefaultHandler,在遇到文件内容时,事件会触发,这种做法可以大大降低内存的耗费,特别使用于大数据量的文件。
Excel超大数据写入:抽象excel2007读入器,先构建.xlsx一张模板,改写模板中的sheet.xml, 使用这种方法 写入.xlsx文件,不需要太大的内存。
先看调用示例:
String file = "E:/导入测试数据.xlsx";
ExcelReader reader = new ExcelReader() {
public void getRows(int sheetIndex, int curRow, List rowList) {
System.out.println("Sheet:" + sheetIndex + ", Row:" + curRow + ", Data:" +rowList);
}
};
reader.process(file, 1);
String file = "E:/导出测试数据.xlsx";
ExcelWriter writer = new ExcelWriter() {
public void generate() throws Exception {
// 电子表格开始
this.beginSheet();
for (int rownum = 0; rownum < 100; rownum++) {
// 插入新行
this.insertRow(rownum);
// 建立新单元格,索引值从0开始,表示第一列
this.createCell(0, "第 " + rownum + " 行");
this.createCell(1, 34343.123456789);
this.createCell(2, "23.67%");
this.createCell(3, "12:12:23");
this.createCell(4, "2014-10-11 12:12:23");
this.createCell(5, "true");
this.createCell(6, "false");
// 结束行
this.endRow();
}
// 电子表格结束
this.endSheet();
}
};
writer.process(file);
}
这里只展示了对数据的读取和写入,如果正式保存到数据库时建议读取一部分(如100条)再写入一次数据库,尽量不要读取一条就写入一条,这样会非常耗费资源。
源代码如下:
import java.io.InputStream;
import java.math.BigDecimal;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
* Excel超大数据读取,抽象Excel2007读取器,excel2007的底层数据结构是xml文件,采用SAX的事件驱动的方法解析
* xml,需要继承DefaultHandler,在遇到文件内容时,事件会触发,这种做法可以大大降低 内存的耗费,特别使用于大数据量的文件。
* @version 2014-9-2
*/
public abstract class ExcelReader extends DefaultHandler {
// 共享字符串表
private SharedStringsTable sst;
// 上一次的内容
private String lastContents;
private boolean nextIsString;
private int sheetIndex = -1;
private List rowList = new ArrayList();
// 当前行
private int curRow = 0;
// 当前列
private int curCol = 0;
// 日期标志
private boolean dateFlag;
// 数字标志
private boolean numberFlag;
private boolean isTElement;
/**
* 遍历工作簿中所有的电子表格
* @param filename
* @throws Exception
*/
public void process(String filename) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator sheets = r.getSheetsData();
while (sheets.hasNext()) {
curRow = 0;
sheetIndex++;
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
}
}
/**
* 只遍历一个电子表格,其中sheetId为要遍历的sheet索引,从1开始,1-3
* @param filename
* @param sheetId
* @throws Exception
*/
public void process(String filename, int sheetId) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
// 根据 rId# 或 rSheet# 查找sheet
InputStream sheet2 = r.getSheet("rId" + sheetId);
sheetIndex++;
InputSource sheetSource = new InputSource(sheet2);
parser.parse(sheetSource);
sheet2.close();
}
public XMLReader fetchSheetParser(SharedStringsTable sst)
throws SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
this.sst = sst;
parser.setContentHandler(this);
return parser;
}
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
// System.out.println("startElement: " + localName + ", " + name + ", " + attributes);
// c => 单元格
if ("c".equals(name)) {
// 如果下一个元素是 SST 的索引,则将nextIsString标记为true
String cellType = attributes.getValue("t");
if ("s".equals(cellType)) {
nextIsString = true;
} else {
nextIsString = false;
}
// 日期格式
String cellDateType = attributes.getValue("s");
if ("1".equals(cellDateType)) {
dateFlag = true;
} else {
dateFlag = false;
}
String cellNumberType = attributes.getValue("s");
if ("2".equals(cellNumberType)) {
numberFlag = true;
} else {
numberFlag = false;
}
}
// 当元素为t时
if ("t".equals(name)) {
isTElement = true;
} else {
isTElement = false;
}
// 置空
lastContents = "";
}
public void endElement(String uri, String localName, String name)
throws SAXException {
// System.out.println("endElement: " + localName + ", " + name);
// 根据SST的索引值的到单元格的真正要存储的字符串
// 这时characters()方法可能会被调用多次
if (nextIsString) {
try {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
.toString();
} catch (Exception e) {
}
}
// t元素也包含字符串
if (isTElement) {
String value = lastContents.trim();
rowList.add(curCol, value);
curCol++;
isTElement = false;
// v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
// 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
} else if ("v".equals(name)) {
String value = lastContents.trim();
value = value.equals("") ? " " : value;
try {
// 日期格式处理
if (dateFlag) {
Date date = HSSFDateUtil.getJavaDate(Double.valueOf(value));
SimpleDateFormat dateFormat = new SimpleDateFormat("dd/MM/yyyy");
value = dateFormat.format(date);
}
// 数字类型处理
if (numberFlag) {
BigDecimal bd = new BigDecimal(value);
value = bd.setScale(3, BigDecimal.ROUND_UP).toString();
}
} catch (Exception e) {
// 转换失败仍用读出来的值
}
rowList.add(curCol, value);
curCol++;
} else {
// 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
if (name.equals("row")) {
getRows(sheetIndex + 1, curRow, rowList);
rowList.clear();
curRow++;
curCol = 0;
}
}
}
public void characters(char[] ch, int start, int length)
throws SAXException {
// 得到单元格内容的值
lastContents += new String(ch, start, length);
}
/**
* 获取行数据回调
* @param sheetIndex
* @param curRow
* @param rowList
*/
public abstract void getRows(int sheetIndex, int curRow, List rowList);
/**
* 测试方法
*/
public static void main(String[] args) throws Exception {
String file = "E:/导入测试数据.xlsx";
ExcelReader reader = new ExcelReader() {
public void getRows(int sheetIndex, int curRow, List rowList) {
System.out.println("Sheet:" + sheetIndex + ", Row:" + curRow + ", Data:" +rowList);
}
};
reader.process(file, 1);
}
}
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Writer;
import java.util.Calendar;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
* Excel超大数据写入,抽象excel2007读入器,先构建.xlsx一张模板,改写模板中的sheet.xml,
* 使用这种方法 写入.xlsx文件,不需要太大的内存
* @version 2014-9-2
*/
public abstract class ExcelWriter {
private SpreadsheetWriter sw;
/**
* 写入电子表格的主要流程
*
* @param fileName
* @throws Exception
*/
public void process(String fileName) throws Exception {
// 建立工作簿和电子表格对象
XSSFWorkbook wb = new XSSFWorkbook();
XSSFSheet sheet = wb.createSheet("sheet1");
// 持有电子表格数据的xml文件名 例如 /xl/worksheets/sheet1.xml
String sheetRef = sheet.getPackagePart().getPartName().getName();
// 保存模板
FileOutputStream os = new FileOutputStream("template.xlsx");
wb.write(os);
os.close();
// 生成xml文件
File tmp = File.createTempFile("sheet", ".xml");
Writer fw = new FileWriter(tmp);
sw = new SpreadsheetWriter(fw);
generate();
fw.close();
// 使用产生的数据替换模板
File templateFile = new File("template.xlsx");
FileOutputStream out = new FileOutputStream(fileName);
substitute(templateFile, tmp, sheetRef.substring(1), out);
out.close();
// 删除文件之前调用一下垃圾回收器,否则无法删除模板文件
System.gc();
// 删除临时模板文件
if (templateFile.isFile() && templateFile.exists()) {
templateFile.delete();
}
}
/**
* 类使用者应该使用此方法进行写操作
*
* @throws Exception
*/
public abstract void generate() throws Exception;
public void beginSheet() throws IOException {
sw.beginSheet();
}
public void insertRow(int rowNum) throws IOException {
sw.insertRow(rowNum);
}
public void createCell(int columnIndex, String value) throws IOException {
sw.createCell(columnIndex, value, -1);
}
public void createCell(int columnIndex, double value) throws IOException {
sw.createCell(columnIndex, value, -1);
}
public void endRow() throws IOException {
sw.endRow();
}
public void endSheet() throws IOException {
sw.endSheet();
}
/**
*
* @param zipfile
* the template file
* @param tmpfile
* the XML file with the sheet data
* @param entry
* the name of the sheet entry to substitute, e.g.
* xl/worksheets/sheet1.xml
* @param out
* the stream to write the result to
*/
private static void substitute(File zipfile, File tmpfile, String entry,
OutputStream out) throws IOException {
ZipFile zip = new ZipFile(zipfile);
ZipOutputStream zos = new ZipOutputStream(out);
@SuppressWarnings("unchecked")
Enumeration en = (Enumeration) zip.entries();
while (en.hasMoreElements()) {
ZipEntry ze = en.nextElement();
if (!ze.getName().equals(entry)) {
zos.putNextEntry(new ZipEntry(ze.getName()));
InputStream is = zip.getInputStream(ze);
copyStream(is, zos);
is.close();
}
}
zos.putNextEntry(new ZipEntry(entry));
InputStream is = new FileInputStream(tmpfile);
copyStream(is, zos);
is.close();
zos.close();
}
private static void copyStream(InputStream in, OutputStream out)
throws IOException {
byte[] chunk = new byte[1024];
int count;
while ((count = in.read(chunk)) >= 0) {
out.write(chunk, 0, count);
}
}
/**
* 在写入器中写入电子表格
*
*/
public static class SpreadsheetWriter {
private final Writer _out;
private int _rownum;
private static String LINE_SEPARATOR = System
.getProperty("line.separator");
public SpreadsheetWriter(Writer out) {
_out = out;
}
public void beginSheet() throws IOException {
_out.write(""
+ "");
_out.write("" + LINE_SEPARATOR);
}
public void endSheet() throws IOException {
_out.write(" ");
_out.write(" ");
}
/**
* 插入新行
*
* @param rownum
* 以0开始
*/
public void insertRow(int rownum) throws IOException {
_out.write("" + LINE_SEPARATOR);
this._rownum = rownum;
}
/**
* 插入行结束标志
*/
public void endRow() throws IOException {
_out.write("
" + LINE_SEPARATOR);
}
/**
* 插入新列
*
* @param columnIndex
* @param value
* @param styleIndex
* @throws IOException
*/
public void createCell(int columnIndex, String value, int styleIndex)
throws IOException {
String ref = new CellReference(_rownum, columnIndex)
.formatAsString();
_out.write("");
_out.write("" + encoderXML(value) + " ");
_out.write(" ");
}
public void createCell(int columnIndex, String value)
throws IOException {
createCell(columnIndex, value, -1);
}
public void createCell(int columnIndex, double value, int styleIndex)
throws IOException {
String ref = new CellReference(_rownum, columnIndex)
.formatAsString();
_out.write("");
_out.write("" + value + " ");
_out.write(" ");
}
public void createCell(int columnIndex, double value)
throws IOException {
createCell(columnIndex, value, -1);
}
public void createCell(int columnIndex, Calendar value, int styleIndex)
throws IOException {
createCell(columnIndex, DateUtil.getExcelDate(value, false),
styleIndex);
}
}
// XML Encode
private static final String[] xmlCode = new String[256];
static {
// Special characters
xmlCode['\''] = "'";
xmlCode['\"'] = "\""; // double quote
xmlCode['&'] = "&"; // ampersand
xmlCode['<'] = "<"; // lower than
xmlCode['>'] = ">"; // greater than
}
/**
*
* Encode the given text into xml.
*
*
* @param string
* the text to encode
* @return the encoded string
*/
public static String encoderXML(String string) {
if (string == null)
return "";
int n = string.length();
char character;
String xmlchar;
StringBuffer buffer = new StringBuffer();
// loop over all the characters of the String.
for (int i = 0; i < n; i++) {
character = string.charAt(i);
// the xmlcode of these characters are added to a StringBuffer
// one by one
try {
xmlchar = xmlCode[character];
if (xmlchar == null) {
buffer.append(character);
} else {
buffer.append(xmlCode[character]);
}
} catch (ArrayIndexOutOfBoundsException aioobe) {
buffer.append(character);
}
}
return buffer.toString();
}
/**
* 测试方法
*/
public static void main(String[] args) throws Exception {
String file = "E:/导出测试数据.xlsx";
ExcelWriter writer = new ExcelWriter() {
public void generate() throws Exception {
// 电子表格开始
this.beginSheet();
for (int rownum = 0; rownum < 100; rownum++) {
// 插入新行
this.insertRow(rownum);
// 建立新单元格,索引值从0开始,表示第一列
this.createCell(0, "第 " + rownum + " 行");
this.createCell(1, 34343.123456789);
this.createCell(2, "23.67%");
this.createCell(3, "12:12:23");
this.createCell(4, "2014-10-11 12:12:23");
this.createCell(5, "true");
this.createCell(6, "false");
// 结束行
this.endRow();
}
// 电子表格结束
this.endSheet();
}
};
writer.process(file);
}
}