对于用JAVA访问EXCEL文件,常用的组件有jxl和POI, jxl因为目前已无版本更新,并且仅能访问EXCEL2007以前的文件格式,对xlsx文件格式目前不支持, 故而推荐使用POI,
POI的官方下载地址为: http://poi.apache.org/
本质上来说, EXCEL2007文档是一个XML文档, 所以POI对EXCEL文件读操作有两种方式,一种是DOM, 一种是SAX, DOM对EXCEL操作简单,易上手, 但是占用较多的内存,尤其是表格超过10000行, 经常会出现内存溢出问题,虽然可以改变JVM启动参数, 加大内存,但不能从根本上解决问题,所以当需要读取较大的EXCEL文件时, 我们采用第二种方式更为合理.
对于EXCEL文档写操作也面临同样问题, 如果写入的行数过多, 也会遇到内存溢出, 但是写操作解决大文件的办法比较简单, 设置写缓存就可以了, 不过因为EXCEL的SHEET有最大行数的限制(65534), 所以如果行数超过这个限制, 需要创建新的SHEET, 以下是对EXCEL大文件读写操作的示例.
import java.io.InputStream;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
public class TestExcelReader {
public static void main(String[] args) throws Exception {
String path = "c://cache//test.xlsx";
ExcelHandler handler = new ExcelHandler();
handler.process(path);
}
public static class ExcelHandler extends DefaultHandler {
public ExcelHandler() {}
private StylesTable stylesTable;
private CellDataType dataType;
private SharedStringsTable sst;
private Map<String, String> datas;
private short dataFormat;
private String dataFormatString;
private boolean tagString;
private boolean tElement = false;
private String content;
private Coordinate coordinate;
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
super.startElement(uri, localName, qName, attributes);
if ("c".equals(qName)) {
this.setDataType(attributes);
coordinate = getCoordinate(attributes.getValue("r"));
}
else if ("t".equals(qName)) {
tElement = true;
}
else {
tElement = false;
}
content = "";
}
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
super.endElement(uri, localName, qName);
if (tagString) {
int idx = Integer.parseInt(content);
content = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
}
if (tElement) {
String value = content.trim();
setDataValue(datas, value, true);
tElement = false;
}
if ("v".equals(qName)) {
setDataValue(datas, content.trim(), false);
}
if ("row".equals(qName)) {
// process the row datas;
System.out.println(datas);
datas.clear();
}
}
public void process(String path) {
InputStream is = null;
OPCPackage pkg = null;
try {
pkg = OPCPackage.open(path);
XSSFReader reader = new XSSFReader(pkg);
stylesTable = reader.getStylesTable();
SharedStringsTable sst = reader.getSharedStringsTable();
this.sst = sst;
Iterator<InputStream> it = reader.getSheetsData();
if (it.hasNext()) {
is = it.next();
InputSource source = new InputSource(is);
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
parser.setContentHandler(this);
parser.parse(source);
}
}
catch(Throwable ex) {
ex.printStackTrace();
}
finally {
try {
if (is != null) is.close();
if (pkg != null) pkg.close();
}
catch(Throwable e) {}
}
}
public void setDataValue(Map<String, String> datas, String value, boolean t) {
String column = coordinate.getColumnName();
if (t) {
datas.put(column, value);
}
else {
switch (dataType) {
case BOOL:
char first = value.charAt(0);
datas.put(column, first == '0' ? "否":"是");
break;
case ERROR:
String err = "\"ERROR:" + value.toString() + '"';
datas.put(column, err);
break;
case FORMULA:
datas.put(column, value);
break;
case INLINESTR:
XSSFRichTextString rts = new XSSFRichTextString(value.toString());
String inline = rts.toString();
datas.put(column, inline);
break;
case SSTINDEX:
String sidx = value.toString();
try {
int idx = Integer.parseInt(sidx);
XSSFRichTextString rtss = new XSSFRichTextString(sst.getEntryAt(idx));
String sstindex = rtss.toString();
datas.put(column, sstindex);
rtss = null;
}
catch (NumberFormatException ex) {
ex.printStackTrace();
datas.put(column, value);
}
break;
case NUMBER:
Double db = Double.parseDouble(value);
datas.put(column, String.valueOf(db));
break;
case DATE:
double number = Double.parseDouble(value);
Date date = DateUtil.getJavaDate(number);
datas.put(column, date.toString());
break;
default:
datas.put(column, "");
break;
}
}
}
public void setDataType(Attributes attributes) {
String cellType = attributes.getValue("t");
String cellStyleStr = attributes.getValue("s");
dataType = CellDataType.NUMBER;
if ("b".equals(cellType)) {
dataType = CellDataType.BOOL;
}
else if ("e".equals(cellType)) {
dataType = CellDataType.ERROR;
}
else if ("inlineStr".equals(cellType)) {
dataType = CellDataType.INLINESTR;
}
else if ("s".equals(cellType)) {
dataType = CellDataType.SSTINDEX;
}
else if ("str".equals(cellType)) {
dataType = CellDataType.FORMULA;
}
if (cellStyleStr != null) {
int styleIndex = Integer.parseInt(cellStyleStr);
XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
dataFormat = style.getDataFormat();
dataFormatString = style.getDataFormatString();
if (dataFormatString == null) {
dataType = CellDataType.NULL;
dataFormatString = BuiltinFormats.getBuiltinFormat(dataFormat);
}
if (dataFormat > 0) {
if (!isScientificReg(dataFormatString)) {
dataType = CellDataType.DATE;
}
}
}
}
}
public static interface Coordinate {
String getColumnName();
int getLine();
}
public static Coordinate getCoordinate(String row) {
Coordinate coordinate = null;
if (row != null && !"".equals(row)) {
int last = -1;
for(int i = 0; i < row.length(); i++) {
if (Character.isDigit(row.charAt(i))) {
last = i;
break;
}
}
String sline = last != -1 ? row.substring(last):"0";
final String column = last != -1 ? row.substring(0, last):row;
final int line = Integer.parseInt(sline);
coordinate = new Coordinate() {
@Override
public String getColumnName() {
return column;
}
@Override
public int getLine() {
return line;
}
};
}
if (coordinate == null) {
coordinate = new Coordinate() {
@Override
public String getColumnName() {
return "";
}
@Override
public int getLine() {
return 0;
}
};
}
return coordinate;
}
public static boolean isScientificReg(String str) {
boolean result = false;
if (str != null) {
String reg = "0.[0]+[Ee][+-]00";
Pattern pattern = Pattern.compile(reg);
result = pattern.matcher(str).matches();
}
return result;
}
public static enum CellDataType {
BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL
}
}