package cn.com.songjy.test.excel;
import java.io.IOException;
import java.text.DecimalFormat;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.CellReference;
/**
 * Reads the first sheet of an Excel workbook with the Apache POI user model
 * and logs every non-blank cell as {@code <cell reference> - <value>}.
 */
public class ReadLine {
    /** Shared logger; static so it is also usable from {@link #main(String[])}. */
    private static final Log log = LogFactory.getLog(ReadLine.class);

    /**
     * Logs the content of every non-blank cell of the first sheet.
     *
     * <p>The workbook is loaded through an input stream that is closed in a
     * {@code finally} block, so the file handle is released even when reading
     * fails part-way through.
     *
     * @param excel_file_path path of the Excel file to read
     * @throws IOException if the file cannot be opened or read
     * @throws InvalidFormatException if POI does not recognise the file format
     */
    public void read(String excel_file_path) throws IOException,
            InvalidFormatException {
        java.io.InputStream in = new java.io.FileInputStream(excel_file_path);
        try {
            Workbook wb = WorkbookFactory.create(in);
            Sheet sheet = wb.getSheetAt(0);
            // One formatter for the whole sheet instead of one per row.
            DecimalFormat df = new DecimalFormat("0");
            // Rows before the first physical row are null and were skipped
            // anyway, so start directly at the first row (never below 0).
            int rowStart = Math.max(0, sheet.getFirstRowNum());
            int rowEnd = Math.max(0, sheet.getLastRowNum());
            for (int rowNum = rowStart; rowNum <= rowEnd; rowNum++) {
                Row row = sheet.getRow(rowNum);
                // Missing row: nothing to log, continue with the next one.
                if (row == null) {
                    continue;
                }
                // getLastCellNum() is negative for a row without cells.
                int lastColumn = Math.max(row.getLastCellNum(), 0);
                for (int cn = 0; cn < lastColumn; cn++) {
                    Cell cell = row.getCell(cn, Row.RETURN_BLANK_AS_NULL);
                    // Missing or blank cell: skip it.
                    if (cell == null) {
                        continue;
                    }
                    CellReference cellRef = new CellReference(row.getRowNum(),
                            cell.getColumnIndex());
                    // One log record per cell instead of three fragments.
                    log.info(cellRef.formatAsString() + " - "
                            + cellText(cell, df));
                }
            }
        } finally {
            in.close();
        }
    }

    /**
     * Renders a cell value as text according to its cell type.
     *
     * @param cell the non-null cell to render
     * @param df formatter applied to numeric cells that are not dates
     * @return string cells as their text, date-formatted numeric cells as the
     *         date's string form, other numeric cells formatted by {@code df},
     *         booleans as {@code true}/{@code false}, formula cells as the
     *         formula text, and {@code "----"} for any other cell type
     */
    private static String cellText(Cell cell, DecimalFormat df) {
        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_STRING:
            return cell.getRichStringCellValue().getString();
        case Cell.CELL_TYPE_NUMERIC:
            if (DateUtil.isCellDateFormatted(cell)) {
                return String.valueOf(cell.getDateCellValue());
            }
            return df.format(cell.getNumericCellValue());
        case Cell.CELL_TYPE_BOOLEAN:
            return String.valueOf(cell.getBooleanCellValue());
        case Cell.CELL_TYPE_FORMULA:
            return cell.getCellFormula();
        default:
            return "----";
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args optional; {@code args[0]} is the workbook path. When absent,
     *        the original sample path {@code E:\a.xlsx} is used.
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : "E:\\a.xlsx";
        try {
            new ReadLine().read(path);
        } catch (IOException e) {
            log.error("Failed to read " + path, e);
        } catch (InvalidFormatException e) {
            log.error("Unsupported workbook format: " + path, e);
        }
    }
}
该程序依赖以下 jar 包:
poi-3.9.jar
poi-ooxml-3.9.jar
poi-ooxml-schemas-3.9.jar
commons-logging-1.1.3.jar
dom4j-1.6.jar
xmlbeans-2.3.0.jar
Java 正则表达式解析csv文件
POI读取大数据量的Excel文件
Java 读取大数据量 Excel 2007(.xlsx)文件的方法(POI)如下:
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
 * SAX-based reader for .xlsx sheets built on the POI XSSF event API.
 *
 * <p>The instance is its own SAX content handler: it collects the text of
 * every {@code <v>} element of a row into a list and hands that list to
 * {@link #optRow(int, int, List)} when the {@code </row>} tag is reached.
 * Override {@code optRow} to plug in business logic.
 *
 * <p>NOTE(review): a cell element without a {@code <v>} child adds nothing to
 * the row list, so values following an empty cell shift left. Confirm whether
 * callers rely on list positions matching sheet columns.
 */
public class ExcelUtil extends DefaultHandler {
    /** Shared strings table of the workbook currently being parsed. */
    private SharedStringsTable sst;
    /** Text collected since the most recent start tag. */
    private String lastContents = "";
    /** True when the current cell's value is an index into {@link #sst}. */
    private boolean nextIsString;
    /** Index of the sheet currently being parsed; -1 before any parse. */
    private int sheetIndex = -1;
    /** Values of the row currently being parsed. */
    private List<String> rowlist = new ArrayList<String>();
    /** Zero-based count of rows already reported for the current sheet. */
    private int curRow = 0;
    /** Insert position of the next value inside {@link #rowlist}. */
    private int curCol = 0;

    /**
     * Parses the single sheet stored under relationship id {@code rId1}.
     *
     * <p>The package is released with {@code revert()}, which closes it
     * without writing anything back to the file.
     *
     * @param path path of the .xlsx file
     * @throws Exception if the package cannot be opened or the sheet cannot be
     *         parsed
     */
    public void readOneSheet(String path) throws Exception {
        OPCPackage pkg = OPCPackage.open(path);
        try {
            XSSFReader r = new XSSFReader(pkg);
            XMLReader parser = fetchSheetParser(r.getSharedStringsTable());
            // Report index 0, the same index process() reports for its first
            // sheet, instead of the initial -1.
            beginSheet(0);
            parseAndClose(parser, r.getSheet("rId1"));
        } finally {
            pkg.revert();
        }
    }

    /**
     * Parses every sheet of the workbook in the order returned by the reader.
     *
     * @param path path of the .xlsx file
     * @throws Exception if the package cannot be opened or a sheet cannot be
     *         parsed
     */
    public void process(String path) throws Exception {
        OPCPackage pkg = OPCPackage.open(path);
        try {
            XSSFReader r = new XSSFReader(pkg);
            XMLReader parser = fetchSheetParser(r.getSharedStringsTable());
            Iterator<InputStream> sheets = r.getSheetsData();
            // Numbering restarts at 0 on every call so the instance can be
            // reused for several workbooks.
            int index = 0;
            while (sheets.hasNext()) {
                beginSheet(index++);
                parseAndClose(parser, sheets.next());
            }
        } finally {
            pkg.revert();
        }
    }

    /** Resets the per-sheet counters and drops any half-collected row. */
    private void beginSheet(int index) {
        sheetIndex = index;
        curRow = 0;
        curCol = 0;
        rowlist.clear();
    }

    /** Parses one sheet stream and closes it even when parsing fails. */
    private static void parseAndClose(XMLReader parser, InputStream sheet)
            throws Exception {
        try {
            parser.parse(new InputSource(sheet));
        } finally {
            sheet.close();
        }
    }

    /**
     * Called once for every {@code </row>} tag; override to add business
     * logic. This default implementation prints the values, each followed by
     * {@code "_"}, on one line of standard output.
     *
     * @param sheetIndex index of the sheet being parsed
     * @param curRow zero-based count of rows already reported for this sheet
     * @param rowList values collected for the row; the list is cleared as soon
     *        as this method returns, so copy it if it must be kept
     */
    public void optRow(int sheetIndex, int curRow, List<String> rowList) {
        StringBuilder line = new StringBuilder();
        for (String str : rowList) {
            line.append(str).append("_");
        }
        System.out.println(line.toString());
    }

    /**
     * Creates a SAX reader that delivers its events to this instance.
     *
     * @param sst shared strings table used to resolve string cells
     * @return the configured reader
     * @throws SAXException if no SAX reader can be created
     */
    public XMLReader fetchSheetParser(SharedStringsTable sst)
            throws SAXException {
        XMLReader parser;
        try {
            parser = XMLReaderFactory
                    .createXMLReader("org.apache.xerces.parsers.SAXParser");
        } catch (SAXException e) {
            // The named Xerces class could not be created; fall back to the
            // platform default reader instead of failing.
            parser = XMLReaderFactory.createXMLReader();
        }
        this.sst = sst;
        parser.setContentHandler(this);
        return parser;
    }

    /**
     * Records whether the cell being opened holds a shared-string index and
     * clears the text buffer for the new element.
     */
    @Override
    public void startElement(String uri, String localName, String name,
            Attributes attributes) throws SAXException {
        // c => cell; t="s" means the value is an index into the shared
        // strings table.
        if (name.equals("c")) {
            nextIsString = "s".equals(attributes.getValue("t"));
        }
        lastContents = "";
    }

    /**
     * Stores a finished cell value, or reports the finished row.
     */
    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        if (name.equals("v")) {
            // v => cell value. Resolve it here only, rather than attempting a
            // lookup on every end tag.
            String value = lastContents;
            if (nextIsString) {
                try {
                    int idx = Integer.parseInt(value);
                    value = new XSSFRichTextString(sst.getEntryAt(idx))
                            .toString();
                } catch (NumberFormatException e) {
                    // Not a number: keep the raw text as the value.
                } catch (IndexOutOfBoundsException e) {
                    // No such shared string: keep the raw text as the value.
                }
            }
            value = value.trim();
            // An empty value is stored as a single space.
            rowlist.add(curCol, value.equals("") ? " " : value);
            curCol++;
        } else if (name.equals("row")) {
            // End of row: hand the collected values over and start afresh.
            optRow(sheetIndex, curRow, rowlist);
            rowlist.clear();
            curRow++;
            curCol = 0;
        }
    }

    /**
     * Appends character data to the buffer; the parser may deliver the text
     * of one element in several calls.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        lastContents += new String(ch, start, length);
    }
}
Java 向 Excel 写入海量数据时内存溢出问题的解决方法如下:
import java.io.FileOutputStream;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
 * Demo comparing the streaming {@code SXSSFWorkbook} with the in-memory
 * {@code XSSFWorkbook} when writing one million rows of ten cells each.
 */
public class Test {
    /** Number of rows written by each test. */
    private static final int ROW_COUNT = 1000000;
    /** Number of cells written per row. */
    private static final int COLUMN_COUNT = 10;

    /**
     * Runs the test selected by the first argument.
     *
     * @param args {@code args[0]} is {@code "hssf"} or {@code "sxssf"}; with
     *        no argument a usage line is printed instead of failing with an
     *        array index error
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.out.println("Usage: Test <hssf|sxssf>");
            return;
        }
        if ("hssf".equals(args[0])) {
            hssfTest();
        }
        if ("sxssf".equals(args[0])) {
            sxssfTest();
        }
    }

    /**
     * Writes {@code sxssf.xlsx} with the streaming workbook. The original
     * author notes this does not run out of memory and produces a file of
     * roughly 40 MB.
     */
    public static void sxssfTest() {
        // Keep 100 rows in memory; exceeding rows are flushed to disk.
        SXSSFWorkbook wb = new SXSSFWorkbook(100);
        try {
            fillAndWrite(wb, "sxssf.xlsx");
        } catch (Exception e) {
            System.out.println(ExceptionUtils.getFullStackTrace(e));
        } finally {
            // Remove the temporary files backing the flushed rows.
            wb.dispose();
        }
    }

    /**
     * Writes {@code hssf.xlsx} with the in-memory {@code XSSFWorkbook}. The
     * original author notes this runs out of memory at around 20000 rows.
     *
     * <p>Despite its name this method uses XSSF, not HSSF; the name is kept so
     * existing callers keep working.
     */
    public static void hssfTest() {
        try {
            fillAndWrite(new XSSFWorkbook(), "hssf.xlsx");
        } catch (Exception e) {
            System.out.println(ExceptionUtils.getFullStackTrace(e));
        }
    }

    /**
     * Fills a new sheet so every cell contains its own address, printing each
     * row number as it goes, then writes the workbook to {@code fileName}.
     * The output stream is closed even when writing fails.
     */
    private static void fillAndWrite(Workbook wb, String fileName)
            throws java.io.IOException {
        Sheet sh = wb.createSheet();
        for (int rownum = 0; rownum < ROW_COUNT; rownum++) {
            Row row = sh.createRow(rownum);
            for (int cellnum = 0; cellnum < COLUMN_COUNT; cellnum++) {
                Cell cell = row.createCell(cellnum);
                String address = new CellReference(cell).formatAsString();
                cell.setCellValue(address);
            }
            System.out.println(rownum);
        }
        FileOutputStream out = new FileOutputStream(fileName);
        try {
            wb.write(out);
        } finally {
            out.close();
        }
    }
}