package org.hzjun.hlt.excel;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
* 针对poi加载xlsx大数据量的内存溢出,用此方法加载数据保证不溢出
*/
public class XlsxProcessHandler {
/**
* Uses the XSSF Event SAX helpers to do most of the work of parsing the
* Sheet XML, and outputs the contents as a (basic) CSV.
*/
private class SheetToCSV implements SheetContentsHandler {
private boolean
firstCellOfRow
= false;
private int
currentRow
= -1;
private int
currentCol
= -1;
private List
rowList
= null;
private void outputMissingRows(int number) {
// System.out.println( "number=" + number );
for (int i = 0; i < number; i++) {
for (int j = 0; j < minColumns; j++) {
// output.append( ',' );
}
// output.append( '\n' );
}
}
public void startRow(int rowNum) {
// If there were gaps, output the missing rows
outputMissingRows( rowNum - currentRow - 1 );
// Prepare for this row
firstCellOfRow = true;
currentRow = rowNum;
currentCol = -1;
}
public void endRow(int rowNum) {
// Ensure the minimum number of columns
for (int i = currentCol; i < minColumns; i++) {
// output.append( ',' );
}
// output.append( '\n' );
XlsxProcessHandler.this.processRow( rowList );
}
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
// System.out.println( comment );
if (firstCellOfRow) {
firstCellOfRow = false;
rowList = new ArrayList();
} else {
// output.append( ',' );
}
// Did we miss any cells?
int thisCol = (new CellReference( cellReference )).getCol();
int missedCols = thisCol - currentCol - 1;
for (int i = 0; i < missedCols; i++) {
// output.append( ',' );
}
currentCol = thisCol;
// Number or string?
try {
rowList.add( formattedValue );
// Double.parseDouble( formattedValue );
// output.append( formattedValue );
} catch (NumberFormatException e) {
// output.append( '"' );
// output.append( formattedValue );
// output.append( '"' );
}
}
public void headerFooter(String text, boolean isHeader, String tagName) {
// Skip, no headers or footers in CSV
}
}
// /
private OPCPackage
xlsxPackage;
/**
* Number of columns to read starting with leftmost
*/
private int
minColumns;
/**
* Destination for data
*/
// private PrintStream output;
/**
* Creates a new XLSX -> CSV converter
*
* @param pkg
* The XLSX package to process
* @param output
* The PrintStream to output the CSV to
* @param minColumns
* The minimum number of columns to output, or -1 for no minimum
*/
// public XLSX2CSV(OPCPackage pkg, PrintStream output, int minColumns) {
// }
/**
* Parses and shows the content of one sheet using the specified styles and
* shared-strings tables.
*
* @param styles
* @param strings
* @param sheetInputStream
*/
public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler, InputStream sheetInputStream)
throws IOException, ParserConfigurationException, SAXException {
DataFormatter formatter = new DataFormatter();
InputSource sheetSource = new InputSource( sheetInputStream );
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler( styles, null, strings, sheetHandler, formatter, false );
sheetParser.setContentHandler( handler );
sheetParser.parse( sheetSource );
} catch (ParserConfigurationException e) {
throw new RuntimeException( "SAX parser appears to be broken - " + e.getMessage() );
}
}
/**
* Initiates the processing of the XLS workbook file to CSV.
*
* @throws IOException
* @throws OpenXML4JException
* @throws ParserConfigurationException
* @throws SAXException
*/
public void process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable( this.xlsxPackage );
XSSFReader xssfReader = new XSSFReader( this.xlsxPackage );
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
int index = 0;
while (iter.hasNext()) {
// 暂时只处理一个sheet
if (index > 0)
continue;
InputStream stream = iter.next();
String sheetName = iter.getSheetName();
// this.output.println();
// this.output.println( sheetName + " [index=" + index + "]:" );
processSheet( styles, strings, new SheetToCSV(), stream );
stream.close();
++index;
}
}
public void processExcel(String filePath) {
// "f:/workbook1452231301852.xlsx"
File xlsxFile = new File( filePath );
try {
OPCPackage p = OPCPackage.open( xlsxFile.getPath(), PackageAccess.READ );
// XLSX2CSV xlsx2csv = new XLSX2CSV( p, System.out, minColumns );
this.xlsxPackage = p;
// this.output = output;
// this.minColumns = minColumns;
process();
p.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public interface RowHandler {
public void processRow(List rowList);
}
public void processRow(List rowList) {
handler.processRow( rowList );
}
private RowHandler
handler;
public XlsxProcessHandler(RowHandler handler) {
this.handler = handler;
}
public static void main(String[] args) throws Exception {
XlsxProcessHandler a = new XlsxProcessHandler( new RowHandler() {
@Override
public void processRow(List row) {
System.out.println( row );
}
} );
a.processExcel( "f:/workbook1452231301852.xlsx" );
}
}