poi加载xlsx大数据处理!

package org.hzjun.hlt.excel;


import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;


import javax.xml.parsers.ParserConfigurationException;


import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;


/**
 * 针对poi加载xlsx大数据量的内存溢出,用此方法加载数据保证不溢出
 */
public class XlsxProcessHandler {
/**
* Uses the XSSF Event SAX helpers to do most of the work of parsing the
* Sheet XML, and outputs the contents as a (basic) CSV.
*/
private class SheetToCSV implements SheetContentsHandler {
private boolean firstCellOfRow = false;
private int currentRow = -1;
private int currentCol = -1;
private List rowList = null;


private void outputMissingRows(int number) {
// System.out.println( "number=" + number );
for (int i = 0; i < number; i++) {
for (int j = 0; j < minColumns; j++) {
// output.append( ',' );
}
// output.append( '\n' );
}
}


public void startRow(int rowNum) {
// If there were gaps, output the missing rows
outputMissingRows( rowNum - currentRow - 1 );
// Prepare for this row
firstCellOfRow = true;
currentRow = rowNum;
currentCol = -1;
}


public void endRow(int rowNum) {
// Ensure the minimum number of columns
for (int i = currentCol; i < minColumns; i++) {
// output.append( ',' );
}
// output.append( '\n' );
XlsxProcessHandler.this.processRow( rowList );
}


public void cell(String cellReference, String formattedValue, XSSFComment comment) {
// System.out.println( comment );
if (firstCellOfRow) {
firstCellOfRow = false;
rowList = new ArrayList();
} else {
// output.append( ',' );
}


// Did we miss any cells?
int thisCol = (new CellReference( cellReference )).getCol();
int missedCols = thisCol - currentCol - 1;
for (int i = 0; i < missedCols; i++) {
// output.append( ',' );
}
currentCol = thisCol;


// Number or string?
try {
rowList.add( formattedValue );
// Double.parseDouble( formattedValue );
// output.append( formattedValue );
} catch (NumberFormatException e) {
// output.append( '"' );
// output.append( formattedValue );
// output.append( '"' );
}
}


public void headerFooter(String text, boolean isHeader, String tagName) {
// Skip, no headers or footers in CSV
}
}


// /


private OPCPackage xlsxPackage;


/**
* Number of columns to read starting with leftmost
*/
private int minColumns;


/**
* Destination for data
*/
// private PrintStream output;
/**
* Creates a new XLSX -> CSV converter

* @param pkg
*            The XLSX package to process
* @param output
*            The PrintStream to output the CSV to
* @param minColumns
*            The minimum number of columns to output, or -1 for no minimum
*/
// public XLSX2CSV(OPCPackage pkg, PrintStream output, int minColumns) {
// }
/**
* Parses and shows the content of one sheet using the specified styles and
* shared-strings tables.

* @param styles
* @param strings
* @param sheetInputStream
*/
public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler, InputStream sheetInputStream)
throws IOException, ParserConfigurationException, SAXException {
DataFormatter formatter = new DataFormatter();
InputSource sheetSource = new InputSource( sheetInputStream );
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler( styles, null, strings, sheetHandler, formatter, false );
sheetParser.setContentHandler( handler );
sheetParser.parse( sheetSource );
} catch (ParserConfigurationException e) {
throw new RuntimeException( "SAX parser appears to be broken - " + e.getMessage() );
}
}


/**
* Initiates the processing of the XLS workbook file to CSV.

* @throws IOException
* @throws OpenXML4JException
* @throws ParserConfigurationException
* @throws SAXException
*/
public void process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable( this.xlsxPackage );
XSSFReader xssfReader = new XSSFReader( this.xlsxPackage );
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
int index = 0;
while (iter.hasNext()) {
// 暂时只处理一个sheet
if (index > 0)
continue;
InputStream stream = iter.next();
String sheetName = iter.getSheetName();
// this.output.println();
// this.output.println( sheetName + " [index=" + index + "]:" );
processSheet( styles, strings, new SheetToCSV(), stream );
stream.close();
++index;
}
}


public void processExcel(String filePath) {
// "f:/workbook1452231301852.xlsx"
File xlsxFile = new File( filePath );


try {
OPCPackage p = OPCPackage.open( xlsxFile.getPath(), PackageAccess.READ );
// XLSX2CSV xlsx2csv = new XLSX2CSV( p, System.out, minColumns );
this.xlsxPackage = p;
// this.output = output;
// this.minColumns = minColumns;
process();
p.close();
} catch (Exception e) {
e.printStackTrace();
}


}


public interface RowHandler {
public void processRow(List rowList);
}


public void processRow(List rowList) {


handler.processRow( rowList );
}


private RowHandler handler;


public XlsxProcessHandler(RowHandler handler) {
this.handler = handler;
}


public static void main(String[] args) throws Exception {


XlsxProcessHandler a = new XlsxProcessHandler( new RowHandler() {
@Override
public void processRow(List row) {
System.out.println( row );
}


} );
a.processExcel( "f:/workbook1452231301852.xlsx" );


}
}

你可能感兴趣的:(java)