Iterate over rows and cells(EXCEL文件的读取)

package cn.com.songjy.test.excel;

import java.io.IOException;
import java.text.DecimalFormat;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.CellReference;

public class ReadLine {

	private Log log = LogFactory.getLog(ReadLine.class);

	/**
	 * 
	 * @param excel_file_path --excel文件的存储路径
	 * @throws IOException
	 * @throws InvalidFormatException
	 */
	public void read(String excel_file_path) throws IOException,
			InvalidFormatException {
		
		Workbook wb = WorkbookFactory.create(new java.io.File(excel_file_path));
		
		Sheet sheet = wb.getSheetAt(0);
		
		/*定义最小读取的行数*/
		int rowStart = Math.min(0, sheet.getFirstRowNum());
		
		/*定义最大读取的行数*/
		int rowEnd = Math.max(0, sheet.getLastRowNum());
		
		for (int rowNum = rowStart; rowNum <= rowEnd; rowNum++) {

			Row row = sheet.getRow(rowNum);

			/* 假如当前为空行,跳过当前行继续读取下一行数据 */
			if (row == null)
				continue;

			/*定义当前行的最大读取列*/
			int lastColumn = Math.max(row.getLastCellNum(), 0);

			DecimalFormat df = new DecimalFormat("0");

			for (int cn = 0; cn < lastColumn; cn++) {
				Cell cell = row.getCell(cn, Row.RETURN_BLANK_AS_NULL);

				/* 假如当前为空格,跳过当前格继续读取下一格的数据 */
				if (null == cell)
					continue;

				CellReference cellRef = new CellReference(row.getRowNum(),
						cell.getColumnIndex());

				log.info(cellRef.formatAsString());
				log.info(" - ");

				switch (cell.getCellType()) {
				case Cell.CELL_TYPE_STRING:
					log.info(cell.getRichStringCellValue().getString());
					break;
				case Cell.CELL_TYPE_NUMERIC:
					if (DateUtil.isCellDateFormatted(cell)) {
						log.info(cell.getDateCellValue());
					} else {
						log.info(df.format(cell.getNumericCellValue()));
					}
					break;
				case Cell.CELL_TYPE_BOOLEAN:
					log.info(cell.getBooleanCellValue());
					break;
				case Cell.CELL_TYPE_FORMULA:
					log.info(cell.getCellFormula());
					break;
				default:
					log.info("----");
				}

			}
		}
	}

	public static void main(String[] args) {
		try {
			new ReadLine().read("E:\\a.xlsx");
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InvalidFormatException e) {
			e.printStackTrace();
		}
	}
}


该程序依赖jar包

poi-3.9.jar
poi-ooxml-3.9.jar
poi-ooxml-schemas-3.9.jar
commons-logging-1.1.3.jar
dom4j-1.6.jar
xmlbeans-2.3.0.jar

Java 正则表达式解析csv文件
POI读取大数据量的Excel文件
Java读取大数据量07Excel的方法(POI),如下:
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

public class ExcelUtil extends DefaultHandler {

	private SharedStringsTable sst;
	private String lastContents;
	private boolean nextIsString;

	private int sheetIndex = -1;
	private List<String> rowlist = new ArrayList<String>();
	private int curRow = 0;
	private int curCol = 0;

	/**
	 * 读取第一个工作簿的入口方法
	 * 
	 * @param path
	 */
	public void readOneSheet(String path) throws Exception {
		OPCPackage pkg = OPCPackage.open(path);
		XSSFReader r = new XSSFReader(pkg);
		SharedStringsTable sst = r.getSharedStringsTable();

		XMLReader parser = fetchSheetParser(sst);

		InputStream sheet = r.getSheet("rId1");

		InputSource sheetSource = new InputSource(sheet);
		parser.parse(sheetSource);

		sheet.close();
	}

	/**
	 * 读取所有工作簿的入口方法
	 * 
	 * @param path
	 * @throws Exception
	 */
	public void process(String path) throws Exception {
		OPCPackage pkg = OPCPackage.open(path);
		XSSFReader r = new XSSFReader(pkg);
		SharedStringsTable sst = r.getSharedStringsTable();

		XMLReader parser = fetchSheetParser(sst);

		Iterator<InputStream> sheets = r.getSheetsData();
		while (sheets.hasNext()) {
			curRow = 0;
			sheetIndex++;
			InputStream sheet = sheets.next();
			InputSource sheetSource = new InputSource(sheet);
			parser.parse(sheetSource);
			sheet.close();
		}
	}

	/**
	 * 该方法自动被调用,每读一行调用一次,在方法中写自己的业务逻辑即可
	 * 
	 * @param sheetIndex
	 *            工作簿序号
	 * @param curRow
	 *            处理到第几行
	 * @param rowList
	 *            当前数据行的数据集合
	 */
	public void optRow(int sheetIndex, int curRow, List<String> rowList) {
		String temp = "";
		for (String str : rowList) {
			temp += str + "_";
		}
		System.out.println(temp);
	}

	public XMLReader fetchSheetParser(SharedStringsTable sst)
			throws SAXException {
		XMLReader parser = XMLReaderFactory
				.createXMLReader("org.apache.xerces.parsers.SAXParser");
		this.sst = sst;
		parser.setContentHandler(this);
		return parser;
	}

	public void startElement(String uri, String localName, String name,
			Attributes attributes) throws SAXException {
		// c => 单元格
		if (name.equals("c")) {
			// 如果下一个元素是 SST 的索引,则将nextIsString标记为true
			String cellType = attributes.getValue("t");
			if (cellType != null && cellType.equals("s")) {
				nextIsString = true;
			} else {
				nextIsString = false;
			}
		}
		// 置空
		lastContents = "";
	}

	public void endElement(String uri, String localName, String name)
			throws SAXException {
		// 根据SST的索引值的到单元格的真正要存储的字符串
		// 这时characters()方法可能会被调用多次
		if (nextIsString) {
			try {
				int idx = Integer.parseInt(lastContents);
				lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
						.toString();
			} catch (Exception e) {

			}
		}

		// v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
		// 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
		if (name.equals("v")) {
			String value = lastContents.trim();
			value = value.equals("") ? " " : value;
			rowlist.add(curCol, value);
			curCol++;
		} else {
			// 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
			if (name.equals("row")) {
				optRow(sheetIndex, curRow, rowlist);
				rowlist.clear();
				curRow++;
				curCol = 0;
			}
		}
	}

	public void characters(char[] ch, int start, int length)
			throws SAXException {
		// 得到单元格内容的值
		lastContents += new String(ch, start, length);
	}

}

j​a​v​a​向​e​x​c​e​l​ ​写​入​海​量​数​据​内​存​溢​出​问​题​的​解​决,如下:
import java.io.FileOutputStream;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

public class Test {

	/*** @param args */
	public static void main(String[] args) {
		if (args[0].equals("hssf")) {
			hssfTest();
		}
		if (args[0].equals("sxssf")) {
			sxssfTest();
		}
	}

	/* 不会内存溢出,最后生成一个大概40M的文件 */
	public static void sxssfTest() {
		Workbook wb = new SXSSFWorkbook(100); // keep 100 rows in memory,//
												// exceeding rows will be//
												// flushed to disk
		Sheet sh = wb.createSheet();
		int rownum = 0;
		try {
			while (true) {
				Row row = sh.createRow(rownum);
				for (int cellnum = 0; cellnum < 10; cellnum++) {
					Cell cell = row.createCell(cellnum);
					String address = new CellReference(cell).formatAsString();
					cell.setCellValue(address);
				}
				System.out.println(rownum);
				rownum++;
				if (rownum >= 1000000)
					break;
			}
			FileOutputStream out = new FileOutputStream("sxssf.xlsx");
			wb.write(out);
			out.close();
		} catch (Exception e) {
			System.out.println(ExceptionUtils.getFullStackTrace(e));
		}
	}

	/* 20000行左右就内存溢出了 */
	public static void hssfTest() {
		XSSFWorkbook wb = new XSSFWorkbook();
		Sheet sh = wb.createSheet();
		int rownum = 0;
		try {
			while (true) {
				Row row = sh.createRow(rownum);
				for (int cellnum = 0; cellnum < 10; cellnum++) {
					Cell cell = row.createCell(cellnum);
					String address = new CellReference(cell).formatAsString();
					cell.setCellValue(address);
				}
				System.out.println(rownum);
				rownum++;
				if (rownum >= 1000000)
					break;
			}
			FileOutputStream out = new FileOutputStream("hssf.xlsx");
			wb.write(out);
			out.close();
		} catch (Exception e) {
			System.out.println(ExceptionUtils.getFullStackTrace(e));
		}
	}
}

你可能感兴趣的:(java,poi,Excel)