package cn.com.songjy.test.excel; import java.io.IOException; import java.text.NumberFormat; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; import org.apache.poi.ss.util.CellReference; public class ReadLine { private Log log = LogFactory.getLog(ReadLine.class); /** * * @param excel_file_path * --excel文件的存储路径 * @throws IOException * @throws InvalidFormatException */ public void read(String excel_file_path) throws IOException, InvalidFormatException { Workbook wb = WorkbookFactory.create(new java.io.File(excel_file_path)); Sheet sheet = wb.getSheetAt(0); /* 定义最小读取的行数 */ int rowStart = Math.min(0, sheet.getFirstRowNum()); /* 定义最大读取的行数 */ int rowEnd = Math.max(0, sheet.getLastRowNum()); for (int rowNum = rowStart; rowNum <= rowEnd; rowNum++) { Row row = sheet.getRow(rowNum); /* 假如当前为空行,跳过当前行继续读取下一行数据 */ if (row == null) continue; /* 定义当前行的最大读取列 */ int lastColumn = Math.max(row.getLastCellNum(), 0); NumberFormat df = NumberFormat.getNumberInstance(); // df.setMaximumFractionDigits(2);//保留小数点后两位 // df.setMinimumFractionDigits(2);//如果不这么做,那么当value的值是100.00的时候返回100 df.setGroupingUsed(false);// 去掉千分位,如果想输出的格式用逗号隔开,可以设置成true for (int cn = 0; cn < lastColumn; cn++) { Cell cell = row.getCell(cn, Row.RETURN_BLANK_AS_NULL); /* 假如当前为空格,跳过当前格继续读取下一格的数据 */ if (null == cell) continue; CellReference cellRef = new CellReference(row.getRowNum(), cell.getColumnIndex()); log.info(cellRef.formatAsString()); log.info(" - "); switch (cell.getCellType()) { case Cell.CELL_TYPE_STRING: log.info(cell.getRichStringCellValue().getString()); break; case Cell.CELL_TYPE_NUMERIC: if (DateUtil.isCellDateFormatted(cell)) { log.info(cell.getDateCellValue()); } else { log.info(df.format(cell.getNumericCellValue())); } break; case Cell.CELL_TYPE_BOOLEAN: log.info(cell.getBooleanCellValue()); break; case Cell.CELL_TYPE_FORMULA: log.info(cell.getCellFormula()); break; default: log.info("----"); } } } } public static void main(String[] args) { try { new ReadLine().read("E:\\a.xlsx"); } catch (IOException e) { e.printStackTrace(); } catch (InvalidFormatException e) { e.printStackTrace(); } } }
该程序依赖jar包
poi-3.9.jar
poi-ooxml-3.9.jar
poi-ooxml-schemas-3.9.jar
commons-logging-1.1.3.jar
dom4j-1.6.jar
xmlbeans-2.3.0.jar
Java 正则表达式解析csv文件
POI读取大数据量的Excel文件
Java读取大数据量07Excel的方法(POI),如下:
import java.io.InputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; public class ExcelUtil extends DefaultHandler { private SharedStringsTable sst; private String lastContents; private boolean nextIsString; private int sheetIndex = -1; private Listrowlist = new ArrayList (); private int curRow = 0; private int curCol = 0; /** * 读取第一个工作簿的入口方法 * * @param path */ public void readOneSheet(String path) throws Exception { OPCPackage pkg = OPCPackage.open(path); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); InputStream sheet = r.getSheet("rId1"); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); } /** * 读取所有工作簿的入口方法 * * @param path * @throws Exception */ public void process(String path) throws Exception { OPCPackage pkg = OPCPackage.open(path); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iterator sheets = r.getSheetsData(); while (sheets.hasNext()) { curRow = 0; sheetIndex++; InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); } } /** * 该方法自动被调用,每读一行调用一次,在方法中写自己的业务逻辑即可 * * @param sheetIndex * 工作簿序号 * @param curRow * 处理到第几行 * @param rowList * 当前数据行的数据集合 */ public void optRow(int sheetIndex, int curRow, List rowList) { String temp = ""; for (String str : rowList) { temp += str + "_"; } System.out.println(temp); } public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { XMLReader parser = XMLReaderFactory .createXMLReader("org.apache.xerces.parsers.SAXParser"); this.sst = sst; parser.setContentHandler(this); return parser; } public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // c => 单元格 if (name.equals("c")) { // 如果下一个元素是 SST 的索引,则将nextIsString标记为true String cellType = attributes.getValue("t"); if (cellType != null && cellType.equals("s")) { nextIsString = true; } else { nextIsString = false; } } // 置空 lastContents = ""; } public void endElement(String uri, String localName, String name) throws SAXException { // 根据SST的索引值的到单元格的真正要存储的字符串 // 这时characters()方法可能会被调用多次 if (nextIsString) { try { int idx = Integer.parseInt(lastContents); lastContents = new XSSFRichTextString(sst.getEntryAt(idx)) .toString(); } catch (Exception e) { } } // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引 // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符 if (name.equals("v")) { String value = lastContents.trim(); value = value.equals("") ? " " : value; rowlist.add(curCol, value); curCol++; } else { // 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法 if (name.equals("row")) { optRow(sheetIndex, curRow, rowlist); rowlist.clear(); curRow++; curCol = 0; } } } public void characters(char[] ch, int start, int length) throws SAXException { // 得到单元格内容的值 lastContents += new String(ch, start, length); } }
java向excel 写入海量数据内存溢出问题的解决,如下:
import java.io.FileOutputStream; import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.util.CellReference; import org.apache.poi.xssf.streaming.SXSSFWorkbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; public class Test { /*** @param args */ public static void main(String[] args) { if (args[0].equals("hssf")) { hssfTest(); } if (args[0].equals("sxssf")) { sxssfTest(); } } /* 不会内存溢出,最后生成一个大概40M的文件 */ public static void sxssfTest() { Workbook wb = new SXSSFWorkbook(100); // keep 100 rows in memory,// // exceeding rows will be// // flushed to disk Sheet sh = wb.createSheet(); int rownum = 0; try { while (true) { Row row = sh.createRow(rownum); for (int cellnum = 0; cellnum < 10; cellnum++) { Cell cell = row.createCell(cellnum); String address = new CellReference(cell).formatAsString(); cell.setCellValue(address); } System.out.println(rownum); rownum++; if (rownum >= 1000000) break; } FileOutputStream out = new FileOutputStream("sxssf.xlsx"); wb.write(out); out.close(); } catch (Exception e) { System.out.println(ExceptionUtils.getFullStackTrace(e)); } } /* 20000行左右就内存溢出了 */ public static void hssfTest() { XSSFWorkbook wb = new XSSFWorkbook(); Sheet sh = wb.createSheet(); int rownum = 0; try { while (true) { Row row = sh.createRow(rownum); for (int cellnum = 0; cellnum < 10; cellnum++) { Cell cell = row.createCell(cellnum); String address = new CellReference(cell).formatAsString(); cell.setCellValue(address); } System.out.println(rownum); rownum++; if (rownum >= 1000000) break; } FileOutputStream out = new FileOutputStream("hssf.xlsx"); wb.write(out); out.close(); } catch (Exception e) { System.out.println(ExceptionUtils.getFullStackTrace(e)); } } }
apache common之CSV文件操作
读取合并单元格的内容
package demo; import java.io.IOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; import org.apache.poi.ss.util.CellRangeAddress; public class ReadLine { private Log log = LogFactory.getLog(ReadLine.class); /** * * @param excel_file_path * --excel文件的存储路径 * @throws IOException * @throws InvalidFormatException */ public void read(String excel_file_path) throws IOException, InvalidFormatException { Workbook wb = WorkbookFactory.create(new java.io.File(excel_file_path)); Sheet sheet = wb.getSheetAt(0); /* 定义最小读取的行数 */ int rowStart = Math.min(0, sheet.getFirstRowNum()); /* 定义最大读取的行数 */ int rowEnd = Math.max(0, sheet.getLastRowNum()); for (int rowNum = rowStart; rowNum <= rowEnd; rowNum++) { Row row = sheet.getRow(rowNum); /* 假如当前为空行,跳过当前行继续读取下一行数据 */ if (row == null) continue; /* 定义当前行的最大读取列 */ int lastColumn = Math.max(row.getLastCellNum(), 0); for (int cellnum = 0; cellnum < lastColumn; cellnum++) { Cell cell = row.getCell(cellnum, Row.RETURN_NULL_AND_BLANK); boolean is = isMergedRegion(sheet, rowNum, cellnum);// 判断是合并单元格否 if (true == is) { log.info(getMergedRegionValue(sheet, rowNum, cellnum)); } else { log.info(getCellValue(cell)); } } } } /** * 判断指定的单元格是否是合并单元格 * * @param sheet * @param row * @param column * @return */ public boolean isMergedRegion(Sheet sheet, int row, int column) { int sheetMergeCount = sheet.getNumMergedRegions(); for (int i = 0; i < sheetMergeCount; i++) { CellRangeAddress ca = sheet.getMergedRegion(i); int firstColumn = ca.getFirstColumn(); int lastColumn = ca.getLastColumn(); int firstRow = ca.getFirstRow(); int lastRow = ca.getLastRow(); if (row >= firstRow && row <= lastRow) { if (column >= firstColumn && column <= lastColumn) { return true; } } } return false; } /** * 获取单元格的值 * * @param cell * @return */ public String getCellValue(Cell cell) { if (cell == null) return ""; if (cell.getCellType() == Cell.CELL_TYPE_STRING) { return cell.getStringCellValue(); } else if (cell.getCellType() == Cell.CELL_TYPE_BOOLEAN) { return String.valueOf(cell.getBooleanCellValue()); } else if (cell.getCellType() == Cell.CELL_TYPE_FORMULA) { return cell.getCellFormula(); } else if (cell.getCellType() == Cell.CELL_TYPE_NUMERIC) { return String.valueOf(cell.getNumericCellValue()); } return ""; } /** * 获取合并单元格的值 * * @param sheet * @param row * @param column * @return */ public String getMergedRegionValue(Sheet sheet, int row, int column) { int sheetMergeCount = sheet.getNumMergedRegions(); for (int i = 0; i < sheetMergeCount; i++) { CellRangeAddress ca = sheet.getMergedRegion(i); int firstColumn = ca.getFirstColumn(); int lastColumn = ca.getLastColumn(); int firstRow = ca.getFirstRow(); int lastRow = ca.getLastRow(); if (row >= firstRow && row <= lastRow) { if (column >= firstColumn && column <= lastColumn) { Row fRow = sheet.getRow(firstRow); Cell fCell = fRow.getCell(firstColumn); return getCellValue(fCell); } } } return null; } public static void main(String[] args) { try { new ReadLine().read("E:\\a.xlsx"); } catch (IOException e) { e.printStackTrace(); } catch (InvalidFormatException e) { e.printStackTrace(); } } }
添加超链接示例代码:
这个是设置一个超链接弹出email地址, 其他类似 import java.io.FileOutputStream; import org.apache.poi.ss.usermodel; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.ss.usermodel.IndexedColors; /** * @author lance */ public class HyperlinkExample { public static void main(String[]args) throws Exception{ Workbook wb = new XSSFWorkbook(); //or new HSSFWorkbook(); CreationHelper createHelper = wb.getCreationHelper(); CellStyle hlink_style = wb.createCellStyle(); Font hlink_font = wb.createFont(); hlink_font.setUnderline(Font.U_SINGLE); hlink_font.setColor(IndexedColors.BLUE.getIndex()); hlink_style.setFont(hlink_font); Cell cell; Sheet sheet = wb.createSheet("Hyperlinks"); //URL cell = sheet.createRow(0).createCell((short)0); cell.setCellValue("URL Link"); Hyperlink link = createHelper.createHyperlink(Hyperlink.LINK_URL); link.setAddress("http://poi.apache.org/"); cell.setHyperlink(link); cell.setCellStyle(hlink_style); //link to a file in the current directory cell = sheet.createRow(1).createCell((short)0); cell.setCellValue("File Link"); link = createHelper.createHyperlink(Hyperlink.LINK_FILE); link.setAddress("link1.xls"); cell.setHyperlink(link); cell.setCellStyle(hlink_style); //e-mail link cell = sheet.createRow(2).createCell((short)0); cell.setCellValue("Email Link"); link = createHelper.createHyperlink(Hyperlink.LINK_EMAIL); //设置路径 link.setAddress("mailto:poi@apache.org?subject=Hyperlinks"); cell.setHyperlink(link); cell.setCellStyle(hlink_style); //create a target sheet and cell Sheet sheet2 = wb.createSheet("Target Sheet"); sheet2.createRow(0).createCell((short)0).setCellValue("Target Cell"); cell = sheet.createRow(3).createCell((short)0); cell.setCellValue("Worksheet Link"); Hyperlink link2 = createHelper.createHyperlink(Hyperlink.LINK_DOCUMENT); link2.setAddress("'Target Sheet'!A1"); cell.setHyperlink(link2); cell.setCellStyle(hlink_style); FileOutputStream out = new FileOutputStream("hyperinks.xlsx"); wb.write(out); out.close(); } }