excel2007文件格式与之前版本不同,之前版本采用的是微软自己的存储格式。07版内容的存储采用XML格式,所以,理所当然的,对大数据量的 xlsx文件的读取采用的也是XML的处理方式SAX。
同之前的版本一样,大数据量文件的读取采用的是事件模型eventusermodel。usermodel模式需要将文件一次性全部读到内存中,07版的既然采用的存储模式是xml,解析用的DOM方式也是如此,这种模式操作简单,容易上手,但是对于大量数据占用的内存也是相当可观,在Eclipse中经常出现内存溢出。
下面就是采用eventusermodel对07excel文件读取。
同上篇,我将当前行的单元格数据存储到List中,抽象出 optRows 方法,该方法会在每行末尾时调用,方法参数为当前行索引curRow(int型)及存有行内单元格数据的List。继承类只需实现该行级方法即可。
经测试,对12万条数据,7M大小的文件也能正常运行。无需设置vm的内存空间。
excel读取采用的API为POI3.6,使用前先下载此包,若运行中出现其他依赖包不存在,请下载相应依赖包。
抽象类:XxlsAbstract ,作用:遍历excel文件,提供行级操作方法 optRows
- package com.gaosheng.util.xls;
- import java.io.InputStream;
- import java.sql.SQLException;
- import java.util.ArrayList;
- import java.util.Iterator;
- import java.util.List;
- import org.apache.poi.xssf.eventusermodel.XSSFReader;
- import org.apache.poi.xssf.model.SharedStringsTable;
- import org.apache.poi.xssf.usermodel.XSSFRichTextString;
- import org.apache.poi.openxml4j.opc.OPCPackage;
- import org.xml.sax.Attributes;
- import org.xml.sax.InputSource;
- import org.xml.sax.SAXException;
- import org.xml.sax.XMLReader;
- import org.xml.sax.helpers.DefaultHandler;
- import org.xml.sax.helpers.XMLReaderFactory;
- /**
- * XSSF and SAX (Event API)
- */
- public abstract class XxlsAbstract extends DefaultHandler {
- private SharedStringsTable sst;
- private String lastContents;
- private boolean nextIsString;
- private int sheetIndex = -1;
- private List<String> rowlist = new ArrayList<String>();
- private int curRow = 0;
- private int curCol = 0;
- //excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型
- // public abstract void optRows(int curRow, List<String> rowlist) throws SQLException ;
- //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型
- public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException;
- //只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3
- public void processOneSheet(String filename,int sheetId) throws Exception {
- OPCPackage pkg = OPCPackage.open(filename);
- XSSFReader r = new XSSFReader(pkg);
- SharedStringsTable sst = r.getSharedStringsTable();
- XMLReader parser = fetchSheetParser(sst);
- // rId2 found by processing the Workbook
- // 根据 rId# 或 rSheet# 查找sheet
- InputStream sheet2 = r.getSheet("rId"+sheetId);
- sheetIndex++;
- InputSource sheetSource = new InputSource(sheet2);
- parser.parse(sheetSource);
- sheet2.close();
- }
- /**
- * 遍历 excel 文件
- */
- public void process(String filename) throws Exception {
- OPCPackage pkg = OPCPackage.open(filename);
- XSSFReader r = new XSSFReader(pkg);
- SharedStringsTable sst = r.getSharedStringsTable();
- XMLReader parser = fetchSheetParser(sst);
- Iterator<InputStream> sheets = r.getSheetsData();
- while (sheets.hasNext()) {
- curRow = 0;
- sheetIndex++;
- InputStream sheet = sheets.next();
- InputSource sheetSource = new InputSource(sheet);
- parser.parse(sheetSource);
- sheet.close();
- }
- }
- public XMLReader fetchSheetParser(SharedStringsTable sst)
- throws SAXException {
- XMLReader parser = XMLReaderFactory
- .createXMLReader("org.apache.xerces.parsers.SAXParser");
- this.sst = sst;
- parser.setContentHandler(this);
- return parser;
- }
- public void startElement(String uri, String localName, String name,
- Attributes attributes) throws SAXException {
- // c => 单元格
- if (name.equals("c")) {
- // 如果下一个元素是 SST 的索引,则将nextIsString标记为true
- String cellType = attributes.getValue("t");
- if (cellType != null && cellType.equals("s")) {
- nextIsString = true;
- } else {
- nextIsString = false;
- }
- }
- // 置空
- lastContents = "";
- }
- public void endElement(String uri, String localName, String name)
- throws SAXException {
- // 根据SST的索引值的到单元格的真正要存储的字符串
- // 这时characters()方法可能会被调用多次
- if (nextIsString) {
- try {
- int idx = Integer.parseInt(lastContents);
- lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
- .toString();
- } catch (Exception e) {
- }
- }
- // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
- // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
- if (name.equals("v")) {
- String value = lastContents.trim();
- value = value.equals("")?" ":value;
- rowlist.add(curCol, value);
- curCol++;
- }else {
- //如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
- if (name.equals("row")) {
- try {
- optRows(sheetIndex,curRow,rowlist);
- } catch (SQLException e) {
- e.printStackTrace();
- }
- rowlist.clear();
- curRow++;
- curCol = 0;
- }
- }
- }
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- //得到单元格内容的值
- lastContents += new String(ch, start, length);
- }
- }
- package com.gaosheng.util.xls;
- import java.io.InputStream;
- import java.sql.SQLException;
- import java.util.ArrayList;
- import java.util.Iterator;
- import java.util.List;
- import org.apache.poi.xssf.eventusermodel.XSSFReader;
- import org.apache.poi.xssf.model.SharedStringsTable;
- import org.apache.poi.xssf.usermodel.XSSFRichTextString;
- import org.apache.poi.openxml4j.opc.OPCPackage;
- import org.xml.sax.Attributes;
- import org.xml.sax.InputSource;
- import org.xml.sax.SAXException;
- import org.xml.sax.XMLReader;
- import org.xml.sax.helpers.DefaultHandler;
- import org.xml.sax.helpers.XMLReaderFactory;
- /**
- * XSSF and SAX (Event API)
- */
- public abstract class XxlsAbstract extends DefaultHandler {
- private SharedStringsTable sst;
- private String lastContents;
- private boolean nextIsString;
- private int sheetIndex = -1;
- private List<String> rowlist = new ArrayList<String>();
- private int curRow = 0;
- private int curCol = 0;
- //excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型
- // public abstract void optRows(int curRow, List<String> rowlist) throws SQLException ;
- //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型
- public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException;
- //只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3
- public void processOneSheet(String filename,int sheetId) throws Exception {
- OPCPackage pkg = OPCPackage.open(filename);
- XSSFReader r = new XSSFReader(pkg);
- SharedStringsTable sst = r.getSharedStringsTable();
- XMLReader parser = fetchSheetParser(sst);
- // rId2 found by processing the Workbook
- // 根据 rId# 或 rSheet# 查找sheet
- InputStream sheet2 = r.getSheet("rId"+sheetId);
- sheetIndex++;
- InputSource sheetSource = new InputSource(sheet2);
- parser.parse(sheetSource);
- sheet2.close();
- }
- /**
- * 遍历 excel 文件
- */
- public void process(String filename) throws Exception {
- OPCPackage pkg = OPCPackage.open(filename);
- XSSFReader r = new XSSFReader(pkg);
- SharedStringsTable sst = r.getSharedStringsTable();
- XMLReader parser = fetchSheetParser(sst);
- Iterator<InputStream> sheets = r.getSheetsData();
- while (sheets.hasNext()) {
- curRow = 0;
- sheetIndex++;
- InputStream sheet = sheets.next();
- InputSource sheetSource = new InputSource(sheet);
- parser.parse(sheetSource);
- sheet.close();
- }
- }
- public XMLReader fetchSheetParser(SharedStringsTable sst)
- throws SAXException {
- XMLReader parser = XMLReaderFactory
- .createXMLReader("org.apache.xerces.parsers.SAXParser");
- this.sst = sst;
- parser.setContentHandler(this);
- return parser;
- }
- public void startElement(String uri, String localName, String name,
- Attributes attributes) throws SAXException {
- // c => 单元格
- if (name.equals("c")) {
- // 如果下一个元素是 SST 的索引,则将nextIsString标记为true
- String cellType = attributes.getValue("t");
- if (cellType != null && cellType.equals("s")) {
- nextIsString = true;
- } else {
- nextIsString = false;
- }
- }
- // 置空
- lastContents = "";
- }
- public void endElement(String uri, String localName, String name)
- throws SAXException {
- // 根据SST的索引值的到单元格的真正要存储的字符串
- // 这时characters()方法可能会被调用多次
- if (nextIsString) {
- try {
- int idx = Integer.parseInt(lastContents);
- lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
- .toString();
- } catch (Exception e) {
- }
- }
- // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
- // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
- if (name.equals("v")) {
- String value = lastContents.trim();
- value = value.equals("")?" ":value;
- rowlist.add(curCol, value);
- curCol++;
- }else {
- //如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
- if (name.equals("row")) {
- try {
- optRows(sheetIndex,curRow,rowlist);
- } catch (SQLException e) {
- e.printStackTrace();
- }
- rowlist.clear();
- curRow++;
- curCol = 0;
- }
- }
- }
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- //得到单元格内容的值
- lastContents += new String(ch, start, length);
- }
- }
继承类:XxlsBig,作用:将数据转出到数据库临时表
- package com.gaosheng.util.examples.xls;
- import java.io.FileInputStream;
- import java.io.IOException;
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.PreparedStatement;
- import java.sql.SQLException;
- import java.sql.Statement;
- import java.util.List;
- import java.util.Properties;
- import com.gaosheng.util.xls.XxlsAbstract;
- public class XxlsBig extends XxlsAbstract {
- public static void main(String[] args) throws Exception {
- XxlsBig howto = new XxlsBig("temp_table");
- howto.processOneSheet("F:/new.xlsx",1);
- howto.process("F:/new.xlsx");
- howto.close();
- }
- public XxlsBig(String tableName) throws SQLException{
- this.conn = getNew_Conn();
- this.statement = conn.createStatement();
- this.tableName = tableName;
- }
- private Connection conn = null;
- private Statement statement = null;
- private PreparedStatement newStatement = null;
- private String tableName = "temp_table";
- private boolean create = true;
- public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
- if (sheetIndex == 0 && curRow == 0) {
- StringBuffer preSql = new StringBuffer("insert into " + tableName
- + " values(");
- StringBuffer table = new StringBuffer("create table " + tableName
- + "(");
- int c = rowlist.size();
- for (int i = 0; i < c; i++) {
- preSql.append("?,");
- table.append(rowlist.get(i));
- table.append(" varchar2(100) ,");
- }
- table.deleteCharAt(table.length() - 1);
- preSql.deleteCharAt(preSql.length() - 1);
- table.append(")");
- preSql.append(")");
- if (create) {
- statement = conn.createStatement();
- try{
- statement.execute("drop table "+tableName);
- }catch(Exception e){
- }finally{
- System.out.println("表 "+tableName+" 删除成功");
- }
- if (!statement.execute(table.toString())) {
- System.out.println("创建表 "+tableName+" 成功");
- // return;
- } else {
- System.out.println("创建表 "+tableName+" 失败");
- return;
- }
- }
- conn.setAutoCommit(false);
- newStatement = conn.prepareStatement(preSql.toString());
- } else if(curRow>0) {
- // 一般行
- int col = rowlist.size();
- for (int i = 0; i < col; i++) {
- newStatement.setString(i + 1, rowlist.get(i).toString());
- }
- newStatement.addBatch();
- if (curRow % 1000 == 0) {
- newStatement.executeBatch();
- conn.commit();
- }
- }
- }
- private static Connection getNew_Conn() {
- Connection conn = null;
- Properties props = new Properties();
- FileInputStream fis = null;
- try {
- fis = new FileInputStream("D:/database.properties");
- props.load(fis);
- DriverManager.registerDriver(new oracle.jdbc.driver.OracleDriver());
- // String jdbcURLString =
- // "jdbc:oracle:thin:@192.168.0.28:1521:orcl";
- StringBuffer jdbcURLString = new StringBuffer();
- jdbcURLString.append("jdbc:oracle:thin:@");
- jdbcURLString.append(props.getProperty("host"));
- jdbcURLString.append(":");
- jdbcURLString.append(props.getProperty("port"));
- jdbcURLString.append(":");
- jdbcURLString.append(props.getProperty("database"));
- conn = DriverManager.getConnection(jdbcURLString.toString(), props
- .getProperty("user"), props.getProperty("password"));
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- try {
- fis.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- return conn;
- }
- public int close() {
- try {
- newStatement.executeBatch();
- conn.commit();
- System.out.println("数据写入完毕");
- this.newStatement.close();
- this.statement.close();
- this.conn.close();
- return 1;
- } catch (SQLException e) {
- return 0;
- }
- }
- }
- package com.gaosheng.util.examples.xls;
- import java.io.FileInputStream;
- import java.io.IOException;
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.PreparedStatement;
- import java.sql.SQLException;
- import java.sql.Statement;
- import java.util.List;
- import java.util.Properties;
- import com.gaosheng.util.xls.XxlsAbstract;
- public class XxlsBig extends XxlsAbstract {
- public static void main(String[] args) throws Exception {
- XxlsBig howto = new XxlsBig("temp_table");
- howto.processOneSheet("F:/new.xlsx",1);
- howto.process("F:/new.xlsx");
- howto.close();
- }
- public XxlsBig(String tableName) throws SQLException{
- this.conn = getNew_Conn();
- this.statement = conn.createStatement();
- this.tableName = tableName;
- }
- private Connection conn = null;
- private Statement statement = null;
- private PreparedStatement newStatement = null;
- private String tableName = "temp_table";
- private boolean create = true;
- public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
- if (sheetIndex == 0 && curRow == 0) {
- StringBuffer preSql = new StringBuffer("insert into " + tableName
- + " values(");
- StringBuffer table = new StringBuffer("create table " + tableName
- + "(");
- int c = rowlist.size();
- for (int i = 0; i < c; i++) {
- preSql.append("?,");
- table.append(rowlist.get(i));
- table.append(" varchar2(100) ,");
- }
- table.deleteCharAt(table.length() - 1);
- preSql.deleteCharAt(preSql.length() - 1);
- table.append(")");
- preSql.append(")");
- if (create) {
- statement = conn.createStatement();
- try{
- statement.execute("drop table "+tableName);
- }catch(Exception e){
- }finally{
- System.out.println("表 "+tableName+" 删除成功");
- }
- if (!statement.execute(table.toString())) {
- System.out.println("创建表 "+tableName+" 成功");
- // return;
- } else {
- System.out.println("创建表 "+tableName+" 失败");
- return;
- }
- }
- conn.setAutoCommit(false);
- newStatement = conn.prepareStatement(preSql.toString());
- } else if(curRow>0) {
- // 一般行
- int col = rowlist.size();
- for (int i = 0; i < col; i++) {
- newStatement.setString(i + 1, rowlist.get(i).toString());
- }
- newStatement.addBatch();
- if (curRow % 1000 == 0) {
- newStatement.executeBatch();
- conn.commit();
- }
- }
- }
- private static Connection getNew_Conn() {
- Connection conn = null;
- Properties props = new Properties();
- FileInputStream fis = null;
- try {
- fis = new FileInputStream("D:/database.properties");
- props.load(fis);
- DriverManager.registerDriver(new oracle.jdbc.driver.OracleDriver());
- // String jdbcURLString =
- // "jdbc:oracle:thin:@192.168.0.28:1521:orcl";
- StringBuffer jdbcURLString = new StringBuffer();
- jdbcURLString.append("jdbc:oracle:thin:@");
- jdbcURLString.append(props.getProperty("host"));
- jdbcURLString.append(":");
- jdbcURLString.append(props.getProperty("port"));
- jdbcURLString.append(":");
- jdbcURLString.append(props.getProperty("database"));
- conn = DriverManager.getConnection(jdbcURLString.toString(), props
- .getProperty("user"), props.getProperty("password"));
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- try {
- fis.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- return conn;
- }
- public int close() {
- try {
- newStatement.executeBatch();
- conn.commit();
- System.out.println("数据写入完毕");
- this.newStatement.close();
- this.statement.close();
- this.conn.close();
- return 1;
- } catch (SQLException e) {
- return 0;
- }
- }
- }
继承类:XxlsPrint,作用:将数据输出到控制台
- package com.gaosheng.util.examples.xls;
- import java.sql.SQLException;
- import java.util.List;
- import com.gaosheng.util.xls.XxlsAbstract;
- public class XxlsPrint extends XxlsAbstract {
- @Override
- public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
- for (int i = 0; i < rowlist.size(); i++) {
- System.out.print("'" + rowlist.get(i) + "',");
- }
- System.out.println();
- }
- public static void main(String[] args) throws Exception {
- XxlsPrint howto = new XxlsPrint();
- howto.processOneSheet("F:/new.xlsx",1);
- // howto.processAllSheets("F:/new.xlsx");
- }
- }
- package com.gaosheng.util.examples.xls;
- import java.sql.SQLException;
- import java.util.List;
- import com.gaosheng.util.xls.XxlsAbstract;
- public class XxlsPrint extends XxlsAbstract {
- @Override
- public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
- for (int i = 0; i < rowlist.size(); i++) {
- System.out.print("'" + rowlist.get(i) + "',");
- }
- System.out.println();
- }
- public static void main(String[] args) throws Exception {
- XxlsPrint howto = new XxlsPrint();
- howto.processOneSheet("F:/new.xlsx",1);
- // howto.processAllSheets("F:/new.xlsx");
- }
- }
源代码在附件中,还包含了说明文件、数据库配置文件、以及整合xls文件和xlsx文件读取的类:Xls2Do。