实现文件转换 xsl xlsx 转换为 csv

   最近帮朋友写了一个xsl转换为csv的方法,直接去网上找了一下apache的官方例子  大体流程是:

xsl转csv的方法:

package com.ptpec.business.service;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;

import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

/**
 * A XLS -> CSV processor, that uses the MissingRecordAware EventModel code to
 * ensure it outputs all columns and rows.
 *
 * @author
 */

//文件转换  xsl  转换为  csv

public class XLS2CSV implements HSSFListener {
    private int minColumns;
    private POIFSFileSystem fs;
    private PrintStream output;

    private int lastRowNumber;
    private int lastColumnNumber;

    /** Should we output the formula, or the value it has? */
    private boolean outputFormulaValues = true;

    /** For parsing Formulas */
    private SheetRecordCollectingListener workbookBuildingListener;
    private HSSFWorkbook stubWorkbook;

    // Records we pick up as we process
    private SSTRecord sstRecord;
    private FormatTrackingHSSFListener formatListener;

    /** So we known which sheet we're on */
    private int sheetIndex = -1;
    private BoundSheetRecord[] orderedBSRs;
    private ArrayList boundSheetRecords = new ArrayList();

    // For handling formulas with string results
    private int nextRow;
    private int nextColumn;
    private boolean outputNextStringRecord;

    private final String OUTPUT_CHARSET = "GBK";

    /**
     * Creates a new XLS -> CSV converter
     *
     * @param fs
     *            The POIFSFileSystem to process
     * @param output
     *            The PrintStream to output the CSV to
     * @param minColumns
     *            The minimum number of columns to output, or -1 for no minimum
     */
    public XLS2CSV(POIFSFileSystem fs, PrintStream output, int minColumns) {
        this.fs = fs;
        this.output = output;
        this.minColumns = minColumns;
    }

    public XLS2CSV(String inputFilePath, String outputFilePath) throws Exception {
        fs = new POIFSFileSystem(new FileInputStream(inputFilePath));
        output = new PrintStream(outputFilePath, OUTPUT_CHARSET);
        minColumns = -1;
    }

    /**
     * Creates a new XLS -> CSV converter
     *
     * @param filename
     *            The file to process
     * @param minColumns
     *            The minimum number of columns to output, or -1 for no minimum
     * @throws IOException
     * @throws FileNotFoundException
     */
    public XLS2CSV(String filename, int minColumns) throws IOException,
            FileNotFoundException {
        this(new POIFSFileSystem(new FileInputStream(filename)), System.out,
                minColumns);
    }

    /**
     * Initiates the processing of the XLS file to CSV
     */
    public void process() throws IOException {
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
                this);
        formatListener = new FormatTrackingHSSFListener(listener);

        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();

        if (outputFormulaValues) {
            request.addListenerForAllRecords(formatListener);
        } else {
            workbookBuildingListener = new SheetRecordCollectingListener(
                    formatListener);
            request.addListenerForAllRecords(workbookBuildingListener);
        }

        factory.processWorkbookEvents(request, fs);
        close();
    }

    /**
     * Main HSSFListener method, processes events, and outputs the CSV as the
     * file is processed.
     */
    public void processRecord(Record record) {
        int thisRow = -1;
        int thisColumn = -1;
        String thisStr = null;

        switch (record.getSid()) {
            case BoundSheetRecord.sid:
                boundSheetRecords.add(record);
                break;
            case BOFRecord.sid:
                BOFRecord br = (BOFRecord) record;
                if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
                    // Create sub workbook if required
                    if (workbookBuildingListener != null && stubWorkbook == null) {
                        stubWorkbook = workbookBuildingListener
                                .getStubHSSFWorkbook();
                    }

                    // Output the worksheet name
                    // Works by ordering the BSRs by the location of
                    // their BOFRecords, and then knowing that we
                    // process BOFRecords in byte offset order
                    sheetIndex++;
                    if (orderedBSRs == null) {
                        orderedBSRs = BoundSheetRecord
                                .orderByBofPosition(boundSheetRecords);
                    }
                    output.println();
                    output.println(orderedBSRs[sheetIndex].getSheetname() + " ["
                            + (sheetIndex + 1) + "]:");
                }
                break;

            case SSTRecord.sid:
                sstRecord = (SSTRecord) record;
                break;

            case BlankRecord.sid:
                BlankRecord brec = (BlankRecord) record;

                thisRow = brec.getRow();
                thisColumn = brec.getColumn();
                thisStr = "";
                break;
            case BoolErrRecord.sid:
                BoolErrRecord berec = (BoolErrRecord) record;

                thisRow = berec.getRow();
                thisColumn = berec.getColumn();
                thisStr = "";
                break;

            case FormulaRecord.sid:
                FormulaRecord frec = (FormulaRecord) record;

                thisRow = frec.getRow();
                thisColumn = frec.getColumn();

                if (outputFormulaValues) {
                    if (Double.isNaN(frec.getValue())) {
                        // Formula result is a string
                        // This is stored in the next record
                        outputNextStringRecord = true;
                        nextRow = frec.getRow();
                        nextColumn = frec.getColumn();
                    } else {
                        thisStr = formatListener.formatNumberDateCell(frec);
                    }
                } else {
                    thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook,
                            frec.getParsedExpression()) + '"';
                }
                break;
            case StringRecord.sid:
                if (outputNextStringRecord) {
                    // String for formula
                    StringRecord srec = (StringRecord) record;
                    thisStr = srec.getString();
                    thisRow = nextRow;
                    thisColumn = nextColumn;
                    outputNextStringRecord = false;
                }
                break;

            case LabelRecord.sid:
                LabelRecord lrec = (LabelRecord) record;

                thisRow = lrec.getRow();
                thisColumn = lrec.getColumn();
                thisStr = '"' + lrec.getValue() + '"';
                break;
            case LabelSSTRecord.sid:
                LabelSSTRecord lsrec = (LabelSSTRecord) record;

                thisRow = lsrec.getRow();
                thisColumn = lsrec.getColumn();
                if (sstRecord == null) {
                    thisStr = '"' + "(No SST Record, can't identify string)" + '"';
                } else {
                    thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex())
                            .toString() + '"';
                }
                break;
            case NoteRecord.sid:
                NoteRecord nrec = (NoteRecord) record;

                thisRow = nrec.getRow();
                thisColumn = nrec.getColumn();
                // TODO: Find object to match nrec.getShapeId()
                thisStr = '"' + "(TODO)" + '"';
                break;
            case NumberRecord.sid:
                NumberRecord numrec = (NumberRecord) record;

                thisRow = numrec.getRow();
                thisColumn = numrec.getColumn();

                // Format
                thisStr = formatListener.formatNumberDateCell(numrec);
                break;
            case RKRecord.sid:
                RKRecord rkrec = (RKRecord) record;

                thisRow = rkrec.getRow();
                thisColumn = rkrec.getColumn();
                thisStr = '"' + "(TODO)" + '"';
                break;
            default:
                break;
        }

        // Handle new row
        if (thisRow != -1 && thisRow != lastRowNumber) {
            lastColumnNumber = -1;
        }

        // Handle missing column
        if (record instanceof MissingCellDummyRecord) {
            MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
            thisRow = mc.getRow();
            thisColumn = mc.getColumn();
            thisStr = "";
        }

        // If we got something to print out, do so
        if (thisStr != null) {
            if (thisColumn > 0) {
                output.print(',');
            }
            output.print(thisStr);
        }

        // Update column and row count
        if (thisRow > -1)
            lastRowNumber = thisRow;
        if (thisColumn > -1)
            lastColumnNumber = thisColumn;

        // Handle end of row
        if (record instanceof LastCellOfRowDummyRecord) {
            // Print out any missing commas if needed
            if (minColumns > 0) {
                // Columns are 0 based
                if (lastColumnNumber == -1) {
                    lastColumnNumber = 0;
                }
                for (int i = lastColumnNumber; i < (minColumns); i++) {
                    output.print(',');
                }
            }

            // We're onto a new row
            lastColumnNumber = -1;

            // End the row
            output.println();
        }
    }

    public void close(){
        this.output.close();
    }
    public static void main(String[] args) throws Exception {
        XLS2CSV xls2csv = new XLS2CSV("D:\\听说\\index.xls","D:\\听说\\index.csv");
        xls2csv.process();
    }
}

XLSX转CSV的方法:

/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements.  See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License.  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package com.cnas.fileConversion;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * A rudimentary XLSX -> CSV processor modeled on the POI sample program
 * XLS2CSVmra by Nick Burch from the package
 * org.apache.poi.hssf.eventusermodel.examples. Unlike the HSSF version, this
 * one completely ignores missing rows.
 * 

* Data sheets are read using a SAX parser to keep the memory footprint * relatively small, so this should be able to read enormous workbooks. The * styles table and the shared-string table must be kept in memory. The standard * POI styles table class is used, but a custom (read-only) class is used for * the shared string table because the standard POI SharedStringsTable grows * very quickly with the number of unique strings. *

* Thanks to Eric Smith for a patch that fixes a problem triggered by cells with * multiple "t" elements, which is how Excel represents different formats (e.g., * one word plain and one word bold). * * @author */ public class XLSX2CSV { /** * The type of the data value is indicated by an attribute on the cell. The * value is usually in a "v" element within the cell. */ enum xssfDataType { BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, } /** * Derived from http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api *

* Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at * http://www.ecma-international.org/publications/standards/Ecma-376.htm *

* A web-friendly version is http://openiso.org/Ecma/376/Part4 */ class MyXSSFSheetHandler extends DefaultHandler { /** * Table with styles */ private StylesTable stylesTable; /** * Table with unique strings */ private ReadOnlySharedStringsTable sharedStringsTable; /** * Destination for data */ private final PrintStream output; /** * Number of columns to read starting with leftmost */ private final int minColumnCount; // Set when V start element is seen private boolean vIsOpen; // Set when cell start element is seen; // used when cell close element is seen. private xssfDataType nextDataType; // Used to format numeric cell values. private short formatIndex; private String formatString; private final DataFormatter formatter; private int thisColumn = -1; // The last column printed to the output stream private int lastColumnNumber = -1; // Gathers characters as they are seen. private StringBuffer value; /** * Accepts objects needed while parsing. * * @param styles * Table of styles * @param strings * Table of shared strings * @param cols * Minimum number of columns to show * @param target * Sink for output */ public MyXSSFSheetHandler(StylesTable styles, ReadOnlySharedStringsTable strings, int cols, PrintStream target) { this.stylesTable = styles; this.sharedStringsTable = strings; this.minColumnCount = cols; this.output = target; this.value = new StringBuffer(); this.nextDataType = xssfDataType.NUMBER; this.formatter = new DataFormatter(); } /* * (non-Javadoc) * * @see * org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, * java.lang.String, java.lang.String, org.xml.sax.Attributes) */ public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { if ("inlineStr".equals(name) || "v".equals(name)) { vIsOpen = true; // Clear contents cache value.setLength(0); } // c => cell else if ("c".equals(name)) { // Get the cell reference String r = attributes.getValue("r"); int firstDigit = -1; for (int c = 0; c < r.length(); ++c) { if (Character.isDigit(r.charAt(c))) { firstDigit = c; break; } } thisColumn = nameToColumn(r.substring(0, firstDigit)); // Set up defaults. this.nextDataType = xssfDataType.NUMBER; this.formatIndex = -1; this.formatString = null; String cellType = attributes.getValue("t"); String cellStyleStr = attributes.getValue("s"); if ("b".equals(cellType)) nextDataType = xssfDataType.BOOL; else if ("e".equals(cellType)) nextDataType = xssfDataType.ERROR; else if ("inlineStr".equals(cellType)) nextDataType = xssfDataType.INLINESTR; else if ("s".equals(cellType)) nextDataType = xssfDataType.SSTINDEX; else if ("str".equals(cellType)) nextDataType = xssfDataType.FORMULA; else if (cellStyleStr != null) { // It's a number, but almost certainly one // with a special style or format int styleIndex = Integer.parseInt(cellStyleStr); XSSFCellStyle style = stylesTable.getStyleAt(styleIndex); this.formatIndex = style.getDataFormat(); this.formatString = style.getDataFormatString(); if (this.formatString == null) this.formatString = BuiltinFormats .getBuiltinFormat(this.formatIndex); } } } /* * (non-Javadoc) * * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, * java.lang.String, java.lang.String) */ public void endElement(String uri, String localName, String name) throws SAXException { String thisStr = null; // v => contents of a cell if ("v".equals(name)) { // Process the value contents as required. // Do now, as characters() may be called more than once switch (nextDataType) { case BOOL: char first = value.charAt(0); thisStr = first == '0' ? "FALSE" : "TRUE"; break; case ERROR: thisStr = "\"ERROR:" + value.toString() + '"'; break; case FORMULA: // A formula could result in a string value, // so always add double-quote characters. thisStr = '"' + value.toString() + '"'; break; case INLINESTR: // TODO: have seen an example of this, so it's untested. XSSFRichTextString rtsi = new XSSFRichTextString(value .toString()); thisStr = '"' + rtsi.toString() + '"'; break; case SSTINDEX: String sstIndex = value.toString(); try { int idx = Integer.parseInt(sstIndex); XSSFRichTextString rtss = new XSSFRichTextString( sharedStringsTable.getEntryAt(idx)); thisStr = '"' + rtss.toString() + '"'; } catch (NumberFormatException ex) { output.println("Failed to parse SST index '" + sstIndex + "': " + ex.toString()); } break; case NUMBER: String n = value.toString(); if (this.formatString != null) thisStr = formatter.formatRawCellContents(Double .parseDouble(n), this.formatIndex, this.formatString); else thisStr = n; break; default: thisStr = "(TODO: Unexpected type: " + nextDataType + ")"; break; } // Output after we've seen the string contents // Emit commas for any fields that were missing on this row if (lastColumnNumber == -1) { lastColumnNumber = 0; } for (int i = lastColumnNumber; i < thisColumn; ++i) output.print(','); // Might be the empty string. output.print(thisStr); // Update column if (thisColumn > -1) lastColumnNumber = thisColumn; } else if ("row".equals(name)) { // Print out any missing commas if needed if (minColumns > 0) { // Columns are 0 based if (lastColumnNumber == -1) { lastColumnNumber = 0; } for (int i = lastColumnNumber; i < (this.minColumnCount); i++) { output.print(','); } } // We're onto a new row output.println(); lastColumnNumber = -1; } } /** * Captures characters only if a suitable element is open. Originally * was just "v"; extended for inlineStr also. */ public void characters(char[] ch, int start, int length) throws SAXException { if (vIsOpen) value.append(ch, start, length); } /** * Converts an Excel column name like "C" to a zero-based index. * * @param name * @return Index corresponding to the specified name */ private int nameToColumn(String name) { int column = -1; for (int i = 0; i < name.length(); ++i) { int c = name.charAt(i); column = (column + 1) * 26 + c - 'A'; } return column; } } // ///////////////////////////////////// private OPCPackage xlsxPackage; private int minColumns; private PrintStream output; private final String OUTPUT_CHARSET = "GBK"; /** * Creates a new XLSX -> CSV converter * * @param pkg * The XLSX package to process * @param output * The PrintStream to output the CSV to * @param minColumns * The minimum number of columns to output, or -1 for no minimum */ public XLSX2CSV(OPCPackage pkg, PrintStream output, int minColumns) { this.xlsxPackage = pkg; this.output = output; this.minColumns = minColumns; } //TODO catch exceptions public XLSX2CSV(String inputFilePath, String outputFilePath) throws Exception { xlsxPackage = OPCPackage.open(inputFilePath, PackageAccess.READ); output = new PrintStream(outputFilePath, OUTPUT_CHARSET); minColumns = -1; } /** * Parses and shows the content of one sheet using the specified styles and * shared-strings tables. * * @param styles * @param strings * @param sheetInputStream */ public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException { InputSource sheetSource = new InputSource(sheetInputStream); SAXParserFactory saxFactory = SAXParserFactory.newInstance(); SAXParser saxParser = saxFactory.newSAXParser(); XMLReader sheetParser = saxParser.getXMLReader(); ContentHandler handler = new MyXSSFSheetHandler(styles, strings, this.minColumns, this.output); sheetParser.setContentHandler(handler); sheetParser.parse(sheetSource); } /** * Initiates the processing of the XLS workbook file to CSV. * * @throws IOException * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ public void process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable( this.xlsxPackage); XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader .getSheetsData(); int index = 0; while (iter.hasNext()) { InputStream stream = iter.next(); String sheetName = iter.getSheetName(); this.output.println(); this.output.println(sheetName + " [index=" + index + "]:"); processSheet(styles, strings, stream); stream.close(); ++index; } close(); } public void close() { try { this.xlsxPackage.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } this.output.close(); } public static void main(String[] args) throws Exception { XLS2CSV xls2csv = new XLS2CSV("D:\\听说\\index.xlsx","D:\\听说\\index.csv"); xlsx2csv.process(); } }

 

你可能感兴趣的:(程序人生)