分享用Java将Excel的xls和xlsx文件转换成csv文件的方法, XLS2CSV, XLSX2CSV
在做的项目中遇到需要处理Excel的多种输出文件,包括csv, xls, xlsx这三种文件, 所以想先将后两种合适都转换到csv格式再一起解析. 于是就去找到了下面这两个类, 两个好像都是apache官方的例子.来源链接:
XLSX2CSV: https://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java
上面两个地址中后面一个好像是被墙的,我也是翻过了墙才勉强访问到.为了方便大家我把代码贴到下面,
不过跟上面的有一点区别: 我自己在两个类中都多加了一个构造器,把功能封装到 只要传入来源文件和输出文件路径就可以用了
使用方法示例请看代码最后面的main方法.
依赖的包在本页最下面提供...
XLS2CSV:
1
/**/
/* ====================================================================
2Licensed to the Apache Software Foundation (ASF) under one or more
3contributor license agreements. See the NOTICE file distributed with
4this work for additional information regarding copyright ownership.
5The ASF licenses this file to You under the Apache License, Version 2.0
6(the "License"); you may not use this file except in compliance with
7the License. You may obtain a copy of the License at
8
9http://www.apache.org/licenses/LICENSE-2.0
10
11Unless required by applicable law or agreed to in writing, software
12distributed under the License is distributed on an "AS IS" BASIS,
13WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14See the License for the specific language governing permissions and
15limitations under the License.
16==================================================================== */
17 package cn.jayslong.convert;
18
19 import java.io.FileInputStream;
20 import java.io.FileNotFoundException;
21 import java.io.IOException;
22 import java.io.PrintStream;
23 import java.util.ArrayList;
24
25 import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
26 import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
27 import org.apache.poi.hssf.eventusermodel.HSSFListener;
28 import org.apache.poi.hssf.eventusermodel.HSSFRequest;
29 import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
30 import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
31 import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
32 import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
33 import org.apache.poi.hssf.model.HSSFFormulaParser;
34 import org.apache.poi.hssf.record.BOFRecord;
35 import org.apache.poi.hssf.record.BlankRecord;
36 import org.apache.poi.hssf.record.BoolErrRecord;
37 import org.apache.poi.hssf.record.BoundSheetRecord;
38 import org.apache.poi.hssf.record.FormulaRecord;
39 import org.apache.poi.hssf.record.LabelRecord;
40 import org.apache.poi.hssf.record.LabelSSTRecord;
41 import org.apache.poi.hssf.record.NoteRecord;
42 import org.apache.poi.hssf.record.NumberRecord;
43 import org.apache.poi.hssf.record.RKRecord;
44 import org.apache.poi.hssf.record.Record;
45 import org.apache.poi.hssf.record.SSTRecord;
46 import org.apache.poi.hssf.record.StringRecord;
47 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
48 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
49
50 /** */ /**
51 * A XLS -> CSV processor, that uses the MissingRecordAware EventModel code to
52 * ensure it outputs all columns and rows.
53 *
54 * @author Nick Burch
55 */
56 public class XLS2CSV implements HSSFListener {
57 private int minColumns;
58 private POIFSFileSystem fs;
59 private PrintStream output;
60
61 private int lastRowNumber;
62 private int lastColumnNumber;
63
64 /** *//** Should we output the formula, or the value it has? */
65 private boolean outputFormulaValues = true;
66
67 /** *//** For parsing Formulas */
68 private SheetRecordCollectingListener workbookBuildingListener;
69 private HSSFWorkbook stubWorkbook;
70
71 // Records we pick up as we process
72 private SSTRecord sstRecord;
73 private FormatTrackingHSSFListener formatListener;
74
75 /** *//** So we known which sheet we're on */
76 private int sheetIndex = -1;
77 private BoundSheetRecord[] orderedBSRs;
78 private ArrayList boundSheetRecords = new ArrayList();
79
80 // For handling formulas with string results
81 private int nextRow;
82 private int nextColumn;
83 private boolean outputNextStringRecord;
84
85 private final String OUTPUT_CHARSET = "GBK";
86
87 /** *//**
88 * Creates a new XLS -> CSV converter
89 *
90 * @param fs
91 * The POIFSFileSystem to process
92 * @param output
93 * The PrintStream to output the CSV to
94 * @param minColumns
95 * The minimum number of columns to output, or -1 for no minimum
96 */
97 public XLS2CSV(POIFSFileSystem fs, PrintStream output, int minColumns) {
98 this.fs = fs;
99 this.output = output;
100 this.minColumns = minColumns;
101 }
102
103 public XLS2CSV(String inputFilePath, String outputFilePath) throws Exception {
104 fs = new POIFSFileSystem(new FileInputStream(inputFilePath));
105 output = new PrintStream(outputFilePath, OUTPUT_CHARSET);
106 minColumns = -1;
107 }
108
109 /** *//**
110 * Creates a new XLS -> CSV converter
111 *
112 * @param filename
113 * The file to process
114 * @param minColumns
115 * The minimum number of columns to output, or -1 for no minimum
116 * @throws IOException
117 * @throws FileNotFoundException
118 */
119 public XLS2CSV(String filename, int minColumns) throws IOException,
120 FileNotFoundException {
121 this(new POIFSFileSystem(new FileInputStream(filename)), System.out,
122 minColumns);
123 }
124
125 /** *//**
126 * Initiates the processing of the XLS file to CSV
127 */
128 public void process() throws IOException {
129 MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
130 this);
131 formatListener = new FormatTrackingHSSFListener(listener);
132
133 HSSFEventFactory factory = new HSSFEventFactory();
134 HSSFRequest request = new HSSFRequest();
135
136 if (outputFormulaValues) {
137 request.addListenerForAllRecords(formatListener);
138 } else {
139 workbookBuildingListener = new SheetRecordCollectingListener(
140 formatListener);
141 request.addListenerForAllRecords(workbookBuildingListener);
142 }
143
144 factory.processWorkbookEvents(request, fs);
145 }
146
147 /** *//**
148 * Main HSSFListener method, processes events, and outputs the CSV as the
149 * file is processed.
150 */
151 public void processRecord(Record record) {
152 int thisRow = -1;
153 int thisColumn = -1;
154 String thisStr = null;
155
156 switch (record.getSid()) {
157 case BoundSheetRecord.sid:
158 boundSheetRecords.add(record);
159 break;
160 case BOFRecord.sid:
161 BOFRecord br = (BOFRecord) record;
162 if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
163 // Create sub workbook if required
164 if (workbookBuildingListener != null && stubWorkbook == null) {
165 stubWorkbook = workbookBuildingListener
166 .getStubHSSFWorkbook();
167 }
168
169 // Output the worksheet name
170 // Works by ordering the BSRs by the location of
171 // their BOFRecords, and then knowing that we
172 // process BOFRecords in byte offset order
173 sheetIndex++;
174 if (orderedBSRs == null) {
175 orderedBSRs = BoundSheetRecord
176 .orderByBofPosition(boundSheetRecords);
177 }
178 output.println();
179 output.println(orderedBSRs[sheetIndex].getSheetname() + " ["
180 + (sheetIndex + 1) + "]:");
181 }
182 break;
183
184 case SSTRecord.sid:
185 sstRecord = (SSTRecord) record;
186 break;
187
188 case BlankRecord.sid:
189 BlankRecord brec = (BlankRecord) record;
190
191 thisRow = brec.getRow();
192 thisColumn = brec.getColumn();
193 thisStr = "";
194 break;
195 case BoolErrRecord.sid:
196 BoolErrRecord berec = (BoolErrRecord) record;
197
198 thisRow = berec.getRow();
199 thisColumn = berec.getColumn();
200 thisStr = "";
201 break;
202
203 case FormulaRecord.sid:
204 FormulaRecord frec = (FormulaRecord) record;
205
206 thisRow = frec.getRow();
207 thisColumn = frec.getColumn();
208
209 if (outputFormulaValues) {
210 if (Double.isNaN(frec.getValue())) {
211 // Formula result is a string
212 // This is stored in the next record
213 outputNextStringRecord = true;
214 nextRow = frec.getRow();
215 nextColumn = frec.getColumn();
216 } else {
217 thisStr = formatListener.formatNumberDateCell(frec);
218 }
219 } else {
220 thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook,
221 frec.getParsedExpression()) + '"';
222 }
223 break;
224 case StringRecord.sid:
225 if (outputNextStringRecord) {
226 // String for formula
227 StringRecord srec = (StringRecord) record;
228 thisStr = srec.getString();
229 thisRow = nextRow;
230 thisColumn = nextColumn;
231 outputNextStringRecord = false;
232 }
233 break;
234
235 case LabelRecord.sid:
236 LabelRecord lrec = (LabelRecord) record;
237
238 thisRow = lrec.getRow();
239 thisColumn = lrec.getColumn();
240 thisStr = '"' + lrec.getValue() + '"';
241 break;
242 case LabelSSTRecord.sid:
243 LabelSSTRecord lsrec = (LabelSSTRecord) record;
244
245 thisRow = lsrec.getRow();
246 thisColumn = lsrec.getColumn();
247 if (sstRecord == null) {
248 thisStr = '"' + "(No SST Record, can't identify string)" + '"';
249 } else {
250 thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex())
251 .toString() + '"';
252 }
253 break;
254 case NoteRecord.sid:
255 NoteRecord nrec = (NoteRecord) record;
256
257 thisRow = nrec.getRow();
258 thisColumn = nrec.getColumn();
259 // TODO: Find object to match nrec.getShapeId()
260 thisStr = '"' + "(TODO)" + '"';
261 break;
262 case NumberRecord.sid:
263 NumberRecord numrec = (NumberRecord) record;
264
265 thisRow = numrec.getRow();
266 thisColumn = numrec.getColumn();
267
268 // Format
269 thisStr = formatListener.formatNumberDateCell(numrec);
270 break;
271 case RKRecord.sid:
272 RKRecord rkrec = (RKRecord) record;
273
274 thisRow = rkrec.getRow();
275 thisColumn = rkrec.getColumn();
276 thisStr = '"' + "(TODO)" + '"';
277 break;
278 default:
279 break;
280 }
281
282 // Handle new row
283 if (thisRow != -1 && thisRow != lastRowNumber) {
284 lastColumnNumber = -1;
285 }
286
287 // Handle missing column
288 if (record instanceof MissingCellDummyRecord) {
289 MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
290 thisRow = mc.getRow();
291 thisColumn = mc.getColumn();
292 thisStr = "";
293 }
294
295 // If we got something to print out, do so
296 if (thisStr != null) {
297 if (thisColumn > 0) {
298 output.print(',');
299 }
300 output.print(thisStr);
301 }
302
303 // Update column and row count
304 if (thisRow > -1)
305 lastRowNumber = thisRow;
306 if (thisColumn > -1)
307 lastColumnNumber = thisColumn;
308
309 // Handle end of row
310 if (record instanceof LastCellOfRowDummyRecord) {
311 // Print out any missing commas if needed
312 if (minColumns > 0) {
313 // Columns are 0 based
314 if (lastColumnNumber == -1) {
315 lastColumnNumber = 0;
316 }
317 for (int i = lastColumnNumber; i < (minColumns); i++) {
318 output.print(',');
319 }
320 }
321
322 // We're onto a new row
323 lastColumnNumber = -1;
324
325 // End the row
326 output.println();
327 }
328 }
329
330 public static void main(String[] args) throws Exception {
331 XLS2CSV xls2csv = new XLS2CSV("Time.xls","out.csv");
332 xls2csv.process();
333 }
334}
2Licensed to the Apache Software Foundation (ASF) under one or more
3contributor license agreements. See the NOTICE file distributed with
4this work for additional information regarding copyright ownership.
5The ASF licenses this file to You under the Apache License, Version 2.0
6(the "License"); you may not use this file except in compliance with
7the License. You may obtain a copy of the License at
8
9http://www.apache.org/licenses/LICENSE-2.0
10
11Unless required by applicable law or agreed to in writing, software
12distributed under the License is distributed on an "AS IS" BASIS,
13WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14See the License for the specific language governing permissions and
15limitations under the License.
16==================================================================== */
17 package cn.jayslong.convert;
18
19 import java.io.FileInputStream;
20 import java.io.FileNotFoundException;
21 import java.io.IOException;
22 import java.io.PrintStream;
23 import java.util.ArrayList;
24
25 import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
26 import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
27 import org.apache.poi.hssf.eventusermodel.HSSFListener;
28 import org.apache.poi.hssf.eventusermodel.HSSFRequest;
29 import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
30 import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
31 import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
32 import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
33 import org.apache.poi.hssf.model.HSSFFormulaParser;
34 import org.apache.poi.hssf.record.BOFRecord;
35 import org.apache.poi.hssf.record.BlankRecord;
36 import org.apache.poi.hssf.record.BoolErrRecord;
37 import org.apache.poi.hssf.record.BoundSheetRecord;
38 import org.apache.poi.hssf.record.FormulaRecord;
39 import org.apache.poi.hssf.record.LabelRecord;
40 import org.apache.poi.hssf.record.LabelSSTRecord;
41 import org.apache.poi.hssf.record.NoteRecord;
42 import org.apache.poi.hssf.record.NumberRecord;
43 import org.apache.poi.hssf.record.RKRecord;
44 import org.apache.poi.hssf.record.Record;
45 import org.apache.poi.hssf.record.SSTRecord;
46 import org.apache.poi.hssf.record.StringRecord;
47 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
48 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
49
50 /** */ /**
51 * A XLS -> CSV processor, that uses the MissingRecordAware EventModel code to
52 * ensure it outputs all columns and rows.
53 *
54 * @author Nick Burch
55 */
56 public class XLS2CSV implements HSSFListener {
57 private int minColumns;
58 private POIFSFileSystem fs;
59 private PrintStream output;
60
61 private int lastRowNumber;
62 private int lastColumnNumber;
63
64 /** *//** Should we output the formula, or the value it has? */
65 private boolean outputFormulaValues = true;
66
67 /** *//** For parsing Formulas */
68 private SheetRecordCollectingListener workbookBuildingListener;
69 private HSSFWorkbook stubWorkbook;
70
71 // Records we pick up as we process
72 private SSTRecord sstRecord;
73 private FormatTrackingHSSFListener formatListener;
74
75 /** *//** So we known which sheet we're on */
76 private int sheetIndex = -1;
77 private BoundSheetRecord[] orderedBSRs;
78 private ArrayList boundSheetRecords = new ArrayList();
79
80 // For handling formulas with string results
81 private int nextRow;
82 private int nextColumn;
83 private boolean outputNextStringRecord;
84
85 private final String OUTPUT_CHARSET = "GBK";
86
87 /** *//**
88 * Creates a new XLS -> CSV converter
89 *
90 * @param fs
91 * The POIFSFileSystem to process
92 * @param output
93 * The PrintStream to output the CSV to
94 * @param minColumns
95 * The minimum number of columns to output, or -1 for no minimum
96 */
97 public XLS2CSV(POIFSFileSystem fs, PrintStream output, int minColumns) {
98 this.fs = fs;
99 this.output = output;
100 this.minColumns = minColumns;
101 }
102
103 public XLS2CSV(String inputFilePath, String outputFilePath) throws Exception {
104 fs = new POIFSFileSystem(new FileInputStream(inputFilePath));
105 output = new PrintStream(outputFilePath, OUTPUT_CHARSET);
106 minColumns = -1;
107 }
108
109 /** *//**
110 * Creates a new XLS -> CSV converter
111 *
112 * @param filename
113 * The file to process
114 * @param minColumns
115 * The minimum number of columns to output, or -1 for no minimum
116 * @throws IOException
117 * @throws FileNotFoundException
118 */
119 public XLS2CSV(String filename, int minColumns) throws IOException,
120 FileNotFoundException {
121 this(new POIFSFileSystem(new FileInputStream(filename)), System.out,
122 minColumns);
123 }
124
125 /** *//**
126 * Initiates the processing of the XLS file to CSV
127 */
128 public void process() throws IOException {
129 MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
130 this);
131 formatListener = new FormatTrackingHSSFListener(listener);
132
133 HSSFEventFactory factory = new HSSFEventFactory();
134 HSSFRequest request = new HSSFRequest();
135
136 if (outputFormulaValues) {
137 request.addListenerForAllRecords(formatListener);
138 } else {
139 workbookBuildingListener = new SheetRecordCollectingListener(
140 formatListener);
141 request.addListenerForAllRecords(workbookBuildingListener);
142 }
143
144 factory.processWorkbookEvents(request, fs);
145 }
146
147 /** *//**
148 * Main HSSFListener method, processes events, and outputs the CSV as the
149 * file is processed.
150 */
151 public void processRecord(Record record) {
152 int thisRow = -1;
153 int thisColumn = -1;
154 String thisStr = null;
155
156 switch (record.getSid()) {
157 case BoundSheetRecord.sid:
158 boundSheetRecords.add(record);
159 break;
160 case BOFRecord.sid:
161 BOFRecord br = (BOFRecord) record;
162 if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
163 // Create sub workbook if required
164 if (workbookBuildingListener != null && stubWorkbook == null) {
165 stubWorkbook = workbookBuildingListener
166 .getStubHSSFWorkbook();
167 }
168
169 // Output the worksheet name
170 // Works by ordering the BSRs by the location of
171 // their BOFRecords, and then knowing that we
172 // process BOFRecords in byte offset order
173 sheetIndex++;
174 if (orderedBSRs == null) {
175 orderedBSRs = BoundSheetRecord
176 .orderByBofPosition(boundSheetRecords);
177 }
178 output.println();
179 output.println(orderedBSRs[sheetIndex].getSheetname() + " ["
180 + (sheetIndex + 1) + "]:");
181 }
182 break;
183
184 case SSTRecord.sid:
185 sstRecord = (SSTRecord) record;
186 break;
187
188 case BlankRecord.sid:
189 BlankRecord brec = (BlankRecord) record;
190
191 thisRow = brec.getRow();
192 thisColumn = brec.getColumn();
193 thisStr = "";
194 break;
195 case BoolErrRecord.sid:
196 BoolErrRecord berec = (BoolErrRecord) record;
197
198 thisRow = berec.getRow();
199 thisColumn = berec.getColumn();
200 thisStr = "";
201 break;
202
203 case FormulaRecord.sid:
204 FormulaRecord frec = (FormulaRecord) record;
205
206 thisRow = frec.getRow();
207 thisColumn = frec.getColumn();
208
209 if (outputFormulaValues) {
210 if (Double.isNaN(frec.getValue())) {
211 // Formula result is a string
212 // This is stored in the next record
213 outputNextStringRecord = true;
214 nextRow = frec.getRow();
215 nextColumn = frec.getColumn();
216 } else {
217 thisStr = formatListener.formatNumberDateCell(frec);
218 }
219 } else {
220 thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook,
221 frec.getParsedExpression()) + '"';
222 }
223 break;
224 case StringRecord.sid:
225 if (outputNextStringRecord) {
226 // String for formula
227 StringRecord srec = (StringRecord) record;
228 thisStr = srec.getString();
229 thisRow = nextRow;
230 thisColumn = nextColumn;
231 outputNextStringRecord = false;
232 }
233 break;
234
235 case LabelRecord.sid:
236 LabelRecord lrec = (LabelRecord) record;
237
238 thisRow = lrec.getRow();
239 thisColumn = lrec.getColumn();
240 thisStr = '"' + lrec.getValue() + '"';
241 break;
242 case LabelSSTRecord.sid:
243 LabelSSTRecord lsrec = (LabelSSTRecord) record;
244
245 thisRow = lsrec.getRow();
246 thisColumn = lsrec.getColumn();
247 if (sstRecord == null) {
248 thisStr = '"' + "(No SST Record, can't identify string)" + '"';
249 } else {
250 thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex())
251 .toString() + '"';
252 }
253 break;
254 case NoteRecord.sid:
255 NoteRecord nrec = (NoteRecord) record;
256
257 thisRow = nrec.getRow();
258 thisColumn = nrec.getColumn();
259 // TODO: Find object to match nrec.getShapeId()
260 thisStr = '"' + "(TODO)" + '"';
261 break;
262 case NumberRecord.sid:
263 NumberRecord numrec = (NumberRecord) record;
264
265 thisRow = numrec.getRow();
266 thisColumn = numrec.getColumn();
267
268 // Format
269 thisStr = formatListener.formatNumberDateCell(numrec);
270 break;
271 case RKRecord.sid:
272 RKRecord rkrec = (RKRecord) record;
273
274 thisRow = rkrec.getRow();
275 thisColumn = rkrec.getColumn();
276 thisStr = '"' + "(TODO)" + '"';
277 break;
278 default:
279 break;
280 }
281
282 // Handle new row
283 if (thisRow != -1 && thisRow != lastRowNumber) {
284 lastColumnNumber = -1;
285 }
286
287 // Handle missing column
288 if (record instanceof MissingCellDummyRecord) {
289 MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
290 thisRow = mc.getRow();
291 thisColumn = mc.getColumn();
292 thisStr = "";
293 }
294
295 // If we got something to print out, do so
296 if (thisStr != null) {
297 if (thisColumn > 0) {
298 output.print(',');
299 }
300 output.print(thisStr);
301 }
302
303 // Update column and row count
304 if (thisRow > -1)
305 lastRowNumber = thisRow;
306 if (thisColumn > -1)
307 lastColumnNumber = thisColumn;
308
309 // Handle end of row
310 if (record instanceof LastCellOfRowDummyRecord) {
311 // Print out any missing commas if needed
312 if (minColumns > 0) {
313 // Columns are 0 based
314 if (lastColumnNumber == -1) {
315 lastColumnNumber = 0;
316 }
317 for (int i = lastColumnNumber; i < (minColumns); i++) {
318 output.print(',');
319 }
320 }
321
322 // We're onto a new row
323 lastColumnNumber = -1;
324
325 // End the row
326 output.println();
327 }
328 }
329
330 public static void main(String[] args) throws Exception {
331 XLS2CSV xls2csv = new XLS2CSV("Time.xls","out.csv");
332 xls2csv.process();
333 }
334}
XLSX2CSV:
1
/**/
/* ====================================================================
2Licensed to the Apache Software Foundation (ASF) under one or more
3contributor license agreements. See the NOTICE file distributed with
4this work for additional information regarding copyright ownership.
5The ASF licenses this file to You under the Apache License, Version 2.0
6(the "License"); you may not use this file except in compliance with
7the License. You may obtain a copy of the License at
8
9 http://www.apache.org/licenses/LICENSE-2.0
10
11Unless required by applicable law or agreed to in writing, software
12distributed under the License is distributed on an "AS IS" BASIS,
13WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14See the License for the specific language governing permissions and
15limitations under the License.
16==================================================================== */
17 package cn.jayslong.convert;
18
19 import java.io.File;
20 import java.io.FileOutputStream;
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.io.PrintStream;
24
25 import javax.xml.parsers.ParserConfigurationException;
26 import javax.xml.parsers.SAXParser;
27 import javax.xml.parsers.SAXParserFactory;
28
29 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
30 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
31 import org.apache.poi.openxml4j.opc.OPCPackage;
32 import org.apache.poi.openxml4j.opc.PackageAccess;
33 import org.apache.poi.ss.usermodel.BuiltinFormats;
34 import org.apache.poi.ss.usermodel.DataFormatter;
35 import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
36 import org.apache.poi.xssf.eventusermodel.XSSFReader;
37 import org.apache.poi.xssf.model.StylesTable;
38 import org.apache.poi.xssf.usermodel.XSSFCellStyle;
39 import org.apache.poi.xssf.usermodel.XSSFRichTextString;
40 import org.xml.sax.Attributes;
41 import org.xml.sax.ContentHandler;
42 import org.xml.sax.InputSource;
43 import org.xml.sax.SAXException;
44 import org.xml.sax.XMLReader;
45 import org.xml.sax.helpers.DefaultHandler;
46
47 /** */ /**
48 * A rudimentary XLSX -> CSV processor modeled on the POI sample program
49 * XLS2CSVmra by Nick Burch from the package
50 * org.apache.poi.hssf.eventusermodel.examples. Unlike the HSSF version, this
51 * one completely ignores missing rows.
52 * <p/>
53 * Data sheets are read using a SAX parser to keep the memory footprint
54 * relatively small, so this should be able to read enormous workbooks. The
55 * styles table and the shared-string table must be kept in memory. The standard
56 * POI styles table class is used, but a custom (read-only) class is used for
57 * the shared string table because the standard POI SharedStringsTable grows
58 * very quickly with the number of unique strings.
59 * <p/>
60 * Thanks to Eric Smith for a patch that fixes a problem triggered by cells with
61 * multiple "t" elements, which is how Excel represents different formats (e.g.,
62 * one word plain and one word bold).
63 *
64 * @author Chris Lott
65 */
66 public class XLSX2CSV {
67
68 /** *//**
69 * The type of the data value is indicated by an attribute on the cell. The
70 * value is usually in a "v" element within the cell.
71 */
72 enum xssfDataType {
73 BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
74 }
75
76 /** *//**
77 * Derived from http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
78 * <p/>
79 * Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at
80 * http://www.ecma-international.org/publications/standards/Ecma-376.htm
81 * <p/>
82 * A web-friendly version is http://openiso.org/Ecma/376/Part4
83 */
84 class MyXSSFSheetHandler extends DefaultHandler {
85
86 /** *//**
87 * Table with styles
88 */
89 private StylesTable stylesTable;
90
91 /** *//**
92 * Table with unique strings
93 */
94 private ReadOnlySharedStringsTable sharedStringsTable;
95
96 /** *//**
97 * Destination for data
98 */
99 private final PrintStream output;
100
101 /** *//**
102 * Number of columns to read starting with leftmost
103 */
104 private final int minColumnCount;
105
106 // Set when V start element is seen
107 private boolean vIsOpen;
108
109 // Set when cell start element is seen;
110 // used when cell close element is seen.
111 private xssfDataType nextDataType;
112
113 // Used to format numeric cell values.
114 private short formatIndex;
115 private String formatString;
116 private final DataFormatter formatter;
117
118 private int thisColumn = -1;
119 // The last column printed to the output stream
120 private int lastColumnNumber = -1;
121
122 // Gathers characters as they are seen.
123 private StringBuffer value;
124
125 /** *//**
126 * Accepts objects needed while parsing.
127 *
128 * @param styles
129 * Table of styles
130 * @param strings
131 * Table of shared strings
132 * @param cols
133 * Minimum number of columns to show
134 * @param target
135 * Sink for output
136 */
137 public MyXSSFSheetHandler(StylesTable styles,
138 ReadOnlySharedStringsTable strings, int cols, PrintStream target) {
139 this.stylesTable = styles;
140 this.sharedStringsTable = strings;
141 this.minColumnCount = cols;
142 this.output = target;
143 this.value = new StringBuffer();
144 this.nextDataType = xssfDataType.NUMBER;
145 this.formatter = new DataFormatter();
146 }
147
148 /**//*
149 * (non-Javadoc)
150 *
151 * @see
152 * org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String,
153 * java.lang.String, java.lang.String, org.xml.sax.Attributes)
154 */
155 public void startElement(String uri, String localName, String name,
156 Attributes attributes) throws SAXException {
157
158 if ("inlineStr".equals(name) || "v".equals(name)) {
159 vIsOpen = true;
160 // Clear contents cache
161 value.setLength(0);
162 }
163 // c => cell
164 else if ("c".equals(name)) {
165 // Get the cell reference
166 String r = attributes.getValue("r");
167 int firstDigit = -1;
168 for (int c = 0; c < r.length(); ++c) {
169 if (Character.isDigit(r.charAt(c))) {
170 firstDigit = c;
171 break;
172 }
173 }
174 thisColumn = nameToColumn(r.substring(0, firstDigit));
175
176 // Set up defaults.
177 this.nextDataType = xssfDataType.NUMBER;
178 this.formatIndex = -1;
179 this.formatString = null;
180 String cellType = attributes.getValue("t");
181 String cellStyleStr = attributes.getValue("s");
182 if ("b".equals(cellType))
183 nextDataType = xssfDataType.BOOL;
184 else if ("e".equals(cellType))
185 nextDataType = xssfDataType.ERROR;
186 else if ("inlineStr".equals(cellType))
187 nextDataType = xssfDataType.INLINESTR;
188 else if ("s".equals(cellType))
189 nextDataType = xssfDataType.SSTINDEX;
190 else if ("str".equals(cellType))
191 nextDataType = xssfDataType.FORMULA;
192 else if (cellStyleStr != null) {
193 // It's a number, but almost certainly one
194 // with a special style or format
195 int styleIndex = Integer.parseInt(cellStyleStr);
196 XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
197 this.formatIndex = style.getDataFormat();
198 this.formatString = style.getDataFormatString();
199 if (this.formatString == null)
200 this.formatString = BuiltinFormats
201 .getBuiltinFormat(this.formatIndex);
202 }
203 }
204
205 }
206
207 /**//*
208 * (non-Javadoc)
209 *
210 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String,
211 * java.lang.String, java.lang.String)
212 */
213 public void endElement(String uri, String localName, String name)
214 throws SAXException {
215
216 String thisStr = null;
217
218 // v => contents of a cell
219 if ("v".equals(name)) {
220 // Process the value contents as required.
221 // Do now, as characters() may be called more than once
222 switch (nextDataType) {
223
224 case BOOL:
225 char first = value.charAt(0);
226 thisStr = first == '0' ? "FALSE" : "TRUE";
227 break;
228
229 case ERROR:
230 thisStr = "\"ERROR:" + value.toString() + '"';
231 break;
232
233 case FORMULA:
234 // A formula could result in a string value,
235 // so always add double-quote characters.
236 thisStr = '"' + value.toString() + '"';
237 break;
238
239 case INLINESTR:
240 // TODO: have seen an example of this, so it's untested.
241 XSSFRichTextString rtsi = new XSSFRichTextString(value
242 .toString());
243 thisStr = '"' + rtsi.toString() + '"';
244 break;
245
246 case SSTINDEX:
247 String sstIndex = value.toString();
248 try {
249 int idx = Integer.parseInt(sstIndex);
250 XSSFRichTextString rtss = new XSSFRichTextString(
251 sharedStringsTable.getEntryAt(idx));
252 thisStr = '"' + rtss.toString() + '"';
253 } catch (NumberFormatException ex) {
254 output.println("Failed to parse SST index '" + sstIndex
255 + "': " + ex.toString());
256 }
257 break;
258
259 case NUMBER:
260 String n = value.toString();
261 if (this.formatString != null)
262 thisStr = formatter.formatRawCellContents(Double
263 .parseDouble(n), this.formatIndex,
264 this.formatString);
265 else
266 thisStr = n;
267 break;
268
269 default:
270 thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
271 break;
272 }
273
274 // Output after we've seen the string contents
275 // Emit commas for any fields that were missing on this row
276 if (lastColumnNumber == -1) {
277 lastColumnNumber = 0;
278 }
279 for (int i = lastColumnNumber; i < thisColumn; ++i)
280 output.print(',');
281
282 // Might be the empty string.
283 output.print(thisStr);
284
285 // Update column
286 if (thisColumn > -1)
287 lastColumnNumber = thisColumn;
288
289 } else if ("row".equals(name)) {
290
291 // Print out any missing commas if needed
292 if (minColumns > 0) {
293 // Columns are 0 based
294 if (lastColumnNumber == -1) {
295 lastColumnNumber = 0;
296 }
297 for (int i = lastColumnNumber; i < (this.minColumnCount); i++) {
298 output.print(',');
299 }
300 }
301
302 // We're onto a new row
303 output.println();
304 lastColumnNumber = -1;
305 }
306
307 }
308
309 /** *//**
310 * Captures characters only if a suitable element is open. Originally
311 * was just "v"; extended for inlineStr also.
312 */
313 public void characters(char[] ch, int start, int length)
314 throws SAXException {
315 if (vIsOpen)
316 value.append(ch, start, length);
317 }
318
319 /** *//**
320 * Converts an Excel column name like "C" to a zero-based index.
321 *
322 * @param name
323 * @return Index corresponding to the specified name
324 */
325 private int nameToColumn(String name) {
326 int column = -1;
327 for (int i = 0; i < name.length(); ++i) {
328 int c = name.charAt(i);
329 column = (column + 1) * 26 + c - 'A';
330 }
331 return column;
332 }
333
334 }
335
336 // /////////////////////////////////////
337
338 private OPCPackage xlsxPackage;
339 private int minColumns;
340 private PrintStream output;
341 private final String OUTPUT_CHARSET = "GBK";
342
343 /** *//**
344 * Creates a new XLSX -> CSV converter
345 *
346 * @param pkg
347 * The XLSX package to process
348 * @param output
349 * The PrintStream to output the CSV to
350 * @param minColumns
351 * The minimum number of columns to output, or -1 for no minimum
352 */
353 public XLSX2CSV(OPCPackage pkg, PrintStream output, int minColumns) {
354 this.xlsxPackage = pkg;
355 this.output = output;
356 this.minColumns = minColumns;
357 }
358
359 //TODO catch exceptions
360 public XLSX2CSV(String inputFilePath, String outputFilePath) throws Exception {
361 xlsxPackage = OPCPackage.open(inputFilePath, PackageAccess.READ);
362 output = new PrintStream(outputFilePath, OUTPUT_CHARSET);
363 minColumns = -1;
364 }
365
366 /** *//**
367 * Parses and shows the content of one sheet using the specified styles and
368 * shared-strings tables.
369 *
370 * @param styles
371 * @param strings
372 * @param sheetInputStream
373 */
374 public void processSheet(StylesTable styles,
375 ReadOnlySharedStringsTable strings, InputStream sheetInputStream)
376 throws IOException, ParserConfigurationException, SAXException {
377
378 InputSource sheetSource = new InputSource(sheetInputStream);
379 SAXParserFactory saxFactory = SAXParserFactory.newInstance();
380 SAXParser saxParser = saxFactory.newSAXParser();
381 XMLReader sheetParser = saxParser.getXMLReader();
382 ContentHandler handler = new MyXSSFSheetHandler(styles, strings,
383 this.minColumns, this.output);
384 sheetParser.setContentHandler(handler);
385 sheetParser.parse(sheetSource);
386 }
387
388 /** *//**
389 * Initiates the processing of the XLS workbook file to CSV.
390 *
391 * @throws IOException
392 * @throws OpenXML4JException
393 * @throws ParserConfigurationException
394 * @throws SAXException
395 */
396 public void process() throws IOException, OpenXML4JException,
397 ParserConfigurationException, SAXException {
398
399 ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(
400 this.xlsxPackage);
401 XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
402 StylesTable styles = xssfReader.getStylesTable();
403 XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader
404 .getSheetsData();
405 int index = 0;
406 while (iter.hasNext()) {
407 InputStream stream = iter.next();
408 String sheetName = iter.getSheetName();
409 this.output.println();
410 this.output.println(sheetName + " [index=" + index + "]:");
411 processSheet(styles, strings, stream);
412 stream.close();
413 ++index;
414 }
415 }
416
417 public static void main(String[] args) throws Exception {
418 XLSX2CSV xlsx2csv = new XLSX2CSV("Time.xlsx", "outs.csv");
419 xlsx2csv.process();
420 }
421
422}
2Licensed to the Apache Software Foundation (ASF) under one or more
3contributor license agreements. See the NOTICE file distributed with
4this work for additional information regarding copyright ownership.
5The ASF licenses this file to You under the Apache License, Version 2.0
6(the "License"); you may not use this file except in compliance with
7the License. You may obtain a copy of the License at
8
9 http://www.apache.org/licenses/LICENSE-2.0
10
11Unless required by applicable law or agreed to in writing, software
12distributed under the License is distributed on an "AS IS" BASIS,
13WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14See the License for the specific language governing permissions and
15limitations under the License.
16==================================================================== */
17 package cn.jayslong.convert;
18
19 import java.io.File;
20 import java.io.FileOutputStream;
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.io.PrintStream;
24
25 import javax.xml.parsers.ParserConfigurationException;
26 import javax.xml.parsers.SAXParser;
27 import javax.xml.parsers.SAXParserFactory;
28
29 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
30 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
31 import org.apache.poi.openxml4j.opc.OPCPackage;
32 import org.apache.poi.openxml4j.opc.PackageAccess;
33 import org.apache.poi.ss.usermodel.BuiltinFormats;
34 import org.apache.poi.ss.usermodel.DataFormatter;
35 import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
36 import org.apache.poi.xssf.eventusermodel.XSSFReader;
37 import org.apache.poi.xssf.model.StylesTable;
38 import org.apache.poi.xssf.usermodel.XSSFCellStyle;
39 import org.apache.poi.xssf.usermodel.XSSFRichTextString;
40 import org.xml.sax.Attributes;
41 import org.xml.sax.ContentHandler;
42 import org.xml.sax.InputSource;
43 import org.xml.sax.SAXException;
44 import org.xml.sax.XMLReader;
45 import org.xml.sax.helpers.DefaultHandler;
46
47 /** */ /**
48 * A rudimentary XLSX -> CSV processor modeled on the POI sample program
49 * XLS2CSVmra by Nick Burch from the package
50 * org.apache.poi.hssf.eventusermodel.examples. Unlike the HSSF version, this
51 * one completely ignores missing rows.
52 * <p/>
53 * Data sheets are read using a SAX parser to keep the memory footprint
54 * relatively small, so this should be able to read enormous workbooks. The
55 * styles table and the shared-string table must be kept in memory. The standard
56 * POI styles table class is used, but a custom (read-only) class is used for
57 * the shared string table because the standard POI SharedStringsTable grows
58 * very quickly with the number of unique strings.
59 * <p/>
60 * Thanks to Eric Smith for a patch that fixes a problem triggered by cells with
61 * multiple "t" elements, which is how Excel represents different formats (e.g.,
62 * one word plain and one word bold).
63 *
64 * @author Chris Lott
65 */
66 public class XLSX2CSV {
67
68 /** *//**
69 * The type of the data value is indicated by an attribute on the cell. The
70 * value is usually in a "v" element within the cell.
71 */
72 enum xssfDataType {
73 BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
74 }
75
76 /** *//**
77 * Derived from http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
78 * <p/>
79 * Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at
80 * http://www.ecma-international.org/publications/standards/Ecma-376.htm
81 * <p/>
82 * A web-friendly version is http://openiso.org/Ecma/376/Part4
83 */
84 class MyXSSFSheetHandler extends DefaultHandler {
85
86 /** *//**
87 * Table with styles
88 */
89 private StylesTable stylesTable;
90
91 /** *//**
92 * Table with unique strings
93 */
94 private ReadOnlySharedStringsTable sharedStringsTable;
95
96 /** *//**
97 * Destination for data
98 */
99 private final PrintStream output;
100
101 /** *//**
102 * Number of columns to read starting with leftmost
103 */
104 private final int minColumnCount;
105
106 // Set when V start element is seen
107 private boolean vIsOpen;
108
109 // Set when cell start element is seen;
110 // used when cell close element is seen.
111 private xssfDataType nextDataType;
112
113 // Used to format numeric cell values.
114 private short formatIndex;
115 private String formatString;
116 private final DataFormatter formatter;
117
118 private int thisColumn = -1;
119 // The last column printed to the output stream
120 private int lastColumnNumber = -1;
121
122 // Gathers characters as they are seen.
123 private StringBuffer value;
124
125 /** *//**
126 * Accepts objects needed while parsing.
127 *
128 * @param styles
129 * Table of styles
130 * @param strings
131 * Table of shared strings
132 * @param cols
133 * Minimum number of columns to show
134 * @param target
135 * Sink for output
136 */
137 public MyXSSFSheetHandler(StylesTable styles,
138 ReadOnlySharedStringsTable strings, int cols, PrintStream target) {
139 this.stylesTable = styles;
140 this.sharedStringsTable = strings;
141 this.minColumnCount = cols;
142 this.output = target;
143 this.value = new StringBuffer();
144 this.nextDataType = xssfDataType.NUMBER;
145 this.formatter = new DataFormatter();
146 }
147
148 /**//*
149 * (non-Javadoc)
150 *
151 * @see
152 * org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String,
153 * java.lang.String, java.lang.String, org.xml.sax.Attributes)
154 */
155 public void startElement(String uri, String localName, String name,
156 Attributes attributes) throws SAXException {
157
158 if ("inlineStr".equals(name) || "v".equals(name)) {
159 vIsOpen = true;
160 // Clear contents cache
161 value.setLength(0);
162 }
163 // c => cell
164 else if ("c".equals(name)) {
165 // Get the cell reference
166 String r = attributes.getValue("r");
167 int firstDigit = -1;
168 for (int c = 0; c < r.length(); ++c) {
169 if (Character.isDigit(r.charAt(c))) {
170 firstDigit = c;
171 break;
172 }
173 }
174 thisColumn = nameToColumn(r.substring(0, firstDigit));
175
176 // Set up defaults.
177 this.nextDataType = xssfDataType.NUMBER;
178 this.formatIndex = -1;
179 this.formatString = null;
180 String cellType = attributes.getValue("t");
181 String cellStyleStr = attributes.getValue("s");
182 if ("b".equals(cellType))
183 nextDataType = xssfDataType.BOOL;
184 else if ("e".equals(cellType))
185 nextDataType = xssfDataType.ERROR;
186 else if ("inlineStr".equals(cellType))
187 nextDataType = xssfDataType.INLINESTR;
188 else if ("s".equals(cellType))
189 nextDataType = xssfDataType.SSTINDEX;
190 else if ("str".equals(cellType))
191 nextDataType = xssfDataType.FORMULA;
192 else if (cellStyleStr != null) {
193 // It's a number, but almost certainly one
194 // with a special style or format
195 int styleIndex = Integer.parseInt(cellStyleStr);
196 XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
197 this.formatIndex = style.getDataFormat();
198 this.formatString = style.getDataFormatString();
199 if (this.formatString == null)
200 this.formatString = BuiltinFormats
201 .getBuiltinFormat(this.formatIndex);
202 }
203 }
204
205 }
206
207 /**//*
208 * (non-Javadoc)
209 *
210 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String,
211 * java.lang.String, java.lang.String)
212 */
213 public void endElement(String uri, String localName, String name)
214 throws SAXException {
215
216 String thisStr = null;
217
218 // v => contents of a cell
219 if ("v".equals(name)) {
220 // Process the value contents as required.
221 // Do now, as characters() may be called more than once
222 switch (nextDataType) {
223
224 case BOOL:
225 char first = value.charAt(0);
226 thisStr = first == '0' ? "FALSE" : "TRUE";
227 break;
228
229 case ERROR:
230 thisStr = "\"ERROR:" + value.toString() + '"';
231 break;
232
233 case FORMULA:
234 // A formula could result in a string value,
235 // so always add double-quote characters.
236 thisStr = '"' + value.toString() + '"';
237 break;
238
239 case INLINESTR:
240 // TODO: have seen an example of this, so it's untested.
241 XSSFRichTextString rtsi = new XSSFRichTextString(value
242 .toString());
243 thisStr = '"' + rtsi.toString() + '"';
244 break;
245
246 case SSTINDEX:
247 String sstIndex = value.toString();
248 try {
249 int idx = Integer.parseInt(sstIndex);
250 XSSFRichTextString rtss = new XSSFRichTextString(
251 sharedStringsTable.getEntryAt(idx));
252 thisStr = '"' + rtss.toString() + '"';
253 } catch (NumberFormatException ex) {
254 output.println("Failed to parse SST index '" + sstIndex
255 + "': " + ex.toString());
256 }
257 break;
258
259 case NUMBER:
260 String n = value.toString();
261 if (this.formatString != null)
262 thisStr = formatter.formatRawCellContents(Double
263 .parseDouble(n), this.formatIndex,
264 this.formatString);
265 else
266 thisStr = n;
267 break;
268
269 default:
270 thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
271 break;
272 }
273
274 // Output after we've seen the string contents
275 // Emit commas for any fields that were missing on this row
276 if (lastColumnNumber == -1) {
277 lastColumnNumber = 0;
278 }
279 for (int i = lastColumnNumber; i < thisColumn; ++i)
280 output.print(',');
281
282 // Might be the empty string.
283 output.print(thisStr);
284
285 // Update column
286 if (thisColumn > -1)
287 lastColumnNumber = thisColumn;
288
289 } else if ("row".equals(name)) {
290
291 // Print out any missing commas if needed
292 if (minColumns > 0) {
293 // Columns are 0 based
294 if (lastColumnNumber == -1) {
295 lastColumnNumber = 0;
296 }
297 for (int i = lastColumnNumber; i < (this.minColumnCount); i++) {
298 output.print(',');
299 }
300 }
301
302 // We're onto a new row
303 output.println();
304 lastColumnNumber = -1;
305 }
306
307 }
308
309 /** *//**
310 * Captures characters only if a suitable element is open. Originally
311 * was just "v"; extended for inlineStr also.
312 */
313 public void characters(char[] ch, int start, int length)
314 throws SAXException {
315 if (vIsOpen)
316 value.append(ch, start, length);
317 }
318
319 /** *//**
320 * Converts an Excel column name like "C" to a zero-based index.
321 *
322 * @param name
323 * @return Index corresponding to the specified name
324 */
325 private int nameToColumn(String name) {
326 int column = -1;
327 for (int i = 0; i < name.length(); ++i) {
328 int c = name.charAt(i);
329 column = (column + 1) * 26 + c - 'A';
330 }
331 return column;
332 }
333
334 }
335
336 // /////////////////////////////////////
337
338 private OPCPackage xlsxPackage;
339 private int minColumns;
340 private PrintStream output;
341 private final String OUTPUT_CHARSET = "GBK";
342
343 /** *//**
344 * Creates a new XLSX -> CSV converter
345 *
346 * @param pkg
347 * The XLSX package to process
348 * @param output
349 * The PrintStream to output the CSV to
350 * @param minColumns
351 * The minimum number of columns to output, or -1 for no minimum
352 */
353 public XLSX2CSV(OPCPackage pkg, PrintStream output, int minColumns) {
354 this.xlsxPackage = pkg;
355 this.output = output;
356 this.minColumns = minColumns;
357 }
358
359 //TODO catch exceptions
360 public XLSX2CSV(String inputFilePath, String outputFilePath) throws Exception {
361 xlsxPackage = OPCPackage.open(inputFilePath, PackageAccess.READ);
362 output = new PrintStream(outputFilePath, OUTPUT_CHARSET);
363 minColumns = -1;
364 }
365
366 /** *//**
367 * Parses and shows the content of one sheet using the specified styles and
368 * shared-strings tables.
369 *
370 * @param styles
371 * @param strings
372 * @param sheetInputStream
373 */
374 public void processSheet(StylesTable styles,
375 ReadOnlySharedStringsTable strings, InputStream sheetInputStream)
376 throws IOException, ParserConfigurationException, SAXException {
377
378 InputSource sheetSource = new InputSource(sheetInputStream);
379 SAXParserFactory saxFactory = SAXParserFactory.newInstance();
380 SAXParser saxParser = saxFactory.newSAXParser();
381 XMLReader sheetParser = saxParser.getXMLReader();
382 ContentHandler handler = new MyXSSFSheetHandler(styles, strings,
383 this.minColumns, this.output);
384 sheetParser.setContentHandler(handler);
385 sheetParser.parse(sheetSource);
386 }
387
388 /** *//**
389 * Initiates the processing of the XLS workbook file to CSV.
390 *
391 * @throws IOException
392 * @throws OpenXML4JException
393 * @throws ParserConfigurationException
394 * @throws SAXException
395 */
396 public void process() throws IOException, OpenXML4JException,
397 ParserConfigurationException, SAXException {
398
399 ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(
400 this.xlsxPackage);
401 XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
402 StylesTable styles = xssfReader.getStylesTable();
403 XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader
404 .getSheetsData();
405 int index = 0;
406 while (iter.hasNext()) {
407 InputStream stream = iter.next();
408 String sheetName = iter.getSheetName();
409 this.output.println();
410 this.output.println(sheetName + " [index=" + index + "]:");
411 processSheet(styles, strings, stream);
412 stream.close();
413 ++index;
414 }
415 }
416
417 public static void main(String[] args) throws Exception {
418 XLSX2CSV xlsx2csv = new XLSX2CSV("Time.xlsx", "outs.csv");
419 xlsx2csv.process();
420 }
421
422}
依赖的包主要是Apache的POI包,Maven的pom文件给大家:
1
<
dependencies
>
2 < dependency >
3 < groupId > net.sf.opencsv </ groupId >
4 < artifactId > opencsv </ artifactId >
5 < version > 2.1 </ version >
6 </ dependency >
7 < dependency >
8 < groupId > org.apache.poi </ groupId >
9 < artifactId > ooxml-schemas </ artifactId >
10 < version > 1.1 </ version >
11 < type > pom </ type >
12 </ dependency >
13 < dependency >
14 < groupId > org.apache.poi </ groupId >
15 < artifactId > poi </ artifactId >
16 < version > 3.7 </ version >
17 </ dependency >
18 < dependency >
19 < groupId > org.apache.poi </ groupId >
20 < artifactId > ooxml-schemas </ artifactId >
21 < version > 1.1 </ version >
22 </ dependency >
23 < dependency >
24 < groupId > org.apache.poi </ groupId >
25 < artifactId > poi-ooxml </ artifactId >
26 < version > 3.7 </ version >
27 </ dependency >
28 < dependency >
29 < groupId > dom4j </ groupId >
30 < artifactId > dom4j </ artifactId >
31 < version > 1.6.1 </ version >
32 </ dependency >
33 </ dependencies >
2 < dependency >
3 < groupId > net.sf.opencsv </ groupId >
4 < artifactId > opencsv </ artifactId >
5 < version > 2.1 </ version >
6 </ dependency >
7 < dependency >
8 < groupId > org.apache.poi </ groupId >
9 < artifactId > ooxml-schemas </ artifactId >
10 < version > 1.1 </ version >
11 < type > pom </ type >
12 </ dependency >
13 < dependency >
14 < groupId > org.apache.poi </ groupId >
15 < artifactId > poi </ artifactId >
16 < version > 3.7 </ version >
17 </ dependency >
18 < dependency >
19 < groupId > org.apache.poi </ groupId >
20 < artifactId > ooxml-schemas </ artifactId >
21 < version > 1.1 </ version >
22 </ dependency >
23 < dependency >
24 < groupId > org.apache.poi </ groupId >
25 < artifactId > poi-ooxml </ artifactId >
26 < version > 3.7 </ version >
27 </ dependency >
28 < dependency >
29 < groupId > dom4j </ groupId >
30 < artifactId > dom4j </ artifactId >
31 < version > 1.6.1 </ version >
32 </ dependency >
33 </ dependencies >