Java POI: Convert Excel to CSV by XSSFReader

1. 

  public String convertExcelToCsv(File uploadedFile, String reportType)
            throws InvalidFormatException, IOException, InterruptedException
    {
        logger.info(String
                .format("Start convertExcelToCsv, uploadedFile.name: [%s], reportTYpe: [%s]",
                        uploadedFile.getName(), reportType));

        String csvFileName = String.format("%s%s%s_%s.csv", tempCsvDir,
                File.separator, reportType, Calendar.getInstance()
                        .getTimeInMillis());

        BlockingQueue<List<String>> rowDataQueue = new ArrayBlockingQueue<List<String>>(
                10000);
        ExecutorService executorService = Executors.newFixedThreadPool(2);
        executorService.submit(new CustomizedXSSFReader(uploadedFile,
                rowDataQueue));
        executorService.submit(new CustomizedXSSFWriter(csvFileName,
                rowDataQueue));
        executorService.shutdown();

        while (!executorService.isTerminated())
        {
            logger.info("Main thread waits 1 more second for convertExcelToCsv");
            executorService.awaitTermination(1, TimeUnit.SECONDS);
        }

        logger.info(String.format(
                "Finished convertExcelToCsv, csvFileName: [%s]", csvFileName));
        return csvFileName;
    }

 

2. 

public class CustomizedXSSFReader implements Callable<Object>
{
    private static final Logger logger = Logger
            .getLogger(CustomizedXSSFReader.class);

    private File excelFile;
    private List<SheetDefination> targetSheetDefList;
    private BlockingQueue<List<String>> rowDataQueue;

    public CustomizedXSSFReader(File excelFile,
            BlockingQueue<List<String>> rowDataQueue)
    {
        super();
        this.excelFile = excelFile;
        this.rowDataQueue = rowDataQueue;
    }

    @Override
    public Object call() throws Exception
    {
        OPCPackage opcPackage = OPCPackage.open(excelFile);
        XSSFReader xssfReader = new XSSFReader(opcPackage);

        readWorkbookDef(xssfReader);
        readSheet(xssfReader);

        return null;
    }

    private void readWorkbookDef(XSSFReader xssfReader) throws IOException,
            InvalidFormatException, SAXException
    {
        InputStream workbookDefInputStream = xssfReader.getWorkbookData();
        XMLReader workbookDefXMLReader = XMLReaderFactory.createXMLReader();
        workbookDefXMLReader
                .setContentHandler(new CustomizedWorkbookDefHandler());
        workbookDefXMLReader.parse(new InputSource(workbookDefInputStream));
    }

    public void readSheet(XSSFReader xssfReader) throws Exception
    {
        logger.info(String.format(
                "Start readSheet, excelFileName: [%s], targetSheetRID: [%s]",
                excelFile.getName(), targetSheetDefList.get(0).getSheetRID()));

        SharedStringsTable sharedStringsTable = xssfReader
                .getSharedStringsTable();

        InputStream inputStream = xssfReader.getSheet(targetSheetDefList.get(0)
                .getSheetRID());
        XMLReader xmlReader = XMLReaderFactory.createXMLReader();
        xmlReader.setContentHandler(new CustomizedContentHandler(
                sharedStringsTable));

        InputSource inputSource = new InputSource(inputStream);
        xmlReader.parse(inputSource);
        inputStream.close();

        logger.info(String.format("Finished readSheet, excelFileName: [%s]",
                excelFile.getName()));
    }

    private static class SheetDefination
    {
        private String sheetName;
        private String sheetId;
        private String sheetRID;

        public SheetDefination(String sheetName, String sheetId, String sheetRID)
        {
            super();
            this.sheetName = sheetName;
            this.sheetId = sheetId;
            this.sheetRID = sheetRID;
        }

        @SuppressWarnings("unused")
        public String getSheetName()
        {
            return sheetName;
        }

        @SuppressWarnings("unused")
        public String getSheetId()
        {
            return sheetId;
        }

        public String getSheetRID()
        {
            return sheetRID;
        }

        @Override
        public String toString()
        {
            return "SheetDefination [sheetName=" + sheetName + ", sheetId="
                    + sheetId + ", sheetRID=" + sheetRID + "]";
        }

    }

    private class CustomizedWorkbookDefHandler extends DefaultHandler
    {
        private boolean isWorkbookStarted;
        private boolean isSheetsStarted;
        private boolean isSheetStarted;

        @Override
        public void startElement(String uri, String localName, String name,
                Attributes attributes) throws SAXException
        {
            if ("workbook".equals(name))
            {
                isWorkbookStarted = true;
            }
            else if ("sheets".equals(name))
            {
                isSheetsStarted = true;
                targetSheetDefList = new ArrayList<SheetDefination>();
            }
            else if ("sheet".equals(name))
            {
                isSheetStarted = true;
            }

            if (isWorkbookStarted && isSheetsStarted && isSheetStarted)
            {
                String sheetName = attributes.getValue("name");
                String sheetId = attributes.getValue("sheetId");
                String sheetRID = attributes.getValue("r:id");
                SheetDefination sheetDefination = new SheetDefination(
                        sheetName, sheetId, sheetRID);
                logger.info(String.format(
                        "Added sheetDefination: [%s] into targetSheetDefList.",
                        sheetDefination));
                targetSheetDefList.add(sheetDefination);
            }
        }

        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException
        {
            if ("workbook".equals(name))
            {
                logger.info(String
                        .format("Finished resolve workbookDef, targetSheetDefList: [%s]",
                                targetSheetDefList));
                isWorkbookStarted = false;
            }
            else if ("sheets".equals(name))
            {
                logger.info(String.format(
                        "Finished resolve sheetsDef, targetSheetDefList: [%s]",
                        targetSheetDefList));
                isSheetsStarted = false;
            }
            else if ("sheet".equals(name))
            {
                isSheetStarted = false;
            }
        }

    }

    private class CustomizedContentHandler extends DefaultHandler
    {
        private boolean isSheetDataStarted;
        private boolean isRowStarted;
        private boolean isColumnStarted;
        private boolean isValueStarted;
        private boolean valueShouldGetFromSharedStringTable;

        private SharedStringsTable sharedStringsTable;
        private StringBuilder cellValue;

        private int totalRowNum;
        private int totalColumnNum;

        private int currColumnNumCursor = 0;
        private int prevColumnNumCursor = -1;

        private int currRowNumCursor = 0;
        private int prevRowNumCursor = -1;

        private List<String> rowData;

        public CustomizedContentHandler(SharedStringsTable sharedStringsTable)
        {
            super();
            this.sharedStringsTable = sharedStringsTable;
        }

        private void initSheetInfo(String cellReferenceRegion)
        {
            logger.info(String.format(
                    "Start initSheetInfo, cellReferenceRegion: [%s]",
                    cellReferenceRegion));

            String[] cellReferences = StringUtils.split(cellReferenceRegion,
                    ':');
            if (2 != cellReferences.length)
            {
                totalColumnNum = 10;
                totalRowNum = 10;

                logger.warn(String
                        .format("cellReferenceRegion: [%s] is not reliable, thus we use default totalColumnNum: [%d], totalRowNum: [%d]",
                                cellReferenceRegion, totalColumnNum,
                                totalRowNum));
            }
            else
            {
                String startCellReference = cellReferences[0];
                String endCellReference = cellReferences[1];

                int startX = CellReferenceUtil
                        .getColIndexByCoordName(startCellReference);
                int startY = CellReferenceUtil
                        .getRowIndexByCoordName(startCellReference);

                int endX = CellReferenceUtil
                        .getColIndexByCoordName(endCellReference);
                int endY = CellReferenceUtil
                        .getRowIndexByCoordName(endCellReference);
                totalRowNum = endY - startY + 1;
                totalColumnNum = endX - startX + 1;

                logger.info(String
                        .format("Finished initSheetInfo, totalRowNum: [%d], totalColumnNum: [%d]",
                                totalRowNum, totalColumnNum));
            }
        }

        @Override
        public void startElement(String uri, String localName, String name,
                Attributes attributes) throws SAXException
        {
            if ("dimension".equals(name))
            {
                String cellReferenceRegion = attributes.getValue("ref");
                initSheetInfo(cellReferenceRegion);
            }
            else if ("sheetData".equals(name))
            {
                isSheetDataStarted = true;
            }
            else if ("row".equals(name))
            {
                isRowStarted = true;
                currRowNumCursor = Integer.parseInt(attributes.getValue("r")) - 1;
                rowData = new ArrayList<String>(totalColumnNum);
            }
            else if ("c".equals(name))
            {
                isColumnStarted = true;

                String cellIndex = attributes.getValue("r");
                currColumnNumCursor = CellReferenceUtil
                        .getColIndexByCoordName(cellIndex);

                String cellType = attributes.getValue("t");
                if ("s".equals(cellType))
                {
                    valueShouldGetFromSharedStringTable = true;
                }
                else
                {
                    valueShouldGetFromSharedStringTable = false;
                }
            }
            else if ("v".equals(name))
            {
                isValueStarted = true;
                cellValue = new StringBuilder();
            }
        }

        private void fillEmptyCellValuesIfNecessary()
        {
            if ((currColumnNumCursor - prevColumnNumCursor) > 1)
            {
                for (int i = 1; i < currColumnNumCursor - prevColumnNumCursor; i++)
                {
                    rowData.add("");
                }
            }
        }

        private void fillEmptyRowValuesIfNecessary()
        {
            if ((currRowNumCursor - prevRowNumCursor) > 1)
            {
                for (int i = 1; i < currRowNumCursor - prevRowNumCursor; i++)
                {
                    rowDataQueue.add(new ArrayList<String>(totalColumnNum));
                }
            }
        }

        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException
        {
            if ("sheetData".equals(name))
            {
                isSheetDataStarted = false;
                rowDataQueue.offer(new ArrayList<String>());
            }
            else if ("row".equals(name))
            {
                isRowStarted = false;

                fillEmptyRowValuesIfNecessary();
                rowDataQueue.offer(rowData);
                prevRowNumCursor = currRowNumCursor;
            }
            else if ("c".equals(name))
            {
                isColumnStarted = false;
            }
            else if ("v".equals(name))
            {
                if (isSheetDataStarted && isRowStarted && isColumnStarted
                        && isValueStarted)
                {
                    fillEmptyCellValuesIfNecessary();

                    if (valueShouldGetFromSharedStringTable)
                    {
                        valueShouldGetFromSharedStringTable = false;

                        int index = Integer.parseInt(cellValue.toString());
                        String cellValueStr = new XSSFRichTextString(
                                sharedStringsTable.getEntryAt(index))
                                .toString();
                        rowData.add(cellValueStr);
                    }
                    else
                    {
                        rowData.add(cellValue.toString());
                    }
                }
                else
                {
                    logger.error(String
                            .format("Error Excel->XML, isSheetDataStarted, all of isRowStarted, isColumnStarted, isValueStarted should  be true "));
                }
                prevColumnNumCursor = currColumnNumCursor;
                isValueStarted = false;
                cellValue = new StringBuilder();
            }
        }

        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException
        {
            if (isSheetDataStarted && isRowStarted && isColumnStarted
                    && isValueStarted)
            {
                cellValue.append(new String(ch, start, length));
            }
        }
    }

}

 

3. 

public class CustomizedXSSFWriter implements Callable<Object>
{
    private static final Logger logger = Logger
            .getLogger(CustomizedXSSFWriter.class);

    private String csvFileName;
    private BlockingQueue<List<String>> rowDataQueue;

    private int currentLineCount = 0;

    public CustomizedXSSFWriter(String csvFileName,
            BlockingQueue<List<String>> rowDataQueue)
    {
        super();
        this.csvFileName = csvFileName;
        this.rowDataQueue = rowDataQueue;
    }

    @Override
    public Object call() throws Exception
    {
        CSVWriter writer = new CSVWriter(new FileWriter(csvFileName));

        while (true)
        {
            List<String> rowData = rowDataQueue.take();

            if (rowData.isEmpty())
            {
                break;
            }
            writer.writeNext(rowData.toArray(new String[rowData.size()]));
            currentLineCount++;
        }

        writer.close();

        logger.info(String.format(
                "Finished write fileName: [%s]. totalLineCount: [%d]",
                csvFileName, currentLineCount));
        return null;
    }

}

 

4. 

    @Test
    public void convertExcelToCsvTest() throws Exception
    {
        File uploadedFile = new File(
                "src/test/resources/ccar-icg-14q/CCAR_ICG_14Q_MANUAL_ADJUSTMENT_SECURITIZED_PRODUCT_3791_RAPTOR-C-20140812_190656657_20140331.xlsx");
        String reportType = "RCAST_CCAR_ICG_14Q_ADJUSTMENT";
        ccarIcg14QFileUploadService.convertExcelToCsv(uploadedFile, reportType);
    }

 

 

 

Reference Links:

1) https://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xssf/eventusermodel/examples/FromHowTo.java

 

你可能感兴趣的:(Java POI,SAX for POI,XSSFReader)