[C#]_[使用微软OpenXmlSDK (OpenXmlReader)读取xlsx表格] 读取大数据量100万条数据Excel文件解决方案...

 

1.OpenXmlSDK是个很好的类库,可惜只能在.NET平台上(例如通过C#)调用,C#的童鞋有福气了。

2.服务端程序由于没法安装office,所以这个对asp.net网站来说是最理想的库了。需要.net 4.0版本以上.

3.以流形式,sax模型读取大文件。

 

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

namespace ConsoleApplication1
{
    /// <summary>
    /// Holds the five text values read from one worksheet row.
    /// All values are kept as the raw strings found in the sheet; a field is
    /// null when no value has been stored in it.
    /// </summary>
    class ProductObject
    {

        public String xinghao;    // model number
        public String changjia;   // manufacturer
        public String pihao;      // batch number
        public String fengzhuang; // package type
        public String shuliang;   // quantity

        /// <summary>
        /// Resets every field to null so the instance can be reused for another
        /// row without carrying over values from the previous one.
        /// </summary>
        public void init() 
        {
            xinghao = null;
            changjia = null;
            pihao = null;
            fengzhuang = null;
            shuliang = null;
        }
    }

    class Program
    {
        /// <summary>
        /// Program entry point: reads the workbook and processes its rows.
        /// </summary>
        /// <param name="args">
        /// Optional command-line arguments. When the first argument is present and
        /// non-empty it is used as the path of the .xlsx file; otherwise the
        /// built-in sample path is used.
        /// </param>
        static void Main(string[] args)
        {
            // Prefer a path supplied by the caller so the program is usable on any
            // machine; keep the original sample location as the default.
            String fileName = @"E:\software\TestData\xlsx\test.xlsx";
            if (args != null && args.Length > 0 && !String.IsNullOrEmpty(args[0]))
            {
                fileName = args[0];
            }

            Program pro = new Program();
            pro.ReadAllCellValues(fileName);
        }

        /// <summary>
        /// Receives one populated product record. This sample only prints the five
        /// fields, colon-separated, on one console line.
        /// </summary>
        /// <param name="po">The record whose fields are written out.</param>
        void CallSqlInsert(ProductObject po)
        {
            // A real implementation would call the business-layer method that inserts
            // the record into the database here; take care to handle empty values.
            object[] columns = { po.xinghao, po.changjia, po.pihao, po.fengzhuang, po.shuliang };
            Console.Out.WriteLine("{0}:{1}:{2}:{3}:{4} ", columns);
        }

        // Shared-string items of the most recently used shared-string part, stored in
        // table order so a cell's index can be resolved with a direct list lookup
        // instead of re-enumerating the table for every cell.
        private SharedStringTablePart cachedSharedStringPart;
        private List<SharedStringItem> cachedSharedStrings;

        /// <summary>
        /// Returns the text of a cell, resolving shared-string and inline-string
        /// cells to their actual text.
        /// </summary>
        /// <param name="workbookPart">Workbook part that owns the shared-string table.</param>
        /// <param name="c">The cell to read.</param>
        /// <returns>
        /// The cell text, or null when <paramref name="c"/> is null or the cell
        /// carries no value.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The cell refers to a shared string but the workbook has no shared-string table.
        /// </exception>
        String GetCellValue(WorkbookPart workbookPart,Cell c) 
        {
            if (c == null)
            {
                return null;
            }

            // Inline-string cells keep their text in the inline-string child, not in the value child.
            if (c.DataType != null && c.DataType.Value == CellValues.InlineString)
            {
                return c.InlineString != null ? c.InlineString.InnerText : null;
            }

            // A cell may exist only to carry formatting; it then has no value child.
            if (c.CellValue == null)
            {
                return null;
            }

            string rawValue = c.CellValue.InnerText;
            if (c.DataType == null || c.DataType.Value != CellValues.SharedString)
            {
                return rawValue;
            }

            SharedStringTablePart stringsPart = workbookPart.SharedStringTablePart;
            if (stringsPart == null || stringsPart.SharedStringTable == null)
            {
                throw new InvalidOperationException("The cell refers to a shared string but the workbook has no shared string table.");
            }

            // Build the index-to-item list once per shared-string part.
            if (!Object.ReferenceEquals(cachedSharedStringPart, stringsPart))
            {
                cachedSharedStrings = stringsPart.SharedStringTable.Elements<SharedStringItem>().ToList();
                cachedSharedStringPart = stringsPart;
            }

            // For shared-string cells the stored value is the index into the table.
            int index = int.Parse(rawValue, System.Globalization.CultureInfo.InvariantCulture);
            SharedStringItem ssi = cachedSharedStrings[index];

            // Plain strings have a single text child; rich-text items have runs
            // instead, so fall back to the concatenated inner text.
            return ssi.Text != null ? ssi.Text.Text : ssi.InnerText;
        }

        /// <summary>
        /// Reads the cells of the row the reader is positioned on and stores the
        /// values of columns A to E in <paramref name="po"/> (A = xinghao,
        /// B = changjia, C = pihao, D = fengzhuang, E = shuliang).
        /// </summary>
        /// <param name="reader">Reader positioned on the start of a row element.</param>
        /// <param name="workbookPart">Workbook part passed on to GetCellValue.</param>
        /// <param name="po">Record to fill; all five fields are cleared first.</param>
        /// <returns>
        /// true when the row contained at least one cell in columns A to E;
        /// false when the row has no such cell.
        /// </returns>
        Boolean StoreProductObject(OpenXmlReader reader, WorkbookPart workbookPart, ProductObject po) 
        {
            // Start from a clean record so a reused object never keeps values
            // from a previous row.
            po.xinghao = null;
            po.changjia = null;
            po.pihao = null;
            po.fengzhuang = null;
            po.shuliang = null;

            if (!reader.ReadFirstChild())
            {
                return false;
            }

            bool storedAny = false;
            int columnIndex = -1;
            do
            {
                if (reader.ElementType != typeof(Cell))
                {
                    continue;
                }

                Cell c = (Cell)reader.LoadCurrentElement();

                // Empty cells are usually omitted from the file, so the column must
                // come from the cell reference rather than from the cell's position.
                // A cell without a usable reference is taken to follow the previous one.
                int referencedColumn = c.CellReference != null
                    ? ColumnIndexFromReference(c.CellReference.Value)
                    : -1;
                columnIndex = referencedColumn >= 0 ? referencedColumn : columnIndex + 1;

                switch (columnIndex)
                {
                    case 0:
                        po.xinghao = GetCellValue(workbookPart, c);
                        storedAny = true;
                        break;
                    case 1:
                        po.changjia = GetCellValue(workbookPart, c);
                        storedAny = true;
                        break;
                    case 2:
                        po.pihao = GetCellValue(workbookPart, c);
                        storedAny = true;
                        break;
                    case 3:
                        po.fengzhuang = GetCellValue(workbookPart, c);
                        storedAny = true;
                        break;
                    case 4:
                        po.shuliang = GetCellValue(workbookPart, c);
                        storedAny = true;
                        break;
                    default:
                        // Columns beyond E are not part of the record.
                        break;
                }
            } while (reader.ReadNextSibling());

            return storedAny;
        }

        /// <summary>
        /// Converts the column letters at the start of a cell reference such as
        /// "C7" into a zero-based column index (A = 0, B = 1, ...).
        /// </summary>
        /// <returns>The column index, or -1 when the reference has no valid column letters.</returns>
        private static int ColumnIndexFromReference(string cellReference)
        {
            if (String.IsNullOrEmpty(cellReference))
            {
                return -1;
            }

            int column = 0;
            int letters = 0;
            foreach (char ch in cellReference)
            {
                char upper = Char.ToUpperInvariant(ch);
                if (upper < 'A' || upper > 'Z')
                {
                    break;
                }

                letters++;
                if (letters > 3)
                {
                    // Longer than any valid column name; treat as unusable.
                    return -1;
                }

                column = column * 26 + (upper - 'A' + 1);
            }

            return letters > 0 ? column - 1 : -1;
        }

        /// <summary>
        /// Streams the rows of the worksheet whose tab name is "Sheet1" and hands
        /// each row that yields a record to CallSqlInsert. Other sheets are ignored.
        /// </summary>
        /// <remarks>
        /// The worksheet is read forward-only (SAX style), one row at a time, rather
        /// than loading the whole sheet as a document tree. Whether to submit each
        /// row immediately or to collect rows into batches is left to CallSqlInsert.
        /// </remarks>
        /// <param name="fileName">Path of the .xlsx file; it is opened read-only.</param>
        void ReadAllCellValues(string fileName)
        {
            using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fileName, false))
            {
                WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
                if (workbookPart == null || workbookPart.Workbook == null)
                {
                    return;
                }

                // Select the sheet by its tab name as listed in the workbook. The
                // sheet-properties element inside a worksheet is optional and its
                // code name is not the tab name, so it cannot be used for this.
                foreach (Sheet sheet in workbookPart.Workbook.Descendants<Sheet>())
                {
                    if (sheet.Name == null || sheet.Name.Value != "Sheet1" || sheet.Id == null)
                    {
                        continue;
                    }

                    // Sheets that are not worksheets resolve to a different part type.
                    WorksheetPart worksheetPart = workbookPart.GetPartById(sheet.Id.Value) as WorksheetPart;
                    if (worksheetPart == null)
                    {
                        continue;
                    }

                    // The reader is disposable; release it as soon as the sheet is done.
                    using (OpenXmlReader reader = OpenXmlReader.Create(worksheetPart))
                    {
                        while (reader.Read())
                        {
                            // React to the start tag of a row only, not to its end tag.
                            if (reader.ElementType != typeof(Row) || !reader.IsStartElement)
                            {
                                continue;
                            }

                            // Use a fresh record per row so records handed to
                            // CallSqlInsert never share state.
                            ProductObject po = new ProductObject();
                            po.init();
                            if (StoreProductObject(reader, workbookPart, po))
                            {
                                CallSqlInsert(po);
                            }
                        }
                    }
                }
            }
        }

    }
}

 

你可能感兴趣的:(c#,大数据,python)