.Net 大数据量导出Excel方案

前言

当要导出的 DataTable 数据量很大时(比如行数达到几十万),NPOI 的内存占用非常高。这里研究一下性能更好的 Excel 导出方式。

一、使用 ClosedXML

image.png

0、工具类-生成DataTable

using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ExcelExportTest
{
    /// <summary>
    /// Builds in-memory sample tables that serve as input for the Excel export tests.
    /// </summary>
    public class GenerateDataTable
    {
        /// <summary>
        /// Creates a table named "测试表格" with thirteen columns, fills it with
        /// <paramref name="rowCount"/> rows of fixed sample values and prints the elapsed time.
        /// </summary>
        /// <param name="rowCount">Number of rows to generate; defaults to 600000.</param>
        /// <returns>The populated table.</returns>
        public DataTable GetNewTable(int rowCount = 600000)
        {
            var timer = Stopwatch.StartNew();
            var result = new DataTable("测试表格");

            // Column order matters: the row values below are positional.
            string[] columnNames =
            {
                "Library",
                "Name",
                "Description",
                "Type",
                "Definer",
                "Definer_Description",
                "Creation_Date",
                "Days_Since_Creation",
                "Size",
                "Last_Used",
                "Attribute",
                "Count_Of_Objects_Referenced",
                "Possibly_Referenced"
            };
            foreach (var columnName in columnNames)
            {
                // Only the two timestamp columns are typed as DateTime; every other column is text.
                bool isTimestamp = columnName == "Creation_Date" || columnName == "Last_Used";
                result.Columns.Add(columnName, isTimestamp ? typeof(DateTime) : typeof(string));
            }

            for (var remaining = rowCount; remaining > 0; remaining--)
            {
                // The clock is read for each row, so timestamps may differ slightly between rows.
                object[] values =
                {
                    "xxxxxxxxx",
                    "xxxxxxxxx",
                    "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                    "*USRSPC",
                    "xxxxxxxxx",
                    "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                    DateTime.Now,
                    "789",
                    "16384",
                    DateTime.Now.AddDays(120),
                    "GRC",
                    "0",
                    "0"
                };
                result.Rows.Add(values);
            }

            timer.Stop();
            Console.WriteLine($"Generate datable used [{timer.ElapsedMilliseconds}] ms");
            return result;
        }
    }
}

1、一次导入全部数据并保存

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using ClosedXML.Excel;

namespace ExcelExportTest
{
    /// <summary>
    /// Export test based on ClosedXML.
    /// </summary>
    public class ClosedXmlTest
    {
        /// <summary>
        /// Loads the whole generated table into one worksheet and saves it as a timestamped
        /// .xlsx file on the desktop, printing how long the load and the save each took.
        /// </summary>
        public void DoTest_GenerateDirectly()
        {
            var source = new GenerateDataTable().GetNewTable();
            // Wait five seconds between generating the data and starting the export.
            Thread.Sleep(5000);

            var timer = Stopwatch.StartNew();
            using (var book = new XLWorkbook())
            {
                var desktop = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
                var fileName = DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx";
                var target = System.IO.Path.Combine(desktop, fileName);

                book.Worksheets.Add(source, source.TableName);
                timer.Stop();
                Console.WriteLine($"Add workbook used {timer.ElapsedMilliseconds} ms");

                timer.Restart();
                book.SaveAs(target);
            }

            // The timer is stopped after the using block, so the second figure also
            // includes disposing the workbook.
            timer.Stop();
            Console.WriteLine($"Save used {timer.ElapsedMilliseconds} ms");
        }
    }
}
  • 耗时及内存占用


    image.png

    image.png

    内存占用 3000 MB左右,耗时 100 秒左右,生成的文件大小 23 MB


    image.png

2、每次导入8000笔数据,保存,再打开文件追加

        /// <summary>
        /// Exports 300000 generated rows with ClosedXML in chunks of at most 8000 rows.
        /// The first chunk creates the file; every further chunk reopens the file, appends
        /// its rows below the last used row and saves again. Prints the total elapsed time.
        /// </summary>
        public void DoTest_GenerateThenAttach()
        {
            const int totalRowCount = 300000;
            const int chunkRowCount = 8000;
            // Round up so a final, smaller chunk is written when the total is not a multiple
            // of the chunk size (plain integer division would silently drop those rows).
            int chunkCount = (totalRowCount + chunkRowCount - 1) / chunkRowCount;

            var gdt = new GenerateDataTable();
            // One-row table: supplies the sheet name and is handed to GetTableWithNRows for every chunk.
            var table = gdt.GetNewTable(1);

            Stopwatch sw = Stopwatch.StartNew();
            var filePath = System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx");

            // Chunk 1 creates the workbook; adding the DataTable writes the header row and the data.
            using (var workbook = new XLWorkbook())
            {
                int firstChunkRows = Math.Min(chunkRowCount, totalRowCount);
                workbook.Worksheets.Add(gdt.GetTableWithNRows(table, firstChunkRows), table.TableName);
                workbook.SaveAs(filePath);
            }

            for (int chunk = 2; chunk <= chunkCount; chunk++)
            {
                int rowsAlreadyWritten = (chunk - 1) * chunkRowCount;
                int rowsInChunk = Math.Min(chunkRowCount, totalRowCount - rowsAlreadyWritten);

                using (var workbook = new XLWorkbook(filePath))
                {
                    IXLWorksheet worksheet = workbook.Worksheet(table.TableName);
                    int lastRowNumber = worksheet.LastRowUsed().RowNumber();
                    IXLCell firstNewCell = worksheet.Cell(lastRowNumber + 1, 1);

                    // InsertData writes the rows only. InsertTable would repeat the header row
                    // and create an additional table for every appended chunk.
                    firstNewCell.InsertData(gdt.GetTableWithNRows(table, rowsInChunk));

                    // Column widths are adjusted once, on the final chunk.
                    if (chunk == chunkCount)
                    {
                        worksheet.Columns().AdjustToContents();
                    }

                    workbook.Save();
                    Console.WriteLine($"Loop {chunk} work done...");
                }

                // Force a full collection after each chunk, once the workbook has been disposed.
                GC.Collect();
                GC.WaitForPendingFinalizers();
                GC.Collect();
            }

            sw.Stop();
            Console.WriteLine($"Save all rows used {sw.ElapsedMilliseconds} ms");
        }

        /// <summary>
        /// Returns a new table with the same schema as <paramref name="dataIn"/> that holds
        /// <paramref name="rowCount"/> rows of fixed sample values.
        /// </summary>
        /// <param name="dataIn">Table whose structure is cloned; its rows are not copied.</param>
        /// <param name="rowCount">Number of sample rows to add.</param>
        /// <returns>The newly created table.</returns>
        public DataTable GetTableWithNRows(DataTable dataIn, int rowCount)
        {
            DataTable chunk = dataIn.Clone();

            for (var remaining = rowCount; remaining > 0; remaining--)
            {
                // Values are positional and must line up with the cloned column order.
                object[] values =
                {
                    "xxxxxxxxx",
                    "xxxxxxxxx",
                    "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                    "*USRSPC",
                    "xxxxxxxxx",
                    "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                    DateTime.Now,
                    "789",
                    "16384",
                    DateTime.Now.AddDays(120),
                    "GRC",
                    "0",
                    "0"
                };
                chunk.Rows.Add(values);
            }

            return chunk;
        }
  • 但是这种方式运行非常慢,耗时严重;而且越往后,每次重新读取 Excel 文件时的内存占用仍然会很大


    image.png

二、使用 OpenXML

image.png

1、使用高级API

        /// <summary>
        /// Generates the default sample table, exports it through WriteExcelFile to a
        /// timestamped .xlsx file on the desktop and prints the elapsed export time.
        /// </summary>
        public void DoTest()
        {
            var source = new GenerateDataTable().GetNewTable();

            // Only the export itself is timed; data generation happens before the timer starts.
            var timer = Stopwatch.StartNew();
            var desktop = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
            var target = System.IO.Path.Combine(desktop, DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx");
            WriteExcelFile(target, source);
            timer.Stop();
            Console.WriteLine($"Save used {timer.ElapsedMilliseconds} ms");

            // Force a full collection once the export has finished.
            GC.Collect();
            GC.WaitForPendingFinalizers();
            GC.Collect();
        }

        /// <summary>
        /// Writes <paramref name="table"/> to a new .xlsx file using the Open XML SDK object model:
        /// one sheet named after the table, a header row with the column names, then one row
        /// per data row. Every cell is written as text.
        /// </summary>
        /// <param name="filename">Path of the workbook to create.</param>
        /// <param name="table">Source data; its TableName becomes the sheet name.</param>
        private static void WriteExcelFile(string filename, DataTable table)
        {
            using (SpreadsheetDocument document = SpreadsheetDocument.Create(filename, SpreadsheetDocumentType.Workbook))
            {
                WorkbookPart workbookPart = document.AddWorkbookPart();
                workbookPart.Workbook = new Workbook();

                // AddNewPart is generic: the type argument selects the kind of part to create.
                WorksheetPart worksheetPart = workbookPart.AddNewPart<WorksheetPart>();
                var sheetData = new SheetData();
                worksheetPart.Worksheet = new Worksheet(sheetData);

                // Register the worksheet part in the workbook's sheet list.
                Sheets sheets = workbookPart.Workbook.AppendChild(new Sheets());
                Sheet sheet = new Sheet() { Id = workbookPart.GetIdOfPart(worksheetPart), SheetId = 1, Name = table.TableName };
                sheets.Append(sheet);

                // Header row: one text cell per column name.
                Row headerRow = new Row();
                foreach (System.Data.DataColumn column in table.Columns)
                {
                    Cell cell = new Cell();
                    cell.DataType = CellValues.String;
                    cell.CellValue = new CellValue(column.ColumnName);
                    headerRow.AppendChild(cell);
                }

                sheetData.AppendChild(headerRow);

                // Data rows. Values are read through the DataColumn itself, which avoids a
                // column-name lookup for every single cell.
                foreach (DataRow dsrow in table.Rows)
                {
                    Row newRow = new Row();
                    foreach (System.Data.DataColumn column in table.Columns)
                    {
                        Cell cell = new Cell();
                        cell.DataType = CellValues.String;
                        cell.CellValue = new CellValue(dsrow[column].ToString());
                        newRow.AppendChild(cell);
                    }

                    sheetData.AppendChild(newRow);
                }

                workbookPart.Workbook.Save();
            }
        }
  • 内存占用峰值情况【1300MB】


    image.png
  • 耗时【22秒】


    image.png
  • 生成文件大小【2.33MB】


    image.png

2、使用OpenXmlWriter

参考文章

  • Writing Large Excel Files with the Open XML SDK
  • Export big amount of data from XLSX - OutOfMemoryException
  • How to properly use OpenXmlWriter to write large Excel files
  • Working with the shared string table (Open XML SDK)
        /// <summary>
        /// Generates the default sample table, exports it through LargeExport to a
        /// timestamped .xlsx file on the desktop and prints the elapsed export time.
        /// </summary>
        public void DoTest2()
        {
            var source = new GenerateDataTable().GetNewTable();

            // Only the export itself is timed; data generation happens before the timer starts.
            var timer = Stopwatch.StartNew();
            var desktop = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
            var target = System.IO.Path.Combine(desktop, DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx");
            LargeExport(target, source);
            timer.Stop();
            Console.WriteLine($"Save used {timer.ElapsedMilliseconds} ms");

            // Force a full collection once the export has finished.
            GC.Collect();
            GC.WaitForPendingFinalizers();
            GC.Collect();
        }
        /// <summary>
        /// Streams <paramref name="table"/> into a new .xlsx file with OpenXmlWriter: a header
        /// row with the column names followed by one row per data row, all cells written as text.
        /// Elements are written one at a time instead of building the whole sheet in memory.
        /// </summary>
        /// <param name="filename">Path of the workbook to create.</param>
        /// <param name="table">Source data; its TableName becomes the sheet name.</param>
        public static void LargeExport(string filename, DataTable table)
        {
            int columnCount = table.Columns.Count;

            // The column letters are identical for every row, so compute them once
            // instead of once per cell.
            var columnLetters = new string[columnCount];
            for (int columnNum = 0; columnNum < columnCount; ++columnNum)
            {
                columnLetters[columnNum] = GetColumnName(columnNum + 1);
            }

            using (SpreadsheetDocument document = SpreadsheetDocument.Create(filename, SpreadsheetDocumentType.Workbook))
            {
                WorkbookPart workbookPart = document.AddWorkbookPart();
                WorksheetPart workSheetPart = workbookPart.AddNewPart<WorksheetPart>();

                // Worksheet part. The using block closes the writer even if a write throws.
                using (OpenXmlWriter writer = OpenXmlWriter.Create(workSheetPart))
                {
                    writer.WriteStartElement(new Worksheet());
                    writer.WriteStartElement(new SheetData());

                    // Header row (sheet row 1) holding the column names.
                    writer.WriteStartElement(new Row(), new List<OpenXmlAttribute> { new OpenXmlAttribute("r", null, "1") });
                    for (int columnNum = 0; columnNum < columnCount; ++columnNum)
                    {
                        WriteTextCell(writer, $"{columnLetters[columnNum]}1", table.Columns[columnNum].ColumnName);
                    }
                    writer.WriteEndElement(); // Row

                    // Data rows start at sheet row 2.
                    for (int rowNum = 0; rowNum < table.Rows.Count; ++rowNum)
                    {
                        int docRowNum = rowNum + 2;
                        DataRow dataRow = table.Rows[rowNum];

                        writer.WriteStartElement(new Row(), new List<OpenXmlAttribute> { new OpenXmlAttribute("r", null, docRowNum.ToString()) });
                        for (int columnNum = 0; columnNum < columnCount; ++columnNum)
                        {
                            object cellValue = dataRow[columnNum];
                            string cellStr;
                            if (cellValue == null || cellValue == DBNull.Value)
                            {
                                cellStr = "";
                            }
                            else if (cellValue is DateTime)
                            {
                                // Fixed format so dates keep millisecond precision in the text cell.
                                cellStr = ((DateTime)cellValue).ToString("yyyy-MM-dd HH:mm:ss.fff");
                            }
                            else
                            {
                                cellStr = cellValue.ToString();
                            }

                            WriteTextCell(writer, $"{columnLetters[columnNum]}{docRowNum}", cellStr);
                        }
                        writer.WriteEndElement(); // Row
                    }

                    writer.WriteEndElement(); // SheetData
                    writer.WriteEndElement(); // Worksheet
                }

                // Workbook part: a single sheet entry that points at the worksheet part.
                using (OpenXmlWriter writer = OpenXmlWriter.Create(workbookPart))
                {
                    writer.WriteStartElement(new Workbook());
                    writer.WriteStartElement(new Sheets());

                    writer.WriteElement(new Sheet()
                    {
                        Name = table.TableName,
                        SheetId = 1,
                        Id = workbookPart.GetIdOfPart(workSheetPart)
                    });

                    writer.WriteEndElement(); // Sheets
                    writer.WriteEndElement(); // Workbook
                }

                // No explicit document.Close(): leaving the using block disposes the document.
            }
        }

        // Writes one cell element with the given reference (for example "B7") whose value is the given text.
        private static void WriteTextCell(OpenXmlWriter writer, string cellReference, string text)
        {
            var attributes = new List<OpenXmlAttribute>
            {
                // t="str": the text is stored directly in the cell value; no shared string table is written.
                new OpenXmlAttribute("t", null, "str"),
                new OpenXmlAttribute("r", "", cellReference)
            };

            writer.WriteStartElement(new Cell(), attributes);
            writer.WriteElement(new CellValue(text));
            writer.WriteEndElement();
        }

        // Converts a 1-based column index into spreadsheet column letters
        // (1 -> "A", 26 -> "Z", 27 -> "AA"). Returns an empty string for an index below 1.
        private static string GetColumnName(int columnIndex)
        {
            if (columnIndex <= 0)
            {
                return String.Empty;
            }

            // Bijective base 26: the last letter comes from (index - 1) % 26 and the
            // remaining prefix from (index - 1) / 26.
            int zeroBased = columnIndex - 1;
            char lastLetter = (char)('A' + zeroBased % 26);
            return GetColumnName(zeroBased / 26) + lastLetter;
        }
  • 内存占用【不超过 190MB】


    image.png
  • 耗时【17 秒】


    image.png
  • 生成的文件大小【26 MB】


    image.png

将数据量增大到100万

内存占用不超过【260 MB】,耗时【30秒】,生成文件大小【48.1MB】


image.png

image.png

三、使用 NPOI

        /// <summary>
        /// Generates the default sample table, exports it through ExportDataTableToExcel to a
        /// timestamped .xlsx file on the desktop and prints the elapsed export time.
        /// </summary>
        public void DoTest()
        {
            var source = new GenerateDataTable().GetNewTable();

            // Only the export itself is timed; data generation happens before the timer starts.
            var timer = Stopwatch.StartNew();
            var desktop = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
            var target = System.IO.Path.Combine(desktop, DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx");
            // The returned result is not inspected here.
            ExportDataTableToExcel(source, target);
            timer.Stop();
            Console.WriteLine($"Save used {timer.ElapsedMilliseconds} ms");

            // Force a full collection once the export has finished.
            GC.Collect();
            GC.WaitForPendingFinalizers();
            GC.Collect();
        }

        /// <summary>
        /// Exports <paramref name="dt"/> to an Excel file with NPOI: a header row with the column
        /// names followed by one row per data row, every cell written as text.
        /// </summary>
        /// <param name="dt">Source data; its TableName becomes the sheet name.</param>
        /// <param name="saveTopath">
        /// Target path. A path ending in ".xlsx" (any letter case) produces an XSSF workbook;
        /// any other path produces an HSSF workbook.
        /// </param>
        /// <returns>
        /// (true, "") on success; (false, message) when the table is null or empty, or when an
        /// exception occurs, in which case the message is the exception message.
        /// </returns>
        public static Tuple<bool, string> ExportDataTableToExcel(DataTable dt, string saveTopath)
        {
            try
            {
                if (dt == null || dt.Rows.Count == 0)
                {
                    return new Tuple<bool, string>(false, "没有解析到数据!");
                }

                // Pick the workbook format from the file extension. Matching the end of the path
                // case-insensitively avoids writing the old binary format into a ".XLSX" file.
                IWorkbook workbook;
                if (saveTopath.EndsWith(".xlsx", StringComparison.OrdinalIgnoreCase)) // 2007 format
                {
                    workbook = new XSSFWorkbook();
                }
                else // 2003 format
                {
                    workbook = new HSSFWorkbook();
                }

                ISheet sheet = workbook.CreateSheet(dt.TableName);
                int rowCount = dt.Rows.Count;
                int columnCount = dt.Columns.Count;

                // The first sheet row holds the column names.
                IRow headerRow = sheet.CreateRow(0);
                for (int c = 0; c < columnCount; c++)
                {
                    headerRow.CreateCell(c).SetCellValue(dt.Columns[c].ColumnName);
                }

                // Data starts on the second sheet row.
                for (int i = 0; i < rowCount; i++)
                {
                    IRow row = sheet.CreateRow(i + 1);
                    DataRow source = dt.Rows[i];
                    for (int j = 0; j < columnCount; j++)
                    {
                        row.CreateCell(j).SetCellValue(source[j].ToString());
                    }
                }

                // File.Create truncates an existing file. File.OpenWrite would leave stale bytes
                // behind when the new content is shorter than the old file.
                using (FileStream fs = File.Create(saveTopath))
                {
                    workbook.Write(fs);
                }

                return new Tuple<bool, string>(true, "");
            }
            catch (Exception ex)
            {
                // The stream is already closed by its using block; only report the failure.
                return new Tuple<bool, string>(false, ex.Message);
            }
        }
  • 内存占用【2600MB】


    image.png
  • 耗时【50 秒】

  • 生成的文件大小【29 MB】


    image.png

四、结论

显然,在只考虑生成文件的情况下,应该选择使用 OpenXmlWriter 的方式:在上述几种方案中,它的内存占用和耗时都是最低的

代码

GitHub Page-LargeDataExportExcel

你可能感兴趣的:(.Net 大数据量导出Excel方案)