上篇地址:C#_Execl导入导出
之前写了一个单个文件的Excel导入导出,能满足一般的Excel数据操作。后面有个需求是这样的:有三十三万条甚至更多的数据,分别存在不同的Excel文件中,每个文件500条数据左右,想要集中批量处理。如果还是用之前那种方式,就很不实际了。
话不多说,上代码。
程序截图:
上图左边为上篇写的Excel单文件处理,右边是新增的多Excel文件处理。
详细可以看看上篇_C#_Execl导入导出
/// <summary>
/// Reads the first sheet of each Excel file in <paramref name="filePath"/> and tries to
/// accumulate the rows into a single DataTable.
/// NOTE(review): in this version the merge step runs at the top of each loop iteration,
/// i.e. it merges the table read from the PREVIOUS file. As written, the rows of the last
/// file are never merged (nothing runs after the loop), the rows of the first file are
/// added twice (Copy() followed by the unconditional row loop), and a single-file list
/// returns null because lastdata is never assigned.
/// </summary>
/// <param name="filePath">Paths of the Excel files to read.</param>
/// <param name="isColumnName">True when the first row of each sheet holds the column names.</param>
/// <returns>The accumulated table, or null when nothing was merged or any exception occurred.</returns>
public static DataTable MuchExcelToDataTable(List filePath, bool isColumnName)
{
DataTable dataTable = null;// temporary table holding the data of one Excel file
DataTable lastdata = null;// the DataTable that is finally returned
FileStream fs = null;
DataColumn column = null;
DataRow dataRow = null;
IWorkbook workbook = null;
ISheet sheet = null;
IRow row = null;
ICell cell = null;
int startRow = 0;
bool isFisrtTime = true;
try
{
if (filePath.Count > 0)
{
foreach (var item in filePath)
{
using (fs = File.OpenRead(item))
{
// Excel 2007 format
if (item.IndexOf(".xlsx") > 0)
workbook = new XSSFWorkbook(fs);
// Excel 2003 format
else if (item.IndexOf(".xls") > 0)
workbook = new HSSFWorkbook(fs);
// NOTE(review): workbook is not reset per file, so a path matching neither
// extension re-uses the workbook left over from the previous iteration.
if (workbook != null)
{
sheet = workbook.GetSheetAt(0);// read the first sheet; looping over every sheet would also be possible
// A separate DataTable (lastdata) accumulates every file, while dataTable only holds the current one.
// At this point dataTable still holds the PREVIOUS file's rows (null on the first iteration).
if (dataTable != null)
{
if (isFisrtTime)
{
// First merge: copy structure and data of the previous file's table.
lastdata = dataTable.Copy();
isFisrtTime = false;
}
// No else here: this loop also runs right after Copy(), adding the same rows a second time.
for (int i = 0; i < dataTable.Rows.Count; i++)
{
lastdata.Rows.Add(dataTable.Rows[i].ItemArray);
int count = lastdata.Rows.Count;
}
}
dataTable = new DataTable();// re-instantiating the DataTable here on every iteration overwrites (wipes) the previous data
if (sheet != null)
{
int rowCount = sheet.LastRowNum;// index of the last row
if (rowCount > 0)
{
IRow firstRow = sheet.GetRow(0);// first row
int cellCount = firstRow.LastCellNum;// number of columns
// build the DataTable columns
if (isColumnName)
{
startRow = 1;// the first row holds column names, so data starts at the second row
for (int i = firstRow.FirstCellNum; i < cellCount; ++i)
{
cell = firstRow.GetCell(i);
if (cell != null)
{
if (cell.StringCellValue != null)
{
column = new DataColumn(cell.StringCellValue);
dataTable.Columns.Add(column);
}
}
}
}
else
{
// No header row: generate names column1, column2, ...
for (int i = firstRow.FirstCellNum; i < cellCount; ++i)
{
column = new DataColumn("column" + (i + 1));
dataTable.Columns.Add(column);
}
}
// fill the rows
for (int i = startRow; i <= rowCount; ++i)
{
row = sheet.GetRow(i);
if (row == null) continue;
dataRow = dataTable.NewRow();
// NOTE(review): dataRow is indexed by the sheet column index j, while columns were
// added starting at firstRow.FirstCellNum and skipping null header cells.
for (int j = row.FirstCellNum; j < cellCount; ++j)
{
cell = row.GetCell(j);
if (cell == null)
{
dataRow[j] = "";
}
else
{
// CellType (Unknown = -1, Numeric = 0, String = 1, Formula = 2, Blank = 3, Boolean = 4, Error = 5)
// Only Blank, Numeric and String are handled; other types leave the field unset.
switch (cell.CellType)
{
case CellType.Blank:
dataRow[j] = "";
break;
case CellType.Numeric:
short format = cell.CellStyle.DataFormat;
// handling of date formats (2015.12.5, 2015/12/5, 2015-12-5, ...)
if (format == 14 || format == 31 || format == 57 || format == 58)
dataRow[j] = cell.DateCellValue;
else
dataRow[j] = cell.NumericCellValue;
break;
case CellType.String:
dataRow[j] = cell.StringCellValue;
break;
}
}
}
dataTable.Rows.Add(dataRow);
}
}
}
}
}
}
}
return lastdata;
}
catch (Exception)
{
// Any failure is swallowed and reported to the caller as a null result.
if (fs != null)
{
fs.Close();
}
return null;
}
}
上面的写法把"合并数据"的代码放在了读取当前文件之前,下面的写法把它移到了读取当前文件之后,并为第一次合并加上了 else 分支。不同之处,请仔细对比吧!
/// <summary>
/// Reads the first sheet of every Excel file in <paramref name="filePath"/> and appends
/// all rows, positionally, into one DataTable. The column layout is taken from the first
/// file that yields at least one column.
/// </summary>
/// <param name="filePath">Paths of the .xls / .xlsx files to read. Other extensions are skipped.</param>
/// <param name="isColumnName">True when the first row of each sheet holds the column names.</param>
/// <returns>
/// The merged table, or null when the list is null/empty, no file produced any column,
/// or an exception occurred while reading (the exception is written to the trace output).
/// </returns>
public static DataTable MuchExcelToDataTable(List<string> filePath, bool isColumnName)
{
    if (filePath == null || filePath.Count == 0)
    {
        return null;
    }

    DataTable lastdata = null; // accumulated result
    try
    {
        foreach (string item in filePath)
        {
            DataTable dataTable = ReadFirstSheet(item, isColumnName);
            if (dataTable == null || dataTable.Columns.Count == 0)
            {
                // Unsupported or empty file: nothing to merge and no schema to adopt.
                continue;
            }

            if (lastdata == null)
            {
                // The first usable table is freshly built, so it can become the result directly.
                lastdata = dataTable;
                continue;
            }

            AppendRows(lastdata, dataTable);
        }

        return lastdata;
    }
    catch (Exception ex)
    {
        // Callers of this method expect null on failure; keep that contract but leave a trace.
        System.Diagnostics.Trace.TraceError("MuchExcelToDataTable failed: " + ex);
        return null;
    }
}

/// <summary>
/// Loads the first sheet of one Excel file into a new DataTable.
/// Returns null for a null/empty path, an unsupported extension or an empty sheet.
/// </summary>
private static DataTable ReadFirstSheet(string path, bool isColumnName)
{
    if (string.IsNullOrEmpty(path))
    {
        return null;
    }

    // Decide the format from the real extension, ignoring case, instead of a substring search.
    string extension = Path.GetExtension(path);
    bool isXlsx = string.Equals(extension, ".xlsx", StringComparison.OrdinalIgnoreCase);
    bool isXls = string.Equals(extension, ".xls", StringComparison.OrdinalIgnoreCase);
    if (!isXlsx && !isXls)
    {
        return null;
    }

    using (FileStream fs = File.OpenRead(path))
    {
        // A fresh workbook per file: a previous file's workbook can never leak into this one.
        IWorkbook workbook;
        if (isXlsx)
        {
            workbook = new XSSFWorkbook(fs); // Excel 2007+
        }
        else
        {
            workbook = new HSSFWorkbook(fs); // Excel 97-2003
        }

        ISheet sheet = workbook.GetSheetAt(0); // only the first sheet is read
        if (sheet == null)
        {
            return null;
        }

        IRow firstRow = sheet.GetRow(0);
        if (firstRow == null || firstRow.FirstCellNum < 0)
        {
            return null; // no cells in the first row: treat the sheet as empty
        }

        int firstCol = firstRow.FirstCellNum;
        int cellCount = firstRow.LastCellNum;
        DataTable table = new DataTable();

        // One DataTable column per sheet column in [firstCol, cellCount), so that
        // sheet column j always maps to table column (j - firstCol).
        for (int i = firstCol; i < cellCount; ++i)
        {
            string headerText = null;
            if (isColumnName)
            {
                ICell header = firstRow.GetCell(i);
                if (header != null)
                {
                    headerText = header.CellType == CellType.String
                        ? header.StringCellValue
                        : header.ToString();
                }
            }

            table.Columns.Add(MakeUniqueColumnName(table, headerText, i));
        }

        int startRow = isColumnName ? 1 : 0; // skip the header row when present
        int lastRow = sheet.LastRowNum;
        for (int i = startRow; i <= lastRow; ++i)
        {
            IRow row = sheet.GetRow(i);
            if (row == null)
            {
                continue;
            }

            DataRow dataRow = table.NewRow();
            for (int j = firstCol; j < cellCount; ++j)
            {
                dataRow[j - firstCol] = ReadCellValue(row.GetCell(j));
            }

            table.Rows.Add(dataRow);
        }

        return table;
    }
}

/// <summary>
/// Converts one cell into the value stored in the DataTable; missing and blank cells become "".
/// </summary>
private static object ReadCellValue(ICell cell)
{
    if (cell == null)
    {
        return "";
    }

    // CellType (Unknown = -1, Numeric = 0, String = 1, Formula = 2, Blank = 3, Boolean = 4, Error = 5)
    switch (cell.CellType)
    {
        case CellType.Blank:
            return "";
        case CellType.Numeric:
            short format = cell.CellStyle.DataFormat;
            // Data-format ids treated as dates (2015.12.5, 2015/12/5, 2015-12-5, ...).
            if (format == 14 || format == 31 || format == 57 || format == 58)
            {
                return cell.DateCellValue;
            }

            return cell.NumericCellValue;
        case CellType.String:
            return cell.StringCellValue;
        case CellType.Boolean:
            return cell.BooleanCellValue;
        default:
            // Formula / Error / Unknown: keep the cell's text form rather than dropping it.
            return cell.ToString();
    }
}

/// <summary>
/// Returns a column name that does not yet exist in <paramref name="table"/>, falling back
/// to "column{n}" (1-based sheet column) when no header text is available.
/// </summary>
private static string MakeUniqueColumnName(DataTable table, string preferred, int columnIndex)
{
    string baseName = string.IsNullOrEmpty(preferred) ? "column" + (columnIndex + 1) : preferred;
    string candidate = baseName;
    int suffix = 1;
    while (table.Columns.Contains(candidate))
    {
        candidate = baseName + "_" + suffix;
        suffix++;
    }

    return candidate;
}

/// <summary>
/// Appends every row of <paramref name="source"/> to <paramref name="target"/> by position,
/// widening the target first when the source has more columns.
/// </summary>
private static void AppendRows(DataTable target, DataTable source)
{
    // Rows.Add(object[]) rejects arrays longer than the column count, so grow the target first.
    while (target.Columns.Count < source.Columns.Count)
    {
        target.Columns.Add(MakeUniqueColumnName(target, null, target.Columns.Count));
    }

    foreach (DataRow sourceRow in source.Rows)
    {
        target.Rows.Add(sourceRow.ItemArray);
    }
}
}
下面贴上按钮事件的代码(界面上需要先添加一个 ListBox 控件):
/// <summary>
/// Batch read: passes every path listed in the listpath control to
/// MuchExeclToRead.MuchExcelToDataTable, binds the result to the grid and shows
/// the elapsed time and the row count.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
private void btn_muchread_Click(object sender, EventArgs e)
{
    if (listpath.Items.Count == 0)
    {
        MessageBox.Show("未选择文件夹,请选择后重试");
        return;
    }

    List<string> lp = new List<string>(listpath.Items.Count);
    foreach (object entry in listpath.Items)
    {
        lp.Add(entry.ToString());
    }

    // Restart so the displayed time covers this click only, not the sum of all clicks.
    stopwatch.Restart();
    DataTable dt = MuchExeclToRead.MuchExcelToDataTable(lp, true);
    dgv_ExeclData.DataSource = dt;
    stopwatch.Stop();
    label6.Text = stopwatch.ElapsedMilliseconds + "ms";

    if (dt == null)
    {
        // The reader returns null when nothing could be read or an error occurred.
        label4.Text = "0";
        MessageBox.Show("读取失败,请检查所选文件后重试");
        return;
    }

    label4.Text = dt.Rows.Count.ToString();
}
操作,调用都有了。希望能对你有帮助。欢迎指正,谢谢!
下面为我自己的亲测;
一次读取661个文件,数据量330000条;
时间在三四秒的样子。效率也算凑合;还可以优化;
好了,整篇内容就这些了;拜拜!