如题,百万级数据量csv入库
思路
读取csv文件转成DataTable,分批次步长1W批量入库,其中csv单元格内逗号做特殊处理防止串列
实现
using Dapper;
using System;
using System.Collections.Generic;
using System.Data;
using System.Data.SqlClient;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace CSVToSql
{
class Program
{
static void Main(string[] args)
{
IDbConnection connection = ConnectionFactory.ASConnection;
int count = 0;
string readerPath = @"C:\Users\kbjh372\Desktop\CRM数据表结构\medical_examples\test\PROFESSION_20210604131046_001.csv";
string writerPath = @"C:\Users\kbjh372\Desktop\CRM数据表结构\medical_examples\test\PROFESSION_20210604131046_write.csv";
//if (File.Exists(writerPath))
//{
// File.Delete(writerPath);
//}
//using (StreamReader reader = new StreamReader(readerPath, Encoding.UTF8))
//{
// while (!reader.EndOfStream)
// {
// string line = reader.ReadLine();
// using (StreamWriter writer = new StreamWriter(writerPath, true, Encoding.Default))
// {
// writer.WriteLine(line);
// }
// count++;
// Console.WriteLine("正在转码第{0}行,请稍等", count);
// }
//}
//Console.WriteLine("转码完成,共转码{0}条数据", count);
Console.WriteLine("开始导入数据,请稍等");
/*
//开始向数据库导入数据
string sql = "BULK INSERT dbo.CRM_PROFESSION FROM 'C:\\Users\\kbjh372\\Desktop\\CRM数据表结构\\medical_examples\\test\\PROFESSION_20210604131046_write.csv' WITH(FIELDTERMINATOR=',',BATCHSIZE=100000,FIRSTROW=2)";
try
{
//DbHelper.ExecuteSql(sql);//使用的是方法ExecuteNonQuery()方法,具体的Helper就不上传了。
connection.Execute(sql);
}
catch (Exception ex)
{
using (StreamWriter writerLog = new StreamWriter(@"C:\Users\kbjh372\Desktop\CRM数据表结构\medical_examples\test\Log\Log.txt"))
{
writerLog.WriteLine(ex.ToString());
}
}
*/
var dt = CSVFileHelper.OpenCSV(writerPath);
List<DataTable> dts = CSVFileHelper.SplitDataTable(dt, 10000);
//声明一个事务对象
SqlTransaction tran = null;//声明一个事务对象
try
{
using (SqlConnection conn = new SqlConnection("server=.;uid=sa;pwd=..;database=..;"))
{
conn.Open();//打开链接
for (int i = 0; i < dts.Count; i++)
{
using (tran = conn.BeginTransaction())
{
using (SqlBulkCopy copy = new SqlBulkCopy(conn, SqlBulkCopyOptions.Default, tran))
{
copy.DestinationTableName = "CRM_PROFESSION"; //指定服务器上目标表的名称
copy.WriteToServer(dts[i]); //执行把DataTable中的数据写入DB
tran.Commit(); //提交事务
}
}
}
}
}
catch (Exception ex)
{
if (null != tran)
tran.Rollback();
//LogHelper.Add(ex);
}
/*
for (int i = 0; i < dt.Rows.Count; i++)
{
var CODE = dt.Rows[i]["CODE"];
var NAME = dt.Rows[i]["NAME"];
var TYPE = dt.Rows[i]["TYPE"];
var GENDER = dt.Rows[i]["GENDER"];
var MOBILE = dt.Rows[i]["MOBILE"];
var EMAIL = dt.Rows[i]["EMAIL"];
var INSTITUTION_CODE = dt.Rows[i]["INSTITUTION_CODE"];
var INSTITUTION_NAME = dt.Rows[i]["INSTITUTION_NAME"];
var DEPARTMENT_CODE = dt.Rows[i]["DEPARTMENT_CODE"];
var DEPARTMENT_NAME = dt.Rows[i]["DEPARTMENT_NAME"];
var TITLE_CODE = dt.Rows[i]["TITLE_CODE"];
var TITLE_NAME = dt.Rows[i]["TITLE_NAME"];
var POSITION_CODE = dt.Rows[i]["POSITION_CODE"];
var POSITION_NAME = dt.Rows[i]["POSITION_NAME"];
var QUALIFICATION = dt.Rows[i]["QUALIFICATION"];
var MODIFIED_DATE = dt.Rows[i]["MODIFIED_DATE"];
string sql = @"insert into [CRM_PROFESSION] values (@CODE,@NAME,@TYPE,@GENDER,@MOBILE,@EMAIL,@INSTITUTION_CODE,
@INSTITUTION_NAME,@DEPARTMENT_CODE,@DEPARTMENT_NAME,@TITLE_CODE,
@TITLE_NAME,@POSITION_CODE,@POSITION_NAME,@QUALIFICATION,@MODIFIED_DATE)";
connection.Execute(sql, new
{
CODE = CODE.ToString().Trim('"'),
NAME = NAME.ToString().Trim('"'),
TYPE = TYPE.ToString().Trim('"'),
GENDER = GENDER.ToString().Trim('"'),
MOBILE = MOBILE.ToString().Trim('"'),
EMAIL = EMAIL.ToString().Trim('"'),
INSTITUTION_CODE = INSTITUTION_CODE.ToString().Trim('"'),
INSTITUTION_NAME = INSTITUTION_NAME.ToString().Trim('"'),
DEPARTMENT_CODE = DEPARTMENT_CODE.ToString().Trim('"'),
DEPARTMENT_NAME = DEPARTMENT_NAME.ToString().Trim('"'),
TITLE_CODE = TITLE_CODE.ToString().Trim('"'),
TITLE_NAME = TITLE_NAME.ToString().Trim('"'),
POSITION_CODE = POSITION_CODE.ToString().Trim('"'),
POSITION_NAME = POSITION_NAME.ToString().Trim('"'),
QUALIFICATION = QUALIFICATION.ToString().Trim('"'),
MODIFIED_DATE = Convert.ToDateTime(MODIFIED_DATE.ToString().Trim('"')),
});
}
*/
Console.WriteLine("数据导入完毕");
Console.ReadKey();
}
}
}
工具类
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace CSVToSql
{
public class CSVFileHelper
{
///
/// 将CSV文件的数据读取到DataTable中
///
/// CSV文件路径
/// 返回读取了CSV数据的DataTable
public static DataTable OpenCSV(string filePath)
{
Encoding encoding = GetType(filePath); //Encoding.ASCII;//
DataTable dt = new DataTable();
//加一个ID列名
DataColumn dc0 = new DataColumn("ID");
dt.Columns.Add(dc0);
FileStream fs = new FileStream(filePath, System.IO.FileMode.Open, System.IO.FileAccess.Read);
//StreamReader sr = new StreamReader(fs, Encoding.UTF8);
StreamReader sr = new StreamReader(fs, encoding);
//string fileContent = sr.ReadToEnd();
//encoding = sr.CurrentEncoding;
//记录每次读取的一行记录
string strLine = "";
//记录每行记录中的各字段内容
string[] aryLine = null;
string[] tableHead = null;
//标示列数
int columnCount = 0;
//标示是否是读取的第一行
bool IsFirst = true;
//逐行读取CSV中的数据
while ((strLine = sr.ReadLine()) != null)
{
//strLine = Common.ConvertStringUTF8(strLine, encoding);
//strLine = Common.ConvertStringUTF8(strLine);
if (IsFirst == true)
{
tableHead = strLine.Split(',');
IsFirst = false;
columnCount = tableHead.Length;
//创建列
for (int i = 0; i < columnCount; i++)
{
DataColumn dc = new DataColumn(tableHead[i]);
dt.Columns.Add(dc);
}
}
else
{
//aryLine = strLine.Split(',');
aryLine = CSVstrToArry(strLine);
DataRow dr = dt.NewRow();
for (int j = 0; j < columnCount; j++)
{
//dr[j] = aryLine[j];
dr[j + 1] = aryLine[j].Trim('"'); //串一列ID出来
}
dt.Rows.Add(dr);
}
}
if (aryLine != null && aryLine.Length > 0)
{
dt.DefaultView.Sort = tableHead[0] + " " + "asc";
}
sr.Close();
fs.Close();
return dt;
}
/// 给定文件的路径,读取文件的二进制数据,判断文件的编码类型
/// 文件路径
/// 文件的编码类型
public static System.Text.Encoding GetType(string FILE_NAME)
{
System.IO.FileStream fs = new System.IO.FileStream(FILE_NAME, System.IO.FileMode.Open,
System.IO.FileAccess.Read);
System.Text.Encoding r = GetType(fs);
fs.Close();
return r;
}
/// 通过给定的文件流,判断文件的编码类型
/// 文件流
/// 文件的编码类型
public static System.Text.Encoding GetType(System.IO.FileStream fs)
{
byte[] Unicode = new byte[] { 0xFF, 0xFE, 0x41 };
byte[] UnicodeBIG = new byte[] { 0xFE, 0xFF, 0x00 };
byte[] UTF8 = new byte[] { 0xEF, 0xBB, 0xBF }; //带BOM
System.Text.Encoding reVal = System.Text.Encoding.Default;
System.IO.BinaryReader r = new System.IO.BinaryReader(fs, System.Text.Encoding.Default);
int i;
int.TryParse(fs.Length.ToString(), out i);
byte[] ss = r.ReadBytes(i);
if (IsUTF8Bytes(ss) || (ss[0] == 0xEF && ss[1] == 0xBB && ss[2] == 0xBF))
{
reVal = System.Text.Encoding.UTF8;
}
else if (ss[0] == 0xFE && ss[1] == 0xFF && ss[2] == 0x00)
{
reVal = System.Text.Encoding.BigEndianUnicode;
}
else if (ss[0] == 0xFF && ss[1] == 0xFE && ss[2] == 0x41)
{
reVal = System.Text.Encoding.Unicode;
}
r.Close();
return reVal;
}
/// 判断是否是不带 BOM 的 UTF8 格式
///
///
private static bool IsUTF8Bytes(byte[] data)
{
int charByteCounter = 1; //计算当前正分析的字符应还有的字节数
byte curByte; //当前分析的字节.
for (int i = 0; i < data.Length; i++)
{
curByte = data[i];
if (charByteCounter == 1)
{
if (curByte >= 0x80)
{
//判断当前
while (((curByte <<= 1) & 0x80) != 0)
{
charByteCounter++;
}
//标记位首位若为非0 则至少以2个1开始 如:110XXXXX...........1111110X
if (charByteCounter == 1 || charByteCounter > 6)
{
return false;
}
}
}
else
{
//若是UTF-8 此时第一位必须为1
if ((curByte & 0xC0) != 0x80)
{
return false;
}
charByteCounter--;
}
}
if (charByteCounter > 1)
{
throw new Exception("非预期的byte格式");
}
return true;
}
///
/// 分解数据表
///
/// 需要分解的表
/// 每个表包含的数据量
///
public static List<DataTable> SplitDataTable(DataTable originalTab, int rowsNum)
{
//获取所需创建的表数量
int tableNum = originalTab.Rows.Count / rowsNum;
//获取数据余数
int remainder = originalTab.Rows.Count % rowsNum;
List<DataTable> ds = new List<DataTable>();
//如果只需要创建1个表,直接将原始表存入DataSet
if (tableNum == 0)
{
ds.Add(originalTab);
}
else
{
DataTable[] tableSlice = new DataTable[tableNum];
for (int c = 0; c < tableNum; c++)
{
tableSlice[c] = new DataTable();
foreach (DataColumn dc in originalTab.Columns)
{
tableSlice[c].Columns.Add(dc.ColumnName, dc.DataType);
}
}
for (int i = 0; i < tableNum; i++)
{
if (i != tableNum - 1)
{
for (int j = i * rowsNum; j < ((i + 1) * rowsNum); j++)
{
tableSlice[i].ImportRow(originalTab.Rows[j]);
}
}
else
{
for (int k = i * rowsNum; k < ((i + 1) * rowsNum + remainder); k++)
{
tableSlice[i].ImportRow(originalTab.Rows[k]);
}
}
}
foreach (DataTable dt in tableSlice)
{
ds.Add(dt);
}
}
return ds;
}
///
/// 跳过引号中的逗号,进行逗号分隔(字段内容中的逗号不参与分隔)
///
///
///
public static string[] CSVstrToArry(string splitStr)
{
var newstr = string.Empty;
List<string> sList = new List<string>();
bool isSplice = false;
string[] array = splitStr.Split(new char[] { ',' });
foreach (var str in array)
{
if (!string.IsNullOrEmpty(str) && str.IndexOf('"') > -1)
{
var firstchar = str.Substring(0, 1);
var lastchar = string.Empty;
if (str.Length > 0)
{
lastchar = str.Substring(str.Length - 1, 1);
}
if (firstchar.Equals("\"") && !lastchar.Equals("\""))
{
isSplice = true;
}
if (lastchar.Equals("\""))
{
if (!isSplice)
newstr += str;
else
newstr = newstr + "," + str;
isSplice = false;
}
}
else
{
if (string.IsNullOrEmpty(newstr))
newstr += str;
}
if (isSplice)
{
//添加因拆分时丢失的逗号
if (string.IsNullOrEmpty(newstr))
newstr += str;
else
newstr = newstr + "," + str;
}
else
{
sList.Add(newstr.Replace("\"", "").Trim());//去除字符中的双引号和首尾空格
newstr = string.Empty;
}
}
return sList.ToArray();
}
}
}
仅供学习参考,如有侵权联系我删除