// Parse each "user # password # email" record of the leaked dump file and emit
// one INSERT statement per record into a text file.
// NOTE(review): field values are interpolated into the SQL text with no
// escaping — a single quote in any field yields broken (and injectable) SQL.
// Acceptable only for a one-off offline dump; prefer parameterized inserts
// if these statements are ever executed against real input.
string sql = "insert into dbo.CsdnAccount (UserName, Password, Email) values ('{0}', '{1}', '{2}') ";

using (StreamReader streamreader = new StreamReader(@"D:\www.csdn.net.data"))
// Fix: open the output writer ONCE in append mode instead of re-opening the
// file for every single input line (the original paid a file open/close per record).
using (StreamWriter streamwrite = new StreamWriter(@"D:\www.csdn.net.txt", true))
{
    string s = streamreader.ReadLine();
    while (s != null)
    {
        // Split once per line instead of three times.
        string[] parts = s.Split('#');
        if (parts.Length >= 3) // skip malformed lines instead of crashing with IndexOutOfRangeException
        {
            string username = parts[0].Trim();
            string password = parts[1].Trim();
            string email = parts[2].Trim();

            streamwrite.WriteLine(string.Format(sql, username, password, email));
            streamwrite.WriteLine("\n"); // keep the original blank-line separator between statements
        }
        s = streamreader.ReadLine();
    }
}
我要把这些数据直接导入数据库中,由于数据量非常庞大,因此我考虑使用SqlBulkCopy进行批量插入。
首先,创建一个DataTable,与数据库中的表对应:
-- Destination table for the bulk-imported CSDN account dump.
-- All three columns are nullable nvarchar; no keys or indexes are declared,
-- which keeps SqlBulkCopy inserts fast (heap table on the PRIMARY filegroup).
-- NOTE(review): Password is stored as plain text because the source dump is
-- already plain text — do not reuse this schema for live credentials.
USE [CSDNData] GO CREATE TABLE [dbo].[Account]( [UserName] [nvarchar](128) NULL, [Password] [nvarchar](128) NULL, [Email] [nvarchar](256) NULL ) ON [PRIMARY] GO
/// <summary>
/// Builds an empty in-memory table whose string columns mirror dbo.Account
/// (UserName, Password, Email), ready to be filled and bulk-copied.
/// </summary>
/// <returns>A new, empty <see cref="DataTable"/> with the three account columns.</returns>
private DataTable CreateTable()
{
    var table = new DataTable();
    foreach (string columnName in new[] { "UserName", "Password", "Email" })
    {
        table.Columns.Add(columnName, typeof(string));
    }
    return table;
}
/// <summary>
/// Bulk-copies all rows of <paramref name="dt"/> into dbo.Account in a single batch.
/// No-op when the table is null or empty.
/// </summary>
/// <param name="dt">Rows to insert; column layout must match dbo.Account.</param>
private void WriteToServer(DataTable dt)
{
    // Fix: the original read dt.Rows.Count (for BatchSize) BEFORE its
    // dt != null check, so a null table threw NullReferenceException.
    // Guard clause first, then do the work.
    if (dt == null || dt.Rows.Count == 0)
    {
        return;
    }

    string connectionString = @"Data Source=DST42796\SqlExpress;Initial Catalog=CSDNData;Integrated Security=True";
    using (SqlConnection conn = new SqlConnection(connectionString))
    // Fix: SqlBulkCopy is IDisposable; the original's bulk.Close() was
    // skipped whenever WriteToServer threw. 'using' always releases it.
    using (SqlBulkCopy bulk = new SqlBulkCopy(conn))
    {
        bulk.DestinationTableName = "Account";
        // Send the whole table as one batch, matching the original behavior.
        bulk.BatchSize = dt.Rows.Count;
        conn.Open();
        bulk.WriteToServer(dt);
    }
}
// Stream the dump file and bulk-insert into dbo.Account in batches of 2,000 rows.
using (StreamReader streamreader = new StreamReader(@"D:\www.csdn.net.data"))
{
    const int BatchSize = 2000;
    DataTable dt = CreateTable();
    string s = streamreader.ReadLine();
    // Fix: the original looped while !IsNullOrWhiteSpace(s), so a single blank
    // line in the middle of the dump silently aborted the entire import.
    // Read to EOF and skip blank/malformed lines instead.
    while (s != null)
    {
        string[] parts = s.Split('#'); // split once per line, not three times
        if (!string.IsNullOrWhiteSpace(s) && parts.Length >= 3)
        {
            DataRow newrow = dt.NewRow();
            newrow["UserName"] = parts[0].Trim();
            newrow["Password"] = parts[1].Trim();
            newrow["Email"] = parts[2].Trim();
            dt.Rows.Add(newrow);
        }
        if (dt.Rows.Count == BatchSize)
        {
            WriteToServer(dt);
            Thread.Sleep(2000); // deliberate throttle between batches — TODO confirm still wanted
            dt = CreateTable(); // start an empty batch
        }
        s = streamreader.ReadLine();
    }
    // Fix: the original only wrote when Rows.Count % 2000 == 0, so a final
    // partial batch (up to 1,999 rows) was silently lost. Always flush the tail.
    if (dt.Rows.Count > 0)
    {
        WriteToServer(dt);
    }
}
每2000条提交一次批量太小:频繁地打开/关闭数据库连接和等待服务器往返,使整体效率太差。优化后的程序改为每一百万条导入一次:
// Optimized import loop: bulk-insert in batches of 1,000,000 rows to cut
// per-batch connection and round-trip overhead.
// NOTE(review): GB2312 requires CodePagesEncodingProvider registration on
// .NET Core/.NET 5+ — works as-is only on .NET Framework; confirm target runtime.
using (StreamReader streamreader = new StreamReader(@"D:\www.csdn.net.data", Encoding.GetEncoding("GB2312")))
{
    const int BatchSize = 1000000;
    DataTable dt = CreateTable();
    string s = streamreader.ReadLine();
    // Fix: the original looped while !IsNullOrWhiteSpace(s), so one blank line
    // mid-file aborted the rest of the import. Read to EOF, skip bad lines.
    while (s != null)
    {
        string[] parts = s.Split('#'); // split once per line, not three times
        if (!string.IsNullOrWhiteSpace(s) && parts.Length >= 3)
        {
            DataRow newrow = dt.NewRow();
            newrow["UserName"] = parts[0].Trim();
            newrow["Password"] = parts[1].Trim();
            newrow["Email"] = parts[2].Trim();
            dt.Rows.Add(newrow);
        }
        if (dt.Rows.Count == BatchSize)
        {
            WriteToServer(dt);
            Thread.Sleep(30000); // deliberate pause to let the server absorb the batch — TODO confirm
            dt = CreateTable(); // start an empty batch
        }
        s = streamreader.ReadLine();
    }
    // Fix: the original flushed the remainder only when the line counter
    // exceeded 6,000,000, losing the tail rows of any smaller dump.
    // Unconditionally flush whatever is left.
    if (dt.Rows.Count > 0)
    {
        WriteToServer(dt);
    }
}