把CSDN泄露的账户数据转成SQL语句 - 补充导入数据库的操作

            string sql = "insert into dbo.CsdnAccount (UserName, Password, Email) values ('{0}', '{1}', '{2}') ";
            
            using (StreamReader streamreader = new StreamReader(@"D:\www.csdn.net.data"))
            {
                string s = streamreader.ReadLine();

                while (s != null)
                {
                    string username = s.Split('#')[0].Trim();
                    string password = s.Split('#')[1].Trim();
                    string email = s.Split('#')[2].Trim();

                    using (StreamWriter streamwrite = new StreamWriter(@"D:\www.csdn.net.txt", true))
                    {
                        streamwrite.WriteLine(string.Format(sql, username, password, email));
                        streamwrite.WriteLine("\n");
                    }

                    s = streamreader.ReadLine();
                }
            }

我要把这些数据直接导入数据库中,由于数据量非常庞大,因此我考虑使用SqlBulkCopy进行批量插入。


首先,创建一个DataTable,与数据库中的表对应:

USE [CSDNData]
GO

CREATE TABLE [dbo].[Account](
	[UserName] [nvarchar](128) NULL,
	[Password] [nvarchar](128) NULL,
	[Email] [nvarchar](256) NULL
) ON [PRIMARY]

GO

        private DataTable CreateTable()
        {
            DataTable dt = new DataTable();
            dt.Columns.Add(new DataColumn("UserName", typeof(string)));
            dt.Columns.Add(new DataColumn("Password", typeof(string)));
            dt.Columns.Add(new DataColumn("Email", typeof(string)));

            return dt;
        }

然后,定义一个SqlBulkCopy插入数据的函数:

        private void WriteToServer(DataTable dt)
        {
            string connectionString = @"Data Source=DST42796\SqlExpress;Initial Catalog=CSDNData;Integrated Security=True";

            using (SqlConnection conn = new SqlConnection(connectionString))
            {
                SqlBulkCopy bulk = new SqlBulkCopy(conn);
                bulk.DestinationTableName = "Account";
                bulk.BatchSize = dt.Rows.Count;

                if (dt != null && dt.Rows.Count != 0)
                {
                    conn.Open();
                    bulk.WriteToServer(dt);
                }

                bulk.Close();
            }
        }

最后,从本地下载的泄露账户文件中,把数据导入数据库中,每2000条导入一次:

            using (StreamReader streamreader = new StreamReader(@"D:\www.csdn.net.data"))
            {
                string s = streamreader.ReadLine();

                DataTable dt = CreateTable();

                while (!string.IsNullOrWhiteSpace(s))
                {
                    string username = s.Split('#')[0].Trim();
                    string password = s.Split('#')[1].Trim();
                    string email = s.Split('#')[2].Trim();

                    DataRow newrow = dt.NewRow();
                    newrow["UserName"] = username;
                    newrow["Password"] = password;
                    newrow["Email"] = email;

                    dt.Rows.Add(newrow);

                    if (dt.Rows.Count % 2000 == 0)
                    {
                        WriteToServer(dt);

                        Thread.Sleep(2000);

                        dt = CreateTable();
                    }

                    s = streamreader.ReadLine();
                }
            }

每2000条效率太差,优化后的程序是每一百万条导入一次:

            using (StreamReader streamreader = new StreamReader(@"D:\www.csdn.net.data", Encoding.GetEncoding("GB2312")))
            {
                string s = streamreader.ReadLine();

                DataTable dt = CreateTable();
                
                int i = 0;
                while (!string.IsNullOrWhiteSpace(s))
                {
                    string username = s.Split('#')[0].Trim();
                    string password = s.Split('#')[1].Trim();
                    string email = s.Split('#')[2].Trim();

                    DataRow newrow = dt.NewRow();
                    newrow["UserName"] = username;
                    newrow["Password"] = password;
                    newrow["Email"] = email;

                    dt.Rows.Add(newrow);

                    if (dt.Rows.Count % 1000000 == 0)
                    {
                        WriteToServer(dt);

                        Thread.Sleep(30000);

                        dt = CreateTable();
                    }

                    s = streamreader.ReadLine();
                    i++;
                }

                if (i > 6000000)
                {
                    WriteToServer(dt);
                }
            }


你可能感兴趣的:(把CSDN泄露的账户数据转成SQL语句 - 补充导入数据库的操作)