批量数据导入
- 导入数据的需求都是十几万的数据迁移,一条一条执行太慢。pymysql 提供了批量执行sql的方法
详情参考:简单实现pymysql批量写入数据方法
import os
import sys
root_path = os.getcwd()
sys.path.append(root_path)
from datetime import datetime
import time as runTime
import pymysql
import pathlib
import openpyxl
root_path = pathlib.Path.cwd()
file_path = root_path.joinpath("Tools/MasterDataBuilder/Template/ResourceLedger.xlsx")
class ImportCustomData(object):
def __init__(self):
print("初始化")
self.host = "localhost"
self.port = 3306
self.database = "db1001"
self.user = "435431321"
self.password = "43214321"
self.conn = None
self.OperateTypeDict = {
"入库": 0,
"领用": 1,
"借用": 2,
"退还": 3,
"报废": 4,
"资产转移": 5,
"盘点修改": 6,
"取消报废": 7,
"送检": 9,
"检验送回": 10,
"待报废": 11,
"盘点删除": 12,
"转账": 13
}
def __enter__(self):
print("连接数据库 ... ")
try:
self.conn = pymysql.connect(host=self.host, port=self.port, user=self.user,
password=self.password, database=self.database)
except Exception as e:
print("连接数据库失败:{}".format(e))
print("连接数据库成功")
return self
def __exit__(self, exc_type, exc_val, exc_tb):
print("断开数据库连接")
if self.conn:
self.conn.close()
def run(self):
start_time = datetime.now()
rows = self.read_excel(file_path=file_path)
sql_str, data =self.BuildResourceLedger(rows=rows)
self.execute_sql(sql_str, data)
end_time = datetime.now()
print("耗时:{} s".format((end_time - start_time).seconds))
def read_excel(self, file_path=None):
print("读取数据:start ... ")
rows = []
workbook = openpyxl.load_workbook(file_path)
sheet = workbook["RecordSet"]
row_count = sheet.max_row
key_list = [row.value for row in sheet[1]]
for i in range(3, row_count):
row_values = [row.value for row in sheet[i]]
row = dict()
row["Name"] = str(round(runTime.time() * 1000000) + i)
for idx, value in enumerate(row_values):
if value or value == 0:
if idx == 4:
value = self.OperateTypeDict[value]
if idx == 6:
value = datetime.strptime(value, "%Y/%m/%d %H:%M:%S")
value = value.strftime("%Y-%m-%d %H:%M:%S")
row[key_list[idx]] = value
rows.append(row)
print("读取数据:end")
print("总共 {} 行数据".format(len(rows)))
return rows
def BuildResourceLedger(self, rows=None):
print("处理sql数据: start ... ")
insert_sql = """insert into `DataModel_T_ResourceLedger`(ResourceCode, SerialCode, SpecificationNewModel, OperateQuantity, OperateType, CreatedBy, CreatedOn, Remark, ResponsibleDepartment, SubArea, ResourceName, ResponsiblePerson, Name, ModifiedBy, ModifiedOn, Version, IsTemplate, UniversalState) values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 0, 0, 2)"""
data = []
for row_dict in rows:
item = (str(row_dict.get("ResourceCode", "")), row_dict.get("SerialCode", ""), row_dict.get("SpecificationNewModel", ""),
row_dict.get("OperateQuantity", ""), row_dict.get("OperateType", ""), row_dict.get("CreatedBy", ""),
row_dict.get("CreatedOn", ""), row_dict.get("Remark", ""), row_dict.get("ResponsibleDepartment", ""),
row_dict.get("SubArea", ""), row_dict.get("ResourceName", ""), row_dict.get("ResponsiblePerson", ""),
row_dict.get("Name", ""), row_dict.get("CreatedBy", ""), row_dict.get("CreatedOn", ""))
data.append(item)
print("处理sql数据: end")
return insert_sql, data
def execute_sql(self, sql_str, data):
print("执行sql:start ... ")
cursor = self.conn.cursor()
try:
cursor.executemany(sql_str, data)
total = cursor.fetchall()
print("执行成功!受影响行数:{}".format(total))
self.conn.commit()
except Exception as e:
print("执行失败:{}".format(e))
self.conn.rollback()
finally:
cursor.close()
print("执行sql:end")
if __name__ == '__main__':
with ImportCustomData() as import_custom_data:
import_custom_data.run()