数据练习集
练习数据集 user_data.csv 下载地址:https://download.csdn.net/download/Hudas/85712209?spm=1001.2014.3001.5503
导入数据集
import pandas as pd
# Load the practice dataset from the desktop CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer=r'C:\Users\HP\Desktop\user_data.csv')
# Inspect the dimensions (rows, columns); the article reports
# 1048575 records and 6 fields for this file.
data.shape  # (1048575, 6)
import pandas as pd
from sqlalchemy import create_engine
# Create the SQLAlchemy engine for the local MySQL `test` database
# (PyMySQL driver). NOTE(review): credentials are hard-coded for the tutorial.
engine = create_engine('mysql+pymysql://root:123456@localhost:3306/test')
# Read the whole CSV into memory.
data = pd.read_csv(r'C:\Users\HP\Desktop\user_data.csv')
# Write the DataFrame to the MySQL table `user01`, 100000 rows per batch.
# `index` is documented as a bool: pass False explicitly (instead of None)
# so the DataFrame index is not written as an extra column.
data.to_sql('user01', engine, chunksize=100000, index=False)
print('存入成功!')
结果展示
PyMySQL 是在 Python 3.x 版本中用于连接 MySQL 服务器的一个库。
注意:在代码中使用 import pymysql 之前,需要先安装该库(例如执行 pip install pymysql)。
创建数据表user02
-- Recreate the table from scratch: an existing `user02` (and its rows) is dropped first.
DROP TABLE IF EXISTS `user02`;
-- Target table for the chunked PyMySQL import. Every column is nullable.
-- The insert statement later in this article fills all columns except
-- `user_geohash`, which therefore stays NULL for rows loaded by that script.
CREATE TABLE `user02` (
`user_id` bigint(20) DEFAULT NULL,
`item_id` bigint(20) DEFAULT NULL,
`behavior_type` bigint(20) DEFAULT NULL,
`user_geohash` text CHARACTER SET utf8 COLLATE utf8_general_ci,
`item_category` bigint(20) DEFAULT NULL,
-- `time` is stored as text, not as a date/time type.
`time` text CHARACTER SET utf8 COLLATE utf8_general_ci
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;
import pandas as pd
import pymysql
# Connection settings: host, user, password, database name, port, charset.
# NOTE(review): credentials are hard-coded for the tutorial.
conn = pymysql.connect(
    host='127.0.0.1',
    user='root',
    passwd='123456',
    db='test',
    port=3306,
    charset="utf8")

# Chunked processing: the file is large, so only 100000 rows are handled
# at a time.
big_size = 100000

# CSV columns copied into user02, in the order of the insert statement.
columns = ['user_id', 'item_id', 'behavior_type', 'item_category', 'time']

# The insert statement does not depend on the chunk, so build it once.
_values = ",".join(['%s', ] * 5)
sql = """insert into user02(user_id,item_id,behavior_type,item_category,time) values(%s)""" % _values

try:
    # One cursor serves every chunk; the `with` block closes the cursor (and
    # the CSV reader) before the connection is closed in `finally`.
    with conn.cursor() as cursor, \
            pd.read_csv(r'C:\Users\HP\Desktop\user_data.csv', chunksize=big_size) as reader:
        for df in reader:
            print('处理:', len(df))
            # One plain tuple per row, restricted to the inserted columns.
            datas = list(df[columns].itertuples(index=False, name=None))
            # Batch-insert the chunk, then commit it.
            cursor.executemany(sql, datas)
            conn.commit()
finally:
    # Always release the connection, even when a chunk fails.
    conn.close()
print('存入成功!')
结果展示
提示(Tips):使用 connect() 方法连接数据库时,额外设置字符集 charset="utf8"(注意写法是 utf8,而不是 utf-8),可以防止插入中文时出错
扩展阅读资料
利用 Python 操作 MySQL 数据库:https://blog.csdn.net/Hudas/article/details/124255734