def mysql_select(sql):
    """Run a SELECT statement and return every row of the result set.

    Args:
        sql: A complete, already-formatted SELECT statement (no bind params).

    Returns:
        The full result set from ``cursor.fetchall()`` — a tuple of row tuples.

    Note:
        The connection is always closed, even when the query raises
        (the original version leaked both cursor and connection on error).
    """
    conn = pymysql.connect(
        host='192.168.1.101',
        user='root',
        passwd='***',
        db='shuiben_crawler',
        port=3306,
        charset="utf8")
    try:
        # pymysql cursors are context managers: the cursor is closed on exit.
        with conn.cursor() as cursor:
            print("sql: " + sql)
            cursor.execute(sql)
            # Fetch all rows of the result set at once.
            return cursor.fetchall()
    finally:
        # Release the connection even if execute/fetch raised.
        conn.close()
查询出的数据是一个元组:
if __name__ == "__main__":
    # Pull every question row, ordered by id, and unpack each tuple.
    q_sql = "select question_id,question_desc from gpt_exams_dw order by question_id"
    rows = mysql_select(q_sql)
    for row in rows:
        question_id = row[0]
        question_desc = row[1]
def mysql_insert(sql):
    """Execute a single INSERT statement and commit it.

    Args:
        sql: A complete, already-formatted INSERT statement.

    Note:
        The original closed the connection *before* the cursor and leaked
        both on error; cleanup now runs in the correct order via
        try/finally and the cursor context manager.
    """
    conn = pymysql.connect(
        host='192.168.1.101',
        user='root',
        passwd='***',
        db='shuiben_crawler',
        port=3306,
        charset="utf8")
    try:
        # Cursor is closed automatically when the `with` block exits.
        with conn.cursor() as cursor:
            print("sql: " + sql)
            cursor.execute(sql)
        # Persist the insert; without commit the row is discarded.
        conn.commit()
    finally:
        conn.close()
使用
if __name__ == "__main__":
    question_reas = "中华人民共和国境内增值税纳税人均应按照公告的规定进行增值税纳税申报"
    question_id = 1
    # Bug fix: the placeholders were written as "s%" (an invalid %-format,
    # raising ValueError) and the string value was unquoted. Use a quoted
    # '%s' for the text and %d for the integer id.
    sql = "insert into huikao8_exam_dev(`question_desc`, `question_id`) values('%s', %d)" % (
        question_reas, question_id)
    mysql_insert(sql)
def mysql_update(sql):
    """Execute a single UPDATE statement and commit it.

    A single-row update is identical to a single-row insert apart from the
    SQL text, so this mirrors ``mysql_insert``.

    Args:
        sql: A complete, already-formatted UPDATE statement.

    Note:
        The original committed a real password here while the sibling
        functions used '***' — masked for consistency; never commit live
        credentials. It also closed the connection before the cursor and
        leaked both on error; fixed with try/finally.
    """
    conn = pymysql.connect(
        host='192.168.1.101',
        user='root',
        passwd='***',  # masked — do not commit real credentials
        db='shuiben_crawler',
        port=3306,
        charset="utf8")
    try:
        # Cursor is closed automatically when the `with` block exits.
        with conn.cursor() as cursor:
            print("sql: " + sql)
            cursor.execute(sql)
        conn.commit()
    finally:
        conn.close()
单条插入和单条更新其实是一样的,只不过传入的SQL不同而已
if __name__ == "__main__":
    # Fragment from a per-row loop: `l`, `clean_data` and `fu` are defined
    # in the surrounding context.
    question_id = l[0]
    qustion_desc = l[1]
    question_desc_distinct = clean_data(qustion_desc)
    sql = "update gpt_exams_dw set question_desc_distinct='%s' where question_id=%d" % (
        question_desc_distinct, question_id)
    fu.mysql_update(sql)
def mysql_insert_batch(datas, sql):
    """Insert many rows in one round trip via ``cursor.executemany``.

    Args:
        datas: Sequence of parameter tuples, one per row — e.g.
            ``((111111, 'haha', 13), (22222, 'hehe', 34))`` or the
            equivalent list of tuples.
        sql: An INSERT statement with one ``%s`` placeholder per column.

    Note:
        The original closed the connection before the cursor and leaked
        both on error; cleanup now happens in try/finally.
    """
    conn = pymysql.connect(
        host='192.168.1.101',
        # host='localhost',
        user='root',
        passwd='***',
        db='shuiben_crawler',
        port=3306,
        charset="utf8")
    try:
        print("sql: " + sql)
        # executemany sends the whole parameter set in a single batch.
        with conn.cursor() as cursor:
            cursor.executemany(sql, datas)
        # Commit once for the entire batch.
        conn.commit()
        print("本次成功插入%s条数据!!!" % len(datas))
    finally:
        conn.close()
调用:
if __name__ == "__main__":
    result = ana_data(question_list, question_subj, cate, chapter)
    # One "%s" placeholder per column of the first row.
    _values = ",".join(['%s', ] * len(result[0]))
    sql = "insert into huikao8_exam(`question_id_source`, `question_desc`, `question_desc_text`, `question_answ`, `question_reas`, `question_reas_text`, `question_knlg`, `question_type`, `question_subj`, `question_surc`, `create_time`) values(%s)" % _values
    # result is a list/tuple of row tuples.
    # Bug fix: the original called mysql_insert(result, sql), but that
    # function takes a single sql argument — the batch variant is intended.
    mysql_insert_batch(result, sql)
def mysql_update_batch(sql, update_datas):
    """Update many rows in one round trip via ``cursor.executemany``.

    Batch update uses the same mechanism as batch insert — only the SQL
    text differs.

    Args:
        sql: An UPDATE statement with ``%s`` placeholders.
        update_datas: Sequence of parameter tuples, one per row.

    Note:
        On any error the transaction is rolled back and the exception is
        printed (best-effort, matching the original behavior). The cursor
        is now closed in ``finally`` — the original never closed it.
    """
    conn = pymysql.connect(
        host='192.168.1.101',
        user='root',
        passwd='***',
        db='shuiben_crawler',
        port=3306,
        charset="utf8")
    cursor = conn.cursor()
    try:
        # executemany returns the affected row count.
        res = cursor.executemany(sql, update_datas)
        print(res)
        conn.commit()
    except Exception as e:
        # Best-effort: report the error and undo the partial batch.
        print(e)
        conn.rollback()
    finally:
        cursor.close()
        conn.close()
批量更新和批量插入用的也是相同的方法,只不过传入的SQL不同:
if __name__ == "__main__":
    # Build the parameter set row by row, then update in one batch.
    q_sql = "select question_id,question_desc from gpt_exams_test limit 10 "
    rows = mysql_select(q_sql)
    sql = "update gpt_exams_test set question_desc_distinct=(%s),question_desc_text=(%s) where question_id=(%s)"
    update_datas = []
    for row in rows:
        question_id = row[0]
        qustion_desc = row[1]
        question_desc_text = fu.clear_custom_tags(qustion_desc)
        question_desc_distinct = clean_data(qustion_desc)
        vo = (question_desc_distinct, question_desc_text, question_id)
        print(vo)
        update_datas.append(vo)
    mysql_update_batch(sql, update_datas)
此处有个坑,就是字符串的格式化 %s 必须用括号括起来, 我最初是这样写的:sql = "update gpt_exams_test set question_desc_distinct='%s',question_desc_text='%s' where question_id=%s",结果一直报SQL语法错误。
def mysql_pd(sql):
    """Run a SELECT through pandas and return the result as a DataFrame.

    Args:
        sql: A complete SELECT statement.

    Returns:
        ``pandas.DataFrame`` with one column per selected field.

    Note:
        The engine is now disposed in ``finally`` so its pooled
        connections are released — the original leaked the engine.
    """
    con = sqlalchemy.create_engine('mysql+pymysql://root:***@192.168.1.101:3306/shuiben_crawler?charset=utf8')
    print("sql=" + sql)
    try:
        return pd.read_sql_query(sql=sql, con=con)
    finally:
        con.dispose()
使用pandas的优点是返回的结果是DataFrame,我们可以基于DataFrame取数更方便。
if __name__ == "__main__":
    q_sql = "select question_id,question_desc from gpt_exams_test limit 10 "
    rows = mysql_pd(q_sql)
    # `with` plus an explicit encoding keeps the JSONL append safe;
    # one record per line, Chinese text written as-is (not \u-escaped).
    with open(r"D:\data\gpt_exams\huikao8.jsonl", "a", encoding='utf-8') as file:
        rows.to_json(file, orient='records', force_ascii=False, lines=True)
pandas的使用方法可参考这个:Pandas | LearnKu 终身编程者的知识社区