1. Python要和MySQL交互,我们利用pymysql这个库。
下载地址:
https://github.com/PyMySQL/PyMySQL
安装(注意cd到我们项目的虚拟环境后):
python -m pip install pymysql
import pymysql.cursors
# Connect to the database. DictCursor is chosen so that the row printed
# below comes back as a dict rather than a tuple.
connection = pymysql.connect(
    host='localhost',
    user='root',
    password='root',
    database='pythondemo',  # `db=` is a deprecated alias of `database=`
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)
try:
    with connection.cursor() as cursor:
        # Read a single record
        sql = "SELECT * From test"
        cursor.execute(sql)
        result = cursor.fetchone()
        print(result)
finally:
    # Release the connection even if the query raised.
    connection.close()
返回的结果是字典类型,这是因为在连接数据库的时候我们是这样设置的:
cursorclass=pymysql.cursors.DictCursor
元组类型:
cursorclass=pymysql.cursors.Cursor
主要有4种:
Cursor 默认,查询返回list或者tuple
DictCursor 查询返回dict,包含字段名
SSCursor 效果同Cursor,无缓存游标
SSDictCursor 效果同DictCursor,无缓存游标。
插入一条数据
# Insert a single row
try:
    with connection.cursor() as cur:
        insert_sql = "INSERT INTO test(`title`)VALUES (%s)"
        cur.execute(insert_sql, ("今天的新闻",))
    # Commit by hand: the connection does not autocommit by default.
    connection.commit()
finally:
    connection.close()
一次性插入多条数据(用executemany(),有别于execute()):
# Insert several rows with one executemany() call
try:
    with connection.cursor() as cursor:
        sql = "INSERT INTO test(`title`)VALUES (%s)"
        # One parameter sequence per row, as DB-API (PEP 249) specifies;
        # a one-column row is therefore a one-element tuple.
        cursor.executemany(sql, [("多条数据1",), ("多条数据2",)])
    # Commit by hand: the connection does not autocommit by default.
    connection.commit()
finally:
    connection.close()
sql绑定参数
sql = "INSERT INTO test(`title`)VALUES (%s)"
cursor.executemany(sql, ["多条数据1", "多条数据2"])
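我们用%s占位,执行SQL的时候才传递具体的值,由驱动负责转义,不需要自己拼接字符串。参数序列用列表或元组均可(["多条数据1", "多条数据2"]或者("多条数据1", "多条数据2")),其中每个元素对应一行数据;按DB-API规范,每个元素本身应是一个序列,例如[("多条数据1",), ("多条数据2",)],PyMySQL也接受这里的单值简写。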
二,将分析的金融数据入库
1.建表sql:
-- Fund table: one row per fund, keyed by the fund code.
CREATE TABLE `fund` (
  `code` varchar(50) NOT NULL,
  `name` varchar(255),
  -- DECIMAL(5,4) cannot hold anything above 9.9999, so the two value
  -- columns are widened to accept funds priced at 10 or more.
  `NAV` decimal(10,4),
  `ACCNAV` decimal(10,4),
  `updated_at` datetime,
  PRIMARY KEY (`code`)
) COMMENT='基金表'
2.准备插入SQL:
INSERT INTO fund(`code`,`name`,`NAV`,`ACCNAV`,`updated_at`)VALUES (%(code)s,%(name)s,%(NAV)s,%(ACCNAV)s,%(updated_at)s)
注意%(code)s这种命名占位符,要求我们执行这条SQL的时候传入的参数必须是字典类型,字典的键与占位符里的名字一一对应。
注意:PyMySQL的占位符只有%s和%(name)s两种,无论字段是整型、浮点型还是字符串都用它们;不能使用%d、%f,因为参数在代入SQL之前已被转义成字符串。
MySQL小知识:
在插入的时候如果有重复的主键,就更新:
insert into 表名 (字段列表) values (值列表) ON DUPLICATE KEY UPDATE 字段名=新值
3.完整sql
-- VALUES(col) refers to the value this row tried to insert, so the update
-- clause needs no bind parameters of its own and also works with executemany().
INSERT INTO fund(code,name,NAV,ACCNAV,updated_at)VALUES (%(code)s,%(name)s,%(NAV)s,%(ACCNAV)s,%(updated_at)s)
ON DUPLICATE KEY UPDATE updated_at=VALUES(updated_at),NAV=VALUES(NAV),ACCNAV=VALUES(ACCNAV);
4.完整代码
# #引入包
# from urllib.request import urlopen
#
# response = urlopen("http://fund.eastmoney.com/fund.html")
# html = response.read();
#
# #这个网页编码是gb2312
# #print(html.decode("gb2312"))
#
# #把html内容保存到一个文件
# with open("1.txt","wb") as f:
# f.write(html.decode("gb2312").encode("utf8"))
# f.close()
import pymysql.cursors
from bs4 import BeautifulSoup
from datetime import datetime
# Timestamp stored in the updated_at column of every row written by this run.
updated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# 1.txt is read as UTF-8 bytes and decoded. The with block closes the file,
# so no explicit close() call is needed.
with open("1.txt", "rb") as f:
    html = f.read().decode("utf8")

# Parse the HTML content
soup = BeautifulSoup(html, "html.parser")
# soup.title holds the page title; print it to check the right page was saved.

# Fund-code cells: every <td class="bzdm"> in the body of the table with
# id "oTable".
codes = soup.find("table", id="oTable").tbody.find_all("td", "bzdm")

# One dict of bind parameters per fund. The three cells that follow the code
# cell are read as name, NAV and ACCNAV, in that order. A list with append()
# replaces repeated tuple concatenation, which re-copied every earlier row
# on each iteration.
result = []
for code in codes:
    name_cell = code.next_sibling
    nav_cell = name_cell.next_sibling
    accnav_cell = nav_cell.next_sibling
    result.append({
        "code": code.get_text(),
        "name": name_cell.find("a").get_text(),
        "NAV": nav_cell.get_text(),
        "ACCNAV": accnav_cell.get_text(),
        "updated_at": updated_at,
    })

# Connect to the database
connection = pymysql.connect(
    host='localhost',
    user='root',
    password='root',
    database='pythondemo',  # `db=` is a deprecated alias of `database=`
    charset='utf8mb4',
    cursorclass=pymysql.cursors.Cursor,
)
try:
    with connection.cursor() as cursor:
        # Upsert: a fund whose code already exists gets its values and
        # timestamp refreshed instead of raising a duplicate-key error, so
        # the script can be re-run. The update clause uses VALUES(col)
        # rather than bind placeholders because executemany() substitutes
        # parameters only inside the VALUES (...) group of a bulk INSERT.
        sql = (
            "INSERT INTO fund(`code`,`name`,`NAV`,`ACCNAV`,`updated_at`)"
            "VALUES (%(code)s,%(name)s,%(NAV)s,%(ACCNAV)s,%(updated_at)s) "
            "ON DUPLICATE KEY UPDATE "
            "`NAV`=VALUES(`NAV`),`ACCNAV`=VALUES(`ACCNAV`),"
            "`updated_at`=VALUES(`updated_at`)"
        )
        cursor.executemany(sql, result)
    # Commit by hand: the connection does not autocommit by default.
    connection.commit()
finally:
    # Release the connection even if the insert raised.
    connection.close()