【声明:因时间关系,爬虫部分代码有所借鉴。本人主要想将这三部分结合起来,供读者学习交流;如有冒犯,敬请原谅!】
一、本地环境:
1.python 3.7
2.mysql 8.0.17(Navicat)
3.beautifulsoup4 4.8.0
4.pyecharts-0.5.11
!!!(若pyecharts导入过程之中遇到较难解决的问题,读者可尝试Anaconda3,其自带pyecharts)
二、爬取数据并存入MySQL
import requests
from bs4 import BeautifulSoup
import re
import pymysql
# Scrape the population table from the article page and store it in MySQL.
#
# Keyword arguments are used for the connection because PyMySQL 1.0+ no
# longer accepts positional connection parameters.
db = pymysql.connect(host="localhost", user="root", password="123zwh", database="zwh1")
try:
    cursor = db.cursor()
    url = 'http://www.china-10.com/news/488659.html'
    # A timeout keeps the script from hanging forever on a stalled server,
    # and an HTTP error aborts before anything is written to the database.
    html = requests.get(url, timeout=10)
    html.raise_for_status()
    soup = BeautifulSoup(html.content, 'html.parser')
    # Find every <td> element whose class matches "md_td".
    aaa = soup.find_all(name="td", attrs={"class": re.compile(r"md_td")})
    # Only cells 4..127 are used. Presumably the first four cells are the
    # table header -- inspect enumerate(aaa) to confirm if the page changes.
    demo_list = [cell.text for cell in aaa[4:128]]

    insert_message = "INSERT INTO china_population VALUES(%s,%s,%s,%s)"
    # Every table row occupies four consecutive cells; an incomplete trailing
    # group is ignored instead of raising IndexError.
    for start in range(0, len(demo_list) - 3, 4):
        # NOTE(review): the province/population/rise order is assumed from
        # the column names queried later in this file -- confirm against the
        # table definition.
        first, province, population, rise = demo_list[start:start + 4]
        # The two numeric cells are scaled by 10000 before being stored
        # (presumably the page reports them in units of ten thousand).
        data = (
            int(first),
            province,
            int(float(population) * 10000),
            int(float(rise) * 10000),
        )
        print(*data)
        cursor.execute(insert_message, data)
    # Commit once at the end so a failure part-way through leaves no
    # half-imported data behind.
    db.commit()
finally:
    # Always release the connection, even when scraping or inserting fails.
    db.close()
数据库数据展示:
三、从MySQL获取数据,利用pyecharts绘制成图生成html
import pymysql
from pyecharts import Bar,Line
# Load province names, populations and increments from MySQL for charting.
namelist = []
numlist = []
riselist = []
# Raw result sets read by drawPic(); bound up front so they exist even when
# the query below fails.
names = []
nums = []
rise_num = []
# Keyword arguments are used because PyMySQL 1.0+ no longer accepts
# positional connection parameters.
db = pymysql.connect(host="localhost", user="root", password="123zwh", database="zwh1")
try:
    cursor = db.cursor()
    # Fetch all three columns in ONE query: separate SELECTs without an
    # ORDER BY are not guaranteed to return rows in the same order, which
    # could pair a province with another province's numbers.
    sql_all = """ SELECT province, population, rise FROM china_population """
    cursor.execute(sql_all)
    rows = cursor.fetchall()
    # Keep the one-column row shape that drawPic() indexes with [0].
    names = [(row[0],) for row in rows]
    nums = [(row[1],) for row in rows]
    rise_num = [(row[2],) for row in rows]
    namelist.extend(row[0] for row in rows)
    numlist.extend(row[1] for row in rows)
    riselist.extend(row[2] for row in rows)
    print(namelist)
    print(numlist)
    print(riselist)
except pymysql.MySQLError as exc:
    # Only database errors are handled here; the error detail is printed so
    # the cause (missing table, bad credentials, ...) is not hidden.
    print("未查询到数据!", exc)
finally:
    db.close()
def drawPic():
    """Render a bar chart of population per province to 'peo_popu.html'.

    Reads the module-level result sets ``names`` and ``nums`` and uses the
    first element of every row as the category label and bar value.
    """
    provinces = [row[0] for row in names]
    populations = [row[0] for row in nums]
    chart = Bar("2019中国各省人数")
    chart.add(
        "人口数",
        provinces,
        populations,
        mark_line=["average"],
        mark_point=["max", "min"],
        area_color='yellow',
    )
    chart.render('peo_popu.html')
def drawPic1():
    """Render a smoothed line chart of population increase to 'peo_rise.html'.

    Reads the module-level lists ``namelist`` (x axis) and ``riselist``
    (y axis) and marks the maximum and minimum points.
    """
    # Plain line chart. The title reads "折线图" (line chart); the previous
    # spelling "折现图" was a typo.
    line = Line('2019各省人口数量增量折线图')
    line.add('各省人口数量增量', namelist, riselist, mark_point=['max', 'min'], is_smooth=True)
    # Dump the generated chart options for inspection before rendering.
    line.show_config()
    line.render('peo_rise.html')
if __name__ == '__main__':
    # When run as a script, generate both HTML charts: bar chart first,
    # then the line chart.
    for render_chart in (drawPic, drawPic1):
        render_chart()