文末附教程博客链接,感兴趣可以去看一下。
分享一个pyecharts学习网址:
Python:数据可视化pyecharts的使用.
import requests
from bs4 import BeautifulSoup
from pyecharts import Page, Pie, Bar
# Download the Douban upcoming-movies page (cinema/later) for Chengdu and
# parse it with the lxml parser.
url = "https://movie.douban.com/cinema/later/chengdu/"
response = requests.get(url)
soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')
all_movies = soup.find('div', id="showing-soon")

# List of dicts, one per movie, with keys: name, date, type, area, lovers.
all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):
    all_a = each_movie.find_all('a')
    all_li = each_movie.find_all('li')
    # The second <a> of an item is the one read as the title.
    movie_name = all_a[1].text
    if len(all_li) == 4:
        movie_date = all_li[0].text
        movie_type = all_li[1].text
        movie_area = all_li[2].text
        movie_lovers = all_li[3].text
    else:
        # Some items have no release-date <li>; indexing all_li[3] would then
        # raise IndexError, so the remaining fields shift up by one and the
        # date is recorded as "unknown".
        movie_date = "未知"
        movie_type = all_li[0].text
        movie_area = all_li[1].text
        movie_lovers = all_li[2].text
    all_movies_info.append({
        'name': movie_name,
        'date': movie_date,
        'type': movie_type,
        'area': movie_area,
        # Keep only the number by dropping the "人想看" suffix.
        'lovers': movie_lovers.replace('人想看', ''),
    })
# Order the movies by their "want to see" count, ascending. The count is
# stored as a string, so it is converted to int for the comparison only.
sort_by_lovers = list(all_movies_info)
sort_by_lovers.sort(key=lambda movie: int(movie['lovers']))

# Parallel lists for the chart: movie titles and their counts.
names = [movie['name'] for movie in sort_by_lovers]
lovers = [movie['lovers'] for movie in sort_by_lovers]

# Bar chart with a title; add() takes the legend name, the x values and the
# y values, then swaps the axes and shows each value to the right of its bar.
lovers_rank_bar = Bar("电影想看人数排行榜")
lovers_rank_bar.add(
    '',
    names,
    lovers,
    is_convert=True,
    is_label_show=True,
    label_pos='right',
)

# As the last expression of a Jupyter cell this renders the chart inline.
lovers_rank_bar
注意:echarts 有一个 bug——交换 x 轴和 y 轴之后,y 轴刻度的显示会出现问题。
有人在 GitHub 上提供了一个临时解决方案,大家可以参考:https://github.com/pyecharts/pyecharts/issues/653
# Genre string of every movie; one string may list several genres.
all_types = [movie['type'] for movie in all_movies_info]

# Tally how many movies fall under each individual genre.
type_count = {}
for each_types in all_types:
    # Genres inside one string are separated by ' / '.
    for e_type in each_types.split(' / '):
        type_count[e_type] = type_count.get(e_type, 0) + 1

# Pie chart whose title is shifted down by 60px.
type_pie = Pie('上映类型占比', title_top=60)
# Genre names and their counts are passed as plain lists.
genre_names = list(type_count)
genre_totals = list(type_count.values())
type_pie.add('', genre_names, genre_totals, is_label_show=True)

# As the last expression of a Jupyter cell this renders the chart inline.
type_pie
# Bar chart of how many movies are released on each date.
all_dates = [movie['date'] for movie in all_movies_info]

# Tally the number of movies per release date.
dates_count = {}
for date in all_dates:
    dates_count[date] = dates_count.get(date, 0) + 1

# Dates go on the x axis, counts on the y axis, with value labels shown.
dates_bar = Bar('上映日期统计')
release_days = list(dates_count)
movies_per_day = list(dates_count.values())
dates_bar.add('', release_days, movies_per_day, is_label_show=True)

# As the last expression of a Jupyter cell this renders the chart inline.
dates_bar
#使用Page逐个展示图表
import requests
from bs4 import BeautifulSoup
from pyecharts import Page, Pie, Bar
# Request the Douban upcoming-movies page (cinema/later) for Chengdu.
url = "https://movie.douban.com/cinema/later/chengdu/"
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() reports an HTTP error here instead of letting it
# surface later as a confusing failure while walking the parsed page.
response = requests.get(url, timeout=10)
response.raise_for_status()

# Parse the page and locate the container that holds the movie items.
soup = BeautifulSoup(response.content.decode('utf-8'), 'lxml')
all_movies = soup.find('div', id="showing-soon")

# List of dicts, one per movie, with keys: name, date, type, area, lovers.
all_movies_info = []
for each_movie in all_movies.find_all('div', class_="item"):
    all_a_tag = each_movie.find_all('a')
    all_li_tag = each_movie.find_all('li')
    # The second <a> of an item is the one read as the title.
    movie_name = all_a_tag[1].text
    if len(all_li_tag) == 4:
        movie_date = all_li_tag[0].text
        movie_type = all_li_tag[1].text
        movie_area = all_li_tag[2].text
        movie_lovers = all_li_tag[3].text.replace('人想看', '')
    else:
        # Some movies have no release date, which would otherwise cause an
        # "index out of range" error; the other fields shift up by one and
        # the date is recorded as "unknown".
        movie_date = "未知"
        movie_type = all_li_tag[0].text
        movie_area = all_li_tag[1].text
        movie_lovers = all_li_tag[2].text.replace('人想看', '')
    all_movies_info.append({
        'name': movie_name,
        'date': movie_date,
        'type': movie_type,
        'area': movie_area,
        'lovers': movie_lovers,
    })
# A Page holds several charts so they can be shown on a single web page.
page = Page()

# Ranking chart: order the movies by their "want to see" count, ascending.
# The count is stored as a string, so it is converted to int for sorting.
sort_by_lovers = list(all_movies_info)
sort_by_lovers.sort(key=lambda movie: int(movie['lovers']))

# Parallel lists for the chart: movie titles and their counts.
names = [movie['name'] for movie in sort_by_lovers]
lovers = [movie['lovers'] for movie in sort_by_lovers]

# Bar chart with a title; add() takes the legend name, the x values and the
# y values, then swaps the axes and shows each value to the right of its bar.
lovers_rank_bar = Bar("电影想看人数排行榜")
lovers_rank_bar.add(
    '',
    names,
    lovers,
    is_convert=True,
    is_label_show=True,
    label_pos='right',
)

# Register the chart on the page instead of displaying it on its own.
page.add(lovers_rank_bar)
# Genre share pie chart.
# Genre string of every movie; one string may list several genres.
all_types = [movie['type'] for movie in all_movies_info]

# Tally how many movies fall under each individual genre.
type_count = {}
for each_types in all_types:
    # Genres inside one string are separated by ' / '.
    for e_type in each_types.split(' / '):
        type_count[e_type] = type_count.get(e_type, 0) + 1

# Pie chart whose title is shifted down by 60px.
type_pie = Pie('上映类型占比', title_top=60)
# Genre names and their counts are passed as plain lists.
genre_names = list(type_count)
genre_totals = list(type_count.values())
type_pie.add('', genre_names, genre_totals, is_label_show=True)

# Register the chart on the page instead of displaying it on its own.
page.add(type_pie)
# Bar chart of how many movies are released on each date.
all_dates = [movie['date'] for movie in all_movies_info]

# Tally the number of movies per release date.
dates_count = {}
for date in all_dates:
    dates_count[date] = dates_count.get(date, 0) + 1

# Dates go on the x axis, counts on the y axis, with value labels shown.
dates_bar = Bar('上映日期统计')
release_days = list(dates_count)
movies_per_day = list(dates_count.values())
dates_bar.add('', release_days, movies_per_day, is_label_show=True)

# Register the chart on the page instead of displaying it on its own.
page.add(dates_bar)

# As the last expression of a Jupyter cell this renders every chart on the page.
page
附上学习链接:
爬虫入门教程⑩— 用漂亮的图表展示爬取到的数据.