哔哩哔哩热门视频排行榜

1.包含字典的列表的排序(lambda排序)
video_list是要排序的列表
x['times']中的times是排序的关键字
reverse=True表示降序排序,默认为升序

sorted_list = sorted(video_list, key=lambda x: x['times'], reverse=True)

代码:

# 爬取哔哩哔哩视频热门视频排行榜

# 导入
from matplotlib import pyplot as plt
import requests
from lxml import etree
import numpy as np

# Select the SimHei font for matplotlib text (the axis labels used later in
# this script are Chinese).
plt.rcParams["font.sans-serif"] = ['SimHei']

# Request headers: send a browser-like User-Agent with the request.
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"}
# Fetch the ranking page. The timeout keeps the script from hanging forever on
# a stalled connection, and raise_for_status() stops on an HTTP error response
# instead of going on to parse an error page.
req = requests.get(
    'https://www.bilibili.com/ranking?spm_id_from=333.334.b_62616e6e65725f6c696e6b.1',
    headers=headers,
    timeout=10,
)
req.raise_for_status()

# Parse the response body (decoded as UTF-8) into an lxml HTML tree.
dom = etree.HTML(req.content.decode("utf-8"))
# List of <li> elements found under <ul class="rank-list">; the loop that
# follows reads one video record from each element. The list is empty when the
# XPath matches nothing, so nothing here indexes into it.
block = dom.xpath('//ul[@class="rank-list"]/li')


def _play_count_in_wan(text):
    """Convert a displayed play-count string to a float in units of 万 (10,000).

    The rest of the script treats 'times' as a number of 万, so every input is
    normalised to that unit:

    * '123.4万' -> 123.4
    * '1.2亿'   -> 12000.0  (1 亿 = 10,000 万)
    * '9876'    -> 0.9876   (no suffix: a raw count)

    Raises:
        ValueError: if the numeric part cannot be parsed as a float.
    """
    text = text.strip()
    if text.endswith('万'):
        return float(text[:-1])
    if text.endswith('亿'):
        return float(text[:-1]) * 10000
    return float(text) / 10000


# Scraped records, one dict per <li> element in `block`.
video_list = []
for item in block:
    # Rank shown in the "num" div.
    video_asc = item.xpath('.//div[@class="num"]/text()')[0]
    # Video title, read from the cover image's alt attribute.
    video_name = item.xpath('.//div[@class="lazy-img cover"]/img/@alt')[0]
    # Play count: text of the first <span> in the "detail" div, normalised to
    # units of 万 so a count shown without the '万' suffix is not overstated.
    video_times = _play_count_in_wan(
        item.xpath('.//div[@class="detail"]/span[1]/text()')[0]
    )
    # Uploader name, from the <span> inside the "detail" div's link.
    video_author = item.xpath('.//div[@class="detail"]/a/span/text()')[0]
    # Store the record.
    video_list.append({
        'asc': video_asc,
        'name': video_name,
        'times': video_times,
        'author': video_author
    })
# Save the scraped records to a UTF-8 text file, two lines per video: rank and
# title on the first line, play count and uploader on the second.
with open("./哔哩哔哩热门视频排行榜.txt", "w", encoding='utf-8') as f:
    for video in video_list:
        # write() is the call for a single string; writelines() expects an
        # iterable of strings and would walk this one character by character.
        f.write(
            '排行:' + video['asc'] + '  视频标题:' + video['name'] + '\n'
            + '\t\t播放次数:' + str(video['times']) + '万  up主:' + video['author'] + '\n'
        )
# Order the records by play count, highest first; the chart code further down
# takes its entries from the front of this list.
sorted_list = list(video_list)
sorted_list.sort(key=lambda video: video['times'], reverse=True)
# Values for the x axis (uploader names).
x = list()
# Values for the y axis (play counts).
y = list()

#定义函数来显示柱状上的数值


def autolabel(rects):
    """Write each bar's height as a text label just above the bar.

    Args:
        rects: Iterable of bar objects that provide get_x(), get_width() and
            get_height(), such as the container returned by plt.bar().
    """
    for rect in rects:
        height = rect.get_height()
        # Anchor the text at the bar's horizontal centre and let matplotlib
        # centre it there (ha='center'), rather than shifting left by a fixed
        # data-unit offset that only suits one label width.
        plt.text(
            rect.get_x() + rect.get_width() / 2,
            1.01 * height,
            '%s' % float(height),
            ha='center',
            va='bottom',
        )

# Take at most five entries from the front of the sorted list. Slicing, unlike
# indexing 0..4, does not raise IndexError when fewer than five were scraped.
top_videos = sorted_list[:5]
# Uploader names for the x axis.
x = [video['author'] for video in top_videos]
# Play counts for the y axis.
y = [video['times'] for video in top_videos]

# Draw the bars once, at integer positions, with the uploader names as tick
# labels. Integer positions keep one bar per video even when the same uploader
# appears more than once.
a = plt.bar(np.arange(len(x)), y, tick_label=x)
# Print each bar's value above it.
autolabel(a)

plt.xlabel('up主名')
plt.ylabel('播放次数(万次)')
plt.show()

效果
将排名保存到txt文件中



将播放量最高的前五位up主以柱状图显示


你可能感兴趣的:(哔哩哔哩热门视频排行榜)