Python培训第二讲、第三讲作业

爬取目标:B站各类视频30日排行

代码

import requests
from lxml import etree
import xlwt
import time

# Request headers sent with every page download (a desktop Chrome User-Agent).
headers = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
}

# (category label, ranking id) pairs. The label becomes the worksheet name and
# the id is substituted into the ranking URL by the script's main section.
_CATEGORIES = (
  ('全部', '0'),
  ('动画', '1'),
  ('国创相关', '168'),
  ('音乐', '3'),
  ('舞蹈', '129'),
  ('游戏', '4'),
  ('科技', '36'),
  ('数码', '188'),
  ('生活', '160'),
  ('鬼畜', '119'),
  ('时尚', '155'),
  ('娱乐', '5'),
  ('影视', '181'),
)
params = [{'type': label, 'code': code} for label, code in _CATEGORIES]

# Rows scraped for the category currently being processed.
all_info_list = []

def _first_text(node, path):
  """Return the first result of evaluating ``path`` on ``node``, or '' if none."""
  found = node.xpath(path)
  return found[0] if found else ''


def get_info(url, type):
  """Download one ranking page and collect a row for every listed video.

  Each row is ``[rank, name, players, comments, author, score]``. Rows are
  appended to the module-level ``all_info_list``, which the caller reads.

  Args:
    url: Address of the ranking page to download.
    type: Category label of the page. Not used inside the function; the
      parameter is kept so existing callers keep working.

  Returns:
    The list of rows parsed from this page (the same rows that were appended
    to ``all_info_list``). Empty if the page contains no ranking entries.

  Raises:
    requests.RequestException: If the connection fails, the request times
      out, or the server answers with an error status.
  """
  # Without a timeout a stalled connection would block the script forever.
  res = requests.get(url, headers=headers, timeout=10)
  # Fail loudly on an error status instead of silently parsing an error page.
  res.raise_for_status()

  html = etree.HTML(res.text)
  if html is None:
    # Nothing parseable came back, so there are no entries to collect.
    return []

  # XPath of each column relative to one <li> of the ranking list, in the
  # order the columns are written to the worksheet.
  field_paths = (
    'div[1]/text()',                        # rank
    'div[2]/div[2]/a/text()',               # video name
    'div[2]/div[2]/div[1]/span[1]/text()',  # players
    'div[2]/div[2]/div[1]/span[2]/text()',  # comments
    'div[2]/div[2]/div[1]/a/span/text()',   # author
    'div[2]/div[2]/div[2]/div/text()',      # score
  )

  rows = []
  for info in html.xpath('//ul[@class="rank-list"]/li'):
    # A field missing from one entry becomes an empty cell rather than an
    # IndexError that aborts the whole run.
    rows.append([_first_text(info, path) for path in field_paths])

  all_info_list.extend(rows)
  return rows

if __name__ == '__main__':
  # Build one worksheet per category, then save the workbook once at the end.
  book = xlwt.Workbook(encoding='utf-8')
  # Column titles are identical on every sheet, so define them once.
  header = ['排名', '视频', '播放量', '弹幕量', '作者', '综合得分']
  for param in params:
    sheet = book.add_sheet(param['type'])
    for col, title in enumerate(header):
      sheet.write(0, col, title)

    url = 'https://www.bilibili.com/ranking/all/{}/0/30'.format(param['code'])
    get_info(url, param['type'])
    # Row 0 holds the titles, so data rows start at row 1.
    for row_idx, row in enumerate(all_info_list, start=1):
      for col, value in enumerate(row):
        sheet.write(row_idx, col, value)
    # Discard this category's rows so the next sheet starts from an empty list.
    all_info_list = []
    # Wait two seconds before requesting the next category.
    time.sleep(2)

  book.save('C:/Users/user/Desktop/B站30日排行.xls')

结果

全部排行

动画类排行

国创类排行

音乐类排行

其他省略...

你可能感兴趣的:(python培训第二讲第三讲作业)