import os
import re
import time
import requests
import random
from urllib3.request import urlencode
# Define a function that builds the request URL for one page of results
def get_page(next_offset):
    """Build the URL for one page of the bilibili ranking API.

    Args:
        next_offset: Value sent as the ``next_offset`` query parameter.

    Returns:
        str: The ``/board/v1/ranking/top`` endpoint URL with the
        URL-encoded query string (page_size, next_offset, tag, platform)
        appended.
    """
    # urlencode is a standard-library function; import it from urllib.parse
    # here instead of relying on urllib3 happening to re-export it.
    from urllib.parse import urlencode

    params = {
        'page_size': 10,
        'next_offset': next_offset,
        'tag': '今日热门',
        'platform': 'pc',
    }
    baseurl = 'https://api.vc.bilibili.com/board/v1/ranking/top?'
    return baseurl + urlencode(params)
# Helper that replaces characters which cannot be used in a file name
# Characters replaced by validatetitle(): ASCII control characters (which
# include tab, LF and CR), the characters Windows reserves in file names
# (/ \ : * ? " < > |) and '@'. The pattern is a raw string so that the
# backslash is matched literally, and it is compiled once at import time.
_INVALID_FILENAME_CHARS = re.compile(r'[\x00-\x1f/\\:*?"<>|@]')


def validatetitle(title):
    """Return *title* with characters unsuitable for a file name replaced.

    Every character matched by ``_INVALID_FILENAME_CHARS`` is replaced by a
    single underscore; all other characters are left untouched.

    Args:
        title: The raw title string.

    Returns:
        str: The sanitised title (same length as the input).
    """
    return _INVALID_FILENAME_CHARS.sub('_', title)
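# An earlier run failed with the error shown below. The original note attributes it to
# the file name being too long (the name shown also contains a raw line break); slicing
# the title string down to a short prefix is used as the work-around.
# Invalid argument: 'Mint .44手枪双爆头,力挽狂澜追平比分\n#彩虹六号:围攻# #2020邀请赛#.mp4'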
# Driver script: request each ranking page, then download every video listed
# on it into a local folder.

# Offsets to request: 0, then 11, 21, ..., 91.
# NOTE(review): get_page() asks for page_size=10, so 10, 20, ... may have been
# intended; the API's next_offset semantics are not visible here - confirm.
urlist = [i * 10 + 1 for i in range(1, 10)]
urlist.insert(0, 0)

# Same headers for every request, so build them once.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.42 Safari/537.36'}

# Folder that receives all downloaded videos. makedirs also creates missing
# parent folders and does not fail when the folder already exists.
viedo_path = r'D:\Desktop\爬虫_anaconda\bilibili小视频\viedo'
os.makedirs(viedo_path, exist_ok=True)
# Files are opened by bare name below, so work inside the target folder.
os.chdir(viedo_path)

for i in urlist:
    print('=====================================')
    print('正在爬取offset=%d的异步数据'%i)
    print('======================================')
    url = get_page(i)
    # A timeout keeps a stalled connection from hanging the run forever.
    response = requests.get(url, headers=headers, timeout=30)
    print(response.status_code)
    try:
        payload = response.json()
    except ValueError:
        # Body was not JSON (e.g. an error page): treat the page as empty.
        payload = {}
    # 'data' or 'items' may be missing or null; fall back to no items.
    items = (payload.get('data') or {}).get('items') or []
    print('当前文件夹是:', os.getcwd())  # show the current working folder
    for index, item in enumerate(items, start=1):  # one entry per video
        print('爬取第%d个视频'%index)
        # File name: sanitised description, cut to 15 characters.
        viedo_name = item['item']['description']
        viedo_name = validatetitle(viedo_name)[:15] + r'......'
        print(viedo_name)
        # Download address of the video.
        viedo_url = item['item']['video_playurl']
        # Bare file name: the working directory is already the target folder.
        file_path = '{file_name}.{file_suffix}'.format(file_name=viedo_name, file_suffix='mp4')
        # Stream to disk in chunks instead of holding the whole video in memory.
        with requests.get(viedo_url, headers=headers, stream=True, timeout=30) as resp:
            with open(file_path, 'wb') as f:
                for chunk in resp.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
    # NOTE(review): the original indentation was lost; the pause is placed
    # once per page here - confirm it was not meant to run after each video.
    time.sleep(random.randint(6, 8))
print('爬取完成')