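"""
Environment:
    Python 3.8
    PyCharm
Modules used:
    requests
    re
    subprocess
Approach: first work out where Bilibili's video data and audio data come from.
Note: merging the downloaded streams calls the `ffmpeg` command-line tool,
which must be installed and reachable on PATH.
"""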
import subprocess
#Python学习交流群:748989764
import requests # 数据请求模块 需要 pip install requests
import re # 正则表达式
import pprint # 格式化输出模块
# HTTP headers attached to every request this script sends.
# NOTE(review): presumably the referer is needed for the media servers to
# accept the download requests — confirm.
headers = {
    'referer': 'https://www.bilibili.com/',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/89.0.4389.114 Safari/537.36'
    ),
}
def get_response(html_url, timeout=30):
    """Send a GET request using the module-level ``headers``.

    Args:
        html_url: URL to request.
        timeout: Seconds to wait for the server to connect / send data before
            giving up. Without a timeout ``requests`` may block forever on a
            stalled connection. This is not a cap on total download time.

    Returns:
        The ``requests.Response`` for the URL.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url=html_url, headers=headers, timeout=timeout)
    # Fail here rather than letting an error page be parsed or saved as media.
    response.raise_for_status()
    return response
def get_video_info(html_url):
    """Fetch a video page and extract its cid, session and title.

    Args:
        html_url: URL of the video page.

    Returns:
        A list ``[cid, session, title]`` of strings. ``title`` is reduced to
        word characters (Unicode-aware) because callers use it as a file-name
        stem and inside the ffmpeg command line.

    Raises:
        IndexError: If cid, session or title cannot be found in the page.
    """
    page_text = get_response(html_url).text

    def first_group(pattern, field):
        # Equivalent to re.findall(pattern, text)[0], but with a readable
        # message when the page does not contain the expected field.
        found = re.search(pattern, page_text)
        if found is None:
            raise IndexError(f'{field} not found in {html_url}')
        return found.group(1)

    cid = first_group(r'"cid":(\d+),', 'cid')
    session = first_group(r'"session":"(.*?)"', 'session')
    # NOTE(review): the original title pattern was an empty string (it always
    # produced an empty title). This assumes the page carries the title in the
    # `title` attribute of its <h1> element — confirm against the live markup.
    raw_title = first_group(r'<h1[^>]*\stitle="(.*?)"', 'title')
    # Drop whitespace, path separators, quotes and other punctuation; fall
    # back to the cid if nothing usable is left.
    title = re.sub(r'\W+', '', raw_title) or cid
    return [cid, session, title]
def get_video_content(cid, session, bv_id, timeout=30):
    """Query the playurl API and return the audio and video stream URLs.

    Args:
        cid: Value sent as the ``cid`` query parameter.
        session: Value sent as the ``session`` query parameter.
        bv_id: Value sent as the ``bvid`` query parameter.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A list ``[audio_url, video_url]`` holding the ``baseUrl`` of the first
        audio entry and the first video entry under ``data.dash``.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        KeyError, IndexError, TypeError: If the JSON body lacks the
            ``data.dash.audio[0]`` / ``data.dash.video[0]`` entries.
    """
    index_url = 'https://api.bilibili.com/x/player/playurl'
    # NOTE(review): the fixed values below are kept exactly as they were;
    # presumably `fnval` requests the DASH layout read further down — confirm
    # against the API documentation.
    params = {
        'cid': cid,
        'qn': '0',
        'type': '',
        'otype': 'json',
        'fourk': '1',
        'bvid': bv_id,
        'fnver': '0',
        'fnval': '976',
        'session': session,
    }
    response = requests.get(
        url=index_url, params=params, headers=headers, timeout=timeout
    )
    # Surface HTTP-level failures before trying to decode the body as JSON.
    response.raise_for_status()
    dash = response.json()['data']['dash']
    audio_url = dash['audio'][0]['baseUrl']
    video_url = dash['video'][0]['baseUrl']
    return [audio_url, video_url]
def save(title, audio_url, video_url):
    """Download the audio and video streams to ``<title>.mp3`` / ``<title>.mp4``.

    Each stream is written to disk chunk by chunk, so a large video is never
    held in memory as a whole.

    Args:
        title: File-name stem for both output files.
        audio_url: URL of the audio stream.
        video_url: URL of the video stream.

    Raises:
        requests.HTTPError: If either download answers with a 4xx/5xx status.
        OSError: If an output file cannot be opened or written.
    """
    chunk_size = 1024 * 1024  # 1 MiB per write
    # NOTE(review): the audio stream is presumably not MP3-encoded; the
    # '.mp3' suffix is kept because merge_data reads '<title>.mp3'.
    for url, suffix in ((audio_url, '.mp3'), (video_url, '.mp4')):
        with requests.get(
            url=url, headers=headers, stream=True, timeout=30
        ) as response:
            # Do not write an error page to disk as if it were media.
            response.raise_for_status()
            with open(title + suffix, mode='wb') as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
    print(title, '保存完成')
def merge_data(video_name):
    """Merge ``<video_name>.mp4`` and ``<video_name>.mp3`` with ffmpeg.

    The result is written to ``<video_name>output.mp4``. ffmpeg's exit status
    is not checked, so the closing message is printed even if the merge fails.

    Args:
        video_name: File-name stem shared by the two input files.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    print('视频合成开始:', video_name)
    # Pass arguments as a list and skip the shell: video_name comes from a web
    # page, so interpolating it into a shell string would let characters such
    # as ';', '&' or '$(...)' run arbitrary commands.
    cmd = [
        'ffmpeg',
        '-i', f'{video_name}.mp4',
        '-i', f'{video_name}.mp3',
        '-c:v', 'copy',
        '-c:a', 'aac',
        '-strict', 'experimental',
        f'{video_name}output.mp4',
    ]
    subprocess.run(cmd)
    print('视频合成结束:', video_name)
def main(bv_id):
    """Run the whole pipeline for one video: look up, download, then merge.

    Args:
        bv_id: BV identifier appended to the video page URL and passed to the
            stream lookup.
    """
    page_url = f'https://www.bilibili.com/video/{bv_id}'
    cid, session, title = get_video_info(page_url)
    audio_url, video_url = get_video_content(cid, session, bv_id)
    save(title, audio_url, video_url)
    merge_data(title)
if __name__ == '__main__':
    # Example BV id: BV1p4411d7og
    # Strip surrounding whitespace so a pasted id with stray spaces does not
    # end up inside the request URL.
    bv = input('请输入你要下载的视频BV号: ').strip()
    main(bv)