前言
- 如果不清楚爬取的具体过程,请参考: https://www.jianshu.com/p/65f8e46034fc ,其中解释了基本的实现过程。
- 主要原因:唧唧下不了番剧。
- 网络不好会弹出请求地址失败的信息。
- SESSDATA 可在浏览器的 Cookie 中查看;若没有,请先登录再查看。它与会员视频的访问权限密切相关。
(1)Login (account, password) -> POST 形式提交数据 (密码被加密后提交) -> 没有返回值,但自动在 Response Headers 设置了 set-cookie,里面也就包含了 SESSDATA 等信息。也就是说,SESSDATA的获取需要登录。
(2)账户密码为隐私数据,这里就用 xxx 替代。
(3)制作繁琐,请自行查看。
Request URL
https://passport.bilibili.com/x/passport-login/web/login
Response Headers
access-control-allow-credentials: true
access-control-allow-headers: Origin,No-Cache,X-Requested-With,If-Modified-Since,Pragma,Last-Modified,Cache-Control,Expires,Content-Type,Access-Control-Allow-Credentials,DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Cache-Webcdn,x-bilibili-key-real-ip,x-backend-bili-real-ip
access-control-allow-origin: https://passport.bilibili.com
bili-status-code: 0
bili-trace-id: 2bca58efea602525
cache-control: no-cache
content-length: 343
content-type: application/json; charset=utf-8
date: Thu, 11 Feb 2021 12:40:54 GMT
expires: Thu, 11 Feb 2021 12:40:53 GMT
set-cookie: SESSDATA=xxx; Path=/; Domain=bilibili.com; Expires=Tue, 10 Aug 2021 12:24:13 GMT; HttpOnly
set-cookie: bili_jct=667e147fa375f1da305e76654d4db3ff; Path=/; Domain=bilibili.com; Expires=Tue, 10 Aug 2021 12:24:13 GMT
set-cookie: DedeUserID=15687846; Path=/; Domain=bilibili.com; Expires=Tue, 10 Aug 2021 12:24:13 GMT
set-cookie: DedeUserID__ckMd5=1da4abd91d4728b0; Path=/; Domain=bilibili.com; Expires=Tue, 10 Aug 2021 12:24:13 GMT
set-cookie: sid=6k8eebr6; Path=/; Domain=bilibili.com; Expires=Tue, 10 Aug 2021 12:24:13 GMT
status: 200
x-cache-webcdn: BYPASS from ks-gz-webcdn-08
Form Data
source: main_web
username: xxx
password: xxx
keep: true
token: 254dc3aa330846aa8f1b0181e000e57e
go_url: https://passport.bilibili.com/account/security#/home
challenge: ef0a6e536d322765695e5be56582f651
validate: bfd8921294fe572577ada80b00c9dd7e
seccode: bfd8921294fe572577ada80b00c9dd7e|jordan
- 可下载视频类型为官方与自上传,小视频没怎么用,所以不太清楚是否可行。
- 想要 exe 方式打开的朋友,请自行打包。如有问题,请参考网上 pyinstaller 安装: https://blog.csdn.net/qq_44737094/article/details/105970391?utm_medium=distribute.pc_relevant.none-task-blog-baidujs_baidulandingword-6&spm=1001.2101.3001.4242
- 大会员的话,这里就使用部分解析网站。失效的话,自行查找。毕竟付出与收入不成正比,建议大会员支持下 B站。
效果图
源码
bilibili-video-downloader.py
import requests
import json
import os
import time
import re
import base64
import json
from os.path import join
from lxml import etree
'''
读取配置文件
'''
# Load settings from ./bilibili.config into module-level globals.
#
# Each setting is a ``KEY = value`` line; string values are wrapped in double
# quotes, which are stripped with ``[1:-1]``.  The defaults below are used when
# a key is absent, so deleting an unwanted line from the config file no longer
# leaves the corresponding global undefined.
ffmpeg_path = './'          # FFMPEG_PATH: directory containing ffmpeg.exe
v_dir_path = './'           # VIDEO_DIRECTORY: download root
v_eq = 80                   # VIDEO_QUALITY: requested quality id (qn)
v_fnval = 80                # VIDEO_WAY: requested stream type (fnval)
f_sessdata = ''             # SESSDATA: cookie value sent with API requests
EPISODE_COMMAND = 'False'   # EPISODE_COMMAND: kept as text, compared to "False"
vu_mode = 'False'           # VIDEO_URL_MODE: kept as text, compared to "False"
vuf_path = './urls.txt'     # VIDEO_URL_FILE_PATH: file listing the URLs

with open('./bilibili.config', 'r') as fp:
    for line in fp:
        # Split on the FIRST '=' only: a value may itself contain '='.
        key, sep, value = line.partition('=')
        key = key.strip()
        if not sep or key.startswith('#'):
            # Blank line, comment, or a line without an assignment.
            continue
        value = value.strip()
        if key == 'FFMPEG_PATH':
            ffmpeg_path = value[1:-1]
        elif key == 'VIDEO_DIRECTORY':
            v_dir_path = value[1:-1]
        elif key == 'VIDEO_QUALITY':
            v_eq = int(value)
        elif key == 'VIDEO_WAY':
            v_fnval = int(value)
        elif key == 'SESSDATA':
            f_sessdata = value[1:-1]
        elif key == 'EPISODE_COMMAND':
            EPISODE_COMMAND = value
        elif key == 'VIDEO_URL_MODE':
            vu_mode = value
        elif key == 'VIDEO_URL_FILE_PATH':
            vuf_path = value[1:-1]
'''
将下列字符替换为 Windows 文件名可用的字符
['\\', '/', ':', '*', '?', '<', '>', '|']
也就是说,把上述列表的符号,统一替换为另一种可用符号。
为求快,就统一,下划线 _
'''
def local_filename_win_auto(name, s_sign='_'):
    """Return *name* with every character Windows forbids in file names replaced.

    Args:
        name: Candidate file name (a single path component, not a full path,
            because both slash characters are replaced as well).
        s_sign: Text substituted for each forbidden character.

    Returns:
        The sanitized name as a new string.
    """
    # Windows reserves \ / : * ? " < > | in file names.  The double quote was
    # missing from the original list, so a title containing one still produced
    # an invalid file name.
    ban_sign = '\\/:*?"<>|'
    return name.translate(str.maketrans({ch: s_sign for ch in ban_sign}))
# local_filename_win_auto("第15话_奥托·苏文/相信的理由.flv")
'''
command = '1, 3, 4, 5, 29- 50, 66, 11-20'
, - 普通分隔 -> 单个数字
- - 数字范围 -> 范围数字
'''
def get_num_list_from_str(command):
    """Expand a selection such as ``'1, 3, 29-31'`` into a list of integers.

    Items are separated by commas.  An item containing ``-`` is an inclusive
    range ``start-end``; any other item is a single number.  Whitespace around
    items and numbers is ignored, and empty items (for example from a trailing
    comma or an empty command) are skipped instead of raising.

    Args:
        command: The selection text.

    Returns:
        All single numbers in input order, followed by the expanded ranges in
        input order (the ordering of the original implementation).

    Raises:
        ValueError: If an item is not a number or a ``start-end`` pair.
    """
    singles = []
    ranged = []
    for token in command.split(','):
        token = token.strip()
        if not token:
            continue
        if '-' in token:
            bounds = token.split('-')
            ranged.extend(range(int(bounds[0]), int(bounds[1]) + 1))
        else:
            singles.append(int(token))
    return singles + ranged
'''
https://api.bilibili.com/pgc/player/web/playurl?
cid=286965803& - cid
bvid=BV1QT4y1A7xq& - bvid
qn=0& - video quality[112, 80, 64, 32, 16, 0] - 0 是根据网速自动更换播放源文件
type=&
otype=json - response data type
ep_id=373888 - episode id
fourk=1&
fnver=0&
fnval=80& - 视频的传输类型 - [80:m4s视频与音频文件分开, 112:等:flv, 1:mp4 流畅 360] - 其他的自行探索
'''
def get_url_type(url):
    """Classify a bilibili page URL.

    Returns:
        1 when the URL contains ``www.bilibili.com/video``,
        2 when it contains ``www.bilibili.com/bangumi/play/``,
        -1 for anything else.
    """
    if 'www.bilibili.com/video' in url:
        return 1
    if 'www.bilibili.com/bangumi/play/' in url:
        return 2
    return -1
def get_video_api(url_type):
    """Return the play-URL API endpoint for a URL type from ``get_url_type``.

    Returns:
        The endpoint for type 1 or type 2, and ``None`` for any other value.
    """
    endpoints = (
        (1, "https://api.bilibili.com/x/player/playurl"),
        (2, "https://api.bilibili.com/pgc/player/web/playurl"),
    )
    for code, endpoint in endpoints:
        if url_type == code:
            return endpoint
    return None
'''
video accept info:
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
'accept_format': 'hdflv2,flv,flv720,flv480,mp4',
'accept_description': ['高清 1080P+', '高清 1080P', '高清 720P', '清晰 480P', '流畅 360P'],
'accept_quality': [112, 80, 64, 32, 16],
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
Conclude as follows:
1. 视频播放类型有两种:1) 现成的格式文件 2) 视频和音频分流的 m4s
2. 视频格式以 flv 为主,除了 流畅360P 为 mp4 格式
'''
def video_api(url, eq, fnval, **kwargs):
    """Query the play-URL API that matches *url* and return the decoded JSON.

    Args:
        url: Page URL; selects the endpoint and is sent as the referer.
        eq: Value sent as the ``qn`` query parameter (video quality).
        fnval: Value sent as the ``fnval`` query parameter (stream type).
        **kwargs: Extra query parameters (the callers pass ``cid``, ``bvid``
            and optionally ``ep_id``); they override the base parameters on a
            name clash.

    Returns:
        The parsed JSON body of the response.
    """
    endpoint = get_video_api(get_url_type(url))
    query = {"otype": "json", "qn": eq, "fnval": fnval, **kwargs}

    # Only the SESSDATA cookie of a real browser request is sent; it is what
    # ties the request to an account / membership.  To look it up: browser
    # dev tools (F12) -> Application -> Storage -> Cookies for
    # www.bilibili.com -> the entry named SESSDATA.
    request_headers = {
        "cookie": f"SESSDATA={f_sessdata}",
        "referer": url,
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/85.0.4183.121 Safari/537.36 "
    }
    response = requests.get(url=endpoint, headers=request_headers, params=query)
    return response.json()
'''
通过啊B的不同api,与参数,返回相应的数据。这里主要是视频下载的数据
'''
# print(video_api("https://www.bilibili.com/video/BV18V411o7YX", eq=64, fnval=80,
# cid=174780228, bvid='BV18V411o7YX', ep_id=3))
# print(video_api("https://www.bilibili.com/bangumi/play/ss29325", eq=64, fnval=80,
# cid=144089354, bvid='BV11J411E731', ep_id=307248))
"""
考虑日常文件单位问题,这里的范围仅适用于日常大部分
1 byte = 8 bit
1 KB = 1024 byte
1 MB = 1024 KB
1 GB = 1024 MB
1 TB = 1024 GB
"""
def format_byte(size, show_bit=1, sign='', d_num=2):
    """Format a byte count as human-readable text using 1024-based units.

    Two output modes exist:

    * Decimal mode (``show_bit == 1`` and ``d_num > 0``): the largest non-zero
      unit with ``d_num`` truncated decimals, e.g. ``"236.84 M"``.
    * Multi-unit mode (otherwise): the ``show_bit`` largest units joined by
      ``sign``, e.g. ``"2KB 5B"``.

    Args:
        size: Number of bytes; negative values are treated as 0.
        show_bit: How many units to show in multi-unit mode.
        sign: Separator between units in multi-unit mode.
        d_num: Number of decimal digits in decimal mode; ``<= 0`` disables
            decimal mode.

    Returns:
        The formatted text (an empty string when ``show_bit <= 0``).
    """
    names = ['B', 'KB', 'M', 'G', 'T']

    # Split the size into base-1024 "digits", smallest unit first.
    remaining = max(int(size), 0)
    arr = []
    for _ in names:
        remaining, part = divmod(remaining, 1024)
        arr.append(part)
    # Anything beyond the largest unit is folded into it instead of dropped.
    arr[-1] += remaining * 1024

    # Index of the largest non-zero unit.  Defaults to bytes for size 0, which
    # previously walked off the front of the list and raised IndexError.
    s_inx = max((i for i, part in enumerate(arr) if part), default=0)

    if show_bit == 1 and d_num > 0:
        lower = arr[s_inx - 1] if s_inx > 0 else 0
        # Integer arithmetic plus zero padding keeps leading zeros of the
        # fraction (e.g. 50/1024 -> "04"), which the original lost.
        fraction = lower * 10 ** d_num // 1024
        return f"{arr[s_inx]}.{fraction:0{d_num}d} {names[s_inx]}"

    # Never show more units than are available, so no trailing separators.
    s_num = min(show_bit, s_inx + 1)
    parts = [f"{arr[s_inx - i]}{names[s_inx - i]}" for i in range(s_num)]
    return sign.join(parts)
'''
https://xy183x237x74x212xy.mcdn.bilivideo.cn:4483
/upgcxcode/03/58/286965803/286965803-1-30102.m4s?
expires=1612373226&
platform=pc&
ssig=xUQa_u7j0UWOBVR0kS8O5w&
oi=3746188697&
trid=aebf0cf2f1d44761b0b27f027045be66p&nfc=1&
nfb=maPYqpoel5MI3qOUX6YpRA==&
mcdnid=1001353&
mid=15687846&
orderid=1,3&
agrr=0&logo=60000001
'''
def bilibili_download(url, directory, filename):
    """Download *url* to ``directory/filename``, resuming a partial file if possible.

    The total size is taken from the ``content-length`` of a first request
    covering the whole resource.  If a local file already exists it is either
    accepted as complete (same size), resumed with a ``Range`` request
    (smaller), or deleted and downloaded again (larger).

    Args:
        url: Direct media URL.
        directory: Target directory (must already exist).
        filename: Name of the file inside *directory*.

    Returns:
        True when the local file ends up with the expected size, else False.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never stored as if it were the media file.
        KeyError: If the response carries no ``content-length`` header.
    """
    path = join(directory, filename)
    headers = {
        "accept": "*/*",
        "accept-encoding": "identity",
        "accept-language": "zh-CN,zh;q=0.9",
        "origin": "https://www.bilibili.com",
        "range": "bytes=0-",  # byte range of the response; "0-" means everything
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/85.0.4183.121 Safari/537.36 "
    }
    res = requests.get(url=url, stream=True, headers=headers)
    try:
        res.raise_for_status()
        full_size = content_length = int(res.headers['content-length'])
        mode = 'wb'

        # Adapt to the state of an already existing local file.
        if os.path.exists(path):
            local_size = os.path.getsize(path)
            if full_size == local_size:
                return True
            elif full_size > local_size:
                # Release the first streamed response before asking for the rest.
                res.close()
                headers['range'] = f"bytes={local_size}-"
                res = requests.get(url=url, stream=True, headers=headers)
                res.raise_for_status()
                content_length = int(res.headers['content-length'])
                # Append only if the server honoured the range (206).  On a
                # plain 200 the body is the whole file, so start over instead
                # of appending it to the partial data.
                if res.status_code == 206:
                    mode = 'ab'
            else:
                os.remove(path)

        # Report what is about to be downloaded.
        print(res.status_code, filename)
        print("请求地址: %s" % url)
        print("剩余字节: %s" % format_byte(content_length))
        print("文件目录: %s" % path)
        print("文件存在: %s" % os.path.exists(path))

        # Track progress from the bytes written: the on-disk size lags behind
        # while data is still sitting in the file buffer.
        written = os.path.getsize(path) if mode == 'ab' else 0
        with open(path, mode) as f:
            for chunk in res.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    f.write(chunk)
                    written += len(chunk)
                    percent = int(written / full_size * 100) if full_size else 100
                    print(f'\r 下载进度:{percent}%', end='', flush=True)
    finally:
        # Always give the connection back, including on the early return.
        res.close()

    # The download is complete only if the file has the advertised size.
    if full_size == os.path.getsize(path):
        print("\n%s 下载完成!\n" % filename)
        return True
    else:
        print("\n%s 下载未完成!\n" % filename)
        return False
# Uses ffmpeg to merge the separately downloaded video and audio files into one output file
def video_join(directory, video_path, audio_path, new_path):
    """Merge a video file and an audio file into *new_path* with ffmpeg.

    ffmpeg is run with *directory* as its working directory, so the three
    paths are interpreted relative to it.  The ``ffmpeg`` found by the system
    is tried first; if it is missing or fails, ``ffmpeg.exe`` from the
    configured ``ffmpeg_path`` is tried.

    Args:
        directory: Directory containing the input files.
        video_path: Video input file.
        audio_path: Audio input file.
        new_path: Output file.

    Returns:
        True if one of the ffmpeg invocations exited with status 0.
    """
    # Local import keeps this block self-contained.
    import subprocess

    # File names derive from remote titles, so pass an argument list rather
    # than a shell string: no quoting problems and no command injection.
    args = ['-i', video_path, '-i', audio_path, '-codec', 'copy', new_path]

    candidates = ['ffmpeg']
    # A relative ffmpeg_path is resolved against the caller's working
    # directory (where the config's "./" naturally points) and, for backward
    # compatibility with the old chdir-based lookup, against *directory*.
    for base in (os.getcwd(), directory):
        executable = os.path.abspath(join(base, ffmpeg_path, 'ffmpeg.exe'))
        if executable not in candidates and os.path.isfile(executable):
            candidates.append(executable)

    for executable in candidates:
        try:
            # cwd= replaces os.chdir, so the process-wide working directory is
            # never changed and cannot be left modified after an error.
            completed = subprocess.run([executable, *args], cwd=directory)
        except OSError:
            # Executable not found or not runnable: try the next candidate.
            continue
        if completed.returncode == 0:
            return True
    return False
'''
BiliBili Crawl Robot
Returns the URL type number together with the parsed page data, so that callers can read the JSON fields directly
Basic Info:
[epList]
1. bv_id - bvid
2. cid - cid
3. ep_id - id 集号
4. ep_title - longTitle 剧集名称
5. ep_num - title 具体集数位置
[mediaInfo]
6. title - title 番剧名称
7. ssid - ssId 番剧号
'''
def parse_basic_info_from_detail_page(url):
    """Fetch a play page and extract its embedded ``window.__INITIAL_STATE__`` JSON.

    Args:
        url: Page URL of a video or bangumi episode.

    Returns:
        A tuple ``(url_type, data)``: ``url_type`` comes from
        ``get_url_type(url)`` and ``data`` is the decoded state object, or an
        empty dict when no script starting with the marker is found.
    """
    marker = "window.__INITIAL_STATE__"
    terminator = ";(function"
    page = requests.get(url).text
    state = {}
    for node in etree.HTML(page).xpath("//script"):
        texts = node.xpath("text()")
        if not texts:
            continue
        content = texts[0]
        if content.startswith(marker):
            # Skip the marker plus one more character (presumably the '=' of
            # the assignment) and stop at the last ";(function".
            state = json.loads(content[len(marker) + 1:content.rfind(terminator)])
            break
    return get_url_type(url), state
# 这是无episode的version
# parse_basic_info_from_detail_page("https://www.bilibili.com/video/BV12v4y1o7wr")
# parse_basic_info_from_detail_page("https://www.bilibili.com/video/BV18V411o7YX")
'''
data -> 开始: [videoData(videos[视频的p数], pages[各p的信息](cid, part[p名], page[p的序号]))]
视频和番剧的数据返回格式不一样,等待调整 !!!
'''
'''
通过 url 解析文件名
url: https://www.abc.com/video/bv1234/1234.m4s?bv=1234&date=1234455656
在这里的 url 中, ? 后面的则为由 & 所分割的查询参数 params
也就是 params 中有 bv = 1234 和 date = 1234455656
问号前的 // 开始,到第一个 / 之前的为域名加主机的内容
/ 之后的则为服务器的具体目录,这里最后的后缀是 .m4s 这证明这是一个视频文件,可用于下载
考虑到后面的 params 可能存在重复的 ? 这里就分开进行,先找最左边的问号,并切开取问号前的内容
之后,找最右边的 / 切开并取其右边的内容, 这样就最大可能完整地取出文件名
'''
def parse_filename(url):
    """Return the file name component of *url*, without any query string.

    The URL is first cut at the leftmost ``?`` and the text after the last
    ``/`` of what remains is returned.  Doing it in this order keeps slashes
    or question marks inside the query from leaking into the result, and also
    handles URLs with a single query parameter or none at all.

    Args:
        url: A URL such as
            ``https://host/dir/1234.m4s?bv=1234&date=1234455656``.

    Returns:
        The file name, e.g. ``1234.m4s``.
    """
    path = url.split('?', 1)[0]
    return path[path.rfind('/') + 1:]
'''
解析网站获取视频 URL 的方式:
1. 无加密
2. Base64 解密
3. 切换播放源【mp4、m3u8】
4. 其他仍待考察
PS:
1. 大部分都是以网页视频框形式为主,除非捕获,代码分析过于繁琐。如果需要,可以使用浏览器的断点进行调试 debug。
2. 大部分解析网站基本都会设置反调试的 JS 代码
踩坑记录:
1. 开发者面板(无法打开,打开时的恶意操作) - Auto 重复性操作 -> cpu: 99%
2. 部分调试的键鼠操作被封禁 - F12, right click
3. 恶性广告【You know 。。。】
4. URL 编码 - base64 ...
5. FLASH - 自行探索,本人暂不使用 ...
6. 问题来源 - 调试,网页互调用[典型的 iframe, document.parent],源地址编码 ...
7. 原因 - 防盗链、知识成果 -> 有部分盗链的网站直接贴牌防盗链【不过这些网站也就这样了】
8. 部分文件已经被编码,所以本人不太清楚其作用 - Such as: _0xc...: function(...) {...}, ...
9. 建议使用脱离浏览器的调试工具 - 浏览器的操作被 js 卡住,调试麻烦
'''
# Request headers shared by the third-party parser sites below.
parser_headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/85.0.4183.121 Safari/537.36 ",
}
def love_parser(url, arr):
    """Resolve *url* through vip.2ktvb.com and append the decoded media URL to *arr*.

    The page is searched for the first ``"url":...,`` fragment; its value is
    treated as URL-safe Base64 and decoded as UTF-8.

    Returns:
        True if a fragment was found (and the result appended), else False.
    """
    page = requests.get(f"https://vip.2ktvb.com/player/?url={url}", headers=parser_headers).text
    matches = re.findall(r'\"url\":.*?,', page)
    if not matches:
        return False
    # Take the text after the first ':' and drop the opening quote and the
    # trailing '",'.
    encoded = matches[0].split(':')[1].strip()[1:-2]
    arr.append(base64.urlsafe_b64decode(encoded).decode('utf-8'))
    return True
# m3u8
def RDHK_parser(url, arr):
    """Resolve *url* through jx.rdhk.net and append the reported URL to *arr*.

    The response text, minus its first character and last two characters, is
    parsed as JSON and its ``url`` field is appended (``None`` if absent).

    Returns:
        Always True.
    """
    body = requests.get(f"https://jx.rdhk.net/api.php?url={url}", headers=parser_headers).text
    payload = json.loads(body[1:-2])
    arr.append(payload.get('url'))
    return True
def hero_parser(url, arr):
    """Resolve *url* through api.10dy.net and append the media URL to *arr*.

    The page is searched for the first ``"url":"...",`` fragment, whose value
    is used as-is (not decoded).

    Returns:
        True if a fragment was found (and the result appended), else False.
    """
    page = requests.get(f"https://api.10dy.net/?url={url}", headers=parser_headers).text
    matches = re.findall(r'\"url\":.*?\",', page)
    if not matches:
        return False
    pieces = matches[0].split(':')
    # Re-join scheme and remainder (split apart at the ':' of "http:"), then
    # drop the opening quote and the trailing '",'.
    arr.append(f"{pieces[1]}:{pieces[2]}"[1:-2])
    return True
def moon_parser(url, arr):
    """Resolve *url* through titan.mgtv.com.kkflv.net and append the media URL to *arr*.

    The page is searched for the first ``atob(...)`` call; its quoted argument
    is decoded as URL-safe Base64 and interpreted as UTF-8.

    Returns:
        True if such a call was found (and the result appended), else False.
    """
    # Build the headers for this request only.  The original wrote the referer
    # into the shared parser_headers dict, which then leaked into every later
    # request of the other parsers.
    headers = {**parser_headers, 'referer': f"https://api.fenglinys.net/?url={url}"}
    html = requests.get(f"https://titan.mgtv.com.kkflv.net/?url={url}", headers=headers).text
    m_res = re.findall(r'atob\(.*?\)', html)
    if not m_res:
        return False
    # Strip the leading "atob(" plus opening quote and the trailing quote plus ")".
    res = m_res[0][6:-2]
    arr.append(str(base64.urlsafe_b64decode(res), 'utf-8'))
    return True
def parse_video_url(url):
    """Try each third-party parser in turn and return the first media URL found.

    The parsers return True/False, so testing their result with
    ``is not None`` (as the original did) always took the first branch: the
    fallbacks were never tried and ``arr[0]`` raised IndexError whenever the
    first parser found nothing.

    Args:
        url: Page URL of the episode to resolve.

    Returns:
        The resolved media URL, or None when no parser produced one.
    """
    for parser in (moon_parser, love_parser, hero_parser, RDHK_parser):
        found = []
        try:
            succeeded = parser(url, found)
        except (requests.RequestException, ValueError, IndexError, AttributeError):
            # One site being unreachable, or answering in an unexpected
            # format, must not prevent trying the remaining ones.
            # ValueError covers JSON, Base64 and UTF-8 decoding errors.
            continue
        if succeeded and found and found[0]:
            return found[0]
    return None
# 其中一个大会员视频解析
# bb_url = "https://www.bilibili.com/bangumi/play/ep317557"
# print(parse_video_url(bb_url))
'''
番剧详情页的解析,也就是具体播放页面 /video or /bangumi/play
'''
def vip_video_download(url, directory, filename):
    """Download a member-only episode via the third-party parser sites.

    Args:
        url: Page URL of the episode.
        directory: Target directory.
        filename: Name of the output file inside *directory*.

    Returns:
        The status returned by ``bilibili_download``, or False when no parser
        could resolve a media URL.
    """
    print("【会员解析】 模式启用")
    media_url = parse_video_url(url)
    if media_url is None:
        return False
    # Download the RESOLVED media URL.  The original passed the page URL here,
    # which stored the HTML page instead of the video.
    return bilibili_download(media_url, directory, filename)
def _download_dash(result, root, new_name, new_file, title, eq):
    """Download the DASH video/audio pair for quality *eq* and merge them.

    Returns True when the merged file exists afterwards, else False.
    """
    eq_range = result.get('accept_quality')
    dash = result.get('dash')
    videos = dash.get('video')
    audio = dash.get('audio')[0]

    # Clamp a quality outside the offered list to its nearest end (the list is
    # indexed as if ordered from highest to lowest quality).
    if eq not in eq_range:
        if eq > eq_range[0]:
            eq = eq_range[0]
        elif eq < eq_range[-1]:
            eq = eq_range[-1]
    v_urls = [video.get('base_url') for video in videos if video.get('id') == eq]
    print(f"【视频质量】 {eq}")

    res_status = False
    a_url = audio.get('base_url')
    for k, v_url in enumerate(v_urls):
        v_name = f'v_{parse_filename(v_url)}'
        a_name = f'a_{parse_filename(a_url)}'
        video_file = join(root, v_name)
        audio_file = join(root, a_name)
        # A non-empty output file means this episode is already done.
        if os.path.exists(new_file):
            if os.path.getsize(new_file) == 0:
                os.remove(new_file)
            else:
                return True
        # Download both streams and require both to be complete.
        v_status = bilibili_download(v_url, root, v_name)
        a_status = bilibili_download(a_url, root, a_name)
        if not v_status or not a_status:
            return res_status
        # Merge, then delete the intermediate m4s files.
        reduce_status = video_join(root, v_name, a_name, new_name)
        os.remove(video_file)
        os.remove(audio_file)
        label = '下载完成' if reduce_status else f'拼接失败_{str(k + 1)}'
        print(f"【{label}】 {title}")
        if reduce_status:
            res_status = True
            break
        # Remove a partial output left by the failed merge; otherwise the
        # existence check above would report it as a finished download.
        if os.path.exists(new_file):
            os.remove(new_file)
    return res_status


def video_download(url, cid, bvid, ep_id=-1, root='./', title='', eq=80, fnval=80):
    """Download one episode/part into *root*.

    Args:
        url: Page URL (selects the API and is used as referer).
        cid: cid of the episode/part.
        bvid: BV id of the video.
        ep_id: Episode id; -1 for videos that have none (UP-uploaded videos).
        root: Target directory.
        title: Title used for the file name (spaces become underscores).
        eq: Requested quality id; 16 yields an ``.mp4`` name, others ``.flv``.
        fnval: Requested stream type.

    Returns:
        True if the episode was downloaded (or already present), else False.
    """
    suffix = 'mp4' if eq == 16 else 'flv'
    new_name = f"{'_'.join(title.split(' '))}.{suffix}"
    new_file = join(root, new_name)
    res_status = False

    # Episodes with an ep_id are official releases (mostly member-only);
    # those without are uploaded by UP owners.
    api_args = {'cid': cid, 'bvid': bvid}
    if ep_id != -1:
        api_args['ep_id'] = ep_id
    v = video_api(url, eq=eq, fnval=fnval, **api_args)

    # -10403 is returned when a member-only episode is requested without
    # permission: {'code': -10403, 'message': '大会员专享限制'}.  Success looks
    # like {'code': 0, 'message': 'success', ...}.
    if int(v.get('code')) == -10403:
        print(f"【请求失败】 Code: -10403 {title}")
        res = vip_video_download(url, root, new_name)
        print(f"【请求会员视频状态】 {str(res)}")
        # Stop here: there is no result payload to process.  The original
        # fell through and failed on the missing payload.
        return res

    result = v.get('result')
    if result is None:
        result = v.get('data')
    print(f"【准备下载】 {title}")
    try:
        # Two payload shapes exist:
        #   1. dash: separate video + audio m4s streams, merged by video_join
        #   2. durl: a ready-made flv/mp4 file
        # NOTE(review): a payload without 'has_paid' is also routed to the
        # member parser here, exactly as before — confirm this is intended for
        # UP-uploaded videos.
        if not result.get('has_paid'):
            # Trial (6 min preview) payloads differ from the full ones, so
            # they are handled through the member parser as well.
            print(f"【试看会员】 {title}")
            res = vip_video_download(url, root, new_name)
            print(f"【请求会员视频状态】 {str(res)}")
            return res
        if result.get('durl') is not None:
            f_url = result.get('durl')[0].get('url')
            f_status = bilibili_download(f_url, root, new_name)
            if f_status:
                print(f"【下载完成】 {title}")
                res_status = True
        elif result.get('dash') is not None:
            res_status = _download_dash(result, root, new_name, new_file, title, eq)
        return res_status
    except Exception as e:
        print(f"【!下载出错】 {title}")
        print(">>>>>>>>>>>>>>>>>>>>>")
        print(repr(e))
        print(">>>>>>>>>>>>>>>>>>>>>")
        # Report failure explicitly instead of implicitly returning None.
        return False
def videos_download(url, root='./', eq=80, fnval=80, new_dir=True, s_time=2, d_range=False, dv_command=None):
    """Download all (or selected) episodes reachable from a play page.

    Args:
        url: Page URL of a video (``/video/...``) or bangumi episode
            (``/bangumi/play/...``).
        root: Download root directory.
        eq: Requested quality id.
        fnval: Requested stream type (forced to 80 for ``/video`` pages).
        new_dir: If true, download into a sub-directory named after the title.
        s_time: Seconds to sleep after each episode, to avoid being banned.
        d_range: If true, download only the episodes selected by *dv_command*.
        dv_command: Selection such as ``'1, 2-3, 4'`` (1-based episode
            numbers); asked for interactively when *d_range* is true and this
            is None.

    Raises:
        ValueError: If the URL type is unsupported or the page carries no
            video metadata.
    """
    # Read the page to obtain the episode list with its BV / id / cid values.
    url_type, state = parse_basic_info_from_detail_page(url)
    if url_type == 1:
        m_info = state.get('videoData')
        # UP-uploaded videos do not seem to return download data unless fnval is 80.
        fnval = 80
    elif url_type == 2:
        m_info = state.get('mediaInfo')
    else:
        raise ValueError(f"unsupported bilibili URL: {url}")
    if m_info is None:
        raise ValueError(f"no video metadata found on page: {url}")

    # Create the download directory (including missing parents).
    if new_dir:
        root = join(root, local_filename_win_auto(m_info.get('title')))
    os.makedirs(root, exist_ok=True)

    if d_range and dv_command is None:
        dv_command = input("请输入您所需的集数[逗号','分隔,区间就用'-']: Such as: 1, 2-3, 4\n")
    print(f"<<<<<<<<<< {m_info.get('title')} >>>>>>>>>>")

    # Normalise both page kinds to a list of entries plus a function producing
    # the video_download arguments; titles are only built for selected entries.
    if url_type == 1:
        bv_id = m_info.get('bvid')
        entries = m_info.get('pages')

        def describe(page):
            name = f"第{page.get('page')}话_{local_filename_win_auto(page.get('part') or '')}"
            return page.get('cid'), bv_id, -1, name
    else:
        entries = state.get('epList')

        def describe(episode):
            name = f"第{episode.get('title')}话_{local_filename_win_auto(episode.get('longTitle') or '')}"
            return episode.get('cid'), episode.get('bvid'), episode.get('id'), name

    if d_range:
        # The user enters 1-based episode numbers; convert them to list
        # indices (the original used them as indices directly, downloading
        # the following episode) and skip numbers that do not exist.
        indexes = []
        for number in get_num_list_from_str(dv_command):
            if 1 <= number <= len(entries):
                indexes.append(number - 1)
            else:
                print(f"【集数超出范围】 {number}")
    else:
        indexes = range(len(entries))

    for i in indexes:
        cid, bvid, ep_id, name = describe(entries[i])
        video_download(url, cid, bvid, ep_id, root, name, eq, fnval)
        # Pause between episodes to avoid being banned.
        time.sleep(s_time)
'''
下载成功的案例
Time: 2021-02-05 15:32
'''
# 番剧: 黑神
# videos_download("https://www.bilibili.com/bangumi/play/ep33376", "H:/bilibili_py_load/")
# 番剧: Re:从零开始的异世界生活 第二季 后半
# videos_download("https://www.bilibili.com/bangumi/play/ss36429", "H:/bilibili_py_load/")
"""
信息获取
https://api.bilibili.com/x/player/v2?
cid=290412211&aid=416423565&
bvid=BV1NV411B74z&
season_id=36168&
ep_id=373889
比如下面: 视频的原 aid, ip 信息, 账户信息, 局部 vip 信息
{"aid": 416423565,"bvid": "BV1NV411B74z","ip_info": {...},"name": "mvlg_fms", ...}
由于这个对于视频下载而言,作用不大,所以这里就简略地过一下。毕竟分析时顺便看到这玩意。
用户信息的显示可能是因为你保存了密码,或者在使用账户,一般而言,用户信息记录在 Cookie 中。
"""
'''
字节格式转换测试
'''
# print(format_byte(248347827))
# print(format_byte(1360485139))
'''
功能测试
'''
# video_download("https://www.bilibili.com/video/BV12v4y1o7wr", 291293061, 'BV12v4y1o7wr', eq=32, root=v_dir_path)
# print(video_api("https://www.bilibili.com/video/BV18V411o7YX", eq=32, fnval=80,
# cid=174780228, bvid='BV18V411o7YX'))
# print(parse_basic_info_from_detail_page("https://www.bilibili.com/video/BV12v4y1o7wr"))
# videos_download("https://www.bilibili.com/video/BV12v4y1o7wr")
# videos_download("https://www.bilibili.com/video/BV18V411o7YX", eq=32)
if __name__ == '__main__':
    # Script entry point: collect page URLs (interactively or from a file) and
    # download each of them with the settings read from bilibili.config.
    #
    # The ffmpeg directory is configured in bilibili.config, e.g.
    #     FFMPEG_PATH = "H:/"
    # ffmpeg is the tool used to merge the downloaded video and audio.
    # Without a config file the values could be asked for instead, e.g.
    #     v_dir_path = input(...)  /  v_eq = int(input(...))
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
    print(" @author: mvlg")
    print(" @function: One downloader is used to download most part of video in bilibili we want")
    print(" @preface: I hope this tool will have a good time for you")
    print(" if you have any problems, please call me or send mail")
    print(" @platform: 、CSDN")
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
    vd_urls = []
    # EPISODE_COMMAND is kept as text; anything but "False" enables selection.
    d_command = not EPISODE_COMMAND == "False"
    # Everything below can be controlled from the config file.
    if vu_mode == "False":
        # Interactive mode: read URLs until the user quits.
        while True:
            v_res = input("请输入番剧的详情播放页网址: \n"
                          "退出 - 请输入 q | Q | quit | exit\n")
            if v_res in ['q', 'Q', 'quit', 'exit']:
                break
            vd_urls.append(v_res)
    else:
        # File mode: one URL per line; blank lines and '#' comments are skipped.
        with open(vuf_path, 'r') as fp:
            for line in fp:
                entry = line.strip()
                if not entry or entry.startswith('#'):
                    continue
                vd_urls.append(entry)
    for vd_url in vd_urls:
        print(f"【即将访问】{vd_url}")
        try:
            # The flag enables episode selection (d_range); the selection text
            # itself is then asked for by videos_download.  The original
            # passed the flag as dv_command, which left it without effect.
            videos_download(vd_url, v_dir_path, eq=v_eq, fnval=v_fnval,
                            d_range=d_command)
        except Exception as e:
            print(f"【!视频下载主程序出错】 {vd_url}")
            print(">>>>>>>>>>>>>>>>>>>>>")
            print(repr(e))
            print(">>>>>>>>>>>>>>>>>>>>>")
            # NOTE(review): the original indentation was lost; the pause is
            # placed on the error path so the message stays readable — confirm.
            time.sleep(5)
    os.system("pause")
bilibili.config
FFMPEG_PATH = "./"
VIDEO_DIRECTORY = "H:/bilibili_py_load/"
SESSDATA = " "
VIDEO_QUALITY = 80
VIDEO_WAY = 112
EPISODE_COMMAND = False
VIDEO_URL_MODE = True
VIDEO_URL_FILE_PATH = "./urls.txt"
urls.txt
# 这里的 # 号开头为注释
https://www.bilibili.com/bangumi/play/ss36362