#EXTM3U
#EXT-X-VERSION:3
#EXT-X-TARGETDURATION:6 每个ts文件的最大长度
#EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-MEDIA-SEQUENCE:0
#EXT-X-KEY:METHOD=AES-128,URI="/20231106/rPXApTHz/2000kb/hls/key.key" 切片文件的加密方式以及加密的密钥地址
#EXTINF:3,
/20231106/rPXApTHz/2000kb/hls/45NdQTke.ts 不带#是每个ts文件的地址
- 拿到视频页的页面源代码
- 从视频页的页面源代码中找到对应的iframe,提取到iframe里面的src
- 请求到src对应的页面源代码。在该页面中解析出真正的M3U8文件地址
- 下载第一层M3U8,从第一层M3U8中解析出第二层的地址
- 下载第二层M3U8,从第二层M3U8中解析出每一个TS文件的路径,启动协程任务
- 对ts文件进行解密操作:先拿key
- 对ts文件进行合并.还原回mp4文件
使用requests获取视频页面的源码
def get_page_source(url, timeout=10):
    """Fetch ``url`` with a desktop-browser User-Agent and return the body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script indefinitely.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
    }
    resp = requests.get(url=url, headers=headers, timeout=timeout)
    # The body is always decoded as UTF-8, whatever the server declares.
    resp.encoding = "utf-8"
    return resp.text
使用正则re模块
def get_first_m3u8_src(url):
    """Return the first-level ``index.m3u8`` address embedded in the video page.

    The page source is searched for a fragment of the form
    ``"link_pre":"","url":"<address>index.m3u8"``. Backslashes inside the
    captured address are removed before ``index.m3u8`` is appended again.

    Args:
        url: Address of the video page.

    Returns:
        The address of the first-level m3u8 playlist.

    Raises:
        ValueError: If the page does not contain the expected fragment.
    """
    page_source = get_page_source(url)
    # The named group must be spelled out: ``(?P.*?)`` is not valid regex
    # syntax and makes re.compile raise before any page is searched.
    pattern = re.compile(
        r'"link_pre":"","url":"(?P<m3u8_src>.*?)index\.m3u8"', re.S
    )
    result = pattern.search(page_source)
    if result is None:
        raise ValueError(f"no m3u8 address found in page source of {url}")
    return result.group("m3u8_src").replace('\\', '') + 'index.m3u8'
下载是因为在后续的步骤中会使用到文件
with open(file = ,mode = ,encoding=) as f:
f.write()
使用with创建文件,并写入
def download_second_m3u8(url):
    """Download the second-level playlist and save it as ``./m3u8.txt``.

    The first-level playlist at ``url`` is fetched, the first line that is
    neither blank nor a ``#`` tag is taken as the address of the second-level
    playlist (resolved relative to ``url``), and that playlist is written to
    ``./m3u8.txt`` for the later steps to read.

    Args:
        url: Address of the first-level m3u8 playlist.

    Raises:
        ValueError: If the first-level playlist lists no address at all.
    """
    first_level = get_page_source(url)
    # Look for the first URI line instead of assuming it sits at a fixed line
    # index, so extra tag lines or blank lines do not break the lookup.
    candidates = (line.strip() for line in first_level.splitlines())
    second_path = next(
        (line for line in candidates if line and not line.startswith('#')),
        None,
    )
    if second_path is None:
        raise ValueError(f"no second-level playlist listed in {url}")
    second_m3u8_src = urljoin(url, second_path)
    page_source = get_page_source(second_m3u8_src)
    with open(file='./m3u8.txt', mode='w', encoding='utf-8') as f:
        f.write(page_source)
使用strip()去除字符串首尾的空白字符
使用startswith()判断字符串的开头
def get_merge_ts():
    """Return the segment paths listed in ``./m3u8.txt``, in playlist order.

    Lines starting with ``#`` are playlist tags and are skipped. Blank lines
    are skipped as well; otherwise they would be returned as empty paths and
    later treated as segments to download, decrypt and merge.

    Returns:
        A list of the remaining lines with surrounding whitespace removed.
    """
    with open(file='./m3u8.txt', mode='r', encoding='utf-8') as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line and not line.startswith('#')]
async def download_one_ts(ts_src):
    """Download one segment into ``./movies_yuan/``, retrying up to 10 times.

    The file is named after the last path component of ``ts_src``. After a
    failed attempt the coroutine waits ``2 * attempt`` seconds (0, 2, 4, ...)
    before trying again.

    Args:
        ts_src: Absolute address of the segment.
    """
    file_name = ts_src.split('/')[-1]
    for attempt in range(10):
        print(f'开始下载{file_name}......')
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(ts_src) as resp:
                    # Treat HTTP error pages as failures instead of saving
                    # them as if they were segment data.
                    resp.raise_for_status()
                    content = await resp.content.read()
            async with aiofiles.open(file=f"./movies_yuan/{file_name}", mode='wb') as f:
                await f.write(content)
        except (aiohttp.ClientError, asyncio.TimeoutError, OSError):
            print(f"{file_name}下载失败,重新下载")
            # asyncio.sleep yields to the event loop; time.sleep here would
            # freeze every other download while this one backs off.
            await asyncio.sleep(2 * attempt)
        else:
            print(f'{file_name}下载完成.....')
            return
    # Every attempt failed: say so rather than reporting success.
    print(f"{file_name}下载失败,已重试10次,放弃下载")
async def download_all_ts():
    """Download every segment listed in ``./m3u8.txt`` concurrently.

    Each path returned by ``get_merge_ts`` is resolved against the site root
    and handed to ``download_one_ts`` as its own task.
    """
    url = 'https://ukzy.ukubf3.com/'
    ts = get_merge_ts()
    print(f"共计{len(ts)}个电影")
    tasks = [
        asyncio.create_task(download_one_ts(urljoin(url, path)))
        for path in ts
    ]
    # asyncio.wait raises ValueError when given an empty collection, so an
    # empty playlist is simply a no-op.
    if tasks:
        await asyncio.wait(tasks)
def get_key():
    """Fetch the AES-128 key referenced by the ``#EXT-X-KEY`` tag in ``./m3u8.txt``.

    Returns:
        The key as raw bytes, exactly as served.

    Raises:
        ValueError: If the playlist has no ``#EXT-X-KEY:METHOD=AES-128`` tag.
        requests.exceptions.RequestException: If the key request fails.
    """
    # The named group must be spelled out: ``(?P.*?)`` is not valid regex
    # syntax and makes re.compile raise.
    pattern = re.compile(r'#EXT-X-KEY:METHOD=AES-128,URI="(?P<key_url>.*?)"')
    with open(file='./m3u8.txt', mode='r', encoding='utf-8') as f:
        match = pattern.search(f.read())
    if match is None:
        raise ValueError("no AES-128 #EXT-X-KEY tag found in ./m3u8.txt")
    url = urljoin("https://ukzy.ukubf3.com/", match.group('key_url'))
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
    }
    resp = requests.get(url=url, headers=head, timeout=10)
    # Use the raw body: decoding the key as UTF-8 text and encoding it again
    # alters any byte sequence that is not valid UTF-8.
    return resp.content
pip install pycryptodome
from Crypto.Cipher import AES
aes = AES.new(key=key,IV=b"0000000000000000",mode=AES.MODE_CBC)
# 单个文件解密
async def dos_one_file(file_name, key, iv=b"0000000000000000"):
    """Decrypt one downloaded segment with AES in CBC mode.

    Reads ``./movies_yuan/<file_name>``, decrypts the whole content and writes
    the result to ``./movies_mudi/<file_name>``. The decrypted bytes are
    written as-is; no padding is removed.

    Args:
        file_name: Name of the segment file (without directory).
        key: AES key bytes.
        iv: Initialization vector. Defaults to sixteen ASCII ``0`` characters,
            the value this script has always used.
            NOTE(review): RFC 8216 says that when ``EXT-X-KEY`` carries no IV
            attribute the segment's media sequence number is the IV; pass the
            proper value here if the first block of each segment matters.
    """
    print(f"开始解密{file_name}")
    aes = AES.new(key=key, IV=iv, mode=AES.MODE_CBC)
    async with aiofiles.open(file=f"./movies_yuan/{file_name}", mode='rb') as f1, \
            aiofiles.open(f"./movies_mudi/{file_name}", mode='wb') as f2:
        content = await f1.read()
        bs = aes.decrypt(content)
        await f2.write(bs)
    print(f"{file_name}解密完毕")
#解密所有文件
async def dos_all_file():
    """Decrypt every segment listed in ``./m3u8.txt`` concurrently.

    The key is fetched once with ``get_key`` and shared by all tasks; each
    task decrypts the file named after the last component of a segment path.
    """
    key = get_key()
    tasks = [
        asyncio.create_task(dos_one_file(path.split('/')[-1], key))
        for path in get_merge_ts()
    ]
    # asyncio.wait raises ValueError when given an empty collection, so an
    # empty playlist is simply a no-op.
    if tasks:
        await asyncio.wait(tasks)
os.system('copy /b file1+file2 new_file')
# Note: the command above merges file1 and file2 into new_file.
def merge_all_ts():
    """Concatenate the decrypted segments into ``./movies_mudi/movie.mp4``.

    Segments are merged in playlist order with the Windows ``copy /b``
    command. They are first joined in batches of 100 into ``1.mp4``,
    ``2.mp4``, ... and those parts are then joined into ``movie.mp4``, which
    keeps each command line short. Does nothing when the playlist lists no
    segments.

    NOTE(review): file names come from the downloaded playlist and are
    interpolated into a shell command unquoted; confirm the source is trusted.
    """
    file_names = [path.split('/')[-1] for path in get_merge_ts()]
    if not file_names:
        return
    now_dir = os.getcwd()
    os.chdir('./movies_mudi')
    try:
        part_names = []
        # Slicing yields only non-empty batches, so no part is ever built from
        # an empty source list and every listed part really exists.
        for start in range(0, len(file_names), 100):
            batch = file_names[start:start + 100]
            part = f'{len(part_names) + 1}.mp4'
            os.system(f'copy /b {"+".join(batch)} {part}')
            part_names.append(part)
        print(part_names)
        os.system(f'copy /b {"+".join(part_names)} movie.mp4')
    finally:
        # Restore the working directory even if something above fails.
        os.chdir(now_dir)
import asyncio
import os
import time
import aiofiles
import aiohttp
import requests
from lxml import etree
import re
from urllib.parse import urljoin
from Crypto.Cipher import AES
# 获取页面源码
def get_page_source(url, timeout=10):
    """Fetch ``url`` with a desktop-browser User-Agent and return the body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script indefinitely.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
    }
    resp = requests.get(url=url, headers=headers, timeout=timeout)
    # The body is always decoded as UTF-8, whatever the server declares.
    resp.encoding = "utf-8"
    return resp.text
#提取源码中第一层m3u8的地址
def get_first_m3u8_src(url):
    """Return the first-level ``index.m3u8`` address embedded in the video page.

    The page source is searched for a fragment of the form
    ``"link_pre":"","url":"<address>index.m3u8"``. Backslashes inside the
    captured address are removed before ``index.m3u8`` is appended again.

    Args:
        url: Address of the video page.

    Returns:
        The address of the first-level m3u8 playlist.

    Raises:
        ValueError: If the page does not contain the expected fragment.
    """
    page_source = get_page_source(url)
    # The named group must be spelled out: ``(?P.*?)`` is not valid regex
    # syntax and makes re.compile raise before any page is searched.
    pattern = re.compile(
        r'"link_pre":"","url":"(?P<m3u8_src>.*?)index\.m3u8"', re.S
    )
    result = pattern.search(page_source)
    if result is None:
        raise ValueError(f"no m3u8 address found in page source of {url}")
    return result.group("m3u8_src").replace('\\', '') + 'index.m3u8'
#获取m3u8文件并下载
def download_second_m3u8(url):
    """Download the second-level playlist and save it as ``./m3u8.txt``.

    The first-level playlist at ``url`` is fetched, the first line that is
    neither blank nor a ``#`` tag is taken as the address of the second-level
    playlist (resolved relative to ``url``), and that playlist is written to
    ``./m3u8.txt`` for the later steps to read.

    Args:
        url: Address of the first-level m3u8 playlist.

    Raises:
        ValueError: If the first-level playlist lists no address at all.
    """
    first_level = get_page_source(url)
    # Look for the first URI line instead of assuming it sits at a fixed line
    # index, so extra tag lines or blank lines do not break the lookup.
    candidates = (line.strip() for line in first_level.splitlines())
    second_path = next(
        (line for line in candidates if line and not line.startswith('#')),
        None,
    )
    if second_path is None:
        raise ValueError(f"no second-level playlist listed in {url}")
    second_m3u8_src = urljoin(url, second_path)
    page_source = get_page_source(second_m3u8_src)
    with open(file='./m3u8.txt', mode='w', encoding='utf-8') as f:
        f.write(page_source)
# 获取m3u8中的ts_src
def get_merge_ts():
    """Return the segment paths listed in ``./m3u8.txt``, in playlist order.

    Lines starting with ``#`` are playlist tags and are skipped. Blank lines
    are skipped as well; otherwise they would be returned as empty paths and
    later treated as segments to download, decrypt and merge.

    Returns:
        A list of the remaining lines with surrounding whitespace removed.
    """
    with open(file='./m3u8.txt', mode='r', encoding='utf-8') as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line and not line.startswith('#')]
#下载单个ts
async def download_one_ts(ts_src):
    """Download one segment into ``./movies_yuan/``, retrying up to 10 times.

    The file is named after the last path component of ``ts_src``. After a
    failed attempt the coroutine waits ``2 * attempt`` seconds (0, 2, 4, ...)
    before trying again.

    Args:
        ts_src: Absolute address of the segment.
    """
    file_name = ts_src.split('/')[-1]
    for attempt in range(10):
        print(f'开始下载{file_name}......')
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(ts_src) as resp:
                    # Treat HTTP error pages as failures instead of saving
                    # them as if they were segment data.
                    resp.raise_for_status()
                    content = await resp.content.read()
            async with aiofiles.open(file=f"./movies_yuan/{file_name}", mode='wb') as f:
                await f.write(content)
        except (aiohttp.ClientError, asyncio.TimeoutError, OSError):
            print(f"{file_name}下载失败,重新下载")
            # asyncio.sleep yields to the event loop; time.sleep here would
            # freeze every other download while this one backs off.
            await asyncio.sleep(2 * attempt)
        else:
            print(f'{file_name}下载完成.....')
            return
    # Every attempt failed: say so rather than reporting success.
    print(f"{file_name}下载失败,已重试10次,放弃下载")
#下载所有的ts
async def download_all_ts():
    """Download every segment listed in ``./m3u8.txt`` concurrently.

    Each path returned by ``get_merge_ts`` is resolved against the site root
    and handed to ``download_one_ts`` as its own task.
    """
    url = 'https://ukzy.ukubf3.com/'
    ts = get_merge_ts()
    print(f"共计{len(ts)}个电影")
    tasks = [
        asyncio.create_task(download_one_ts(urljoin(url, path)))
        for path in ts
    ]
    # asyncio.wait raises ValueError when given an empty collection, so an
    # empty playlist is simply a no-op.
    if tasks:
        await asyncio.wait(tasks)
def get_key():
    """Fetch the AES-128 key referenced by the ``#EXT-X-KEY`` tag in ``./m3u8.txt``.

    Returns:
        The key as raw bytes, exactly as served.

    Raises:
        ValueError: If the playlist has no ``#EXT-X-KEY:METHOD=AES-128`` tag.
        requests.exceptions.RequestException: If the key request fails.
    """
    # The named group must be spelled out: ``(?P.*?)`` is not valid regex
    # syntax and makes re.compile raise.
    pattern = re.compile(r'#EXT-X-KEY:METHOD=AES-128,URI="(?P<key_url>.*?)"')
    with open(file='./m3u8.txt', mode='r', encoding='utf-8') as f:
        match = pattern.search(f.read())
    if match is None:
        raise ValueError("no AES-128 #EXT-X-KEY tag found in ./m3u8.txt")
    url = urljoin("https://ukzy.ukubf3.com/", match.group('key_url'))
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
    }
    resp = requests.get(url=url, headers=head, timeout=10)
    # Use the raw body: decoding the key as UTF-8 text and encoding it again
    # alters any byte sequence that is not valid UTF-8.
    return resp.content
# 单个文件解密
async def dos_one_file(file_name, key, iv=b"0000000000000000"):
    """Decrypt one downloaded segment with AES in CBC mode.

    Reads ``./movies_yuan/<file_name>``, decrypts the whole content and writes
    the result to ``./movies_mudi/<file_name>``. The decrypted bytes are
    written as-is; no padding is removed.

    Args:
        file_name: Name of the segment file (without directory).
        key: AES key bytes.
        iv: Initialization vector. Defaults to sixteen ASCII ``0`` characters,
            the value this script has always used.
            NOTE(review): RFC 8216 says that when ``EXT-X-KEY`` carries no IV
            attribute the segment's media sequence number is the IV; pass the
            proper value here if the first block of each segment matters.
    """
    print(f"开始解密{file_name}")
    aes = AES.new(key=key, IV=iv, mode=AES.MODE_CBC)
    async with aiofiles.open(file=f"./movies_yuan/{file_name}", mode='rb') as f1, \
            aiofiles.open(f"./movies_mudi/{file_name}", mode='wb') as f2:
        content = await f1.read()
        bs = aes.decrypt(content)
        await f2.write(bs)
    print(f"{file_name}解密完毕")
#解密所有文件
async def dos_all_file():
    """Decrypt every segment listed in ``./m3u8.txt`` concurrently.

    The key is fetched once with ``get_key`` and shared by all tasks; each
    task decrypts the file named after the last component of a segment path.
    """
    key = get_key()
    tasks = [
        asyncio.create_task(dos_one_file(path.split('/')[-1], key))
        for path in get_merge_ts()
    ]
    # asyncio.wait raises ValueError when given an empty collection, so an
    # empty playlist is simply a no-op.
    if tasks:
        await asyncio.wait(tasks)
#所有ts文件合并
def merge_all_ts():
    """Concatenate the decrypted segments into ``./movies_mudi/movie.mp4``.

    Segments are merged in playlist order with the Windows ``copy /b``
    command. They are first joined in batches of 100 into ``1.mp4``,
    ``2.mp4``, ... and those parts are then joined into ``movie.mp4``, which
    keeps each command line short. Does nothing when the playlist lists no
    segments.

    NOTE(review): file names come from the downloaded playlist and are
    interpolated into a shell command unquoted; confirm the source is trusted.
    """
    file_names = [path.split('/')[-1] for path in get_merge_ts()]
    if not file_names:
        return
    now_dir = os.getcwd()
    os.chdir('./movies_mudi')
    try:
        part_names = []
        # Slicing yields only non-empty batches, so no part is ever built from
        # an empty source list and every listed part really exists.
        for start in range(0, len(file_names), 100):
            batch = file_names[start:start + 100]
            part = f'{len(part_names) + 1}.mp4'
            os.system(f'copy /b {"+".join(batch)} {part}')
            part_names.append(part)
        print(part_names)
        os.system(f'copy /b {"+".join(part_names)} movie.mp4')
    finally:
        # Restore the working directory even if something above fails.
        os.chdir(now_dir)
def main():
    """Run the pipeline stage that is currently enabled: merging the segments.

    The earlier stages are kept as commented-out calls so they can be switched
    back on one at a time.
    """
    # url = "http://www.slxljy.com/tvshow/877737-2-1.html"
    # # first_m3u8_src = get_first_m3u8_src(url)
    # print("开始下载电影......")
    # Download every ts segment:
    # asyncio.run(download_all_ts())
    # print('电影下载完成')
    # Decrypt every ts segment:
    # asyncio.run(dos_all_file())
    merge_all_ts()


if __name__ == '__main__':
    main()