Python3实现的m3u8批量下载器 解密&合并&多线程

①、下载m3u8文件。如果其内容是多级码流(即文件中只列出各分辨率对应的m3u8地址,而不是ts分片列表),则还需要再访问所选分辨率对应的url,重新下载真正的m3u8
②、解析m3u8,判断是否加密了(需要提取加密方式、加密key、IV),提取ts列表
③、多线程下载所有ts(注意别打乱顺序,在m3u8文件中的顺序就是在完整视频中的顺序,所以需要记录原来的顺序,或者按照顺序进行ts重命名)
④、合并(如果加密了,则对每个ts解密)
⑤、调用FFmpeg,将合并好的视频信息放入一个mp4容器中(直接放在mp4文件也行)
⑥、回到①,开始下载下一个m3u8
# UTF-8
# author hestyle
# desc: 必须在终端直接执行,不能在pycharm等IDE中直接执行,否则看不到动态进度条效果

import os
import sys
import m3u8
import requests
import traceback
import threadpool
from Crypto.Cipher import AES

# HTTP request headers passed to every requests.get() call in this file
# (playlist, key and ts segment downloads); they imitate a desktop Chrome browser.
# NOTE(review): "br" is advertised in Accept-Encoding — confirm the installed
# requests/urllib3 stack can actually decode Brotli-compressed responses.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Connection": "Keep-Alive",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
}
###################### Configuration ##########################
# Batch input file: one "title,m3u8 url" entry per line
m3u8InputFilePath = "D:/input/m3u8s_input.txt"
# Directory where the finished videos are saved
saveRootDirPath = "D:/output"
# File that collects the "title,url" entries whose download failed
errorM3u8InfoDirPath = "D:/output/error.txt"
# Attempt budget for downloading the m3u8 playlist and the key file;
# ts segments are retried without limit until they succeed
m3u8TryCountConf = 10
# Number of worker threads (segments downloaded at the same time)
processCountConf = 50
#######################################################

# Module-level state shared by the functions below
# Shared thread pool (created in the __main__ block)
taskThreadPool = None
# URL of the m3u8 currently being downloaded
m3u8Url = None
# URL prefix: m3u8Url up to (not including) its last '/'
rootUrlPath = None
# Title of the current video; used as the output mp4 file name
title = None
# Total number of ts segments of the current video
sumCount = None
# Number of ts segments downloaded so far
doneCount = None
# Cache directory for downloaded segments and the merged file
cachePath = saveRootDirPath + "/cache"
# Path of the log file
logPath = cachePath + "/log.log"
# Open log file object (opened in the __main__ block)
logFile = None

# 1、下载m3u8文件
def getM3u8Info():
    """Download and parse the playlist at the global ``m3u8Url``.

    The download is retried using ``m3u8TryCountConf`` as the attempt budget.
    Any failure of an attempt (network error, HTTP error status, truncated
    body) is logged and retried. When the playlist is a variant (multi-bitrate)
    playlist, the first entry that points to another ``.m3u8`` is resolved
    against the current URL and downloaded recursively.

    Side effects: updates the globals ``m3u8Url`` (on a 301 or when following
    a variant entry) and ``rootUrlPath`` (``m3u8Url`` up to its last '/').

    Returns:
        The object produced by ``m3u8.loads`` for the media playlist, or
        ``None`` when the download failed or no nested playlist was found.
    """
    global m3u8Url
    global logFile
    global rootUrlPath
    # Local import keeps this function independent of the file's import block.
    from urllib.parse import urljoin

    tryCount = m3u8TryCountConf
    while True:
        if tryCount < 0:
            print("\t{0}下载失败!".format(m3u8Url))
            logFile.write("\t{0}下载失败!\n".format(m3u8Url))
            return None
        tryCount = tryCount - 1
        try:
            response = requests.get(m3u8Url, headers=headers, timeout=20, allow_redirects=True)
            if response.status_code == 301:
                nowM3u8Url = response.headers["location"]
                print("\t{0}重定向至{1}!".format(m3u8Url, nowM3u8Url))
                logFile.write("\t{0}重定向至{1}!\n".format(m3u8Url, nowM3u8Url))
                m3u8Url = nowM3u8Url
                continue
            # A 4xx/5xx error page must not be parsed as if it were a playlist.
            response.raise_for_status()
            # Content-Length is optional (e.g. chunked responses); only compare
            # sizes when the server actually sent it.
            expected_length = response.headers.get('Content-Length')
            if expected_length is not None and int(expected_length) > len(response.content):
                raise Exception("m3u8下载不完整")
            print("\t{0}下载成功!".format(m3u8Url))
            logFile.write("\t{0}下载成功!\n".format(m3u8Url))
            rootUrlPath = m3u8Url[0:m3u8Url.rindex('/')]
            break
        except Exception as exception:
            # requests raises its own exception types (not the builtin
            # TimeoutError), so every failed attempt is caught here and retried.
            print("\t{0}下载失败!正在重试".format(m3u8Url))
            logFile.write("\t{0}下载失败!正在重试 msg = {1}\n".format(m3u8Url, exception))
            traceback.print_exc()
    # Parse the playlist text
    m3u8Info = m3u8.loads(response.text)
    if not m3u8Info.is_variant:
        return m3u8Info
    # The URL pointed at a variant playlist: follow its first nested .m3u8 entry
    print("\t{0}为多级码流!".format(m3u8Url))
    logFile.write("\t{0}为多级码流!\n".format(m3u8Url))
    for rowData in response.text.splitlines():
        rowData = rowData.strip()
        if not rowData or rowData.startswith("#"):
            continue
        # Look at the path part only, so a "?token=..." suffix does not hide the extension
        if rowData.split("?", 1)[0].endswith(".m3u8"):
            # urljoin handles relative, root-relative and absolute entries alike
            m3u8Url = urljoin(m3u8Url, rowData)
            rootUrlPath = m3u8Url[0:m3u8Url.rindex('/')]
            return getM3u8Info()
    # No nested playlist entry was found
    print("\t{0}响应未寻找到m3u8!".format(response.text))
    logFile.write("\t{0}响应未寻找到m3u8!\n".format(response.text))
    return None

# 2、下载key文件
def getKey(keyUrl):
    """Download the decryption key located at ``keyUrl``.

    The download is retried using ``m3u8TryCountConf`` as the attempt budget;
    each failed attempt is logged together with its reason.

    Args:
        keyUrl: URL of the key file.

    Returns:
        The key as text (``response.text``), or ``None`` when the download
        failed or the key is not UTF-8 text.

    NOTE(review): the key is handed to the caller as ``str``, so a key made of
    arbitrary binary bytes cannot be represented; such keys are rejected here
    instead of being retried. Consider returning ``response.content``.
    """
    global logFile
    tryCount = m3u8TryCountConf
    while True:
        if tryCount < 0:
            print("\t{0}下载失败!".format(keyUrl))
            logFile.write("\t{0}下载失败!\n".format(keyUrl))
            return None
        tryCount = tryCount - 1
        try:
            response = requests.get(keyUrl, headers=headers, timeout=20, allow_redirects=True)
            if response.status_code == 301:
                nowKeyUrl = response.headers["location"]
                print("\t{0}重定向至{1}!".format(keyUrl, nowKeyUrl))
                logFile.write("\t{0}重定向至{1}!\n".format(keyUrl, nowKeyUrl))
                keyUrl = nowKeyUrl
                continue
            # An HTTP error page is not a key.
            response.raise_for_status()
            # Content-Length is optional; only compare sizes when it is present.
            expected_length = response.headers.get('Content-Length')
            if expected_length is not None and int(expected_length) > len(response.content):
                raise Exception("key下载不完整")
        except Exception as exception:
            print("\t{0}下载失败!".format(keyUrl))
            logFile.write("\t{0}下载失败! msg = {1}\n".format(keyUrl, exception))
            continue
        try:
            keyForLog = response.content.decode("utf-8")
        except UnicodeDecodeError:
            # Downloading again would return the same bytes, so stop right away.
            print("\t{0}的key不是UTF-8文本,无法处理!".format(keyUrl))
            logFile.write("\t{0}的key不是UTF-8文本,无法处理!\n".format(keyUrl))
            return None
        print("\t{0}下载成功!key = {1}".format(keyUrl, keyForLog))
        logFile.write("\t{0}下载成功! key = {1}\n".format(keyUrl, keyForLog))
        break
    return response.text

# 3、多线程下载ts流
def mutliDownloadTs(playlist):
    """Download every ts segment of ``playlist`` through the shared thread pool.

    Resets the global progress counters (``sumCount``/``doneCount``), queues
    one ``downloadTs`` task per segment on ``taskThreadPool`` and blocks until
    all of them have finished.

    Args:
        playlist: list of ts segment URIs, in playback order.

    Returns:
        True once all tasks have completed; False when ``playlist`` is empty
        (nothing is queued in that case).
    """
    global logFile
    global sumCount
    global doneCount
    global taskThreadPool
    # Nothing to download; also avoids a division by zero in the progress bar.
    if not playlist:
        print("\t没有可下载的ts分片!")
        return False
    # One task per segment: a (positional args, keyword args) pair for makeRequests
    taskList = [(None, {"playlist": playlist, "index": index}) for index in range(len(playlist))]
    # Reset the segment total and the finished-segment counter
    doneCount = 0
    sumCount = len(taskList)
    printProcessBar(sumCount, doneCount, 50)
    # Queue the work on the thread pool
    for workRequest in threadpool.makeRequests(downloadTs, taskList):
        taskThreadPool.putRequest(workRequest)
    # Wait until every queued task has been processed
    taskThreadPool.wait()
    print("")
    return True

# 4、下载单个ts playlists[index]
def downloadTs(playlist, index):
    """Download segment ``playlist[index]`` into the cache directory.

    The segment is stored as ``<cachePath>/<index zero-padded to 8 digits>.ts``
    so that the merge step can restore the playback order. The download is
    retried without limit until it succeeds; every failed attempt is written
    to the log file together with its reason.

    Args:
        playlist: list of ts segment URIs.
        index: position of the segment to download.

    NOTE(review): this runs on pool threads and increments the global
    ``doneCount`` without a lock, so the displayed progress may lag slightly.
    """
    global logFile
    global sumCount
    global doneCount
    global cachePath
    global rootUrlPath
    # Local import keeps this function independent of the file's import block.
    from urllib.parse import urljoin

    # File name format is "00000001.ts": index left-padded with zeros to 8 digits
    outputPath = cachePath + "/" + "{0:0>8}.ts".format(index)
    # urljoin keeps absolute URIs as they are and resolves relative and
    # root-relative URIs against the playlist's directory.
    tsUrl = urljoin(rootUrlPath + "/", playlist[index])
    succeed = False
    while not succeed:
        try:
            response = requests.get(tsUrl, timeout=10, headers=headers, stream=True)
            if response.status_code != 200:
                raise Exception("HTTP status {0}".format(response.status_code))
            content = response.content
            # Content-Length is optional (e.g. chunked responses); only compare
            # sizes when the server actually sent it.
            expected_length = response.headers.get('Content-Length')
            if expected_length is not None and int(expected_length) > len(content):
                raise Exception("分片下载不完整")
            with open(outputPath, "wb") as outputFp:
                outputFp.write(content)
            doneCount += 1
            printProcessBar(sumCount, doneCount, 50)
            logFile.write("\t分片{0:0>8} url = {1} 下载成功!\n".format(index, tsUrl))
            succeed = True
        except Exception as exception:
            logFile.write("\t分片{0:0>8} url = {1} 下载失败!正在重试...msg = {2}\n".format(index, tsUrl, exception))

# 5、合并ts
def mergeTs(tsFileDir, outputFilePath, cryptor, count):
    """Concatenate the cached ts segments into one file, decrypting if needed.

    Segments are read as ``<tsFileDir>/<index zero-padded to 8 digits>.ts`` for
    index 0..count-1 and appended to ``outputFilePath`` in that order. A
    segment file that does not exist is reported and skipped.

    Args:
        tsFileDir: directory holding the downloaded segments.
        outputFilePath: path of the merged output file (overwritten).
        cryptor: object whose ``decrypt(data)`` is applied to every segment,
            or ``None`` when the segments are not encrypted.
        count: number of segments to merge.

    Returns:
        True when all segments were processed, False when decrypting or
        writing a segment raised an exception.

    NOTE(review): the same ``cryptor`` instance is used for all segments;
    confirm this matches how the segments were encrypted.
    """
    global logFile
    with open(outputFilePath, "wb+") as outputFp:
        for index in range(count):
            printProcessBar(count, index + 1, 50)
            logFile.write("\t{0}\n".format(index))
            inputFilePath = tsFileDir + "/" + "{0:0>8}.ts".format(index)
            # Check the segment itself; the output file always exists at this point.
            if not os.path.exists(inputFilePath):
                print("\n分片{0:0>8}.ts, 不存在,已跳过!".format(index))
                logFile.write("分片{0:0>8}.ts, 不存在,已跳过!\n".format(index))
                continue
            with open(inputFilePath, "rb") as inputFp:
                fileData = inputFp.read()
            try:
                if cryptor is not None:
                    fileData = cryptor.decrypt(fileData)
                outputFp.write(fileData)
            except Exception as exception:
                print(exception)
                return False
    print("")
    return True

# 6、删除ts文件
def removeTsDir(tsFileDir):
    """Delete ``tsFileDir`` together with everything inside it.

    The tree is walked bottom-up so that every directory is already empty
    when it is removed; the root directory itself is removed last.

    Args:
        tsFileDir: path of the directory to delete.

    Returns:
        True (errors from ``os.remove``/``os.rmdir`` propagate to the caller).
    """
    for currentDir, subDirNames, fileNames in os.walk(tsFileDir, topdown=False):
        # Files first, then the (already emptied) sub-directories
        for remover, names in ((os.remove, fileNames), (os.rmdir, subDirNames)):
            for entryName in names:
                remover(os.path.join(currentDir, entryName))
    os.rmdir(tsFileDir)
    return True

# 7、convert to mp4(调用了FFmpeg,将合并好的视频内容放置到一个mp4容器中)
def ffmpegConvertToMp4(inputFilePath, ouputFilePath):
    """Remux the merged video into an mp4 container by running FFmpeg.

    Runs ``.\\ffmpeg -i <input> -vcodec copy -acodec copy <output>``. The
    arguments are passed as a list (no shell), so quotes or shell
    metacharacters in the file names cannot break or alter the command.

    Args:
        inputFilePath: path of the merged input file.
        ouputFilePath: path of the mp4 file to create.

    Returns:
        True when FFmpeg exits with status 0; False when the input file does
        not exist, FFmpeg cannot be started, or it exits with another status.
    """
    global logFile
    # Local import keeps this function independent of the file's import block.
    import subprocess

    if not os.path.exists(inputFilePath):
        print(inputFilePath + " 路径不存在!")
        logFile.write(inputFilePath + " 路径不存在!\n")
        return False
    cmd = [r'.\ffmpeg', '-i', inputFilePath, '-vcodec', 'copy', '-acodec', 'copy', ouputFilePath]
    try:
        converted = subprocess.run(cmd).returncode == 0
    except OSError as exception:
        # The ffmpeg executable could not be started (e.g. it was not found)
        print(exception)
        converted = False
    if converted:
        print(inputFilePath + "转换成功!")
        logFile.write(inputFilePath + "转换成功!\n")
        return True
    print(inputFilePath + "转换失败!")
    logFile.write(inputFilePath + "转换失败!\n")
    return False

# 8、模拟输出进度条
def printProcessBar(sumCount, doneCount, width):
    """Draw a one-line text progress bar on stdout.

    The line ends with a carriage return instead of a newline, so the next
    call overwrites it in a terminal.

    Args:
        sumCount: total number of items.
        doneCount: number of items finished so far.
        width: width of the bar in characters.
    """
    # An empty job is shown as 0% instead of raising ZeroDivisionError.
    precent = doneCount / sumCount if sumCount else 0.0
    useCount = int(precent * width)
    spaceCount = int(width - useCount)
    precent = precent*100
    print('\t{0}/{1} {2}{3} {4:.2f}%'.format(sumCount, doneCount, useCount*'■', spaceCount*'□', precent), file=sys.stdout, flush=True, end='\r')

# m3u8下载器
def _ivTextToBytes(ivText):
    """Convert the IV text of a playlist key into the bytes passed to AES.new."""
    # A hexadecimal IV ("0x..." / "0X...") is converted to the bytes it denotes.
    if ivText[:2].lower() == "0x":
        return bytes.fromhex(ivText[2:].zfill(32))
    return bytes(ivText, encoding='utf8')


def m3uVideo8Downloader():
    """Download, merge and convert the video described by the global ``m3u8Url``.

    Steps: download the playlist, download the AES-128 key when the playlist
    is encrypted, download all ts segments, merge them into
    ``<cachePath>/cache.flv`` and remux that file into
    ``<saveRootDirPath>/<title>.mp4``.

    Returns:
        True on success, False when any step failed (the reason is printed
        and written to the log file).
    """
    global title
    global logFile
    global m3u8Url
    global cachePath
    # Local import keeps this function independent of the file's import block.
    from urllib.parse import urljoin

    # 1. Download the m3u8 playlist
    print("\t1、开始下载m3u8...")
    logFile.write("\t1、开始下载m3u8...\n")
    m3u8Info = getM3u8Info()
    if m3u8Info is None:
        return False
    tsList = [segment.uri for segment in m3u8Info.segments]
    if not tsList:
        print("\tm3u8中没有ts分片!")
        logFile.write("\tm3u8中没有ts分片!\n")
        return False
    # 2. Fetch the key
    keyText = ""
    cryptor = None
    # Is the stream encrypted?
    if (len(m3u8Info.keys) != 0) and (m3u8Info.keys[0] is not None):
        # Only the first key is used, and only the AES-128 method is supported
        key = m3u8Info.keys[0]
        if key.method != "AES-128":
            print("\t{0}不支持的解密方式!".format(key.method))
            logFile.write("\t{0}不支持的解密方式!\n".format(key.method))
            return False
        if not key.uri:
            print("\tkey缺少url!")
            logFile.write("\tkey缺少url!\n")
            return False
        # Resolve a relative key URI against the playlist URL (absolute URIs are kept)
        keyUrl = urljoin(m3u8Url, key.uri)
        print("\t2、开始下载key...")
        logFile.write("\t2、开始下载key...\n")
        keyText = getKey(keyUrl)
        if keyText is None:
            return False
        keyBytes = bytes(keyText, encoding='utf8')
        # NOTE(review): without an explicit IV the key itself is used as the IV;
        # confirm this against the HLS specification's default IV rule.
        try:
            if key.iv is not None:
                cryptor = AES.new(keyBytes, AES.MODE_CBC, _ivTextToBytes(key.iv))
            else:
                cryptor = AES.new(keyBytes, AES.MODE_CBC, keyBytes)
        except ValueError as exception:
            # Raised by AES.new / bytes.fromhex for an unusable key or IV
            print("\tkey或IV无效:{0}".format(exception))
            logFile.write("\tkey或IV无效:{0}\n".format(exception))
            return False
    # 3. Download the ts segments
    print("\t3、开始下载ts...")
    logFile.write("\t3、开始下载ts...\n")
    if mutliDownloadTs(tsList):
        print("\tts下载完成---------------------")
        logFile.write("\tts下载完成---------------------\n")
    # 4. Merge the ts segments
    print("\t4、开始合并ts...")
    logFile.write("\t4、开始合并ts...\n")
    if mergeTs(cachePath, cachePath + "/cache.flv", cryptor, len(tsList)):
        print("\tts合并完成---------------------")
        logFile.write("\tts合并完成---------------------\n")
    else:
        print(keyText)
        print("\tts合并失败!")
        logFile.write("\tts合并失败!\n")
        return False
    # 5. Convert to mp4
    print("\t5、开始mp4转换...")
    logFile.write("\t5、开始mp4转换...\n")
    if not ffmpegConvertToMp4(cachePath + "/cache.flv", saveRootDirPath + "/" + title + ".mp4"):
        return False
    return True

if __name__ == '__main__':
    # Batch driver: reads "title,m3u8 url" rows from the input file and
    # downloads them one after another. Processing stops at the first empty
    # row or at the end of the file.
    if not (os.path.exists(m3u8InputFilePath)):
        print("{0}文件不存在!".format(m3u8InputFilePath))
        sys.exit(0)
    # Create the output directories before any file inside them is opened
    os.makedirs(cachePath, exist_ok=True)
    errorDirPath = os.path.dirname(errorM3u8InfoDirPath)
    if errorDirPath:
        os.makedirs(errorDirPath, exist_ok=True)
    # Initialise the thread pool
    taskThreadPool = threadpool.ThreadPool(processCountConf)
    # The with-statement closes all three files even if something unexpected is raised
    with open(m3u8InputFilePath, "r", encoding="utf-8") as m3u8InputFp, \
            open(errorM3u8InfoDirPath, "a+", encoding="utf-8") as errorM3u8InfoFp, \
            open(logPath, "w+", encoding="utf-8") as logFile:
        for rowData in m3u8InputFp:
            rowData = rowData.strip()
            if rowData == "":
                break
            # Split on the first comma only, so a URL that contains commas stays intact
            title, separator, inputUrl = rowData.partition(',')
            inputUrl = inputUrl.strip()
            if not separator or not inputUrl:
                print("{0} 格式错误,已跳过!".format(rowData))
                logFile.write("{0} 格式错误,已跳过!\n".format(rowData))
                continue
            # m3u8Url may be rewritten while downloading (redirect / variant
            # playlist); inputUrl keeps the value from the input file for the error list
            m3u8Url = inputUrl
            try:
                print("{0} 开始下载:".format(title))
                logFile.write("{0} 开始下载:\n".format(title))
                if m3uVideo8Downloader():
                    # Clear the log after a successful download; truncate() cuts
                    # at the current position, so rewind first
                    logFile.seek(0)
                    logFile.truncate()
                    print("{0} 下载成功!".format(title))
                else:
                    errorM3u8InfoFp.write(title + "," + inputUrl + '\n')
                    errorM3u8InfoFp.flush()
                    print("{0} 下载失败!".format(title))
                    logFile.write("{0} 下载失败!\n".format(title))
            except Exception as exception:
                print(exception)
                traceback.print_exc()
                # Record entries that failed with an exception as well
                errorM3u8InfoFp.write(title + "," + inputUrl + '\n')
                errorM3u8InfoFp.flush()
                logFile.write("{0} 下载失败! msg = {1}\n".format(title, exception))
    print("----------------下载结束------------------")

github链接

https://github.com/hestyle/m3u8_downloader

转载:https://www.52pojie.cn/thread-1279759-1-1.html

你可能感兴趣的:(python笔记,python)