m3u8视频通用下载器

闲来无事,写了个m3u8视频下载器,分享给各位(好处不多说!都懂!),如果有什么不对的地方,还请指正。另外还有m3u8视频解析器,通过视频播放链接(非商业性网站)解析出m3u8地址,然后再通过m3u8下载器进行下载,如果有需要的小伙伴请私信。

中间可能会有些看上去冗余的代码,主要是为了兼容各种稀奇古怪的m3u8内容。

脚本仅用于技术学习与研究,请勿用于任何非法用途,否则后果自负,本作者不承担任何法律责任。


原创文章,转载请注明出处,谢谢!https://blog.csdn.net/weixin_36381802/article/details/113694338


环境: pip install gevent requests loguru pycryptodome


# -*- coding:utf-8 -*-

"""
NAME: m3u8视频下载器
VERSION: v1.0
DATE: 2021-02-05
TIPS:
  1.若部分视频无法播放,建议更改文件名后缀或切换其它播放器(QuickTime、WindowsMedia等)进行尝试;
  2.在MacOS或Linux系统上运行前请确认已安装合并视频片段所使用的工具ffmpeg(Windows无视);
  3.仅支持下载m3u8类型视频,mp4等链接暂不支持(普通下载器满大街都是);
  4.脚本仅用于技术学习与研究,请勿用于任何非法用途,否则后果自负,本作者不承担任何责任。
"""


import argparse
import os
import platform
import re
import shutil
import time
from datetime import datetime
from urllib.parse import urljoin

import gevent
from gevent.pool import Pool
from gevent import monkey; monkey.patch_all()

import requests
import urllib3
from Crypto.Cipher import AES

# 自定义日志显示格式(个人比较喜欢这个格式)
from os import environ
environ['LOGURU_FORMAT'] = "{time:YYYY-MM-DD HH:mm:ss} | {level:<5} | {message}"
from loguru import logger

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


class M3u8VideoDownloader:
    headers = {
     
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    }

    def __init__(self, m3u8_url, download_path=None, video_name=None, is_del_clip=True, test_download_num=0,
                 retry_count=10, thread_num=30, dec_func=None, m3u8_content_plaintext=None):
        """
        :param m3u8_url: m3u8链接
        :param download_path: 下载路径
        :param video_name: 视频名称(不能出现括号)
        :param is_del_clip: 合并视频完成后是否删除原片段
        :param test_download_num: 测试下载视频数量
        :param retry_count: 单个视频片段下载失败重试次数
        :param thread_num: 下载线程数
        :param dec_func: m3u8内容解密函数(内容被加密时可传入解密函数,或直接将解密后的明文内容传递给参数m3u8_content_plaintext)
        :param m3u8_content_plaintext: 已解密的m3u8明文内容
        """
        self.m3u8_url = m3u8_url
        self.download_path = download_path
        self.cache_path = None         # 临时缓存路径
        self.video_name = video_name or str(int(time.time()))
        self.video_name_suffix = '.mp4'  # 文件类型后缀
        self.is_del_clip = is_del_clip
        self.test_download_num = test_download_num
        self.retry_count = retry_count
        self.thread_num = min(thread_num, 50)
        self.max_merge_num = 500       # 单次合并文件最大数量
        self.dec_func = dec_func
        self.m3u8_content_plaintext = m3u8_content_plaintext
        self.key_url = None
        self.key = None
        self.iv = None
        self.decipher = None
        self.video_clip_list = []      # 视频片段名称列表
        self.total_duration = 0        # 视频总时间(分钟)
        self.total_video_clip_num = 0  # 视频片段数量
        self.download_num = 0          # 已下载数量
        self.total_download_size = 0   # 总下载大小
        self.is_special_link = False   # 视频片段链接未带后缀(例`.ts`)时为True,一般出现在m3u8内容被加密的视频网站

    def fetch(self, url, binary=False):
        resp = requests.get(url, headers=self.headers, timeout=30, verify=False)
        status_code = resp.status_code
        if status_code != 200:
            raise Exception(f'请求失败({status_code}):{url}')
        if binary:
            return resp.content
        return resp.content.decode()

    @staticmethod
    def deal_video_name(video_name: str):
        if '-' in video_name:
            video_name = video_name[:video_name.find('-')]
        video_name = video_name.strip()
        video_name = re.sub(r'\s+', '-', video_name)
        video_name = re.sub(r'(在线观看|免费在线观看|在线播放|免费在线播放|正在播放|超前点播|\(|\)|\.)', '', video_name)
        if video_name.startswith('《') and video_name.endswith('》'):
            video_name = video_name.strip('《》')
        return video_name

    def get_m3u8_content(self):
        """获取m3u8内容"""
        logger.info(f'M3U8链接:{self.m3u8_url}')
        try:
            m3u8_content = self.fetch(self.m3u8_url)
        except Exception as e:
            raise Exception(f'获取m3u8内容失败({self.m3u8_url}):{repr(e)}')

        # 如果内容被加密,需要通过传入的解密函数进行解密
        if self.dec_func:
            try:
                m3u8_content = self.dec_func(m3u8_content)
            except Exception as e:
                raise Exception(f'解密m3u8内容失败({self.m3u8_url}):{repr(e)}')

        if '#EXTM3U' not in m3u8_content:
            raise Exception(f'错误的M3U8信息,请确认链接是否正确:{self.m3u8_url}<{m3u8_content}>')
        if '#EXT-X-STREAM-INF' in m3u8_content:
            m3u8_url_list = [line for line in m3u8_content.split('\n') if line.find('.m3u8') != -1]
            if len(m3u8_url_list) > 1:
                logger.info(f'发现{len(m3u8_url_list)}个m3u8地址:{m3u8_url_list}')
            self.m3u8_url = urljoin(self.m3u8_url, m3u8_url_list[0])
            return self.get_m3u8_content()
        # logger.info(f'M3U8内容已获取完成:{self.m3u8_url}')
        return m3u8_content

    def parse_m3u8_info(self, m3u8_content):
        """解析m3u8文件:获取解密key、iv、视频url列表"""
        all_lines = m3u8_content.strip('\n').split('\n')
        is_updated_base_url = False
        is_exist_clip = False

        for index, line in enumerate(all_lines):
            if '#EXT-X-KEY' in line:
                # 避免重复解析key与iv
                if not (self.key_url and self.iv):
                    method, key_url_part, self.iv = self.parse_ext_x_key(line)
                    self.key_url = urljoin(self.m3u8_url, key_url_part)
                    logger.info(f'视频已加密:{method}  Key地址:{key_url_part}')
            elif '#EXTINF' in line:
                for i in range(5):
                    _index = index + i + 1
                    # 过滤标签
                    if not all_lines[_index].startswith('#'):
                        next_line = all_lines[_index].rstrip()
                        break
                else:
                    raise Exception('未发现有效的下载链接')
                if not is_updated_base_url:
                    is_exist_clip = True
                    is_updated_base_url = True
                    if next_line.startswith('http') or next_line.startswith('/'):
                        suffix = next_line.rsplit('/', 1)[-1]
                        if '.ts' in suffix or '.' in suffix:
                            # 将下载地址更新到m3u8_url
                            self.m3u8_url = urljoin(self.m3u8_url, next_line)
                        else:
                            self.m3u8_url = next_line[:next_line.rfind('/', 0, next_line.rfind('/')) + 1]
                            self.is_special_link = True
                        logger.debug(f'视频下载主地址已更新:{self.m3u8_url.rsplit("/", 1)[0]}')
                # 计算视频总时长
                duration_str = line.split(':')[-1].rstrip()
                try:
                    self.total_duration += float(duration_str[:-1])
                except ValueError:
                    pass
                # 添加视频到视频片段名称列表
                if self.is_special_link:
                    clip_name = next_line[next_line.rfind('/', 0, next_line.rfind('/')) + 1:].replace('/', '@@') + '.ts'
                    self.video_clip_list.append(clip_name)
                else:
                    clip_name = next_line.rsplit('/', 1)[-1]
                    self.video_clip_list.append(clip_name)
        if not is_exist_clip:
            raise Exception('未发现视频下载链接')
        self.total_duration = int(self.total_duration) // 60 + 1
        self.total_video_clip_num = len(self.video_clip_list)
        logger.info(f'M3U8内容解析已完成,视频片段数量:{self.total_video_clip_num},视频时长:{self.total_duration}分钟,下载主地址:{self.m3u8_url.rsplit("/", 1)[0]}')

    @staticmethod
    def parse_ext_x_key(ext_x_key: str) -> (str, str, bytes):
        """解析#EXT-X-KEY中的key链接与iv"""
        ret = re.search(r'METHOD=(.*?),URI="(.*?)"(?:,IV=(\w+))?', ext_x_key)
        method, key_url, iv = ret.groups()
        iv = iv.replace('0x', '')[:16].encode() if iv else b''
        return method, key_url, iv

    def get_key(self):
        try:
            self.key = self.fetch(self.key_url, binary=True)
        except Exception as e:
            raise Exception(f'获取key失败({self.key_url}):{repr(e)}')
        logger.info(f'key解析已完成:{self.key}  iv:{self.iv or "无"}')

    def init_decipher(self):
        self.decipher = AES.new(self.key, AES.MODE_CBC, self.iv or self.key[:16])

    def download_all_videos(self):
        # 重试时重新初始化已下载数量
        if self.cache_path:
            self.download_num = 0
        else:
            # 默认保存在用户目录下的Downloads/videos文件夹内
            if self.download_path is None:
                self.download_path = os.path.join(os.path.expanduser('~'), 'Downloads')
            self.download_path = os.path.join(self.download_path, 'Videos')
        if not os.path.exists(self.download_path):
            os.makedirs(self.download_path)
        file_list = os.listdir(self.download_path)
        if f'{self.video_name}{self.video_name_suffix}' in file_list or f'{self.video_name}.ts' in file_list:
            logger.info(f'视频已经存在:{self.video_name}')
            return
        logger.info(f'视频保存目录:{self.download_path}')
        # 临时缓存目录
        if not self.cache_path:
            self.cache_path = os.path.join(self.download_path, datetime.now().strftime('%Y%m%d'))
        if not os.path.exists(self.cache_path):
            os.makedirs(self.cache_path)

        # 测试下载部分视频
        if self.test_download_num > 0:
            self.video_clip_list = self.video_clip_list[:self.test_download_num]
            logger.info(f'当前为测试模式,设置下载视频片段数量:{self.test_download_num}')

        logger.info(f'即将开始下载视频:{self.video_name}{self.video_name_suffix}')
        start_time = int(time.time())

        # 协程池
        pool = Pool(self.thread_num)
        for clip in self.video_clip_list:
            pool.add(gevent.spawn(self.download_decode_save_video, clip))
        pool.join()

        # 线程池
        # from concurrent.futures.thread import ThreadPoolExecutor
        # with ThreadPoolExecutor(max_workers=self.thread_num) as pool:
        #     pool.map(self.download_decode_save_video, self.video_clip_list)

        spend_time = int(time.time()) - start_time
        logger.info(f'下载视频耗时:{spend_time}秒')

    def download_decode_save_video(self, clip):
        """下载、解码、保存视频"""
        url = urljoin(self.m3u8_url, clip)
        # 删除文件名中的参数部分,但url中的参数不能少
        clip = clip.split('?')[0]
        full_path_filename = os.path.join(self.cache_path, clip)
        if os.path.exists(full_path_filename):
            self.download_num += 1
            logger.debug(f'视频片段已存在({self.download_num}):{clip}')
            return

        if self.is_special_link:
            url = url.replace('@@', '/').replace('.ts', '')

        # 下载单个视频
        raw_data = self.download_single_video(url)
        # 解码视频
        data = self.decode_video_clip(clip, raw_data)
        # 保存视频
        self.save_video_clip(clip, full_path_filename, data)

    def download_single_video(self, url):
        status_code = 0
        for i in range(self.retry_count):
            try:
                response = requests.get(url, headers=self.headers, timeout=30, verify=False)
            except Exception as e:
                if i == self.retry_count - 1:
                    raise Exception(f'下载失败({url}):{repr(e)}')
            else:
                status_code = response.status_code
                if status_code == 200:
                    data = response.content
                    break
                time.sleep(0.3)
        else:
            raise Exception(f'多次尝试下载失败({url}):{status_code}')
        return data

    def decode_video_clip(self, clip, data):
        if self.decipher is not None:
            try:
                data = self.decipher.decrypt(data)
            except Exception as e:
                raise Exception(f'数据解密失败({clip}):{repr(e)}<{len(data)}>')
        return data

    def save_video_clip(self, filename, full_path_filename, data):
        with open(full_path_filename, 'wb') as f:
            f.write(data)
        file_size = len(data)
        self.total_download_size += file_size
        self.download_num += 1
        file_size_m = round(file_size / float(1024*1024), 2)
        total_download_size_m = round(self.total_download_size/float(1024*1024), 2)
        total_num = self.test_download_num if 0 < self.test_download_num < self.total_video_clip_num else self.total_video_clip_num
        remainder = total_num - self.download_num
        logger.debug(f'已完成({self.download_num})-剩余({remainder}):{filename} <{file_size_m:0<4}M - {total_download_size_m}M>')

    def win_merge(self):
        """Windows平台合并视频"""
        cur_path = os.getcwd()
        os.chdir(self.cache_path)
        merge_num = 1
        merge_video_list = []
        start_index, end_index = 0, self.max_merge_num
        while 1:
            cur_merge_list = [clip.split('?')[0] for clip in self.video_clip_list[start_index:end_index]]
            if not cur_merge_list:
                video_filename = f'{self.video_name}{self.video_name_suffix}'
                if not merge_video_list:
                    logger.error('视频合并失败')
                    os.chdir(cur_path)
                    return False
                elif len(merge_video_list) == 1:
                    os.rename(merge_video_list[0], video_filename)
                    if self.is_del_clip:
                        os.system('del /Q *.ts*')
                    if self.is_special_link:
                        os.rename(video_filename, video_filename.replace(self.video_name_suffix, '.ts'))
                    os.chdir(cur_path)
                    video_filename = self.move_del_file(video_filename)
                    logger.info(f'视频合并已全部完成:{video_filename}')
                else:
                    status = os.system(f"copy /b {'+'.join(merge_video_list)} {video_filename} >> merge.log")
                    if status == 0:
                        if self.is_del_clip:
                            os.system('del /Q *.ts*')
                        if self.is_special_link:
                            os.rename(video_filename, video_filename.replace(self.video_name_suffix, '.ts'))
                        os.chdir(cur_path)
                        video_filename = self.move_del_file(video_filename)
                        logger.info(f'视频合并已全部完成:{video_filename}')
                    else:
                        os.chdir(cur_path)
                        logger.error(f'最后一次合并失败:{merge_video_list}')
                return True
            cur_video_name = f'{self.video_name}_temp{merge_num}.ts'
            cmd_name = '+'.join(cur_merge_list)
            status = os.system(f"copy /b {cmd_name} {cur_video_name} >> merge.log")
            if status == 0:
                merge_num += 1
                start_index, end_index = end_index, end_index + self.max_merge_num
                merge_video_list.append(cur_video_name)
                logger.info(f'本次合并{len(cur_merge_list)}个视频完成:{cur_video_name}')
            else:
                logger.error('视频合并失败')
                os.chdir(cur_path)
                return False

    def linux_merge(self):
        """Linux或MacOS平台合并视频(需要使用ffmpeg)"""
        video_file_list = [os.path.join(self.cache_path, filename.split('?')[0]) for filename in self.video_clip_list]
        # 将video路径并合成一个字符参数
        file_argv = '|'.join(video_file_list)
        # 指定输出文件名称
        mp4_filename = os.path.join(self.cache_path, f'{self.video_name}{self.video_name_suffix}')
        # 调取系统命令使用ffmpeg将ts合成mp4文件
        cmd = f'ffmpeg -i "concat:{file_argv}" -c copy {mp4_filename}'
        status = os.system(cmd)
        if status == 0:
            # 删除原ts文件
            if self.is_del_clip:
                os.system(f'rm {os.path.join(self.cache_path, "*.ts*")}')
            if self.is_special_link:
                os.rename(mp4_filename, mp4_filename.replace(self.video_name_suffix, '.ts'))
            mp4_filename = self.move_del_file(mp4_filename)
            logger.info(f'视频已全部合并完成:{mp4_filename}')
            return True
        else:
            logger.error('视频合并失败')
            return False

    def move_del_file(self, video_filename):
        """移动文件并删除临时文件夹"""
        if self.is_special_link:
            video_filename = video_filename.replace(self.video_name_suffix, '.ts')
        shutil.move(os.path.join(self.cache_path, video_filename), self.cache_path.rsplit(os.sep, 1)[0])
        shutil.rmtree(self.cache_path)
        return video_filename

    def merge_video_file(self):
        """合并视频片段"""
        if self.test_download_num == 0:
            total_video_clip_num = self.total_video_clip_num
        else:
            if self.test_download_num < self.total_video_clip_num:
                total_video_clip_num = self.test_download_num
            else:
                total_video_clip_num = self.total_video_clip_num

        if self.download_num != total_video_clip_num:
            logger.error(f'视频信息不完整,取消合并:{self.download_num}-{total_video_clip_num}')
            return False
        logger.info(f'视频已全部下载完成,即将合并{self.download_num}个视频...')

        # 根据系统选择相应的合并方式
        sys_info = platform.system()
        if 'Windows' in sys_info:   # Windows
            status = self.win_merge()
        elif 'Linux' in sys_info:   # Linux
            status = self.linux_merge()
        elif 'Darwin' in sys_info:  # MacOS
            status = self.linux_merge()
        else:
            raise Exception(f'其它系统信息:{sys_info}')
        return status

    def start(self):
        # 1.获取m3u8内容
        m3u8_content = self.m3u8_content_plaintext or self.get_m3u8_content()

        # 2.解析m3u8内容
        self.parse_m3u8_info(m3u8_content)

        if not self.video_clip_list:
            logger.error('解析未发现有效的视频片段')
            return

        # 3.如果存在加密,获取解密key,并初始化解密器
        if self.key_url:
            self.get_key()
            self.init_decipher()

        # 下载/合并失败或视频片段不完整时重试3次
        for _ in range(3):
            # 4.下载视频
            self.download_all_videos()
            if self.download_num == 0:
                return

            # 5.合并视频
            if self.merge_video_file():
                break


def parse_args():
    """获取命令行参数信息"""
    arg_parser = argparse.ArgumentParser(description='========== M3U8下载器 ==========')
    arg_parser.add_argument('url', help='m3u8地址')
    arg_parser.add_argument('-p', '--path', help='下载路径')
    arg_parser.add_argument('-n', '--name', help='视频名称')
    arg_parser.add_argument('-c', '--count', type=int, help='测试下载视频片段数量', default=0)
    args = arg_parser.parse_args()
    return args.url, args.path, args.name, args.count


def download(m3u8_url, download_path=None, custom_video_name=None, test_download_num=0, m3u8_content=None):
    """
    :param m3u8_url: m3u8链接
    :param download_path: 下载路径
    :param custom_video_name: 自定义视频名称
    :param test_download_num: 测试下载数量(为0时下载全部)
    :param m3u8_content: m3u8明文内容
    :return:
    """
    if not (m3u8_url and m3u8_url.startswith('http')):
        logger.error(f'url不正确:{m3u8_url}')
        return

    if '.mp4' in m3u8_url:
        logger.error(f'当前为mp4链接(暂不支持下载):{m3u8_url}')
        return

    # 下载视频
    downloader = M3u8VideoDownloader(m3u8_url=m3u8_url,
                                     download_path=download_path,
                                     video_name=custom_video_name,
                                     test_download_num=test_download_num,
                                     m3u8_content_plaintext=m3u8_content)
    try:
        downloader.start()
    except Exception as e:
        logger.exception(f'视频下载失败({repr(e)}):{m3u8_url}')


if __name__ == '__main__':
    # 重点提醒:此链接必须是m3u8地址,而非视频播放地址!!!
    m3u8_url = 'https://baikevideo.cdn.bcebos.com/media/mda-Ogtg6GwqTr85eadR/ebf6891cf5cccea366d510589ff04edc.m3u8'

    # 以下3项为可选参数
    download_path = None
    video_title = None
    test_num = 0  # 测试下载数量(为0时下载全部)

    # 如果在命令行执行,请将m3u8_url设置为None,并根据提示传入相关参数
    if not m3u8_url:
        m3u8_url, download_path, video_title, test_num = parse_args()

    download(m3u8_url, download_path, video_title, test_num)


脚本仅用于技术学习与研究,请勿用于任何非法用途,否则后果自负,本作者不承担任何法律责任。


原创文章,转载请注明出处,谢谢!https://blog.csdn.net/weixin_36381802/article/details/113694338

你可能感兴趣的:(Python,python)