Python multi-threaded downloader for m3u8 videos, merged with FFmpeg (upgraded and revised from s_kangkang_A)

The base code comes from:

https://blog.csdn.net/s_kangkang_A/article/details/103071822

Thanks to s_kangkang_A:

https://blog.csdn.net/s_kangkang_A

The following improvements were made on top of it:

1. Changed how the base URL is obtained: it is now derived directly from the path of the m3u8 file, so every parameter can be supplied as input from outside the code instead of editing the script each time (see the sketch right after this list).

2. Switched to eventlet green threads, improving efficiency and network throughput (a minimal sketch follows the Python 3.7 note below).

3. Added handling for encrypted m3u8 streams (a key-rewriting sketch follows below).

4. Revised how downloaded files are named on disk, which works around sites that use unusual segment file names.

5. Changed the queue-empty check to the standard pattern; the original check, which had been used before, could fail under particular conditions (a sketch of the standard pattern appears just before the full script).
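
Item 1 works because the m3u8 URL itself carries everything needed to resolve relative segment paths. Here is a minimal sketch of the derivation (the same logic appears in down() in the script below); the sample URLs are hypothetical:

from urllib.parse import urlparse
import os

m3u8_url = 'http://example.com/hls/20190721/index.m3u8'   # hypothetical
m3u8_dirname = os.path.dirname(m3u8_url)                  # 'http://example.com/hls/20190721'
parts = urlparse(m3u8_url)

def resolve(line):
    # Turn a segment reference from the playlist into a full URL
    if 'http' in line:
        return line                                     # already absolute
    if os.path.dirname(line) == '':
        return m3u8_dirname + '/' + line                # bare name: relative to the m3u8
    return parts.scheme + '://' + parts.netloc + line   # site-absolute path

print(resolve('film_00000.ts'))                # http://example.com/hls/20190721/film_00000.ts
print(resolve('/hls/20190721/film_00001.ts'))  # http://example.com/hls/20190721/film_00001.ts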

 
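For item 3, an encrypted stream carries an #EXT-X-KEY tag whose URI points at the AES-128 key. The script downloads that key once to cache/key.key and rewrites the URI in the local playlist so FFmpeg can decrypt while merging. A sketch of the rewrite step, using a hypothetical key line:

import re

line = '#EXT-X-KEY:METHOD=AES-128,URI="/key/2019/film.key"'   # hypothetical input
key_re = re.search(r'(URI=".*\.key")', line)
if key_re is not None:
    key_url = key_re.group(1)[5:-1]   # strip the URI=" prefix and the closing quote
    # ... here the script downloads key_url and saves it as cache/key.key ...
    key_line = line[:key_re.start() + 5] + 'key.key' + line[key_re.end() - 1:]
    print(key_line)   # #EXT-X-KEY:METHOD=AES-128,URI="key.key"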

Work is still ongoing to render an input page URL directly and extract the m3u8 file from it automatically, which would make the whole process fully automatic.

Python 3.7 or later is required; below that, requests does not work properly inside eventlet.
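The reason is that requests has to be loaded through eventlet.import_patched so that its sockets become green (non-blocking). A minimal sketch of the eventlet pattern from item 2 (the URLs are hypothetical):

import eventlet
requests = eventlet.import_patched('requests')   # requests with green sockets

urls = ['http://example.com/a.ts', 'http://example.com/b.ts']   # hypothetical

def fetch(url):
    return url, requests.get(url, timeout=(15, 60)).status_code

pool = eventlet.GreenPool(10)   # up to 10 concurrent green threads
for url, status in pool.imap(fetch, urls):
    print(url, status)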

By the way: eventlet really is a first-rate "threading" library; it has been in use here for many years.
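For item 5, the standard way for workers to drain a shared queue is a blocking get with a timeout, treating the Empty exception as the end-of-work signal; run() in the script below uses exactly this shape. A minimal sketch:

import queue

q = queue.Queue()
q.put('task-1')

while True:
    try:
        item = q.get(True, 0.5)   # block for up to 0.5 s
    except queue.Empty:
        break                     # nothing arrived in time: the queue is drained
    print('processing', item)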

# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import absolute_import

import datetime
import os
import re
import threading   # only used by the disabled native-thread fallback below
import time        # only used by the disabled native-thread fallback below
from urllib.parse import urlparse
import queue as Queue   # Python 3.7+ is required, so no Python 2 fallback is needed
import eventlet

# Import requests through eventlet so its sockets become green (non-blocking)
requests = eventlet.import_patched('requests')
 
# Pre-download: fetch the m3u8 file, extract the ts links, and write the local playlist
def down(headers, url, base_url):
    # base_url is kept for signature compatibility; the base is now derived
    # from the m3u8 URL itself
    m3u8_dirname = os.path.dirname(url)
    m3u8_urlp = urlparse(url)
    # When a ts link is not a full URL, it must be pieced together
    resp = requests.get(url, headers=headers)
    m3u8_text = resp.text
    # print(m3u8_text)
    # Split the m3u8 document into lines
    ts_queue = eventlet.queue.Queue()
    #ts_queue = eventlet.queue.LifoQueue ()
    #ts_queue = Queue(10000)
    lines = m3u8_text.split('\n')
    s = len(lines)
    # Find the lines containing ts segments
    #concatfile = 'cache/' + "s" + '.txt'
    concatfile = 'cache/' + "decode" + '.m3u8'
    if os.path.exists(concatfile):
        os.remove(concatfile)
    s_count = 1
    for i, line in enumerate(lines):
        if '.ts' in line:
            if 'http' in line:
                # Already a full URL
                http_line = line
            else:
                path = os.path.dirname(line)
                if len(path) == 0:
                    # Bare filename: relative to the m3u8 file's directory
                    http_line = m3u8_dirname + '/' + line
                else:
                    # Site-absolute path: prepend scheme and host
                    http_line = m3u8_urlp.scheme + '://' + m3u8_urlp.netloc + line
            # Name segments by sequence number, which sidesteps sites that use
            # special or duplicate segment file names
            filename = str(s_count).zfill(10) + '.ts'
            if not os.path.exists('cache/' + filename):
                ts_queue.put((filename, http_line, 0))

            # The playlist must be written first: the threaded downloads finish in
            # arbitrary order and the remote file names cannot be relied on for
            # sorting, so without this ordered list the ts files would be merged
            # out of order and the video would be broken.
            open(concatfile, 'a+').write("%s\n" % filename)
            s_count += 1
            print("\r", 'writing', s_count, "/", s, http_line, end="", flush=True)
        else:
            # If an encryption key is referenced, localize it so FFmpeg can
            # decrypt the segments during the merge
            key_re = re.search(r'(URI=".*\.key")', line)
            if key_re is not None:
                key_url = key_re.group(1).strip()
                key_url = key_url[5:-1]   # strip the URI=" prefix and the closing quote
                path = os.path.dirname(key_url)
                if len(path) == 0:
                    http_key = m3u8_dirname + '/' + key_url
                else:
                    http_key = m3u8_urlp.scheme + '://' + m3u8_urlp.netloc + key_url

                # Rewrite the key URI in the local playlist to point at the local copy
                key_line = line[:key_re.start() + 5] + "key.key" + line[key_re.end() - 1:]
                print(line, key_url, http_key, key_line, "\n")

                key_r = requests.get(http_key, stream=True, headers=headers, timeout=(15, 60), verify=True)
                with open('cache/key.key', 'wb') as fp:
                    for chunk in key_r.iter_content(5242):
                        if chunk:
                            fp.write(chunk)
                open(concatfile, 'a+').write(key_line+"\n")
            else:                
                open(concatfile, 'a+').write(line+"\n")
    return ts_queue, concatfile
 
# Worker: pulls segments from the queue and downloads them in a green thread
def run(ts_queue, headers, pool):
    while True:
        try:
            filename, url, sleepTime = ts_queue.get(True, 0.5)
        except Queue.Empty:
            # Standard drain pattern: nothing arrived within the timeout,
            # so the queue is exhausted and the worker exits
            break

        if sleepTime > 0:
            # This entry is a retry; back off before hitting the server again
            eventlet.sleep(sleepTime)
        requests.packages.urllib3.disable_warnings()

        try:
            r = requests.get(url, stream=True, headers=headers, timeout=(15, 60), verify=False)
            r.raise_for_status()
            with open('cache/' + filename, 'wb') as fp:
                for chunk in r.iter_content(5242):
                    if chunk:
                        fp.write(chunk)
            print("\r", 'segment', filename, 'downloaded', pool.running(), ts_queue.qsize(), end="         ", flush=True)
        except Exception as exc:
            print('segment', filename, 'download failed:', exc)
            # Requeue the segment with a 5-second retry delay
            ts_queue.put((filename, url, 5))
 
# Merge the video using ffmpeg
def merge(concatfile, name):
    try:
        # The output directory is hard-coded; adjust it to taste
        path = 'e:/rm/80s/' + name + '.mp4'
        command  = "ffmpeg -allowed_extensions ALL -protocol_whitelist \"file,http,crypto,tcp\" "
        command += ' -y -i %s -bsf:a aac_adtstoasc -c copy %s' % (concatfile, path)
        print(command)
        os.system(command)
        print('Video merge finished')
    except Exception:
        print('Merge failed')

def remove():
    dir = 'cache/'
    """
    # Old per-file removal, kept for reference:
    for line in open('cache/decode.m3u8'):
        line = re.search('(.*?ts)', line).group(1).strip()
        os.remove(dir + line)
    print("all ts files deleted")
    """
    # Windows `del`: /Q deletes quietly, without a confirmation prompt
    command = "del /Q " + dir.replace('/', '\\') + "*"
    os.system(command)

# headers and base_url must be set MANUALLY according to the actual site
if __name__ == '__main__':
    # Test link: https://yiyi.55zuiday.com/ppvod/70B5A6E3A150A99882E28EC793CAF519.m3u8
    # Movie: Long Day's Journey into Night (地球最后的夜晚)
    # https://youku.com-ok-sohu.com/20191110/20128_fd24c5a9/1000k/hls/61033a1fdc2000000.ts
    #base_url = 'https://yiyi.55zuiday.com/'
    base_url = 'http://cn5.download05.com/hls/20190721/e7339c09fff1c6c817c2b3cfad9b1d39/1563674290/film_00000.ts'
    headers = {
        #'referer': 'https://yiyi.55zuiday.com/share/wVuAcJFy1tMy4t0x',
        'referer': 'http://www.douying99.com/play/47309_m3u8_0.html',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'
    }

    requests.adapters.DEFAULT_RETRIES = 5

    name = input('Video name: ')
    headers['referer'] = input('Page URL (used as referer): ').strip()
    url = input('m3u8 URL: ').strip()

    start = datetime.datetime.now().replace(microsecond=0)
    print("Writing the playlist file")
    s, concatfile = down(headers, url, base_url)
    print('\n')
    print("Playlist file written")
    # Number of queued segments
    t_num = s.qsize()
    # Spawn one green thread per queued segment, capped at 60
    # (the old one-thread-per-five-segments sizing is kept below, disabled)
    print("Download started")
    """
    if num > 5:
        t_num = num // 5
    else:
        t_num = 1
    """
    if t_num > 60:
        t_num = 60
    # print(s,concatfile)

    pool = eventlet.GreenPool(t_num)
    run_args = {'ts_queue': s, 'headers': headers, 'pool': pool}
    for i in range(t_num):
        pool.spawn_n(run, **run_args)
    pool.waitall()
    """
    threads = []
    for i in range(t_num):
        t = threading.Thread(target=run, name='th-' + str(i), kwargs={'ts_queue': s, 'headers': headers})
        t.setDaemon(True)
        threads.append(t)
    for t in threads:
        time.sleep(0.4)
        t.start()
    for t in threads:
        t.join()
    """
    print('\n')
    print("Download finished")
    end = datetime.datetime.now().replace(microsecond=0)
    print('Playlist write and download time: ' + str(end - start))
    merge(concatfile, name)
    remove()
    over = datetime.datetime.now().replace(microsecond=0)
    print('Merge and cleanup time: ' + str(over - end))
    print("All tasks finished:", name)
    print('Total time:', over - start)
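
For reference, with a video named demo the merge step runs a command like the one below (demo is a placeholder; the output directory e:/rm/80s/ is hard-coded in merge() and should be adjusted). The -c copy plus the aac_adtstoasc bitstream filter remux the segments without re-encoding, and crypto in the protocol whitelist lets FFmpeg read the local key.key and decrypt AES-128 segments:

ffmpeg -allowed_extensions ALL -protocol_whitelist "file,http,crypto,tcp" -y -i cache/decode.m3u8 -bsf:a aac_adtstoasc -c copy e:/rm/80s/demo.mp4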
