给一个m3u8链接,5分钟内还一个mp4

多线程下载一个m3u8并转MP4

入门一个月照猫画虎写的,欢迎大佬指导
在python里面几乎完美运行,可是在用了pyinstaller打包为exe之后却偶尔因为报错卡壳

过程中的操作:
一、手动获取一个m3u8链接,并读出其中的基础链接(base url)部分(以后会写自动读取网站上的link)
二、拼接ts地址
三、分线程
四、下载链接文件
五、拼接文件为MP4,清理缓存

#目标是多线程下载一个m3u8并转MP4
#5.24成功了。还有个问题:服务器强制关闭连接之后,我会卡在多线程里面出不来。如何防止被检测、如何安全退出进程呢?其实没关系
#day5.25关闭防火墙,不然有的会连不上,还有一个问题异常之后漏了几个,漏报了一个异常
#day5.26 基本完善
#防火墙防火墙防火墙别忘关
#python m3u8tomp4.py
import threading
import requests
import re
import os
from os import path
import shutil
import urllib3
import socket
import time 
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
socket.setdefaulttimeout(15)
def get_ts_url(m3u8_path,base_url):
    """Build the list of segment URLs described by a local m3u8 playlist.

    Every playlist line that ends with ``.ts`` (after surrounding whitespace
    is removed) is treated as a segment entry and appended to *base_url*.
    All other lines (tags, comments, blank lines) are ignored.

    Args:
        m3u8_path: Path of the playlist file on disk.
        base_url: Prefix that is prepended verbatim to each segment entry.

    Returns:
        A list of segment URLs, in the order they appear in the playlist.
    """
    with open(m3u8_path, 'r') as file:
        # Strip each line before matching so that the final entry is still
        # found when the file has no trailing newline, and so that stray
        # trailing spaces do not hide an entry.
        entries = (line.strip() for line in file)
        return [base_url + entry for entry in entries if entry.endswith('.ts')]

def down_ts(start,end,ts_urls,d_path,timeout=15):
    """Download the segments ``ts_urls[start:end]`` one by one into *d_path*.

    A segment whose download fails for any reason (connection error, HTTP
    error status, error while streaming or writing) is appended to the
    module-level list ``tsurlleft`` so the caller can retry it later; the
    loop then moves on to the next segment instead of aborting.

    Args:
        start: Index of the first URL in *ts_urls* to download.
        end: Index one past the last URL to download.
        ts_urls: List of segment URLs.
        d_path: Directory the segment files are written to; each file is
            named after the last ``/``-separated component of its URL.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
    """
    global tsurlleft
    header={
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400'
    }
    for ts_url in ts_urls[start:end]:
        ts_name=ts_url.split('/')[-1]
        ts_path = d_path+"/{0}".format(ts_name)
        try:
            # requests does not time out unless a timeout is passed
            # explicitly, so without it a stalled server blocks this worker
            # forever. stream=True defers the body download to iter_content;
            # verify=False skips TLS certificate verification.
            with requests.get(ts_url,headers=header,stream=True,verify=False,timeout=timeout) as response:
                # Treat 4xx/5xx as failures rather than saving the error
                # page as if it were segment data.
                response.raise_for_status()
                with open(ts_path,"wb+") as file:
                    for chunk in response.iter_content(chunk_size=1024):
                        if chunk:
                            file.write(chunk)
        except Exception as e:
            # Worker boundary: record the failure for a later retry and keep
            # going. Format the exception itself, not e.args: "%s" % e.args
            # raises TypeError whenever args does not hold exactly one item.
            tsurlleft.append(ts_url)
            print("异常请求:%s"%(e,))
            try:
                # Drop a partially written segment so it cannot be merged.
                os.remove(ts_path)
            except OSError:
                # Nothing to remove: the file was never created.
                pass
            continue
        # Pause between segments (kept from the original implementation).
        time.sleep(2)

def file_walker(path):
    """Return the paths of all files found under *path*, including subdirectories.

    Each returned entry is the directory reported by ``os.walk`` joined to the
    file name with a literal ``'/'``. The order is whatever ``os.walk`` yields.
    """
    return [
        folder + '/' + filename
        for folder, _subdirs, filenames in os.walk(path)
        for filename in filenames
    ]
 
def _natural_sort_key(text):
    """Return a sort key that compares runs of digits in *text* numerically."""
    # re.split with a capturing group alternates text / digits / text ..., so
    # odd positions are always the digit runs.
    return [
        int(token) if index % 2 else token.lower()
        for index, token in enumerate(re.split(r'(\d+)', text))
    ]


def combine(d_path, combine_path, file_name):
    """Concatenate every file under *d_path* into one ``.mp4`` and delete *d_path*.

    Args:
        d_path: Directory holding the downloaded segment files; it is removed
            (recursively) once the merge has finished.
        combine_path: Existing directory the merged file is written to.
        file_name: Name of the merged file, without the ``.mp4`` suffix.
    """
    # os.walk gives no ordering guarantee, so order the segments explicitly.
    # NOTE(review): this assumes segment file names sort (numerically) in
    # playback order -- confirm against the playlist for unusual naming.
    file_list = sorted(file_walker(d_path), key=_natural_sort_key)
    file_path = os.path.join(combine_path, file_name + '.mp4')
    with open(file_path, 'wb') as fw:
        for segment_path in file_list:
            # Close each segment right after copying, and copy in chunks
            # instead of loading the whole segment into memory.
            with open(segment_path, 'rb') as fr:
                shutil.copyfileobj(fr, fw)
    print('合成完毕')
    shutil.rmtree(d_path)
    print('缓存清理完毕')
def thre(num_thread):
    """Download all segments using *num_thread* worker threads and wait for them.

    Reads the module-level ``file_size`` (number of segments), ``ts_urls`` and
    ``d_path``. The segment list is cut into *num_thread* consecutive ranges
    of ``file_size // num_thread`` entries; the last range also takes the
    remainder. Each range is handed to ``down_ts`` in its own daemon thread.

    Args:
        num_thread: Number of worker threads to start (must be >= 1).
    """
    part = file_size // num_thread
    workers = []
    for i in range(num_thread):
        start = part * i
        # The last worker picks up whatever is left after integer division.
        end = file_size if i == num_thread - 1 else start + part
        t = threading.Thread(target=down_ts, kwargs={'start': start, 'end': end, 'ts_urls':ts_urls, 'd_path': d_path})
        t.daemon = True
        t.start()
        workers.append(t)
    # Wait for every worker. The join must run once per thread; joining only
    # a single thread lets the caller continue while downloads are running.
    for t in workers:
        t.join()

if __name__ == '__main__':
    # Interactive entry point: read the playlist name and base URL, download
    # all segments with worker threads, retry failed segments sequentially,
    # then merge the segments into a single mp4.
    print('''
    本软件实现多线程下载m3u8并自动转化为mp4
    事先准备好一个保存至dililidown目录下的完整m3u8和一个基础链接
    创建这样的文件夹:E:/vsc and pytraining/pytrain/dililidown/all
    E:/vsc and pytraining/pytrain/dililidown/tss/del
    ''')
    # Strip pasted input so stray spaces do not end up in the path or URLs.
    m3u8_name=input('请输入m3u8文件不带后缀的文件名:').strip()
    m3u8_path='E:/vsc and pytraining/pytrain/dililidown/'+m3u8_name+'.m3u8'
    base_url=input('''
    请输入如下的基础链接:
    https://sina.com-h-sina.com/20180813/8954_f12018ec/1000k/hls/
    ''').strip()
    d_path='E:/vsc and pytraining/pytrain/dililidown/tss/del'
    combine_path='E:/vsc and pytraining/pytrain/dililidown/all'

    # Start from an empty segment cache directory.
    if os.path.exists(d_path):
        shutil.rmtree(d_path)
    os.makedirs(d_path)
    # The merged file is written here; create the directory if it is missing.
    os.makedirs(combine_path, exist_ok=True)

    ts_urls=get_ts_url(m3u8_path,base_url)
    file_size=len(ts_urls)
    tsurlleft=[]

    # Pick the largest thread count in 100..1 for which fewer than 10
    # segments are left over after an even split (the leftover goes to the
    # last thread). number == 1 always qualifies, so num_thread is always set.
    for number in range(100,0,-1):
        if file_size % number < 10:
            num_thread=number
            break

    thre(num_thread)

    # Retry the failed segments sequentially, at most 5 rounds.
    for failtimes in range(5):
        if not tsurlleft:
            break
        print('第{}次补偿'.format(failtimes+1))
        print(tsurlleft)
        ts_urls=tsurlleft
        tsurlleft=[]
        down_ts(0,len(ts_urls),ts_urls,d_path)
        time.sleep(3)

    # Decide after the retry loop, so that a successful final retry is still
    # merged instead of being skipped by an early break.
    if tsurlleft:
        print('多次尝试未成功,请手动')
    else:
        time.sleep(4)
        combine(d_path,combine_path,m3u8_name)
    print('运行完毕')
    time.sleep(4)
    input('任意键退出')


你可能感兴趣的:(笔记)