网址-m3u8-ts-多线程-mp4

网址-m3u8-ts-多线程-mp4

这是对我上一篇帖子中代码的优化版本。

#python dililidownload.py
import threading
import requests
import re
import os
from os import path
import shutil
import urllib3
import socket
import time 
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
socket.setdefaulttimeout(10)

def down_m3u8(netlink):
    """Fetch an episode page, locate its m3u8 playlist and save it to disk.

    The page at ``netlink`` is searched for ``var vid = '...';`` assignments.
    For every match the ``1000k`` playlist is tried first and the ``800k``
    playlist second; the first one that downloads successfully is written to
    ``m3u8_path`` and the function returns.

    Reads the module-level ``m3u8_name``. Sets the module-level ``m3u8_path``
    (where the playlist is stored) and ``base_url`` (the URL prefix that the
    playlist's segment names are appended to).

    If the page contains no candidate link, or none of the candidates can be
    downloaded, the user is prompted and the program exits.

    Args:
        netlink: URL of the episode page.
    """
    global m3u8_name
    global m3u8_path
    global base_url
    m3u8_path='E:/vsc and pytraining/pytrain/dililidown/'+m3u8_name+'.m3u8'
    header={
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400'
    }
    # requests waits forever unless a timeout is passed explicitly.
    html=requests.get(netlink,headers=header,timeout=10).text
    vlinks=re.findall(r'var vid = \'(.*?)\';',html)
    for m3u8_link in vlinks:
        # The link is embedded percent-encoded; restore ':' and '/'.
        m3u8_link=m3u8_link.replace('%3A',':').replace('%2F','/')
        # Prefer the higher bitrate, fall back to the lower one.
        for quality in ('1000k','800k'):
            m3u8_url=m3u8_link.replace('index.m3u8',quality+'/hls/index.m3u8')
            print(m3u8_url)
            try:
                m3u8_file=requests.get(m3u8_url,headers=header,timeout=10)
                try:
                    # An HTTP error (e.g. 404) does not raise by itself;
                    # without this check an error page would be saved as
                    # the playlist and the fallback would never be tried.
                    m3u8_file.raise_for_status()
                    with open(m3u8_path,"wb") as m3u8file:
                        m3u8file.write(m3u8_file.content)
                finally:
                    m3u8_file.close()
            except (requests.RequestException,OSError) as e:
                print("异常请求:%s"%e)
                continue
            base_url=m3u8_link.replace('index.m3u8',quality+'/hls/')
            time.sleep(3)
            return
        print('m3u8地址暂无法获取,请检查输入链接!')
    # Either no link was found in the page or every candidate failed.
    input('搜寻完毕,获取失败,按任意键结束。。。')
    exit()
                
def get_ts_url(m3u8_path,base_url):
    """Return the URL of every ``.ts`` segment listed in a playlist file.

    Lines are stripped of surrounding whitespace before being examined, so a
    final segment line without a trailing newline is not lost. Lines starting
    with ``#`` (playlist tags/comments) are never treated as segments.

    Args:
        m3u8_path: Path of the m3u8 playlist on disk.
        base_url: Prefix prepended to each segment entry; pass ``''`` to get
            the entries exactly as written in the playlist.

    Returns:
        A list of ``base_url + entry`` strings in playlist order.
    """
    urls=[]
    # Decode as UTF-8 instead of the locale default; undecodable bytes are
    # replaced rather than aborting the whole read.
    with open(m3u8_path,'r',encoding='utf-8',errors='replace') as mfile:
        for line in mfile:
            entry=line.strip()
            if entry.endswith('.ts') and not entry.startswith('#'):
                urls.append(base_url+entry)
    return urls

def down_ts(start,end,ts_urls,d_path):
    """Download the segments ``ts_urls[start:end]`` into ``d_path`` in turn.

    Each segment is stored under the last path component of its URL. A
    segment that cannot be fetched (connection error, HTTP error status,
    write error) is skipped, and any partially written file is removed so
    that an incomplete segment is not left on disk looking like a finished
    one.

    Args:
        start: Index of the first URL to download.
        end: Index one past the last URL to download.
        ts_urls: Full list of segment URLs.
        d_path: Directory the segment files are written to.
    """
    header={
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400'
    }
    for ts_url in ts_urls[start:end]:
        ts_name=ts_url.split('/')[-1]
        ts_path = d_path+"/{0}".format(ts_name)
        try:
            # stream=True defers the body until iter_content is consumed;
            # verify=False skips TLS certificate verification.
            response=requests.get(ts_url,headers=header,stream=True,verify=False,timeout=10)
        except requests.RequestException:
            continue
        try:
            if not response.ok:
                # Do not store an HTTP error page as if it were a segment.
                continue
            try:
                with open(ts_path,"wb") as file:
                    for chunk in response.iter_content(chunk_size=1024):
                        if chunk:
                            file.write(chunk)
            except Exception:
                # Best effort: whatever interrupted the transfer, drop the
                # partial file and move on to the next segment.
                try:
                    os.remove(ts_path)
                except OSError:
                    pass
                continue
        finally:
            response.close()
        time.sleep(2)
def file_walker(path):
    """Return the paths of all files below ``path`` in a deterministic order.

    ``os.walk`` yields directory entries in arbitrary order, so both the
    sub-directories and the file names of each directory are sorted by name
    before being visited.

    Args:
        path: Root directory to scan.

    Returns:
        A list of ``root + '/' + filename`` strings, sorted by name within
        each directory.
    """
    file_list = []
    for root, dirs, files in os.walk(path):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        for fn in sorted(files):
            file_list.append(root+'/'+fn)
    return file_list
 
def combine(d_path, combine_path, file_name):
    """Concatenate the downloaded segments into one mp4 and delete the cache.

    The files returned by ``file_walker(d_path)`` are appended, in that
    order, to ``<combine_path>/<file_name>.mp4``. Afterwards the segment
    directory ``d_path`` and the playlist at the module-level ``m3u8_path``
    are deleted.

    Args:
        d_path: Directory holding the downloaded segments.
        combine_path: Directory the merged file is written to.
        file_name: Name of the merged file, without extension.
    """
    file_list = file_walker(d_path)
    file_path = os.path.join(combine_path, file_name + '.mp4')
    with open(file_path, 'wb') as fw:
        for segment in file_list:
            # Stream each segment and close it right away instead of
            # leaving one open handle per segment behind.
            with open(segment, 'rb') as fr:
                shutil.copyfileobj(fr, fw)
    print('合成完毕')
    print('清理缓存中请等待')
    shutil.rmtree(d_path)
    os.remove(m3u8_path)
    time.sleep(5)
    print('缓存清理完毕')
def thre(file_size,num_thread,ts_urls):
    """Download ``ts_urls`` with several worker threads and wait for them all.

    The index range ``[0, file_size)`` is cut into ``num_thread`` contiguous
    slices of equal length; the last slice also takes the remainder. Each
    slice is handed to ``down_ts`` in its own daemon thread, writing into the
    module-level ``d_path``. The call returns once every worker has finished.

    Args:
        file_size: Number of URLs to download.
        num_thread: Requested number of worker threads; capped so that no
            thread is started with an empty slice.
        ts_urls: Segment URLs to download.
    """
    if file_size <= 0:
        return
    # With more threads than items the integer division below yields 0 and
    # the last thread would end up doing all the work on its own.
    num_thread = max(1, min(num_thread, file_size))
    part = file_size // num_thread
    workers = []
    for i in range(num_thread):
        start = part * i
        # The last slice absorbs whatever the division left over.
        end = file_size if i == num_thread - 1 else start + part
        t = threading.Thread(
            target=down_ts,
            kwargs={'start': start, 'end': end, 'ts_urls':ts_urls, 'd_path': d_path},
            daemon=True,
        )
        t.start()
        workers.append(t)
    # Join every worker that was started, not just one of them.
    for t in workers:
        t.join()

def checkfull(d_path):
    """Download the segments still missing, or merge when none are missing.

    The playlist at the module-level ``m3u8_path`` is compared with the file
    names found under ``d_path``. The entries without a matching file are
    stored in the module-level ``final``.

    * If ``final`` is non-empty, the missing segments are downloaded through
      ``thre`` using the module-level ``base_url``.
    * Otherwise the segments are merged with ``combine`` (using the
      module-level ``combine_path`` and ``m3u8_name``) and ``exit()`` is
      called.

    Args:
        d_path: Directory holding the downloaded segments.
    """
    global final
    # Names of the files already on disk; a set keeps the lookup cheap.
    downloaded = set()
    for root, dirs, files in os.walk(d_path):
        downloaded.update(files)
    tsnames=get_ts_url(m3u8_path,'')
    # down_ts stores a segment under the last '/'-separated component of its
    # URL, so compare on that component rather than on the whole entry.
    final=[name for name in tsnames if name.split('/')[-1] not in downloaded]

    if final:
        # Take the largest thread count (at most 100, and never more than
        # the number of segments) that leaves a remainder below 8 for the
        # last thread. numb == 1 always qualifies, so num_thr is always set.
        for numb in range(min(100,len(final)),0,-1):
            if len(final) % numb < 8:
                num_thr=numb
                break
        ts_urlsfinal=[base_url+name for name in final]
        print('开始本次补偿')
        left_size=len(ts_urlsfinal)
        print('剩下{}个链接未下载'.format(left_size))
        thre(left_size,num_thr,ts_urlsfinal)
    else:
        print('准备合成')
        time.sleep(3)
        combine(d_path,combine_path,m3u8_name)
        print('运行完毕')
        time.sleep(3)
        exit()

if __name__ == '__main__':
    # Script entry point: ask for an episode URL and a name, download the
    # playlist, then run up to nine check/download rounds until every
    # segment is present and has been merged.
    print('''
    本软件实现多线程下载m3u8并自动转化为mp4
    事先准备好一个网站的某集链接
    如果中途卡住可以直接叉叉,不用担心缓存问题
    会生成这样的文件夹:E:/vsc and pytraining/pytrain/dililidown/all
    E:/vsc and pytraining/pytrain/dililidown/tss/del
    ''')

    netlink=input('请输入一集链接:')
    m3u8_name=input('给你要下载的东西取个名字:')
    d_path='E:/vsc and pytraining/pytrain/dililidown/tss/del/'+m3u8_name
    combine_path='E:/vsc and pytraining/pytrain/dililidown/all'

    # Segment cache directory and output directory.
    os.makedirs(d_path,exist_ok=True)
    os.makedirs(combine_path,exist_ok=True)

    # checkfull publishes the list of missing segments here; define it up
    # front so the checks below work even if checkfull fails before
    # assigning it.
    final=None

    down_m3u8(netlink)  # downloads the playlist and sets base_url
    for failtimes in range(9):
        print('第{}次检查文件'.format(failtimes+1))
        try:
            checkfull(d_path)
        except SystemExit:
            # checkfull calls exit() after a successful merge.
            break
        except Exception as e:
            print('本次检查出错:{}'.format(e))
            if not final:
                break
            continue
    if final :
        print('多次尝试仍然有遗漏,请重开或手动')

    input('任意键退出')
    

#输入例子 dilili的第9集  http://m.dlili.tv/vplay/NDc3Mi0xLTg=.html  第10集http://m.dlili.tv/vplay/NDc3Mi0xLTk=.html


你可能感兴趣的:(笔记)