douyin for video

run.py

from tkinter import *
from tkinter import ttk
from tkinter import messagebox
import os
import threading
from spider_douyin_video import Spider
import re

"""
test_url
https://v.douyin.com/eHfEYLw/    #水冰月
不足:
1.加入多线程并发下载
2.没有做到边解析,边下载
3.签名稳定性未知
"""


class App:
    """Tkinter front end that downloads every video of one Douyin user.

    The user pastes a ``https://v.douyin.com/...`` share link, optionally
    picks or types a request signature, and starts the download. The work
    itself is delegated to ``Spider``; this class only drives the widgets.

    Constructing an instance builds the window and blocks in the Tk main
    loop until the window is closed.

    NOTE(review): ``download_videos`` runs on a worker thread and touches
    widgets directly, as the original design did. Whether that is safe
    depends on the Tcl/Tk build - confirm on the target platform.
    """

    # Runs of CJK ideographs and whitespace. Everything else (emoji in
    # particular) is dropped before a title is inserted into the Text widget,
    # because such characters can raise TclError there.
    _DISPLAYABLE = re.compile(r'[\u4E00-\u9FA5\s]+')
    _SHARE_LINK_PREFIX = 'https://v.douyin.com/'

    def __init__(self):
        self.base_dir = './video/'
        # Default signature, used when the user has not chosen another one.
        self.my_sig = 'JKc3lQAARLjMfRXooRa1gSSnN4'
        self.sig_item = {'签名1': '_02B4Z6wo00f012vHFNAAAIDC4IyPMWxITgNr5xBAALnu25',
                         '签名2': "_02B4Z6wo00f012vHFNAAAIDC4IyPMWxITgNr5xBAALnu25"}
        # Signature selected in the settings window; None means "use my_sig".
        self.custom_sig = None
        # Held while a download runs so that the button, the menu entry and
        # the Return key cannot start a second, overlapping download.
        self._download_lock = threading.Lock()
        self.create_widget()
        self.set_widget()
        self.place_widget()
        self.window.mainloop()

    @staticmethod
    def _is_share_link(link):
        """Return True when *link* looks like a Douyin short share link."""
        return link.strip().startswith(App._SHARE_LINK_PREFIX)

    @staticmethod
    def _display_title(title):
        """Return *title* reduced to characters that are safe to display."""
        return ''.join(App._DISPLAYABLE.findall(title))

    def create_widget(self):
        """Create the main window, centred on screen, and all its widgets."""
        self.window = Tk()
        self.window.title('Douyin_User_Video_Downloader-v1.0')
        width = 600
        height = 700
        screen_width = self.window.winfo_screenwidth()
        screen_height = self.window.winfo_screenheight()
        left = (screen_width - width) / 2
        top = (screen_height - height) / 2
        self.window.geometry("%dx%d+%d+%d" % (width, height, left, top))
        self.window.resizable(0, 0)
        self.l1 = ttk.Label(self.window, text='请输入用户主页链接地址:')
        self.e1 = ttk.Entry(self.window, width=90)
        self.l2 = ttk.Label(self.window, text='当前状态:')
        self.t1 = Text(self.window, width=80, height=30)
        self.l3 = ttk.Label(self.window, text='当前下载进度:')
        self.progress = ttk.Progressbar(self.window, orient=HORIZONTAL, length=460, mode='determinate', value=0,
                                        maximum=100)
        self.l4_var = StringVar()
        self.l4_var.set('0.0%[未下载]')
        self.l4 = ttk.Label(self.window, textvariable=self.l4_var)
        self.b1 = ttk.Button(self.window, text='开始下载', command=lambda: self.thread_it(self.download_videos))
        self.m = Menu(self.window)
        self.window['menu'] = self.m

    def set_widget(self):
        """Populate the menu bar and register window and key bindings."""
        self.s1 = Menu(self.m, tearoff=False)
        self.s2 = Menu(self.m, tearoff=False)
        self.s3 = Menu(self.m, tearoff=False)
        self.m.add_cascade(label='文件', menu=self.s1)
        self.m.add_cascade(label='下载', menu=self.s2)
        self.m.add_cascade(label='关于', menu=self.s3)
        self.s1.add_command(label='打开文件夹', command=self.open_dir)
        self.s1.add_command(label='退出', command=self.quit_window)
        self.s2.add_command(label='自定义签名', command=self.set_sig)
        self.s2.add_command(label='下载视频', command=lambda: self.thread_it(self.download_videos))
        self.s3.add_command(label='联系作者', command=self.connect_author)
        self.window.protocol('WM_DELETE_WINDOW', self.quit_window)
        # An empty event sequence is rejected by Tk; the handlers' names show
        # the intended keys: Escape asks to quit, Return starts the download.
        self.window.bind('<Escape>', self.escape)
        self.e1.bind('<Return>', self.enter)

    def place_widget(self):
        """Lay the widgets out top to bottom."""
        self.l1.pack(anchor="w")
        self.e1.pack(anchor="w", padx=20)
        self.l2.pack(anchor="w")
        self.t1.pack(anchor="w", padx=20)
        self.l3.pack(anchor="w")
        self.progress.pack()
        self.l4.pack()
        self.b1.pack()

    def download_videos(self):
        """Validate the entered link and download all videos it resolves to.

        The download controls are disabled for the duration of the run and
        are always re-enabled afterwards, including when nothing was found
        or an error occurred. Intended to be run through ``thread_it``.
        """
        share_link = self.e1.get().strip()
        if not self._is_share_link(share_link):
            messagebox.showwarning('警告', '请输入正确的分享链接!')
            self.e1.delete(0, END)
            return
        if not self._download_lock.acquire(blocking=False):
            # A download is already in progress (e.g. Return pressed twice).
            return
        self.s2.entryconfig('下载视频', state=DISABLED)
        self.b1.config(state=DISABLED)
        try:
            self._run_download(share_link)
        except Exception as exc:
            # Worker-thread boundary: report the failure to the user instead
            # of letting the thread die silently with the controls disabled.
            messagebox.showerror('错误', f'下载失败: {exc}')
        finally:
            self.b1.config(state=NORMAL)
            self.s2.entryconfig('下载视频', state=NORMAL)
            self._download_lock.release()

    def _run_download(self, share_link):
        """Resolve *share_link* and download each video, updating the UI."""
        the_sig = self.custom_sig or self.my_sig
        spider = Spider(share_link)
        whole_url = spider.get_whole_link(the_sig)
        self.t1.insert(END, '正在解析视频......\n')
        spider.get_video_data(whole_url)
        videos = spider.videos_list
        if not videos:
            messagebox.showerror('错误', '没有解析出视频,请检查签名是否可用!')
            return
        self.t1.insert(END, '解析完成,开始下载......\n')
        for no, item in enumerate(videos, start=1):
            new_name = self._display_title(item["video_title"])
            self.t1.insert(END, f'[{no}]{new_name}.mp4')
            self.t1.see(END)
            got_data = False
            for progress, speed in spider.download_video(self.base_dir, item):
                got_data = True
                self.l4_var.set('进度:%.1f%% 速度:%s' % (progress, speed))
                self.progress['value'] = int(progress)
                self.progress.update()
            # A download that never reported progress did not fetch anything.
            self.t1.insert(END, '  ----->完成\n' if got_data else '  ----->失败\n')
        self.t1.insert(END, '所有视频下载完成!\n')

    def open_dir(self):
        """Create the download folder if needed and open it in a file manager."""
        os.makedirs(self.base_dir, exist_ok=True)
        # Open the folder by absolute path.
        abs_path = os.path.abspath(self.base_dir)
        if hasattr(os, 'startfile'):
            os.startfile(abs_path)
        else:
            # os.startfile only exists on Windows.
            import subprocess
            import sys
            opener = 'open' if sys.platform == 'darwin' else 'xdg-open'
            subprocess.Popen([opener, abs_path])

    def quit_window(self):
        """Ask for confirmation and close the main window."""
        if messagebox.askyesno('提示', '是否要退出?'):
            self.window.destroy()

    def escape(self, event):
        """Key-binding handler: same as choosing "quit"."""
        self.quit_window()

    def connect_author(self):
        """Show the author's contact information."""
        messagebox.showinfo('联系作者', '作者QQ:懷淰メ')

    def e1_clear(self):
        """Clear the link entry."""
        self.e1.delete(0, END)

    def enter(self, event):
        """Key-binding handler: start the download in the background."""
        self.thread_it(self.download_videos)

    def set_sig(self):
        """Open a small window for choosing or typing a request signature."""

        def set_the_sig():
            # A preset name maps to its stored signature; any other non-empty
            # text is taken literally as a custom signature.
            choice = combobox.get()
            if choice in self.sig_item:
                self.custom_sig = self.sig_item[choice]
                messagebox.showinfo('提示', '签名设置成功!')
            elif choice.strip():
                self.custom_sig = choice.strip()
                messagebox.showinfo('提示', '自定义签名设置成功!')
            else:
                self.custom_sig = None
                messagebox.showwarning('警告', '自定义签名失败,将使用默认签名!')
            self.set_sig_window.destroy()

        self.set_sig_window = Toplevel(self.window)
        set_sig_window_width = 200
        set_sig_window_height = 50
        set_sig_window_screen_left = self.set_sig_window.winfo_screenwidth()
        set_sig_window_screen_top = self.set_sig_window.winfo_screenheight()
        set_sig_window_left = (set_sig_window_screen_left - set_sig_window_width) / 2
        set_sig_window_top = (set_sig_window_screen_top - set_sig_window_height) / 2
        self.set_sig_window.geometry(
            '%dx%d+%d+%d' % (set_sig_window_width, set_sig_window_height, set_sig_window_left, set_sig_window_top))
        self.sig_window_combox_var = StringVar()
        combobox = ttk.Combobox(self.set_sig_window, textvariable=self.sig_window_combox_var,
                                values=list(self.sig_item))
        combobox.current(0)
        combobox.pack(side='left')
        s_b1 = ttk.Button(self.set_sig_window, text='设置', command=set_the_sig)
        s_b1.pack(side='left')
        self.set_sig_window.protocol('WM_DELETE_WINDOW', self.sig_window_show_warn)

    def sig_window_show_warn(self):
        """Close the signature window without changing the signature."""
        messagebox.showwarning('警告', '签名未更改,将使用默认签名!')
        self.set_sig_window.destroy()

    def thread_it(self, func, *args):
        """Run ``func(*args)`` on a daemon thread so the UI stays responsive."""
        # Daemon thread: it ends together with the main thread.
        t = threading.Thread(target=func, args=args, daemon=True)
        self.window.update()
        t.start()


if __name__ == "__main__":
    # Script entry point: constructing App builds the window and blocks in
    # the Tk main loop until the user closes it.
    App()

spider_douyin_video.py

import requests
import re
from urllib.parse import urlencode
import json
import time
from requests.adapters import HTTPAdapter
import os


class Spider(object):
    """Resolve a Douyin share link into a video list and download the videos.

    Typical use::

        spider = Spider(share_link)
        url = spider.get_whole_link(signature)
        spider.get_video_data(url)
        for item in spider.videos_list:
            for progress, speed in spider.download_video('./video/', item):
                ...

    Attributes:
        link: the share link given to the constructor.
        videos_list: list of ``{'video_title', 'download_link'}`` dicts,
            filled by ``get_video_data``.
        author_name: nickname of the videos' author ('' until known).
        last_error: exception that aborted the most recent
            ``download_video`` call, or None.
    """

    # Characters that are not allowed in Windows file names, plus the
    # full-width question mark (U+FF1F), the ideographic comma (U+3001) and
    # line breaks. The non-ASCII ones are written as escapes so that a
    # re-encoding of this file cannot corrupt the pattern.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\uff1f\u3001\r\n]')
    _REQUEST_TIMEOUT = 30  # seconds, applied to every HTTP request
    _CHUNK_SIZE = 1024  # bytes read per iteration while downloading

    def __init__(self, share_link):
        self.link = share_link
        self.videos_list = list()
        self.author_name = ''
        self.last_error = None

    @classmethod
    def _safe_filename(cls, name):
        """Return *name* with every unsafe file-name character set to '_'."""
        return cls._UNSAFE_FILENAME_CHARS.sub('_', name)

    def get_whole_link(self, sig):
        """Build the video-list API URL for the user behind the share link.

        Args:
            sig: value sent as the ``_signature`` query parameter.

        Returns:
            The API URL for the first page. If no ``sec_uid`` can be found
            in the redirect response, the URL carries an empty ``sec_uid``.

        Raises:
            requests.RequestException: if the share link cannot be fetched.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Mobile Safari/537.36'
        }
        # Redirects are not followed: the redirect target itself (Location
        # header, repeated in the body) is where sec_uid is looked for.
        r = requests.get(self.link, headers=headers, allow_redirects=False,
                         timeout=self._REQUEST_TIMEOUT)
        haystack = r.headers.get('Location', '') + ' ' + r.text
        found = re.search(r'sec_uid=([\w-]+)', haystack)
        sec_uid = found.group(1) if found else ''
        api_url = 'https://www.iesdouyin.com/web/api/v2/aweme/post/?'
        params = {
            'sec_uid': sec_uid,
            'count': 21,
            'max_cursor': 0,
            'aid': 1128,
            '_signature': sig,
            'dytk': ''
        }
        # Assemble the complete URL.
        return api_url + urlencode(params)

    def get_video_data(self, whole_url):
        """Fetch every page of the video-list API into ``videos_list``.

        Pages are followed through the ``max_cursor`` value of each response
        until a page is empty, unparsable, or carries no further cursor.

        Args:
            whole_url: first-page URL as returned by ``get_whole_link``.

        Returns:
            ``self.videos_list`` (possibly empty).

        Raises:
            requests.RequestException: if a page cannot be fetched.
        """
        headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'pragma': 'no-cache',
            'cache-control': 'no-cache',
            'upgrade-insecure-requests': '1',
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
        }
        url = whole_url
        seen_cursors = set()
        while True:
            r = requests.get(url, headers=headers, timeout=self._REQUEST_TIMEOUT)
            try:
                payload = json.loads(r.text)
            except ValueError:
                # Not JSON: treat it like a page without videos.
                break
            if not isinstance(payload, dict):
                break
            aweme_list = payload.get('aweme_list')
            if not aweme_list:
                break
            nickname = (aweme_list[0].get('author') or {}).get('nickname')
            if nickname:
                self.author_name = nickname
            for data in aweme_list:
                video_id = (data.get('video') or {}).get('vid')
                if not video_id:
                    # No video id means no usable download link.
                    continue
                self.videos_list.append({
                    # Video description, used as the title.
                    'video_title': data.get('desc') or '',
                    'download_link': f'https://aweme.snssdk.com/aweme/v1/play/?video_id={video_id}',
                })
            next_cursor = payload.get('max_cursor')
            # Stop on a missing/zero cursor, and on a repeated one so that a
            # misbehaving response cannot loop forever.
            if not next_cursor or next_cursor in seen_cursors:
                break
            seen_cursors.add(next_cursor)
            url = re.sub(r'max_cursor=\d+', f'max_cursor={next_cursor}', url)
        return self.videos_list

    def download_video(self, base_dir, item):
        """Download one video, yielding progress as it goes.

        The file is written to ``<base_dir>/<author_name>/<title>.mp4`` with
        unsafe file-name characters replaced by '_'.

        Args:
            base_dir: root download directory.
            item: dict with ``'video_title'`` and ``'download_link'``.

        Yields:
            ``(progress, speed)`` after every chunk: *progress* is the
            percentage downloaded (0.0 while the total size is unknown, then
            100.0 at the end) and *speed* is a string such as ``'512.00K/S'``.

        On a network or file error the generator simply stops early, the
        partial file is removed, and the exception is kept in
        ``self.last_error``.
        """
        start_time = time.time()
        video_link = item['download_link']
        filename = self._safe_filename(item['video_title'])
        if not filename:
            # Untitled videos would all collide on ".mp4"; use the id part of
            # the link instead.
            filename = self._safe_filename(video_link.rsplit('=', 1)[-1]) or 'untitled'
        # A mobile user agent is required for this endpoint.
        headers = {
            'Connection': 'keep-alive',
            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
        }
        target_dir = os.path.join(base_dir, self._safe_filename(self.author_name))
        file_path = os.path.join(target_dir, filename + '.mp4')
        self.last_error = None
        file_opened = False
        # Connections may time out ([WinError 10060]); retry them and bound
        # every attempt with a timeout. The request must go through the
        # session, otherwise the mounted retry adapter is never used.
        s = requests.session()
        s.keep_alive = False
        adapter = HTTPAdapter(max_retries=5)
        s.mount('https://', adapter)
        s.mount('http://', adapter)
        try:
            os.makedirs(target_dir, exist_ok=True)
            r = s.get(video_link, headers=headers, stream=True,
                      timeout=self._REQUEST_TIMEOUT)
            try:
                # Do not save an HTTP error page as a video.
                r.raise_for_status()
                file_size = int(r.headers.get('Content-Length') or 0)
                size_ = 0
                speed = self.format_size(0) + '/S'
                with open(file_path, 'wb') as f:
                    file_opened = True
                    for data in r.iter_content(self._CHUNK_SIZE):
                        f.write(data)
                        size_ += len(data)
                        # Guard against a zero interval on coarse clocks.
                        elapsed = max(time.time() - start_time, 1e-6)
                        speed = self.format_size(size_ / elapsed) + '/S'
                        if file_size:
                            progress = min(size_ / file_size * 100, 100.0)
                        else:
                            progress = 0.0  # total size unknown
                        yield progress, speed
                if not file_size and size_:
                    yield 100.0, speed
            finally:
                r.close()
        except (requests.RequestException, OSError, ValueError) as exc:
            self.last_error = exc
            if file_opened:
                # Only remove what this call created/truncated itself.
                try:
                    os.remove(file_path)
                except OSError:
                    pass  # best effort: nothing more can be done here
        finally:
            s.close()

    def format_size(self, bytes):
        """Format a byte count as a string in K, M or G with two decimals.

        Returns ``"Error"`` when *bytes* cannot be converted to a number.
        """
        try:
            kb = float(bytes) / 1024
        except (TypeError, ValueError, OverflowError):
            return "Error"
        if kb >= 1024:
            mb = kb / 1024
            if mb >= 1024:
                return "%.2fG" % (mb / 1024)
            return "%.2fM" % mb
        return "%.2fK" % kb

你可能感兴趣的:(douyin for video)