# ---- run.py ----
from tkinter import *
from tkinter import ttk
from tkinter import messagebox
import os
import threading
from spider_douyin_video import Spider
import re
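"""
Test URL:
https://v.douyin.com/eHfEYLw/  # 水冰月

Known limitations:
1. Downloads are not yet parallelised across multiple threads.
2. Parsing and downloading are not interleaved: the whole video list is parsed before the first download starts.
3. The stability of the request signature is unknown.
"""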
class App:
    """Tkinter GUI that downloads all videos listed for a Douyin user.

    The user pastes a ``https://v.douyin.com/`` share link; the download is
    driven by a ``Spider`` instance on a daemon worker thread (see
    ``thread_it``).  Constructing the class builds the window and then blocks
    in Tk's main loop until the window is destroyed.

    Attributes set in ``__init__``:
        base_dir: directory below which downloaded videos are stored.
        my_sig: default request signature.
        sig_item: preset signatures offered in the "custom signature" dialog.
        custom_sig: signature chosen in that dialog, or ``None`` to use
            ``my_sig``.
    """

    # Matches runs of CJK ideographs and whitespace.  Only these characters
    # are echoed into the Text widget, because emoji inserted into a Text
    # widget can raise TclError.
    _TITLE_CHARS = re.compile(r'[\u4E00-\u9FA5\s]+')

    def __init__(self):
        self.base_dir = './video/'
        self.my_sig = 'JKc3lQAARLjMfRXooRa1gSSnN4'
        self.sig_item = {'签名1': '_02B4Z6wo00f012vHFNAAAIDC4IyPMWxITgNr5xBAALnu25',
                         '签名2': "_02B4Z6wo00f012vHFNAAAIDC4IyPMWxITgNr5xBAALnu25"}
        # Signature picked in the dialog; None means "use the default".
        self.custom_sig = None
        self.create_widget()
        self.set_widget()
        self.place_widget()
        self.window.mainloop()

    @classmethod
    def _display_title(cls, title):
        """Return *title* reduced to CJK ideographs and whitespace."""
        return ''.join(cls._TITLE_CHARS.findall(title))

    def create_widget(self):
        """Create the main window (centred, fixed size) and all its widgets."""
        self.window = Tk()
        self.window.title('Douyin_User_Video_Downloader-v1.0')
        width = 600
        height = 700
        screen_width = self.window.winfo_screenwidth()
        screen_height = self.window.winfo_screenheight()
        left = (screen_width - width) / 2
        top = (screen_height - height) / 2
        self.window.geometry("%dx%d+%d+%d" % (width, height, left, top))
        self.window.resizable(0, 0)
        self.l1 = ttk.Label(self.window, text='请输入用户主页链接地址:')
        self.e1 = ttk.Entry(self.window, width=90)
        self.l2 = ttk.Label(self.window, text='当前状态:')
        self.t1 = Text(self.window, width=80, height=30)
        self.l3 = ttk.Label(self.window, text='当前下载进度:')
        self.progress = ttk.Progressbar(self.window, orient=HORIZONTAL, length=460, mode='determinate', value=0,
                                        maximum=100)
        self.l4_var = StringVar()
        self.l4_var.set('0.0%[未下载]')
        self.l4 = ttk.Label(self.window, textvariable=self.l4_var)
        self.b1 = ttk.Button(self.window, text='开始下载', command=lambda: self.thread_it(self.download_videos))
        self.m = Menu(self.window)
        self.window['menu'] = self.m

    def set_widget(self):
        """Populate the menu bar and register window/keyboard bindings."""
        self.s1 = Menu(self.m, tearoff=False)
        self.s2 = Menu(self.m, tearoff=False)
        self.s3 = Menu(self.m, tearoff=False)
        self.m.add_cascade(label='文件', menu=self.s1)
        self.m.add_cascade(label='下载', menu=self.s2)
        self.m.add_cascade(label='关于', menu=self.s3)
        self.s1.add_command(label='打开文件夹', command=self.open_dir)
        self.s1.add_command(label='退出', command=self.quit_window)
        self.s2.add_command(label='自定义签名', command=self.set_sig)
        self.s2.add_command(label='下载视频', command=lambda: self.thread_it(self.download_videos))
        self.s3.add_command(label='联系作者', command=self.connect_author)
        self.window.protocol('WM_DELETE_WINDOW', self.quit_window)
        # Tk rejects an empty event sequence, so the key names must be
        # spelled out: Escape asks to quit, Return in the entry starts the
        # download.
        self.window.bind('<Escape>', self.escape)
        self.e1.bind('<Return>', self.enter)

    def place_widget(self):
        """Lay the widgets out top to bottom with ``pack``."""
        self.l1.pack(anchor="w")
        self.e1.pack(anchor="w", padx=20)
        self.l2.pack(anchor="w")
        self.t1.pack(anchor="w", padx=20)
        self.l3.pack(anchor="w")
        self.progress.pack()
        self.l4.pack()
        self.b1.pack()

    def download_videos(self):
        """Parse the share link in the entry box and download every video.

        Meant to run on a worker thread via ``thread_it``.  The download
        button and menu entry are disabled while work is in progress and are
        always re-enabled afterwards, whether the run succeeds, finds no
        videos, or fails with an exception.
        """
        share_link = self.e1.get().strip()
        if not share_link.startswith('https://v.douyin.com/'):
            messagebox.showwarning('警告', '请输入正确的分享链接!')
            self.e1.delete(0, END)
            return

        the_sig = self.custom_sig or self.my_sig
        self.s2.entryconfig('下载视频', state=DISABLED)
        self.b1.config(state=DISABLED)
        try:
            spider = Spider(share_link)
            whole_url = spider.get_whole_link(the_sig)
            self.t1.insert(END, '正在解析视频......\n')
            spider.get_video_data(whole_url)
            videos = spider.videos_list
            if not videos:
                messagebox.showerror('错误', '没有解析出视频,请检查签名是否可用!')
                return

            self.t1.insert(END, '解析完成,开始下载......\n')
            for item in videos:
                # Running number derived from the Text widget's current line
                # count; the -3 presumably discounts the status lines already
                # printed above - TODO confirm.
                no = int(self.t1.index('end').split('.')[0]) - 3
                new_name = self._display_title(item["video_title"])
                self.t1.insert(END, f'[{no}]{new_name}.mp4')
                self.t1.see(END)
                for progress, speed in spider.download_video(self.base_dir, item):
                    self.l4_var.set('进度:%.1f%% 速度:%s' % (progress, speed))
                    self.progress['value'] = int(progress)
                    self.progress.update()
                self.t1.insert(END, ' ----->完成\n')
            self.t1.insert(END, '所有视频下载完成!\n')
        except Exception as exc:
            # Boundary of the worker thread: report the failure instead of
            # letting the thread die silently.
            messagebox.showerror('错误', f'下载失败:{exc}')
        finally:
            self.b1.config(state=NORMAL)
            self.s2.entryconfig('下载视频', state=NORMAL)

    def open_dir(self):
        """Create the download directory if needed and open it."""
        os.makedirs(self.base_dir, exist_ok=True)
        # Open the folder through its absolute path.
        abs_path = os.path.abspath(self.base_dir)
        opener = getattr(os, 'startfile', None)
        if opener is None:
            # os.startfile only exists on Windows; elsewhere just show the path.
            messagebox.showinfo('提示', abs_path)
            return
        opener(abs_path)

    def quit_window(self):
        """Ask for confirmation, then destroy the main window."""
        if messagebox.askyesno('提示', '是否要退出?'):
            self.window.destroy()

    def escape(self, event):
        """Key handler: same as choosing "quit"."""
        self.quit_window()

    def connect_author(self):
        """Show the author's contact information."""
        messagebox.showinfo('联系作者', '作者QQ:懷淰メ')

    def e1_clear(self):
        """Clear the share-link entry box."""
        self.e1.delete(0, END)

    def enter(self, event):
        """Key handler: start the download on a worker thread."""
        self.thread_it(self.download_videos)

    def set_sig(self):
        """Open a small dialog that lets the user pick or type a signature.

        Choosing one of the presets in ``sig_item`` stores that preset;
        typing any other non-empty text stores the text itself.  An empty
        input resets to the default signature.
        """

        def set_the_sig():
            preset = self.sig_item.get(combobox.get())
            if preset is not None:
                self.custom_sig = preset
                messagebox.showinfo('提示', '签名设置成功!')
            else:
                typed_sig = self.sig_window_combox_var.get().strip()
                if typed_sig:
                    # Actually remember the typed signature; reporting
                    # success without storing it would be misleading.
                    self.custom_sig = typed_sig
                    messagebox.showinfo('提示', '自定义签名设置成功!')
                else:
                    self.custom_sig = None
                    messagebox.showwarning('警告', '自定义签名失败,将使用默认签名!')
            self.set_sig_window.destroy()

        self.set_sig_window = Toplevel(self.window)
        dialog_width = 200
        dialog_height = 50
        screen_width = self.set_sig_window.winfo_screenwidth()
        screen_height = self.set_sig_window.winfo_screenheight()
        dialog_left = (screen_width - dialog_width) / 2
        dialog_top = (screen_height - dialog_height) / 2
        self.set_sig_window.geometry(
            '%dx%d+%d+%d' % (dialog_width, dialog_height, dialog_left, dialog_top))
        self.sig_window_combox_var = StringVar()
        combobox = ttk.Combobox(self.set_sig_window, textvariable=self.sig_window_combox_var,
                                values=list(self.sig_item))
        combobox.current(0)
        combobox.pack(side='left')
        s_b1 = ttk.Button(self.set_sig_window, text='设置', command=set_the_sig)
        s_b1.pack(side='left')
        self.set_sig_window.protocol('WM_DELETE_WINDOW', self.sig_window_show_warn)

    def sig_window_show_warn(self):
        """Warn that the signature was not changed and close the dialog."""
        messagebox.showwarning('警告', '签名未更改,将使用默认签名!')
        self.set_sig_window.destroy()

    def thread_it(self, func, *args):
        """Run ``func(*args)`` on a daemon thread.

        The thread is a daemon so it ends together with the main thread.
        """
        t = threading.Thread(target=func, args=args, daemon=True)
        self.window.update()
        t.start()
# Start the GUI only when this module is executed as a script.
if __name__ == '__main__':
    App()
# ---- spider_douyin_video.py ----
import requests
import re
from urllib.parse import urlencode
import json
import time
from requests.adapters import HTTPAdapter
import os
class Spider(object):
    """Collect and download the videos listed for one Douyin share link.

    Typical use: ``get_whole_link`` builds the list-API URL,
    ``get_video_data`` fills ``videos_list`` from it, and ``download_video``
    streams a single entry to disk while yielding progress.

    Attributes:
        link: the share link given to the constructor.
        videos_list: dicts with ``video_title`` and ``download_link`` keys.
        author_name: nickname read from the API response ('' until known).
        last_error: exception that aborted the most recent
            ``download_video`` call, or ``None`` if it completed.
    """

    API_URL = 'https://www.iesdouyin.com/web/api/v2/aweme/post/?'
    # Seconds to wait for the link/list requests before giving up.
    REQUEST_TIMEOUT = 30
    # Characters replaced by '_' in file and directory names: the ones
    # Windows forbids, control characters, the ideographic comma and the
    # full-width question mark.
    # NOTE(review): the original pattern held a bare '?' alternative that
    # does not compile; it presumably was a full-width '?' - confirm.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|?、\x00-\x1f]')

    def __init__(self, share_link):
        self.link = share_link
        self.videos_list = list()
        self.author_name = ''
        self.last_error = None

    @classmethod
    def _sanitize_filename(cls, name, fallback='untitled'):
        """Return *name* with unsafe characters replaced, or *fallback* if empty."""
        cleaned = cls._UNSAFE_FILENAME_CHARS.sub('_', name or '').strip()
        return cleaned or fallback

    def get_whole_link(self, sig):
        """Build the URL of the first page of the user's video-list API.

        Args:
            sig: value sent as the ``_signature`` query parameter.

        Returns:
            The API URL as a string.  ``sec_uid`` is empty when it could not
            be found in the response to the share link.

        Raises:
            requests.RequestException: if the share link cannot be fetched.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Mobile Safari/537.36'
        }
        # The redirect is deliberately not followed: sec_uid is read from the
        # body of the redirect response itself.
        r = requests.get(self.link, headers=headers, allow_redirects=False,
                         timeout=self.REQUEST_TIMEOUT)
        # Take the first occurrence only; joining every match would glue
        # several ids together.
        # NOTE(review): '-' is accepted in addition to \w on the assumption
        # that sec_uid values may contain hyphens - confirm.
        match = re.search(r'sec_uid=([\w-]+)&', r.text)
        sec_uid = match.group(1) if match else ''
        params = {
            'sec_uid': sec_uid,
            'count': 21,
            'max_cursor': 0,
            'aid': 1128,
            '_signature': sig,
            'dytk': ''
        }
        # Assemble the complete URL.
        return self.API_URL + urlencode(params)

    def get_video_data(self, whole_url):
        """Fetch the video list page by page and append it to ``videos_list``.

        Paging follows the ``max_cursor`` value of each response and stops
        when a page has no ``aweme_list`` entries, when the cursor is 0 or
        missing, or when the cursor stops advancing.

        Args:
            whole_url: URL produced by ``get_whole_link``.

        Returns:
            ``self.videos_list``.

        Raises:
            requests.RequestException: on network failure.
            ValueError: if a response body is not valid JSON.
        """
        headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'pragma': 'no-cache',
            'cache-control': 'no-cache',
            'upgrade-insecure-requests': '1',
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
        }
        url = whole_url
        previous_cursor = None
        while True:
            r = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            payload = json.loads(r.text)
            aweme_list = payload.get('aweme_list')
            if not aweme_list:
                break

            nickname = (aweme_list[0].get('author') or {}).get('nickname')
            if nickname:
                self.author_name = nickname

            for data in aweme_list:
                try:
                    video_id = data['video'].get('vid')
                    title = data['desc']
                except KeyError:
                    # Skip a malformed entry instead of dropping the rest.
                    continue
                if not video_id:
                    # Without an id no download link can be built.
                    continue
                self.videos_list.append({
                    'video_title': title,
                    'download_link': f'https://aweme.snssdk.com/aweme/v1/play/?video_id={video_id}',
                })

            next_cursor = payload.get('max_cursor')
            if not next_cursor or next_cursor == previous_cursor:
                break
            previous_cursor = next_cursor
            # Same URL, only the paging cursor changes.
            url = re.sub(r'max_cursor=\d+', f'max_cursor={next_cursor}', url)
        return self.videos_list

    def download_video(self, base_dir, item, *, chunk_size=1024):
        """Stream one video to ``<base_dir><author>/<title>.mp4``.

        This is a generator: the download advances as it is iterated.

        Args:
            base_dir: directory prefix; expected to end with a separator.
            item: dict with ``video_title`` and ``download_link`` keys.
            chunk_size: number of bytes requested per chunk.

        Yields:
            ``(progress, speed)`` after every chunk, where *progress* is the
            percentage written so far (0.0 while the total size is unknown)
            and *speed* is a string such as ``'512.00K/S'``.

        Network and file errors do not propagate; the generator simply ends,
        the partial file is removed and the exception is kept in
        ``last_error``.
        """
        start_time = time.time()
        filename = self._sanitize_filename(item['video_title'])
        video_link = item['download_link']
        # A mobile User-Agent is required.
        headers = {
            'Connection': 'keep-alive',
            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
        }
        author_dir = self._sanitize_filename(self.author_name, fallback='unknown')
        full_dir_path = base_dir + author_dir + '/'
        target_path = full_dir_path + filename + '.mp4'
        self.last_error = None
        file_opened = False
        try:
            os.makedirs(full_dir_path, exist_ok=True)
            with requests.Session() as s:
                # The timeout and retries guard against connections that are
                # never answered (e.g. WinError 10060).  The request must go
                # through this session for the retry adapter to apply.
                s.mount('https://', HTTPAdapter(max_retries=5))
                with s.get(video_link, headers=headers, stream=True, timeout=30) as r:
                    # Do not save an HTTP error page as a video file.
                    r.raise_for_status()
                    # Content-Length is absent for chunked responses.
                    file_size = int(r.headers.get('Content-Length', 0))
                    size_ = 0
                    with open(target_path, 'wb') as f:
                        file_opened = True
                        for data in r.iter_content(chunk_size):
                            f.write(data)
                            size_ += len(data)
                            # Percentage downloaded so far.
                            progress = size_ / file_size * 100 if file_size else 0.0
                            elapsed = max(time.time() - start_time, 1e-6)
                            speed = self.format_size(size_ / elapsed) + '/S'
                            yield progress, speed
        except (requests.RequestException, OSError, ValueError) as exc:
            # Best effort: keep the caller's loop alive but remember why the
            # download stopped, and do not leave a truncated file behind.
            self.last_error = exc
            if file_opened:
                try:
                    os.remove(target_path)
                except OSError:
                    pass

    def format_size(self, bytes):
        """Format a byte count as ``'<n>K'``, ``'<n>M'`` or ``'<n>G'``.

        Two decimals are shown and units are powers of 1024.  Returns
        ``"Error"`` when *bytes* cannot be converted to a float.
        """
        try:
            kb = float(bytes) / 1024
        except (TypeError, ValueError, OverflowError):
            return "Error"
        if kb < 1024:
            return "%.2fK" % kb
        mb = kb / 1024
        if mb < 1024:
            return "%.2fM" % mb
        return "%.2fG" % (mb / 1024)