# coding=utf-8
import requests
import json
import re
import os
import sys
import time
from urllib.parse import urlparse
from contextlib import closing
from bs4 import BeautifulSoup
from requests.packages.urllib3.exceptions import InsecureRequestWarning
# The downloader below uses verify=False; silence the resulting
# urllib3 InsecureRequestWarning noise on every chunked request.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
class DouYinDownloader(object):
    """Download a Douyin (Chinese TikTok) share video without the watermark.

    Flow: follow the v.douyin.com short link, fetch the mobile share page,
    pull the <video> source URL out of it, and swap the 'playwm'
    (watermarked) path segment for 'play' to get the clean stream.
    """

    def __init__(self):
        # Mobile Safari UA: Douyin only embeds a <video> tag in the
        # mobile version of the share page.
        self.headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cache-control': 'max-age=0',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
        }
        # Hostnames a share link is allowed to resolve to.
        self.domain = ['www.douyin.com', 'v.douyin.com', 'www.snssdk.com',
                       'www.amemv.com', 'www.iesdouyin.com', 'aweme.snssdk.com']

    def hello(self):
        """Print the banner and start the download flow."""
        print('*' * 60)
        print('\t\t抖音无水印视频下载')
        print('*' * 60)
        self.run()

    def run(self):
        """Resolve and validate the share link, then hand it to downLoader."""
        self.share_url = 'https://v.douyin.com/cVKkGK/'
        # self.share_url = "http://v.douyin.com/LmKj5u/"
        if not self.share_url:
            return self.run()
        self.share_url = self.getLocation()
        share_url_parse = urlparse(self.share_url)
        # Only http(s) URLs on a known Douyin host are accepted.
        # NOTE(review): with a hardcoded share_url this retry recursion
        # never terminates when the URL is rejected — confirm intent.
        if not share_url_parse.scheme in ['http', 'https'] or not share_url_parse.netloc in self.domain:
            return self.run()
        html_url = share_url_parse.scheme + "://" + share_url_parse.netloc + \
            share_url_parse.path + "?" + share_url_parse.query
        self.downLoader(html_url)

    def downLoader(self, url):
        """Fetch the share page, derive the watermark-free video URL and
        stream it to '<name>.mp4' while printing a console progress bar.
        """
        response = requests.get(url, headers=self.headers)
        bf = BeautifulSoup(response.text, 'lxml')
        video = bf.find_all('video')
        # 'playwm' serves the watermarked stream; 'play' the clean one.
        video_url = video[0].get('src').replace('playwm', 'play')
        print(video_url)
        # Debug probe: show which headers the video endpoint answers with.
        response = requests.get(
            video_url, headers=self.headers, allow_redirects=False)
        print(response.headers.keys())
        inputs = bf.find_all("input")
        # Bug fix: the fallback name must be a string — time.time() is a
        # float, which crashed the `video_name + '.mp4'` concatenations.
        video_name = str(time.time())
        # Prefer the share description (hidden input) as the file name.
        for item in inputs:
            if item.get('name') == 'shareDesc':
                video_name = item.get('value')
                break
        size = 0
        with closing(requests.get(video_url, headers=self.headers, stream=True, verify=False)) as response:
            chunk_size = 1024
            content_size = int(response.headers['content-length'])
            if response.status_code == 200:
                sys.stdout.write(' [文件大小]:%0.2f MB %s \n' % (
                    content_size / chunk_size / 1024, video_name + '.mp4'))
                with open(video_name + ".mp4", "wb") as file:
                    for data in response.iter_content(chunk_size=chunk_size):
                        file.write(data)
                        size += len(data)
                        file.flush()
                        sys.stdout.write(' [下载进度]:%.2f%% %s' % (
                            float(size / content_size * 100), video_name + '.mp4 \r'))
                        sys.stdout.flush()
        sys.stdout.write('\n')

    def getLocation(self):
        """Follow the short link one hop and return the Location header,
        or the original URL when there is no redirect."""
        response = requests.get(
            self.share_url, headers=self.headers, allow_redirects=False)
        if 'Location' in response.headers.keys():
            return response.headers['Location']
        return self.share_url
if __name__ == '__main__':
    # Script entry point: build a downloader and kick off the flow.
    downloader = DouYinDownloader()
    downloader.hello()
无水印解析 PHP 版本:
https://github.com/nongcunqq/dspjx
https://github.com/ufan0/kill-douyin-watermark-online
https://github.com/iqiqiya/iqiqiya-API/tree/master/douyin
无水印下载(Instagram 和抖音):
https://github.com/Neilyoz/DouYinAndInsDownloader
https://github.com/jielundong/douyin-gg
#!/usr/bin/python
# -*- coding: utf-8 -*-
try:
from BeautifulSoup import BeautifulSoup
except ImportError:
from bs4 import BeautifulSoup
import requests
import urllib.request
import urllib
import json
import re
import os
# Headers for the requests-based fetches below; the mobile Safari
# user-agent makes Douyin serve its lightweight mobile pages.
headers = {
    'accept-encoding': 'deflate',
    'accept-language': 'zh-CN,zh;q=0.9',
    'pragma': 'no-cache',
    'cache-control': 'no-cache',
    'upgrade-insecure-requests': '1',
    'user-agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
}
# Header set passed as **kwargs into getHtml() by getUserAll().
HEADERS = {
    'user-agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"}
# Douyin obfuscates profile digits with a custom icon font. These two
# tables undo it: mapCode2Name maps a glyph code point string (e.g.
# '0xe602') to a glyph name, and mapCode2Font maps that glyph name to
# the digit actually displayed. Both are consumed by formatNum().
mapCode2Name = {"0xe602": "num_", "0xe605": "num_3", "0xe606": "num_4", "0xe603": "num_1", "0xe604": "num_2",
                "0xe618": "num_", "0xe619": "num_4", "0xe60a": "num_8", "0xe60b": "num_9", "0xe60e": "num_",
                "0xe60f": "num_5", "0xe60c": "num_4",
                "0xe60d": "num_1", "0xe612": "num_6", "0xe613": "num_8", "0xe610": "num_3", "0xe611": "num_2",
                "0xe616": "num_1", "0xe617": "num_3", "0xe614": "num_9", "0xe615": "num_7", "0xe609": "num_7",
                "0xe607": "num_5", "0xe608": "num_6", "0xe61b": "num_5",
                "0xe61c": "num_8", "0xe61a": "num_2", "0xe61f": "num_6", "0xe61d": "num_9", "0xe61e": "num_7"}
mapCode2Font = {"num_9": 8, "num_5": 5, "num_6": 6, "num_": 1, "num_7": 9, "num_8": 7, "num_1": 0, "num_2": 3,
                "num_3": 2, "num_4": 4}
def getUserInfo(shared_url, **headers):
    """Scrape a Douyin user's profile page into a plain dict.

    Returns {} when the page could not be fetched. The numeric fields
    (id, focus, fans, liked, works) are rendered with Douyin's
    obfuscating icon font, so each is decoded through woff2tff().
    """
    html_doc = getHtml(shared_url, **headers)
    result = {}
    if html_doc:
        # Bug fix: the original replaced the EMPTY string, which inserts
        # 'hzsd' between every character and corrupts the whole page.
        # The intent is to tag icon-font entities so formatNum() can
        # recognise them: '&#xe602;' -> 'hzsdxe602;' (formatNum slices
        # s[4:-1] and prefixes '0' to recover the '0xe602' table key).
        html_doc = html_doc.replace('&#', 'hzsd')
        soup = BeautifulSoup(html_doc, 'html.parser')
        result['avatar'] = soup.select("[class~=avatar]")[0]['src']
        result['nickname'] = soup.select("[class~=nickname]")[0].string
        uid_parts = soup.select("[class~=shortid]")[0].get_text().split(" ")
        result['id'] = woff2tff(uid_parts)
        result['sign'] = soup.select("[class~=signature]")[0].string
        follow_info = soup.select("[class~=follow-info]")[0]
        # The counters are separated by Chinese labels; split them away
        # and keep only the non-empty numeric chunks.
        counters = [d for d in splitByChinese(follow_info.get_text()) if len(d) > 0]
        result['focus'] = woff2tff(counters[0].split(' '))
        result['fans'] = woff2tff(counters[1].split(' '))
        result['liked'] = woff2tff(counters[2].split(' '))
        works_text = soup.select("[class='user-tab active tab get-list']")[0].get_text()
        result['works'] = woff2tff(works_text.split(' '))
    return result
def getUserVideos(url):
    """Fetch every video posted by the user behind a resolved share URL.

    Returns None when the URL carries no user id, or when no dytk token
    could be scraped (required everywhere except t.tiktok.com).
    """
    user_ids = re.findall(r'share/user/(\d+)', url)
    if not user_ids:
        return
    dytk = get_dytk(url)
    host = urllib.parse.urlparse(url).hostname
    if host != 't.tiktok.com' and not dytk:
        return
    return getUserMedia(user_ids[0], dytk, url)
def getRealAddress(url):
    """Resolve a v.douyin.com short link to its redirect target.

    Non-short-link URLs are returned unchanged; a short link that does
    not answer with a 302 yields None.
    """
    if 'v.douyin.com' not in url:
        return url
    res = requests.get(url, headers=headers, allow_redirects=False)
    if res.status_code == 302:
        return res.headers['Location']
    return None
def get_dytk(url):
    """Scrape the `dytk` API token out of a Douyin share page.

    Returns None when the request fails or the page carries no token.
    """
    res = requests.get(url, headers=headers)
    if not res:
        return None
    tokens = re.findall("dytk: '(.*)'", res.content.decode('utf-8'))
    return tokens[0] if tokens else None
def getUserMedia(user_id, dytk, url):
    """Page through a user's posted videos via the aweme post API.

    Follows `max_cursor` pagination until `has_more` is false and
    returns a list of dicts with addr/desc/duration/cover/statistics.
    """
    parsed = urllib.parse.urlparse(url)
    host = parsed.hostname
    api_url = "https://%s/web/api/v2/aweme/post/" % host
    params = {
        'sec_uid': urllib.parse.parse_qs(parsed.query)['sec_uid'],
        'count': '21',
        'max_cursor': '0',
        'aid': '1128',
        '_signature': '2Vx9mxAZh0o-K4Wdv7NFKNlcfY',
        'dytk': dytk,
    }
    # TikTok's international host uses a different app id and no dytk.
    if host == 't.tiktok.com':
        params.pop('dytk')
        params['aid'] = '1180'
    videos = []
    cursor = None
    total = 0
    while True:
        if cursor:
            params['max_cursor'] = str(cursor)
        res = requests.get(api_url, headers=headers, params=params)
        payload = json.loads(res.content.decode('utf-8'))
        for aweme in payload.get('aweme_list', []):
            total += 1
            aweme['hostname'] = host
            videos.append({
                'addr': aweme['video']['play_addr']['url_list'][0],
                'desc': aweme['desc'],
                'duration': aweme['video']['duration'],
                'cover': aweme['video']['cover']['url_list'][0],
                'statistics': aweme['statistics'],
            })
        if payload.get('has_more'):
            cursor = payload.get('max_cursor')
        else:
            break
    if total == 0:
        print("There's no video in number %s." % user_id)
    return videos
def getHtml(url, **headers):
    """Fetch *url* and return the response body decoded as UTF-8.

    Extra keyword arguments become request headers. Returns '' on any
    URL failure instead of raising.
    """
    try:
        req = urllib.request.Request(url, headers=headers)
        resp = urllib.request.urlopen(req)
        return str(resp.read(), 'utf-8')
    except urllib.error.HTTPError as e:
        print(e.msg)
        return ''
    except urllib.error.URLError as e:
        # Bug fix: non-HTTP failures (DNS errors, refused connections,
        # bad file:// paths) previously escaped and crashed the caller.
        print(e.reason)
        return ''
def woff2tff(ls):
    """Decode a list of obfuscated icon-font tokens into a digit string."""
    return ''.join(formatNum(token) for token in ls)
def splitByChinese(s):
    """Split *s* on every CJK unified ideograph character."""
    return re.split(u"[\u4e00-\u9fa5]", s)
def isChinese(s):
    """Return True when *s* starts with a CJK unified ideograph."""
    return re.match(u"[\u4e00-\u9fa5]", s) is not None
def formatNum(s):
    """Translate one token from the profile page into its display digit.

    Chinese label characters vanish, plain text passes through, and
    tagged icon-font tokens ('hzsdxe6..;') are decoded via the two
    module-level lookup tables.
    """
    if isChinese(s):
        return ''
    if len(s) < 8 or "hzsdxe6" not in s:
        return s
    # 'hzsdxe602;' -> 'xe602' -> '0xe602', the mapCode2Name key.
    code_point = '0' + s[4:-1]
    return str(mapCode2Font[mapCode2Name[code_point]])
def getUserAll(shared_url):
    """Fetch a user's profile plus all their videos from a share link."""
    profile = getUserInfo(shared_url, **HEADERS)
    if not profile:
        return profile
    profile['videos'] = getUserVideos(getRealAddress(shared_url))
    return profile
if __name__ == '__main__':
    # Demo run against a sample share link; dump the result as JSON.
    info = getUserAll("https://v.douyin.com/qKDMXG/")
    print(json.dumps(info))
特别注意:MongoDB 和 mitmdump 一起使用时,需要将 MongoDB 的连接配置写在函数内部,否则会报错。
def save(video_url, filename):
    """Upsert the (url, filename) pair into the douyin_shoucang collection.

    The Mongo client is created inside the function deliberately: when
    this runs under mitmdump, a module-level pymongo client fails (see
    the note accompanying this script).
    """
    client = pymongo.MongoClient('127.0.0.1', 27017)
    db = client.douyin
    collection = db.douyin_shoucang
    if video_url:
        # Collection.update() was removed in PyMongo 4; update_one with
        # upsert=True is the equivalent single-document operation.
        collection.update_one({'url': video_url},
                              {'$set': {'filename': filename}},
                              upsert=True)
import urllib.request
import json
import os
import requests, re, time, pymongo, os, queue, threading
from bs4 import BeautifulSoup
from urllib.parse import quote
def save(video_url, filename):
    """Upsert the (url, filename) pair into the douyin_shoucang collection.

    The Mongo client is created inside the function deliberately: when
    this runs under mitmdump, a module-level pymongo client fails (see
    the note accompanying this script).
    """
    client = pymongo.MongoClient('127.0.0.1', 27017)
    db = client.douyin
    collection = db.douyin_shoucang
    if video_url:
        # Collection.update() was removed in PyMongo 4; update_one with
        # upsert=True is the equivalent single-document operation.
        collection.update_one({'url': video_url},
                              {'$set': {'filename': filename}},
                              upsert=True)
# Directory (relative to the CWD) where downloaded videos are stored.
path = 'video'
# Substring identifying the favourites-list API in intercepted URLs.
url_key = 'aweme/v1/aweme/favorite/'
def response(flow):
    """mitmdump response hook for the Douyin favourites-list API.

    For every video in an intercepted favourites response, record its
    (url, filename) pair in MongoDB via save() unless the file already
    exists locally. The actual download call is intentionally commented
    out — a separate script consumes the Mongo records.
    """
    if url_key not in flow.request.url:
        return
    print("hello\n" * 3)
    data = json.loads(flow.response.text)  # parse the API JSON body
    print('uiui', flow.response.text)
    collected = []
    for aweme in data['aweme_list']:  # bug fix: no longer shadows `data`
        video_name = None
        video_url = None
        try:
            # Prefer the description as the file name; fall back to id.
            video_name = aweme['desc'] or aweme['aweme_id']
            video_url = aweme['video']['play_addr']['url_list'][0]
            print('video_name', video_name)
        except (KeyError, IndexError, TypeError):
            # Bug fix: the bare except could leave video_url unbound
            # while video_name was already set, raising NameError below.
            video_name = None
        if video_name and video_url:
            filename = path + '/' + video_name
            if not os.path.exists(filename):
                collected.append({'name': filename, 'url': video_url})
                # urllib.request.urlretrieve(video_url, filename=filename + '.mp4')
                print('下载完成:------------------>' + filename)
                save(video_url, filename)
            else:
                print('already download')
    print('len l', len(collected))
    print(collected)
import urllib.request
import json
import os
import requests, re, time, pymongo, os, queue, threading
from bs4 import BeautifulSoup
from urllib.parse import quote
# Module-level Mongo connection used by the reader/downloader below
# (this script runs standalone, not under mitmdump, so a module-level
# client is fine here).
client = pymongo.MongoClient('127.0.0.1', 27017)
db = client.douyin
collection = db.douyin_shoucang
def save_image(name, image_url):
    """Download *image_url* to '<name>.mp4' unless it already exists.

    Retries the request once (after the 5-second pause the log message
    promises) and swallows request failures so a worker thread never
    dies on one bad URL.
    """
    file_path = u'{0}.{1}'.format(name, 'mp4')
    if os.path.exists(file_path):
        print('have this one')
        return
    if not os.path.exists('video'):
        try:
            os.makedirs('video')
        except OSError:
            # Another worker may create the directory between the
            # exists() check and makedirs(); that's fine.
            pass
    try:
        try:
            response = requests.get(image_url, timeout=(3, 10))
        except requests.RequestException:
            print('等待 5 秒钟')
            print(name)
            print(image_url)
            # Bug fix: the message announced a 5-second wait but the
            # original never slept before retrying.
            time.sleep(5)
            response = requests.get(image_url, timeout=(3, 15))
        if response.status_code == 200:
            if not os.path.exists(file_path):
                with open(file_path, 'wb') as f:
                    print('now download', file_path)
                    f.write(response.content)
            else:
                print('Already Downloaded', file_path)
    except requests.RequestException:
        # Bug fix: catching only ConnectionError let ReadTimeout and
        # other request errors escape and kill the worker thread.
        print('Failed to save image')
# Build the download work list from every record previously saved in
# Mongo by the mitmdump hook.
items = collection.find()
l = []
for item in items:
    file_dict = {}
    url = item.get('url')
    filename = item.get('filename')
    # NOTE(review): `'' in filename` is True for EVERY string, so this
    # branch always runs; given the re.sub below escapes slashes it was
    # probably meant to be `'/' in filename` — confirm the intent.
    if '' in filename:
        filename = re.sub('/','\//',filename)
        print(item)
        print(filename)
        print(url)
    file_dict['filename'] = filename
    file_dict['url'] = url
    l.append(file_dict)
    # break
class MyThread(threading.Thread):
    """Minimal thread that simply runs the callable it was given."""

    def __init__(self, func):
        super().__init__()
        self.func = func  # callable invoked by run()

    def run(self):
        """Execute the wrapped callable on this thread."""
        self.func()
def worker():
    """Drain the shared queue, downloading one item per iteration.

    Uses get_nowait() so the thread exits cleanly when the queue runs
    dry. Bug fix: the original empty()/get() pair raced — a blocking
    get() could hang forever when another worker consumed the last item
    between the two calls.
    """
    while True:
        try:
            item = q.get_nowait()
        except queue.Empty:
            break
        save_image(item['filename'], item['url'])
def main():
    """Queue every pending task, then run a pool of download threads."""
    for job in l:
        q.put(job)
    pool = []
    for _ in range(threadNum):
        t = MyThread(worker)
        t.start()
        pool.append(t)
    # Block until every worker has drained its share of the queue.
    for t in pool:
        t.join()
# Shared work queue consumed by the worker threads.
q = queue.Queue()
# Size of the download thread pool.
threadNum = 100
main()