python 下载抖音分享 无水印视频

下载一条无水印抖音分享视频

# coding=utf-8
import requests
import json
import re
import os
import sys
import time
from urllib.parse import urlparse
from contextlib import closing
from bs4 import BeautifulSoup
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


class DouYinDownloader(object):
    """Download a single watermark-free Douyin video from a share link.

    Loads the mobile share page, reads the <video> src and swaps the
    watermarked 'playwm' endpoint for the clean 'play' endpoint, then
    streams the file to disk with a progress indicator.
    """

    def __init__(self):
        # Mobile Safari UA: Douyin only serves the <video> markup to phones.
        self.headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cache-control': 'max-age=0',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
        }

        # Hostnames accepted as legitimate Douyin share-link domains.
        self.domain = ['www.douyin.com', 'v.douyin.com', 'www.snssdk.com',
                       'www.amemv.com', 'www.iesdouyin.com', 'aweme.snssdk.com']

    def hello(self):
        """Print a banner, then run the download flow."""
        print('*' * 60)
        print('\t\t抖音无水印视频下载')
        print('*' * 60)
        self.run()

    def run(self):
        """Resolve and validate the share URL, then dispatch the download."""
        self.share_url = 'https://v.douyin.com/cVKkGK/'
        # self.share_url = "http://v.douyin.com/LmKj5u/"

        if not self.share_url:
            # NOTE(review): unreachable with the hard-coded URL above; if the
            # URL ever comes from user input this simply restarts the flow.
            return self.run()

        # Short links 302-redirect to the real share page.
        self.share_url = self.getLocation()

        share_url_parse = urlparse(self.share_url)

        if share_url_parse.scheme not in ['http', 'https'] or share_url_parse.netloc not in self.domain:
            return self.run()

        html_url = share_url_parse.scheme + "://" + share_url_parse.netloc + \
            share_url_parse.path + "?" + share_url_parse.query

        self.downLoader(html_url)

    def downLoader(self, url):
        """Fetch the share page, derive the watermark-free URL, stream to disk."""
        response = requests.get(url, headers=self.headers)
        bf = BeautifulSoup(response.text, 'lxml')
        video = bf.find_all('video')
        # 'playwm' is the watermarked endpoint; 'play' serves the clean file.
        video_url = video[0].get('src').replace('playwm', 'play')

        print(video_url)

        # Debug request: show the headers returned by the (redirecting) video URL.
        response = requests.get(
            video_url, headers=self.headers, allow_redirects=False)
        print(response.headers.keys())

        inputs = bf.find_all("input")
        # BUG FIX: time.time() returns a float, so the original crashed with
        # TypeError on video_name + '.mp4' whenever the page carried no
        # 'shareDesc' input. Use a string timestamp as the fallback name.
        video_name = str(int(time.time()))

        for item in inputs:
            if item.get('name') == 'shareDesc':
                video_name = item.get('value')
                break

        size = 0
        with closing(requests.get(video_url, headers=self.headers, stream=True, verify=False)) as response:
            chunk_size = 1024
            content_size = int(response.headers['content-length'])

            if response.status_code == 200:
                sys.stdout.write('  [文件大小]:%0.2f MB %s \n' % (
                    content_size / chunk_size / 1024, video_name + '.mp4'))

                with open(video_name + ".mp4", "wb") as file:
                    for data in response.iter_content(chunk_size=chunk_size):
                        file.write(data)
                        size += len(data)
                        file.flush()
                        sys.stdout.write('  [下载进度]:%.2f%% %s' % (
                            float(size / content_size * 100), video_name + '.mp4 \r'))
                        sys.stdout.flush()

                sys.stdout.write('\n')

    def getLocation(self):
        """Follow one redirect hop of the share URL; return it unchanged if none."""
        response = requests.get(
            self.share_url, headers=self.headers, allow_redirects=False)
        if 'Location' in response.headers:
            return response.headers['Location']
        return self.share_url


if __name__ == '__main__':
    # Script entry point: kick off a single download session.
    DouYinDownloader().hello()

无水印解析php版本
https://github.com/nongcunqq/dspjx
https://github.com/ufan0/kill-douyin-watermark-online
https://github.com/iqiqiya/iqiqiya-API/tree/master/douyin

无水印下载ins and douyin
https://github.com/Neilyoz/DouYinAndInsDownloader

获取用户所有无水印视频,返回json格式

https://github.com/jielundong/douyin-gg

#!/usr/bin/python
# -*- coding: utf-8 -*-
try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    from bs4 import BeautifulSoup
import requests
import urllib.request
import urllib
import json
import re
import os

# Request headers for all scraping requests in this script; the mobile Safari
# user-agent makes Douyin serve its mobile share pages.
headers = {
    'accept-encoding': 'deflate',
    'accept-language': 'zh-CN,zh;q=0.9',
    'pragma': 'no-cache',
    'cache-control': 'no-cache',
    'upgrade-insecure-requests': '1',
    'user-agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
}

# Minimal header set (UA only) used when fetching the profile HTML.
HEADERS = {
    'user-agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"}

# Douyin obfuscates digits with a custom web font: each private-use codepoint
# token (e.g. "0xe602") maps to a glyph name, and each glyph name maps to the
# digit it actually renders. formatNum() chains these two tables to decode.
mapCode2Name = {"0xe602": "num_", "0xe605": "num_3", "0xe606": "num_4", "0xe603": "num_1", "0xe604": "num_2",
                "0xe618": "num_", "0xe619": "num_4", "0xe60a": "num_8", "0xe60b": "num_9", "0xe60e": "num_",
                "0xe60f": "num_5", "0xe60c": "num_4", \
                "0xe60d": "num_1", "0xe612": "num_6", "0xe613": "num_8", "0xe610": "num_3", "0xe611": "num_2",
                "0xe616": "num_1", "0xe617": "num_3", "0xe614": "num_9", "0xe615": "num_7", "0xe609": "num_7",
                "0xe607": "num_5", "0xe608": "num_6", "0xe61b": "num_5", \
                "0xe61c": "num_8", "0xe61a": "num_2", "0xe61f": "num_6", "0xe61d": "num_9", "0xe61e": "num_7"}
# Glyph name -> real digit rendered by the font.
mapCode2Font = {"num_9": 8, "num_5": 5, "num_6": 6, "num_": 1, "num_7": 9, "num_8": 7, "num_1": 0, "num_2": 3,
                "num_3": 2, "num_4": 4}


def getUserInfo(shared_url, **headers):
    """Scrape a Douyin user's profile page into a dict.

    Returned keys: avatar, nickname, id, sign, focus, fans, liked, works.
    All numeric fields are decoded from the obfuscated web font via
    woff2tff(). Returns an empty dict when the page could not be fetched.
    """
    html_doc = getHtml(shared_url, **headers)
    result = {}
    if html_doc:
        # Replace the HTML-entity prefix so the obfuscated-font codepoints
        # survive parsing as plain "hzsdxe6.." text tokens (see formatNum).
        html_doc = html_doc.replace('&#', 'hzsd')
        soup = BeautifulSoup(html_doc, 'html.parser')
        header_url = soup.select("[class~=avatar]")[0]['src']
        nickname = soup.select("[class~=nickname]")[0].string
        uid = soup.select("[class~=shortid]")[0].get_text()
        uid = uid.split(" ")
        # Decode the font-obfuscated short id into real digits.
        id = woff2tff(uid)
        sign = soup.select("[class~=signature]")[0].string
        dataInfo = soup.select("[class~=follow-info]")[0]
        # The follow-info text mixes Chinese labels with obfuscated numbers;
        # splitting on the Chinese characters isolates the number tokens.
        dataInfo = splitByChinese(dataInfo.get_text())
        dataInfo = [d for d in dataInfo if len(d) > 0]
        # Order assumed from the page layout: following, fans, likes.
        focus = dataInfo[0].split(' ')
        focus = woff2tff(focus)
        fans = dataInfo[1].split(' ')
        fans = woff2tff(fans)
        liked = dataInfo[2].split(' ')
        liked = woff2tff(liked)
        works = soup.select("[class='user-tab active tab get-list']")[0].get_text()
        works = woff2tff(works.split(' '))
        result['avatar'] = header_url
        result['nickname'] = nickname
        result['id'] = id
        result['sign'] = sign
        result['focus'] = focus
        result['fans'] = fans
        result['liked'] = liked
        result['works'] = works
    return result


def getUserVideos(url):
    """Extract the user id from a share URL and fetch that user's video list."""
    matches = re.findall(r'share/user/(\d+)', url)
    if not matches:
        return
    dytk = get_dytk(url)
    host = urllib.parse.urlparse(url).hostname
    # TikTok's endpoint works without a dytk token; Douyin's does not.
    if not dytk and host != 't.tiktok.com':
        return
    return getUserMedia(matches[0], dytk, url)


def getRealAddress(url):
    """Resolve a v.douyin.com short link one redirect hop; pass other URLs through."""
    if 'v.douyin.com' not in url:
        return url
    response = requests.get(url, headers=headers, allow_redirects=False)
    if response.status_code == 302:
        return response.headers['Location']
    return None


def get_dytk(url):
    """Fetch *url* and scrape the `dytk` token out of the page source, or None."""
    res = requests.get(url, headers=headers)
    if not res:
        return None
    tokens = re.findall("dytk: '(.*)'", res.content.decode('utf-8'))
    return tokens[0] if tokens else None


def getUserMedia(user_id, dytk, url):
    """Page through the user's post API and collect all of their videos.

    Returns a list of dicts with keys addr/desc/duration/cover/statistics.
    NOTE(review): relies on a hard-coded `_signature` value — presumably the
    endpoint validates it loosely; confirm it still works before relying on it.
    """
    videos = []
    parsed = urllib.parse.urlparse(url)
    hostname = parsed.hostname
    # NOTE: parse_qs returns a list of values; requests will serialize it.
    sec_uid = urllib.parse.parse_qs(parsed.query)['sec_uid']

    # signature = generateSignature(str(user_id))
    user_video_url = "https://%s/web/api/v2/aweme/post/" % hostname
    user_video_params = {
        'sec_uid': sec_uid,
        'count': '21',
        'max_cursor': '0',
        'aid': '1128',
        '_signature': '2Vx9mxAZh0o-K4Wdv7NFKNlcfY',
        'dytk': dytk
    }
    # TikTok's variant uses a different app id and takes no dytk token.
    if hostname == 't.tiktok.com':
        user_video_params.pop('dytk')
        user_video_params['aid'] = '1180'

    max_cursor, video_count = None, 0
    while True:
        # Cursor-based pagination: resume from the server-supplied cursor.
        if max_cursor:
            user_video_params['max_cursor'] = str(max_cursor)
        res = requests.get(user_video_url, headers=headers,
                           params=user_video_params)
        contentJson = json.loads(res.content.decode('utf-8'))
        aweme_list = contentJson.get('aweme_list', [])
        for aweme in aweme_list:
            video_count += 1
            aweme['hostname'] = hostname
            video = {
                'addr': aweme['video']['play_addr']['url_list'][0],
                'desc': aweme['desc'],
                'duration': aweme['video']['duration'],
                'cover': aweme['video']['cover']['url_list'][0],
                'statistics': aweme['statistics']
            }
            videos.append(video)
        # Keep paging while the API reports more results.
        if contentJson.get('has_more'):
            max_cursor = contentJson.get('max_cursor')
        else:
            break

    if video_count == 0:
        print("There's no video in number %s." % user_id)

    return videos


def getHtml(url, **headers):
    """Download *url* with urllib and return the body decoded as UTF-8.

    Returns '' when the server answers with an HTTP error status.
    """
    req = urllib.request.Request(url, headers=headers)
    try:
        # FIX: the original never closed the response object (resource
        # leak); the urlopen result is a context manager, so use it as one.
        with urllib.request.urlopen(req) as resp:
            return str(resp.read(), 'utf-8')
    except urllib.error.HTTPError as e:
        print(e.msg)
        return ''


def woff2tff(ls):
    """Decode a list of obfuscated-font tokens into one plain digit string."""
    return ''.join(formatNum(token) for token in ls)


def splitByChinese(s):
    """Split *s* at every CJK unified ideograph, discarding the ideographs."""
    return re.split("[\u4e00-\u9fa5]", s, flags=re.U)


def isChinese(s):
    """Return True when *s* starts with a CJK unified ideograph."""
    return re.match("[\u4e00-\u9fa5]", s, flags=re.U) is not None


def formatNum(s):
    """Map one obfuscated-font token back to its digit; pass other text through.

    Chinese label text yields '', tokens that are not font codepoints are
    returned unchanged, and "hzsdxe6.." tokens are decoded via the two
    module-level lookup tables.
    """
    if isChinese(s):
        return ''
    if len(s) < 8 or "hzsdxe6" not in s:
        return s
    key = '0' + s[4:-1]
    return str(mapCode2Font[mapCode2Name[key]])


def getUserAll(shared_url):
    """Scrape a user's profile plus all of their videos from a share URL."""
    profile = getUserInfo(shared_url, **HEADERS)
    if not profile:
        return profile
    profile['videos'] = getUserVideos(getRealAddress(shared_url))
    return profile


if __name__ == '__main__':
    # Demo: dump one user's full profile and video list as JSON.
    print(json.dumps(getUserAll("https://v.douyin.com/qKDMXG/")))

mitmdump配合网易mumu模拟器抓取抖音收藏 存入mongodb 然后多线程下载

特别注意 mongodb和mitmdump一块儿使用时,需要将mongodb配置写在函数里,否则报错

def save(video_url, filename):
    """Upsert the (url -> filename) mapping into douyin.douyin_shoucang.

    The MongoClient is created per call on purpose: a module-level client
    misbehaves when this code runs inside mitmdump (see the note above).
    """
    client = pymongo.MongoClient('127.0.0.1', 27017)
    db = client.douyin

    collection = db.douyin_shoucang
    if video_url:
        # FIX: Collection.update() is deprecated and removed in PyMongo 4;
        # update_one(..., upsert=True) is the supported equivalent.
        collection.update_one({'url': video_url},
                              {'$set': {'filename': filename}},
                              upsert=True)


import urllib.request
import json
import os

import requests, re, time, pymongo, os, queue, threading
from bs4 import BeautifulSoup
from urllib.parse import quote



def save(video_url, filename):
    """Upsert the (url -> filename) mapping into douyin.douyin_shoucang.

    The MongoClient is created per call on purpose: a module-level client
    misbehaves when this code runs inside mitmdump.
    """
    client = pymongo.MongoClient('127.0.0.1', 27017)
    db = client.douyin

    collection = db.douyin_shoucang
    if video_url:
        # FIX: Collection.update() is deprecated and removed in PyMongo 4;
        # update_one(..., upsert=True) is the supported equivalent.
        collection.update_one({'url': video_url},
                              {'$set': {'filename': filename}},
                              upsert=True)



# Directory that downloaded videos are saved under.
path = 'video'
# URL fragment identifying the favourites API; the mitm hook filters on it.
url_key = 'aweme/v1/aweme/favorite/'


def response(flow):
    """mitmdump response hook: capture the Douyin favourites-list API.

    Fires for responses whose URL contains the favourites endpoint, pulls
    each video's name and play URL out of the JSON, and records the pair in
    MongoDB via save(). Files already present on disk are skipped.
    """
    if url_key not in flow.request.url:
        return
    print("hello\n" * 3)
    payload = json.loads(flow.response.text)  # parse the API response body
    # FIX: guard against a missing/None aweme_list (original crashed with
    # KeyError/TypeError); also stop shadowing the outer `data` variable.
    entries = payload.get('aweme_list') or []

    print('uiui', flow.response.text)

    l = []
    for entry in entries:
        try:
            # Prefer the caption as the file name, fall back to the video id.
            video_name = entry['desc'] or entry['aweme_id']
            video_url = entry['video']['play_addr']['url_list'][0]
        except (KeyError, IndexError, TypeError):
            # Malformed entry: skip it rather than kill the proxy addon
            # (the original's bare except also hid real bugs).
            continue
        if not video_name:
            continue
        filename = path + '/' + video_name
        if os.path.exists(filename):
            print('already download')
            continue
        l.append({'name': filename, 'url': video_url})
        # urllib.request.urlretrieve(video_url, filename=filename + '.mp4')
        print('下载完成:------------------>' + filename)
        save(video_url, filename)
    print('len l', len(l))
    print(l)

多线程下载mongodb中的抖音视频

import urllib.request
import json
import os

import requests, re, time, pymongo, os, queue, threading
from bs4 import BeautifulSoup
from urllib.parse import quote



# Single shared MongoDB connection for this downloader script. Documents in
# douyin.douyin_shoucang hold {'url': ..., 'filename': ...} pairs written by
# the mitmdump capture script.
client = pymongo.MongoClient('127.0.0.1', 27017)
db = client.douyin

collection = db.douyin_shoucang

def save_image(name, image_url):
    """Download *image_url* to '<name>.mp4', skipping files that already exist.

    Retries the request once — after actually pausing 5 seconds — with a
    longer read timeout before giving up.
    """
    file_path = u'{0}.{1}'.format(name, 'mp4')
    if os.path.exists(file_path):
        print('have this one')
        return

    # Ensure the target directory exists; exist_ok avoids the creation race
    # the original papered over with a bare except.
    os.makedirs('video', exist_ok=True)

    try:
        try:
            response = requests.get(image_url, timeout=(3, 10))
        except requests.RequestException:
            # FIX: the original printed '等待 5 秒钟' but never actually
            # slept; wait as advertised, then retry with a longer timeout.
            print('等待 5 秒钟')
            print(name)
            print(image_url)
            time.sleep(5)
            response = requests.get(image_url, timeout=(3, 15))
        if response.status_code == 200:
            if not os.path.exists(file_path):
                with open(file_path, 'wb') as f:
                    print('now download', file_path)
                    f.write(response.content)
            else:
                print('Already Downloaded', file_path)
    except requests.ConnectionError:
        print('Failed to save image')

items = collection.find()

# Build the download work list from every document in the collection.
l = []
for item in items:
    url = item.get('url')
    filename = item.get('filename')
    # BUG FIX: the original tested `'' in filename`, which is True for every
    # string (and raises TypeError when filename is None); the intent was to
    # escape '/' so it is not treated as a directory separator.
    if filename and '/' in filename:
        filename = re.sub('/', r'\//', filename)
    print(item)
    print(filename)
    print(url)
    l.append({'filename': filename, 'url': url})

class MyThread(threading.Thread):
    """Thread that runs a single zero-argument callable."""

    def __init__(self, func):
        super().__init__()
        self.func = func

    def run(self):
        # Invoke the wrapped callable on this thread.
        self.func()


def worker():
    """Drain the shared queue, downloading each queued item in turn."""
    while not q.empty():
        task = q.get()
        save_image(task['filename'], task['url'])
def main():
    """Queue every task in `l`, then run `threadNum` workers to completion."""
    for task in l:
        q.put(task)
    pool = []
    for _ in range(threadNum):
        t = MyThread(worker)
        t.start()
        pool.append(t)
    # Block until every worker has drained its share of the queue.
    for t in pool:
        t.join()

# Shared work queue drained by the workers; 100 threads is aggressive but
# matches the original author's choice. main() blocks until all downloads end.
q = queue.Queue()
threadNum = 100
main()


你可能感兴趣的:(python 下载抖音分享 无水印视频)