python 爬取搞笑视频_拯救不开心!Python批量爬取抖音网红猪小屁的无水印视频...

小笨聪最近看到抖音上有个昵称为猪小屁的视频,镜头里猪小屁萌萌的笨笨的好可爱。

围观一下python 爬取搞笑视频_拯救不开心!Python的批量爬取抖音网红猪小屁的无水印视频..._第1张图片https://www.zhihu.com/video/1084070262621741056

视频挺有新意的,并且不开心时还能逗人一笑,所以小笨聪就想下载下来。可是视频有水印,十分影响观看体验;并且逐个下载的话效率也很低,那怎么办呢?

人生苦短,我用Python!下面让我们愉快地开始吧!

一,代码介绍

1.用到的库

1import os

2import sys, getopt

3import urllib.parse

4import urllib.request

5import copy

6import hashlib

7import codecs

8import requests

9import re

10from six.moves import queue as Queue

11from threading import Thread

12import json

13import time

2.获取远程文件大小

def getRemoteFileSize(url, proxy=None):
    """Return the size in bytes of the remote file at ``url``.

    The size is read from the ``Content-Length`` header of a ``HEAD``
    response, so the file body itself is not requested.

    Args:
        url: Address of the remote file.
        proxy: Accepted for interface compatibility; currently unused.

    Returns:
        The advertised size as an ``int``. ``0`` is returned when the server
        answers with an HTTP error, or when no usable ``Content-Length``
        header is present.
    """
    request = urllib.request.Request(url, method='HEAD')
    try:
        # The context manager guarantees the connection is released.
        with urllib.request.urlopen(request) as response:
            # Query the message object directly: its lookup is
            # case-insensitive, while a plain dict copy of the headers is
            # not and would miss spellings such as "Content-length".
            file_size = response.headers.get('Content-Length', 0)
    except urllib.error.HTTPError as e:
        # The remote file does not exist (or cannot be retrieved).
        print(e.code)
        print(e.read().decode("utf8", errors="replace"))
        return 0

    try:
        return int(file_size)
    except (TypeError, ValueError):
        # A malformed header is treated the same as a missing one.
        return 0

3.获取文件实际地址

def get_real_address(url):
    """Resolve a ``v.douyin.com`` short link to its redirect target.

    URLs that do not contain ``v.douyin.com`` are returned unchanged.
    Otherwise one GET request is sent without following redirects, and the
    ``Location`` header is returned when the status code is 302; any other
    status yields ``None``.
    """
    if 'v.douyin.com' not in url:
        return url

    response = requests.get(url, headers=HEADERS, allow_redirects=False)
    if response.status_code != 302:
        return None
    return response.headers['Location']

4.设置下载器

class DownloadWorker(Thread):
    """Worker thread that downloads queued media items one at a time.

    Every queue entry is expected to be a
    ``(medium_type, uri, download_url, target_folder)`` tuple, which is
    passed unchanged to ``download``.
    """

    def __init__(self, queue):
        """Remember the shared work queue this thread consumes from."""
        super().__init__()
        self.queue = queue

    def run(self):
        """Process queue entries forever; the loop never returns."""
        while True:
            item = self.queue.get()
            try:
                medium_type, uri, download_url, target_folder = item
                download(medium_type, uri, download_url, target_folder)
            except Exception as exc:
                # Thread boundary: report the failure and keep the worker
                # alive so the remaining queue entries are still processed.
                print("Failed to process queue item %r: %s" % (item, exc))
            finally:
                # Always acknowledge the entry; an unacknowledged entry
                # would make queue.join() wait forever.
                self.queue.task_done()

5.设置下载队列

1def _join_download_queue(self, aweme, target_folder):

2 try:

3 if aweme.get('video', None):

4 uri = aweme['video']['play_addr']['uri']

5 download_url = "https://aweme.snssdk.com/aweme/v1/play/?{0}"

6 download_params = {

7 'video_id': uri,

8 'line': '0',

9 'ratio': '720p',

10 'media_type': '4',

11 'vr_type': '0',

12 'test_cdn': 'None',

13 'improve_bitrate': '0',

14 'iid': '35628056608',

15 'device_id': '46166618999',

16 'os_api': '18',

17 'app_name': 'aweme',

18 'channel': 'App%20Store',

19 'idfa': '00000000-0000-0000-0000-000000000000',

20 'device_platform': 'iphone',

21 'build_number': '27014',

22 'vid': '2ED380A7-F09C-6C9E-90F5-862D58F3129C',

23 'openudid': '21dae85eeac1da35a69e2a0ffeaeef61c78a2e98',

24 'device_type': 'iPhone8%2C2',

25 'app_version': '2.7.0',

26 'version_code': '2.7.0',

27 'os_version': '12.0',

28 'screen_width': '1242',

29 'aid': '1128',

30 'ac': 'WIFI'

31 }

32 if aweme.get('hostname') == 't.tiktok.com':

33 download_url = 'http://api.tiktokv.com/aweme/v1/play/?{0}'

34 download_params = {

35 'video_id': uri,

36 'line': '0',

37 'ratio': '720p',

38 'media_type': '4',

39 'vr_type': '0',

40 'test_cdn': 'None',

41 'improve_bitrate': '0',

42 'version_code': '1.7.2',

43 'language': 'en',

44 'app_name': 'trill',

45 'vid': 'D7B3981F-DD46-45A1-A97E-428B90096C3E',

46 'app_version': '1.7.2',

47 'device_id': '6619780206485964289',

48 'channel': 'App Store',

49 'mcc_mnc': '',

50 'tz_offset': '28800'

51 }

52 url = download_url.format('&'.join([key + '=' + download_params[key] for key in download_params]))

53 self.queue.put(('video', uri, url, target_folder))

54 else:

55 if aweme.get('image_infos', None):

56 image = aweme['image_infos']['label_large']

57 self.queue.put(('image', image['uri'], image['url_list'][0], target_folder))

58

59 except KeyError:

60 return

61 except UnicodeDecodeError:

62 print("Cannot decode response data from DESC %s" % aweme['desc'])

63 return

6.下载指定抖音号的视频

1 def _download_user_media(self, user_id, dytk, url):

2 current_folder = os.getcwd()

3 target_folder = os.path.join(current_folder, 'download/%s' % user_id)

4 if not os.path.isdir(target_folder):

5 os.mkdir(target_folder)

6

7 if not user_id:

8 print("Number %s does not exist" % user_id)

9 return

10 hostname = urllib.parse.urlparse(url).hostname

11 signature = self.generateSignature(str(user_id))

12 user_video_url = "https://%s/aweme/v1/aweme/post/" % hostname

13 user_video_params = {

14 'user_id': str(user_id),

15 'count': '21',

16 'max_cursor': '0',

17 'aid': '1128',

18 '_signature': signature,

19 'dytk': dytk

20 }

21 max_cursor, video_count = None, 0

22 while True:

23 if max_cursor:

24 user_video_params['max_cursor'] = str(max_cursor)

25 res = requests.get(user_video_url, headers=HEADERS, params=user_video_params)

26 contentJson = json.loads(res.content.decode('utf-8'))

27 aweme_list = contentJson.get('aweme_list', [])

28 for aweme in aweme_list:

29 video_count += 1

30 self._join_download_queue(aweme, target_folder)

31 if contentJson.get('has_more'):

32 max_cursor = contentJson.get('max_cursor')

33 else:

34 break

35 if not noFavorite:

36 favorite_folder = target_folder + '/favorite'

37 video_count = self.__download_favorite_media(user_id, dytk, hostname, signature, favorite_folder,

38 video_count)

39

40 if video_count == 0:

41 print("There's no video in number %s." % user_id)

42

43 return video_count

7.解析文件中的抖音号链接

def parse_sites(fileName):
    """Read the entries listed in a UTF-8 text file.

    Entries may be separated by commas, spaces, tabs or line breaks in any
    combination; surrounding whitespace is stripped and empty entries are
    dropped.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list of the non-empty entries in file order.
    """
    # utf-8-sig drops a leading byte-order mark, which some editors write
    # and which would otherwise stay glued to the first entry.
    with open(fileName, encoding='utf-8-sig') as f:
        txt = f.read()

    stripped = (piece.strip() for piece in re.split(r'[,\t\r\n ]', txt))
    return [site for site in stripped if site]

二,使用说明

1.配置和运行:除了安装Python和编辑器外,还需安装Node.js(官网:https://nodejs.org/)

将抖音号的链接复制粘贴到share-url.txt文件中

运行过程(视频上传太慢截图代替啦):

2.视频保存

程序运行后,会默认在当前路径的download目录下生成一个跟抖音ID名字相同的文件夹,视频都会放在这个文件夹下面。运行这个脚本,不会重复下载已经下载过的视频,所以不用担心重复下载的问题。同时,多次运行可以找回丢失的或者删除的视频。

这时我们随便选一个看看水印去掉没?python 爬取搞笑视频_拯救不开心!Python的批量爬取抖音网红猪小屁的无水印视频..._第2张图片https://www.zhihu.com/video/1084071026744352768

3.高级功能

如果想下载抖音里某个话题或音乐下的视频,比如胡歌和学猫叫,那怎么办呢?

这里小笨聪也为大家考虑到了。与前面的操作一样,只需要把相应链接粘贴进share-url.txt文件里就OK了。【完】

轻松一刻python 爬取搞笑视频_拯救不开心!Python的批量爬取抖音网红猪小屁的无水印视频..._第3张图片https://www.zhihu.com/video/1084071268982231040

以上就是本次批量下载抖音无水印视频的过程。

微信公众号“学编程的金融客”后台回复“ 猪小屁 ”即可获得源码(禁止商业用途)。拯救不开心!Python批量爬取抖音网红猪小屁的无水印视频​mp.weixin.qq.compython 爬取搞笑视频_拯救不开心!Python的批量爬取抖音网红猪小屁的无水印视频..._第4张图片

你的点赞就是对我最大的支持!微信公众号二维码

你可能感兴趣的:(python,爬取搞笑视频)