Python爬虫系列之抖音热门视频爬取

Python爬虫系列之抖音热门视频爬取

主要使用 requests 库发送请求,并借助手机抓包工具分析接口。

该 demo 仅供参考,且接口已经失效;如需抖音 2019 年 5 月的全部最新接口,请点击这里获取。

抖音资源获取接口文档请点击这里

代码仅供学习交流,请勿用于非法用途

小程序爬虫接单、app爬虫接单、网页爬虫接单、接口定制、网站开发、小程序开发 > 点击这里联系我们 <

微信请扫描下方二维码

在这里插入图片描述

详细的实战过程此处不再列出,后续有时间会出一份详细的教程。

import requests
import json
import urllib.parse
import time

'''
    @Author     :王磊
    @Date       :2018/12/5
    @Description:爬取抖音热门视频
'''

# HTTP headers sent with every request made by this script. The User-Agent
# identifies the Douyin Android client (com.ss.android.ugc.aweme).
# NOTE(review): the cookie and token below are hard-coded session values;
# presumably they were copied from a packet capture and have since expired --
# confirm before relying on them, and avoid committing live credentials.
headers = {
    # Cookie pairs are listed one per line and joined with the "; " separator.
    "Cookie": "; ".join((
        "install_id=53112482656",
        "ttreq=1$a4ed279b42b9acb3dee9a3a3c2d645ce99ed786f",
        "odin_tt=38d535495242f853ffdf693ae531a152910b1047bbb3ba5c8e2fa7f3cbd7f6a1ec9f6027fc44ea36c4bd45281487d4a7",
        "sid_guard=d074b1c430eef87a3599e20ef34a5555%7C1543976393%7C5184000%7CSun%2C+03-Feb-2019+02%3A19%3A53+GMT",
        "uid_tt=4e0b25bc326fae6b428afc5826243eeb",
        "sid_tt=d074b1c430eef87a3599e20ef34a5555",
        "sessionid=d074b1c430eef87a3599e20ef34a5555",
    )),
    "Accept-Encoding": "gzip",
    "X-SS-REQ-TICKET": "1543976807598",
    "X-Tt-Token": "00d074b1c430eef87a3599e20ef34a5555b97ecb95bff1a3d1a81726386a1adf7a91df6c32bfa121fc10400ffede8df72016",
    "sdk-version": "1",
    "X-SS-TC": "0",
    "User-Agent": "com.ss.android.ugc.aweme/350 (Linux; U; Android 8.0.0; zh_CN; MI 5; Build/OPR1.170623.032; Cronet/58.0.2991.0)",
}


def getHTML(url, timeout=10):
    '''
    Fetch a URL with an HTTP GET and return the response body as text.

    :param url: address to request; the module-level ``headers`` are sent.
    :param timeout: seconds to wait for the server before ``requests`` raises
        a timeout error. Without a timeout the call can block indefinitely.
    :return: the body decoded with the detected encoding (UTF-8 when
        detection yields nothing); undecodable bytes are dropped.
    '''
    rsp = requests.get(url, headers=headers, timeout=timeout)
    # apparent_encoding can be None when detection fails (e.g. an empty
    # body); bytes.decode(None, ...) would raise TypeError, so use UTF-8 then.
    encoding = rsp.apparent_encoding or 'utf-8'
    return rsp.content.decode(encoding, 'ignore')


def postHTML(url, timeout=10):
    '''
    Send an HTTP POST (no body) to a URL and return the response as text.

    :param url: address to request; the module-level ``headers`` are sent.
    :param timeout: seconds to wait for the server before ``requests`` raises
        a timeout error. Without a timeout the call can block indefinitely.
    :return: the body decoded with the detected encoding (UTF-8 when
        detection yields nothing); undecodable bytes are dropped.
    '''
    rsp = requests.post(url, headers=headers, timeout=timeout)
    # apparent_encoding can be None when detection fails (e.g. an empty
    # body); bytes.decode(None, ...) would raise TypeError, so use UTF-8 then.
    encoding = rsp.apparent_encoding or 'utf-8'
    return rsp.content.decode(encoding, 'ignore')


def getVideo(key):
    '''
    Search for a keyword and build the play URL of the first video found.

    :param key: search keyword (unescaped text); it is percent-encoded here.
    :return: a ``playwm`` URL whose ``video_id`` is the ``uri`` of the first
        search result carrying a ``download_suffix_logo_addr`` entry. When no
        result carries one, ``video_id`` is left empty.
    :raises KeyError: if the response has no ``data`` field.
    '''
    # Percent-encode the keyword so it is safe inside the query string.
    key = urllib.parse.quote(key)
    # Keyword search endpoint; every parameter except the keyword is a fixed
    # value taken verbatim from the original request.
    url = 'https://api.amemv.com/aweme/v1/general/search/single/?keyword=' + key + '&offset=0&count=10&is_pull_refresh=0&hot_search=0&latitude=30.725991&longitude=103.968091&ts=1543984658&js_sdk_version=1.2.2&app_type=normal&manifest_version_code=350&_rticket=1543984657736&ac=wifi&device_id=60155513971&iid=53112482656&os_version=8.0.0&channel=xiaomi&version_code=350&device_type=MI%205&language=zh&uuid=862258031596696&resolution=1080*1920&openudid=8aa8e21fca47053b&update_version_code=3502&app_name=aweme&version_name=3.5.0&os_api=26&device_brand=Xiaomi&ssmix=a&device_platform=android&dpi=480&aid=1128&as=a1e5055072614ce6a74033&cp=5813c65d2e7d0769e1[eIi&mas=01327dcd31044d72007555ed00c3de0b5dcccc0c2cec866ca6c62c'
    jsonObj = json.loads(postHTML(url))
    uri = ''
    # Take the first result that actually exposes a video uri. Results that
    # lack 'aweme_info' / 'video' are skipped instead of aborting the whole
    # search with a KeyError.
    for item in jsonObj['data']:
        video = (item.get('aweme_info') or {}).get('video') or {}
        logo_addr = video.get('download_suffix_logo_addr') or {}
        candidate = logo_addr.get('uri')
        if candidate:
            uri = candidate
            break
    # Build the play address from the video's unique identifier.
    return 'https://aweme.snssdk.com/aweme/v1/playwm/?video_id=' + uri + '&line=0'


def main():
    '''
    Entry point: list the hot-search keywords and print a video URL for each.

    Fetches the hot-search list, then for every entry looks up the first
    matching video through ``getVideo`` and prints rank, keyword, hot value
    and video URL. A failure on one entry is reported on stderr and the loop
    moves on to the next entry.

    :return: None
    '''
    import sys  # local import: only needed here for error reporting

    # Whole seconds since the epoch; the search URL used elsewhere in this
    # file sends ``ts`` as an integer, so the fractional part is dropped.
    # NOTE(review): confirm the endpoint does not expect another format.
    ts = str(int(time.time()))
    # Hot-search list endpoint; all other parameters are fixed values taken
    # verbatim from the original request.
    url = 'https://aweme.snssdk.com/aweme/v1/hot/search/list/?detail_list=0&ts=' + ts + '&js_sdk_version=1.2.2&app_type=normal&manifest_version_code=350&_rticket=1543976807872&ac=wifi&device_id=60155513971&iid=53112482656&os_version=8.0.0&channel=xiaomi&version_code=350&device_type=MI%205&language=zh&resolution=1080*1920&openudid=8aa8e21fca47053b&update_version_code=3502&app_name=aweme&version_name=3.5.0&os_api=26&device_brand=Xiaomi&ssmix=a&device_platform=android&dpi=480&aid=1128&as=a1c56320b7f6ccc7874900&cp=3d63c15f7576037de1_uMy&mas=01258b5acd59f6bccb58178086286fdded0c0c9c2cec1cecc6c6c6'
    jsonObj = json.loads(getHTML(url))
    word_list = jsonObj['data']['word_list']
    # enumerate keeps the rank tied to the entry's position in the list even
    # when an earlier entry failed and was skipped.
    for hot_index, li in enumerate(word_list, 1):
        try:
            word = li['word']
            hot_value = li['hot_value']
            videoURL = getVideo(word)
            print("排名:%d ,关键词: %s ,热度值: %d ,视频下载地址: %s" % (hot_index, word, hot_value, videoURL))
        except Exception as e:
            # Best effort: one bad entry must not stop the crawl, but the
            # failure is reported instead of being silently discarded.
            print("hot-search entry %d skipped: %r" % (hot_index, e), file=sys.stderr)
        finally:
            # Pause between entries, whether the lookup succeeded or failed.
            time.sleep(3)


# Run the crawler only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
   

☞点击这里与我探讨☚

♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪
♪♪后续会更新系列基于Python的爬虫小例子,欢迎关注。♪♪
♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪♪

你可能感兴趣的:(Python)