mitmproxy+appium爬取抖音小视频

mitmproxy+appium爬取抖音小视频

目标

自动化爬取抖音APP小视频:用Appium在推荐页中逐个进入用户主页,用mitmproxy拦截接口响应,为每个用户下载最多五个小视频,并以用户ID为目录名保存至video目录。

环境配置

见:mitmproxy与Appium的官方安装文档(原文此处的链接已缺失)。运行前需要安装mitmproxy、Appium及其Python客户端,并让手机通过mitmproxy代理联网。

视频下载

dy_mitmdump.py

import urllib.request
import json
import os

# Root directory for downloads; each user gets a sub-directory named after the user id.
path = 'C:\\Users\\only\\Desktop\\video\\'
# Only responses whose request URL contains this fragment are processed.
url_key = '/aweme/v1/aweme/post'
# Upper bound on the number of videos downloaded for one user.
MAX_VIDEOS_PER_USER = 5

# Characters that are illegal in Windows file names (plus line breaks/tabs) -> '_'.
_FILENAME_SANITIZER = str.maketrans({ch: '_' for ch in '\\/:*?"<>|\r\n\t'})
# Keep names short so the full path stays within file-system limits.
_MAX_NAME_LENGTH = 100


def _safe_filename(raw_name, fallback):
    """Return *raw_name* cleaned for use as a file name, or *fallback* if nothing is left."""
    cleaned = str(raw_name or '').translate(_FILENAME_SANITIZER)
    # Windows also rejects names ending in a space or a dot.
    cleaned = cleaned[:_MAX_NAME_LENGTH].strip(' .')
    return cleaned or str(fallback)


def response(flow):
    """mitmproxy response hook: download a user's videos from a matching response.

    When the request URL contains ``url_key``, the response body is parsed as
    JSON and up to ``MAX_VIDEOS_PER_USER`` entries of its ``aweme_list`` are
    downloaded into ``path/<uid>/``. A user whose directory already exists is
    skipped so the same user is not downloaded twice.

    Args:
        flow: object exposing ``request.url`` and ``response.text``
            (presumably a mitmproxy HTTP flow -- it is only accessed through
            these two attributes here).

    Returns:
        None. Files are written to disk and progress is printed.
    """
    if url_key not in flow.request.url:
        return

    try:
        data = json.loads(flow.response.text)
    except (TypeError, ValueError):
        # Body missing or not JSON: nothing to download from this response.
        print('响应不是有效的JSON:' + flow.request.url)
        return

    aweme_list = data.get('aweme_list') if isinstance(data, dict) else None
    if not aweme_list:
        # Empty or absent list: there is no author to read the uid from.
        return

    u_id = aweme_list[0]['author']['uid']  # internal user id, used as directory name
    user_path = os.path.join(path, u_id)
    if os.path.exists(user_path):
        # The directory doubles as the "already downloaded" marker.
        return
    # makedirs (not mkdir) so a missing parent "video" directory is created too.
    os.makedirs(user_path)

    for index, aweme in enumerate(aweme_list[:MAX_VIDEOS_PER_USER], start=1):
        aweme_id = aweme.get('aweme_id') or index
        # Prefer the description as file name; fall back to the video id.
        video_name = _safe_filename(aweme.get('desc'), aweme_id)
        video_url = aweme['video']['play_addr']['url_list'][0]
        filename = os.path.join(user_path, video_name)
        if os.path.exists(filename + '.mp4'):
            # Two videos with the same description must not overwrite each other.
            filename = filename + '_' + str(aweme_id)
        try:
            urllib.request.urlretrieve(video_url, filename=filename + '.mp4')
        except (OSError, ValueError) as err:
            # One failed download should not abort the remaining videos.
            print('下载失败:' + filename + ' (' + str(err) + ')')
            continue
        print('下载完成:' + filename)

Appium自动化测试

dy_appium.py

from appium import webdriver
from appium.webdriver.common.touch_action import TouchAction
from time import sleep

# Appium server endpoint and the capabilities used to launch the app.
server = 'http://localhost:4723/wd/hub'
desired_caps = {
    'platformName': 'Android',
    'deviceName': 'OPPO_A53m',
    'appPackage': 'com.ss.android.ugc.aweme',
    'appActivity': '.main.MainActivity'
}

# NOTE(review): every coordinate below is hard-coded; presumably they match the
# screen of the device named in desired_caps and need adjusting for others.


def _tap(x, y):
    """Perform a single tap at screen coordinates (x, y)."""
    TouchAction(driver).tap(x=x, y=y).perform()


def _element_exists(resource_id):
    """Return True if an element with *resource_id* can be located right now."""
    try:
        driver.find_element_by_id(resource_id)
    except Exception:
        # Kept as broad as the original: any lookup failure counts as "absent".
        return False
    return True


# Start the session.
driver = webdriver.Remote(server, desired_caps)
print('启动会话完成')

try:
    # Dismiss the start-up prompts.
    sleep(5)
    _tap(360, 1024)  # close the prompt
    sleep(1)
    _tap(374, 564)  # close the "swipe up" hint

    # Main loop: open a user's page, go back, swipe to the next video.
    while True:
        # Tap to enter the user's home page.
        sleep(1)
        # A TouchAction tap here sometimes opened the "following" page, so a
        # timed driver.tap is used instead.
        driver.tap([(650, 360)], 500)
        sleep(2)

        # Return to the recommendation feed; which control to tap depends on
        # which page the previous tap actually opened.
        if _element_exists('com.ss.android.ugc.aweme:id/aae'):
            print('正常返回')
            _tap(40, 100)
        elif _element_exists('com.ss.android.ugc.aweme:id/dag'):
            print('关闭广告')
            _tap(45, 103)
        else:
            print("关闭直播")
            _tap(374, 564)  # close the prompt
            _tap(675, 50)  # close the live stream
        sleep(1)

        # Swipe up to the next video (a press/move_to/release TouchAction was
        # sometimes interpreted as a long press, so driver.swipe is used).
        driver.swipe(340, 918, 340, 230)
finally:
    # The loop only ends via an exception or Ctrl-C; always release the
    # Appium session so the server and device are not left with a stale one.
    driver.quit()

Result

mitmproxy+appium爬取抖音小视频_第1张图片
mitmproxy+appium爬取抖音小视频_第2张图片
mitmproxy+appium爬取抖音小视频_第3张图片

以此记录一下爬取过程,欢迎大家一起来学习交流。

你可能感兴趣的:(python网络爬虫,app,python,selenium)