爬虫——抖音app用户信息爬取(mitmproxy截取响应报文)

爬虫——抖音app用户信息爬取(中间人代理方式)

  • mitmproxy
    • 编写一个py文件供mitmproxy加载
    • 存储到mysql
    • 完整代码
    • 效果图
  • 结尾

mitmproxy

1、官网下载 : https://mitmproxy.org/

2、在 Windows 中,以管理员身份运行 cmd 或 PowerShell,执行:

pip3 install mitmproxy

安装完成后执行 mitmdump --version,应当可以看到类似于这样的输出:

Mitmproxy: 4.0.1
Python: 3.6.5
OpenSSL: OpenSSL 1.1.0h 27 Mar 2018
Platform: Windows-10-10.0.16299-SP0

编写一个py文件供mitmproxy加载

import json,os
import pymysql

def response(flow):
    """mitmproxy ``response`` hook: print the profile fields of a Douyin user.

    Only responses whose request URL starts with the Douyin API prefix are
    inspected.  The prefix matches every URL under ``/aweme/v1/``, so bodies
    that are not JSON, or JSON without a ``user`` object, are skipped
    instead of raising.

    Args:
        flow: HTTP flow object; ``flow.request.url`` and
            ``flow.response.text`` are read.

    Returns:
        None.  The extracted fields are written to stdout.
    """
    url = 'https://aweme-eagle-hl.snssdk.com/aweme/v1/'  # Douyin API URL prefix
    if flow.request.url.startswith(url):
        text = flow.response.text
        try:
            data = json.loads(text)
        except (TypeError, ValueError):
            # Missing body (None) or a body that is not JSON
            # (json.JSONDecodeError is a ValueError): nothing to extract.
            return
        if not isinstance(data, dict) or not isinstance(data.get('user'), dict):
            # JSON response without a user object.
            return
        user = data['user']

        user_id = user.get('short_id')  # numeric id
        user_id1 = user.get('unique_id', '')  # user-chosen (modified) id
        user_name = user.get('nickname', '')  # display name
        user_qianm = user.get('signature', '')  # profile signature
        user_country = user.get('country') or ''  # country
        user_province = user.get('province') or ''  # province
        user_city = user.get('city') or ''  # city
        user_birthday = user.get('birthday', '')  # birthday
        # followers_detail may be absent or empty; the fan count is then unknown.
        followers_detail = user.get('followers_detail') or []
        user_fans = None  # follower count
        if followers_detail and isinstance(followers_detail[0], dict):
            user_fans = followers_detail[0].get('fans_count')
        user_favorited = user.get('total_favorited')  # likes received
        user_aweme = user.get('aweme_count')  # number of posts

        # A short_id of 0 (or an empty / non-numeric one) means the numeric id
        # is not usable, so fall back to the user-chosen id.
        try:
            has_short_id = int(user_id) != 0
        except (TypeError, ValueError):
            has_short_id = False
        user_id2 = user_id if has_short_id else user_id1
        print(user_id2)
        print(user_name)

        user_gender1 = ''
        if 'gender' in user:
            # 1 is reported as male; every other value is reported as female.
            user_gender1 = '男' if user['gender'] == 1 else '女'
        print(user_gender1)
        print(user_qianm)

        # Address: country + province + city, plus the school when present.
        dizhi = user_country + user_province + user_city + (user.get('school_name') or '')
        print(dizhi)

        # Always bind user_verify so later code can rely on it; the value is
        # read with .get() because the key tested and the key read differ.
        user_verify = ''
        if 'custom_verify' in user:
            user_verify = user.get('enterprise_verify_reason', '')  # verification text
            print(user_verify)

        print(user_birthday)
        print('作品数:'+ str(user_aweme))
        print('获赞数:'+ str(user_favorited))
        print('粉丝数:'+ str(user_fans))


存储到mysql

con = pymysql.connect(host='localhost', user='root', password='123456', db="douyin_user", port=3306)
        print(con)
        try:
            cursor = con.cursor()
            cursor.execute('INSERT INTO douyin_user_datas VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',[user_id2,
            user_name,
            user_gender1,
            user_qianm,
            dizhi,
            user_birthday,
            user_verify,
            user_aweme,
            user_favorited,
            user_fans])
            con.commit()
        except:
            con.rollback()
            print("asdfghj")

        con.close()

完整代码

import json,os
import pymysql

def response(flow):
    """mitmproxy ``response`` hook: print a Douyin user profile and store it.

    Only responses whose request URL starts with the Douyin API prefix are
    inspected.  The prefix matches every URL under ``/aweme/v1/``, so bodies
    that are not JSON, or JSON without a ``user`` object, are skipped
    instead of raising.  For a user profile, the extracted fields are
    printed and inserted as one row into the ``douyin_user_datas`` table.

    Args:
        flow: HTTP flow object; ``flow.request.url`` and
            ``flow.response.text`` are read.

    Returns:
        None.  Database errors during the insert are printed, not raised.
    """
    url = 'https://aweme-eagle-hl.snssdk.com/aweme/v1/'  # Douyin API URL prefix
    if flow.request.url.startswith(url):
        text = flow.response.text
        try:
            data = json.loads(text)
        except (TypeError, ValueError):
            # Missing body (None) or a body that is not JSON
            # (json.JSONDecodeError is a ValueError): nothing to extract.
            return
        if not isinstance(data, dict) or not isinstance(data.get('user'), dict):
            # JSON response without a user object.
            return
        user = data['user']

        user_id = user.get('short_id')  # numeric id
        user_id1 = user.get('unique_id', '')  # user-chosen (modified) id
        user_name = user.get('nickname', '')  # display name
        user_qianm = user.get('signature', '')  # profile signature
        user_country = user.get('country') or ''  # country
        user_province = user.get('province') or ''  # province
        user_city = user.get('city') or ''  # city
        user_birthday = user.get('birthday', '')  # birthday
        # followers_detail may be absent or empty; the fan count is then unknown.
        followers_detail = user.get('followers_detail') or []
        user_fans = None  # follower count
        if followers_detail and isinstance(followers_detail[0], dict):
            user_fans = followers_detail[0].get('fans_count')
        user_favorited = user.get('total_favorited')  # likes received
        user_aweme = user.get('aweme_count')  # number of posts

        # A short_id of 0 (or an empty / non-numeric one) means the numeric id
        # is not usable, so fall back to the user-chosen id.
        try:
            has_short_id = int(user_id) != 0
        except (TypeError, ValueError):
            has_short_id = False
        user_id2 = user_id if has_short_id else user_id1
        print(user_id2)
        print(user_name)

        user_gender1 = ''
        if 'gender' in user:
            # 1 is reported as male; every other value is reported as female.
            user_gender1 = '男' if user['gender'] == 1 else '女'
        print(user_gender1)
        print(user_qianm)

        # Address: country + province + city, plus the school when present.
        dizhi = user_country + user_province + user_city + (user.get('school_name') or '')
        print(dizhi)

        # Always bind user_verify: it is one of the INSERT parameters below.
        # The value is read with .get() because the key tested and the key
        # read differ.
        user_verify = ''
        if 'custom_verify' in user:
            user_verify = user.get('enterprise_verify_reason', '')  # verification text
            print(user_verify)

        print(user_birthday)
        print('作品数:'+ str(user_aweme))
        print('获赞数:'+ str(user_favorited))
        print('粉丝数:'+ str(user_fans))

        # Connection settings are hard-coded; adjust them to the local setup.
        con = pymysql.connect(host='localhost', user='root', password='123456', db="douyin_user", port=3306)
        print(con)
        try:
            with con.cursor() as cursor:
                # Parameterized INSERT: the driver escapes every value.  No
                # column list is given, so the values must follow the
                # table's column order.
                cursor.execute(
                    'INSERT INTO douyin_user_datas VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
                    [
                        user_id2,
                        user_name,
                        user_gender1,
                        user_qianm,
                        dizhi,
                        user_birthday,
                        user_verify,
                        user_aweme,
                        user_favorited,
                        user_fans,
                    ],
                )
            con.commit()
        except Exception as exc:
            # Best effort inside the proxy hook: keep the proxy running, but
            # report the real error instead of a placeholder message.
            print('MySQL insert failed:', repr(exc))
            con.rollback()
        finally:
            # Close the connection even if the rollback itself fails.
            con.close()

效果图

爬虫——抖音app用户信息爬取(mitmproxy截取响应报文)_第1张图片

结尾

mitmproxy教程网上有很多,这里就不介绍了。
我也是一个小白。
如果有问题请评论留言,互相进步。

你可能感兴趣的:(python爬虫)