Python连接Twitter API读取用户画像及推特评论

使用Twitter API获取推特数据

最近由于实验室研究需求,需要对Twitter15及Twitter16数据集进行扩展。具体为:1.根据user_id,获取用户画像;2.根据tweet_id,获取推文下的评论。

连接Twitter API

首先,使用自己申请的推特开发者账号所提供的Key和Secret连接Twitter API。

import tweepy
import time
import csv
import pandas as pd
import json
from collections import OrderedDict
import datetime
import re

# Developer keys and secrets issued by Twitter; replace each 'XXX'
# placeholder with the values of your own developer account.
consumer_key = 'XXX'
consumer_secret = 'XXX'
access_token = 'XXX'
access_token_secret = 'XXX'

# Hand the consumer key/secret and the access token/secret to the OAuth handler.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Build the API client used by every request below. Traffic goes through the
# proxy at 127.0.0.1:7890. wait_on_rate_limit=True makes tweepy wait for the
# rate-limit window to reset instead of raising, so the retry loops further
# down do not keep hitting the API while the account is rate limited.
api = tweepy.API(auth, proxy='127.0.0.1:7890', wait_on_rate_limit=True)

读取tweet_id

# Load every tweet_id: the first column of each row in uid.csv.
# newline='' is what the csv module expects for files it parses, and blank
# rows are skipped so they cannot raise IndexError on row[0].
# The `with` statement closes the file, so no explicit close() is needed.
with open('uid.csv', 'r', encoding='utf-8', newline='') as f1:
    tweets_id = [row[0] for row in csv.reader(f1) if row]
print(tweets_id)
print(len(tweets_id))

根据tweet_id爬取推文评论

# Twitter API error codes that mean a comment can never be retrieved, mapped
# to the message printed for the operator. Retrying these is pointless.
_PERMANENT_COMMENT_ERRORS = {
    179: "没有权限查看此条评论!",
    144: "此条评论已被删除!",
    63: "用户被冻结!",
    34: "该页面不存在!",
}

# Pause between retries so a flaky connection is not hammered in a busy loop.
_COMMENT_RETRY_DELAY_SECONDS = 5

# Compiled once instead of once per comment.
_MENTION_UP_TO_SPACE = re.compile(r'\@(?:.|\s)*? ')
_MENTION_UP_TO_END = re.compile(r'\@(?:.|\s)*?$')


def _comment_error_code(exc):
    """Return the Twitter API error code carried by *exc*, or None.

    Reads ``api_code`` (set by tweepy 3.x ``TweepError``) and falls back to
    the first entry of ``api_codes`` (tweepy 4.x ``HTTPException``). Using the
    numeric code avoids comparing ``repr(exc)`` against exact message text.
    """
    code = getattr(exc, 'api_code', None)
    if code is not None:
        return code
    codes = getattr(exc, 'api_codes', None)
    return codes[0] if codes else None


def _fetch_comment_text(comment_id):
    """Return the text of the status *comment_id*, or None if unavailable.

    Errors whose code is listed in ``_PERMANENT_COMMENT_ERRORS`` end the
    attempt and yield None. Any other error is treated as a transient network
    problem and retried indefinitely, as before, but now with a pause between
    attempts.
    """
    while True:
        try:
            text = api.get_status(id=comment_id).text
        except Exception as e:
            print(repr(e))
            message = _PERMANENT_COMMENT_ERRORS.get(_comment_error_code(e))
            if message is not None:
                print(message)
                return None
            print("网络不稳定,正常重新连接...")
            time.sleep(_COMMENT_RETRY_DELAY_SECONDS)
        else:
            print("成功找的此条评论!")
            return text


def _clean_comment(text):
    """Strip @-mentions from *text* and replace newlines with spaces."""
    # Remove each span running from an '@' to the next space.
    text = _MENTION_UP_TO_SPACE.sub('', text)
    # Remove a span running from an '@' to the end of the text.
    text = _MENTION_UP_TO_END.sub('', text)
    # Drop any '@' characters that are still left.
    text = text.replace('@', '')
    return text.replace('\n', ' ')


# For every tweet, read its comment ids from comments_id/<tweet_id>.csv and
# append one row per comment to comments/<tweet_id>.csv:
#   [comment_id, cleaned_text]  when the comment was fetched,
#   [comment_id]                when it is permanently unavailable.
for tweet_id in tweets_id:
    print(f"正在获取 tweet_id = {tweet_id} 的推特的评论...")

    # Blank rows are skipped so they cannot raise IndexError on row[0].
    with open('comments_id/' + tweet_id + '.csv', 'r', encoding='utf-8', newline='') as f2:
        comments_id = [row[0] for row in csv.reader(f2) if row]

    # newline='' lets the csv module control line endings itself.
    with open('comments/' + tweet_id + '.csv', 'a', encoding='utf-8', newline='') as f3:
        writer = csv.writer(f3)
        for comment_id in comments_id:
            print(f"正在获取 comment_id = {comment_id} 的评论内容...")
            comment = _fetch_comment_text(comment_id)
            if comment is None:
                writer.writerow([comment_id])
                continue
            print(comment)
            comment = _clean_comment(comment)
            print(comment)
            writer.writerow([comment_id, comment])

读取所有的user_id

# Collect every user_id: the second column of each row in uid.csv.
user_ids = []
with open('uid.csv', mode='r', encoding='utf-8') as uid_file:
    for record in csv.reader(uid_file):
        user_ids.append(record[1])
    # Show the ids and how many were read.
    print(user_ids)
    print(len(user_ids))

根据user_id爬取用户画像

# Twitter API error codes for accounts whose profile cannot be fetched, mapped
# to (message printed for the operator, CSV file that records the user_id).
# The original code compared repr(e) against a string that never matches and
# tested the "user not found" text twice, so the suspended branch could not
# run; code 63 is the "User has been suspended." error.
_UNAVAILABLE_USER_ERRORS = {
    50: ('用户不存在!', 'user_not_found.csv'),
    63: ('用户被冻结!', 'user_has_been_suspended.csv'),
}

# Pause between retries so a flaky connection is not hammered in a busy loop.
_USER_RETRY_DELAY_SECONDS = 5

# Profile attributes copied into each <user_id>.json, in output order.
_USER_PROFILE_FIELDS = (
    "name",
    "screen_name",
    "location",
    "profile_location",
    "description",
    "protected",
    "followers_count",
    "friends_count",
    "listed_count",
    "created_at",
    "favourites_count",
    "utc_offset",
    "time_zone",
    "geo_enabled",
    "verified",
    "statuses_count",
    "lang",
    "contributors_enabled",
    "is_translator",
    "is_translation_enabled",
    "profile_background_tile",
    "profile_use_background_image",
    "has_extended_profile",
    "default_profile",
    "default_profile_image",
    "following",
    "follow_request_sent",
    "notifications",
    "translator_type",
)


def _user_error_code(exc):
    """Return the Twitter API error code carried by *exc*, or None.

    Reads ``api_code`` (set by tweepy 3.x ``TweepError``) and falls back to
    the first entry of ``api_codes`` (tweepy 4.x ``HTTPException``).
    """
    code = getattr(exc, 'api_code', None)
    if code is not None:
        return code
    codes = getattr(exc, 'api_codes', None)
    return codes[0] if codes else None


def _build_user_profile(user_id, user):
    """Return the dict of profile fields for *user*, keyed for JSON output."""
    profile = {"user_id": user_id}
    for field in _USER_PROFILE_FIELDS:
        # An attribute absent from the API response is stored as None rather
        # than aborting the whole crawl with AttributeError.
        value = getattr(user, field, None)
        if field == "created_at" and value is not None:
            # created_at is stringified so json.dumps can serialize it.
            value = str(value)
        profile[field] = value
    return profile


# Load every user_id: the second column of each row in uid.csv. Rows with
# fewer than two columns are skipped so they cannot raise IndexError.
with open('uid.csv', 'r', encoding='utf-8', newline='') as f1:
    user_ids = [row[1] for row in csv.reader(f1) if len(row) > 1]
print(user_ids)
print(len(user_ids))

# Fetch each user's profile and save it as <user_id>.json. Users that do not
# exist or are suspended are appended to their own CSV file instead. Any other
# error is treated as a transient network problem and retried after a pause.
for user_id in user_ids:
    print(f"正在查找用户ID={user_id}的用户信息...")
    while True:
        try:
            user = api.get_user(user_id=user_id)
        except Exception as e:
            print(repr(e))
            unavailable = _UNAVAILABLE_USER_ERRORS.get(_user_error_code(e))
            if unavailable is None:
                print("网络不稳定,正在尝试重新连接...")
                time.sleep(_USER_RETRY_DELAY_SECONDS)
                continue
            message, log_path = unavailable
            print(message)
            with open(log_path, 'a', encoding='utf-8', newline='') as f:
                # writerow with a one-element list writes the whole id as one
                # row; writerows(user_id) wrote one character per row.
                csv.writer(f).writerow([user_id])
            break
        else:
            user_dict = _build_user_profile(user_id, user)
            print(user_dict)
            json_str = json.dumps(user_dict, indent=4, ensure_ascii=False)
            with open(user_id + '.json', 'w', encoding='utf-8') as f2:
                f2.write(json_str)
            break

你可能感兴趣的:(Python,python,Twitter,推特,API)