# 第六讲的作业

# 本程序获取了微博中我关注的名人明星的动态,表情、图片等链接没有去除。

# 微博的cookie有时可以获取有时无法获取。

import requests

import json

import re

# HTTP headers sent with every request.
# - User-Agent identifies the client as mobile Chrome on an Android Nexus 5.
# - cookie carries the individual "name=value" pairs joined with "; ".
#   NOTE(review): these look like credentials copied from a logged-in browser
#   session; they are hard-coded here and will presumably stop working once
#   the session expires -- confirm and consider loading them from outside
#   the source file.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/74.0.3729.169 Mobile Safari/537.36'
    ),
    'cookie': '; '.join((
        '_T_WM=42825461822',
        'ALF=1562893494',
        'SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9Whdmdr4hNT10BpB5--AjXxY5JpX5K-hUgL.FoMp1hn41h57ehn2dJLoI7L7dNSXPcxDdJMt',
        'SCF=Ap_rjiL3WjfcsgrpoI6M1NCw9xzMtjV0ET2krD8bQOBKtLG0zM9SIgARCFP_gl2AiVlfWhjCknAnAkfuG7Jp4zU.',
        'SUB=_2A25wBCDYDeRhGeFP41oY-C7MyzSIHXVTB0CQrDV6PUJbktAKLXOikW1NQRR9vjxVJ7J2-dttMpJXo5N4t7yB5w2g',
        'SUHB=0FQbxZwx1hq9pm',
        'MLOGIN=1',
        'WEIBOCN_FROM=1110005030',
        'XSRF-TOKEN=5ba360',
        'M_WEIBOCN_PARAMS=luicode%3D20000174%26lfid%3D102803_ctg1_3288_-_ctg1_3288%26uicode%3D20000174',
    )),
}

# First page of the group feed that the crawl starts from.
url = 'https://m.weibo.cn/feed/group?gid=4381951836619369'

# Characters removed from each post before printing: ASCII letters, digits,
# whitespace and the punctuation that makes up HTML tags and URLs.
# Raw string so that ``\s`` reaches the regex engine without an
# invalid-escape warning.
_MARKUP_CHARS = re.compile(r'[a-zA-Z0-9\s<="_>:/.?]+')


def get_info(url_1, number, max_pages=3):
    """Print the cleaned text of every post on consecutive feed pages.

    Starting at ``url_1``, each page is downloaded, parsed as JSON, and the
    ``text`` field of every entry in ``data.statuses`` is printed after the
    characters matched by ``_MARKUP_CHARS`` have been removed.  The page
    counter is then incremented and, while it does not exceed ``max_pages``,
    the next page is requested using the ``data.next_cursor`` value of the
    current response.

    Args:
        url_1: URL of the first page to fetch.
        number: Counter value assigned to the first page (the script
            passes 1).
        max_pages: Highest counter value that is still fetched.  The first
            page is always fetched, even if ``number`` already exceeds it.

    Returns:
        None.  Output goes to stdout.

    Raises:
        requests.RequestException: On network errors, timeouts or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON lacks ``data``, ``statuses`` or ``text``.
    """
    page_url = url_1
    # Iterate instead of recursing: one stack frame regardless of page count.
    while True:
        # A timeout keeps an unresponsive server from hanging the script.
        res = requests.get(page_url, headers=headers, timeout=10)
        res.raise_for_status()
        data = json.loads(res.text)['data']

        for status in data['statuses']:
            # Previously ``re.S`` was passed positionally, where it was taken
            # as ``count`` and capped the replacements at 16 per post.  The
            # flag itself is unnecessary: the pattern has no bare ``.``.
            print(_MARKUP_CHARS.sub('', status['text']))

        number += 1
        if number > max_pages:
            break

        # Only needed when another page is actually requested.
        next_cursor = data.get('next_cursor')
        if not next_cursor:
            # No cursor means there is nothing further to request.
            break
        # NOTE(review): follow-up pages always use this fixed group id, even
        # if ``url_1`` pointed at a different group -- kept as in the original.
        page_url = 'https://m.weibo.cn/feed/group?gid=4381951836619369&max_id={}'.format(next_cursor)

# Start the crawl at the first feed page, with the page counter at 1.
get_info(url_1=url, number=1)




# 你可能感兴趣的:(第六讲的作业)