第六讲作业

import requests
import json
import re

# HTTP headers sent with every feed request: a desktop Chrome User-Agent
# string and a fixed cookie string.
# NOTE(review): the cookie looks like a logged-in session credential that was
# pasted in by hand -- presumably it expires and must be refreshed; confirm.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/71.0.3578.98 Safari/537.36'
    ),
    'cookie': (
        '_T_WM=39971880054; SUHB=0G0vSx687zpwFs; '
        'MLOGIN=1; XSRF-TOKEN=8d7ee6'
    ),
}

# Entry URL of the group feed (first page, no max_id cursor yet).
url = 'https://m.weibo.cn/feed/group?gid=3935483719458447'
def get_info(_url, page, max_pages=10, timeout=10):
    """Print the cleaned text of every status in the feed, page by page.

    Fetches ``_url`` with the module-level ``headers``, prints each status
    text after removing every run of ASCII letters, digits, whitespace and
    the characters ``<="_>:/.?`` (presumably to drop embedded HTML tags and
    links), then follows ``next_cursor`` to the following page until the
    page counter exceeds ``max_pages`` or no cursor is returned.

    Args:
        _url: URL of the first feed page to fetch.
        page: Page number that ``_url`` counts as. At least one page is
            always fetched, even when ``page`` is already above
            ``max_pages``.
        max_pages: Highest page number to fetch (inclusive). Defaults to 10.
        timeout: Seconds to wait for each HTTP response before giving up.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        ValueError: If a response body is not valid JSON.
        KeyError: If the JSON lacks ``data``, ``statuses`` or ``text``.
    """
    # Compiled once for all pages. The original passed re.S as the fourth
    # positional argument of re.sub, which is `count` (re.S == 16), so only
    # the first 16 matches per status were removed. No flag is needed: the
    # pattern contains no '.' metacharacter outside the character class.
    noise = re.compile(r'[a-zA-Z0-9\s<="_>:/.?]+')

    while True:
        res = requests.get(_url, headers=headers, timeout=timeout)
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError.
        res.raise_for_status()
        data = json.loads(res.text)['data']

        for status in data['statuses']:
            print(noise.sub('', status['text']))

        page += 1
        next_cursor = data.get('next_cursor')
        # Stop at the page limit, or when there is no cursor to build the
        # next URL from.
        if page > max_pages or not next_cursor:
            return
        # NOTE: follow-up pages always use this fixed group feed URL,
        # regardless of the `_url` the caller passed in.
        _url = 'https://m.weibo.cn/feed/group?gid=3935483719458447&max_id={}'.format(next_cursor)

# Start the crawl at the group feed's entry URL, counting it as page 1.
get_info(_url=url, page=1)

你可能感兴趣的:(第六讲作业)