# 人人网页爬取

from http import cookiejar
import json
from urllib import request,parse
# Script: log in to renren.com with a cookie-aware opener, then download the
# home page URL returned by the login endpoint and save it to renren.html.

# Build an opener backed by a CookieJar so that cookies set by the login
# response are sent automatically on later requests made through `opener`.
cookie_o = cookiejar.CookieJar()
header = request.HTTPCookieProcessor(cookie_o)
opener = request.build_opener(header)

url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=20187216111'

# NOTE(review): account name and password hash are hard-coded in source;
# consider loading them from the environment or a config file instead.
form = {
    'email': '18701334468',
    'icode': '',
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
    'key_id': '1',
    'captcha_type': 'web_login',
    'password': '7d4520e0d16710e025a631b4befe0fa15db31a7ab8117a3e274308f0cc8e7025',
    'rkey': 'f0c0f9643f5839fb4c7885b963708900',
    'f': 'http%3A%2F%2Fwww.renren.com%2F967454034',
}
# POST data must be bytes, so url-encode the form and then encode to UTF-8.
form_str = parse.urlencode(form).encode('utf-8')

# Passing a data argument makes this a POST; the `with` block closes the
# response (and its underlying connection) once the body has been read.
with opener.open(url, form_str) as response:
    html = response.read()
print(html)

# The login endpoint is expected to answer with a JSON object.
res_dict = json.loads(html.decode('utf-8'))
print(res_dict)

# Without 'homeUrl' there is nothing to fetch; fail with the response attached
# so the cause (e.g. a rejected login) is visible instead of a bare KeyError.
if 'homeUrl' not in res_dict:
    raise KeyError(
        "login response has no 'homeUrl'; response was: %r" % (res_dict,)
    )
mm = res_dict['homeUrl']

# Fetch the home page through the same opener so the login cookies are sent.
with opener.open(mm) as response:
    html_bytes = response.read()

# Write the raw bytes unchanged; no decoding is attempted.
with open('renren.html', 'wb') as f:
    f.write(html_bytes)

# 你可能感兴趣的:(人人网页爬取)