概述
- 使用 urllib 模拟人人网的登录过程
- 使用 urllib 模拟人人网登录后获取个人数据的过程
- 使用 urllib 模拟登录及获取个人数据的完整过程
- 使用 requests 模拟登录及获取个人数据的完整过程
- 重点了解其中用于状态维持和会话跟踪的技术
- 此文档具有时效性，只针对当前的人人网有效；代码中使用的数据均为模拟数据，并非真实数据，如需正确运行，请替换成自己的数据
- 此文档的目的：仅提供积累知识、运用相关库解决问题的思路分享
使用 urllib 库模拟人人网的登录过程
from urllib import request, parse
# Simulate the Renren login request with urllib alone: POST the login form
# and print whatever the server sends back.
login_url = "http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=201980202238"

# Login form fields. The values are placeholders and have to be replaced with
# ones captured from a real browser session before this can succeed.
# NOTE(review): 'f' is already percent-encoded, so urlencode() below encodes
# its '%' characters a second time -- confirm that is what the server expects.
data = {
    'email': '15311111111',
    'icode': '',
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
    'key_id': 1,
    'captcha_type': 'web_login',
    'password': '4566d0f4a94fyyz4f940abcd23f6a48f62589726a63106a2d7112ce194113cd1',
    'rkey': '00b732e6c4b8d408b75655e15dd43a82',
    'f': 'http%3A%2F%2Fwww.renren.com%2F111111111%2Fnewsfeed%2Fphoto'
}
data = parse.urlencode(data)

# Encode once and reuse the bytes for both the body and its length, so the
# declared Content-Length always describes what is actually sent.
payload = data.encode('utf-8')
headers = {
    'Content-Length': len(payload)
}
req = request.Request(login_url, data=payload, headers=headers)

# The context manager closes the HTTP response even if read()/decode() raises.
with request.urlopen(req) as res:
    print(res.read().decode('utf-8'))
使用 urllib 库模拟人人网登录后获取个人数据的过程
from urllib import request, parse
import gzip
import re
# Fetch a personal page as an already logged-in user by replaying browser
# headers (including the session cookie) with urllib, then print the matches
# of a regular expression against the returned HTML.
import zlib  # local to this snippet: only needed when the reply is 'deflate'

req_url = "http://www.renren.com/your-id"

# Headers copied from a browser session. The Cookie value is sample data and
# must be replaced with the cookie of a real logged-in session.
headers = {
    'Host': 'www.renren.com',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': 1,
    'DNT': 1,
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'Referer': 'http://www.renren.com/SysHome.do',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Cookie': 'anonymid=k2jogzmy45d6g1; depovince=GW; _r01_=1; JSESSIONID=abcTX9kPXDXiC-MXhZT4w; ick_login=eda6bae3-1016-4b43-8653-c0b0e88bf6c1; ick=ef6c24f7-ccc8-4110-8c8d-e60fa7783869; XNESSESSIONID=367a0c8b5c12; Hm_lvt_668f5751b331d2a1eec31f2dc0253443=1567945461; first_login_flag=1; ln_uact=15311111111; ln_hurl=http://head.xiaonei.com/photos/0/0/men_main.gif; wp_fold=0; jebecookies=34444812-0d3f-48b3-b7ca-01ae41fcf755|||||; Hm_lpvt_668f5751b331d2a1eec31f2dc0253443=1567947038; _de=57D139C64A11F200F1AEE8096E718C50; p=3e26031d8967cf591201ca56c7a2147d7; t=41388b62070609b8ad5462bac60219cd7; societyguester=41388b62070609b8ad5462bac60219cd7; id=772777777; xnsid=cc1b3f4c; loginfrom=syshome'
}
req = request.Request(req_url, headers=headers)

# Close the response deterministically; remember which content coding the
# server actually chose before the response object goes away.
with request.urlopen(req) as res:
    body = res.read()
    content_encoding = (res.headers.get('Content-Encoding') or '').strip().lower()

# Two codings are advertised in Accept-Encoding above, and the server may
# also answer uncompressed, so decode according to the reply instead of
# assuming gzip.
if content_encoding == 'gzip':
    body = gzip.decompress(body)
elif content_encoding == 'deflate':
    body = zlib.decompress(body)
html = body.decode('utf-8')

# NOTE(review): the pattern found here was "(.*?)", which can only match empty
# strings; the surrounding HTML tags appear to have been lost when this
# snippet was extracted. <title>...</title> is presumed -- confirm.
print(re.findall(r"<title>(.*?)</title>", html))
使用 urllib 库模拟登录及获取个人数据的过程
'''
Simulate the whole Renren flow: log in, then fetch the personal home page.
A cookie manager is used for state maintenance and session tracking.
'''
from urllib import request, parse
import gzip
import re,time
from http import cookiejar
# Cookie jar handed to the cookie processor below.
cookie = cookiejar.CookieJar()
# urllib handler constructed around that jar.
cookie_handler = request.HTTPCookieProcessor(cookie)
# Opener built with the cookie handler; doLogin() and getData() both send
# their requests through this one opener instead of request.urlopen().
opener = request.build_opener(cookie_handler)
def doLogin():
    """POST the Renren login form through the module-level ``opener``.

    The request goes through ``opener`` (built with an ``HTTPCookieProcessor``)
    rather than ``request.urlopen``, so the same opener can be reused by the
    follow-up request in ``getData``. The decoded response body is printed.

    The form values are placeholders; replace them with data captured from a
    real browser session.

    Raises:
        urllib.error.URLError: if the request fails (``HTTPError`` for an
            HTTP error status).
    """
    login_url = "http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=201980202238"
    data = {
        'email': '15311111111',
        'icode': '',
        'origURL': 'http://www.renren.com/home',
        'domain': 'renren.com',
        'key_id': 1,
        'captcha_type': 'web_login',
        'password': '4566d0f4a94fyyz4f940abcd23f6a48f62589726a63106a2d7112ce194113cd1',
        'rkey': '00b732e6c4b8d408b75655e15dd43a82',
        'f': 'http%3A%2F%2Fwww.renren.com%2F111111111%2Fnewsfeed%2Fphoto'
    }
    # Encode once; the same bytes are sent as the body and measured for the
    # Content-Length header.
    payload = parse.urlencode(data).encode('utf-8')
    headers = {
        'Content-Length': len(payload)
    }
    req = request.Request(login_url, data=payload, headers=headers)

    # Close the response even if reading or decoding fails.
    with opener.open(req) as res:
        print(res.read().decode('utf-8'))

    # NOTE(review): printed whenever the request did not raise; the response
    # body is not inspected, so this does not prove the login was accepted.
    print('登录成功!')
def getData():
    """Fetch the personal page through the module-level ``opener`` and print regex matches.

    The request is sent with the same ``opener`` that ``doLogin`` uses, and the
    body is decoded as UTF-8.

    Raises:
        urllib.error.URLError: if the request fails (``HTTPError`` for an
            HTTP error status).
    """
    req_url = "http://www.renren.com/your-id"
    req = request.Request(req_url)

    # Close the response even if reading or decoding fails.
    with opener.open(req) as res:
        html = res.read().decode('utf-8')

    # NOTE(review): the pattern found here was "(.*?)", which can only match
    # empty strings; the surrounding HTML tags appear to have been lost when
    # this snippet was extracted. <title>...</title> is presumed -- confirm.
    print(re.findall(r"<title>(.*?)</title>", html))
# Script entry point: log in first, then request the personal page.
if __name__ == "__main__":
    print("登录中...")
    doLogin()
    # Pause two seconds between the login and the follow-up request
    # (presumably to avoid hitting the site back-to-back -- confirm).
    time.sleep(2)
    getData()
使用 requests 库模拟登录及获取个人数据的过程
'''
Simulate the whole Renren flow: log in, then fetch the personal home page.
A cookie manager is used for state maintenance and session tracking.
'''
import requests
import gzip
import re,time
# Single session object shared by doLogin() (ses.post) and getData() (ses.get).
# NOTE(review): in this variant the session presumably plays the "cookie
# manager" role named in the docstring, since requests documents that a
# Session persists cookies across requests -- confirm.
ses = requests.Session()
def doLogin():
    """POST the Renren login form through the module-level session ``ses``.

    The form values are placeholders; replace them with data captured from a
    real browser session.

    Raises:
        requests.HTTPError: if the server answers with an HTTP error status.
            The urllib variant of this function already fails on such
            responses, so this keeps the two variants consistent.
    """
    login_url = "http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=201980202238"
    data = {
        'email': '15311111111',
        'icode': '',
        'origURL': 'http://www.renren.com/home',
        'domain': 'renren.com',
        'key_id': 1,
        'captcha_type': 'web_login',
        'password': '4566d0f4a94fyyz4f940abcd23f6a48f62589726a63106a2d7112ce194113cd1',
        'rkey': '00b732e6c4b8d408b75655e15dd43a82',
        'f': 'http%3A%2F%2Fwww.renren.com%2F111111111%2Fnewsfeed%2Fphoto'
    }
    res = ses.post(login_url, data=data)
    # Do not report success for a 4xx/5xx reply.
    res.raise_for_status()

    # NOTE(review): the response body is not inspected, so this message only
    # means the HTTP exchange succeeded, not that the login was accepted.
    print('登录成功!')
def getData():
    """Fetch the personal page through the module-level session ``ses`` and print regex matches.

    The request reuses the same session object that ``doLogin`` posts with,
    and the raw response bytes are decoded as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with an HTTP error status.
    """
    req_url = "http://www.renren.com/your-id"
    res = ses.get(req_url)
    # Fail loudly on a 4xx/5xx reply instead of parsing an error page.
    res.raise_for_status()
    html = res.content.decode('utf-8')

    # NOTE(review): the pattern found here was "(.*?)", which can only match
    # empty strings; the surrounding HTML tags appear to have been lost when
    # this snippet was extracted. <title>...</title> is presumed -- confirm.
    print(re.findall(r"<title>(.*?)</title>", html))
# Script entry point: log in first, then request the personal page.
if __name__ == "__main__":
    print("登录中...")
    doLogin()
    # Pause two seconds between the login and the follow-up request
    # (presumably to avoid hitting the site back-to-back -- confirm).
    time.sleep(2)
    getData()