那就拿简书试水吧
这是登录时 POST 的表单
POST http://www.jianshu.com/sessions HTTP/1.1
Host: www.jianshu.com
Connection: keep-alive
Content-Length: 835
Cache-Control: max-age=0
Origin: http://www.jianshu.com
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36
Content-Type: application/x-www-form-urlencoded
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
Referer: http://www.jianshu.com/sign_in
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,zh;q=0.8,en;q=0.6
Cookie: _ga=GA1.2.1359361344.1477749523; read_mode=night; default_font=font1; signin_redirect=http%3A%2F%2Fwww.jianshu.com%2F; _session_id=aTFiVFg4a3A4dFhVcW9XSFdudTJ4aWJnSVVzVDAwK291WC83SER1ZVExSFliODI1Z05oUkJ1RnJTSXQzWUdSOEdnKzlDY25mY1BLTWhFYWZFbmpEK1RZSW44SnMyaGt6OE5DZmdwNk5ySGhIT2g3ckk4a0tmNjN6TmJZaWt0UHJPVEM0N3N0U0VIcXVtRTBtcXpWTEJXbzgvVW1RcUtQZlRoc0wrbUFaWVFZS3VYQitWTVA4NEgrVFlvN29RN2J6YTNXNDhzNU9Uc0hhSEF0aEM1bWRSTFNhUDZVWlowYklyMi9pdDFIN08rcktIVDBNZmw1b2hrR0dyL1RNMkppTkpVb0cwVVZmWUVpLytRdWJmdjlXeFpzSFVPYVdPUGRKL3JwVlYyeStEOGhpa0lJZFBvV1FBRmEvUmtBbHNxMFk4RzJJS1o2RmNQNU4xTnlWY1Nycmd6RWJOaGxIK1lpOW83dDl5K1pHWTcvRVRqcVg0dDBWdEwzaHBlVGRzZ3YrTDNPbHozS3F2S3pNSzUyaU9lVk1Bdz09LS1heWpqcEJtYnpVRXhNMmZHQ2kvdGNBPT0%3D--f17d48bece75b85a89a1107b59ba07f65dd485c8; CNZZDATA1258679142=528760319-1475637385-https%253A%252F%252Fwww.baidu.com%252F%7C1478000581
utf8=%E2%9C%93&authenticity_token=plr76S4V%2B7ET0NL9ck%2FTQsnqkeRp1EKdY7%2FVD%2FOF36Nnx1XkDBt0tOrDnE5xMJL1d1tJwZg%2BdYsGAu%2FqhGb9bQ%3D%3D&sign_in%5Bcountry_code%5D=CN&sign_in%5Bmobile_number%5D=&sign_in%5Bname%5D=1107819603%40qq.com&sign_in%5Bpassword%5D=3692580000000000&sign_in%5Bis_foreign%5D=false&captcha%5Bvalidation%5D%5Bchallenge%5D=d3175e64726fd469fe57bf23bb00215fk3&captcha%5Bvalidation%5D%5Bgt%5D=a10ea6a23a441db3d956598988dff3c4&captcha%5Bvalidation%5D%5Bvalidate%5D=b3a855cfd86cf869ee45a2d9961c4eb9&captcha%5Bvalidation%5D%5Bseccode%5D=b3a855cfd86cf869ee45a2d9961c4eb9%7Cjordan&captcha%5Bid%5D=915721be-7c5e-43c8-82f0-ff861f21ebf0&geetest_challenge=d3175e64726fd469fe57bf23bb00215fk3&geetest_validate=b3a855cfd86cf869ee45a2d9961c4eb9&geetest_seccode=b3a855cfd86cf869ee45a2d9961c4eb9%7Cjordan&sign_in%5Bremember_me%5D=true
哈哈,极验验证太吊,我也回归爬虫入门专用模拟登录网页知乎吧!
- 这是知乎登录post内容
_xsrf:8218b238f5b5e7cb573038806c7ac09c
password:1232323232323
captcha_type:cn
remember_me:true
email:[email protected]
按照前辈的经验,下一步就是去知乎主页取 _xsrf 的值了。都凑齐了就能进行模拟登录了。
#encoding=utf-8
import urllib
import urllib.request
import http.cookiejar
import re
class browserTest:
    """Simulate a Zhihu e-mail login using urllib and a cookie-aware opener.

    The flow driven by :meth:`go` is: fetch the sign-in page (to look for the
    ``_xsrf`` token), POST the credentials to the login endpoint, then fetch
    the home page through the same cookie jar and print what comes back.

    Args:
        email: Account e-mail posted to the login endpoint. The default is
            the value that was hard-coded in the original script (it looks
            like a placeholder left by e-mail obfuscation -- replace it).
        password: Account password posted to the login endpoint. The default
            is the value that was hard-coded in the original script.
    """

    def __init__(self, email='[email protected]', password='17216187246564'):
        self.__url_xsrf = 'http://www.zhihu.com/#signin'  # page fetched to obtain _xsrf
        self.__url_login = 'http://www.zhihu.com/login/email'  # login endpoint
        self.__url_host = 'http://www.zhihu.com'  # fetched after login to inspect the result
        self.__header_xsrf = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36'
        }
        self.__email = email
        self.__password = password
        # Created lazily by createOpener(); None means "no opener yet".
        self.__opener = None

    @staticmethod
    def _decode_body(raw, content_encoding=None):
        """Return the response body *raw* (bytes) as text.

        Args:
            raw: The bytes read from the HTTP response.
            content_encoding: Value of the ``Content-Encoding`` response
                header, or ``None`` when the header is absent.

        Returns:
            The body decoded as UTF-8 after undoing gzip/deflate compression
            when the header announces it.
        """
        # Imported locally so the file's top-level import block is untouched.
        import gzip
        import zlib

        encoding = (content_encoding or '').strip().lower()
        if encoding in ('gzip', 'x-gzip'):
            raw = gzip.decompress(raw)
        elif encoding == 'deflate':
            try:
                raw = zlib.decompress(raw)
            except zlib.error:
                # Some servers send a raw deflate stream without the zlib header.
                raw = zlib.decompress(raw, -zlib.MAX_WBITS)
        return raw.decode()

    def __read_text(self, response):
        """Read *response* fully and decode it, honouring Content-Encoding."""
        return self._decode_body(response.read(),
                                 response.headers.get('Content-Encoding'))

    def createOpener(self, header, foo=1):
        """Install *header* as the default headers of the shared opener.

        Args:
            header: Mapping of header name to value; it replaces the opener's
                previous default headers.
            foo: When ``1`` (the default) a new opener with an empty cookie
                jar is built, discarding earlier cookies. Any other value
                keeps the current opener (and its cookies) and only swaps
                the headers.
        """
        # Also build an opener when none exists yet, so createOpener(h, 0)
        # called first no longer fails with AttributeError.
        if foo == 1 or self.__opener is None:
            cookie_jar = http.cookiejar.CookieJar()
            cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar)
            self.__opener = urllib.request.build_opener(cookie_handler)
        self.__opener.addheaders = list(header.items())

    def get_xsrf(self, header):
        """Fetch the sign-in page and return the first regex match in it.

        Args:
            header: Default headers to send with the request.

        Returns:
            The first match of the pattern, or ``''`` if the request,
            decoding or matching fails (the failure is printed, not raised).
        """
        xsrf = ''
        try:
            self.createOpener(header)
            res = self.__opener.open(self.__url_xsrf)
            resdata = self.__read_text(res)
            # NOTE(review): the pattern is empty, so findall() only yields
            # empty strings and the result is always ''. Presumably the real
            # _xsrf regex was lost -- TODO restore it.
            redata = re.compile('')
            xsrf = redata.findall(resdata)[0]
        except Exception as err:
            # Best-effort lookup: report the cause and fall back to ''.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print('erro', err)
        return xsrf

    def createPostData(self, xsrf):
        """Return the URL-encoded login form as bytes.

        Args:
            xsrf: Unused. The ``_xsrf`` field is deliberately not sent; the
                original author found that the login works without it.
        """
        post_data = {
            'password': self.__password,
            'remember_me': 'true',
            'email': self.__email,
        }
        return urllib.parse.urlencode(post_data).encode()

    def go(self):
        """Run the whole flow: token lookup, login POST, home-page fetch.

        Prints the token, the login response body and the home-page body.
        """
        xsrf = self.get_xsrf(self.__header_xsrf)
        print(xsrf)
        # No 'Content-Length' here: urllib computes it from the POST body, and
        # a fixed default would be attached to any body-less GET sent through
        # this opener (for example when following a redirect).
        header_post = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Host': 'www.zhihu.com',
            'Origin': 'http://www.zhihu.com',
            'Referer': 'http://www.zhihu.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest',
        }
        # A fresh opener: this drops the cookies collected while fetching _xsrf.
        self.createOpener(header_post)
        login_response = self.__opener.open(self.__url_login,
                                            self.createPostData(xsrf))
        # The request advertises gzip/deflate, so the body may be compressed.
        print(self.__read_text(login_response))
        # foo=0 keeps the opener so the login cookies are not overwritten.
        self.createOpener(self.__header_xsrf, 0)
        print(self.__read_text(self.__opener.open(self.__url_host)))
if __name__ == '__main__':
    # Script entry point: run the simulated login once, then report completion.
    browserTest().go()
    print('Done !')
哈哈,我要去XX论坛爬小黄图和种子去了,具体实现我选择私密文档,老司机先出发了,嘀嘀......