这几天一直在学习用 Python 爬取网页,今天试着写了一个小程序,模拟登录人人网,并且提供了发送文字状态的函数。在登录的时候,已经可以处理有验证码的情况,就拿来练手吧。(处理字符串的部分比较麻烦——因为不敢用正则表达式 QAQ)
代码:
# -*- coding: utf-8 -*-
"""Log in to renren.com and publish plain-text statuses.

The script was written for Python 2 (urllib2 / cookielib).  The import shims
below bind the Python 3 equivalents to the same names so the file runs on
either interpreter.
"""
import json
import os
import subprocess
import sys
import tempfile

try:  # Python 2 module names
    import cookielib
    import urllib
    import urllib2
    urlencode = urllib.urlencode
except ImportError:  # Python 3 equivalents, bound to the names used below
    import http.cookiejar as cookielib
    import urllib.parse
    import urllib.request as urllib2
    urlencode = urllib.parse.urlencode

try:
    from bs4 import BeautifulSoup
except ImportError:  # only RenRen.getpar() needs it; fail there with a hint
    BeautifulSoup = None

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str <-> unicode conversions use UTF-8.
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf8')

try:
    # Python 2: input() eval()s whatever the user types, so an e-mail address
    # or captcha either raises or executes code.  raw_input() returns text.
    _prompt = raw_input  # noqa: F821 - builtin on Python 2
except NameError:
    _prompt = input

# One cookie jar for the whole session so the login cookies are sent with
# every later request.
cookiejar = cookielib.CookieJar()
cookproc = urllib2.HTTPCookieProcessor(cookiejar)


def _read_body(request):
    """Send *request* through the cookie-aware opener; return the raw bytes."""
    opener = urllib2.build_opener(cookproc)
    # Also install it globally, as the original code did, so any direct
    # urllib2.urlopen() caller shares the same cookies.
    urllib2.install_opener(opener)
    response = opener.open(request)
    try:
        return response.read()
    finally:
        response.close()


def _decode_page(raw):
    """Decode *raw* as UTF-8, falling back to GBK with bad bytes dropped."""
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        return raw.decode('gbk', 'ignore')


def _show_image(path):
    """Open *path* in the platform's default viewer, or print its location."""
    try:
        if hasattr(os, 'startfile'):  # Windows only
            os.startfile(path)
        elif sys.platform == 'darwin':
            subprocess.call(['open', path])
        else:
            subprocess.call(['xdg-open', path])
    except OSError:
        print('captcha image saved to %s' % path)


def get(url, headers=False, decode=True):
    """Fetch *url* with GET.

    Args:
        url: address to fetch.
        headers: optional dict of extra request headers.
        decode: when true (default) return text decoded as UTF-8, falling
            back to GBK; when false return the raw bytes (needed for images).

    Returns:
        The response body, as text or bytes depending on *decode*.
    """
    # headers must be passed by keyword: the second positional argument of
    # Request is the POST body, not the headers.
    request = urllib2.Request(url, headers=headers or {})
    raw = _read_body(request)
    return _decode_page(raw) if decode else raw


def post(url, postdata, headers=False):
    """POST the mapping *postdata* (form-encoded) to *url*.

    Args:
        url: address to post to.
        postdata: mapping of form fields; values are url-encoded here.
        headers: optional dict of extra request headers.

    Returns:
        The response body decoded as UTF-8, falling back to GBK.
    """
    body = urlencode(postdata)
    if not isinstance(body, bytes):  # Python 3 requires a bytes body
        body = body.encode('ascii')
    request = urllib2.Request(url, body, headers or {})
    return _decode_page(_read_body(request))


class RenRen(object):
    """A logged-in renren.com session.

    Constructing an instance immediately checks whether a captcha is
    required and then attempts to log in.
    """

    email = ""   # login e-mail
    pwd = ""     # login password
    _id = ""     # value of the 'id' cookie set after login
    name = ""    # name field sliced out of the home page script
    icode = ""   # captcha text typed by the user (empty if none was asked)
    _rtk = ""    # '_rtk' field sliced out of the home page script
    _tok = ""    # 'requestToken' value sent when publishing

    def __init__(self, email, pwd):
        self.email = email
        self.pwd = pwd
        self.check()
        self.login()

    def check(self):
        """Ask the server whether a captcha is needed and fetch it if so."""
        postdata = {
            '_rtk': self._rtk,
            'email': self.email,
        }
        url = 'http://www.renren.com/ajax/ShowCaptcha'
        # post() returns text, so the original comparison with the int 1
        # could never be true; compare against the string instead.
        if post(url, postdata).strip() == '1':
            self.getCode()

    def getCode(self):
        """Download the captcha image, show it, and read the user's answer."""
        par = {
            't': 'web_login',
            'rnd': 0.47730758627661407,
        }
        url = 'http://icode.renren.com/getcode.do?%s' % urlencode(par)
        image = get(url, decode=False)  # raw bytes: decoding corrupts the PNG
        fd, path = tempfile.mkstemp(suffix='.png')
        with os.fdopen(fd, 'wb') as handle:
            handle.write(image)
        _show_image(path)
        self.icode = _prompt("输入验证码:")

    @staticmethod
    def _parse_nx_user(script_text):
        """Slice (name, requestToken, _rtk) out of the ``nx.user`` script.

        The offsets are positional and unchanged from the original code;
        they depend on the exact layout of the page's script text.
        Raises ValueError when the ``nx.user`` markers are missing.
        """
        start = script_text.index('nx.user')
        end = script_text.index('nx.user.isvip')
        fields = script_text[start + 10:end - 1].split(',')
        return fields[3][8:], fields[4][17:-1], fields[5][9:-3]

    ## get necessary parameters
    def getpar(self):
        """Read the user id cookie and the tokens from the home page."""
        if BeautifulSoup is None:
            raise ImportError(
                'getpar() needs BeautifulSoup: pip install beautifulsoup4')
        for cookie in cookiejar:
            if cookie.name == 'id':
                self._id = cookie.value
                break
        url = 'http://www.renren.com/%s' % self._id
        page = get(url, decode=False)
        html = BeautifulSoup(page)
        first_script = html.find(name='script')
        self.name, self._tok, self._rtk = self._parse_nx_user(str(first_script))
        print('%s %s %s' % (self.name, self._tok, self._rtk))

    def login(self):
        """Submit the login form; on success load the session parameters."""
        postdata = {
            'captcha_type': 'web_login',
            'domain': 'renren.com',
            'email': self.email,
            # NOTE(review): this value is already percent-encoded and is
            # encoded again by urlencode(); left as in the original - confirm
            # against the live form.
            'f': 'http%3A%2F%2Fwww.renren.com%2F476284792',
            'icode': self.icode,
            'key_id': 1,
            'origURL': 'http://www.renren.com/home',
            'password': self.pwd,
            'rkey': '',
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0',
        }
        url = "http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2014611340911"
        info = json.loads(post(url, postdata, headers))
        if info["code"]:
            self.getpar()
            print('%s %s' % (self.name, '登录成功'))
        else:
            print('登录失败')

    def publish(self, content):
        """Post *content* as a text status for the logged-in user."""
        postdata = {
            '_rtk': self._rtk,
            'channel': 'renren',
            'content': content,
            # Serialise as JSON: urlencode() would otherwise send the Python
            # repr of the dict (single quotes), which is not valid JSON.
            'privacyParams': json.dumps({"sourceControl": "99"},
                                        separators=(',', ':')),
            'requestToken': self._tok,
            # NOTE(review): already percent-encoded, see the note in login().
            'withInfo': '%7B%22wpath%22%3A%5B%5D%7D',
        }
        url = 'http://shell.renren.com/' + self._id + '/status'
        post(url, postdata)
        print('ok')


if __name__ == '__main__':
    email = _prompt('用户名')
    password = _prompt('密码')
    rr = RenRen(email, password)
    # rr.publish('下雨吧')