又重新学了一下 Python 的 urllib2,参考了 http://www.cnpythoner.com/post/30.html 和 http://www.pythoner.com/65.html,写了个登录人人网的脚本。脚本能够处理需要验证码的情况(会把验证码图片以 jpg 文件写到本地);抓取主页新鲜事的正则貌似有点问题,先不管了。
# -*- coding: utf-8 -*- import urllib import urllib2 import cookielib import re import config class Renren(object): def __init__(self): self.operate = '' # response的对象(不含read) self.requestToken = self.rtk = '' self.icode = '' # 验证码 self.is_login = False #added self.web_content = '' self.cj = cookielib.CookieJar() self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj)) urllib2.install_opener(self.opener) self.requestToken_pattern = re.compile(r"get_check:'([-0-9]*)'") self.rtk_pattern = re.compile(r"get_check_x:'([a-zA-Z0-9]+)'") def login(self, email='', password='', origURL=''): postdata = { 'email': email, 'password': password, 'origURL': origURL, } ruid_pattern = re.compile(r"'ruid':'(\d+)'") failCode_pattern = re.compile(r"&failCode=(\d+)") print 'Login...' while not self.is_login: self.operate = self._get_response(config.LOGINURL, postdata) cur_url = self.operate.geturl() self.web_content = self.operate.read() #print web_content ruid = ruid_pattern.search(self.web_content) if ruid: self.is_login = True print u"用户 %s %s" % (ruid.group(1), config.FAILCODE['-1']) return True else: failCode = failCode_pattern.search(cur_url) if not failCode: print '无法获得错误代码' else: definate_failCode = failCode.group(1) # 确切的failCode字符串 if definate_failCode in config.FAILCODE.keys(): print config.FAILCODE[definate_failCode] if definate_failCode == '512': self._get_icode_img() self.icode = raw_input(u"请输入验证码: ") postdata['icode'] = self.icode continue else: print '未知错误' return False def _get_response(self, url, data = None): if data is not None: req = urllib2.Request(url, urllib.urlencode(data)) else: req = urllib2.Request(url) response = self.opener.open(req) return response def _get_requestToken(self, data): self.requestToken = self.requestToken_pattern.search(data).group(1) self.rtk = self.rtk_pattern.search(data).group(1) def _get_icode_img(self): icode_img = self._get_response(config.ICODEURL).read() self._write_file('icode.jpg', icode_img) def _write_file(self, 
filename, data): try: output_file = open(filename, 'wb') output_file.writelines(data) output_file.close() print u'文件 %s 写入完成!' % filename except IOError: print "写文件失败!" #------------------------------------------------------- def viewnewinfo(self): """查看好友的更新状态""" self.__caiinfo() def __caiinfo(self): """采集信息""" h3patten = re.compile('<h3>(.*?)</h3>')#匹配范围 apatten = re.compile('<a.+>(.+)</a>:')#匹配作者 cpatten = re.compile('</a>(.+)\s')#匹配内容 infocontent = self.operate.readlines() infocontent = self.web_content print 'friend newinfo:' #for i in infocontent: content = h3patten.findall(infocontent) if len(content) != 0: for m in content: username = apatten.findall(m) info = cpatten.findall(m) if len(username) != 0: print username[0],'说',info[0] print '----------------------------------------------' else: continue if __name__ == "__main__": my_account = Renren() my_account.login(config.EMAIL, config.PASSWORD, '') my_account.viewnewinfo()
# -*- coding: utf-8 -*-
"""Settings read by the login script as ``config.<NAME>``."""

# Endpoint the login form is posted to.
LOGINURL = r'http://www.renren.com/PLogin.do'

# Captcha image endpoint.  The ``rnd`` value is the literal text
# "Math.random()", not a generated number.
ICODEURL = r'http://icode.renren.com/getcode.do?t=login&rnd=Math.random()'

# Placeholder credentials -- replace with the real account e-mail / password.
EMAIL = r'你的邮箱'
PASSWORD = r'你的密码'

# Several fail codes share the same "user name and password do not match" text.
_CREDENTIAL_MISMATCH = u'您的用户名和密码不匹配'

# FailCode via "login-v6.js": fail-code string -> message shown to the user.
FAILCODE = {
    '-1': u'登录成功',
    '0': u'登录系统错误,请稍后尝试',
    '8': u'请输入帐号,密码',
    '16': u'您的账号已停止使用',
    '32': u'帐号未激活,请激活帐号',
    '64': u'您的帐号需要解锁才能登录',
    '512': u'请您输入验证码',
    '4096': u'登录系统错误,稍后尝试',
}
FAILCODE.update(dict.fromkeys(('1', '2', '4', '128'), _CREDENTIAL_MISMATCH))