Python学习之爬虫模拟登录新浪微博

首先感谢崔大神的书,让我对模拟登录有了更细致的了解;其次感谢几位博主,模仿他们的帖子,最后终于登录成功。

1. 用谷歌浏览器打开网页,按F12打开开发者工具,登录一次,查看登录过程中发出的请求

Python学习之爬虫模拟登录新浪微博_第1张图片
1. 预登录:返回的内容中包含post登录所需要的信息
用户名需要先用base64编码,请求地址需要用编码后的用户名和时间戳来构建

# The user name travels base64-encoded as the `su` query parameter.
raw_user = self.user.encode()
self.su = base64.b64encode(raw_user).decode()
# Pre-login URL: the placeholders take the URL-quoted `su` and a millisecond timestamp.
quoted_su = parse.quote(self.su)
url = 'https://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su={}&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.19)&_={}'.format(quoted_su, int(time.time() * 1000))
# Fetch the pre-login response; the fields below are pulled out of its text.
res = self.session.get(url).text


def _first_capture(pattern):
    # First captured group of `pattern` in the response (IndexError when absent).
    return re.findall(pattern, res)[0]


self.nonce = _first_capture(r'"nonce":"(.*?)"')
self.pubkey = _first_capture(r'"pubkey":"(.*?)"')
self.rsakv = _first_capture(r'"rsakv":"(.*?)"')
self.servertime = _first_capture(r'"servertime":(.*?),')

2. 携带data信息登录,对比两次登录的Form Data信息

Python学习之爬虫模拟登录新浪微博_第2张图片

3. sp的加密方式

JS加密代码

// Excerpt of the login script: chooses how `password` is transformed before it is sent.
// First branch: taken when the bitwise test of loginType against `rsa` is non-zero and
// the server time and sinaSSOEncoder.RSAKey are available.
if ((me.loginType & rsa) && me.servertime && sinaSSOEncoder && sinaSSOEncoder.RSAKey) {
			request.servertime = me.servertime;
			request.nonce = me.nonce;
			request.pwencode = "rsa2";
			request.rsakv = me.rsakv;
			var RSAKey = new sinaSSOEncoder.RSAKey();
			// NOTE(review): "10001" is presumably the public exponent in hex (65537) — confirm against RSAKey.setPublic.
			RSAKey.setPublic(me.rsaPubkey, "10001");
			// Text passed to encrypt: servertime, TAB, nonce, newline, then the password.
			password = RSAKey.encrypt([me.servertime, me.nonce].join("\t") + "\n" + password)
		} else {
			// Fallback branch: used when the `wsse` bit test is non-zero and hex_sha1 is available.
			if ((me.loginType & wsse) && me.servertime && sinaSSOEncoder && sinaSSOEncoder.hex_sha1) {
				request.servertime = me.servertime;
				request.nonce = me.nonce;
				request.pwencode = "wsse";
				// hex_sha1 is applied twice to the password, then once more to that result
				// concatenated with servertime and nonce.
				password = sinaSSOEncoder.hex_sha1("" + sinaSSOEncoder.hex_sha1(sinaSSOEncoder.hex_sha1(password)) + me.servertime + me.nonce)
			}
		}

Python加密代码

   def get_sp(self):
       '''Encrypt the plaintext password with RSA and return it hex-encoded.

       The plaintext layout follows the rule read from the site's JS code:
       servertime, TAB, nonce, newline, then the password. The raw
       ciphertext is also stored on ``self.sp``.
       '''
       # Both the modulus (from the pre-login response) and the exponent are hex strings.
       modulus = int(self.pubkey, 16)
       exponent = int('10001', 16)
       key = rsa.PublicKey(modulus, exponent)
       header = '\t'.join([str(self.servertime), str(self.nonce)])
       plaintext = '\n'.join([header, str(self.password)])
       self.sp = rsa.encrypt(plaintext.encode(), key)
       return b2a_hex(self.sp)

4. 二次登录验证

# Check whether the login POST succeeded (retcode is compared as the string '0').
if json_data['retcode'] == '0':
       # Query parameters for the follow-up request; the ticket comes from the POST response.
       params = {
           'ticket': json_data['ticket'],
           'ssosavestate': int(time.time()),
           'callback': 'sinaSSOController.doCrossDomainCallBack',
           'scriptId': 'ssoscript0',
           'client': 'ssologin.js(v1.4.19)',
           '_': int(time.time()*1000)
       }
       # Second-step verification: request the passport page with the ticket.
       url = 'https://passport.weibo.com/wbsso/login'
       res = self.session.get(url,params=params)
       # .group() returns the whole match, so the leading '{' outside the capture
       # group is included and the text parses as a JSON object.
       json_data1 = json.loads(re.search(r'{("result":.*})', res.text).group())
       # Check whether the login finally succeeded (this excerpt stops at the condition).
       if json_data1['result'] is True:

代码

import requests, base64, time, re, rsa, random, json, logging
from urllib import parse
from binascii import b2a_hex

class Weibo:
    '''Simulated login against Sina Weibo's SSO endpoints.

    The flow is: ``pre_log()`` fetches the server time, nonce and RSA public
    key; ``login()`` posts the credentials (password RSA-encrypted by
    ``get_sp()``) and then confirms the returned ticket with a second request.
    ``main()`` runs both steps in order.
    '''

    def __init__(self, user, password, timeout=None):
        '''Store the credentials and open an HTTP session.

        Args:
            user: Account name; sent base64-encoded as the ``su`` field.
            password: Plaintext password; only sent after RSA encryption.
            timeout: Optional timeout passed to every request. ``None``
                (the default) keeps the original behaviour of waiting
                indefinitely.
        '''
        self.user = user
        self.password = password
        self.timeout = timeout
        # The user name is base64-encoded before it is sent.
        self.su = base64.b64encode(self.user.encode()).decode()
        # Parsed pre-login response; stays None until pre_log() succeeds.
        self.res = None
        self.session = requests.session()
        # Visit the sign-in page first (presumably to collect initial cookies).
        self.session.get('https://login.sina.com.cn/signup/signin.php', timeout=self.timeout)

    def pre_log(self):
        '''Run the pre-login request and store the values the login needs.

        On success sets ``self.res`` (the parsed response) plus ``nonce``,
        ``pubkey``, ``rsakv`` and ``servertime``. On any failure the error is
        logged and ``self.res`` is reset to ``None`` so ``login()`` can tell
        that the pre-login data is unavailable.
        '''
        url = 'https://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su={}&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.19)&_={}'.format(parse.quote(self.su), int(time.time()*1000))
        try:
            text = self.session.get(url, timeout=self.timeout).text
            # The response wraps a JSON object in a JS callback; keep only the object.
            payload = json.loads(re.findall(r"({.*})", text)[0])
            nonce = payload["nonce"]
            pubkey = payload["pubkey"]
            rsakv = payload["rsakv"]
            servertime = payload["servertime"]
        except Exception as error:
            # Best-effort by design: report the problem and leave no partial state behind.
            self.res = None
            logging.error("WeiBoLogin pre_log error: %s", error)
            return
        self.res = payload
        self.nonce = nonce
        self.pubkey = pubkey
        self.rsakv = rsakv
        self.servertime = servertime

    def get_sp(self):
        '''Encrypt the plaintext password with RSA and return it hex-encoded.

        The plaintext layout follows the rule read from the site's JS code:
        servertime, TAB, nonce, newline, then the password. The raw
        ciphertext is also stored on ``self.sp``.
        '''
        publickey = rsa.PublicKey(int(self.pubkey, 16), int('10001', 16))
        message = str(self.servertime) + '\t' + str(self.nonce) + '\n' + str(self.password)
        self.sp = rsa.encrypt(message.encode(), publickey)
        return b2a_hex(self.sp)

    def login(self):
        '''Post the credentials and confirm the login with the returned ticket.

        Requires a successful ``pre_log()``; when the pre-login data is
        missing the problem is logged and the method returns without sending
        anything. The outcome of the login is reported through ``logging``.
        '''
        if not getattr(self, 'res', None):
            logging.error("WeiBoLogin login error: pre-login data is missing, call pre_log() first")
            return

        data = {
            'entry': 'account',
            'gateway': '1',
            'from': 'null',
            'savestate': '30',
            'useticket': '0',
            'vsnf': '1',
            'su': self.su,
            'service': 'account',
            'servertime': self.servertime,
            'nonce': self.nonce,
            'pwencode': 'rsa2',
            'rsakv': self.rsakv,
            'sp': self.get_sp(),
            'sr': '1920*1080',
            'encoding': 'UTF-8',
            'prelt': random.randint(1, 100),
            'url': 'https://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
            'returntype': 'TEXT'
        }

        # Captcha: when the pre-login response asks for one, save the image
        # locally and let the user type the code in.
        if self.res.get("showpin") == 1:
            # Fetched over https, like every other request to this host.
            url = "https://login.sina.com.cn/cgi/pin.php?r=%d&s=0&p=%s" % (int(time.time()), self.res["pcid"])
            with open("captcha.jpg", "wb") as file_out:
                file_out.write(self.session.get(url, timeout=self.timeout).content)
            code = input("请输入验证码:")
            data["pcid"] = self.res["pcid"]
            data["door"] = code

        url = 'https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.19)'
        try:
            json_data = self.session.post(url, data=data, timeout=self.timeout).json()
        except ValueError as error:
            # The body was not JSON (e.g. an error page).
            logging.error("WeiBoLogin login error: %s", error)
            return

        # The POST succeeded only when retcode is the string '0'.
        if json_data.get('retcode') != '0':
            logging.warning('WeiBologin Faild: %s', json_data)
            return

        params = {
            'ticket': json_data['ticket'],
            'ssosavestate': int(time.time()),
            'callback': 'sinaSSOController.doCrossDomainCallBack',
            'scriptId': 'ssoscript0',
            'client': 'ssologin.js(v1.4.19)',
            '_': int(time.time()*1000)
        }
        # Second step: confirm the ticket on the passport page.
        url = 'https://passport.weibo.com/wbsso/login'
        res = self.session.get(url, params=params, timeout=self.timeout)
        print(res.text)
        found = re.search(r'({"result":.*})', res.text)
        if found is None:
            # No result object in the response, so the login cannot be confirmed.
            logging.warning('WeiBologin Faild: %s', res.text)
            return
        json_data1 = json.loads(found.group())
        # Final verdict of the login.
        if json_data1.get('result') is True:
            print(res.cookies)
            logging.warning('WeiBologin Successed: %s', json_data1)
        else:
            logging.warning('WeiBologin Faild: %s', json_data1)

    def main(self):
        '''Run the pre-login step followed by the login step.'''
        self.pre_log()
        self.login()

if __name__ == '__main__':
    # Placeholder credentials (user name, password) — replace before running.
    credentials = ('用户名', '密码')
    Weibo(*credentials).main()

你可能感兴趣的:(爬虫基础)