python模拟登陆知乎

一、查看登陆

先在浏览器中按 Ctrl + Shift + N 打开隐身模式,避免受到浏览器中已登录账号的 cookie 信息干扰。
然后进入知乎登陆页 https://www.zhihu.com/#signin
按 F12,点击 Network。随便输入一个手机号尝试登录,就可以看到登录时发出的请求了。

python模拟登陆知乎_第1张图片

从headers中可以看到请求页和所需传送的data

python模拟登陆知乎_第2张图片
查看请求

即用手机号登陆请求页面的url为 https://www.zhihu.com/login/phone_num
phone_num 为手机号
password 为密码
那么 _xsrf 是什么呢?点击 Elements,按 Ctrl + F 查找 xsrf,可以在网页源代码中搜索到。不难猜出这其实是一个动态变化的校验令牌(用于防止跨站请求伪造,即 CSRF),既然它就在网页源代码中,那么我们同样可以轻松获得它。
那么可以写出一个登陆代码了

二、登陆代码

import requests
import re

# Zhihu applies anti-scraping checks, so present a regular desktop browser
# User-Agent with every request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/56.0.2924.87 Safari/537.36"
    ),
}

def get_xsrf():
    """Fetch the Zhihu sign-in page and return its ``_xsrf`` form token.

    ``_xsrf`` is a dynamically changing parameter embedded in the page HTML,
    so it is scraped from a freshly downloaded page.

    Returns:
        str: The value of the first ``_xsrf`` field found in the page.

    Raises:
        IndexError: If the page contains no ``_xsrf`` field.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    index_url = 'https://www.zhihu.com/#signin'
    # Without a timeout, requests would wait indefinitely on a stalled server.
    index_page = requests.get(index_url, headers=headers, timeout=10)
    # re.findall returns a list with every captured token value.
    tokens = re.findall(r'name="_xsrf" value="(.*?)"', index_page.text)
    if not tokens:
        # Same exception type a bare tokens[0] would raise, but with a
        # message that says what actually went wrong.
        raise IndexError('no _xsrf token found in the page at ' + index_url)
    return tokens[0]

def login(account, secret):
    """Log in to Zhihu with either a phone number or an e-mail address.

    The account string decides which endpoint is used: an 11-digit number
    starting with ``1`` goes to the phone-number endpoint, anything containing
    ``@`` goes to the e-mail endpoint. The status code, raw body and the
    ``msg`` field of the JSON response are printed.

    Args:
        account: Phone number or e-mail address used as the login name.
        secret: Password for the account.

    Returns:
        ``0`` when ``account`` is neither a phone number nor an e-mail
        address; otherwise ``None`` after the response has been printed.
    """
    # Validate the account first so a malformed one costs no network request.
    if re.match(r"^1\d{10}$", account):
        print('login by phone_num\n')
        login_url = 'https://www.zhihu.com/login/phone_num'
        account_field = 'phone_num'
    elif "@" in account:
        print('login by Email\n')
        login_url = 'https://www.zhihu.com/login/email'
        account_field = 'email'
    else:
        print('你的帐号有问题,请重新输入')
        return 0

    # Both endpoints take the same form apart from the account field name.
    # The e-mail branch used to reference an undefined ``_xsrf`` name and
    # crashed with NameError; the token now comes from one place only.
    formdata = {
        account_field: account,
        'password': secret,
        '_xsrf': get_xsrf(),
    }
    login_page = requests.post(
        url=login_url, data=formdata, headers=headers, timeout=10
    )
    print(login_page.status_code)  # HTTP status of the login request
    print(login_page.content)
    print(login_page.json()['msg'])  # server message telling whether login worked

if __name__ == "__main__":
    # Imported here because only the interactive entry point needs it.
    import getpass

    username = input('请输入你的用户名\n>  ')
    # getpass reads the password without echoing it to the terminal.
    password = getpass.getpass("请输入你的密码\n>  ")
    login(username, password)
    

三、使用cookie

接下来如果想用已登录的账号做些别的事情,就得把 cookie 保存下来,存进 headers 里就行了。
由于我上面代码的登陆封装在函数里,下面用交互操作演示一下后续步骤:

>>> import re
>>> import requests
>>> headers = {
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
}

>>> def get_xsrf():
    # Return the _xsrf token scraped from the Zhihu sign-in page.
    # _xsrf is a dynamically changing parameter.
    index_url = 'https://www.zhihu.com/#signin'
    # Fetch the page that carries the _xsrf value needed for login.
    index_page = requests.get(index_url, headers=headers)
    html = index_page.text
    pattern = r'name="_xsrf" value="(.*?)"'
    # re.findall returns a list here; the first match is used.
    _xsrf = re.findall(pattern, html)
    return _xsrf[0]

>>> xsrf = get_xsrf()
>>> formdata = {
    'phone_num':'手机号',          #输入手机号
    'password':'密码',             #输入密码
    '_xsrf':xsrf
}
>>> url = 'https://www.zhihu.com/login/phone_num'
>>> r = requests.post(url=url, data=formdata)
>>> r.json()['msg']
'登录成功'
>>> r.headers['Set-Cookie']
'aliyungf_tc=AQAAANuoyleZZwEADpFq2sqbL1fysy78; Path=/; HttpOnly, q_c1=41d9412dc9d6431cb6849a754f41f7a0|1489474734000|1489474734000; Domain=zhihu.com; expires=Fri, 13 Mar 2020 06:58:54 GMT; Path=/, z_c0="QUJCS0pUaFBYZ2dYQUFBQVlRSlZUYTRoNzFpUE5ScjNaNk5oTXJ3Q29ZVW9lT1JlM245Yk93PT0=|1489474734|79a8a1792039b9f1bb14f7562b6cba8068d95721"; Domain=zhihu.com; expires=Thu, 13 Apr 2017 06:58:54 GMT; httponly; Path=/, _xsrf=; Domain=zhihu.com; expires=Mon, 14 Mar 2016 06:58:54 GMT; Path=/, r_cap_id="ZWEwNTVlMjAxYjliNDcyNTgwZjA4MjQ4Y2ZlNGRjMmU=|1489474734|f422359103c09d0b366fa17960ef06bdb214dbe4"; Domain=zhihu.com; expires=Thu, 13 Apr 2017 06:58:54 GMT; Path=/, cap_id="NTBjZWJjNGI1MjkwNDRhNGI5YmU3N2Q4ZWY0YTJiOTc=|1489474734|451abe100589202b9e67ed860b77dfc4d076e051"; Domain=zhihu.com; expires=Thu, 13 Apr 2017 06:58:54 GMT; Path=/, l_cap_id=; Domain=zhihu.com; expires=Mon, 14 Mar 2016 06:58:54 GMT; Path=/'
>>> headers["Cookie"] = r2.headers['Set-Cookie']
>>> headers
{'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36', 'Cookie': 'aliyungf_tc=AQAAANuoyleZZwEADpFq2sqbL1fysy78; Path=/; HttpOnly, q_c1=41d9412dc9d6431cb6849a754f41f7a0|1489474734000|1489474734000; Domain=zhihu.com; expires=Fri, 13 Mar 2020 06:58:54 GMT; Path=/, z_c0="QUJCS0pUaFBYZ2dYQUFBQVlRSlZUYTRoNzFpUE5ScjNaNk5oTXJ3Q29ZVW9lT1JlM245Yk93PT0=|1489474734|79a8a1792039b9f1bb14f7562b6cba8068d95721"; Domain=zhihu.com; expires=Thu, 13 Apr 2017 06:58:54 GMT; httponly; Path=/, _xsrf=; Domain=zhihu.com; expires=Mon, 14 Mar 2016 06:58:54 GMT; Path=/, r_cap_id="ZWEwNTVlMjAxYjliNDcyNTgwZjA4MjQ4Y2ZlNGRjMmU=|1489474734|f422359103c09d0b366fa17960ef06bdb214dbe4"; Domain=zhihu.com; expires=Thu, 13 Apr 2017 06:58:54 GMT; Path=/, cap_id="NTBjZWJjNGI1MjkwNDRhNGI5YmU3N2Q4ZWY0YTJiOTc=|1489474734|451abe100589202b9e67ed860b77dfc4d076e051"; Domain=zhihu.com; expires=Thu, 13 Apr 2017 06:58:54 GMT; Path=/, l_cap_id=; Domain=zhihu.com; expires=Mon, 14 Mar 2016 06:58:54 GMT; Path=/'}

可以看到登陆后的 cookie 已经存入 headers 了
接着传入这个 headers 就可以保持登陆状态。
用requests库的session会是更方便的方法

四、使用session

import requests
import re

# A Session keeps track of cookies automatically across requests.
s = requests.Session()

# Update the session's default headers in place instead of replacing them
# with a plain dict, so the other defaults that requests sets up and the
# case-insensitive header mapping are preserved.
s.headers.update({
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/56.0.2924.87 Safari/537.36"
    ),
})


def get_xsrf():
    """Return the ``_xsrf`` token scraped from the Zhihu sign-in page.

    The page is requested through the shared session ``s``. ``_xsrf`` is a
    dynamically changing parameter that must accompany the login form.

    Returns:
        str: The first ``_xsrf`` value found in the page HTML.

    Raises:
        IndexError: If the page contains no ``_xsrf`` field.
    """
    page_source = s.get('https://www.zhihu.com/#signin').text
    # re.findall yields a list of captured values; only the first is needed.
    matches = re.findall(r'name="_xsrf" value="(.*?)"', page_source)
    return matches[0]

# Scrape a token, then submit the phone-number login form through the session.
xsrf = get_xsrf()
login_url = 'https://www.zhihu.com/login/phone_num'  # phone-number login endpoint
formdata = dict(
    phone_num='手机号',  # put the phone number here
    password='密码',  # put the password here
    _xsrf=xsrf,
)
r = s.post(login_url, data=formdata)
print(r.json()['msg'])
print(r.headers)

以后通过 s 对象的请求 headers 都会自动加上 cookie

你可能感兴趣的:(python模拟登陆知乎)