1. 关于cookie和session
2. 英文验证码登录知乎(零碎知识点...喔喔...自己怕忘随意整理一下, 有点乱)
---判断验证码是否存在时, 请求的网址相对路径为图中:path(captcha?lang=en 请求的是英文的验证码)
---图中的{"show_captcha": false} 表明本次登录不用输入验证码
---captcha?lang=en : 验证码是英文的
---captcha?lang=cn : 验证码是中文的
---图中的{"show_captcha": true} 表明本次登录需要输入验证码
---图中{"show_captcha": true}, 在判断是否含有验证码时需要拿到---(判断是否有验证码是get请求)
======================================================================
---图中圈出的"img_base64", 在获取验证码时需要拿到---(获取验证码是put请求)
---(索取验证码图片, 在保证有验证码的前提下才会发送put)
======================================================================
---图中圈出的"success", 在验证验证码是否正确时需要拿到---(验证验证码是否正确是post请求)
---需要传参数
====================================================================
---set-cookie: ... 的位置
=====================================================================
---登录时发送的post请求,且需要传参数:
---登录成功
---图中圈出的相对路径是在登录时请求的, 如:("https://www.zhihu.com/api/v3/oauth/sign_in")
====================================================================
登录成功之后---
session.get("https://www.zhihu.com/") 知乎首页的网址
====================================================================
3. 中文验证码登录知乎
---首先请求的:path(/api/v3/oauth/captcha?lang=cn)
---其次,要把所有点的坐标记录下来(通过抓包工具抓取)
---get_captcha时判断
---完成,大概过程就是这样了,嘿嘿...
再来补充一下, 附上英文验证码的栗子~
# 英文验证码的登录方式
# 中文登录(点击倒立文字)
import requests,time,json
from requests.packages.urllib3.exceptions import InsecureRequestWarning
# Silence urllib3's InsecureRequestWarning, which would otherwise be emitted
# for every request in this script because they are all sent with verify=False.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Request headers sent with every call this script makes to zhihu.com.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) "
        "Gecko/20100101 Firefox/60.0"
    ),
    "Referer": "https://www.zhihu.com/signup?next=%2F",
    "origin": "https://www.zhihu.com",
    # "oauth " followed by the client id of the zhihu web client.
    "Authorization": "oauth c3cef7c66a1843f8b3a9e6a1e3160e20",
}
# 获取的服务器的Set-Cookie用session直接自动解析并保存, 在后续的请求中, 会在请求头中自动携带这些cookie
# LWPCookieJar: 对cookie进行自动操作, load() save()
from http.cookiejar import LWPCookieJar
"""创建session管理cookie"""
# Create the session that manages cookies.  Its jar is an LWPCookieJar bound
# to zhihucookie.txt, so cookies can be saved to and loaded from that file.
session = requests.Session()
session.cookies = LWPCookieJar(filename='zhihucookie.txt')

# Load cookies saved by an earlier run.  A missing or unreadable file is
# tolerated (e.g. on the first run); the reason is printed instead of being
# discarded so that a corrupt cookie file can be told apart from a missing one.
try:
    session.cookies.load(filename='zhihucookie.txt', ignore_expires=True, ignore_discard=True)
except (OSError, ValueError) as e:
    # FileNotFoundError and http.cookiejar.LoadError are OSError subclasses;
    # ValueError covers a file that cannot be decoded as text.
    print('加载失败', e)

# Request the zhihu page once up front and print the response object.
res = session.get('https://www.zhihu.com/', headers=headers, verify=False)
print(res)
"""定义知乎登录函数"""
def zhihu_login(username="XXX", password="XXXXXX"):
    """Log in to zhihu through the oauth sign_in endpoint.

    First asks whether a captcha is required.  If it is, the captcha is
    fetched, typed in by the user and verified separately; login is abandoned
    when that verification fails.  The login form is then POSTed: on HTTP 201
    the session cookies are saved and the response body is printed, any other
    status prints a failure message.

    Args:
        username: Account name placed in the login form.
        password: Password placed in the login form.
    """
    # The form always carries a "captcha" field.  Default it to an empty
    # string: without this the name was unbound whenever no captcha was
    # required, and building the form below raised NameError.
    captcha = ''
    if is_captcha():
        captcha = get_captcha()
        # The typed captcha must be checked before the login form is submitted.
        if not check_captcha(captcha):
            return

    login_url = "https://www.zhihu.com/api/v3/oauth/sign_in"
    # Parameters required by the login request.
    # NOTE(review): "timestamp" and "signature" are fixed captured values;
    # presumably the server rejects them once they are stale -- confirm.
    post_params = {
        "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
        "grant_type": "password",
        "timestamp": "1530194590142",
        "source": "com.zhihu.web",
        "signature": "9c16a7c48a9a74c3a5747a56125d9ab08a79f070",
        "username": username,
        "password": password,
        "captcha": captcha,
        "lang": "cn",
        "ref_source": "other_",
        "utm_source": "baidu",
    }
    try:
        response = session.post(login_url, data=post_params, headers=headers, verify=False)
        if response.status_code == 201:
            # Persist the cookies so that a later run can load them again.
            session.cookies.save(ignore_discard=True, ignore_expires=True)
            print(response.text)
        else:
            print('登录失败')
    except Exception as e:
        print('请求失败', e)
"""判断是否有英文验证码"""
def is_captcha():
    """Ask zhihu whether the next login needs an English captcha.

    Sends a GET to the captcha endpoint and reads the "show_captcha" flag
    from the JSON reply.

    Returns:
        True when the reply says a captcha is required.  False when it is not
        required, when the status code is not 200, or when the request or the
        parsing of the reply fails (the reason is printed in those cases).
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    try:
        response = session.get(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 200:
            # Previously this path fell through and returned None silently.
            print('请求失败', response.status_code)
            return False
        show_captcha = json.loads(response.text)['show_captcha']
    except Exception as e:
        # Best effort: report the failure instead of printing an empty line.
        print('请求失败', e)
        return False

    if show_captcha:
        print('有验证码')
        return True
    print('没有验证码')
    return False
import base64
from PIL import Image
from io import BytesIO
"""获取验证码"""
def get_captcha():
    """Fetch the English captcha image, show it and read the user's answer.

    Sends a PUT to the captcha endpoint, decodes the base64 image found under
    "img_base64" in the JSON reply, opens it in an image viewer and prompts
    for the text.

    Returns:
        The text typed by the user, or None when the status code is not 202
        or any step fails (the reason is printed in those cases).
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    try:
        # The PUT requests the captcha image; it should only be sent once it
        # is known that a captcha is required.
        response = session.put(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 202:  # note: success is 202 here
            print('获取验证码失败', response.status_code)
            return None
        img_base64 = json.loads(response.text)['img_base64']
        # Decode the image and display it to the user.
        image = Image.open(BytesIO(base64.b64decode(img_base64)))
        image.show()
        return input('请输入验证码:')
    except Exception as e:
        # Best effort: report the failure instead of printing an empty line.
        print('获取验证码失败', e)
        return None
"""验证验证码是否输入正确(需要传入参数captcha)"""
def check_captcha(captcha):
    """Submit the typed captcha to zhihu and report whether it was accepted.

    Args:
        captcha: Text the user typed for the captcha image.

    Returns:
        True if the JSON reply contains a "success" key, otherwise False.
    """
    # verify=False switches off certificate verification for the HTTPS request.
    reply = session.post(
        url='https://www.zhihu.com/api/v3/oauth/captcha?lang=en',
        data={'input_text': captcha},
        headers=headers,
        verify=False,
    )
    if 'success' not in json.loads(reply.text):
        print('输入验证码不正确')
        return False
    print('输入验证码正确')
    return True
# Start the login flow only when this file is executed as a script.
if __name__ == "__main__":
    zhihu_login()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
改版后的知乎登录:
原来form表单里的数据是直接呈现出来的, 改版之后就没有了, 经过使用花瓶抓包以及多次的尝试发现:
其他数据没啥太大变化, 但是signature是经过HMAC-SHA1加密后生成的一串数据, 所以需要做出以下改变
下面附上代码:
# 英文验证码的登录方式
# 中文登录(点击倒立文字)
import requests,time,json
from hashlib import sha1
import hmac
from requests.packages.urllib3.exceptions import InsecureRequestWarning
# Silence urllib3's InsecureRequestWarning, which would otherwise be emitted
# for every request in this script because they are all sent with verify=False.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Request headers sent with every call this script makes to zhihu.com.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) "
        "Gecko/20100101 Firefox/60.0"
    ),
    "Referer": "https://www.zhihu.com/signup?next=%2F",
    "origin": "https://www.zhihu.com",
    # "oauth " followed by the client id of the zhihu web client.
    "Authorization": "oauth c3cef7c66a1843f8b3a9e6a1e3160e20",
    "Host": "www.zhihu.com",
}
# cookies的自动化管理。
# 获取的服务器的Set-Cookie用session直接自动解析并保存,在后续的请求中,会在请求头中自动携带这些cookie
# LWPCookieJar:对cookie进行自动操作,load() save()
from http.cookiejar import LWPCookieJar
# Create the session that manages cookies.  Its jar is an LWPCookieJar bound
# to zhihucookie.txt, so cookies can be saved to and loaded from that file.
session = requests.Session()
session.cookies = LWPCookieJar(filename='zhihucookie.txt')

# Load cookies saved by an earlier run.  A missing or unreadable file is
# tolerated (e.g. on the first run); the reason is printed instead of being
# discarded so that a corrupt cookie file can be told apart from a missing one.
try:
    session.cookies.load(filename='zhihucookie.txt', ignore_expires=True, ignore_discard=True)
except (OSError, ValueError) as e:
    # FileNotFoundError and http.cookiejar.LoadError are OSError subclasses;
    # ValueError covers a file that cannot be decoded as text.
    print('暂时没有Cookie', e)
# res = session.get('https://www.zhihu.com/', headers=headers, verify=False)
# print(res)
def _sign_login_request(timestamp):
    """Return the hex HMAC-SHA1 signature for a login form stamped *timestamp*."""
    mac = hmac.new(b'd1b964811afb40118a12068ff74a12f4', digestmod=sha1)
    # The order in which the parts are fed in matters: these are the form's
    # grant_type, client_id and source values, followed by the timestamp.
    for part in ('password', 'c3cef7c66a1843f8b3a9e6a1e3160e20', 'com.zhihu.web', timestamp):
        mac.update(part.encode())
    return mac.hexdigest()


def zhihu_login(username="***", password="***"):
    """Log in to zhihu through the oauth sign_in endpoint.

    First asks whether a captcha is required.  If it is, the captcha is
    fetched, typed in by the user and verified separately; login is abandoned
    when that verification fails.  The login form is signed with a fresh
    millisecond timestamp and POSTed: on HTTP 201 the session cookies are
    saved, otherwise a failure message is printed.  The response body is
    printed in both cases.

    Args:
        username: Account name placed in the login form.
        password: Password placed in the login form.
    """
    if is_captcha():
        captcha = get_captcha()
        # The typed captcha must be checked before the login form is submitted.
        if not check_captcha(captcha):
            return
    else:
        captcha = ''

    login_url = "https://www.zhihu.com/api/v3/oauth/sign_in"
    # Millisecond timestamp; the same string goes into the signature and the form.
    tm = str(int(time.time() * 1000))
    print('tm = ', tm)
    res = _sign_login_request(tm)
    print('signature = ', res)

    post_params = {
        "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
        "grant_type": "password",
        "timestamp": tm,
        "source": "com.zhihu.web",
        "signature": res,
        "username": username,
        "password": password,
        "captcha": captcha,
        "lang": "cn",
        "ref_source": "homepage",
        "utm_source": "",
    }
    try:
        response = session.post(login_url, data=post_params, headers=headers, verify=False)
        if response.status_code == 201:
            print('登录成功')
            # Persist the cookies so that a later run can load them again.
            session.cookies.save(ignore_discard=True, ignore_expires=True)
            print(response.text)
        else:
            print('登录失败')
            print(response.text)
    except Exception as e:
        print('请求失败', e)
def is_captcha():
    """Ask zhihu whether the next login needs an English captcha.

    Sends a GET to the captcha endpoint and reads the "show_captcha" flag
    from the JSON reply.

    Returns:
        True when the reply says a captcha is required.  False when it is not
        required, when the status code is not 200, or when the request or the
        parsing of the reply fails (the reason is printed in those cases).
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    try:
        response = session.get(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 200:
            # Previously this path fell through and returned None silently.
            print('请求失败', response.status_code)
            return False
        show_captcha = json.loads(response.text)['show_captcha']
    except Exception as e:
        # Best effort: report the failure instead of printing an empty line.
        print('请求失败', e)
        return False

    if show_captcha:
        print('有验证码')
        return True
    print('没有验证码')
    return False
import base64
from PIL import Image
from io import BytesIO
def get_captcha():
    """Fetch the English captcha image, show it and read the user's answer.

    Sends a PUT to the captcha endpoint, decodes the base64 image found under
    "img_base64" in the JSON reply, opens it in an image viewer and prompts
    for the text.

    Returns:
        The text typed by the user, or None when the status code is not 202
        or any step fails (the reason is printed in those cases).
    """
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    # Seen in captured traffic: the server sets a `capsion_ticket` cookie
    # (Domain=zhihu.com; httponly; Path=/).  The session stores cookies from
    # Set-Cookie automatically, so nothing has to be done with it here.
    try:
        # The PUT requests the captcha image; it should only be sent once it
        # is known that a captcha is required.
        response = session.put(url=captcha_url, headers=headers, verify=False)
        if response.status_code != 202:
            print('获取验证码失败', response.status_code)
            return None
        img_base64 = json.loads(response.text)['img_base64']
        # Decode the base64 payload and display the image to the user.
        raw_image = base64.b64decode(img_base64)
        image = Image.open(BytesIO(raw_image))
        image.show()
        return input('请输入验证码:')
    except Exception as e:
        # Best effort: report the failure instead of printing an empty line.
        print('获取验证码失败', e)
        return None
def check_captcha(captcha):
    """Verify the typed captcha with zhihu before the login form is sent.

    Args:
        captcha: Text the user typed for the captcha image.

    Returns:
        True if the JSON reply contains a "success" key, otherwise False.
    """
    endpoint = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    form = {'input_text': captcha}
    # verify=False switches off certificate verification for the HTTPS request.
    reply = session.post(url=endpoint, data=form, headers=headers, verify=False)
    accepted = 'success' in json.loads(reply.text)
    print('输入验证码正确' if accepted else '输入验证码不正确')
    return accepted
# Start the login flow only when this file is executed as a script.
if __name__ == "__main__":
    zhihu_login()
# res = session.get('https://www.zhihu.com/', headers=headers, verify=False).text
# print(res)
# [SSL: CERTIFICATE_VERIFY_FAILED]: 在requests发送https请求时,出现的证书认证失败,解决办法:verify=False
# InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
# InsecureRequestWarning)