知乎问题答案图片爬虫(二)

翻阅了网上很多大神写的知乎登录文章,其中不少方法因为知乎修改了登录方式,已经不能用了。经过多次实验,最终采用了某位大神在 GitHub 上给出的方法。

# A single shared session so that cookies obtained at login are sent with
# every later request.
session = requests.session()
# Python 3 spelling (module renamed to http.cookiejar):
# session.cookies = cookiejar.LWPCookieJar(filename='cookies.txt')
session.cookies = cookielib.LWPCookieJar(filename='cookies.txt')
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
}
try:
    # Load previously saved cookies from the local file.
    # ignore_discard=True also loads cookies that are flagged to be discarded
    # (session cookies); ignore_expires, not used here, would additionally
    # load cookies that have already expired.
    session.cookies.load(ignore_discard=True)
except Exception as e:
    # Best effort: a missing or unreadable cookie file just means the user
    # has to log in again, so report it and carry on.
    print('exception:', e)
    print('还没有cookie信息')

def get_xsrf():
    """Return the ``_xsrf`` token embedded in the Zhihu home page.

    Fetches the home page through the shared ``session`` and reads the
    ``value`` attribute of the ``<input name="_xsrf">`` element.

    Raises:
        TypeError: if the page contains no such input, because
            ``soup.find`` then returns ``None`` and cannot be subscripted.
    """
    url = 'https://www.zhihu.com'
    response = session.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'lxml')
    tag = soup.find('input', attrs={'name': '_xsrf'})
    return tag['value']

def get_captcha():
    """Download the login captcha, show it, and return the text the user types.

    The image is written to ``captcha.gif`` in the current working directory
    and opened with the default image viewer via ``Image.show()``.
    """
    # Millisecond timestamp used as the ``r`` query parameter.
    t = str(int(time.time() * 1000))
    captcha_url = 'https://www.zhihu.com/captcha.gif?r=' + t + "&type=login"
    response = session.get(captcha_url, headers=headers)
    captcha_name = 'captcha.gif'
    with open(captcha_name, 'wb') as f:
        f.write(response.content)
    # Reopen only after the ``with`` block has closed (and flushed) the file.
    im = Image.open(captcha_name)
    im.show()
    return raw_input('请输入验证码: ')

def get_phone():
    """Prompt on the console for the account's phone number and return it."""
    return raw_input('请输入手机号: ')

def get_password():
    """Prompt on the console for the account password and return it.

    NOTE(review): ``raw_input`` echoes the typed password to the terminal.
    """
    return raw_input('请输入密码: ')

def login(phone, password, _xsrf, captcha):
    """Post the login form to Zhihu and save the resulting cookies.

    Args:
        phone: phone number, sent as the ``phone_num`` form field.
        password: account password.
        _xsrf: token returned by ``get_xsrf()``.
        captcha: captcha text entered by the user.

    The decoded JSON reply is printed, not returned; the function itself
    returns ``None``.
    """
    data = {
        '_xsrf': _xsrf,
        'password': password,
        'phone_num': phone,
        'captcha': captcha
    }
    login_url = 'https://www.zhihu.com/login/phone_num'
    response = session.post(login_url, data=data, headers=headers)
    print('response.json() =', response.json())
    # Save the cookies to the jar's local file.
    # NOTE(review): save() is called without ignore_discard=True, so cookies
    # flagged as session-only are presumably not written. Confirm that the
    # login cookies survive a restart.
    session.cookies.save()

def isLogin():
    """Return True if the shared session is already logged in.

    Login state is probed by requesting the profile settings page and
    checking for an HTTP 200 status.
    """
    url = "https://www.zhihu.com/settings/profile"
    # Redirects must be disabled: otherwise a logged-out request is
    # redirected to the login page and the final status is still 200.
    response = session.get(url, headers=headers, allow_redirects=False)
    return response.status_code == 200

# Entry flow: reuse the saved cookies when they are still valid, otherwise
# run an interactive phone-number login.
if isLogin():
    print('您已经登录')
else:
    # The original called get_email(), which is not defined in this file;
    # get_phone() is the prompt that matches login()'s phone argument.
    phone = get_phone()
    password = get_password()
    _xsrf = get_xsrf()
    print('_xsrf =', _xsrf)
    captcha = get_captcha()
    login(phone, password, _xsrf, captcha)

你可能感兴趣的:(知乎问题答案图片爬虫(二))