#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2015-09-30 20:53:15
import requests
from bs4 import BeautifulSoup
import time
import json
import os
# Log in to Zhihu. Cookies saved by a previous run are reused when the
# 'cookiefile' file exists; otherwise an interactive email/password/captcha
# login is performed and the resulting cookies are saved for next time.
url = 'http://www.zhihu.com'
loginURL = 'http://www.zhihu.com/login/email'
# Request headers.
headers = {
    "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:41.0) Gecko/20100101 Firefox/41.0',
    "Referer": "http://www.zhihu.com/",
    # Host carries a bare hostname, not a URL, and header values must not
    # start with whitespace.
    'Host': 'www.zhihu.com',
}
# Form data to POST when logging in.
# NOTE(review): these look like placeholder credentials - supply real ones
# before running.
data = {
    'email': '[email protected]',
    'password': 'zhejiushimima',
    'rememberme': "true",
}
# A requests session keeps the same cookies across every request made below.
s = requests.session()
if os.path.exists('cookiefile'):
    # A cookie file exists: load it into the session and fetch the home page.
    with open('cookiefile') as f:
        cookie = json.load(f)
    s.cookies.update(cookie)
    req1 = s.get(url, headers=headers)
    # req1.content is raw bytes, so the file is opened in binary mode.
    with open('zhihu.html', 'wb') as f:
        f.write(req1.content)
else:
    # No cookie file: log in with email, password and captcha.
    req = s.get(url, headers=headers)
    print(req)
    # Use bs4 to pull the hidden _xsrf token that the login POST requires.
    soup = BeautifulSoup(req.text, "html.parser")
    xsrf_input = soup.find('input', {'name': '_xsrf', 'type': 'hidden'})
    if xsrf_input is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError('hidden _xsrf input not found in the page at ' + url)
    xsrf = xsrf_input.get('value')
    data['_xsrf'] = xsrf
    # Download the captcha image to a local file; the millisecond timestamp
    # is appended to the captcha URL as its query value.
    timestamp = int(time.time() * 1000)
    captchaURL = 'http://www.zhihu.com/captcha.gif?=' + str(timestamp)
    print(captchaURL)
    # Fetch first so a failed request does not leave an empty image file.
    captchaREQ = s.get(captchaURL)
    with open('zhihucaptcha.gif', 'wb') as f:
        f.write(captchaREQ.content)
    # The user opens the image manually and types the captcha text.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    loginCaptcha = read_line('input captcha:\n').strip()
    data['captcha'] = loginCaptcha
    # Debug output; note that this includes the password in plain text.
    print(data)
    # Log in, then save the session cookies to a local file so the next run
    # can take the cookie branch above.
    loginREQ = s.post(loginURL, headers=headers, data=data)
    print(s.cookies.get_dict())
    # json.dump emits text, so the file is opened in text mode.
    with open('cookiefile', 'w') as f:
        json.dump(s.cookies.get_dict(), f)