python模拟登陆豆瓣

登陆豆瓣

  • 登陆豆瓣
    • requests模块登陆豆瓣
    • urllib2模块登陆豆瓣

requests模块登陆豆瓣

前提:
  使用pip命令安装requests、BeautifulSoup4和lxml(代码中使用lxml作为解析器)
思路:
  1.访问登陆界面,使用Chrome浏览器自带的开发者工具分析出需要post的数据
  2.把验证码下载下来,进行手动输入
  3.访问登陆之后的页面,测试是否成功

# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup

#拿到验证码,写入硬盘
def verifyCode(verifyCodeData):
    """Save the captcha image to disk and ask the user to type it in.

    verifyCodeData: raw image data, written unchanged to ``verifyCode.jpg``
    in the current working directory (binary mode).

    Returns the text the user enters at the prompt.
    """
    # Write the image out so the user can open the file and read the captcha.
    with open("verifyCode.jpg", "wb") as image_file:
        image_file.write(verifyCodeData)

    # Python 2 raw_input: returns the typed line without the trailing newline.
    return raw_input("请输入验证码:")


def doubanLogin():
    """Log in to Douban through a requests Session, solving the captcha by hand.

    Steps:
      1. GET the login page and parse it.
      2. If the page carries a hidden ``captcha-id`` field, download the
         matching captcha image and ask the user to type it in.
      3. POST the login form.
      4. GET a page that requires a logged-in session.

    The account and password values in the form data are placeholders and
    must be replaced before running. Returns None.
    """
    # A Session object keeps the cookies between requests.
    sess = requests.Session()

    # Request headers: browser-like User-Agent sent with every request.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"}

    # Fetch the login page HTML with a GET request.
    html = sess.get("https://www.douban.com/accounts/login", headers=headers).text

    # Parse the HTML page.
    bs = BeautifulSoup(html, "lxml")

    # Form data for the POST; captcha fields are added below when needed.
    data = {
        "form_email": "你的账号",
        "form_password": "你的密码",
    }

    # The captcha image is generated from captcha-id. find() returns None when
    # the page has no such field; in that case the form is posted without the
    # captcha fields instead of crashing on None.get(...).
    captcha_input = bs.find("input", attrs={"name": "captcha-id"})
    if captcha_input is not None:
        captcha_id = captcha_input.get("value")
        print(captcha_id)

        # Build the full captcha image URL.
        captcha_url = "https://www.douban.com/misc/captcha?id=" + captcha_id + "&size=s"

        # Download the image bytes. `content` is an attribute of the response,
        # not a method, so it must not be called.
        verifyCodeData = sess.get(captcha_url, headers=headers).content

        # Ask the user for the captcha text.
        data["captcha-solution"] = verifyCode(verifyCodeData)
        data["captcha-id"] = captcha_id

    # Send the login POST; the session stores the resulting cookies.
    sess.post("https://accounts.douban.com/login", data=data, headers=headers)

    # Visit an address that requires login permission.
    sess.get("https://www.douban.com/people/155260137/", headers=headers)


# Run the login flow only when this file is executed as a script.
if __name__ == "__main__":

    doubanLogin()

urllib2模块登陆豆瓣

思路与上面相似

# -*- coding:utf-8 -*-

import urllib2
import urllib
import cookielib
from bs4 import BeautifulSoup

def verifyCode(verifyCodeData):
    """Store the captcha image on disk and prompt the user for its text.

    verifyCodeData: raw image data, written unchanged to ``verifyCode.jpg``
    in the current working directory (binary mode).

    Returns the text the user enters at the prompt.
    """
    # Persist the image so it can be opened and read by eye.
    with open("verifyCode.jpg", "wb") as captcha_file:
        captcha_file.write(verifyCodeData)

    # Python 2 raw_input: returns the typed line without the trailing newline.
    return raw_input("请输入验证码:")

def doubanLogin():
    """Log in to Douban with urllib2 and a cookie jar, solving the captcha by hand.

    Steps:
      1. Install a global opener that stores cookies in a CookieJar.
      2. GET the login page and parse it.
      3. If the page carries a hidden ``captcha-id`` field, download the
         matching captcha image and ask the user to type it in.
      4. POST the url-encoded login form.
      5. GET a profile page and print its body.

    The account and password values in the form data are placeholders and
    must be replaced before running. Returns None.
    """
    # Cookie jar plus handler, so cookies set at login are sent afterwards.
    cookie = cookielib.CookieJar()
    cookie_handler = urllib2.HTTPCookieProcessor(cookie)
    opener = urllib2.build_opener(cookie_handler)

    # Make urllib2.urlopen use the cookie-aware opener from here on.
    urllib2.install_opener(opener)

    # Request headers: browser-like User-Agent sent with every request.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"}

    # Fetch and parse the login page.
    url = "https://www.douban.com/accounts/login"
    request = urllib2.Request(url, headers=headers)
    html = urllib2.urlopen(request).read()
    bs = BeautifulSoup(html, "lxml")

    # Form data for the POST; captcha fields are added below when needed.
    data = {
        "form_email": "你的账号",
        "form_password": "你的密码",
    }

    # The captcha image is generated from captcha-id. find() returns None when
    # the page has no such field; in that case the form is posted without the
    # captcha fields instead of crashing on None.get(...).
    captcha_input = bs.find("input", attrs={"name": "captcha-id"})
    if captcha_input is not None:
        captcha_id = captcha_input.get("value")
        print(captcha_id)

        # Build the captcha image URL and download the image bytes.
        captcha_url = "https://www.douban.com/misc/captcha?id=" + captcha_id + "&size=s"
        request = urllib2.Request(captcha_url, headers=headers)
        verifyCodeData = urllib2.urlopen(request).read()

        # Ask the user for the captcha text.
        data["captcha-solution"] = verifyCode(verifyCodeData)
        data["captcha-id"] = captcha_id

    # Supplying a data payload makes urllib2 send a POST request.
    data = urllib.urlencode(data)
    url = "https://accounts.douban.com/login"
    request = urllib2.Request(url, data=data, headers=headers)
    urllib2.urlopen(request)

    # Fetch the profile page with the same headers as every other request
    # and print its body.
    request = urllib2.Request("https://www.douban.com/people/155260137/", headers=headers)
    response = urllib2.urlopen(request)

    print(response.read())


# Run the login flow only when this file is executed as a script.
if __name__ == "__main__":
    doubanLogin()

你可能感兴趣的:(Python爬虫)