python模拟登录百度

# -*- coding: utf-8 -*-
import requests
import re
import urllib
from lxml import etree

def login_baidu(user, password):
    """Log in to Baidu through the module-level ``session`` and report the result.

    The function reads the global name ``session`` (created in the
    ``__main__`` block as a ``requests.Session``), so that object must exist
    before this is called. Progress is reported with ``print``; nothing is
    returned.

    Steps:
      1. GET the home page and the login endpoint so the session holds cookies.
      2. GET the ``getapi`` endpoint and pull the login token out of the text.
      3. POST the login form with that token and the credentials.
      4. Treat the presence of a ``BDUSS`` cookie as a successful login.
      5. Re-fetch the home page and print the displayed user name, if any.

    Args:
        user: Account name sent as the ``username`` form field.
        password: Password sent as the ``password`` form field, as given.
    """
    # Prime the session with cookies before asking for a token.
    session.get('http://www.baidu.com')
    session.get('https://passport.baidu.com/v2/api/?login')
    # With the cookies in place, fetch the API description that carries the token.
    token_data = session.get('https://passport.baidu.com/v2/api/?getapi&tpl=mn&apiver=v3').text
    tokens = re.findall(r'"token" : "(.*?)"', token_data)
    if not tokens:
        # Without a token the login POST cannot be built; report failure
        # instead of crashing with a bare IndexError.
        print("登录失败")
        return
    token = tokens[0]
    print(token)
    # Request headers imitating a desktop Chrome browser.
    headers = {
        'Host': 'passport.baidu.com',
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36",
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        # Was 'h-CN', which is not a valid language tag; 'zh-CN' is intended.
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6'
    }
    # Form fields for the login POST.
    # NOTE(review): 'tt', 'ppui_logintime' and 'callback' look like values
    # captured from one browser session - confirm the server accepts them.
    data = {
        'staticpage': 'https://www.baidu.com/cache/user/html/v3Jump.html',
        'charset': 'utf-8',
        'token': token,
        'tpl': 'mn',
        'apiver': 'v3',
        'tt': '1471614939403',
        'safeflg': '0',
        'u': 'https://www.baidu.com/',
        'isPhone': 'false',
        'quick_user': '0',
        'loginmerge': 'true',
        'logintype': 'dialogLogin',
        'username': user,
        'password': password,
        'ppui_logintime': '4173',
        'callback': 'parent.bd__pcbs__nmm7i5'
    }

    # The response body is not inspected; success is judged from the cookie jar.
    session.post('https://passport.baidu.com/v2/api/?login', data=data, headers=headers)
    if 'BDUSS' in session.cookies:
        print("登录成功")
    else:
        print("登录失败")

    web_data = session.get('http://www.baidu.com').text
    page = etree.HTML(web_data)
    my_name = page.xpath(u'//span[@class="user-name"]/text()')
    # The span is missing when the login failed or the markup differs, so
    # guard the index. Call-form print works on both Python 2 and Python 3.
    if my_name:
        print(my_name[0])

if __name__ == '__main__':
    # Placeholder credentials ("username" / "password"); replace before running.
    user = '用户名'
    password = '密码'
    # Module-level session: login_baidu() reads this global, and it keeps
    # cookies across requests.
    session = requests.Session()
    login_baidu(user, password)
    # Reuse the same session to request the Baidu Index site.
    html = session.get('http://index.baidu.com')
    # Decode the raw body as GBK, then re-encode it as UTF-8 bytes.
    index = html.content.decode('gbk')
    index = index.encode('utf-8')

你可能感兴趣的:(python模拟登录百度)