Python爬虫系列(2.3-requests库模拟用户登录)

一、模拟登录拉勾网

import re
import requests


class LoginLaGou(object):
    """Simulate a user login to lagou.com through one shared ``requests`` session.

    The flow is: fetch the login page, extract the two anti-forgery values
    embedded in it, send them back as request headers, then POST the
    credentials to the JSON login endpoint.
    """

    def __init__(self, username='181****1666', password='root', timeout=10):
        """Prepare headers, form data, URLs and the HTTP session.

        :param username: account name sent in the ``username`` form field.
        :param password: password sent in the ``password`` form field.
        :param timeout: timeout in seconds passed to every HTTP request so a
            stalled connection cannot block forever.
        """
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
            'Referer': 'https://passport.lagou.com/login/login.html'
        }
        self.data = {
            'isValidate': 'true',
            'username': username,
            'password': password,
            'request_form_verifyCode': '',
            'submit': ''
        }
        self.lagou = 'https://www.lagou.com/'
        self.login_index_url = 'https://passport.lagou.com/login/login.html'
        self.login_url = 'https://passport.lagou.com/login/login.json'
        self.timeout = timeout
        self.session = requests.session()

    def login_html(self):
        """Fetch the login page and copy its anti-forgery values into the headers.

        :return: None. ``self.headers`` is updated in place.
        :raises RuntimeError: if the page does not answer with HTTP 200 or if
            either anti-forgery value cannot be found in the page body.
        """
        response = self.session.get(
            url=self.login_index_url,
            headers=self.headers,
            timeout=self.timeout,
        )
        if response.status_code != 200:
            # Posting credentials without the tokens cannot succeed, so fail
            # here with a clear reason instead of continuing silently.
            raise RuntimeError(
                'Login page request failed with HTTP status {}'.format(response.status_code)
            )

        token_match = re.search(r"X_Anti_Forge_Token.*?=.*?'(.*?)'", response.text, re.S)
        code_match = re.search(r"X_Anti_Forge_Code.*?=.*?'(.*?)'", response.text, re.S)
        if token_match is None or code_match is None:
            raise RuntimeError('Anti-forgery token/code not found in the login page')

        # NOTE(review): the header names are spelled "Anit" in the original
        # code and are kept byte-for-byte; presumably that is what the server
        # expects - confirm before "correcting" them.
        self.headers['X-Anit-Forge-Code'] = code_match.group(1)
        self.headers['X-Anit-Forge-Token'] = token_match.group(1)

    def login(self):
        """Run the full login flow and print the outcome.

        Prints the response body and the cookies set by the login response.

        :return: the response object of the login POST request.
        :raises RuntimeError: propagated from :meth:`login_html`.
        """
        self.login_html()  # the login page must be visited first to obtain the tokens
        response = self.session.post(
            url=self.login_url,
            headers=self.headers,
            data=self.data,
            timeout=self.timeout,
        )
        print(response.text)
        print(response.cookies.get_dict())
        return response


if __name__ == "__main__":
    # Script entry point: build the client and run the login flow once.
    LoginLaGou().login()

二、模拟登录GitHub

import re
import requests


class LoginGitHub(object):
    """Simulate a user login to GitHub through one shared ``requests`` session.

    The flow is: fetch the login page, extract the hidden
    ``authenticity_token`` form value, then POST it together with the
    credentials to the session endpoint.
    """

    def __init__(self, username='root', password='123456', timeout=10):
        """Prepare headers, form data, URLs and the HTTP session.

        :param username: account name sent in the ``login`` form field.
        :param password: password sent in the ``password`` form field.
        :param timeout: timeout in seconds passed to every HTTP request so a
            stalled connection cannot block forever.
        """
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
            'Referer': 'https://github.com/'
        }
        self.data = {
            'commit': 'Sign in',
            'utf8': '✓',
            'authenticity_token': '',
            'login': username,
            'password': password,
        }
        self.github = 'https://github.com/login'
        self.login_url = 'https://github.com/session'
        self.timeout = timeout
        self.session = requests.session()

    def login_html(self):
        """Fetch the login page and store its CSRF token in the form data.

        :return: None. ``self.data['authenticity_token']`` is updated in place.
        :raises RuntimeError: if the page does not answer with HTTP 200 or if
            no ``authenticity_token`` input is found in the page body.
        """
        response = self.session.get(
            url=self.github,
            headers=self.headers,
            timeout=self.timeout,
        )
        if response.status_code != 200:
            raise RuntimeError(
                'Login page request failed with HTTP status {}'.format(response.status_code)
            )

        # re.search already scans forward for the first occurrence, so no
        # leading ".*?" is needed; the captured value is the same first match.
        match = re.search(r'name="authenticity_token".*?value="(.*?)"', response.text, re.S)
        if match is None:
            raise RuntimeError('authenticity_token not found in the login page')
        self.data['authenticity_token'] = match.group(1)

    def login(self):
        """Run the full login flow and print the response body.

        :return: the response object of the login POST request.
        :raises RuntimeError: propagated from :meth:`login_html`.
        """
        self.login_html()  # the CSRF token must be fetched before posting
        response = self.session.post(
            url=self.login_url,
            headers=self.headers,
            data=self.data,
            timeout=self.timeout,
        )
        print(response.text)
        return response


if __name__ == "__main__":
    # Script entry point: build the client and run the login flow once.
    LoginGitHub().login()

三、更多博主文章请访问

你可能感兴趣的:(爬虫)