python爬虫-selenium模拟登录豆瓣获取cookies给requests使用

1. selenium如何切换到iframe中操作

import time
from selenium import webdriver

# Page that login() opens in the browser.
url = "https://www.douban.com"
# Chrome driver handle used by login(). The chromedriver path below is an
# absolute path on the author's machine and has to be adjusted locally.
brower = webdriver.Chrome(
    executable_path="/Users/zhangqiang/Desktop/05-python/03-spider-learn/spider/csdn_spider/chromedriver")


def login():
    """Open the Douban page and click the account tab of the login widget.

    The login widget sits inside an iframe, so the driver must enter that
    frame before any element inside it can be located.
    """
    brower.get(url)
    time.sleep(3)  # fixed pause to give the page time to load

    # Enter the first <iframe> on the page; later lookups run inside it.
    login_frame = brower.find_element_by_tag_name("iframe")
    brower.switch_to.frame(login_frame)

    # Click the tab carrying the 'account-tab-account' class.
    brower.find_element_by_xpath("//li[@class='account-tab-account']").click()


# Run the login flow only when this file is executed as a script.
if __name__ == "__main__":
    login()

关键代码如下:登录表单位于 iframe 中,必须先切换到该 iframe,否则无法直接用 find_element_by_xpath 找到其中的元素。

brower.switch_to.frame(brower.find_element_by_tag_name("iframe")) # switch into the iframe

2. 怎么在 selenium 中提取 cookies:先登录,然后调用 brower.get_cookies() 方法获取,如下

import time
from selenium import webdriver

# Page that login() opens in the browser.
url = "https://www.douban.com"
# Chrome driver handle used by login(). The chromedriver path below is an
# absolute path on the author's machine and has to be adjusted locally.
brower = webdriver.Chrome(
    executable_path="/Users/zhangqiang/Desktop/05-python/03-spider-learn/spider/csdn_spider/chromedriver")


def login():
    """Log in to Douban through the browser and print the browser's cookies.

    Fills the account form inside the login iframe, submits it, waits a fixed
    ten seconds, and prints whatever ``brower.get_cookies()`` returns.
    """
    account = '[email protected]'
    secret = 'xxxxxx'

    # Load the page and give it a fixed three seconds to render.
    brower.get(url)
    time.sleep(3)

    # The login widget sits in an iframe; enter it before locating elements.
    login_frame = brower.find_element_by_tag_name("iframe")
    brower.switch_to.frame(login_frame)
    brower.find_element_by_xpath("//li[@class='account-tab-account']").click()

    # Locate both input boxes first, then type the credentials into them.
    account_box = brower.find_element_by_xpath("//input[@id='username']")
    secret_box = brower.find_element_by_xpath("//input[@id='password']")
    account_box.send_keys(account)
    secret_box.send_keys(secret)

    # Submit the form.
    brower.find_element_by_xpath("//a[@class='btn btn-account btn-active']").click()

    # Fixed pause after submitting, then dump the cookies.
    time.sleep(10)
    session_cookies = brower.get_cookies()
    print(session_cookies)


# Run the login flow only when this file is executed as a script.
if __name__ == "__main__":
    login()

3. 上面 selenium 获取到的 cookies 是 list 类型(每个元素是一个 dict),我们要从中取出 name 和 value,组成一个新的 dict,如下

# Fixed pause after submitting the login form, before reading cookies.
time.sleep(10)
# get_cookies() returns a list with one dict per cookie.
cookies = brower.get_cookies()
# Keep only each cookie's name and value as a flat name -> value mapping.
cookie_dict = {entry['name']: entry['value'] for entry in cookies}
print(cookie_dict)

4. 把 selenium 获取到的 cookies 交给 requests 使用

    # Hand the cookies collected from the browser to requests.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    }
    # Pass a timeout: without one, requests may wait indefinitely on a
    # server that never answers.
    res = requests.get(url, cookies=cookie_dict, headers=headers, timeout=10).text
    # "8ll" is the marker the author looks for in the returned page
    # (presumably text shown only when logged in -- confirm for your account).
    if "8ll" in res:
        print("已经登录")
    else:
        print("未登录")

完整代码如下

import time
from selenium import webdriver
import requests

# Page that login() opens in the browser and later fetches with requests.
url = "https://www.douban.com"
# Chrome driver handle used by login(). The chromedriver path below is an
# absolute path on the author's machine and has to be adjusted locally.
brower = webdriver.Chrome(
    executable_path="/Users/zhangqiang/Desktop/05-python/03-spider-learn/spider/csdn_spider/chromedriver")


def login():
    """Log in to Douban with selenium, then reuse the cookies in requests.

    Drives ``brower`` through the login form, copies the browser's cookies
    into a plain ``{name: value}`` dict, and fetches ``url`` again through
    ``requests`` with those cookies. Prints a logged-in or not-logged-in
    message depending on whether the marker string appears in the response.
    """
    # Placeholders only: fill in your own account before running. Real
    # credentials must not be published together with the script.
    username = '[email protected]'
    password = 'xxxxxx'

    # Simulate the Douban login through selenium.
    brower.get(url)
    time.sleep(3)
    # The login widget sits in an iframe; enter it before locating elements.
    brower.switch_to.frame(brower.find_element_by_tag_name("iframe"))
    login_ele = brower.find_element_by_xpath("//li[@class='account-tab-account']")
    login_ele.click()

    # Locate the username and password inputs.
    username_ele = brower.find_element_by_xpath("//input[@id='username']")
    password_ele = brower.find_element_by_xpath("//input[@id='password']")

    # Type the credentials.
    username_ele.send_keys(username)
    password_ele.send_keys(password)

    submit_btn = brower.find_element_by_xpath("//a[@class='btn btn-account btn-active']")
    submit_btn.click()

    # Fixed pause after submitting, before reading the cookies.
    time.sleep(10)
    # get_cookies() returns a list of per-cookie dicts; requests takes a
    # flat name -> value mapping.
    cookie_dict = {item['name']: item['value'] for item in brower.get_cookies()}

    # Hand the cookies to requests.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    }
    # Pass a timeout: without one, requests may wait indefinitely on a
    # server that never answers.
    res = requests.get(url, cookies=cookie_dict, headers=headers, timeout=10).text
    # "8ll" is the marker the author looks for in the returned page
    # (presumably text shown only when logged in -- confirm for your account).
    if "8ll" in res:
        print("已经登录")
    else:
        print("未登录")


# Run the login flow only when this file is executed as a script.
if __name__ == "__main__":
    login()

 

 

 

 

 

 

你可能感兴趣的:(python)