很多时候,爬取的页面需要 cookie 来维持会话,但其登录环节的反爬验证又很严格,所以我采用先手工登录、再读取 cookie 的方式;因为用得很多,于是写成了一个模板。
#!D:\Python\Python36 python
# encoding: utf-8
"""
@author: sy_dove
@license: (C) Copyright 2019-2020, Node Supply Chain Manager Corporation Limited.
@contact: [email protected]
@file: get_cookie.py
@time: 2019/6/8 10:57
@desc: Open a browser for a manual login, then return the session cookies for webdriver or requests use.
"""
from selenium import webdriver
import selenium.webdriver.support.ui as ui
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
class GetCookie(object):
    """Capture session cookies from a browser after a manual login.

    A Chrome window is opened on ``url`` and the user logs in by hand. Once
    the login-marker element becomes visible (or the wait times out) the page
    is refreshed and the browser's cookies are read back.
    """

    # XPath of the element whose visibility is treated as "login finished".
    DEFAULT_LOGIN_LOCATOR = '//a[text()="我的订单"]'
    # Seconds to wait for the manual login before giving up.
    DEFAULT_TIMEOUT = 80

    def __init__(self, url, is_driver,
                 login_locator=DEFAULT_LOGIN_LOCATOR,
                 timeout=DEFAULT_TIMEOUT):
        """Store the capture settings.

        Args:
            url: Page to open in the browser (normally the login page).
            is_driver: If true, ``run`` returns the raw webdriver cookie
                list; otherwise a ``{name: value}`` dict for requests.
            login_locator: XPath of the element that signals a finished login.
            timeout: Maximum number of seconds to wait for that element.
        """
        self.url = url
        self.is_driver = is_driver
        self.login_locator = login_locator
        self.timeout = timeout

    def run(self):
        """Open the browser, wait for the manual login and return the cookies.

        Returns:
            The list of cookie dicts from ``driver.get_cookies()`` when
            ``is_driver`` is true, otherwise a ``{name: value}`` dict.
        """
        import warnings  # local import: only needed for the timeout notice

        driver = webdriver.Chrome()
        try:
            driver.get(url=self.url)
            driver.maximize_window()
            # Block until the user has finished logging in by hand.
            logged_in = self.is_visible(self.login_locator, driver, self.timeout)
            if not logged_in:
                # Keep the original best-effort behaviour (cookies are still
                # returned) but make the timeout visible to the caller.
                warnings.warn(
                    'login marker %r not visible after %s seconds; '
                    'returned cookies may not belong to a logged-in session'
                    % (self.login_locator, self.timeout),
                    RuntimeWarning,
                    stacklevel=2,
                )
            driver.refresh()
            cookies = driver.get_cookies()
        finally:
            # Always release the browser and its driver process.
            driver.quit()

        # Raw cookie dicts suit webdriver; requests wants a name -> value map.
        if self.is_driver:
            return cookies
        return self._to_requests_cookies(cookies)

    @staticmethod
    def _to_requests_cookies(cookies):
        """Reduce webdriver cookie dicts to a ``{name: value}`` mapping."""
        return {cookie['name']: cookie['value'] for cookie in cookies}

    @staticmethod
    def is_visible(locator, driver, timeout=DEFAULT_TIMEOUT):
        """Explicitly wait until the element at ``locator`` is visible.

        Args:
            locator: XPath expression of the element to wait for.
            driver: Webdriver instance to poll.
            timeout: Maximum number of seconds to wait.

        Returns:
            True if the element became visible in time, False on timeout.
        """
        try:
            ui.WebDriverWait(driver, timeout).until(
                EC.visibility_of_element_located((By.XPATH, locator)))
            return True
        except TimeoutException:
            return False
def main(url, is_driver=True):
    """Run the manual-login cookie capture for ``url`` and return the cookies.

    Args:
        url: Page to open in the browser (normally the login page).
        is_driver: Passed through to ``GetCookie``; true returns the raw
            webdriver cookie list, false a ``{name: value}`` dict.

    Returns:
        Whatever ``GetCookie.run`` returns, so callers can actually use the
        captured cookies instead of having them discarded.
    """
    return GetCookie(url=url, is_driver=is_driver).run()


if __name__ == '__main__':
    main(url='https://passport.jd.com/new/login.aspx')
#!D:\Python\Python36 python
# encoding: utf-8
"""
@author: sy_dove
@license: (C) Copyright 2019-2020, Node Supply Chain Manager Corporation Limited.
@contact: [email protected]
@file: get_cookie.py
@time: 2019/6/8 10:57
@desc: Open a browser for a manual login, then return the session cookies for webdriver or requests use.
"""
from selenium import webdriver
import selenium.webdriver.support.ui as ui
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
class GetCookie(object):
    """Capture session cookies from a browser after a manual login.

    A Chrome window is opened on ``url`` and the user logs in by hand. Once
    the login-marker element becomes visible (or the wait times out) the page
    is refreshed and the browser's cookies are read back.
    """

    # XPath of the element whose visibility is treated as "login finished".
    DEFAULT_LOGIN_LOCATOR = '//a[text()="我的订单"]'
    # Seconds to wait for the manual login before giving up.
    DEFAULT_TIMEOUT = 80

    def __init__(self, url, is_driver,
                 login_locator=DEFAULT_LOGIN_LOCATOR,
                 timeout=DEFAULT_TIMEOUT):
        """Store the capture settings.

        Args:
            url: Page to open in the browser (normally the login page).
            is_driver: If true, ``run`` returns the raw webdriver cookie
                list; otherwise a ``{name: value}`` dict for requests.
            login_locator: XPath of the element that signals a finished login.
            timeout: Maximum number of seconds to wait for that element.
        """
        self.url = url
        self.is_driver = is_driver
        self.login_locator = login_locator
        self.timeout = timeout

    def run(self):
        """Open the browser, wait for the manual login and return the cookies.

        Returns:
            The list of cookie dicts from ``driver.get_cookies()`` when
            ``is_driver`` is true, otherwise a ``{name: value}`` dict.
        """
        import warnings  # local import: only needed for the timeout notice

        driver = webdriver.Chrome()
        try:
            driver.get(url=self.url)
            driver.maximize_window()
            # Block until the user has finished logging in by hand.
            logged_in = self.is_visible(self.login_locator, driver, self.timeout)
            if not logged_in:
                # Keep the original best-effort behaviour (cookies are still
                # returned) but make the timeout visible to the caller.
                warnings.warn(
                    'login marker %r not visible after %s seconds; '
                    'returned cookies may not belong to a logged-in session'
                    % (self.login_locator, self.timeout),
                    RuntimeWarning,
                    stacklevel=2,
                )
            driver.refresh()
            cookies = driver.get_cookies()
        finally:
            # Always release the browser and its driver process.
            driver.quit()

        # Raw cookie dicts suit webdriver; requests wants a name -> value map.
        if self.is_driver:
            return cookies
        return self._to_requests_cookies(cookies)

    @staticmethod
    def _to_requests_cookies(cookies):
        """Reduce webdriver cookie dicts to a ``{name: value}`` mapping."""
        return {cookie['name']: cookie['value'] for cookie in cookies}

    @staticmethod
    def is_visible(locator, driver, timeout=DEFAULT_TIMEOUT):
        """Explicitly wait until the element at ``locator`` is visible.

        Args:
            locator: XPath expression of the element to wait for.
            driver: Webdriver instance to poll.
            timeout: Maximum number of seconds to wait.

        Returns:
            True if the element became visible in time, False on timeout.
        """
        try:
            ui.WebDriverWait(driver, timeout).until(
                EC.visibility_of_element_located((By.XPATH, locator)))
            return True
        except TimeoutException:
            return False
def main(url, is_driver=True):
    """Run the manual-login cookie capture for ``url`` and return the cookies.

    Args:
        url: Page to open in the browser (normally the login page).
        is_driver: Passed through to ``GetCookie``; true returns the raw
            webdriver cookie list, false a ``{name: value}`` dict.

    Returns:
        Whatever ``GetCookie.run`` returns, so callers can actually use the
        captured cookies instead of having them discarded.
    """
    return GetCookie(url=url, is_driver=is_driver).run()


if __name__ == '__main__':
    main(url='https://passport.jd.com/new/login.aspx')