基于浏览器自动化的模块。
pip install selenium
以下以谷歌浏览器为例:
http://chromedriver.storage.googleapis.com/index.html
下载对应版本的驱动
from selenium import webdriver
driver = webdriver.Chrome(executable_path='E:\chromedriver.exe')
也可直接将驱动程序,放在python的文件目录中
from selenium import webdriver
driver = webdriver.Chrome()
发送请求 get()
driver.get('https://www.taobao.com/')
find定位系列
执行js代码 execute_script()
driver.execute_script('js代码')
交互 输入值 send_keys()
driver.find_element_by_id('q').send_keys('狗子')
点击事件 click()
driver.find_element_by_xpath('//*[@id="J_TSearchForm"]/div[1]/button').click()
回退返回 back()
driver.back()
前进 forward()
driver.forward()
退出浏览器
driver.quit()
获取iframe子页面的数据,直接用find系列的方法得不到数据,需要使用switch_to.frame(‘iframeid’)切换标签定位的作用域
driver.switch_to.frame('iframeResult')
driver.find_element_by_id('draggable')
实现一系列连续操作,例如长按后拖动等
perform()让动作链立即执行
from selenium.webdriver import ActionChains
action = ActionChains(driver)
点击长按 click_and_hold(arg)
action.click_and_hold(span)
像素偏移 move_by_offset(x, y)
action.move_by_offset(20, 0).perform()
释放动作链
perform()
http://scxk.nmpa.gov.cn:81/xk/
from selenium import webdriver
from lxml import etree
import time
driver = webdriver.Chrome(executable_path='E:\chromedriver.exe')
driver.get('http://scxk.nmpa.gov.cn:81/xk/')
page_text = driver.page_source
tree = etree.HTML(page_text)
lists = tree.xpath('//ul[@id="gzlist"]//dl/@title')
print(lists)
driver.quit()
from selenium import webdriver
from lxml import etree
import time
driver = webdriver.Chrome(executable_path='E:\chromedriver.exe')
driver.get('https://www.taobao.com/')
# js代码 滚动屏幕
driver.execute_script('window.scrollTo(0, 1200)')
time.sleep(2)
# find系类定位元素
driver.find_element_by_id('q').send_keys('狗子')
driver.find_element_by_xpath('//*[@id="J_TSearchForm"]/div[1]/button').click()
time.sleep(2)
driver.get('https://www.baidu.com')
time.sleep(2)
# 返回上一页
driver.back()
time.sleep(2)
# 前进一页
driver.forward()
time.sleep(2)
driver.quit()
https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable
由于滑块是在iframe中
from selenium import webdriver
from selenium.webdriver import ActionChains
from lxml import etree
import time
driver = webdriver.Chrome(executable_path='E:\chromedriver.exe')
driver.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
driver.switch_to.frame('iframeResult')
span = driver.find_element_by_id('draggable')
action = ActionChains(driver)
action.click_and_hold(span)
for i in range(5):
action.move_by_offset(17, 0).perform()
time.sleep(0.3)
action.release()
driver.quit()
https://qzone.qq.com/
登录元素也是在iframe中
from selenium import webdriver
# 将私密信息username password放在key.py中导入
from key import *
import time
driver = webdriver.Chrome(executable_path='E:\chromedriver.exe')
driver.get('https://qzone.qq.com/')
driver.switch_to.frame('login_frame')
driver.find_element_by_xpath('//*[@id="switcher_plogin"]').click()
driver.find_element_by_id('u').send_keys(username)
driver.find_element_by_id('p').send_keys(password)
driver.find_element_by_id('login_button').click()
time.sleep(3)
driver.quit()
导入模块
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
配置参数
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
将参数带入
driver = webdriver.Chrome(
executable_path='E:\chromedriver.exe', chrome_options=chrome_options)
无头访问百度
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
driver = webdriver.Chrome(
executable_path='E:\chromedriver.exe', chrome_options=chrome_options)
driver.get('https://www.baidu.com/')
print(driver.page_source)
time.sleep(2)
driver.quit()
另一种写法
from selenium import webdriver
import time
option = webdriver.ChromeOptions()
option.add_argument('headless')
driver = webdriver.Chrome(chrome_options=option)
driver.get('http:/www.baidu.com')
print(driver.page_source)
time.sleep(2)
driver.quit()
门户网站检测如果是selenium请求的,有可能会拒绝访问。这也是一种反爬机制
实现规避检测
from selenium.webdriver import ChromeOptions
option = ChromeOptions()
option.add_experimental_option('excludeSwitcher', ['enable-automation'])
driver = webdriver.Chrome(
executable_path='E:\chromedriver.exe', chrome_options=chrome_options, options=option)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver import ChromeOptions
import time
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
option = ChromeOptions()
option.add_experimental_option('excludeSwitcher', ['enable-automation'])
driver = webdriver.Chrome(
executable_path='E:\chromedriver.exe', chrome_options=chrome_options, options=option)
driver.get('https://www.baidu.com/')
print(driver.page_source)
time.sleep(2)
driver.quit()
或
from selenium import webdriver
import time
option = webdriver.ChromeOptions()
option.add_argument('headless')
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitcher', ['enable-automation'])
driver = webdriver.Chrome(chrome_options=option, options=options)
driver.get('http:/www.baidu.com')
print(driver.page_source)
time.sleep(2)
driver.quit()