Python爬虫常用库(三)selenium

API:selenium.webdriver API
一、声明浏览器对象

from selenium import webdriver

# Configure Chrome to start in headless mode.
option = webdriver.ChromeOptions()
option.add_argument("--headless")

# Create the browser object (a Chrome session) using the options above.
browser = webdriver.Chrome(options=option)

二、访问页面 Chrome().get(url)

from selenium import webdriver

# Configure Chrome to start in headless mode.
option = webdriver.ChromeOptions()
option.add_argument("--headless")
browser = webdriver.Chrome(options=option)

try:
    # Navigate to the page, then print the HTML source of the loaded page.
    browser.get("http://www.baidu.com")
    print(browser.page_source)
finally:
    # Always end the session, even if loading fails, so the headless
    # browser and its driver process are not left running.
    browser.quit()

三、查找元素
(一)单个元素
调用By选择方式

from selenium import webdriver
from selenium.webdriver.common.by import By

# Configure Chrome to start in headless mode.
option = webdriver.ChromeOptions()
option.add_argument("--headless")

browser = webdriver.Chrome(options=option)
try:
    browser.get("http://www.baidu.com")
    # Look up a single element by locator strategy + value: here, the
    # element whose id attribute is "kw".
    # Named search_input rather than input to avoid shadowing the builtin.
    search_input = browser.find_element(By.ID, "kw")
    # Prints the repr of the element object that was found.
    print(search_input)
finally:
    # End the session so the headless browser does not keep running.
    browser.quit()

其他多种选择方式(注意:以下 find_element_by_* 方法在 Selenium 4 中已被弃用,并自 4.3.0 版本起被移除;新代码请改用 find_element(By.*, "值") 的写法):
1、Chrome.find_element_by_id()
2、Chrome.find_element_by_name()
3、Chrome.find_element_by_class_name()
4、Chrome.find_element_by_css_selector()
5、Chrome.find_element_by_tag_name()
6、Chrome.find_element_by_xpath()

from selenium import webdriver
from selenium.webdriver.common.by import By

# Configure Chrome to start in headless mode.
option = webdriver.ChromeOptions()
option.add_argument("--headless")

browser = webdriver.Chrome(options=option)
try:
    browser.get("http://www.baidu.com")
    # The find_element_by_* shortcut methods were removed in Selenium 4.3.
    # Each lookup below is the find_element(By.*, value) equivalent of the
    # shortcut named in the trailing comment.
    input_0 = browser.find_element(By.ID, "kw")                # find_element_by_id
    input_1 = browser.find_element(By.CSS_SELECTOR, "#kw")     # find_element_by_css_selector
    input_2 = browser.find_element(By.NAME, "wd")              # find_element_by_name
    input_3 = browser.find_element(By.CLASS_NAME, "s_ipt")     # find_element_by_class_name
    input_4 = browser.find_element(By.CSS_SELECTOR, ".s_ipt")  # find_element_by_css_selector
    # Print the repr of each lookup result, one per line.
    print(input_0, input_1, input_2, input_3, input_4, sep="\n")
finally:
    # End the session so the headless browser does not keep running.
    browser.quit()

(二)查找多个元素

Chrome().find_elements(By.*, "选择器")
Chrome().find_elements_by_*("选择器")(旧式写法,自 Selenium 4.3.0 起已移除)
返回由所有匹配元素组成的列表;没有匹配元素时返回空列表。

# find_elements returns a list of all elements matching the locator.
# The legacy find_elements_by_css_selector shortcut did the same lookup but
# was removed in Selenium 4.3, so only the By-based call is used here.
elements = browser.find_elements(By.CSS_SELECTOR, "div.more-meta")

# Print the repr of every matched element.
for element in elements:
    print(element)

四、元素交互
(一)元素交互:以在百度搜索 "macbook pro" 为例
1、element.send_keys()
2、element.clear()
3、element.click()
4、…

from selenium import webdriver
from selenium.webdriver.common.by import By

# Start Chrome with its default options (a visible window), as the original
# snippet did: it built headless options but never passed them to Chrome(),
# so that unused configuration has been dropped.
browser = webdriver.Chrome()
browser.get("http://www.baidu.com")

# Type the query into the input with id "kw".
# find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
# which was removed in Selenium 4.3. The variable is not called `input`
# so that the builtin of that name is not shadowed.
search_input = browser.find_element(By.CSS_SELECTOR, "input#kw")
search_input.send_keys("macbook pro")

# Click the submit input to run the search.
button = browser.find_element(By.CSS_SELECTOR, 'input[type="submit"]')
button.click()

# browser.quit() is deliberately not called here so the result page can be
# inspected; call it once you are done with the session.

(二)交互动作
使用selenium.webdriver.common.action_chains.ActionChains
动作链
可支持链式操作

例:拖拽

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time

# Start Chrome with its default options (a visible window), as the original
# snippet did: it built headless options but never passed them to Chrome(),
# so that unused configuration has been dropped.
browser = webdriver.Chrome()
try:
    browser.get("http://sahitest.com/demo/dragDropMooTools.htm")

    # find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
    # which was removed in Selenium 4.3.
    source = browser.find_element(By.CSS_SELECTOR, "#dragger")

    # Perform four drags with x offsets 7, 129, 251 and 373 and a fixed
    # y offset of 118, pausing half a second after each one.
    for x in range(7, 380, 122):
        # Build a new action chain for every drag and run it immediately.
        action = ActionChains(browser)
        action.drag_and_drop_by_offset(source, x, 118).perform()
        time.sleep(0.5)
finally:
    # End the session even if a lookup or drag fails.
    browser.quit()


你可能感兴趣的:(Python)