学习目的: 使用selenium自动化测试工具,模拟人为操作浏览器,达到可以注册/登录网页,滑动下拉框,选择,鼠标点击等操作,为以后编写突破反爬虫机制的个人小爬虫建立基础。
学习背景: windows系统,python语言,PyCharm
学习资源: https://www.selenium.dev/documentation/en/
定位元素是最重要的学习内容之一:学习如何定位到网页中的元素
# Locate the element whose id attribute equals "cheese".
driver.find_element(By.ID, "cheese")
tips:driver是WebDriver的实例对象
一旦“定位到了”以上的网页元素,可以在上一步结果的基础上进一步缩小范围
# Locate the container first, then search only inside it for the child.
cheese = driver.find_element(By.ID, "cheese")
# find_element (singular) yields one WebElement; the legacy
# find_elements_by_id helper returned a list and has been removed from
# current Selenium releases.
cheddar = cheese.find_element(By.ID, "cheddar")
同时,也可以使用另一种方法实现
# A descendant CSS selector finds the child of #cheese in a single call.
# By.CSS_SELECTOR replaces the removed find_element_by_css_selector helper.
cheddar = driver.find_element(By.CSS_SELECTOR, "#cheese #cheddar")
如果网页结构如下
- …
- …
- …
- …
想要定位cheese下的所有元素,如下
# find_elements (plural) returns a list with every <li> under #cheese.
# By.CSS_SELECTOR replaces the removed find_elements_by_css_selector helper.
mucho_cheese = driver.find_elements(By.CSS_SELECTOR, "#cheese li")
WebDriver有八个内置的元素选择方法
定位法 | 使用描述 |
---|---|
class name | 寻找包含查找值的class name元素(不包括复合class name) |
css selector | 用css法定位元素 |
id | 依据id属性值定位元素 |
name | 依据name属性值定位元素 |
link text | 定位其可视文本与搜索值匹配的锚元素 |
partial link text | 定位其可视文本包含搜索值的第一个锚元素 |
tag name | 定位tag name与搜索值匹配的元素 |
xpath | 查找与xpath表达式匹配的元素 |
在定位到想要的元素后,可以使用以下方法定位到相邻的元素
方法 | 使用描述 |
---|---|
above() | 定位到现元素上面的元素 |
below() | 定位到现元素下面的元素 |
toLeftOf() | 定位到现元素左面的元素 |
toRightOf() | 定位到现元素右面的元素 |
near() | 定位到最多距现元素50个像素远的元素 |
# Type a value into the text field named "name".
name = "Charles"
name_field = driver.find_element(By.NAME, "name")
name_field.send_keys(name)

# Drag the "source" element and drop it onto the "target" element.
source = driver.find_element(By.ID, "source")
target = driver.find_element(By.ID, "target")
drag = ActionChains(driver)
drag.drag_and_drop(source, target)
drag.perform()

# Click the form's submit input.
submit_button = driver.find_element(By.CSS_SELECTOR, "input[type='submit']")
submit_button.click()
from selenium.webdriver import Chrome
# Start a Chrome session; it stays open until it is ended explicitly
# (e.g. with driver.quit()).
driver = Chrome()
或
from selenium.webdriver import Chrome

# Using the driver as a context manager ends the session automatically
# when the block exits, even if an exception is raised inside it.
with Chrome() as driver:
    # your code inside this indent
    driver.get("https://selenium.dev")
从浏览器的地址栏读取当前的URL
# Read the URL of the page currently loaded.
driver.current_url
# Go back one entry in the browser history.
driver.back()
# Go forward one entry in the browser history.
driver.forward()
# Reload the current page.
driver.refresh()
# Read the title of the current page.
driver.title
# Read the handle that identifies the current window.
driver.current_window_handle
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Start the driver; the context manager ends the session on exit.
with webdriver.Chrome() as driver:
    # Open URL
    driver.get("https://seleniumhq.github.io")

    # Setup wait for later (up to 10 seconds per condition)
    wait = WebDriverWait(driver, 10)

    # Store the ID of the original window
    original_window = driver.current_window_handle

    # Check we don't have other windows open already
    assert len(driver.window_handles) == 1

    # Click the link which opens in a new window
    driver.find_element(By.LINK_TEXT, "new window").click()

    # Wait for the new window or tab
    wait.until(EC.number_of_windows_to_be(2))

    # Loop through until we find a new window handle
    for window_handle in driver.window_handles:
        if window_handle != original_window:
            driver.switch_to.window(window_handle)
            break

    # Wait for the new tab to finish loading content
    wait.until(EC.title_is("SeleniumHQ Browser Automation"))
# Opens a new tab and switches to new tab
driver.switch_to.new_window('tab')
# Opens a new window and switches to new window
driver.switch_to.new_window('window')
# Close the current tab or window
driver.close()
# Switch back to the old tab or window (handle stored before switching away)
driver.switch_to.window(original_window)
# End the whole browser session
driver.quit()
代码如下
方法一
# Find the <iframe> that is a direct child of #modal and enter it.
iframe = driver.find_element(By.CSS_SELECTOR, "#modal > iframe")
driver.switch_to.frame(iframe)

# Lookups now run inside the frame: find its button, then click it.
frame_button = driver.find_element(By.TAG_NAME, 'button')
frame_button.click()
方法二
# Enter the frame identified by the id/name "buttonframe".
driver.switch_to.frame('buttonframe')

# Find the button inside that frame, then click it.
frame_button = driver.find_element(By.TAG_NAME, 'button')
frame_button.click()
方法三
# Switch to the second frame (frame indexes are zero-based)
driver.switch_to.frame(1)
# Leave the frame and return to the top-level document
driver.switch_to.default_content()
# --- Window size ---
# Access each dimension individually
width = driver.get_window_size().get("width")
height = driver.get_window_size().get("height")
# Or store the dimensions and query them later
size = driver.get_window_size()
width1 = size.get("width")
height1 = size.get("height")
# Resize the window to 1024 x 768 (width, height)
driver.set_window_size(1024, 768)
# --- Window position ---
# Access each coordinate individually
x = driver.get_window_position().get('x')
y = driver.get_window_position().get('y')
# Or store the position and query it later
position = driver.get_window_position()
x1 = position.get('x')
y1 = position.get('y')
# Move the window to the top left of the primary monitor
driver.set_window_position(0, 0)
# Maximize the window
driver.maximize_window()
# Minimize the window
driver.minimize_window()
# Switch the window to full-screen mode
driver.fullscreen_window()
例子:将下列代码保存为一个文件,路径为:file:///race_condition.html
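<!doctype html>
<meta charset=utf-8>
<title>Race Condition Example</title>

<script>
  var initialised = false;
  window.addEventListener("load", function() {
    var newElement = document.createElement("p");
    newElement.textContent = "Hello from JavaScript!";
    document.body.appendChild(newElement);
    initialised = true;
  });
</script>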
方法一
from selenium.webdriver.support.ui import WebDriverWait


def document_initialised(driver):
    """Return the page's JavaScript ``initialised`` value.

    Used as a wait condition: WebDriverWait keeps calling it until the
    returned value is truthy.
    """
    return driver.execute_script("return initialised")


# The Python bindings load a page with get(); they have no navigate() method.
driver.get("file:///race_condition.html")
# WebDriverWait requires an explicit timeout (in seconds) in Python.
WebDriverWait(driver, timeout=10).until(document_initialised)
el = driver.find_element(By.TAG_NAME, "p")
assert el.text == "Hello from JavaScript!"
方法二
from selenium.webdriver.support.ui import WebDriverWait

# The Python bindings load a page with get(); they have no navigate() method.
driver.get("file:///race_condition.html")
# until() retries the lambda until it returns the element instead of raising
# NoSuchElementException; the timeout (in seconds) is required in Python.
el = WebDriverWait(driver, timeout=10).until(
    lambda d: d.find_element(By.TAG_NAME, "p")
)
assert el.text == "Hello from JavaScript!"
方法三
# Wait at most 3 seconds for the condition to return a truthy value.
WebDriverWait(driver, timeout=3).until(some_condition)
driver = Chrome()
# Implicit wait: element lookups keep retrying for up to 10 seconds
# before giving up.
driver.implicitly_wait(10)
driver.get("http://somedomain/url_that_delays_loading")
my_dynamic_element = driver.find_element(By.ID, "myDynamicElement")
driver = Chrome()
driver.get("http://somedomain/url_that_delays_loading")

# Exceptions to swallow while polling instead of aborting the wait.
ignored = [ElementNotVisibleException, ElementNotSelectableException]
# Poll once per second, for at most 10 seconds.
wait = WebDriverWait(driver, 10, poll_frequency=1, ignored_exceptions=ignored)

# Condition: the element matched by the XPath "//div" is clickable.
clickable_div = EC.element_to_be_clickable((By.XPATH, "//div"))
element = wait.until(clickable_div)
# Click the link to activate the alert
driver.find_element(By.LINK_TEXT, "See an example alert").click()

# Wait for the alert to be displayed and store it in a variable.
# The file imports expected_conditions under the alias EC.
alert = wait.until(EC.alert_is_present())

# Store the alert text in a variable
text = alert.text

# Press the OK button
alert.accept()
# Click the link to activate the confirm box
driver.find_element(By.LINK_TEXT, "See a sample confirm").click()

# Wait for the alert to be displayed.
# The file imports expected_conditions under the alias EC.
wait.until(EC.alert_is_present())

# Store the alert in a variable for reuse
alert = driver.switch_to.alert

# Store the alert text in a variable
text = alert.text

# Press the Cancel button
alert.dismiss()
from selenium.webdriver.common.alert import Alert

# Click the link to activate the prompt
driver.find_element(By.LINK_TEXT, "See a sample prompt").click()

# Wait for the alert to be displayed.
# The file imports expected_conditions under the alias EC.
wait.until(EC.alert_is_present())

# Store the alert in a variable for reuse
alert = Alert(driver)

# Type your message
alert.send_keys("Selenium")

# Press the OK button
alert.accept()
from selenium import webdriver

# Proxy address to route traffic through; empty here as a placeholder,
# fill it in (typically as "host:port") before running.
PROXY = ""

# Register the proxy on the shared Chrome capabilities so that sessions
# created afterwards pick it up.
webdriver.DesiredCapabilities.CHROME['proxy'] = {
    "httpProxy": PROXY,
    "ftpProxy": PROXY,
    "sslProxy": PROXY,
    "proxyType": "MANUAL",
}

with webdriver.Chrome() as driver:
    # Open URL
    driver.get("https://selenium.dev")
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
# 'normal': get() returns only after the whole page, including its
# sub-resources, has finished loading.
options.page_load_strategy = 'normal'
driver = webdriver.Chrome(options=options)
# Navigate to url
driver.get("http://www.google.com")
# End the browser session
driver.quit()
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
# 'eager': get() returns once the DOM is ready; images, stylesheets and
# other sub-resources may still be loading.
options.page_load_strategy = 'eager'
driver = webdriver.Chrome(options=options)
# Navigate to url
driver.get("http://www.google.com")
# End the browser session
driver.quit()
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
# 'none': get() returns as soon as the initial page has been downloaded,
# without waiting for it to finish loading.
options.page_load_strategy = 'none'
driver = webdriver.Chrome(options=options)
# Navigate to url
driver.get("http://www.google.com")
# End the browser session
driver.quit()
from selenium import webdriver
from selenium.webdriver.common.by import By
driver = webdriver.Chrome()
driver.get("http://www.google.com")
# Find the search box: the element whose name attribute is "q"
search_box = driver.find_element(By.NAME, "q")
# Type the query text into it
search_box.send_keys("webdriver")
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()

# Navigate to Url
driver.get("https://www.example.com")

# Get all the elements available with tag name 'p'
elements = driver.find_elements(By.TAG_NAME, 'p')

# Print the visible text of each paragraph (print is a function in Python 3).
for e in elements:
    print(e.text)
from selenium import webdriver
from selenium.webdriver.common.by import By
driver = webdriver.Chrome()
driver.get("http://www.google.com")
# Locate the form first ...
search_form = driver.find_element(By.TAG_NAME, "form")
# ... then search only inside that form for the element named "q"
search_box = search_form.find_element(By.NAME, "q")
search_box.send_keys("webdriver")
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://www.example.com")

# Get element with tag name 'div'
element = driver.find_element(By.TAG_NAME, 'div')

# Get all the elements with tag name 'p' inside that div
elements = element.find_elements(By.TAG_NAME, 'p')

# Print the visible text of each paragraph (print is a function in Python 3).
for e in elements:
    print(e.text)
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://www.google.com")

# Typing into the search box gives it focus.
driver.find_element(By.CSS_SELECTOR, '[name="q"]').send_keys("webElement")

# Get attribute of current active element
attr = driver.switch_to.active_element.get_attribute("title")
# print is a function in Python 3.
print(attr)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome()

# Load the search page.
driver.get("http://www.google.com")

# Type "webdriver" into the search box and press ENTER in the same call.
query_box = driver.find_element(By.NAME, "q")
query_box.send_keys("webdriver" + Keys.ENTER)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome()

# Navigate to url
driver.get("http://www.google.com")

# Enter "webdriver" text and perform "ENTER" keyboard action
driver.find_element(By.NAME, "q").send_keys("webdriver" + Keys.ENTER)

# Perform action ctrl + A (modifier CONTROL + Alphabet A) to select the page.
# key_up releases CONTROL afterwards so the modifier does not stay pressed
# for later input.
webdriver.ActionChains(driver) \
    .key_down(Keys.CONTROL) \
    .send_keys("a") \
    .key_up(Keys.CONTROL) \
    .perform()
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome()

# Load the search page.
driver.get("http://www.google.com")

# Keep a reference to the search box.
search = driver.find_element(By.NAME, "q")

action = webdriver.ActionChains(driver)

# Hold SHIFT while typing "qwerty" into the search box, release SHIFT,
# then type "qwerty" again (QWERTYqwerty).
(
    action
    .key_down(Keys.SHIFT)
    .send_keys_to_element(search, "qwerty")
    .key_up(Keys.SHIFT)
    .send_keys("qwerty")
    .perform()
)
from selenium import webdriver
from selenium.webdriver.common.by import By
driver = webdriver.Chrome()
# Navigate to url
driver.get("http://www.google.com")
# Store the search box (the element named "q") in 'SearchInput'
SearchInput = driver.find_element(By.NAME, "q")
# Type some text into it
SearchInput.send_keys("selenium")
# Clears the entered text
SearchInput.clear()