前言
- 个人总结的几个常用网页操作方式,使用 Google Chrome 浏览器
- 采用 selenium 库,需要安装与浏览器版本匹配的 chromedriver
- 元素定位方式多种多样,这里统一采用 xpath 定位
启动浏览器
from selenium import webdriver
# Page to open and the implicit-wait budget (seconds) applied to element lookups.
url = "https://www.baidu.com/"
timeout = 2

# Directory handed to Chrome as its default download location.
save_path = r"C:\Users\Aiden\Desktop"

# Chrome profile preferences passed through the "prefs" experimental option.
prefs = dict(
    [
        ("profile.default_content_settings.popups", 0),
        ("download.default_directory", save_path),
        ("profile.default_content_setting_values.automatic_downloads", 1),
    ]
)

chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("prefs", prefs)
# NOTE(review): the next two options presumably hide the "controlled by
# automated software" markers from the page -- confirm against Chrome docs.
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_argument("--disable-blink-features=AutomationControlled")

# Start Chrome, then configure the implicit wait before loading the page.
driver = webdriver.Chrome(options=chrome_options)
print('[网页] 启动浏览器')
driver.implicitly_wait(timeout)
driver.get(url=url)
print('[网页] 打开网页')
debug 模式启动(先正常启动 Chrome 并开启远程调试端口,再由 selenium 接管该浏览器)
import os, win32api
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Start a regular Chrome process with its remote-debugging port open, then
# attach a Selenium driver to that already-running browser.
timeout = 2  # implicit wait for element lookups, in seconds
url = "https://www.baidu.com/"
# Command-line arguments for chrome.exe: the debugging port, then the start page.
params = "--remote-debugging-port=9222 " + url
path = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
# ShellExecute takes (hwnd, operation, file, parameters, directory, show_cmd).
# `params` has to be passed as the fourth argument: without it Chrome starts
# with no debugging port and the attach below has nothing to connect to.
# NOTE(review): recent Chrome releases may ignore the debugging port unless a
# non-default --user-data-dir is also supplied -- verify for your Chrome version.
win32api.ShellExecute(0, "open", path, params, os.path.split(path)[0], 1)
chrome_options = Options()
# Attach to the browser listening on the port opened above instead of
# launching a new one.
chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
driver = webdriver.Chrome(options=chrome_options)
driver.implicitly_wait(timeout)
driver.maximize_window()
一、捕获网页元素
from time import sleep
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def find_element(website, **kwargs):
    """
    Wait until an element is present and return it.

    The element is located by XPath when ``xpath`` is given, otherwise by
    ``id``.

    :param website: object handed to ``WebDriverWait`` as the driver
    :param kwargs: optional settings
        name    -- label used in the log messages (default "元素")
        xpath   -- XPath locator; takes precedence over ``id``
        id      -- element id, used when ``xpath`` is not given
        wait    -- seconds to sleep before searching (default 0)
        timeout -- maximum seconds to wait for the element (default 10)
        poll    -- seconds between presence checks (default 0.5)
    :return: the located element
    :raises ValueError: if neither ``xpath`` nor ``id`` is supplied
    :raises Exception: any error raised while waiting is logged and re-raised
    """
    name = kwargs.get("name", "元素")
    xpath = kwargs.get("xpath", None)
    element_id = kwargs.get("id", None)  # local renamed to avoid shadowing builtin id()
    wait = kwargs.get("wait", 0)
    timeout = kwargs.get("timeout", 10)
    poll = kwargs.get("poll", 0.5)
    try:
        sleep(wait)
        # Fail fast: without a locator the wait would only end after the full
        # timeout, with an error unrelated to the real mistake.
        if not xpath and not element_id:
            raise ValueError("find_element requires an 'xpath' or 'id' locator")
        locator = (By.XPATH, xpath) if xpath else (By.ID, element_id)
        element = WebDriverWait(website, timeout, poll).until(
            EC.presence_of_element_located(locator)
        )
        print(f'[网页] 捕获 {name}')
        return element
    except Exception as e:
        print(f'[网页] 捕获 {name} 失败 {e}')
        raise
二、点击网页元素
def element_click(website, **kwargs):
    """
    Click the element located by XPath.

    :param website: driver-like object providing ``find_element`` and
        ``execute_script``
    :param kwargs: optional settings
        name  -- label used in the log messages (default '元素')
        xpath -- XPath of the element to click
        sleep -- seconds to pause before locating the element (default 1)
        is_js -- when True, click through JavaScript instead of the native
                 ``click()`` (default False)
    :raises Exception: any error is logged and re-raised
    """
    name = kwargs.get('name', '元素')
    xpath = kwargs.get('xpath')
    wait = kwargs.get('sleep', 1)
    is_js = kwargs.get('is_js', False)
    try:
        sleep(wait)
        # find_element_by_xpath() was removed in Selenium 4.3; find_element()
        # with the "xpath" strategy (the value of By.XPATH) works on 3.x and 4.x.
        element = website.find_element("xpath", xpath)
        if is_js:
            website.execute_script("arguments[0].click();", element)
        else:
            element.click()
        print(f'[网页] 点击 {name}')
    except Exception as e:
        print(f'[网页] 点击 {name} 失败 {e}')
        raise
三、内容输入
def element_input(website, **kwargs):
    """
    Type a value into the element located by XPath.

    :param website: driver-like object providing ``find_element`` and
        ``execute_script``
    :param kwargs: optional settings
        name  -- label used in the log messages (default '元素')
        xpath -- XPath of the input element
        value -- text to enter
        sleep -- seconds to pause before locating the element (default 1)
        is_js -- when True, assign the element's ``value`` property through
                 JavaScript instead of sending keystrokes (default False)
    :raises Exception: any error is logged and re-raised
    """
    name = kwargs.get('name', '元素')
    xpath = kwargs.get('xpath')
    value = kwargs.get('value')
    wait = kwargs.get('sleep', 1)
    is_js = kwargs.get('is_js', False)
    try:
        sleep(wait)
        # find_element_by_xpath() was removed in Selenium 4.3; find_element()
        # with the "xpath" strategy (the value of By.XPATH) works on 3.x and 4.x.
        element = website.find_element("xpath", xpath)
        if is_js:
            # Pass the value as a script argument rather than splicing it into
            # the script text, so quotes, backslashes or newlines in the value
            # cannot break (or inject into) the JavaScript.
            website.execute_script("arguments[0].value = arguments[1];", element, value)
        else:
            element.clear()
            element.click()
            element.send_keys(value)
        print(f'[网页] 输入 {name}')
    except Exception as e:
        print(f'[网页] 输入 {name} 失败 {e}')
        raise
四、网页元素截图
def element_screenshot(website, **kwargs):
    """
    Save a screenshot of the element located by XPath.

    :param website: driver-like object providing ``find_element``
    :param kwargs: optional settings
        name  -- label used in the log messages (default '网页元素')
        xpath -- XPath of the element to capture
        path  -- file path passed to the element's ``screenshot`` method
        sleep -- seconds to pause before locating the element (default 1)
    :raises Exception: any error is logged and re-raised
    """
    name = kwargs.get('name', '网页元素')
    xpath = kwargs.get('xpath')
    path = kwargs.get('path')
    # Configurable like the sibling helpers; the default keeps the former
    # fixed one-second pause.
    wait = kwargs.get('sleep', 1)
    try:
        sleep(wait)
        # find_element_by_xpath() was removed in Selenium 4.3; find_element()
        # with the "xpath" strategy (the value of By.XPATH) works on 3.x and 4.x.
        element = website.find_element("xpath", xpath)
        element.screenshot(path)
        print(f'[网页] {name} 截图')
    except Exception as e:
        print(f'[网页] {name} 截图 失败 {e}')
        raise
五、获取网页元素文本内容
def element_text(website, **kwargs):
    """
    Return the text of the element located by XPath.

    Unlike the other helpers, a failure is not re-raised: it is logged and
    ``False`` is returned.

    :param website: driver-like object providing ``find_element``
    :param kwargs: optional settings
        name  -- label used in the log messages (default '元素')
        xpath -- XPath of the element to read
        sleep -- seconds to pause before locating the element (default 1)
    :return: the element's ``text``, or ``False`` if anything went wrong
    """
    name = kwargs.get('name', '元素')
    xpath = kwargs.get('xpath')
    # Configurable like the sibling helpers; the default keeps the former
    # fixed one-second pause.
    wait = kwargs.get('sleep', 1)
    try:
        sleep(wait)
        # find_element_by_xpath() was removed in Selenium 4.3; find_element()
        # with the "xpath" strategy (the value of By.XPATH) works on 3.x and 4.x.
        element = website.find_element("xpath", xpath)
        # Read the text before logging so a failing read is not reported as
        # both a success and a failure.
        text = element.text
        print(f'[网页] 获取 {name} ')
        return text
    except Exception as e:
        print(f'[网页] 获取 {name} 失败 {e}')
        return False
六、修改网页元素属性
def change_attribute(website, **kwargs):
    """
    Set an attribute on the element located by XPath, via JavaScript.

    :param website: driver-like object providing ``find_element`` and
        ``execute_script``
    :param kwargs: optional settings
        name  -- label used in the log messages (default '元素')
        xpath -- XPath of the element to modify
        key   -- attribute name
        value -- new attribute value
        sleep -- seconds to pause before locating the element (default 1)
    :raises Exception: any error is logged and re-raised
    """
    name = kwargs.get('name', '元素')
    xpath = kwargs.get('xpath')
    key = kwargs.get('key')
    value = kwargs.get('value')
    # Configurable like the sibling helpers; the default keeps the former
    # fixed one-second pause.
    wait = kwargs.get('sleep', 1)
    try:
        sleep(wait)
        # find_element_by_xpath() was removed in Selenium 4.3; find_element()
        # with the "xpath" strategy (the value of By.XPATH) works on 3.x and 4.x.
        element = website.find_element("xpath", xpath)
        website.execute_script(
            "arguments[0].setAttribute(arguments[1],arguments[2]);", element, key, value
        )
        print(f'[网页] 修改 {name} 属性')
    except Exception as e:
        print(f'[网页] 修改 {name} 属性 失败 {e}')
        raise
七、选择下拉列表(options)
from selenium.webdriver.support.ui import Select
def element_select(website, **kwargs):
    """
    Choose an option of a drop-down list by its visible text.

    :param website: driver-like object providing ``find_element``
    :param kwargs: optional settings
        name  -- label used in the log messages (default '元素')
        xpath -- XPath of the drop-down element handed to ``Select``
        value -- visible text of the option to select
        sleep -- seconds to pause before locating the element (default 0)
    :raises Exception: any error is logged and re-raised
    """
    name = kwargs.get('name', '元素')
    xpath = kwargs.get('xpath')
    wait = kwargs.get('sleep', 0)
    value = kwargs.get('value')
    try:
        sleep(wait)
        # find_element_by_xpath() was removed in Selenium 4.3; find_element()
        # with the "xpath" strategy (the value of By.XPATH) works on 3.x and 4.x.
        element = website.find_element("xpath", xpath)
        Select(element).select_by_visible_text(value)
        print(f'[网页] 下拉列表 {name}')
    except Exception as e:
        print(f'[网页] 下拉列表 {name} 失败 {e}')
        raise