常用网页操作(selenium)

前言

  • 个人总结的几个常用的网页操作方式,使用 Google Chrome 浏览器
  • 采用 selenium 库,需要安装 chromedriver
  • 元素定位方式多种多样,这里统一采用 xpath 定位

启动浏览器

from selenium import webdriver


# ---- Settings ---------------------------------------------------------------
save_path = r"C:\Users\Aiden\Desktop"           # directory downloads are saved to
timeout = 2                                     # implicit-wait timeout, in seconds
url = "https://www.baidu.com/"                  # page to open

# ---- Chrome preferences -----------------------------------------------------
prefs = {}
# Suppress the save-file popup.
prefs["profile.default_content_settings.popups"] = 0
# Change the default download directory.
prefs["download.default_directory"] = save_path
# Allow multiple files to be downloaded automatically.
prefs["profile.default_content_setting_values.automatic_downloads"] = 1

# ---- Chrome options ---------------------------------------------------------
chrome_options = webdriver.ChromeOptions()
# Drop the 'enable-automation' switch; intended to make the session harder
# to identify as Selenium-driven (navigator.webdriver based bot checks).
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Disable the Blink "AutomationControlled" runtime feature.
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
# Attach the preferences built above.
chrome_options.add_experimental_option('prefs', prefs)

# ---- Launch -----------------------------------------------------------------
# Options are passed with `options=`; the older `chrome_options=` keyword is
# deprecated.
driver = webdriver.Chrome(options=chrome_options)
print('[网页] 启动浏览器')

# Implicit wait: element lookups keep retrying for up to `timeout` seconds.
driver.implicitly_wait(timeout)

# Open the page.
driver.get(url)
print('[网页] 打开网页')

debug 模式启动(即先正常启动浏览器,再通过远程调试端口接管)

import os, win32api
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


timeout = 2                                     # implicit-wait timeout, in seconds
url = "https://www.baidu.com/"                  # page opened when Chrome starts

# Command-line arguments for chrome.exe: open the remote-debugging port that
# Selenium attaches to below, followed by the page to load.
# NOTE(review): recent Chrome releases may also require a non-default
# --user-data-dir for remote debugging to be honoured - confirm for your version.
params = "--remote-debugging-port=9222 " + url

path = r"C:\Program Files\Google\Chrome\Application\chrome.exe"

# ShellExecute(hwnd, op, file, params, dir, bShow).
# `params` must be passed here: without it Chrome starts with no debugging
# port and the attach via "debuggerAddress" below cannot succeed.
win32api.ShellExecute(0, "open", path, params, os.path.split(path)[0], 1)

# Attach to the already-running Chrome instead of spawning a new one.
chrome_options = Options()
chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")

driver = webdriver.Chrome(options=chrome_options)

# Implicit wait for element lookups.
driver.implicitly_wait(timeout)

# Maximize the browser window.
driver.maximize_window()

一、捕获网页元素

  • 捕获的元素支持一系列操作
from time import sleep
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
 

def find_element(website, **kwargs):
    """
    Wait until an element is present in the page and return it.

    The element is located by XPath when ``xpath`` is given, otherwise by its
    ``id`` attribute.

    :param website: WebDriver object to search in
    :param kwargs: keyword options
        name:    label used in the log messages (default "元素")
        xpath:   XPath of the element
        id:      id of the element, used only when no xpath is given
        wait:    seconds to sleep before searching (default 0)
        timeout: maximum seconds to wait for the element (default 10)
        poll:    seconds between two lookups (default 0.5)
    :return: the located web element
    :raises ValueError: if neither ``xpath`` nor ``id`` is supplied
    :raises Exception: any other failure is logged and re-raised unchanged
    """
    name = kwargs.get("name", "元素")
    xpath = kwargs.get("xpath", None)
    element_id = kwargs.get("id", None)         # local name avoids shadowing builtin id()
    wait = kwargs.get("wait", 0)
    timeout = kwargs.get("timeout", 10)
    poll = kwargs.get("poll", 0.5)
    try:
        # Fail fast instead of polling for a locator that can never match.
        if not xpath and not element_id:
            raise ValueError("find_element requires an 'xpath' or an 'id' argument")
        sleep(wait)
        locator = (By.XPATH, xpath) if xpath else (By.ID, element_id)
        element = WebDriverWait(website, timeout, poll).until(EC.presence_of_element_located(locator))
        print(f'[网页] 捕获 {name}')
        return element
    except Exception as e:
        print(f'[网页] 捕获 {name} 失败 {e}')
        # Bare raise keeps the original traceback.
        raise

二、点击网页元素

def element_click(website, **kwargs):
    """
    Click an element located by XPath.

    :param website: WebDriver object
    :param kwargs: keyword options
        name:  label used in the log messages (default '元素')
        xpath: XPath of the element to click
        sleep: seconds to sleep before locating the element (default 1)
        is_js: click through JavaScript instead of the element's own
               click() (default False)
    :raises Exception: any failure is logged and re-raised unchanged
    """
    name = kwargs.get('name', '元素')
    xpath = kwargs.get('xpath')
    wait = kwargs.get('sleep', 1)
    is_js = kwargs.get('is_js', False)
    try:
        sleep(wait)
        # The find_element_by_* helpers were removed in Selenium 4.3;
        # find_element(By.XPATH, ...) is the supported form.
        element = website.find_element(By.XPATH, xpath)
        if is_js:
            website.execute_script("arguments[0].click();", element)
        else:
            element.click()
        print(f'[网页] 点击 {name}')
    except Exception as e:
        print(f'[网页] 点击 {name} 失败 {e}')
        raise

三、内容输入

def element_input(website, **kwargs):
    """
    Type a value into an element located by XPath.

    :param website: WebDriver object
    :param kwargs: keyword options
        name:  label used in the log messages (default '元素')
        xpath: XPath of the input element
        value: content to enter
        sleep: seconds to sleep before locating the element (default 1)
        is_js: set the element's ``value`` through JavaScript instead of
               clearing, clicking and sending keys (default False)
    :raises Exception: any failure is logged and re-raised unchanged
    """
    name = kwargs.get('name', '元素')
    xpath = kwargs.get('xpath')
    value = kwargs.get('value')
    wait = kwargs.get('sleep', 1)
    is_js = kwargs.get('is_js', False)
    try:
        sleep(wait)
        # The find_element_by_* helpers were removed in Selenium 4.3;
        # find_element(By.XPATH, ...) is the supported form.
        element = website.find_element(By.XPATH, xpath)
        if is_js:
            # Pass the value as a script argument rather than formatting it
            # into the script text, so quotes or backslashes in the value
            # cannot break (or inject into) the JavaScript.
            website.execute_script("arguments[0].value = arguments[1];", element, value)
        else:
            element.clear()
            element.click()
            element.send_keys(value)
        print(f'[网页] 输入 {name}')
    except Exception as e:
        print(f'[网页] 输入 {name} 失败 {e}')
        raise

四、网页元素截图

def element_screenshot(website, **kwargs):
    """
    Save a screenshot of an element located by XPath.

    :param website: WebDriver object
    :param kwargs: keyword options
        name:  label used in the log messages (default '网页元素')
        xpath: XPath of the element to capture
        path:  file path the screenshot is saved to
    :raises Exception: any failure is logged and re-raised unchanged
    """
    name = kwargs.get('name', '网页元素')
    xpath = kwargs.get('xpath')
    path = kwargs.get('path')
    try:
        # Fixed one-second pause before locating the element.
        sleep(1)
        # The find_element_by_* helpers were removed in Selenium 4.3;
        # find_element(By.XPATH, ...) is the supported form.
        element = website.find_element(By.XPATH, xpath)
        element.screenshot(path)
        print(f'[网页] {name} 截图')
    except Exception as e:
        print(f'[网页] {name} 截图 失败 {e}')
        raise

五、获取网页元素文本内容

def element_text(website, **kwargs):
    """
    Return the text of an element located by XPath.

    Unlike the other helpers in this file, a failure is not re-raised: it is
    logged and ``False`` is returned, so callers must check the result.

    :param website: WebDriver object
    :param kwargs: keyword options
        name:  label used in the log messages (default '元素')
        xpath: XPath of the element to read
    :return: the element's text, or False if it could not be obtained
    """
    name = kwargs.get('name', '元素')
    xpath = kwargs.get('xpath')
    try:
        # Fixed one-second pause before locating the element.
        sleep(1)
        # The find_element_by_* helpers were removed in Selenium 4.3;
        # find_element(By.XPATH, ...) is the supported form.
        element = website.find_element(By.XPATH, xpath)
        print(f'[网页] 获取 {name} ')
        return element.text
    except Exception as e:
        print(f'[网页] 获取 {name} 失败 {e}')
        return False

六、修改网页元素属性

def change_attribute(website, **kwargs):
    """
    Set an attribute on an element located by XPath.

    The attribute is written with JavaScript ``setAttribute``; the element,
    attribute name and value are passed as script arguments.

    :param website: WebDriver object
    :param kwargs: keyword options
        name:  label used in the log messages (default '元素')
        xpath: XPath of the element to modify
        key:   name of the attribute to set
        value: new value of the attribute
    :raises Exception: any failure is logged and re-raised unchanged
    """
    name = kwargs.get('name', '元素')
    xpath = kwargs.get('xpath')
    key = kwargs.get('key')
    value = kwargs.get('value')
    try:
        # Fixed one-second pause before locating the element.
        sleep(1)
        # The find_element_by_* helpers were removed in Selenium 4.3;
        # find_element(By.XPATH, ...) is the supported form.
        element = website.find_element(By.XPATH, xpath)
        website.execute_script("arguments[0].setAttribute(arguments[1],arguments[2]);", element, key, value)
        print(f'[网页] 修改 {name} 属性')
    except Exception as e:
        print(f'[网页] 修改 {name} 属性 失败 {e}')
        raise

七、选择下拉列表(options)

from selenium.webdriver.support.ui import Select


def element_select(website, **kwargs):
    """
    Choose an option of a drop-down list by its visible text.

    :param website: WebDriver object
    :param kwargs: keyword options
        name:  label used in the log messages (default '元素')
        xpath: XPath of the drop-down element
        sleep: seconds to sleep before locating the element (default 0)
        value: visible text of the option to select
    :raises Exception: any failure is logged and re-raised unchanged
    """
    name = kwargs.get('name', '元素')
    xpath = kwargs.get('xpath')
    wait = kwargs.get('sleep', 0)
    value = kwargs.get('value')
    try:
        sleep(wait)
        # The find_element_by_* helpers were removed in Selenium 4.3;
        # find_element(By.XPATH, ...) is the supported form.
        element = website.find_element(By.XPATH, xpath)
        Select(element).select_by_visible_text(value)
        print(f'[网页] 下拉列表 {name}')
    except Exception as e:
        print(f'[网页] 下拉列表 {name} 失败 {e}')
        raise

你可能感兴趣的:(Python,爬虫,python,selenium,chrome,爬虫)