Python进阶爬虫——Class14:selenium

知识点:

  1. selenium基本操作
  2. 页面元素定位
  3. 操作表单元素
  4. 动作链
  5. 显式等待

知识点说明:

1.selenium基本操作

导入模块:from selenium import webdriver

使用Chrome浏览器(浏览器首字母大写):driver = webdriver.Chrome()

使用谷歌打开页面:driver.get("https://www.baidu.com")

窗口最大化:driver.maximize_window()

获取页面源代码:driver.page_source

获取所有cookie:driver.get_cookies()

from selenium import webdriver

# Launch a Chrome browser session.
driver = webdriver.Chrome()
try:
    # Open the target site in the browser.
    driver.get("https://www.baidu.com")
    # Maximize the browser window.
    driver.maximize_window()
    # Read the page source of the current page.
    resp = driver.page_source
    # Read all cookies of the current session.
    cookies = driver.get_cookies()
finally:
    # Always end the session, even if a step above raised, so the browser
    # window opened by this script is not left behind.
    driver.quit()

2.页面元素定位

导入模块(元素定位):from selenium.webdriver.common.by import By

通过ID值定位输入框,并输入python(加等待时间,避免自动关闭):
driver.find_element(By.ID,"kw").send_keys("python")
time.sleep(3)

通过class定位输入框,并输入python(加等待时间,避免自动关闭): driver.find_element(By.CLASS_NAME,"s_ipt").send_keys("python")
time.sleep(3)

通过xpath定位输入框,并输入python(目标元素复制xpath路径) :driver.find_element(By.XPATH,'//*[@id="kw"]').send_keys("python")
time.sleep(3)

通过css定位输入框,并输入python(目标元素复制selector路径):
driver.find_element(By.CSS_SELECTOR,'#kw').send_keys("python")
time.sleep(3)

from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# Four locator strategies that all address the same search input box.
# The XPath / CSS values are the ones copied from the target element.
search_box_locators = (
    (By.ID, "kw"),
    (By.CLASS_NAME, "s_ipt"),
    (By.XPATH, '//*[@id="kw"]'),
    (By.CSS_SELECTOR, '#kw'),
)

driver = webdriver.Chrome()
try:
    driver.get("https://www.baidu.com")
    for strategy, value in search_box_locators:
        search_box = driver.find_element(strategy, value)
        # Clear first: every locator hits the same box, so without this the
        # text would accumulate ("pythonpython...") across the four demos.
        search_box.clear()
        search_box.send_keys("python")
        # Pause so the result of each step can be observed.
        time.sleep(3)
finally:
    # Always end the session, even if one of the lookups raised.
    driver.quit()

3.操作表单元素

输入内容:driver.find_element(By.ID,"kw").send_keys("搜索内容")
time.sleep(3)

清空输入框内容:driver.find_element(By.ID,"kw").clear()
time.sleep(3)

鼠标单击(点击元素的ID):driver.find_element(By.ID,"su").click()

from selenium import webdriver
from selenium.webdriver.common.by import By
import time

driver = webdriver.Chrome()
try:
    driver.get("https://www.baidu.com")
    # Type text into the search box.
    driver.find_element(By.ID,"kw").send_keys("搜索内容")
    time.sleep(3)
    # Clear the content of the search box.
    driver.find_element(By.ID,"kw").clear()
    time.sleep(3)
    # Left-click the element with id "su".
    driver.find_element(By.ID,"su").click()
    # Pause like the previous steps so the effect of the click can be
    # observed before the session is shut down.
    time.sleep(3)
finally:
    # Always end the session, even if one of the steps raised.
    driver.quit()

4.动作链

click(on_element=None) ——单击鼠标左键
click_and_hold(on_element=None) ——点击鼠标左键,不松开
context_click(on_element=None) ——点击鼠标右键
double_click(on_element=None) ——双击鼠标左键
drag_and_drop(source, target) ——拖拽到某个元素然后松开
key_down(value, element=None) ——按下某个键盘上的键
key_up(value, element=None) ——松开某个键
move_to_element(to_element) ——鼠标移动到某个元素
perform() ——执行链中的所有动作
release(on_element=None) ——在某个元素位置松开鼠标左键
send_keys(*keys_to_send) ——发送某个键到当前焦点的元素
send_keys_to_element(element, *keys_to_send) ——发送某个键到指定元素

模块导入(动作链):from selenium.webdriver.common.action_chains import ActionChains

创建动作链对象:action = ActionChains(driver)(参数为已创建的浏览器驱动对象 driver)

动作链中的所有动作只有在最后调用 .perform() 之后才会真正执行,例如:action.move_to_element(element).click().perform()

5.显式等待

导入模块:
捕获异常:from selenium.common import TimeoutException
等待:from selenium.webdriver.support.ui import WebDriverWait
等待条件(判断元素是否已加载、是否可点击等):from selenium.webdriver.support import expected_conditions as EC

from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from selenium.common import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Start Chrome and load the page that search() operates on.
driver = webdriver.Chrome()
driver.get("https://www.baidu.com")
# Shared explicit-wait helper bound to this driver, with a 10 second timeout.
wait = WebDriverWait(driver, timeout=10)
def search(max_retries=3):
    """Type "python" into the search box and click the search button.

    Uses the module-level ``wait`` to wait for the element with id "kw",
    types into it, then waits for the element with id "su" and clicks it.
    If a wait times out, the whole sequence is retried.

    Args:
        max_retries: Maximum number of attempts before giving up. Must be
            at least 1.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        TimeoutException: If the last attempt still times out.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(1, max_retries + 1):
        try:
            search_box = wait.until(EC.presence_of_element_located((By.ID,"kw")))
            # A previous attempt may already have typed into the box before
            # timing out on the button; clear it so the text is not doubled.
            search_box.clear()
            search_box.send_keys("python")
            time.sleep(3)
            # Wait until the button can actually be clicked, not merely
            # until it is present in the DOM.
            submit_button = wait.until(EC.element_to_be_clickable((By.ID,"su")))
            submit_button.click()
            time.sleep(3)
            return
        except TimeoutException:
            # Bounded retry instead of unbounded recursion: surface the
            # timeout to the caller once the attempts are used up.
            if attempt == max_retries:
                raise

if __name__ == '__main__':
    try:
        search()
    finally:
        # Always end the browser session, even if search() raised.
        driver.quit()

你可能感兴趣的:(python,爬虫,selenium)