安装selenium
pip3 install selenium # 查看是否安装成功 # 进入到python环境中 import selenium print(selenium.__version__)
网页元素定位
# 通过属性id和name来实现定位 find_element_by_id() find_element_by_name() # 如果多个元素的id和name相同的话只会定位到第一个元素 # 通过HTML标签类型,和属性class来实现定位 find_element_by_class_name() find_element_by_tag_name() # 只能定位到符合条件的第一个元素 # 通过标签的值实现定位,partial_link用于模糊匹配 find_element_by_link_text() find_element_by_partial_link_text() # 如果网页中的文字不是唯一,那么也只会定位到第一个元素 # 元素的路径定位选择器 find_element_by_xpath() find_element_by_css_selector()
# 如果有多个相同元素,又想要同时获取 find_elements_by_id() find_elements_by_name() find_elements_by_class_name() find_elements_by_tag_name() find_elements_by_link_text() find_elements_by_partial_link_text() find_elements_by_xpath() find_elements_by_css_selector()
节点交互:
import time from selenium import webdriver browser = webdriver.Chrome() browser.get('https://www.taobao.com') input = browser.find_element_by_id('q') # 输入文字用send_keys() input.send_keys('iphone') time.sleep(1) #清空文字用clear() input.clear() input.send_keys('ipad') button = browser.find_element_by_class_name('btn-search') #点击 button.click()
动作链
from selenium import webdriver from selenium.webdriver import ActionChains browser = webdriver.Chrome() url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable' browser.get(url) browser.switch_to.frame('iframeResult') #找到被拖拽的标签 source = browser.find_element_by_css_selector('#draggable') #找到拖拽目的地的标签 target = browser.find_element_by_css_selector('#droppable') actions = ActionChains(browser) actions.drag_and_drop(source,target) actions.perform()
执行js
例如下拉进度条,可以直接模拟运行JavaScript,使用execute_script()即可实现
from selenium import webdriver browser = webdriver.Chrome() browser.get('https://www.zhihu.com/explore') browser.execute_script('window.scrollTo(0,document.body.scrollHeight)') browser.execute_script('alert("To Bottom")')
获取节点信息
from selenium import webdriver browser = webdriver.Chrome() browser.get('https://www.zhihu.com/explore') logo = browser.find_element_by_id('zh-top-link-logo') print(logo) # 获取class属性 print(logo.get_attribute('class'))
获取文本值
from selenium import webdriver browser = webdriver.Chrome() browser.get('https://www.zhihu.com/explore') input = browser.find_element_by_class_name('zu-top-add-question') print(input.text)
输出id、位置、标签名、大小
from selenium import webdriver browser = webdriver.Chrome() browser.get('https://www.zhihu.com/explore') input = browser.find_element_by_class_name('zu-top-add-question') print(input.id) # 输出位置 print(input.location) #标签名 print(input.tag_name) #大小 print(input.size)
界面切换
from selenium import webdriver from selenium.common.exceptions import NoSuchElementException browser = webdriver.Chrome() browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable') #界面切换到子界面 browser.switch_to.frame('iframeResult') try: # 查找logo logo = browser.find_element_by_class_name('logo') except NoSuchElementException: print('NO LOGO') # 界面切换到父级界面 browser.switch_to.parent_frame() # 查找logo logo = browser.find_element_by_class_name('logo') print(logo) print(logo.text)
延时等待(隐式等待)
browser = webdriver.Chrome() browser.implicitly_wait(10) browser.get('https://www.zhihu.com/explore') input = browser.find_element_by_class_name('zu-top-add-question') print(input)
显式等待
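from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait browser = webdriver.Chrome() browser.get('https://www.taobao.com/') wait = WebDriverWait(browser,10) input = wait.until(EC.presence_of_element_located((By.ID,'q'))) # 节点可点击 button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'.btn-search'))) print(input,button)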
Cookies
browser = webdriver.Chrome() browser.get('https://www.zhihu.com/explore') # 获取cookies print(browser.get_cookies()) # 添加cookie browser.add_cookie({'name':'name','domain':'www.zhihu.com','value':'germey'}) print(browser.get_cookies()) # 删除所有的cookies browser.delete_all_cookies() print(browser.get_cookies())