selenium的使用
概念:一个自动化测试工具,利用它可以驱动浏览器执行特定的动作,同时可以获取浏览器当前呈现的页面的源代码
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# Quick-start example: search Baidu for "PYTHON", wait for the result list,
# then print the current URL, the cookies and the page source.
browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
    # find_element_by_id() was removed in Selenium 4.3; use find_element(By.ID, ...).
    input = browser.find_element(By.ID, 'kw')
    input.send_keys('PYTHON')
    input.send_keys(Keys.ENTER)
    # Wait (10 is the timeout passed to WebDriverWait) until the results
    # container is present in the DOM.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    print(browser.current_url)
    print(browser.get_cookies())
    print(browser.page_source)
except Exception as exc:
    # The original bare `except: pass` hid NameErrors caused by misspelled
    # variable names; report the failure instead of swallowing it silently.
    print(f'Search failed: {exc!r}')
finally:
    browser.close()
1.声明浏览器
from selenium import webdriver
# Other driver classes mentioned in the notes: Firefox(), Edge(), PhantomJS(), Safari().
browser = webdriver.Chrome()
2.访问页面
from selenium import webdriver
# Open a page, print its source, then close the window.
browser = webdriver.Chrome()
# The scheme separator must be '://'; the original 'https:/www.taobao.com' was malformed.
browser.get('https://www.taobao.com')
print(browser.page_source)
browser.close()
3.查找节点
# 源码
# ID、CSS选择器、XPath获取
from selenium import webdriver
from selenium.webdriver.common.by import By

# Locate the same search box (id="q") through three different strategies.
browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
# The find_element_by_* helpers were removed in Selenium 4.3;
# find_element(By.<STRATEGY>, value) is the supported form.
input_first = browser.find_element(By.ID, 'q')
input_second = browser.find_element(By.CSS_SELECTOR, '#q')
# Double quotes inside the XPath avoid terminating the Python string early.
input_third = browser.find_element(By.XPATH, '//*[@id="q"]')
browser.close()
# Single-node lookup: find_element(By.<STRATEGY>, value),
#   e.g. browser.find_element(By.ID, 'q').
# Available strategies: By.ID, By.NAME, By.XPATH, By.LINK_TEXT,
#   By.PARTIAL_LINK_TEXT, By.TAG_NAME, By.CLASS_NAME, By.CSS_SELECTOR.
# The legacy find_element_by_id / find_element_by_name / ... helpers were the
#   equivalent shortcuts; they were removed in Selenium 4.3.
# Multi-node lookup: find_elements(By.<STRATEGY>, value) with the same
#   strategies (the legacy find_elements_by_* helpers were removed as well).
# NOTE(review): this line needs a live `browser` session - confirm it runs
#   before the browser is closed.
lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
4.节点交互
解析:常见动作 send_keys()、clear()、click()
import time  # this fragment calls time.sleep() but never imported time

# Fragment demonstrating send_keys(), clear() and click().
# NOTE(review): expects `browser` (a live session) and `input` (the search
# box element) to have been created beforehand - confirm against the caller.
input.send_keys('iphone')  # type the text "iphone"
time.sleep(1)
input.clear()  # empty the input box
input.send_keys('ipad')
# find_element_by_class_name() was removed in Selenium 4.3.
button = browser.find_element(By.CLASS_NAME, 'btn-search')  # the search button
button.click()  # submit the search
5.动作链
解析:鼠标拖曳、键盘按键等,这些动作的执行
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

# Drag one element onto another with an action chain.
browser = webdriver.Chrome()  # was misspelled as `web.driver.Chrome()`
url = '*'  # placeholder - substitute the real page URL
browser.get(url)
browser.switch_to.frame('iframeResult')  # switch into the frame by its id
# find_element_by_css_selector() was removed in Selenium 4.3.
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
target = browser.find_element(By.CSS_SELECTOR, '#droppable')
actions = ActionChains(browser)
actions.drag_and_drop(source, target)  # queue the drag-and-drop gesture
actions.perform()  # execute the queued actions
6.执行JavaScript
解析:执行下拉进度条等动作,直接模拟运行JavaScript
此处用execute_script()方法实现
from selenium import webdriver
# Run JavaScript in the page through execute_script().
browser = webdriver.Chrome()
browser.get('*')  # placeholder - substitute the real page URL
# Scroll to the very bottom of the page.
browser.execute_script('window.scroll(0,document.body.scrollHeight)')
# Show an alert box; the inner string uses double quotes so it does not
# terminate the surrounding Python string literal.
browser.execute_script('alert("TO Bottom")')
7.获取节点信息
解析:selenium已提供了选择节点的方法,可通过相关的方法和属性来直接提取节点信息
# 获取属性
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

# Read attributes, text and basic properties from located elements.
browser = webdriver.Chrome()
url = '*'  # placeholder - substitute the real page URL
browser.get(url)
# find_element_by_id() was removed in Selenium 4.3.
logo = browser.find_element(By.ID, 'zh-top-link-logo')
print(logo)
print(logo.get_attribute('class'))  # look up an attribute value by its name
input = browser.find_element(By.ID, 'zh-top-add-question')
print(input.text)  # text content (the variable was misspelled as `ipput`)
# id, location, tag name and size of the element
print(input.id, input.location, input.tag_name, input.size)
8.切换Frame
解析: 网页中有一种节点iframe 也称子Frame,相当于页面的子页面,selenium默认在父节点操作的,可通过switch_to.frame()来切换到子Frame
import time
from selenium import webdriver  # the keyword was misspelled as `form`
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Show that element lookups are scoped to the currently selected frame.
browser = webdriver.Chrome()
url = '*'  # placeholder - substitute the real page URL
browser.get(url)
browser.switch_to.frame('iframeResult')  # switch into the child frame
try:
    # Per the original note, the logo lives in the parent page, so this
    # lookup fails while the child frame is selected and prints NO LOGO.
    logo = browser.find_element(By.CLASS_NAME, 'logo')
except NoSuchElementException:
    print('NO LOGO')
browser.switch_to.parent_frame()  # back to the parent; the lookup now succeeds
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
print(logo.text)
9.延时等待
解析:get()方法会在网页框架加载结束后结束执行,此时获取page_source,得到的可能并不是完全加载完成的页面;如果页面还有额外的Ajax请求,获取的源码也未必完整,因此需要延时等待一段时间,确保节点已经加载出来
# Implicit wait: one fixed timeout applied to element lookups.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
# If a node is not found, keep waiting; an exception is raised once the
# timeout passed here is exceeded.
browser.implicitly_wait(10)
browser.get('*')  # placeholder - substitute the real page URL
# The variable was misspelled as `ipnput`, so print(input) showed the builtin.
# find_element_by_class_name() was removed in Selenium 4.3.
input = browser.find_element(By.CLASS_NAME, '*')  # '*' is a placeholder class name
print(input)
# Explicit wait: wait for a specific condition, which adapts to network speed.
from selenium import webdriver
# Was `form selenium.webdirver.common.by import By` (two typos).
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

browser = webdriver.Chrome()
browser.get('*')  # placeholder - substitute the real page URL
wait = WebDriverWait(browser, 10)  # explicit wait with a maximum wait time
# until() takes the expected condition to wait for.
# presence_of_element_located means the node has appeared; otherwise an
# exception is raised.
input = wait.until(EC.presence_of_element_located((By.ID, 'q')))
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
print(input, button)
title_contains |
标题包含某内容 |
presence_of_element_located |
节点加载出来,传入定位元组,eg (By.ID,'p') |
visibility_of |
节点可见,传入节点对象 |
text_to_be_present_in_element |
某个节点文本包含某文字 |
element_to_be_clickable |
节点可点击 |
alert_is_present |
是否出现警告 |
10. 前进和后退
# Fragment: step back one page in the history, pause, then step forward again.
# NOTE(review): expects `browser` and `time` to be in scope already.
browser.back()
time.sleep(1)
browser.forward()
browser.close()
11.Cookies
解析:对Cookies操作,如:获取、添加、删除
# Fragment: read, add and delete cookies on a live `browser` session.
print(browser.get_cookies())  # cookies currently set
# The WebDriver method is add_cookie() (singular); add_cookies() does not exist.
# '*' values are placeholders.
browser.add_cookie({'name': 'name', 'domain': '*', 'value': '*'})
print(browser.get_cookies())
browser.delete_all_cookies()  # remove every cookie
print(browser.get_cookies())
12. 选项卡管理
# Fragment: open a second tab and switch between tabs.
# NOTE(review): expects `browser` and `time` to be in scope already.
browser.get('**')  # placeholder URL
browser.execute_script('window.open()')  # window.open() opens a new tab
print(browser.window_handles)  # handles of all currently open tabs
# switch_to_window() was removed in Selenium 4; use switch_to.window().
browser.switch_to.window(browser.window_handles[1])
browser.get('*')  # placeholder URL; the original bare `*` was a syntax error
time.sleep(1)
browser.switch_to.window(browser.window_handles[0])
browser.get('*')
13.异常处理
from selenium import webdriver
# The keyword was misspelled as `form`.
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.by import By

# Handle the two exceptions this example expects: a page-load timeout and a
# failed element lookup. The browser is closed in every case.
browser = webdriver.Chrome()
try:
    browser.get('*')  # placeholder - substitute the real page URL
except TimeoutException:
    print('Time out')
try:
    # find_element_by_id() was removed in Selenium 4.3.
    browser.find_element(By.ID, 'hello')
except NoSuchElementException:
    print('No Element')
finally:
    browser.close()
# selenium基本实例
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Basic example: search Baidu for "Python", wait for the result list, then
# print the current URL, the cookies and the page source.
browser = webdriver.Chrome()
try:
    browser.get('http://www.baidu.com')  # request the page
    search_box = browser.find_element(By.ID, 'kw')  # look up by element id
    search_box.send_keys('Python')
    search_box.send_keys(Keys.ENTER)
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    print(browser.current_url)
    print(browser.get_cookies())
    print(browser.page_source)  # print the page source
finally:
    # The close call had been commented out, leaving `finally: pass` and a
    # browser window that was never released; restore the cleanup.
    browser.close()
# 查找节点
# 单个节点 ID ,CSS,XPATH
from selenium import webdriver
from selenium.webdriver.common.by import By
# Locate the Taobao search box three ways, then collect a list of nodes.
SEARCH_BY_ID = (By.ID, 'q')
SEARCH_BY_CSS = (By.CSS_SELECTOR, '#q')
SEARCH_BY_XPATH = (By.XPATH, '//*[@id="q"]')
SERVICE_ITEMS = (By.CSS_SELECTOR, '.service-bd li')

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')

# Single-node lookups
first = browser.find_element(*SEARCH_BY_ID)
second = browser.find_element(*SEARCH_BY_CSS)
third = browser.find_element(*SEARCH_BY_XPATH)

# Multi-node lookup
four = browser.find_elements(*SERVICE_ITEMS)

print(first, '\n', second, '\n', third, '\n\n', four)
#
#
#
# 节点交互 send_keys(),clear(),click()
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
# Type into the Taobao search box, clear it, type again, then click search.
browser = webdriver.Chrome()
browser.get('https://www.taobao.com')

search_box = browser.find_element(By.ID, 'q')
search_box.send_keys('iPone13')
time.sleep(1)
search_box.clear()
time.sleep(3)
search_box.send_keys('iPad')

# Look up the search button and click it in one expression.
browser.find_element(By.CLASS_NAME, 'btn-search').click()
# 动作链
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import time
# Drag the #draggable element onto #droppable inside the demo iframe.
url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser = webdriver.Chrome()
browser.get(url)
browser.switch_to.frame('iframeResult')

source = browser.find_element(By.CSS_SELECTOR, '#draggable')
target = browser.find_element(By.CSS_SELECTOR, '#droppable')

# Build the chain, queue the drag-and-drop and run it in one fluent call.
ActionChains(browser).drag_and_drop(source, target).perform()
# 执行js execute_script()
from selenium import webdriver
# Run two JavaScript snippets in the page: scroll to the bottom, then alert.
SCROLL_TO_BOTTOM_JS = 'window.scrollTo(0,document.body.scrollHeight)'
ALERT_JS = 'alert("To Bottom")'

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
for script in (SCROLL_TO_BOTTOM_JS, ALERT_JS):
    browser.execute_script(script)
# 获取属性 get_attribute() 文本text
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
# Print the id, location, tag name and size of the element with id="root".
url = 'https://www.zhihu.com/explore'
browser = webdriver.Chrome()
browser.get(url)
logo = browser.find_element(By.ID, 'root')

# Other things that can be read from the element (left disabled, as before):
#   logo.text                    -> text content
#   logo.get_attribute('class')  -> attribute value
for property_name in ('id', 'location', 'tag_name', 'size'):
    print(getattr(logo, property_name))

browser.close()
# 切换Frame
import time
from selenium import webdriver
# find_element() raises NoSuchElementException when nothing matches. The
# original caught NoSuchFrameException, so the expected failure was never
# handled and the script crashed instead of printing NO LOGO.
from selenium.common.exceptions import NoSuchElementException, NoSuchFrameException
from selenium.webdriver.common.by import By

# Show that element lookups are scoped to the currently selected frame.
browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
browser.switch_to.frame('iframeResult')  # enter the child frame
try:
    # The lookup is expected to fail while the child frame is selected.
    logo = browser.find_element(By.CLASS_NAME, 'logo')
except NoSuchElementException:
    print('NO LOGO')
browser.switch_to.parent_frame()  # back to the parent document
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
print(logo.text)
# 延时等待
from selenium import webdriver
from selenium.webdriver.common.by import By
# Implicit wait: set a driver-wide lookup timeout before locating the element.
browser = webdriver.Chrome()
browser.implicitly_wait(10)
browser.get('https://www.zhihu.com/explore')
print(browser.find_element(By.CLASS_NAME, 'zu-top-add-question'))
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait # 显式等待
from selenium.webdriver.support import expected_conditions as EC
# Explicit wait: block until the search box is present and the search button
# is clickable, then print both elements.
browser = webdriver.Chrome()
browser.get('https://www.taobao.com/')
wait = WebDriverWait(browser, 10)

search_box_present = EC.presence_of_element_located((By.ID, 'q'))
search_box = wait.until(search_box_present)

search_button_clickable = EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search'))
search_button = wait.until(search_button_clickable)

print(search_box, '\n', search_button)
# 前进或后退
import time
from selenium import webdriver
# Visit three pages in order, go back one step, pause, go forward again.
browser = webdriver.Chrome()
for page_url in (
    'https://www.baidu.com/',
    'https://www.taobao.com/',
    'https://www.python.org/',
):
    browser.get(page_url)

browser.back()
time.sleep(1)
browser.forward()
browser.close()
# Cookies
from selenium import webdriver
# Print the cookies before adding one, after adding it, and after deleting all.
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
print(browser.get_cookies())

new_cookie = {'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'}
browser.add_cookie(new_cookie)
print(browser.get_cookies())

browser.delete_all_cookies()
print(browser.get_cookies())

browser.close()
# [{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49', 'path': '/', 'secure': False, 'value': '1660980908'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'JOID', 'path': '/', 'secure': False, 'value': 'WloWBkpXuqcwA0exIFvZv4H41Nw-Gu7sVmUkwXUO2JZlQDTJbPozpV4ATLYi23jHHkOk4SXADwKFCoiclQvPJtE='}, {'domain': '.zhihu.com', 'expiry': 1692516908, 'httpOnly': False, 'name': 'Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49', 'path': '/', 'secure': False, 'value': '1660980908'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'KLBRSID', 'path': '/', 'secure': False, 'value': '76ae5fb4fba0f519d97e594f1cef9fab|1660980902|1660980900'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'osd', 'path': '/', 'secure': False, 'value': 'UVwQAk5cvKE0B0y3Jl_dtIf-0Ng1HOjoUm4ix3EK05BjRDDCavw3oVUGSrIm0H7BGkev5yPECwmDDIyYng3JItU='}, {'domain': '.zhihu.com', 'expiry': 1695540905, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': 'fe594060-fef6-4fca-b59c-4cafe146b257'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': '46dc1123-1966-4906-b7fc-c7462df65abe'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'SESSIONID', 'path': '/', 'secure': False, 'value': 'GUMboiqSPh0SGzrMLFRjrdl34WhUBf3zpMD5lGohNvW'}, {'domain': '.zhihu.com', 'expiry': 1695540905, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': 'AMDXsveebhWPTiudfRrzU69qi3k-JbQMQgo=|1660980900'}]
# [{'domain': '.www.zhihu.com', 'httpOnly': False, 'name': 'name', 'path': '/', 'secure': True, 'value': 'germey'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49', 'path': '/', 'secure': False, 'value': '1660980908'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'JOID', 'path': '/', 'secure': False, 'value': 'WloWBkpXuqcwA0exIFvZv4H41Nw-Gu7sVmUkwXUO2JZlQDTJbPozpV4ATLYi23jHHkOk4SXADwKFCoiclQvPJtE='}, {'domain': '.zhihu.com', 'expiry': 1692516908, 'httpOnly': False, 'name': 'Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49', 'path': '/', 'secure': False, 'value': '1660980908'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'KLBRSID', 'path': '/', 'secure': False, 'value': '76ae5fb4fba0f519d97e594f1cef9fab|1660980902|1660980900'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'osd', 'path': '/', 'secure': False, 'value': 'UVwQAk5cvKE0B0y3Jl_dtIf-0Ng1HOjoUm4ix3EK05BjRDDCavw3oVUGSrIm0H7BGkev5yPECwmDDIyYng3JItU='}, {'domain': '.zhihu.com', 'expiry': 1695540905, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': 'fe594060-fef6-4fca-b59c-4cafe146b257'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': '46dc1123-1966-4906-b7fc-c7462df65abe'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'SESSIONID', 'path': '/', 'secure': False, 'value': 'GUMboiqSPh0SGzrMLFRjrdl34WhUBf3zpMD5lGohNvW'}, {'domain': '.zhihu.com', 'expiry': 1695540905, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': 'AMDXsveebhWPTiudfRrzU69qi3k-JbQMQgo=|1660980900'}]
# []
# 选项卡管理
import time
from selenium import webdriver
# Open a second tab, load a page in it, then return to the first tab.
browser = webdriver.Chrome()
browser.get('https://www.baidu.com')
browser.execute_script('window.open()')
print(browser.window_handles)

# Handles are re-read before each switch, exactly as in the original flow.
second_tab = browser.window_handles[1]
browser.switch_to.window(second_tab)
browser.get('https://www.taobao.com')
time.sleep(1)

first_tab = browser.window_handles[0]
browser.switch_to.window(first_tab)
browser.get('https://python.org')
time.sleep(3)

browser.close()
代码摘抄之《Python 3网络爬虫开发实战》