有时候在页面中的操作可能有很多步,那么这时候可以使用鼠标行为链类:ActionChains来完成。下面我们通过一个案例来展示。打开百度网页,在输入框中输入“Happy new year!!!”,然后点击搜索。搜索后,在搜索按钮上右键。五秒后关闭网页。代码如下,注意看注释:
from selenium import webdriver
import time
from selenium.webdriver import ActionChains  # queues multi-step mouse/keyboard input
from selenium.webdriver.common.by import By  # locator strategies for find_element

# Demo: open Baidu, type a query, left-click the search button, right-click
# at the same spot, then shut the browser down five seconds later.
driver = webdriver.Chrome()
try:
    # Open the Baidu home page and give it a moment to render.
    driver.get('https://www.baidu.com/')
    time.sleep(1)

    # find_element(By.ID, ...) is used instead of find_element_by_id, which
    # newer Selenium releases no longer provide.
    input_tag = driver.find_element(By.ID, 'kw')    # search input box
    button_tag = driver.find_element(By.ID, 'su')   # search button

    # ActionChains only queues actions; nothing is sent to the browser until
    # perform() is called. Sleeping between the queuing calls therefore does
    # not space the actions out, so the one-second gaps are queued with
    # pause() and become part of the chain itself.
    actions = ActionChains(driver)
    actions.move_to_element(input_tag)       # move the mouse onto the input box
    actions.pause(1)
    actions.send_keys_to_element(input_tag, 'Happy new year!!!')
    actions.pause(1)
    actions.move_to_element(button_tag)      # move the mouse onto the search button
    actions.pause(1)
    actions.click()                          # left-click at the current position
    actions.pause(1)
    actions.context_click()                  # right-click at the current position
    actions.perform()                        # run everything queued above, in order

    time.sleep(5)
finally:
    # quit() ends the whole driver session; close() would only close the
    # current window and leave the driver process running.
    driver.quit()
'''
小窍门:
如果要修改一批相同的代码,先选中一个要修改的代码,然后重复按Ctrl+j,
每按一次,会向下选中一个相同的代码,这样可以实现局部批量改动。
'''
运行后,结果正如我们描述的那样。
我们以前已经接触过cookie了,其作用是模拟登录,以及反反爬。
from selenium import webdriver
import time

# Load the Baidu home page and print every cookie the browser received.
driver = webdriver.Chrome()
try:
    driver.get('https://www.baidu.com/')
    # get_cookies() returns a list of dicts, one dict per cookie.
    cookies = driver.get_cookies()
    for cookie in cookies:
        print(cookie)
finally:
    # Always end the session so no browser/driver process is left behind.
    driver.quit()
结果
{'domain': '.baidu.com', 'expiry': 1611330550, 'httpOnly': False, 'name': 'BA_HECTOR', 'path': '/', 'secure': False, 'value': '882k0gala120ag4lmm1g0lpf70q'}
{'domain': '.baidu.com', 'expiry': 3758810596, 'httpOnly': False, 'name': 'BAIDUID_BFESS', 'path': '/', 'sameSite': 'None', 'secure': True, 'value': 'E60D4039A027FB92ABECD498946C3730:FG=1'}
{'domain': '.baidu.com', 'httpOnly': False, 'name': 'H_PS_PSSID', 'path': '/', 'secure': False, 'value': '33425_33258_33272_31660_33319_33545'}
{'domain': '.baidu.com', 'expiry': 1642862949, 'httpOnly': False, 'name': 'BAIDUID', 'path': '/', 'secure': False, 'value': 'E60D4039A027FB9295D8E83B7E347209:FG=1'}
{'domain': '.baidu.com', 'expiry': 3758810596, 'httpOnly': False, 'name': 'BIDUPSID', 'path': '/', 'secure': False, 'value': 'E60D4039A027FB92ABECD498946C3730'}
{'domain': '.baidu.com', 'expiry': 3758810596, 'httpOnly': False, 'name': 'PSTM', 'path': '/', 'secure': False, 'value': '1611326948'}
{'domain': 'www.baidu.com', 'expiry': 1612190950, 'httpOnly': False, 'name': 'BD_UPN', 'path': '/', 'secure': False, 'value': '12314753'}
{'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_HOME', 'path': '/', 'secure': False, 'value': '1'}
# @Time : 2021/1/25 11:56
# @Author : Guanghui Li
# @File : login_qq.py
# @Software: PyCharm
from selenium import webdriver
import time
import json
import requests

url = 'https://i.qq.com/?s_url=http%3A%2F%2Fuser.qzone.qq.com%2F767362321%2Finfocenter'
url_1 = 'https://xui.ptlogin2.qq.com/cgi-bin/xlogin?proxy_url=https%3A//qzs.qq.com/qzone/v6/portal/proxy.html&daid=5&&hide_title_bar=1&low_login=0&qlogin_auto_login=1&no_verifyimg=1&link_target=blank&appid=549000912&style=22&target=self&s_url=https%3A%2F%2Fqzs.qzone.qq.com%2Fqzone%2Fv5%2Floginsucc.html%3Fpara%3Dizone&pt_qr_app=%E6%89%8B%E6%9C%BAQQ%E7%A9%BA%E9%97%B4&pt_qr_link=http%3A//z.qzone.com/download.html&self_regurl=https%3A//qzs.qq.com/qzone/v6/reg/index.html&pt_qr_help_link=http%3A//z.qzone.com/download.html&pt_no_auth=0'  # the login page
# The login button to click has class="face"
url_2 = 'https://user.qzone.qq.com/767362321/infocenter?_t_=0.4369968262011419'

# --- First run only: capture cookies after a manual login -----------------
# Uncomment this section once to create qqzone_cookies.json, then comment it
# out again; later runs just read the saved file.
# driver = webdriver.Chrome()
# driver.get(url_2)
# driver.implicitly_wait(5)
# driver.maximize_window()
#
# time.sleep(5)  # wait for the page to load; click "log in" by hand meanwhile
# cookieLists = driver.get_cookies()
# jsonCookies = json.dumps(cookieLists)
# driver.quit()
# # save the captured cookies locally as JSON
# with open('qqzone_cookies.json','w') as f:
#     f.write(jsonCookies)

# --- Every run: replay the saved cookies with requests --------------------
with open('qqzone_cookies.json', 'r') as f:
    cookieLists = json.loads(f.read())

# Flatten the Selenium cookie dicts into a single "name=value; name=value"
# string suitable for a Cookie request header.
cookie_str = '; '.join(item['name'] + '=' + item['value'] for item in cookieLists)
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36',
    'cookie': cookie_str,
}

# The headers must actually be sent: without them the request carries no
# cookies and is treated as an anonymous visit.
res = requests.get(url_2, headers=headers)
html = res.text

# Save the fetched page so it can be inspected offline.
with open('qqzone.html', 'w', encoding='utf-8') as f:
    f.write(html)
(好像有些问题,后面再研究和修改)
原来是url的问题,把那一行url换成这个:
driver.get('https://xui.ptlogin2.qq.com/cgi-bin/xlogin?proxy_url=https%3A//qzs.qq.com/qzone/v6/portal/proxy.html&daid=5&&hide_title_bar=1&low_login=0&qlogin_auto_login=1&no_verifyimg=1&link_target=blank&appid=549000912&style=22&target=self&s_url=https%3A%2F%2Fqzs.qzone.qq.com%2Fqzone%2Fv5%2Floginsucc.html%3Fpara%3Dizone&pt_qr_app=手机QQ空间&pt_qr_link=http%3A//z.qzone.com/download.html&self_regurl=https%3A//qzs.qq.com/qzone/v6/reg/index.html&pt_qr_help_link=http%3A//z.qzone.com/download.html&pt_no_auth=0')
就可以了。
# @Time : 2021/1/25 14:52
# @Author : Guanghui Li
# @File : qqzone_mine.py
# @Software: PyCharm
import time
from selenium import webdriver
import json

url = 'https://user.qzone.qq.com/767362321/infocenter?_t_=0.4369968262011419'

# --- Session 1: open the page and capture the browser's cookies -----------
driver = webdriver.Chrome()
try:
    driver.get(url)
    time.sleep(5)  # NOTE(review): presumably the window for logging in by hand
    driver.maximize_window()
    time.sleep(3)
    cookies = json.dumps(driver.get_cookies())  # serialise the cookie list to JSON text
finally:
    driver.quit()

# Persist the cookies so later runs can reuse them.
with open('cookies_zone.json', 'w', encoding='utf-8') as f:
    f.write(cookies)

# --- Session 2: fresh browser, log in by replaying the saved cookies ------
driver = webdriver.Chrome()  # the first browser was closed, so start a new one
try:
    # Key point: visit the target site BEFORE adding cookies. A new browser
    # is not on the cookie's domain yet, and add_cookie() then fails with
    # InvalidCookieDomainException.
    driver.get(url)

    with open('cookies_zone.json', 'r', encoding='utf-8') as f:
        lst_cookies = json.loads(f.read())
    for i in lst_cookies:
        driver.add_cookie(i)

    # Reload so the page is requested again with the cookies attached.
    driver.get(url)
    time.sleep(1)
    driver.maximize_window()
    time.sleep(5)
finally:
    driver.quit()
'''
解决问题的帖子:
在使用selenium进行自动化登录的过程中已经获取到cookie后,依旧报错:selenium.common.exceptions.InvalidCookieDomainException: Message: invalid cookie domain
获取cookie和添加cookie原代码如下:
#获取cookie
dr = webdriver.Chrome("D:\softwarePro\BrowserDriver\chromedriver.exe")
dr.maximize_window()
dr.get(url)
c = dr.get_cookie('JSESSIONID')
print(c)
#添加cookie
dr = webdriver.Chrome("D:\softwarePro\BrowserDriver\chromedriver.exe")
dr.maximize_window()
dr.add_cookie({'domain': '192.168.2.211', 'httpOnly': True, 'name': 'JSESSIONID', 'path': '/smartcommty', 'sameSite': 'Lax', 'secure': False, 'value': '5574c24a-dbc4-4a7d-9607-cc24f5653ebf'})
dr.get(url)
dr.refresh()
经过网上查找资料,自我分析得知:selenium的默认域名为data,cookie中带域名,在设置cookie时发现当前域名不包含在cookie中,所以设置失败,一直都是data的这个页面。
解决方法就是:在设置cookies前,先访问需要登录的地址,然后设置cookies登录跳转,就OK了。
如下:
#添加cookie
dr = webdriver.Chrome("D:\softwarePro\BrowserDriver\chromedriver.exe")
dr.maximize_window()
dr.get(url)
dr.add_cookie({'domain': '192.168.2.211', 'httpOnly': True, 'name': 'JSESSIONID', 'path': '/smartcommty', 'sameSite': 'Lax', 'secure': False, 'value': '5574c24a-dbc4-4a7d-9607-cc24f5653ebf'})
dr.get(url)
dr.refresh()
解决了这个坑
'''
这里有两个要点:
# @Time : 2021/1/23 0:01
# @Author : Guanghui Li
# @File : login_fengbian_cookie.py
# @Software: PyCharm
from selenium import webdriver
import time
import json
from bs4 import BeautifulSoup
from selenium.webdriver.common.keys import Keys

url = 'https://www.pypypy.cn/#/apps/2/lecture/5dc547b3faeb8f00015a0ed0'

# --- First run only: capture cookies after a QR-code login ----------------
# Uncomment this section once to create cookies.json, then comment it out
# again; later runs just load the saved file.
# driver = webdriver.Chrome()
# driver.get(url)
# time.sleep(15)  # scan the QR code to log in during this pause
# cookies = driver.get_cookies()  # fetch the cookies
# cookies = json.dumps(cookies)  # serialise the cookie list to JSON text
# driver.quit()
#
#
# with open('cookies.json','w',encoding='utf-8') as f:
#     f.write(cookies)

# --- Every run: open the site, then attach the saved cookies --------------
driver = webdriver.Chrome()
driver.get(url)  # the site is opened before any cookie is added to the browser
time.sleep(1)
driver.maximize_window()
time.sleep(2)

with open('cookies.json', 'r', encoding='utf-8') as f:
    lst_cookies = json.loads(f.read())
for i in lst_cookies:
    driver.add_cookie(i)

# Lecture pages to scrape, in order.
urls = [
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547a8faeb8f00015a0ea8',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547a9faeb8f00015a0ead',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547aafaeb8f00015a0eb0',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547acfaeb8f00015a0eb5',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547adfaeb8f00015a0eb9',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547adfaeb8f00015a0ebb',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547aefaeb8f00015a0ec0',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547affaeb8f00015a0ec3',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547b0faeb8f00015a0ec6',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547b2faeb8f00015a0ecc',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547b3faeb8f00015a0ed0',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547b4faeb8f00015a0ed4',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547b4faeb8f00015a0ed7',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547b5faeb8f00015a0ed9',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547b6faeb8f00015a0edc',
    'https://www.pypypy.cn/#/apps/2/lecture/5dc547b6faeb8f00015a0edf',
]
from selenium.webdriver.common.by import By  # locator strategies for find_element


def _count_divs(browser):
    """Return the number of <div> elements in the browser's current page source."""
    return len(BeautifulSoup(browser.page_source, 'lxml').find_all('div'))


# Visit every lecture page, keep paging up until the page stops growing,
# then append the chat text of that page to a local text file.
try:
    for k, lecture_url in enumerate(urls):
        driver.get(lecture_url)
        time.sleep(2)
        driver.maximize_window()
        time.sleep(0.5)
        # Click the page body so it has focus and receives the key presses below.
        driver.find_element(By.TAG_NAME, 'body').click()
        time.sleep(0.5)

        # Press PAGE_UP in bursts of 50 and compare the <div> count before and
        # after each burst; stop once a burst no longer adds any <div>.
        # NOTE(review): presumably older chat content is loaded lazily while
        # scrolling up - confirm against the site.
        num_0, num_1 = 0, 1
        while num_0 < num_1:
            num_0 = _count_divs(driver)
            for _ in range(50):
                driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_UP)
                time.sleep(0.01)
            num_1 = _count_divs(driver)
        time.sleep(1)

        # Extract every chat block from the fully loaded page.
        soup = BeautifulSoup(driver.page_source, 'lxml')
        contents = soup.find_all('div', class_="plugin-markdown-chat")

        # Append mode: all lectures accumulate in the same file.
        with open('wind_spider_2021-01-25.txt', 'a', encoding='utf-8') as f:
            f.write('\n')
            f.write('**=这是第{}关=**'.format(k))  # header line carrying the lecture index
            f.write('\n')
            for i in contents:
                f.write(i.text)
                f.write('\n')
            # Separator line between lectures.
            f.write('*=' * 100)
            f.write('*=' * 100)
            f.write('\n')

    print('over')
    time.sleep(3)
finally:
    # End the browser session even if a page fails part-way through.
    driver.quit()
注释掉的内容是第一次登录的时候获取cookies的代码,第二次登录就可以直接注释掉了,因为cookies已经保存到本地,直接读取携带着登录就行了。
页面打开后,内容并不会立刻全部加载完成,如果我们要选择的元素还没有出现,就会导致报错。这个时候,我们可以设置等待。等待共有两种方式:显式等待和隐式等待。
又叫强制等待
import time
from selenium import webdriver
from selenium.webdriver.common.by import By  # locator strategies for find_element

# Demo of fixed ("forced") waits: time.sleep always blocks for the full
# duration, whether or not the page is already ready. It is not a
# condition-based explicit wait (that is WebDriverWait).
url = 'https://www.baidu.com/'
driver = webdriver.Chrome()
try:
    driver.get(url)
    time.sleep(2)  # fixed wait for the page to load
    # find_element(By.ID, ...) is used instead of find_element_by_id, which
    # newer Selenium releases no longer provide.
    driver.find_element(By.ID, 'kw').send_keys('英国疫情')  # type the query
    driver.find_element(By.ID, 'su').click()                # click search
    time.sleep(1)
    driver.maximize_window()  # maximise the window
    time.sleep(10)  # fixed wait so the results stay on screen
finally:
    driver.quit()
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait  # condition-based wait helper
from selenium.webdriver.support import expected_conditions as EC  # ready-made wait conditions
from selenium.webdriver.common.by import By  # locator strategies

# Element ids used on the ticket page:
#   qd_closeDefaultWarningWindowDialog_id - "OK" button of the notice dialog
#   query_ticket                          - the query button
driver = webdriver.Chrome()
try:
    driver.get('https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc')  # ticket search page
    time.sleep(1)
    driver.maximize_window()
    time.sleep(1)

    # Dismiss the notice dialog. Only explicit waits are used in this script:
    # the Selenium documentation advises against combining implicit and
    # explicit waits because the resulting timeouts become unpredictable.
    WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.ID, 'qd_closeDefaultWarningWindowDialog_id'))
    ).click()

    # From here the origin and destination must be typed in BY HAND.
    # Each condition takes two arguments, a (By, value) locator tuple and the
    # expected text, and waits up to 30 s for the input's value to contain it.
    WebDriverWait(driver, 30).until(
        EC.text_to_be_present_in_element_value((By.ID, "fromStationText"), '西安'))
    WebDriverWait(driver, 30).until(
        EC.text_to_be_present_in_element_value((By.ID, "toStationText"), '郑州'))

    # Once both stations are filled in, wait 3 seconds and click the query button.
    time.sleep(3)
    driver.find_element(By.ID, 'query_ticket').click()

    # Leave the results visible for 10 seconds before quitting.
    time.sleep(10)
finally:
    driver.quit()
因为输入框是隐藏的,我们暂时手动输入出发地和目的地,后面再解决这个问题。
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By  # locator strategies for find_element

# Search Baidu, then ten times over: scroll down three pages and click the
# pager link to move on.
url = 'https://www.baidu.com/'
driver = webdriver.Chrome()
try:
    driver.get(url)
    # Implicit wait: a session-wide setting. Every element lookup polls for up
    # to 10 s before raising, so it only needs to be set once.
    driver.implicitly_wait(10)
    driver.find_element(By.ID, 'kw').send_keys('英国疫情')  # type the query
    driver.find_element(By.ID, 'su').click()                # click search
    driver.maximize_window()  # maximise the window
    time.sleep(10)
    driver.find_element(By.TAG_NAME, 'body').click()  # click the page so it has focus

    # Paging loop. The two loops use distinct variable names; previously both
    # were named `i`, so the inner loop overwrote the outer counter.
    for _page in range(10):
        # Scrolling loop: three PAGE_DOWN presses, one second apart.
        for _scroll in range(3):
            driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
            time.sleep(1)
        time.sleep(2)
        # Click the "next page" link.
        # NOTE(review): a[10] is a positional index into the pager links;
        # verify it still points at "next page" on pages after the first.
        driver.find_element(By.XPATH, '//*[@id="page"]/div/a[10]').click()
finally:
    driver.quit()
这个案例是打开百度网页,搜索输入内容。向下滚动,点击下一页,循环10次。
能不能同时打开两个窗口呢?当然可以,下面我们同时打开百度和豆瓣。
要同时打开另一个窗口,可以通过selenium的execute_script方法执行JavaScript语句来实现,看注释:
from selenium import webdriver
import time

# Demo: open Baidu, open Douban in a second window via JavaScript, then
# close the two windows one after the other.
BAIDU_URL = 'https://www.baidu.com/'
OPEN_DOUBAN_JS = 'window.open("https://www.douban.com")'  # JavaScript, executed inside the page

driver = webdriver.Chrome()
driver.get(BAIDU_URL)                  # first window: Baidu
time.sleep(2)
driver.execute_script(OPEN_DOUBAN_JS)  # second window: Douban
time.sleep(10)
# The driver has not switched windows, so close() acts on the window that
# was opened first.
driver.close()
time.sleep(1)
# quit() closes whatever is still open and ends the session.
driver.quit()
我们在打开的窗口间通过索引值切换,并打印出当前活动窗口的url来验证
from selenium import webdriver
import time

# Demo: open two windows, switch between them by index, and print the
# active window's URL to confirm the switch.
driver = webdriver.Chrome()
driver.get('https://www.baidu.com/')  # first window: Baidu
time.sleep(2)
# Open Douban in a second window by running JavaScript inside the page.
driver.execute_script('window.open("https://www.douban.com")')
time.sleep(3)
# driver.close()  # would close the window that was opened first
time.sleep(1)
# driver.quit()  # would close every remaining window and end the session

# window_handles lists the open windows; index 0 is Baidu, index 1 is Douban.
handles = driver.window_handles
driver.switch_to.window(handles[1])
print(driver.current_url)  # URL of the window that is now active
这次博客就写到这里。