代码操作非常快 → 有的标签还没加载 → 找就找不到 → 就会报错
设置等待:显式等待,隐式等待
# Implicit wait: when looking up an element that is not there yet,
# keep waiting for it for at most 10 s before giving up.
bro.implicitly_wait(10)
# Element operations listed by these notes.
# NOTE(review): written here without a receiver; presumably each is called
# on an element object returned by a find_element lookup -- confirm.
# Click the element.
click()
# Type the given text into the element.
send_keys("内容")
# Clear the element's current content.
clear()
在使用selenium操作浏览器的时候,可以自己写js执行,会用这东西做什么?
# Independent examples of running custom JavaScript in the current page.
# NOTE(review): an open alert blocks later commands until it is dismissed,
# so these lines are meant to be tried one at a time, not as one script.

# Pop up a JavaScript alert dialog.
bro.execute_script('alert("美女")')
# Read a page-level JS variable. execute_script only hands back what the
# script explicitly `return`s; a bare console.log(...) always gives None,
# so log the value AND return it.
res = bro.execute_script('console.log(urlMap); return urlMap')
# Open a new blank tab/window.
bro.execute_script('open()')
# Scroll to the bottom of the document.
bro.execute_script('scrollTo(0,document.documentElement.scrollHeight)')
# Show the current location in an alert.
bro.execute_script('alert(location)')
# Navigate by assigning to `location`.
bro.execute_script('location="http://www.baidu.com"')
# Show the cookies visible to JavaScript in an alert.
bro.execute_script('alert(document.cookie)')
from selenium import webdriver
import time
# Demo: open a second tab, browse in it, walk the history back and forward,
# then close the tab and end the session.
bro = webdriver.Firefox()
try:
    bro.get('https://www.cnblogs.com/liuqingzheng/p/16005896.html')
    bro.implicitly_wait(10)
    # Open a new tab
    bro.execute_script('window.open()')
    # Switch the driver to the newly opened tab (second handle)
    bro.switch_to.window(bro.window_handles[1])
    bro.get('https://www.baidu.com/')
    time.sleep(2)
    bro.get('http://www.taobao.com')
    time.sleep(2)
    # Go back one step in this tab's history
    bro.back()
    time.sleep(2)
    # Go forward again
    bro.forward()
    time.sleep(2)
    # Close only the current tab
    bro.close()
finally:
    # End the whole browser session, even if a step above raised.
    # Nothing may be called on `bro` after quit(): the session is gone,
    # so a trailing close() would fail.
    bro.quit()
以后要爬取的数据,要登录后才能看到
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import json
from selenium.webdriver.common.by import By
# Log in to cnblogs through a real browser and save the session cookies to
# cnblogs.json so later runs can reuse the logged-in state.

# Turn off the Blink "AutomationControlled" feature so the site is less
# likely to detect that the browser is driven by automation software.
options = Options()
options.add_argument("--disable-blink-features=AutomationControlled")
bro = webdriver.Chrome(options=options)
try:
    bro.get('https://www.cnblogs.com/')
    bro.implicitly_wait(10)
    bro.maximize_window()
    login_btn = bro.find_element(By.LINK_TEXT, '登录')
    login_btn.click()
    time.sleep(2)
    # Locate the username / password inputs and the submit button
    username = bro.find_element(By.CSS_SELECTOR, '#mat-input-0')
    password = bro.find_element(By.ID, 'mat-input-1')
    submit_btn = bro.find_element(
        By.CSS_SELECTOR,
        'body > app-root > app-sign-in-layout > div > div > app-sign-in > app-content-container > div > div > div > form > div > button',
    )
    time.sleep(1)
    username.send_keys('@qq.com')
    time.sleep(1)
    password.send_keys('#')
    time.sleep(1)
    # Submitting either logs in directly or makes a captcha pop up.
    submit_btn.click()
    # Look for the captcha only after submitting, and only click it when it
    # is really there: find_elements returns an empty list (no exception)
    # when nothing matches, so the direct-login case no longer crashes.
    captcha = bro.find_elements(By.ID, 'Shape3')
    if captcha:
        captcha[0].click()
    # Pause so the captcha can be solved by hand in the browser window
    # before the script continues.
    time.sleep(10)
    # At this point the browser is expected to be logged in:
    # take its cookies and store them on disk.
    cookies = bro.get_cookies()
    with open('cnblogs.json', 'w', encoding='utf-8') as f:
        json.dump(cookies, f)
    time.sleep(2)
finally:
    # quit() shuts down the browser and the driver process, also on errors
    bro.quit()
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import json
from selenium.webdriver.common.by import By
# Reuse the cookies saved in cnblogs.json: load them into a fresh browser
# session and refresh so the page shows the logged-in state.
options = Options()
options.add_argument("--disable-blink-features=AutomationControlled")
bro = webdriver.Chrome(options=options)
try:
    # Cookies can only be added for the site that is currently open,
    # so the page is loaded before add_cookie is called.
    bro.get('https://www.cnblogs.com/')
    bro.implicitly_wait(10)
    bro.maximize_window()
    time.sleep(5)
    # Read the stored cookies (the file was written as UTF-8)
    with open('cnblogs.json', 'r', encoding='utf-8') as f:
        cookies = json.load(f)
    # Write every cookie into the browser.
    # Per the original note: cookies saved from a session that was not
    # logged in will raise an error here.
    for item in cookies:
        bro.add_cookie(item)
    # Reload so the site sees the injected cookies
    bro.refresh()
    time.sleep(5)
finally:
    # quit() shuts down the browser and the driver process, also on errors
    bro.quit()
使用selenium登录 → 拿到cookie
点赞 使用requests 用cookie点赞
# 访问首页,解析出id号
import requests
from bs4 import BeautifulSoup
# json is needed to read the saved cookie file; imported here so this
# snippet is runnable on its own.
import json

# ---- Visit the site with the cookies obtained from the Selenium login ----
session = requests.Session()
# Load the cookies saved locally
with open('chouti.json', 'r', encoding='utf-8') as f:
    cookie_list = json.load(f)
# Selenium stores cookies as a list of dicts with 'name' / 'value' keys,
# while requests expects a flat {name: value} mapping -- convert.
cookie = {item['name']: item['value'] for item in cookie_list}
# Install the cookies on the session itself. Cookies passed to a single
# request via `cookies=` are NOT kept for later requests, so the vote POSTs
# below would otherwise be sent without the login cookies.
session.cookies.update(cookie)
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'}
res = session.get('https://dig.chouti.com/', headers=header)
soup = BeautifulSoup(res.text, 'html.parser')
print(res.text)
# Every article on the front page is a <div class="link-item"> whose
# data-id attribute is read as the article id.
divs = soup.find_all(name='div', class_='link-item')
for div in divs:
    article_id = div.attrs.get('data-id')
    if not article_id:
        # No id on this item -- nothing to vote on
        continue
    data = {
        'linkId': article_id
    }
    # Up-vote the article using the logged-in session
    res1 = session.post('https://dig.chouti.com/link/vote', data=data, headers=header)
    print(res1.text)