#描述
脚本一共有两版,分别使用了 BeautifulSoup + selenium + chrome 和 BeautifulSoup + selenium + firefox。实现了对于某网站刷投票的功能。
1.该网站投票选项每次刷新页面后会重新随机排列,但是选项的 id 不会改变,因此可以通过 browser.find_element_by_id(ID).click() 进行操作。
2.该网站选满有且只有10个选项才能投票,并且会弹窗提示。
3.该网站限制IP投票次数。
4.该网站限制每次选择选项的间隔时间。
5.网站是ajax异步更新。
#准备工作
####下载chrome浏览器和chromedriver
chromedriver下载地址:
http://chromedriver.storage.googleapis.com/index.html (有墙)
http://npm.taobao.org/mirrors/chromedriver/ (无墙)
####下载火狐浏览器和geckodriver
geckodriver 下载地址:
https://github.com/mozilla/geckodriver/releases/
## BeautifulSoup + selenium + chrome
from bs4 import BeautifulSoup
from selenium import webdriver
import time
from random import sample,choice
import requests
import re
def scanWeb(addr, ips=None):
    """Cast votes on *addr* once per proxy in *ips*.

    For each "ip:port" proxy string, launches a fresh Chrome instance
    configured with that proxy and a random user agent, selects 10 voting
    options (see selectItemID), accepts the confirmation alert and submits.

    Args:
        addr: URL of the voting page.
        ips: list of "ip:port" proxy strings (default: empty list).
    """
    # Avoid the mutable-default-argument pitfall (shared list across calls).
    if ips is None:
        ips = []
    success = 0
    for num, ipport in enumerate(ips):
        result = str(num) + ". 代理:" + ipport
        # Build browser options: language, random user agent, proxy.
        options = webdriver.ChromeOptions()
        options.add_argument('lang=zh_CN.UTF-8')
        options.add_argument('user-agent="' + selectUserAgent() + '"')
        options.add_argument('--proxy-server=http://' + ipport)
        browser = webdriver.Chrome(chrome_options=options)
        browser.set_page_load_timeout(10)   # fail fast on a slow proxy
        browser.set_script_timeout(10)      # script execution timeout
        try:
            browser.get(addr)
        except Exception:
            # Page load timed out — stop loading and carry on; the DOM we
            # need may already be present (the site updates via ajax).
            print("加载页面太慢,停止加载,继续下一步操作")
            browser.execute_script("window.stop()")
        # Give the ajax-driven page time to finish updating.
        time.sleep(3)
        sltI = selectItemID()
        # Click the 10 required options, then submit.
        try:
            for ID in sltI:
                browser.find_element_by_id(ID).click()
                # The site rejects selections made too quickly.
                time.sleep(1)
            time.sleep(1)
            # Dismiss the confirmation popup (switch_to_alert is deprecated).
            al = browser.switch_to.alert
            al.accept()
            browser.find_element_by_class_name("btn").click()  # submit vote
        except Exception:
            # Any failure (blocked IP, dead proxy, missing element): log,
            # clean up this browser and move on to the next proxy.
            print(result + "失败")
            browser.quit()
            continue
        success += 1
        time.sleep(2)
        browser.quit()
        print("{0}成功,共成功{1}次".format(result, success))
def selectItemID():
    """Build the list of 10 option ids to vote for.

    Returns the 4 mandatory option ids followed by 6 ids drawn at random
    (without replacement) from the optional pool.
    """
    mandatory = ["v1275", "v1270", "v1300", "v1278"]
    optional = ['v1344', 'v1267', 'v1268', 'v1280', 'v1304', 'v1148', 'v1283', 'v1276', 'v1274',
                'v1288', 'v1222', 'v1286', 'v1277', 'v1303', 'v1285', 'v1273', 'v1309', 'v1305', 'v1284',
                'v1282']
    return mandatory + sample(optional, 6)
def selectUserAgent():
    """Pick one User-Agent string at random from a fixed pool."""
    agents = [
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0; Baiduspider-ads) Gecko/17.0 Firefox/17.0",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9b4) Gecko/2008030317 Firefox/3.0b4",
        "Mozilla/5.0 (Windows; U; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; BIDUBrowser 7.6)",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.99 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; Win64; x64; Trident/7.0; Touch; LCJB; rv:11.0) like Gecko",
        "Mozilla/5.0(Macintosh;U;IntelMacOSX10_6_8;en-us)AppleWebKit/534.50(KHTML,likeGecko)Version/5.1Safari/534.50",
        "Mozilla/5.0(Windows;U;WindowsNT6.1;en-us)AppleWebKit/534.50(KHTML,likeGecko)Version/5.1Safari/534.50",
        "Mozilla/5.0(compatible;MSIE9.0;WindowsNT6.1;Trident/5.0",
        "Opera/9.80(WindowsNT6.1;U;en)Presto/2.8.131Version/11.11",
        "Mozilla/5.0(Macintosh;IntelMacOSX10_7_0)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.56Safari/535.11",
    ]
    return choice(agents)
def get_ip():
    """Scrape free proxies and return them as "ip:port" strings.

    Fetches the xicidaili high-anonymity proxy list, extracts every IP and
    every port from the table cells, and pairs them positionally,
    e.g. "115.112.88.23:8080".
    """
    headers = { "Accept":"text/html,application/xhtml+xml,application/xml;",
                "Accept-Encoding":"gzip, deflate, sdch",
                "Accept-Language":"zh-CN,zh;q=0.8,en;q=0.6",
                "Referer":"http://www.xicidaili.com",
                "User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36"
    }
    response = requests.get("http://www.xicidaili.com/nn", headers=headers)
    # Flatten the first table's <td> cells into one string and regex-scan it.
    cells = str(BeautifulSoup(response.text, 'html.parser').table.find_all("td"))
    addresses = re.findall(r'(\d+\.\d+\.\d+\.\d+) ', cells)  # IPs
    ports = re.findall(r'(\d+) ', cells)                     # ports
    # Pair each address with the port at the same table position.
    return [":".join(pair) for pair in zip(addresses, ports)]
# Guard the entry point so importing this module does not immediately
# scrape proxies and start voting.
if __name__ == "__main__":
    scanWeb("http://xxxxxxxxxxxxxxxxxxxxxxxxxx/", get_ip())
## BeautifulSoup + selenium + firefox
from bs4 import BeautifulSoup
from selenium import webdriver
import time
from random import sample,choice
import requests
import re
def scanWeb(addr, ips=None):
    """Cast votes on *addr* once per proxy in *ips* using Firefox.

    For each "ip:port" proxy string, launches Firefox with that proxy and a
    random user agent, clicks the 10 options via injected JavaScript,
    accepts the confirmation alert and submits.

    Args:
        addr: URL of the voting page.
        ips: list of "ip:port" proxy strings (default: empty list).
    """
    # Avoid the mutable-default-argument pitfall (shared list across calls).
    if ips is None:
        ips = []
    for num, ipstring in enumerate(ips):
        ip, port = ipstring.split(":")
        profile = webdriver.FirefoxProfile()
        # Configure a manual HTTP proxy; the port preference is an integer,
        # so convert it (the split above yields a string).
        profile.set_preference("network.proxy.type", 1)
        profile.set_preference("network.proxy.http", ip)
        profile.set_preference("network.proxy.http_port", int(port))
        profile.set_preference("general.useragent.override", selectUserAgent())
        profile.update_preferences()
        driver = webdriver.Firefox(profile)
        driver.set_page_load_timeout(10)  # fail fast on a slow proxy
        driver.set_script_timeout(10)     # script execution timeout
        try:
            driver.get(addr)
        except Exception:
            # Page load timed out — stop loading and carry on.
            # Fixed: the original called browser.execute_script here, but
            # no name `browser` exists in this version (NameError).
            print("加载页面太慢,停止加载,继续下一步操作")
            driver.execute_script("window.stop()")
        # Give the ajax-driven page time to finish updating.
        time.sleep(3)
        sltI = selectItemID()
        # Click the 10 required options via JS, then submit (best effort:
        # a dead proxy or blocked IP simply moves on to the next one).
        try:
            for ID in sltI:
                js2 = "var q=document.getElementById('" + ID + "').click()"
                driver.execute_script(js2)
                # The site rejects selections made too quickly.
                time.sleep(3)
            # Dismiss the confirmation popup (switch_to_alert is deprecated).
            al = driver.switch_to.alert
            al.accept()
            driver.find_element_by_class_name("btn").click()  # submit vote
        except Exception:
            pass
        driver.quit()
def selectUserAgent():
    """Return a random User-Agent string from a fixed pool of 12."""
    pool = [
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0; Baiduspider-ads) Gecko/17.0 Firefox/17.0",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9b4) Gecko/2008030317 Firefox/3.0b4",
        "Mozilla/5.0 (Windows; U; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; BIDUBrowser 7.6)",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.99 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; Win64; x64; Trident/7.0; Touch; LCJB; rv:11.0) like Gecko",
        "Mozilla/5.0(Macintosh;U;IntelMacOSX10_6_8;en-us)AppleWebKit/534.50(KHTML,likeGecko)Version/5.1Safari/534.50",
        "Mozilla/5.0(Windows;U;WindowsNT6.1;en-us)AppleWebKit/534.50(KHTML,likeGecko)Version/5.1Safari/534.50",
        "Mozilla/5.0(compatible;MSIE9.0;WindowsNT6.1;Trident/5.0",
        "Opera/9.80(WindowsNT6.1;U;en)Presto/2.8.131Version/11.11",
        "Mozilla/5.0(Macintosh;IntelMacOSX10_7_0)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.56Safari/535.11",
    ]
    return choice(pool)
def selectItemID():
    """Return 10 option ids: the 4 fixed picks plus 6 random extras.

    The extras are drawn without replacement from the optional pool, so
    the result never contains duplicates.
    """
    fixed = ["v1275", "v1270", "v1300", "v1278"]
    pool = ['v1344', 'v1267', 'v1268', 'v1280', 'v1304', 'v1148', 'v1283', 'v1276', 'v1274',
            'v1288', 'v1222', 'v1286', 'v1277', 'v1303', 'v1285', 'v1273', 'v1309', 'v1305', 'v1284',
            'v1282']
    extras = sample(pool, 6)
    return fixed + extras
def get_ip():
    """Fetch the xicidaili proxy list and return "ip:port" strings.

    The page's first table is scanned with two regexes: one for dotted-quad
    IPs, one for ports; results are zipped together positionally.
    """
    url = "http://www.xicidaili.com/nn"
    headers = { "Accept":"text/html,application/xhtml+xml,application/xml;",
                "Accept-Encoding":"gzip, deflate, sdch",
                "Accept-Language":"zh-CN,zh;q=0.8,en;q=0.6",
                "Referer":"http://www.xicidaili.com",
                "User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36"
    }
    html = requests.get(url, headers=headers).text
    table_cells = BeautifulSoup(html, 'html.parser').table.find_all("td")
    blob = str(table_cells)
    found_ips = re.findall(r'(\d+\.\d+\.\d+\.\d+) ', blob)   # all IPs
    found_ports = re.findall(r'(\d+) ', blob)                # all ports
    # e.g. ("115.112.88.23", "8080") -> "115.112.88.23:8080"
    return [":".join(entry) for entry in zip(found_ips, found_ports)]
# Guard the entry point so importing this module does not immediately
# scrape proxies and start voting.
if __name__ == "__main__":
    scanWeb("http://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", get_ip())
欢迎关注我的公众号。