使用 Selenium 登录某些页面或系统时,经常会遇到验证码。
获取验证码并自动填写,是使用 Selenium 实现自动登录的关键。
识别图片中的验证码,大致分为以下几个步骤:
1. 获取验证码图片;
2. 使用识别软件(或打码平台)识别图中的验证码信息;
3. 取回识别出的验证码,并填写到页面中。
识别验证码图片,我选择使用云打码:http://www.yundama.com/about.html
注册并登录,然后花费 1 元购买使用权。
注册成为开发者,进入【我的软件】。
如图,点击“添加新软件”,把其中的软件代码和通讯密钥保存下来。
下载 PythonHTTP 调用示例,取出其中的 Python 示例脚本,
并把它放入你要使用的文件夹内(后文以模块名 YDMHTTP 导入它),该代码内容如下:
import http.client, mimetypes, urllib, json, time, requests
######################################################################
class YDMHttp:
    """Minimal HTTP client for the Yundama captcha-recognition API.

    Every API call is a form POST to ``apiurl`` that carries the account
    credentials plus a ``method`` field; the JSON reply is decoded into a
    Python object.  Failures are reported as negative integers: the ``ret``
    code found in the reply, ``-9001`` when the decoded reply is empty, or
    ``-3003`` when :meth:`decode` runs out of polling attempts.
    """

    apiurl = 'http://api.yundama.com/api.php'
    # Class-level defaults; __init__ overrides them on every instance.
    username = ''
    password = ''
    appid = ''
    appkey = ''

    def __init__(self, username, password, appid, appkey):
        """Store the account credentials; ``appid`` is kept as a string."""
        self.username = username
        self.password = password
        self.appid = str(appid)
        self.appkey = appkey

    def _fields(self, method, **extra):
        """Build the form fields for *method*: credentials plus *extra*."""
        fields = {
            'method': method,
            'username': self.username,
            'password': self.password,
            'appid': self.appid,
            'appkey': self.appkey,
        }
        fields.update(extra)
        return fields

    @staticmethod
    def _value_or_error(response, key):
        """Return ``response[key]``, or the error code describing the failure.

        An empty reply yields ``-9001``; a reply whose ``ret`` is negative
        yields that ``ret`` value.
        """
        if not response:
            return -9001
        ret = response['ret']
        if ret and ret < 0:
            return ret
        return response[key]

    def request(self, fields, files=None):
        """POST *fields* (and optional *files*) and return the decoded JSON.

        Args:
            fields: form fields to send.
            files: optional mapping of form-field name to file path.
        """
        return json.loads(self.post_url(self.apiurl, fields, files))

    def balance(self):
        """Return the ``balance`` field of the reply, or a negative error code."""
        response = self.request(self._fields('balance'))
        return self._value_or_error(response, 'balance')

    def login(self):
        """Return the ``uid`` field of the reply, or a negative error code."""
        response = self.request(self._fields('login'))
        return self._value_or_error(response, 'uid')

    def upload(self, filename, codetype, timeout):
        """Upload the image at *filename* and return its ``cid``.

        Args:
            filename: path of the captcha image to upload.
            codetype: captcha type id, sent as a string.
            timeout: timeout in seconds, sent as a string.

        Returns:
            The ``cid`` field of the reply, or a negative error code.
        """
        data = self._fields('upload', codetype=str(codetype), timeout=str(timeout))
        response = self.request(data, {'file': filename})
        return self._value_or_error(response, 'cid')

    def result(self, cid):
        """Return the ``text`` field of the reply for *cid*, or ``''`` if empty."""
        response = self.request(self._fields('result', cid=str(cid)))
        if not response:
            return ''
        return response['text'] or ''

    def decode(self, filename, codetype, timeout):
        """Upload a captcha image and poll for its recognised text.

        The result is queried up to *timeout* times, sleeping one second
        after every empty answer.

        Returns:
            ``(cid, text)`` on success, ``(-3003, '')`` when every poll came
            back empty, or ``(error_code, '')`` when the upload failed.
        """
        cid = self.upload(filename, codetype, timeout)
        if cid <= 0:
            return cid, ''
        for _ in range(timeout):
            text = self.result(cid)
            if text != '':
                return cid, text
            time.sleep(1)
        return -3003, ''

    def report(self, cid):
        """Send a ``report`` call for *cid* with ``flag`` set to ``'0'``.

        Returns:
            The ``ret`` field of the reply, or ``-9001`` for an empty reply.
        """
        response = self.request(self._fields('report', cid=str(cid), flag='0'))
        if not response:
            return -9001
        return response['ret']

    def post_url(self, url, fields, files=None):
        """POST *fields* to *url*, attaching the files named in *files*.

        Args:
            url: endpoint to post to.
            fields: form fields to send.
            files: optional mapping of form-field name to file path.  The
                mapping is left untouched; the files are opened here and
                closed again once the request has finished.

        Returns:
            The response body as text.
        """
        opened = {}
        try:
            for key, path in (files or {}).items():
                opened[key] = open(path, 'rb')
            res = requests.post(url, files=opened, data=fields)
            return res.text
        finally:
            # Close every handle, including after a failed open or request.
            for handle in opened.values():
                handle.close()
######################################################################
def yanzheng(filename, codetype=5000, timeout=60):
    """Recognise the captcha image at *filename* through Yundama.

    Logs in, prints the uid and the account balance, then uploads the image
    and polls for the recognised text.

    Args:
        filename: path of the captcha image to upload.
        codetype: captcha type id (e.g. 1004 = 4 letters/digits); types are
            priced differently and a wrong type lowers the recognition rate.
            See http://www.yundama.com/price.html for the full list.
        timeout: timeout in seconds.

    Returns:
        ``(cid, result)`` as returned by ``YDMHttp.decode``; ``(-1, '')``
        when the credentials below are still placeholders.
    """
    # Account user name.
    username = '*********'
    # Account password.
    password = '************'
    # Software ID, required for the developer revenue share; shown under
    # "My software" in the developer console.  A string is fine here because
    # YDMHttp converts the id with str() anyway.
    appid = '****'
    # Software key, obtained from the same "My software" page.
    appkey = '***************'

    # Refuse to call the service while the credentials are still the
    # template values ('username' or a run of asterisks).
    if username == 'username' or not username.strip('*'):
        print('请设置好相关参数再测试')
        return -1, ''

    yundama = YDMHttp(username, password, appid, appkey)

    # Log in to Yundama.
    uid = yundama.login()
    print('uid: %s' % uid)

    # Query the remaining balance.
    balance = yundama.balance()
    print('balance: %s' % balance)

    # Recognise: image path, captcha type id, timeout (seconds).
    cid, result = yundama.decode(filename, codetype, timeout)
    print('cid: %s, result: %s' % (cid, result))
    return cid, result
######################################################################
if __name__ == '__main__':
    # Manual smoke test: log in, print the balance and recognise one image.

    # Account user name.
    username = '********'
    # Account password.
    password = '**********'
    # Software ID, required for the developer revenue share; shown under
    # "My software" in the developer console.  A string is fine here because
    # YDMHttp converts the id with str() anyway.
    appid = '*****'
    # Software key, obtained from the same "My software" page.
    appkey = '**************'
    # Captcha image file.
    filename = 'captcha.png'
    # Captcha type id (e.g. 1004 = 4 letters/digits); types are priced
    # differently and a wrong type lowers the recognition rate.
    # Full list: http://www.yundama.com/price.html
    codetype = 5000
    # Timeout in seconds.
    timeout = 60

    # Refuse to call the service while the credentials are still the
    # template values ('username' or a run of asterisks).
    if username == 'username' or not username.strip('*'):
        print('请设置好相关参数再测试')
    else:
        yundama = YDMHttp(username, password, appid, appkey)

        # Log in to Yundama.
        uid = yundama.login()
        print('uid: %s' % uid)

        # Query the remaining balance.
        balance = yundama.balance()
        print('balance: %s' % balance)

        # Recognise: image path, captcha type id, timeout (seconds).
        cid, result = yundama.decode(filename, codetype, timeout)
        print('cid: %s, result: %s' % (cid, result))
登录信息即为账号、密码,以及之前获得的软件代码和通讯密钥。
现在开始编写 Selenium 代码(这里博主自动登录的是云打码的登录页面):
import time
from YDMHTTP import yanzheng
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
#安装pip install pillow
from PIL import Image
# Locator constants for find_element(); the find_element_by_* helpers used
# previously are not available in Selenium 4.
from selenium.webdriver.common.by import By

driver = webdriver.Firefox()
try:
    driver.get('http://www.yundama.com/')
    wait = WebDriverWait(driver, 10)

    # Locate the user-name input and fill it in.
    username = wait.until(lambda d: d.find_element(By.ID, 'username'))
    username.send_keys('账号')

    # Locate the password input and fill it in.
    password = wait.until(lambda d: d.find_element(By.ID, 'password'))
    password.send_keys('密码')

    # Locate the captcha image so it can be cut out of a screenshot.
    captcha = wait.until(lambda d: d.find_element(By.ID, 'verifyImg'))

    # Screenshot the whole page.
    driver.save_screenshot('page.png')

    # Bounding box of the captcha image: position plus its own width/height.
    # NOTE(review): these look like page coordinates; if the screenshot is
    # scaled (e.g. HiDPI display) the crop box may be off — confirm.
    left = captcha.location['x']
    top = captcha.location['y']
    right = left + captcha.size['width']
    bottom = top + captcha.size['height']
    print('right', right)
    print('bottom', bottom)

    # Crop the captcha out of the page screenshot and save it locally.
    img = Image.open('page.png')
    img = img.crop((left, top, right, bottom))
    img.save('captcha.png')

    # Upload the cropped captcha for online recognition.  yanzheng() returns
    # a (cid, text) pair; only the text belongs in the input box.
    cid, text = yanzheng('captcha.png')
    print('识别结果:', text)
    if not text:
        # Submitting an empty captcha cannot succeed, so stop here.
        raise RuntimeError('captcha recognition failed (cid=%s)' % cid)

    # Locate the captcha input box and type the recognised text.
    captcha_input = driver.find_element(By.ID, 'vcode')
    captcha_input.send_keys(text)

    # Click the login button.
    driver.find_element(By.CSS_SELECTOR, '.sub').click()

    # Give the page time to render after login before reading its source.
    time.sleep(5)
    print(driver.page_source)
finally:
    # Always shut the browser down, even when a step above fails.
    driver.quit()