云打码:http://www.yundama.com/ (能够解决通用的验证码识别)
极验验证码智能识别辅助:http://jiyandoc.c2567.com/ (能够解决复杂验证码的识别)
下面的代码由云打码平台提供,这里做了简单修改,并在其基础上实现了两个方法(indetify 和 indetify_by_filepath):
其中需要自己配置的地方是:
username = 'whoarewe' # 用户名
password = '***' # 密码
appid = 4283 # appid
appkey = '02074c64f0d0bb9efb2df455537b01c3' # appkey
codetype = 1004 # 验证码类型
云打码官方提供的api如下:
# THE WINTER IS COMING! the old driver will be driving who was a man of the world!
# -*- coding: utf-8 -*- python 3.6.7, create time is 18-12-13 上午9:33 GMT+8
# coding:utf-8
import requests
import json
import time
class YDMHttp:
    """Small HTTP client for the Yundama captcha-recognition API.

    Every API call is a form POST to ``apiurl`` carrying a ``method`` name
    together with the account credentials and the app id/key; the reply is
    parsed as JSON.
    """

    apiurl = 'http://api.yundama.com/api.php'
    username = ''
    password = ''
    appid = ''
    appkey = ''

    def __init__(self, username, password, appid, appkey):
        """Store the credentials; ``appid`` is converted to ``str``."""
        self.username = username
        self.password = password
        self.appid = str(appid)
        self.appkey = appkey

    def _base_fields(self, method):
        """Return the form fields shared by every API call."""
        return {'method': method, 'username': self.username, 'password': self.password,
                'appid': self.appid, 'appkey': self.appkey}

    @staticmethod
    def _value_or_error(response, key):
        """Return ``response[key]``, the negative ``ret`` code, or -9001 for an empty reply."""
        if not response:
            return -9001
        if response['ret'] and response['ret'] < 0:
            return response['ret']
        return response[key]

    def request(self, fields, files=None):
        """POST ``fields`` (and optional ``files``) to the API and return the parsed JSON."""
        return json.loads(self.post_url(self.apiurl, fields, files))

    def balance(self):
        """Return the ``balance`` field of the reply, or a negative error code."""
        return self._value_or_error(self.request(self._base_fields('balance')), 'balance')

    def login(self):
        """Return the ``uid`` field of the reply, or a negative error code."""
        return self._value_or_error(self.request(self._base_fields('login')), 'uid')

    def upload(self, filename, codetype, timeout):
        """Upload one captcha image and return its ``cid``, or a negative error code.

        ``filename`` is either a path to an image file or the raw image
        content as ``bytes``/``bytearray``.
        """
        data = self._base_fields('upload')
        data['codetype'] = str(codetype)
        data['timeout'] = str(timeout)
        response = self.request(data, {'file': filename})
        return self._value_or_error(response, 'cid')

    def result(self, cid):
        """Return the ``text`` field for ``cid``, or ``''`` when it is empty or the reply is empty."""
        data = self._base_fields('result')
        data['cid'] = str(cid)
        response = self.request(data)
        return (response['text'] or '') if response else ''

    def decode(self, filename, codetype, timeout):
        """Upload an image and poll for its text.

        Returns ``(cid, text)`` on success, ``(cid, '')`` when the upload
        returned a non-positive value, and ``(-3003, '')`` when no text was
        obtained after ``timeout`` polls spaced one second apart.
        """
        cid = self.upload(filename, codetype, timeout)
        if not cid > 0:
            return cid, ''
        for _ in range(timeout):
            text = self.result(cid)
            if text != '':
                return cid, text
            time.sleep(1)
        return -3003, ''

    def post_url(self, url, fields, files=None):
        """POST ``fields`` plus ``files`` as a multipart form and return the response text.

        ``files`` maps form-field names to either a file path (opened in
        binary mode and closed once the request finishes) or in-memory
        ``bytes``/``bytearray`` content, which is sent as-is. The mapping
        passed by the caller is not modified.
        """
        payload = {}
        opened = []
        try:
            for key, value in (files or {}).items():
                if isinstance(value, (bytes, bytearray)):
                    # Already the file content (e.g. a downloaded captcha image).
                    payload[key] = value
                else:
                    handle = open(value, 'rb')
                    opened.append(handle)
                    payload[key] = handle
            res = requests.post(url, files=payload, data=fields)
        finally:
            # Close every handle opened here, even if the request raised.
            for handle in opened:
                handle.close()
        return res.text
# Yundama account settings. `username` and `password` are read by indetify()
# and indetify_by_filepath(); while `username` is still the placeholder
# 'username', those functions only print a reminder to configure the account.
username = 'username'  # account name - replace with your own
password = 'password'  # account password - replace with your own
appid = 4283  # appid
appkey = '02074c64f0d0bb9efb2df455537b01c3'  # appkey
filename = 'b.jpg'  # image file location
codetype = 1004  # captcha type
# timeout passed to YDMHttp.decode
timeout = 60
def indetify(response_content):
    """Recognise a captcha through Yundama and return the recognised text.

    ``response_content`` is forwarded unchanged as the first argument of
    ``YDMHttp.decode``. Returns ``None`` (after printing a reminder) while
    the module-level ``username`` is still the placeholder ``'username'``.
    """
    if username == 'username':
        print('请设置好相关参数再测试')
        return None

    client = YDMHttp(username, password, appid, appkey)
    # Log in, then query the balance; both values are only printed.
    print('uid: %s' % client.login())
    print('balance: %s' % client.balance())
    # Arguments: image, captcha type id, timeout in seconds.
    captcha_id, text = client.decode(response_content, codetype, timeout)
    print('cid: %s, result: %s' % (captcha_id, text))
    return text
def indetify_by_filepath(file_path):
    """Recognise the captcha image stored at ``file_path`` and return its text.

    Prints the login uid, the account balance and the decode outcome along
    the way. Returns ``None`` (after printing a reminder) while the
    module-level ``username`` is still the placeholder ``'username'``.
    """
    configured = username != 'username'
    if not configured:
        print('请设置好相关参数再测试')
        return None

    api = YDMHttp(username, password, appid, appkey)
    user_id = api.login()
    print('uid: %s' % user_id)
    remaining = api.balance()
    print('balance: %s' % remaining)
    # Arguments: image path, captcha type id, timeout in seconds.
    outcome = api.decode(file_path, codetype, timeout)
    print('cid: %s, result: %s' % (outcome[0], outcome[1]))
    return outcome[1]
if __name__ == '__main__':
    # Manual run: recognise the sample image next to this script.
    indetify_by_filepath('b.jpg')
下面以豆瓣登录过程中的验证码为例,了解云打码如何使用
# coding=utf-8
from selenium import webdriver
import time
import requests
from yundama import indetify
from selenium.webdriver.common.by import By

# Demo: log in to Douban, solving the login captcha through indetify().
driver = webdriver.Chrome()
try:
    driver.get("https://www.douban.com/")
    # Enter the account name.
    driver.find_element(By.ID, "form_email").send_keys("78****@qq.com")
    # Enter the password.
    driver.find_element(By.ID, "form_password").send_keys("****")
    # Read the captcha image address from the page.
    img_url = driver.find_element(By.ID, "captcha_image").get_attribute("src")
    # Fetch the image with the browser's cookies so the request belongs to
    # the same server-side session as the page that displayed the captcha.
    session = requests.Session()
    for cookie in driver.get_cookies():
        session.cookies.set(cookie['name'], cookie['value'])
    response = session.get(img_url)
    # Recognise the captcha.
    ret = indetify(response.content)
    # Type the recognised captcha text.
    driver.find_element(By.ID, "captcha_field").send_keys(ret)
    time.sleep(5)
    # Click the login button.
    driver.find_element(By.CLASS_NAME, "bn-submit").click()
    time.sleep(10)
    print(driver.get_cookies())
finally:
    # Always shut the browser down, even when a step above fails.
    driver.quit()
import hashlib
import requests
from datetime import datetime
# Ruokuai account name and password, sent by getCode() as the 'username' and
# 'password' form fields. Both are empty here and must be filled in before use.
RUOUSER = ''
RUOPASS = ''
# Ruokuai 12306 captcha solving: pass a local image file path directly.
def getCode(img):
    """Upload the image at path ``img`` to Ruokuai and return the reply's ``'Result'`` field.

    The file is read in binary mode and posted through
    ``http_upload_image`` together with the account fields taken from
    ``RUOUSER`` / ``RUOPASS``. Raises ``KeyError`` if the parsed reply has
    no ``'Result'`` key.
    """
    url = "http://api.ruokuai.com/create.json"
    # Read inside a context manager so the file handle is always closed.
    with open(img, "rb") as image_file:
        fileBytes = image_file.read()
    paramDict = {
        'username': RUOUSER,
        'password': RUOPASS,
        'typeid': 6113,  # type id dedicated to 12306 image captchas
        'timeout': 90,
        'softid': 117157,  # used for promotion
        'softkey': '70acaa1e477a4374a7736264a24b974b'  # used for promotion
    }
    # Order in which the fields are written into the multipart body.
    paramKeys = ['username',
                 'password',
                 'typeid',
                 'timeout',
                 'softid',
                 'softkey'
                 ]
    result = http_upload_image(url, paramKeys, paramDict, fileBytes)
    return result['Result']
# Ruokuai 12306 captcha solving: upload the image.
def http_upload_image(url, paramKeys, paramDict, filebytes):
    """POST a hand-built multipart/form-data body to ``url`` and return the parsed JSON reply.

    One form part is written per entry of ``paramKeys`` (value looked up in
    ``paramDict``), followed by a part named ``image`` (filename ``sample``,
    content type ``image/gif``) carrying ``filebytes``. The boundary is
    derived from the MD5 of the current timestamp.
    """
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    boundary = '------------' + hashlib.md5(stamp.encode("utf8")).hexdigest().lower()
    delimiter = ('\r\n--%s\r\n' % (boundary)).encode('ascii')

    # Collect the body piece by piece and join once at the end.
    chunks = []
    for key in paramKeys:
        field = "Content-Disposition: form-data; name=\"%s\"\r\n\r\n%s" % (key, paramDict[key])
        chunks.append(delimiter)
        chunks.append(field.encode('utf8'))

    file_header = 'Content-Disposition: form-data; name=\"image\"; filename=\"%s\"\r\nContent-Type: image/gif\r\n\r\n' % ('sample')
    closing = '\r\n--%s--\r\n' % (boundary)
    chunks.append(delimiter)
    chunks.append(file_header.encode('utf8'))
    chunks.append(filebytes)
    chunks.append(closing.encode('ascii'))
    body = b''.join(chunks)

    request_headers = {'Content-Type': 'multipart/form-data; boundary=%s' % boundary,
                       'Connection': 'Keep-Alive',
                       'Expect': '100-continue',
                       }
    reply = requests.post(url, params='', data=body, headers=request_headers)
    return reply.json()
if __name__ == '__main__':
    # Manual test: solve one locally stored captcha image and show the answer.
    print(getCode('../captcha_imgs/1544505888345_3.png'))
这是验证码里面非常简单的一种类型,对应的只需要获取验证码的地址,然后请求,通过打码平台识别即可。
这种验证码的类型是更加常见的一种类型,对于这种验证码,大家需要思考:
在登录的过程中,假设我输入的验证码是对的,对方服务器是如何判断当前我输入的验证码是显示在我屏幕上的验证码,而不是其他的验证码呢?
在获取网页的时候,请求验证码,以及提交验证码的时候,对方服务器肯定通过了某种手段验证我之前获取的验证码和最后提交的验证码是同一个验证码,那这个手段是什么手段呢?
很明显,就是通过cookie来实现的,所以对应的,在请求页面、请求验证码、提交验证码的时候需要保证cookie的一致性,对此可以使用requests.session来解决