目录
EasyDL 机器学习识别(实践:京东商城)
(一)批量获取验证码图片
(二)EasyDL机器学习(百度智能云)
(三)调用 EasyDL API 接口识别验证码
云码平台一站式识别(实战:豆瓣网)
(一)获取验证码图片
(二)验证码识别
(三)动态获取验证码
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import time
import requests
# Capture 30 captcha images from the JD login page so they can be labelled
# and used as a training set later.
# NOTE: the ./img directory is not created here; it must exist beforehand.
service = Service(executable_path="../_resources/chromedriver")
driver = webdriver.Chrome(service=service)
try:
    driver.get("https://passport.jd.com/uc/login")

    # Presumably switches the page to the account/password login tab --
    # TODO confirm against the live page, the absolute XPath is brittle.
    select = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[1]/div/div[3]/a')
    select.click()

    # Fill in the credentials. ActionChains only queues the actions;
    # nothing is sent to the browser until perform() is called.
    username = driver.find_element(By.XPATH, '//*[@id="loginname"]')
    ActionChains(driver).pause(0.5).click(username).send_keys("xxxxxxxxxx").perform()
    password = driver.find_element(By.XPATH, '//*[@id="nloginpwd"]')
    ActionChains(driver).pause(0.5).click(password).send_keys("xxxxxxxxxx").perform()

    submit = driver.find_element(By.XPATH, '//*[@id="loginsubmit"]')
    submit.click()

    for i in range(30):
        # Give the captcha image time to render before capturing it.
        time.sleep(1)
        img = driver.find_element(By.XPATH, '/html/body/div[4]/div/div/div/div[1]/div[2]/div[1]/img')
        img.screenshot("./img/{}.png".format(i))
        time.sleep(0.5)
        # Click the refresh control so the next iteration sees a new captcha.
        refresh = driver.find_element(By.XPATH, '/html/body/div[4]/div/div/div/div[1]/div[2]/div[2]')
        refresh.click()

    time.sleep(3)
finally:
    # Always close the browser, even if an element lookup above failed.
    driver.quit()
(1)EasyDL零门槛AI开发平台-立即使用-物体检测(图像)
(2)创建模型:填写模型名称(自定义例如:myModel)+业务描述(自定义例如:Captcha Recognition)+完成创建
(3)数据总览中的创建数据集:填写数据集名称(自定义例如:myModelData)+完成
(4)数据总览中在上述数据集中:点击导入+无标注信息+导入方式:本地导入+上传图片(刚才代码抓取的所有图片)+确认并返回
(5)导入成功后在线标注:右上角添加标签(自定义例如:Target)后确定+标注每个图片的黑色区域即滑块移动位置
(6)标注好后训练模型:选择模型名称+添加数据集+公有云部署+开始训练+等待训练结束+查看模型效果+申请发布
(7)发布模型:公有云部署+提交申请
(1)在上述模型中点击服务详情+查看API文档+点击EasyDL版控制台+公有云部署:应用列表+点击创建应用+输入应用名称+选择接口EasyDL+立即创建+获得此应用的API KEY和Secret Key
(2)回到API文档:在导航栏依次进入"模型发布-公有云部署-物体检测API调用文档",复制其中的示例代码到新建的py文件,并修改代码如下:(通过main.png来测试)
import json
import base64
import requests
# Send one local image to a deployed EasyDL object-detection model and
# pretty-print the JSON response.
IMAGE_FILEPATH = "./img/main.png"
PARAMS = {"threshold": 0.3}

# Endpoint address shown on the model's service-details page.
MODEL_API_URL = "【您的API地址】"

# Calling the API requires an ACCESS_TOKEN. If you already have one, assign
# it below. Otherwise leave ACCESS_TOKEN empty and fill in the API_KEY and
# SECRET_KEY of the deployed model; a new token is then requested and printed.
# ACCESS_TOKEN = "<your ACCESS_TOKEN>"
ACCESS_TOKEN = None
API_KEY = "【您的API_KEY】"
SECRET_KEY = "【您的SECRET_KEY】"

print("1. 读取目标图片 '{}'".format(IMAGE_FILEPATH))
with open(IMAGE_FILEPATH, 'rb') as f:
    base64_data = base64.b64encode(f.read())
    base64_str = base64_data.decode('UTF8')
print("将 BASE64 编码后图片的字符串填入 PARAMS 的 'image' 字段")
PARAMS["image"] = base64_str

if not ACCESS_TOKEN:
    print("2. ACCESS_TOKEN 为空,调用鉴权接口获取TOKEN")
    # The two adjacent literals are concatenated before .format() is applied.
    auth_url = (
        "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials"
        "&client_id={}&client_secret={}".format(API_KEY, SECRET_KEY)
    )
    auth_resp = requests.get(auth_url)
    auth_resp_json = auth_resp.json()
    # Raises KeyError if the auth response carries no "access_token" field.
    ACCESS_TOKEN = auth_resp_json["access_token"]
    print("新 ACCESS_TOKEN: {}".format(ACCESS_TOKEN))
else:
    print("2. 使用已有 ACCESS_TOKEN")

print("3. 向模型接口 'MODEL_API_URL' 发送请求")
request_url = "{}?access_token={}".format(MODEL_API_URL, ACCESS_TOKEN)
response = requests.post(url=request_url, json=PARAMS)
response_json = response.json()
# ensure_ascii=False keeps non-ASCII text in the response readable.
response_str = json.dumps(response_json, indent=4, ensure_ascii=False)
print("结果:\n{}".format(response_str))
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import time
import requests
# Log in to douban.com with placeholder credentials and save a screenshot of
# the captcha background image that appears after submitting.
# NOTE: the ./img2 directory is not created here; it must exist beforehand.
service = Service(executable_path="../_resources/chromedriver")
driver = webdriver.Chrome(service=service)
try:
    driver.get("https://www.douban.com/")
    time.sleep(2)

    # The login form is rendered inside an iframe, so switch into it first.
    frame = driver.find_element(By.XPATH, '//*[@id="anony-reg-new"]/div/div[1]/iframe')
    driver.switch_to.frame(frame)
    time.sleep(0.5)

    # Presumably selects the password-login tab -- TODO confirm on the page.
    select = driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/ul[1]/li[2]')
    select.click()

    # ActionChains only queues the actions; perform() actually executes them.
    username = driver.find_element(By.XPATH, '//*[@id="username"]')
    ActionChains(driver).pause(0.5).click(username).send_keys("xxxxxxxxxx").perform()
    password = driver.find_element(By.XPATH, '//*[@id="password"]')
    ActionChains(driver).pause(0.5).click(password).send_keys("xxxxxxxxxx").perform()

    submit = driver.find_element(By.XPATH, '/html/body/div[1]/div[2]/div[1]/div[5]/a')
    submit.click()
    time.sleep(2)

    # The captcha is served in its own iframe; enter it to reach the image.
    driver.switch_to.frame("tcaptcha_iframe_dy")
    img = driver.find_element(By.XPATH, '//*[@id="slideBg"]')
    img.screenshot("./img2/captcha.png")

    time.sleep(3)
finally:
    # Always close the browser, even if an element lookup above failed.
    driver.quit()
云码-自动验证码识别平台:在线测试:点击单图滑块(截图)-上传刚获取的验证码图片-点击测试
以下为通过代码实现的方法:登录账号成功后点击个人中心-点击开发文档-点击python
修改文档代码如下(新建py文件名称为:ydm.py):
注意根据个人情况修改token
import json
import time
import requests
import base64
class YdmVerify(object):
    """Thin client for the YunMa (jfbym.com) captcha-recognition HTTP API.

    Every ``*_verify`` method builds a JSON payload containing the account
    token and a platform type code, POSTs it, prints the raw response and
    returns the ``['data']['data']`` field of the decoded JSON body.
    """

    _custom_url = "http://api.jfbym.com/api/YmServer/customApi"
    # Account token; replace with your own before calling any method.
    _token = ""
    _headers = {
        'Content-Type': 'application/json'
    }
    # Seconds to wait for a single HTTP request before giving up.
    _timeout = 30
    # Polling cadence and total time budget (seconds) for asynchronous tasks.
    _poll_interval = 5
    _poll_budget = 150

    @staticmethod
    def _b64(raw):
        """Return *raw* bytes as a base64-encoded ``str``."""
        return base64.b64encode(raw).decode()

    def _post_custom(self, payload):
        """POST *payload* to the custom API and return ``['data']['data']``.

        The raw response text is printed. A response without the nested
        ``data`` field raises ``KeyError``/``TypeError`` from the lookup.
        """
        resp = requests.post(self._custom_url, headers=self._headers,
                             data=json.dumps(payload), timeout=self._timeout)
        print(resp.text)
        return resp.json()['data']['data']

    def _poll_funnel_result(self, result_url, captcha_id, record_id):
        """Poll *result_url* until the task succeeds or the budget runs out.

        A success response has ``msg == '请求成功'`` and ``code == 10001`` and
        carries the solution in ``['data']['data']``. Any other outcome (the
        "result pending" response, an unexpected message, a network or JSON
        error) waits ``_poll_interval`` seconds and counts against
        ``_poll_budget``, so the loop always terminates.

        :return: the solution string, or ``None`` if no success response
            arrived within the budget.
        """
        query = {
            "token": self._token,
            "captchaId": captcha_id,
            "recordId": record_id
        }
        waited = 0
        while waited < self._poll_budget:
            try:
                result = requests.post(result_url, headers=self._headers,
                                       json=query, timeout=self._timeout).json()
                print(result)
                if result['msg'] == '请求成功' and result['code'] == 10001:
                    print(result['data']['data'])
                    return result['data']['data']
            except Exception as e:
                # Best effort: report the failure and retry within the budget.
                print(e)
            time.sleep(self._poll_interval)
            waited += self._poll_interval
        return None

    def common_verify(self, image, verify_type="60000"):
        """Recognise a text or arithmetic captcha image.

        :param image: raw image bytes.
        :param verify_type: platform type code (see table below).
        :return: ``['data']['data']`` of the JSON response.
        """
        # Alphanumeric / Chinese character types
        #   general alphanumeric, 1-4 chars        10110
        #   general alphanumeric, 5-8 chars        10111
        #   general alphanumeric, 9-11 chars       10112
        #   general alphanumeric, 12+ chars        10113
        #   general alphanumeric, 1-6 chars plus   10103
        #   custom - alphanumeric, 5 chars ~qcs    9001
        #   custom - digits only, 4 chars          193
        # Chinese types
        #   general Chinese, 1-2 chars             10114
        #   general Chinese, 3-5 chars             10115
        #   general Chinese, 6-8 chars             10116
        #   general Chinese, 9+ chars              10117
        #   custom - "XX Xiyou Kuxing" Chinese     10107
        # Arithmetic types
        #   general numeric arithmetic             50100
        #   general Chinese arithmetic             50101
        #   custom - arithmetic cni                452
        # NOTE(review): the default "60000" is not in the table above -- confirm.
        payload = {
            "image": self._b64(image),
            "token": self._token,
            "type": verify_type
        }
        return self._post_custom(payload)

    def slide_verify(self, slide_image, background_image, verify_type="20101"):
        """Solve a two-image slider captcha.

        :param slide_image: raw bytes of the slider piece image.
        :param background_image: raw bytes of the background image.
        :param verify_type: platform type code.
        :return: ``['data']['data']`` of the JSON response.
        """
        # General two-image slider: 20111
        # NOTE(review): the default is "20101" while the vendor comment lists
        # 20111 -- confirm which code is intended.
        payload = {
            "slide_image": self._b64(slide_image),
            "background_image": self._b64(background_image),
            "token": self._token,
            "type": verify_type
        }
        return self._post_custom(payload)

    def sin_slide_verify(self, image, verify_type="20110"):
        """Solve a single-image (screenshot) slider captcha.

        :param image: raw bytes of the captcha screenshot.
        :param verify_type: platform type code; 20110 is the general
            single-image slider.
        :return: ``['data']['data']`` of the JSON response.
        """
        payload = {
            "image": self._b64(image),
            "token": self._token,
            "type": verify_type
        }
        return self._post_custom(payload)

    def traffic_slide_verify(self, seed, data, href, verify_type="900010"):
        """Solve a protocol-level slider captcha (custom type slide_traffic).

        :param seed: sent unchanged as the ``seed`` field.
        :param data: sent unchanged as the ``data`` field.
        :param href: sent unchanged as the ``href`` field.
        :param verify_type: platform type code; 900010 is slide_traffic.
        :return: ``['data']['data']`` of the JSON response.
        """
        payload = {
            "seed": seed,
            "data": data,
            "href": href,
            "token": self._token,
            "type": verify_type
        }
        return self._post_custom(payload)

    def click_verify(self, image, label_image=None, extra=None, verify_type="30100"):
        """Solve a click-selection captcha.

        :param image: raw bytes of the main captcha image.
        :param label_image: optional raw bytes of a label image; sent as
            ``label_image`` only when provided.
        :param extra: optional hint string; sent as ``extra`` only when truthy.
        :param verify_type: platform type code (see table below).
        :return: ``['data']['data']`` of the JSON response.
        """
        # general any-point click, 1-4 coordinates                    30009
        # general text click 1 (extra = comma-separated characters)   30100
        # custom - text click 2 (extra="click")                       30103
        # custom - single-image text click                            30102
        # custom - icon click 1                                       30104
        # custom - icon click 2 (extra="icon")                        30105
        # custom - word-order click 1 (extra="phrase")                30106
        # custom - word-order click 2                                 30107
        # custom - spatial reasoning click 1 (extra = instruction)    30109
        # custom - spatial reasoning click 1 (extra = instruction)    30110
        # custom - tx spatial click (extra = instruction)             50009
        # custom - tt spatial click                                   30101
        # custom - reasoning puzzle 1 (extra = instruction)           30108
        # custom - xy4 nine-grid click (label_image, image)           30008
        payload = {
            "image": self._b64(image),
            "token": self._token,
            "type": verify_type
        }
        if label_image is not None:
            payload['label_image'] = self._b64(label_image)
        if extra:
            payload['extra'] = extra
        return self._post_custom(payload)

    def rotate(self, out_ring_image, inner_circle_image):
        """Solve a two-image rotation captcha (custom type 90004).

        :param out_ring_image: raw bytes of the outer-ring image.
        :param inner_circle_image: raw bytes of the inner-circle image.
        :return: ``['data']['data']`` of the JSON response.
        """
        # The vendor sample also lists a single-image rotation type, 90007,
        # which takes one "image" field instead; it is not implemented here.
        payload = {
            "out_ring_image": self._b64(out_ring_image),
            "inner_circle_image": self._b64(inner_circle_image),
            "token": self._token,
            "type": "90004"
        }
        return self._post_custom(payload)

    def google_verify(self, googlekey, pageurl, invisible=1, data_s=""):
        """Create a reCAPTCHA task and poll for its solution.

        :param googlekey: sent as the ``googlekey`` field.
        :param pageurl: sent as the ``pageurl`` field.
        :param invisible: sent as the ``invisible`` field.
        :param data_s: sent as the ``data-s`` field.
        :return: the solution string, or ``None`` if none arrived in time.
        """
        # NOTE(review): this method talks to a bare IP address while
        # hcaptcha_verify uses api.jfbym.com for the same paths -- confirm host.
        url = "http://122.9.52.147/api/YmServer/funnelApi"
        payload = json.dumps({
            "token": self._token,
            # "40010" is v2; the vendor sample lists "40011" for v3, which
            # additionally requires an 'action' field.
            "type": "40010",
            "googlekey": googlekey,
            "enterprise": 1,  # enterprise-edition flag
            "pageurl": pageurl,
            "invisible": invisible,
            "data-s": data_s,
        })
        result = requests.post(url, headers=self._headers, data=payload,
                               timeout=self._timeout).json()
        print(result)
        # A created task returns data.captchaId and data.recordId.
        captcha_id = result.get('data').get("captchaId")
        record_id = result.get('data').get("recordId")
        return self._poll_funnel_result(
            "http://122.9.52.147/api/YmServer/funnelApiResult",
            captcha_id, record_id)

    def fun_captcha_verify(self, publickey, pageurl, verify_type="40007"):
        """Solve a captcha identified by a public key and page URL.

        :param publickey: sent as the ``publickey`` field.
        :param pageurl: sent as the ``pageurl`` field.
        :param verify_type: platform type code.
        :return: ``['data']['data']`` of the JSON response.
        """
        # NOTE(review): the vendor comment labels type 40007 "Hcaptcha" although
        # this method is named for FunCaptcha -- confirm.
        payload = {
            "publickey": publickey,
            "pageurl": pageurl,
            "token": self._token,
            "type": verify_type
        }
        return self._post_custom(payload)

    def hcaptcha_verify(self, sitekey="", pageurl=""):
        """Create an hCaptcha task (type 50013) and poll for its solution.

        :param sitekey: sent as the ``sitekey`` field.
        :param pageurl: sent as the ``pageurl`` field.
        :return: the solution string, or ``None`` if none arrived in time.
        """
        create_url = "http://api.jfbym.com/api/YmServer/funnelApi"
        payload = {
            "sitekey": sitekey,
            "pageurl": pageurl,
            "token": self._token,
            "type": '50013'
        }
        result = requests.post(create_url, headers=self._headers,
                               data=json.dumps(payload),
                               timeout=self._timeout).json()
        print(result)
        captcha_id = result.get('data').get("captchaId")
        record_id = result.get('data').get("recordId")
        return self._poll_funnel_result(
            "http://api.jfbym.com/api/YmServer/funnelApiResult",
            captcha_id, record_id)
新建py文件:
from ydm import YdmVerify
# Submit the previously saved captcha screenshot to the single-image slider
# recogniser; sin_slide_verify prints the raw service response.
Y = YdmVerify()
with open('./img2/captcha.png','rb') as f:
    s = f.read()
Y.sin_slide_verify(image=s)
结合(一)和(二)的代码,修改py文件如下:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import time
import requests
from ydm import YdmVerify
# Log in to douban.com with placeholder credentials, capture the captcha
# that appears, and send it straight to the YunMa single-image slider API.
# NOTE: the ./img2 directory is not created here; it must exist beforehand.
service = Service(executable_path="../_resources/chromedriver")
driver = webdriver.Chrome(service=service)
try:
    driver.get("https://www.douban.com/")
    time.sleep(2)

    # The login form is rendered inside an iframe, so switch into it first.
    frame = driver.find_element(By.XPATH, '//*[@id="anony-reg-new"]/div/div[1]/iframe')
    driver.switch_to.frame(frame)
    time.sleep(0.5)

    # Presumably selects the password-login tab -- TODO confirm on the page.
    select = driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/ul[1]/li[2]')
    select.click()

    # ActionChains only queues the actions; perform() actually executes them.
    username = driver.find_element(By.XPATH, '//*[@id="username"]')
    ActionChains(driver).pause(0.5).click(username).send_keys("xxxxxxxxxx").perform()
    password = driver.find_element(By.XPATH, '//*[@id="password"]')
    ActionChains(driver).pause(0.5).click(password).send_keys("xxxxxxxxxx").perform()

    submit = driver.find_element(By.XPATH, '/html/body/div[1]/div[2]/div[1]/div[5]/a')
    submit.click()
    time.sleep(2)

    # The captcha is served in its own iframe; enter it to reach the image.
    driver.switch_to.frame("tcaptcha_iframe_dy")
    img = driver.find_element(By.XPATH, '//*[@id="slideBg"]')
    img.screenshot("./img2/captcha.png")
    time.sleep(1)

    # Read the screenshot back and hand it to the recognition service;
    # sin_slide_verify prints the raw service response.
    Y = YdmVerify()
    with open('./img2/captcha.png','rb') as f:
        s = f.read()
    Y.sin_slide_verify(image=s)

    time.sleep(3)
finally:
    # Always close the browser, even if a step above failed.
    driver.quit()