官网有教程:
接入指南: https://ai.baidu.com/docs#/Begin/top
Python sdk 文档: http://ai.baidu.com/docs#/OCR-Python-SDK/07883957
from aip import AipOcr
from PIL import Image
""" 你的 APPID AK SK """
APP_ID = '11673820'
API_KEY = '8kEGtNlLBCDz6iYGeuNFgGBG'
SECRET_KEY = 'tgP0bkUFWtRkDvy7VQ0dKz9tCdqDKj8u '
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
""" 读取图片 """
def get_file_content(filePath):
with open(filePath, 'rb') as fp:
return fp.read()
# Pre-process the captcha image before sending it to the OCR service.
# The context manager closes the source file once the converted copy exists.
with Image.open('captha111.jpg') as source_image:
    # Grayscale alternative: source_image.convert('L')
    # Binarise to a 1-bit black/white image.
    binary_image = source_image.convert('1')
binary_image.save('aa.jpg')
image = get_file_content('aa.jpg')

# Call general text recognition with the bytes of the local image.
result = client.basicGeneral(image)
print(result)

# Optional request parameters, if needed:
# options = {}
# options["language_type"] = "ENG"
# options["detect_direction"] = "true"
# options["detect_language"] = "true"
# options["probability"] = "true"
# Call text recognition with the options above.
# NOTE(review): the original comment said "high-accuracy version", but the
# call shown is still basicGeneral -- confirm which endpoint was intended.
# result = client.basicGeneral(image, options)
# print(result)

# A response without 'words_result' (presumably an error reply -- it is
# already printed in full above) yields no lines instead of a KeyError.
for word in result.get('words_result', []):
    print(word['words'])
必要文件(云打码平台会提供几个源文件,下载相对应的版本即可,平台也有使用教程):
以 古诗文网 为例:
import requests
from Shibie import Recoginitier

# One session is reused for the captcha download and the login POST, so any
# cookies the server sets while issuing the captcha are sent back on login.
sessions = requests.session()
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36',
}

# Download the captcha image.
response1 = sessions.get('https://so.gushiwen.org/RandCode.ashx', headers=headers)
# Save the raw image bytes to disk for the recogniser.
with open('yanzhengma.png', 'wb') as fp:
    fp.write(response1.content)

# Recognise the captcha text.
file_path = 'yanzhengma.png'
s = Recoginitier()
value = s.esay_recoginition(file_path)
# Keep only the text between the first pair of single quotes in str(value).
# NOTE(review): presumably the helper returns bytes or a tuple whose repr
# quotes the code -- confirm against the Shibie module.
value = str(value).split("'")[1]
print(value)

# Login form fields. A dict (instead of the original set of tuples) gives a
# deterministic field order.
data = {
    'email': '账号',
    'pwd': '密码',
    'code': value,
}

# Submit the login form. The fields go in the POST body (data=) rather than
# the query string (params=), so the password is not exposed in the URL.
# The 'from' query parameter presumably redirects to the user's collection page.
response3 = sessions.post(
    'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx',
    headers=headers,
    data=data,
)
with open('bmx.html', 'w', encoding='utf-8') as fp:
    fp.write(response3.text)
print(response3.status_code)
此平台需要花钱购买题分,不同类型的验证码破解所花费的题分也不一样,官网也有教学文档:
这里就不详细介绍了,直接给出一个成功的实例:
下载下来的 API 是 Python 2.x 版本的,需要手动修改成 3.x 版本:
第一个代码块是: 下载的API
第二个代码块是: 以简书为例的一个完整工程
import requests
from hashlib import md5
class Chaojiying(object):
    """Small HTTP client for the Chaojiying captcha-solving service.

    Every request carries the account name, the MD5 hex digest of the
    password and the software id, merged into the form data.
    """

    UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
    REPORT_ERROR_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

    def __init__(self, username, password, soft_id, timeout=60):
        """Store the account details used to authenticate each request.

        Args:
            username: Account name.
            password: Plain-text password; only its MD5 hex digest is kept
                and sent (as the ``pass2`` field).
            soft_id: Software id registered with the service.
            timeout: Seconds passed to ``requests`` as the request timeout;
                ``None`` waits indefinitely, which is what happened before
                this parameter existed.
        """
        self.username = username
        self.password = md5(password.encode('utf-8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def _post(self, url, params, files=None):
        """POST ``params`` plus the account fields to ``url``; return the JSON body."""
        payload = dict(params)
        # Account fields are applied last, so they win on any key clash.
        payload.update(self.base_params)
        r = requests.post(url, data=payload, files=files,
                          headers=self.headers, timeout=self.timeout)
        return r.json()

    def post_pic(self, im, codetype):
        """Upload a captcha image for solving.

        Args:
            im: Image content as bytes.
            codetype: Captcha type code; see
                http://www.chaojiying.com/price.html

        Returns:
            The decoded JSON response.
        """
        files = {'userfile': ('ccc.jpg', im)}
        return self._post(self.UPLOAD_URL, {'codetype': codetype}, files=files)

    # Call this when an answer was rejected by the target site; according to
    # the original note, the platform then does not charge for that attempt.
    def report_error(self, im_id):
        """Report a wrongly solved captcha.

        Args:
            im_id: Image id of the captcha being reported.

        Returns:
            The decoded JSON response.
        """
        return self._post(self.REPORT_ERROR_URL, {'id': im_id})
'''
下面代码 用于检测 验证码
'''
# if __name__ == '__main__':
#
#
#     cjy = Chaojiying('账号', '密码', '软件ID')
#     im = open('captcha.jpg', 'rb').read()
#     content = cjy.post_pic(im, 验证码类型)
#     yanzhengma = ''
#     for key, value in content.items():
#         if key == 'pic_str':
#             yanzhengma = value
#     print(yanzhengma)
import time
from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from 点触验证码的识别.chaojiying import Chaojiying
def crack(max_attempts=None):
    """Solve the click-captcha shown in the global ``browser`` via Chaojiying.

    Each attempt screenshots the page, crops the captcha widget, uploads it,
    clicks the returned coordinates and presses the confirm button. A failed
    attempt is reported to Chaojiying and then retried. The retry is a loop
    (the original recursed without bound).

    ``验证码类型`` is a placeholder name for the Chaojiying captcha type code;
    it must be defined before this function is called.

    Args:
        max_attempts: Maximum number of attempts; ``None`` (the default)
            retries until the captcha is accepted, as before.

    Returns:
        bool: True once the captcha is accepted, False when ``max_attempts``
        is exhausted.

    Raises:
        ValueError: If the Chaojiying response has no click coordinates.
    """
    # One client serves both the upload and any later error report.
    cjy = Chaojiying('账号', '密码', '软件ID')
    attempt = 0
    while max_attempts is None or attempt < max_attempts:
        attempt += 1

        # Screenshot the whole page.
        browser.save_screenshot('222.jpg')
        # NOTE(review): find_element_by_* is the Selenium 3 API and is gone
        # from newer releases; it is kept because the click offsets below
        # rely on Selenium 3 semantics.
        # Confirm button of the captcha panel.
        button = browser.find_element_by_xpath(xpath='//div[@class="geetest_panel"]/a/div')
        # Captcha widget; its position and size locate it in the screenshot.
        img1 = browser.find_element_by_xpath(xpath='//div[@class="geetest_widget"]')
        location = img1.location
        size = img1.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        print('图片的宽:', img1.size['width'])
        print(top, bottom, left, right)

        # Crop the captcha out of the screenshot and save it. The crop starts
        # at the widget's top-left corner, so coordinates in the cropped
        # image equal offsets inside the widget.
        # NOTE(review): the 54 px cut from the bottom presumably removes the
        # button bar under the picture -- confirm against the page layout.
        with Image.open('222.jpg') as img_1:
            capcha1 = img_1.crop((left, top, right, bottom - 54))
            capcha1.save('tu1-1.png')

        # Upload the cropped image; the response is a dict.
        with open('tu1-1.png', 'rb') as image_file:
            im = image_file.read()
        content = cjy.post_pic(im, 验证码类型)
        print(content)

        # 'pic_str' is parsed as "x1,y1|x2,y2|..."; fail clearly when it is
        # missing or empty instead of crashing inside int().
        pic_str = content.get('pic_str')
        if not pic_str:
            raise ValueError('Chaojiying returned no click coordinates: %r' % (content,))
        positions = pic_str.split('|')
        locations = [[int(number) for number in group.split(",")] for group in positions]
        print(positions)
        print(locations)

        # Click each returned coordinate on the captcha image.
        # NOTE(review): assumes offsets are measured from the widget's
        # top-left corner (Selenium 3 behaviour) -- confirm for the installed
        # Selenium version.
        for location1 in locations:
            print(location1)
            ActionChains(browser).move_to_element_with_offset(img1, location1[0], location1[1]).click().perform()
            time.sleep(1)
        button.click()
        time.sleep(1)

        # Read the result banner to decide whether the attempt succeeded.
        lower = browser.find_element_by_xpath('//div[@class="geetest_table_box"]/div[2]').text
        print('判断', lower)
        if lower != '验证失败 请按提示重新操作' and lower is not None:
            print('登录成功')
            time.sleep(3)
            return True

        time.sleep(3)
        print('登录失败')
        # Report the wrong answer (per the original note, the platform then
        # does not charge for it), then try again.
        pic_id = content.get('pic_id')
        print('图片id为:', pic_id)
        cjy.report_error(pic_id)
    return False
if __name__ == '__main__':
    # Start Chrome through the local chromedriver binary. `browser` stays a
    # module-level global because crack() reads it.
    browser = webdriver.Chrome('chromedriver.exe')
    browser.get('https://www.jianshu.com/sign_in')
    browser.save_screenshot('lodin.png')

    # Look up the submit button and both form fields first, then fill in the
    # credentials and submit; this makes the captcha appear.
    submit_button = browser.find_element_by_id('sign-in-form-submit-btn')
    form_fields = [
        browser.find_element_by_id(field_id)
        for field_id in ('session_email_or_mobile_number', 'session_password')
    ]
    for form_field, text in zip(form_fields, ('账号', '密码')):
        form_field.send_keys(text)
    submit_button.click()

    # Wait before handing over to the captcha solver.
    time.sleep(5)
    crack()