1. tesseract-ocr
tesseract a.jpg result -l eng && cat result.txt
识别率低
2、识别验证码平台(打码平台)
超级鹰
网站地址可以百度搜索
#!/usr/bin/env python
# coding:utf-8
import requests
from hashlib import md5
class Chaojiying_Client(object):
    """Minimal HTTP client for the Chaojiying captcha-recognition service.

    The password is never stored in plain text: only its MD5 hex digest is
    kept and sent as the ``pass2`` form field.
    """

    def __init__(self, username, password, soft_id, timeout=30):
        """Store the account data that is sent with every request.

        Args:
            username: Account name, sent as the ``user`` form field.
            password: Plain-text password (``str``); it is UTF-8 encoded and
                MD5-hashed before being stored.
            soft_id: Software id, sent as the ``softid`` form field.
            timeout: Seconds to wait for the server on each request. Without
                a timeout ``requests`` may block forever on a stalled
                connection.
        """
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        # Form fields shared by every API call.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image for recognition.

        Args:
            im: Raw image bytes.
            codetype: Captcha type code; see
                http://www.chaojiying.com/price.html

        Returns:
            The decoded JSON body of the response.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognized captcha.

        Args:
            im_id: Image id of the captcha being reported.

        Returns:
            The decoded JSON body of the response.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()
def main1(urlstr):
    """Recognize the captcha image stored at ``urlstr``.

    Args:
        urlstr: Path of the image file; it is passed straight to ``open``.

    Returns:
        The ``'pic_str'`` entry of the JSON response from ``PostPic``.
    """
    # NOTE(review): account credentials are hard-coded here; consider loading
    # them from configuration or the environment instead.
    chaojiying = Chaojiying_Client('carmack', 'Vff635241', '96001')
    # Use a context manager so the file handle is closed deterministically.
    with open(urlstr, 'rb') as image_file:
        im = image_file.read()
    return chaojiying.PostPic(im, 1902)['pic_str']
if __name__ == '__main__':
    # Manual smoke test: upload a local sample image and print the raw reply.
    chaojiying = Chaojiying_Client('carmack', 'Vff635241', '96001')
    # Context manager guarantees the file handle is closed after reading.
    with open('pic_b.jpg', 'rb') as image_file:
        im = image_file.read()
    print(chaojiying.PostPic(im, 1902))
极验平台
step1. 模拟点击验证按钮
step2. 识别滑动缺口位置
遍历没有缺口和有缺口的两张图片,找出相同位置像素差距超过指定值的像素点,即缺口位置
(目前极验已经改进了算法)
step3. 模拟拖动滑块
import random
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from lxml import etree
import time
from PIL import Image
from chaojiying import main1
from io import BytesIO
# Shared Chrome session used by every helper below.
browser = webdriver.Chrome()
# Fixed window size (1300 wide, 600 high).
browser.set_window_size(width=1300, height=600)
# Explicit-wait helper bound to the session, with a 10-second limit.
wait = WebDriverWait(browser, timeout=10)
def get_page():
    """Open the registration page in the shared browser.

    :return: the page source reported by the browser after navigation
    """
    browser.get('http://bm.e21cn.com/log/reg.aspx')
    return browser.page_source
def get_msg(html):
    """Fill in and submit the registration form, solving the captcha first.

    The captcha is cropped from a browser screenshot into ``1.png``, sent to
    the recognition service through ``main1``, and the recognized text is
    typed into the form together with fixed account data.

    :param html: page source of the registration page; currently unused and
        kept only so existing callers keep working
    :return: None
    """
    username = 'lalala'
    password = '123456'
    tel = '18011405897'

    # Save the captcha as it is rendered in the browser, then recognize it.
    img = get_geetest_image('1.png')
    print(img)
    check_msg = main1('1.png')
    print(check_msg)

    # CSS selector of each input paired with the text to type into it.
    field_values = (
        ('input#username', username),
        ('input#pwd', password),
        ('input#pwd_Q', password),
        ('input#tel', tel),
        ('input#CheckCode', check_msg),
    )
    # Locate every element before typing anything, so a missing field makes
    # the wait fail while the form is still untouched.
    located_fields = [
        (
            wait.until(
                expected_conditions.presence_of_element_located(
                    (By.CSS_SELECTOR, selector)
                )
            ),
            value,
        )
        for selector, value in field_values
    ]
    submit_button = wait.until(
        expected_conditions.element_to_be_clickable(
            (By.CSS_SELECTOR, 'input#btn_login')
        )
    )

    for element, value in located_fields:
        element.send_keys(value)
    time.sleep(2)
    submit_button.click()
def get_position():
    """Find where the captcha image sits on the page.

    Waits for the ``#imgCheckCode`` element, pauses two seconds, prints the
    element size and derives the edges from its location and size.

    :return: tuple ``(top, bottom, left, right)``
    """
    captcha_element = wait.until(
        expected_conditions.presence_of_element_located(
            (By.CSS_SELECTOR, '#imgCheckCode')
        )
    )
    time.sleep(2)
    origin = captcha_element.location
    dimensions = captcha_element.size
    print(dimensions)
    top = origin['y']
    left = origin['x']
    bottom = top + dimensions['height']
    right = left + dimensions['width']
    return (top, bottom, left, right)
def get_screenshot():
    """Take a screenshot of the current browser page.

    :return: the screenshot opened as a PIL image
    """
    png_bytes = browser.get_screenshot_as_png()
    return Image.open(BytesIO(png_bytes))
def get_geetest_image(name):
    """Crop the captcha out of a page screenshot and save it.

    :param name: path the cropped image is saved to
    :return: the cropped image object
    """
    edge_top, edge_bottom, edge_left, edge_right = get_position()
    print('验证码位置', edge_top, edge_bottom, edge_left, edge_right)
    # The crop box is ordered (left, upper, right, lower).
    crop_box = (edge_left, edge_top, edge_right, edge_bottom)
    cropped = get_screenshot().crop(crop_box)
    cropped.save(name)
    return cropped
def main():
    """Load the registration page, then fill in and submit the form."""
    get_msg(get_page())
if __name__ == '__main__':
    try:
        main()
    finally:
        # Always end the WebDriver session so no Chrome/chromedriver
        # process is left running, even when main() raises.
        browser.quit()