项目-验证码识别

首先,生成训练集,用PIL库生成150张验证码图片
项目-验证码识别_第1张图片

from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
import random

def getRandomColor():
    """Return a random colour as an ``(r, g, b)`` tuple.

    Each channel is drawn uniformly from 0..255; a draw of exactly 255 is
    replaced by 0, so every returned channel lies in 0..254.

    :return: tuple of three ints
    """
    channels = []
    for _ in range(3):
        value = random.randint(0, 255)
        # A channel value of 255 is folded back to 0.
        channels.append(0 if value == 255 else value)
    return tuple(channels)

def getRandomStr():
    """Return one random decimal digit ("0".."9") as a string.

    Only the character is chosen here; the colour it is drawn with is
    picked by the caller.

    :return: a one-character string
    """
    return str(random.randint(0, 9))

def generate_captcha():
    """Render one 5-digit captcha and save it under ``captcha_images/``.

    The image is a 150x50 white RGB canvas with five random digits drawn in
    random colours, three black noise lines, and five random-coloured noise
    points each paired with a small black arc.  The file is written as
    ``captcha_images/<digits>.png``, so the file name doubles as the label;
    a captcha with the same digits overwrites the earlier file.

    The ``captcha_images/`` directory must already exist, and the font file
    "Lohit-Bengali.ttf" must be resolvable by Pillow.

    To build a prediction set instead, change the output directory to
    ``captcha_predict/``.

    :return: None
    """
    # White RGB canvas: 150 px wide, 50 px high.
    image = Image.new('RGB', (150, 50), (255, 255, 255))
    # Drawing context bound to the canvas.
    draw = ImageDraw.Draw(image)
    # TrueType font used for the digits (font file path and point size).
    font = ImageFont.truetype("Lohit-Bengali.ttf", size=32)

    digits = []
    for i in range(5):
        random_char = getRandomStr()
        digits.append(random_char)
        # draw.text arguments: position, text, fill colour, font.
        # Digits are spaced 30 px apart starting at x = 10.
        draw.text((10 + i * 30, 0), random_char, getRandomColor(), font=font)
    label = ''.join(digits)

    # Noise lines and points.
    # NOTE(review): noise is limited to y in [0, 30] although the canvas is
    # 50 px high -- possibly a leftover from an earlier 150x30 canvas; confirm.
    width = 150
    height = 30
    # Three black lines between random end points.
    for _ in range(3):
        x1 = random.randint(0, width)
        x2 = random.randint(0, width)
        y1 = random.randint(0, height)
        y2 = random.randint(0, height)
        draw.line((x1, y1, x2, y2), fill=(0, 0, 0))
    # Five coloured points, each followed by a small black quarter arc.
    for _ in range(5):
        draw.point([random.randint(0, width), random.randint(0, height)], fill=getRandomColor())
        x = random.randint(0, width)
        y = random.randint(0, height)
        draw.arc((x, y, x + 4, y + 4), 0, 90, fill=(0, 0, 0))

    # Write the PNG through a context manager so the file handle is closed
    # (the handle was previously opened inline and never closed).
    with open(''.join(['captcha_images/', label, '.png']), 'wb') as out_file:
        image.save(out_file, 'png')

if __name__ == '__main__':
    # Generate the 150 captcha images that make up the training set.
    for _ in range(150):
        generate_captcha()

对验证码进行处理:首先将图片的RGB值(0~255, 0~255, 0~255)转为灰度值,再对灰度值二值化(灰度值 ≤ 220 记为 0,即黑色;否则记为 1,即白色),得到只含黑白两种像素(0, 1)的图片;然后对该图片降噪、切分,得到训练集

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os


def binarizaion(path):
    """Load an image, convert it to grayscale and binarize it.

    Pixels whose gray value is <= 220 become 0 (black); all other pixels
    become 1 (white).  The result is displayed with matplotlib
    (``plt.show()``) before it is returned.

    :param path: path of the image file to open
    :return: 2-D numpy array (rows x columns) containing only 0 and 1, with
        the same dtype as the grayscale image array
    """
    # Close the underlying file as soon as the grayscale copy exists.
    with Image.open(path) as img:
        gray_pixels = np.array(img.convert('L'))

    # Vectorised threshold: True (gray > 220) -> 1, False -> 0.  This gives
    # the same values as a per-pixel loop without the Python-level overhead.
    img_gray = (gray_pixels > 220).astype(gray_pixels.dtype)

    plt.figure('')
    plt.imshow(img_gray, cmap='gray')
    plt.axis('off')
    plt.show()

    return img_gray


def _count_black_neighbors(img_gray, row, col):
    """Count the in-bounds 8-neighbours of (row, col) whose value is 0."""
    height, width = img_gray.shape
    black = 0
    for d_row in (-1, 0, 1):
        for d_col in (-1, 0, 1):
            if d_row == 0 and d_col == 0:
                continue
            n_row = row + d_row
            n_col = col + d_col
            # Explicit bounds check: a negative numpy index does not raise,
            # it silently wraps around to the opposite edge of the image.
            if 0 <= n_row < height and 0 <= n_col < width:
                if img_gray[n_row, n_col] == 0:
                    black += 1
    return black


def noiseReduction(img_gray, label):
    """Remove isolated black pixels and save the cleaned image.

    ``img_gray`` is modified in place: every black pixel (0) that has fewer
    than 4 black pixels among its 8 neighbours is turned white (1).  Pixels
    are visited row by row and changes take effect immediately, so a pixel
    that was already cleared no longer counts as a black neighbour for the
    pixels visited after it.  Neighbours outside the image are ignored.

    The cleaned array is then rendered with matplotlib and written to
    ``clean_captcha_img/<label>.png``.

    :param img_gray: 2-D numpy array of 0 (black) / 1 (white) values
    :param label: captcha label, used as the output file name
    :return: None
    """
    height, width = img_gray.shape
    for row in range(height):
        for col in range(width):
            # White pixels are left untouched.
            if img_gray[row, col] == 1:
                continue
            # Fewer than 4 black neighbours: treat the pixel as noise.
            if _count_black_neighbors(img_gray, row, col) < 4:
                img_gray[row, col] = 1

    plt.figure('')
    plt.imshow(img_gray, cmap='gray')
    plt.axis('off')
    plt.savefig(''.join(['clean_captcha_img/', label, '.png']))

def cutImg(label):
    """Cut a cleaned captcha image into its five digit tiles.

    Opens ``clean_captcha_img/<label>.png`` and crops five 100x110 boxes
    side by side, starting at x = 100 with y from 170 to 280.  Tile ``i`` is
    saved as ``cut_number/<label[i]>/<seq>.png``, where ``seq`` is the next
    free index reported by ``get_save_seq``.

    NOTE(review): the crop coordinates are hard-coded; they presumably match
    the size of the figure written by ``plt.savefig`` -- confirm before
    changing the figure size or DPI.

    :param label: the captcha's digit string (expected to have 5 characters)
    :return: None
    """
    source_path = ''.join(['clean_captcha_img/', label, '.png'])
    # The context manager closes the image file once all tiles are saved.
    with Image.open(source_path) as img:
        for i in range(5):
            left = 100 * (1 + i)
            pic = img.crop((left, 170, left + 100, 280))
            plt.imshow(pic)
            digit = label[i]
            seq = get_save_seq(digit)
            pic.save(''.join(['cut_number/', digit, '/', str(seq), '.png']))

def get_save_seq(num):
    """Return the next free sequence number inside ``cut_number/<num>/``.

    Files in that directory are expected to be named ``<int>.<ext>``.  The
    result is 0 when no such file exists, otherwise the largest index found
    (never less than 0) plus one.  Entries whose name does not start with an
    integer (for example hidden files) are ignored instead of raising.

    :param num: digit sub-directory name, as a string
    :return: int, the index to use for the next saved tile
    """
    indices = []
    for file_name in os.listdir(''.join(['cut_number/', num, '/'])):
        stem = file_name.split('.')[0]
        try:
            indices.append(int(stem))
        except ValueError:
            # Not an "<int>.<ext>" file; it does not take part in numbering.
            continue
    if not indices:
        return 0
    return max([0] + indices) + 1

def create_dir():
    """Create the ten digit directories ``cut_number/0`` .. ``cut_number/9``.

    Missing parent directories are created as well, and directories that
    already exist are left alone, so the function is safe to call on every
    run.

    :return: None
    """
    for digit in range(10):
        os.makedirs(''.join(['cut_number/', str(digit)]), exist_ok=True)

def img_2_clean():
    """Binarize and denoise every file found in ``captcha_images/``.

    The part of each file name before the first dot is used as the label
    that is passed on to ``noiseReduction``.

    :return: None
    """
    source_dir = 'captcha_images/'
    for file_name in os.listdir(source_dir):
        name_stem = file_name.split('.')[0]
        # Step 1: binarize; step 2: denoise and save under the label.
        binary_img = binarizaion(source_dir + file_name)
        noiseReduction(binary_img, name_stem)

def clean_to_cut():
    """Run ``cutImg`` for every file found in ``clean_captcha_img/``.

    The label handed to ``cutImg`` is the part of the file name before the
    first dot.

    :return: None
    """
    for file_name in os.listdir('clean_captcha_img/'):
        cutImg(file_name.split('.')[0])


if __name__ == '__main__':
    # Pipeline: clean every captcha, create the digit folders, cut the tiles.
    img_2_clean()
    create_dir()
    clean_to_cut()
    # NOTE(review): the loop below binarizes and denoises every captcha a
    # second time, repeating the steps img_2_clean() performs above.
    for file_name in os.listdir('captcha_images/'):
        label = file_name.split('.')[0]
        binary_img = binarizaion('captcha_images/' + file_name)
        noiseReduction(binary_img, label)

你可能感兴趣的:(机器学习,python)