本文先是自定义生成了51000张验证码图片,5w张作为训练集,1k张作为测试集,先用训练集训练DCGAN模型,之后基于DCGAN模型生成验证码,这边规定生成的验证码需满足两个要求,一个是判别器评分大于0.95,另一个是验证码的4字符不在训练集中。最后,用模型所生成的这些验证码去训练一个CNN模型,并用一开始的1k张测试集去验证模型的准确率,最终的准确率为80%左右。
使用GAN生成指定字符的验证码图片,即生成器不但要会生成逼真的验证码,并且还能生成指定字符的验证码,同样,判别器不但要会判别真伪,还要会识别验证码的字符。
这一步就省略了,因为跟我的上一篇博客基本是一样的,只是生成的验证码数量变多了,即把 self.train_num 改为 50000。
import string
import torch
import torch.nn as nn
# Character vocabulary: the digits 0-9 followed by the uppercase letters A-Z,
# each mapped to its position in that sequence.
word2num = {
    char: position
    for position, char in enumerate(string.digits + string.ascii_uppercase)
}

captcha_number = 4       # characters per captcha
nc = 3                   # image channels
image_size = 64          # side length the images are resized to
latent_space_size = 100  # channels of the generator's noise input
ngf = 128                # base channel width of the generator
ndf = 128                # base channel width of the discriminator
class Generator(nn.Module):
    """Conditional DCGAN generator.

    The noise tensor and the one-hot label vector are each projected by their
    own transposed convolution, concatenated along the channel axis and then
    upsampled by a stack of transposed convolutions into an ``nc``-channel
    image squashed by ``Tanh``.
    """

    def __init__(self):
        super().__init__()
        label_channels = captcha_number * len(word2num)
        branch_channels = ngf // 2

        # Noise branch: latent_space_size -> ngf // 2 channels.
        self.deconv_x = nn.Sequential(
            nn.ConvTranspose2d(latent_space_size, branch_channels,
                               kernel_size=4, stride=1, padding=0),
            nn.ReLU(inplace=True),
        )
        # Label branch: one channel per (position, character) pair -> ngf // 2.
        self.deconv_y = nn.Sequential(
            nn.ConvTranspose2d(label_channels, branch_channels,
                               kernel_size=4, stride=1, padding=0),
            nn.ReLU(inplace=True),
        )
        # Shared trunk; its input has ngf channels (both branches concatenated).
        self.model = nn.Sequential(
            nn.ConvTranspose2d(ngf, ngf * 8,
                               kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(ngf * 8, ngf * 4,
                               kernel_size=4, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(ngf * 4, ngf * 2,
                               kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(ngf * 2, ngf,
                               kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(ngf, nc,
                               kernel_size=4, stride=2, padding=1, bias=False),
            nn.Tanh(),
        )

    def forward(self, x, y):
        """Generate images from noise ``x`` conditioned on label vector ``y``.

        ``y`` receives two trailing singleton dimensions so that it can be fed
        to the 2-D transposed convolution of the label branch.
        """
        noise_features = self.deconv_x(x)
        label_features = self.deconv_y(y[:, :, None, None])
        merged = torch.cat([noise_features, label_features], 1)
        return self.model(merged)
class Discriminator(nn.Module):
    """Conditional DCGAN discriminator.

    The image and the one-hot label (broadcast to an ``image_size`` x
    ``image_size`` map) each go through their own stride-2 convolution. The
    two feature maps are concatenated along the channel axis and reduced by a
    stack of convolutions to a single sigmoid score.
    """

    def __init__(self):
        super().__init__()
        label_channels = captcha_number * len(word2num)
        branch_channels = ndf // 2

        # Image branch: nc -> ndf // 2 channels.
        self.conv1_x = nn.Sequential(
            nn.Conv2d(nc, branch_channels, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),
        )
        # Label branch: one channel per (position, character) pair -> ndf // 2.
        self.conv1_y = nn.Sequential(
            nn.Conv2d(label_channels, branch_channels,
                      kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),
        )
        # Shared trunk; its input has ndf channels (both branches concatenated).
        self.model = nn.Sequential(
            nn.Conv2d(ndf, ndf, kernel_size=4, stride=2, padding=1, bias=False),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),
            nn.Conv2d(ndf, ndf * 2,
                      kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),
            nn.Conv2d(ndf * 2, ndf * 4,
                      kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),
            nn.Conv2d(ndf * 4, ndf * 8,
                      kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(negative_slope=0.2, inplace=True),
            nn.Conv2d(ndf * 8, 1,
                      kernel_size=2, stride=1, padding=0, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x, y):
        """Score image batch ``x`` given the one-hot label vectors ``y``."""
        image_features = self.conv1_x(x)
        n_samples = image_features.size(0)
        # Turn each label vector into a constant image_size x image_size map
        # (one channel per label slot) so it can be convolved like an image.
        label_map = y.view(n_samples, captcha_number * len(word2num), 1, 1)
        label_map = label_map.expand(-1, -1, image_size, image_size)
        label_features = self.conv1_y(label_map)
        merged = torch.cat([image_features, label_features], 1)
        return self.model(merged)
import numpy as np
import string
import os
from PIL import Image
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.autograd import Variable
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.utils import save_image, make_grid
import matplotlib.pyplot as plt
%matplotlib inline
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

image_path = 'images/train/'  # folder holding the training captchas

# Model dimensions
latent_space_size = 100  # channels of the generator's noise input
nc = 3                   # number of image channels

# Optimisation settings
batch_size = 64
epochs = 100
learning_rate = 1e-5
beta1 = 0.5              # first Adam beta
workers = 2              # DataLoader worker processes
def one_hot_encode(value):
    """Encode a captcha string as one flat one-hot vector.

    Character position ``p`` owns the slice starting at
    ``p * len(word2num)``; inside that slice the slot given by
    ``word2num`` for the character is set to 1.0.

    Returns:
        ``(vector, order)``: a float NumPy array of length
        ``captcha_number * len(word2num)`` and the list of indices that were
        set, in character order.
    """
    vector = np.zeros(captcha_number * len(word2num), dtype=float)
    order = []
    for position, char in enumerate(value):
        hot_index = position * len(word2num) + word2num.get(char)
        vector[hot_index] = 1.0
        order.append(hot_index)
    return vector, order
def one_hot_decode(value):
    """Turn a sequence of flat one-hot indices back into a captcha string.

    The i-th entry of ``value`` is reduced by ``i * len(word2num)`` and the
    result is looked up among the codes in ``word2num``. Entries matching no
    code are skipped.
    """
    chars = []
    for position, flat_index in enumerate(value):
        if position:
            code = flat_index - position * len(word2num)
        else:
            code = flat_index
        # First character whose code equals `code`, or None when none does.
        match = next(
            (char for char, num in word2num.items() if code == int(num)),
            None,
        )
        if match is not None:
            chars.append(match)
    return "".join(chars)
class ImageDataSet(Dataset):
    """Dataset of captcha images whose label is encoded in the file name.

    Every entry of ``folder`` is treated as an image. The last
    ``captcha_number`` characters of the file name (extension removed) are
    taken as the captcha text.

    Each item is ``(image, vector, order)``:
        image  -- tensor resized to ``image_size`` x ``image_size`` and
                  normalised with mean 0.5 / std 0.5 per channel
        vector -- float tensor, one-hot encoding of the captcha text
        order  -- list of the indices set in ``vector``
    """

    # PIL mode that yields exactly `nc` channels after ToTensor.
    _MODE_FOR_CHANNELS = {1: 'L', 3: 'RGB', 4: 'RGBA'}

    def __init__(self, folder):
        self.transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
            transforms.Normalize([0.5] * nc, [0.5] * nc),
        ])
        self.images = [os.path.join(folder, name) for name in os.listdir(folder)]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        path = self.images[idx]
        # Strip directory and extension before slicing so that the label does
        # not depend on the extension being exactly three characters long.
        stem = os.path.splitext(os.path.basename(path))[0]
        captcha_str = stem[-captcha_number:]
        vector, order = one_hot_encode(captcha_str)
        vector = torch.from_numpy(vector).float()
        # The context manager releases the file handle once the pixels have
        # been read by the transform.
        with Image.open(path) as img:
            # Palette / greyscale / RGBA files would otherwise produce a
            # tensor whose channel count differs from `nc`.
            mode = self._MODE_FOR_CHANNELS.get(nc)
            if mode is not None and img.mode != mode:
                img = img.convert(mode)
            image = self.transform(img)
        return image, vector, order
def loader(image_path, batch_size):
    """Return a shuffling ``DataLoader`` over the images in ``image_path``.

    Batches hold ``batch_size`` items and are loaded by ``workers``
    worker processes.
    """
    return DataLoader(
        ImageDataSet(image_path),
        batch_size=batch_size,
        shuffle=True,
        num_workers=workers,
    )
if __name__ == '__main__':
    # Train the conditional DCGAN and save both networks' weights.
    netd = Discriminator().to(device=device)  # discriminator
    netg = Generator().to(device=device)      # generator

    # One Adam optimizer per network.
    optimizerD = Adam(netd.parameters(), lr=learning_rate, betas=(beta1, 0.999))
    optimizerG = Adam(netg.parameters(), lr=learning_rate, betas=(beta1, 0.999))

    # Binary cross-entropy on the discriminator's sigmoid output.
    criterion = nn.BCELoss().to(device=device)

    # A fixed batch of noise and labels, reused for every preview so the
    # samples can be compared across epochs.
    fix_z = torch.randn(10, latent_space_size, 1, 1).to(device=device)
    file_names = os.listdir(image_path)  # list the folder once, not per sample
    # Parse the label the same way ImageDataSet does: the last
    # `captcha_number` characters of the file name without its extension.
    random_strs = [
        os.path.splitext(str(np.random.choice(file_names)))[0][-captcha_number:]
        for _ in range(10)
    ]
    print(random_strs)
    print()
    fix_y = torch.as_tensor(
        np.array([one_hot_encode(s)[0] for s in random_strs]),
        dtype=torch.float32,
    ).to(device=device)

    G_LOSS = []
    D_LOSS = []
    dataloader = loader(image_path, batch_size)
    for epoch in range(epochs):
        mean_G = []
        mean_D = []
        for img, vector, _order in dataloader:
            img = img.to(device=device)
            vector = vector.to(device=device)
            n_samples = img.size(0)
            is_real = torch.ones(n_samples, device=device)   # 1 for real
            is_fake = torch.zeros(n_samples, device=device)  # 0 for fake

            # ---- discriminator step ----
            netd.zero_grad()
            output = netd(img, vector)
            errD_real = criterion(output.view(-1), is_real)
            errD_real.backward()
            z = torch.randn(n_samples, latent_space_size, 1, 1).to(device=device)
            # detach(): the discriminator update must not reach the generator.
            fake_pic = netg(z, vector).detach()
            output = netd(fake_pic, vector)
            errD_fake = criterion(output.view(-1), is_fake)
            errD_fake.backward()
            optimizerD.step()

            # ---- generator step ----
            netg.zero_grad()
            fake_pic = netg(z, vector)
            output = netd(fake_pic, vector)
            # The generator is rewarded when its fakes are scored as real.
            errG = criterion(output.view(-1), is_real)
            errG.backward()
            optimizerG.step()

            mean_G.append(errG.item())
            mean_D.append(errD_real.item() + errD_fake.item())

        print(f'epoch:{epoch} D_LOSS:{np.mean(mean_D)} G_LOSS:{np.mean(mean_G)}')
        G_LOSS.append(np.mean(mean_G))
        D_LOSS.append(np.mean(mean_D))

        if epoch % 20 == 0:
            # Preview only: no autograd graph is needed.
            with torch.no_grad():
                fake_u = netg(fix_z, fix_y)
            # Undo the (0.5, 0.5) normalisation before display.
            imgs = make_grid(fake_u * 0.5 + 0.5, nrow=5).cpu()
            # make_grid returns (C, H, W) but imshow expects (H, W, C).
            plt.imshow(imgs.permute(1, 2, 0).numpy())
            plt.show()

    plt.plot(G_LOSS, label='G_LOSS')
    plt.plot(D_LOSS, label='D_LOSS')
    plt.legend()
    plt.show()

    # Save the trained weights.
    torch.save(netd.state_dict(), 'dcgan_netd.pth')
    torch.save(netg.state_dict(), 'dcgan_netg.pth')
大约三个小时后,模型训练完成,来看看效果咋样,DCGAN模型生成的与真实的验证码图片比较如下:
DCGAN生成的验证码 | 真实的验证码 |
---|---|
效果其实还算不错,不过跟真实的相比还是有点差距的。
生成的验证码将满足两个条件:一是判别器评分大于0.95;二是验证码的4个字符不在训练集中。
import torch
from torch.autograd import Variable
from torchvision.utils import save_image, make_grid
from PIL import Image
from tqdm import tqdm
import os
import string
from itertools import product

# Generate captchas with the trained conditional DCGAN and keep those that
# (1) the discriminator scores above 0.95 and (2) carry a text that does not
# occur in the training set.
word2num = {v: k for k, v in enumerate(string.digits + string.ascii_uppercase)}
image_path = 'images/train/'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
latent_space_size = 100
image_height = 40
image_width = 132
output_dir = '生成的图片/'
os.makedirs(output_dir, exist_ok=True)

# Every possible captcha text, in the same order nested loops would give.
temp = list(word2num.keys())
all_a = [''.join(chars) for chars in product(temp, repeat=captcha_number)]

netd = Discriminator().to(device=device)
netg = Generator().to(device=device)
# map_location lets weights saved on a GPU be loaded on a CPU-only machine.
netd.load_state_dict(torch.load('dcgan_netd.pth', map_location=device))
netg.load_state_dict(torch.load('dcgan_netg.pth', map_location=device))
# NOTE(review): both networks stay in training mode (eval() is never called),
# so BatchNorm uses per-batch statistics here -- confirm this is intended.

# Texts already present in the training set, parsed the same way
# ImageDataSet does. A set gives O(1) membership tests inside the loop.
all_aa = {
    os.path.splitext(name)[0][-captcha_number:]
    for name in os.listdir(image_path)
}

resize = transforms.Resize((image_height, image_width))  # (height, width)

with torch.no_grad():  # inference only
    for a in tqdm(all_a):
        if a in all_aa:
            # Already in the training set: skip before any forward pass.
            continue
        z = torch.randn(1, latent_space_size, 1, 1).to(device=device)
        # Encode the label once and reuse it for both networks.
        label = torch.as_tensor(
            one_hot_encode(a)[0], dtype=torch.float32
        ).unsqueeze(0).to(device=device)
        fake_pic = netg(z, label)
        score = netd(fake_pic, label).view(-1)[0].item()
        if score > 0.95:
            out_file = f'{output_dir}{a}.png'
            # Undo the (0.5, 0.5) normalisation before saving.
            imgs = make_grid(fake_pic * 0.5 + 0.5).cpu()  # CHW
            save_image(imgs, out_file)
            # Re-open the saved file and resize it to the final
            # image_height x image_width before overwriting it.
            with Image.open(out_file) as saved:
                resized = resize(saved)
            resized.save(out_file)
运行以上代码,三个小时左右便完成了验证码图片的生成,不出意外的话,生成的验证码有1万张以上。
限于篇幅,这一步的代码就省略了吧,因为跟我的上一篇博客是基本一样的,只是训练集需要改用DCGAN生成的验证码,process_img 函数做二值化时的阈值改为120,epochs改为10,以及batch_size改为32。
以下为训练过程打印输出:
Train start
Iteration is 383
epoch:1, step:100, loss:0.11870528757572174
epoch:1, step:200, loss:0.09803573042154312
epoch:1, step:300, loss:0.07167644798755646
epoch:2, step:100, loss:0.060339584946632385
epoch:2, step:200, loss:0.0454578697681427
epoch:2, step:300, loss:0.045735735446214676
epoch:3, step:100, loss:0.03509911149740219
epoch:3, step:200, loss:0.03168116882443428
epoch:3, step:300, loss:0.03217519074678421
epoch:4, step:100, loss:0.029901988804340363
epoch:4, step:200, loss:0.032566048204898834
epoch:4, step:300, loss:0.028481818735599518
epoch:5, step:100, loss:0.022674065083265305
epoch:5, step:200, loss:0.019393315538764
epoch:5, step:300, loss:0.023355185985565186
epoch:6, step:100, loss:0.027277015149593353
epoch:6, step:200, loss:0.018431685864925385
epoch:6, step:300, loss:0.01690380461513996
epoch:7, step:100, loss:0.022878311574459076
epoch:7, step:200, loss:0.02011089399456978
epoch:7, step:300, loss:0.020655091851949692
epoch:8, step:100, loss:0.013621113263070583
epoch:8, step:200, loss:0.015619204379618168
epoch:8, step:300, loss:0.024786094203591347
epoch:9, step:100, loss:0.016219446435570717
epoch:9, step:200, loss:0.015738267451524734
epoch:9, step:300, loss:0.016928061842918396
epoch:10, step:100, loss:0.01601400598883629
epoch:10, step:200, loss:0.015124175697565079
epoch:10, step:300, loss:0.01665317639708519
Train done
这一步也就省略了吧,因为跟我的上一篇博客测试模型准确率时的代码完全一样的
以下为测试打印输出:
load cnn model
Fail, captcha:NTJI->NTJ1
Fail, captcha:E57N->E5Z6
Fail, captcha:BKI6->BKT6
Fail, captcha:U0IQ->UCIQ
Fail, captcha:GEQI->GEQ1
Fail, captcha:KIC4->K1C4
Fail, captcha:PCSO->PCS0
Fail, captcha:XW4O->XW40
Fail, captcha:TQXU->TQYU
Fail, captcha:4KCY->4K0Y
Fail, captcha:COG1->CCG1
Fail, captcha:CZX7->CZY7
Fail, captcha:Q508->Q5D8
Fail, captcha:79GR->798R
Fail, captcha:DNBT->DNBI
......
Fail, captcha:V043->VO43
Fail, captcha:G1XF->G1YF
完成。总预测图片数为1000张,准确率为81%
可以看到准确率为81%
以上即本篇的全部内容,主要核心代码全都在上面了,若想要完整的源代码,那就关注一下《Python王者之路》公众号,回复关键词:20210601,即可获取源代码。
本文所参考的链接
时隔半年,我又重新捡起了我的CSDN,半年了,你知道我这半年是怎么过的吗。。。
一个原因是上班太忙了,根本没时间静下心来写写东西,虽然目前是双休,而且总是幻想着周末一定要好好学习一番,可真正到了周末才发现,还是床比较舒服一点~
另一个原因是有个卡点一直过不去,做不到完美,一开始我想的是只用500张真实的验证码图片,就能训练出一个能够输出逼真验证码图片的GAN模型,这样的话就可以完美地解决了所有字符型验证码了,然而想象很美好,现实却很骨感。。
不过呢,现在总算是完成了半年前就想写的这篇博客了,其实把这个探究的过程记录下来,成就感也是满满的,悬挂着的心也可以放松一下了,然后,继续奔着下一个目标前进!!
刚好,今天正好是六一,那就在这祝各位小朋友&大朋友们,节日快乐啦~