继续我们的百度paddle学习。这次的项目跟上次其实差别不大,不过这次的数据图片分辨率比较小,而且类别更多。接下来我们学习一下微调AlexNet,使得它更加适合我们的数据集训练。
还是那句话深度学习不外乎四个步骤:
1. 数据标签处理
2. 构建网络模型
3. 规划网络超参
4. 训练评估模型
首先导入需要的库
import numpy as np
import paddle as paddle
import paddle.fluid as fluid
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import os
from multiprocessing import cpu_count
from paddle.fluid.dygraph import Pool2D,Conv2D
from paddle.fluid.dygraph import Linear
paddle为大家准备的车牌数据集有65个类别,每个类别有500张左右的黑白图片,分辨率为20x20。数据集链接
首先是数据集处理
# Build the train/test image lists for the licence-plate character dataset.
# Each line of a list file is "<image path>\t<label>\n".
data_path = 'dataset'  # path where the dataset archive was extracted
# os.listdir returns entries in arbitrary order; sorting keeps the
# folder -> label mapping identical across runs and machines, so a saved
# model stays consistent with a LABEL_temp rebuilt in a later session.
character_folders = sorted(os.listdir(data_path))
label = 0
LABEL_temp = {}  # str(label) -> folder name, used later to decode predictions
skipped_folders = ('.DS_Store', '.ipynb_checkpoints', 'data23617')
# Mode 'w' truncates lists left over from a previous run, and both files are
# opened once instead of being reopened in append mode for every folder.
with open('./train_data.list', 'w') as f_train, open('./test_data.list', 'w') as f_test:
    for character_folder in character_folders:
        if character_folder in skipped_folders:
            continue
        print(character_folder + " " + str(label))
        LABEL_temp[str(label)] = character_folder
        folder_path = os.path.join(data_path, character_folder)
        # Sorted so that the train/test split is reproducible as well.
        for i, img_name in enumerate(sorted(os.listdir(folder_path))):
            line = os.path.join(folder_path, img_name) + "\t" + str(label) + '\n'
            # Every 10th image of a class goes to the test list, the rest to training.
            if i % 10 == 0:
                f_test.write(line)
            else:
                f_train.write(line)
        label = label + 1
print('图像列表已生成')
将图片路径及文件夹标签写到list文件方便读取,接下来使用paddle的reader模块制作训练集和测试集
# Readers for the licence-plate character train/test sets, built on the
# image lists generated in the previous step.
def data_mapper(sample):
    """Load one sample's image as a flat, scaled float32 vector.

    Args:
        sample: ``(image_path, label)`` pair.

    Returns:
        ``(pixels, label)`` where ``pixels`` is the image loaded in
        single-channel mode, flattened, cast to float32 and divided by 255.0.
        ``label`` is passed through unchanged.
    """
    image_path, label = sample
    pixels = paddle.dataset.image.load_image(file=image_path, is_color=False)
    return pixels.flatten().astype('float32') / 255.0, label
def data_reader(data_list_path):
    """Create a reader over a list file of ``<image path>\\t<label>`` lines.

    Args:
        data_list_path: path of the list file to read.

    Returns:
        The reader produced by ``paddle.reader.xmap_readers``, which applies
        ``data_mapper`` to every ``(image_path, int_label)`` pair using
        ``cpu_count()`` workers and a buffer size of 1024.
    """
    def reader():
        with open(data_list_path, 'r') as list_file:
            for entry in list_file.readlines():
                image_path, label_text = entry.split('\t')
                # int() tolerates the trailing newline left on the label field.
                yield image_path, int(label_text)

    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 1024)
# Training data provider: samples are shuffled through a 4096-element buffer
# and grouped into batches of 128.
train_samples = paddle.reader.shuffle(
    reader=data_reader('./train_data.list'),
    buf_size=4096,
)
train_reader = paddle.batch(reader=train_samples, batch_size=128)
# Test data provider: same batch size, no shuffling.
test_samples = data_reader('./test_data.list')
test_reader = paddle.batch(reader=test_samples, batch_size=128)
由于数据集较手势识别更加庞大,这里我把buf_size设为4096,batch_size设置为128
这里我们以典型的AlexNet构建我们的神经网络结构,并进行微调
构建代码如下:
# CNN definition
class AlexNet(fluid.dygraph.Layer):
    """AlexNet variant tuned for single-channel 20x20 character images.

    Every convolution uses a 3x3 filter with padding 2 and ReLU; every pooling
    layer is a 2x2 max pool with stride 2. With the usual output-size formula
    ``(in + 2 * padding - filter) // stride + 1`` a 20x20 input evolves as:

        conv1 (stride 2): (20 + 2*2 - 3) // 2 + 1 = 11
        pool1: 5 -> conv2: 7 -> pool2: 3 -> conv3: 5 -> conv4: 7
        -> conv5: 9 -> pool5: 4

    so the flattened feature vector has 256 * 4 * 4 = 4096 elements, which is
    the input size of ``fc1``.

    Args:
        name_scope: name passed to the base ``Layer`` constructor.
        num_classes: size of the final output layer (default 65).
    """

    def __init__(self, name_scope, num_classes=65):
        super(AlexNet, self).__init__(name_scope)
        conv_args = dict(filter_size=3, padding=2, act='relu')
        pool_args = dict(pool_size=2, pool_stride=2, pool_type='max')

        # Compared with the classic AlexNet, conv1 uses a 3x3 filter with
        # stride 2 and padding 2 to suit the small input resolution.
        self.conv1 = Conv2D(num_channels=1, num_filters=96, stride=2, **conv_args)
        self.pool1 = Pool2D(**pool_args)
        self.conv2 = Conv2D(num_channels=96, num_filters=256, stride=1, **conv_args)
        self.pool2 = Pool2D(**pool_args)
        self.conv3 = Conv2D(num_channels=256, num_filters=384, stride=1, **conv_args)
        self.conv4 = Conv2D(num_channels=384, num_filters=384, stride=1, **conv_args)
        self.conv5 = Conv2D(num_channels=384, num_filters=256, stride=1, **conv_args)
        self.pool5 = Pool2D(**pool_args)

        self.fc1 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.drop_ratio1 = 0.5
        self.fc2 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.drop_ratio2 = 0.5
        self.fc3 = Linear(input_dim=4096, output_dim=num_classes)

    def forward(self, x):
        """Run the network on a batch and return the raw class scores.

        Args:
            x: input batch; the first dimension is kept as the batch
                dimension when the features are flattened.

        Returns:
            The output of ``fc3`` (no softmax applied).
        """
        feature_layers = (
            self.conv1, self.pool1,
            self.conv2, self.pool2,
            self.conv3, self.conv4, self.conv5, self.pool5,
        )
        for layer in feature_layers:
            x = layer(x)
        # Flatten everything except the batch dimension.
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        # Dropout after each hidden fully connected layer to curb overfitting.
        x = fluid.layers.dropout(self.fc1(x), self.drop_ratio1)
        x = fluid.layers.dropout(self.fc2(x), self.drop_ratio2)
        return self.fc3(x)
这里我们的优化器是 `MomentumOptimizer`,损失函数是 `softmax_with_cross_entropy`,训练了100轮次。
# Train AlexNet in dygraph mode and save its parameters under the name 'AlexNet'.
with fluid.dygraph.guard():
    model = AlexNet('AlexNet')
    model.train()  # training mode
    # Momentum optimizer; the author's noted alternative is
    # fluid.optimizer.AdamOptimizer(learning_rate=0.001, parameter_list=model.parameters()).
    opt = fluid.optimizer.MomentumOptimizer(
        learning_rate=0.01,
        momentum=0.9,
        parameter_list=model.parameters(),
    )
    epochs_num = 100  # number of passes over the training set
    for pass_num in range(epochs_num):
        for batch_id, data in enumerate(train_reader()):
            # Each sample is (flat pixels, label); rebuild (batch, 1, 20, 20)
            # images and (batch, 1) int64 labels.
            images = np.stack([sample[0].reshape(1, 20, 20) for sample in data]).astype(np.float32)
            labels = np.array([sample[1] for sample in data], dtype='int64').reshape(-1, 1)
            image = fluid.dygraph.to_variable(images)
            label = fluid.dygraph.to_variable(labels)

            predict = model(image)
            loss = fluid.layers.softmax_with_cross_entropy(predict, label)
            avg_loss = fluid.layers.mean(loss)
            acc = fluid.layers.accuracy(predict, label)

            # Report progress every 100 batches (batch 0 excluded).
            if batch_id and not batch_id % 100:
                print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(pass_num,batch_id,avg_loss.numpy(),acc.numpy()))

            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()
    fluid.save_dygraph(model.state_dict(), 'AlexNet')  # save the trained parameters
# Evaluate the saved model on the test set and print the overall accuracy.
with fluid.dygraph.guard():
    accs = []
    batch_sizes = []
    model = AlexNet('AlexNet')
    model_dict, _ = fluid.load_dygraph('AlexNet')
    model.load_dict(model_dict)  # load the trained parameters
    model.eval()  # evaluation mode
    for batch_id, data in enumerate(test_reader()):
        # Each sample is (flat pixels, label); rebuild (batch, 1, 20, 20)
        # images and (batch, 1) int64 labels.
        images = np.array([x[0].reshape(1, 20, 20) for x in data], np.float32)
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]
        image = fluid.dygraph.to_variable(images)
        label = fluid.dygraph.to_variable(labels)
        predict = model(image)
        acc = fluid.layers.accuracy(predict, label)
        accs.append(acc.numpy()[0])
        batch_sizes.append(len(data))
    # The last batch is usually smaller than the others, so a plain mean of
    # the per-batch accuracies would over-weight its samples. Weighting by
    # batch size yields the true per-sample accuracy.
    avg_acc = np.average(accs, weights=batch_sizes) if accs else float('nan')
    print(avg_acc)
到此整个训练过程就结束了~
我训练了100个epoch的结果是97%,大家可以尝试追求更高的精度,比如换不同的优化器,多训练几个epoch,增大图像分辨率等等
接下来是对车牌图片的分割和标签的对应,我就不解释了直接贴代码
# Split the licence-plate image into single characters and save each one as a
# 20x20 PNG named './<segment index>.png'.
license_plate = cv2.imread('./车牌.png')
if license_plate is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError("cannot read image './车牌.png'")
# cv2.imread returns channels in BGR order, so BGR2GRAY is the matching
# conversion (RGB2GRAY would swap the red and blue weights).
gray_plate = cv2.cvtColor(license_plate, cv2.COLOR_BGR2GRAY)
ret, binary_plate = cv2.threshold(gray_plate, 175, 255, cv2.THRESH_BINARY)

# Number of white pixels in every column; a zero marks a gap between characters.
result = np.count_nonzero(binary_plate, axis=0).tolist()

# Map segment index -> [first column, last column] of each run of non-empty columns.
character_dict = {}
num = 0
i = 0
while i < len(result):
    if result[i] == 0:
        i += 1
        continue
    index = i + 1
    # The bounds check stops the scan when a segment touches the right edge.
    while index < len(result) and result[index] != 0:
        index += 1
    character_dict[num] = [i, index - 1]
    num += 1
    i = index

if len(character_dict) < 8:
    raise ValueError('expected at least 8 segments on the plate, found ' + str(len(character_dict)))

for i in range(8):
    # Segment 2 is skipped - presumably the separator dot between the region
    # code and the serial number; confirm against the input image.
    if i == 2:
        continue
    start, end = character_dict[i]
    # Pad the crop horizontally towards a width of 170 before resizing
    # (170 presumably matches the plate image height - TODO confirm).
    # Clamped at 0 because np.pad rejects negative widths for wide segments.
    padding = max(0, int((170 - (end - start)) / 2))
    # NOTE: `end` is the last non-empty column, so this slice leaves that
    # column out; kept as in the original segmentation.
    ndarray = np.pad(binary_plate[:, start:end], ((0, 0), (padding, padding)), 'constant', constant_values=(0, 0))
    ndarray = cv2.resize(ndarray, (20, 20))
    cv2.imwrite('./' + str(i) + '.png', ndarray)
def load_image(path):
    """Load an image for inference.

    Args:
        path: path of the image file.

    Returns:
        The image loaded in single-channel mode, cast to float32, with a new
        leading axis added and every value divided by 255.0.
    """
    gray = paddle.dataset.image.load_image(file=path, is_color=False)
    return gray.astype('float32')[np.newaxis, ] / 255.0
# Translate class indices into the characters shown on a plate.
print('Label:',LABEL_temp)
# Dataset folder name -> printed character. Letters and digits map to
# themselves; pinyin folder names map to the province abbreviation.
match = {ch: ch for ch in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}
match.update({
    'yun': '云', 'cuan': '川', 'hei': '黑', 'zhe': '浙', 'ning': '宁', 'jin': '津',
    'gan': '赣', 'hu': '沪', 'liao': '辽', 'jl': '吉', 'qing': '青', 'zang': '藏',
    'e1': '鄂', 'meng': '蒙', 'gan1': '甘', 'qiong': '琼', 'shan': '陕', 'min': '闽',
    'su': '苏', 'xin': '新', 'wan': '皖', 'jing': '京', 'xiang': '湘', 'gui': '贵',
    'yu1': '渝', 'yu': '豫', 'ji': '冀', 'yue': '粤', 'gui1': '桂', 'sx': '晋', 'lu': '鲁',
})
match.update({digit: digit for digit in '0123456789'})
# LABEL_temp holds str(label) -> folder name in label order, so enumerating
# its values reproduces the label indices.
LABEL = {
    str(position): match[folder_name]
    for position, folder_name in enumerate(LABEL_temp.values())
}
print(LABEL)
# Run the trained model on the segmented character images and print the plate.
with fluid.dygraph.guard():
    model = AlexNet('AlexNet')
    model_dict, _ = fluid.load_dygraph('AlexNet')
    model.load_dict(model_dict)  # load the trained parameters
    model.eval()  # evaluation mode
    lab = []
    # Segment 2 is skipped here just as it was when the crops were saved.
    for i in [idx for idx in range(8) if idx != 2]:
        # Wrap the single image in a batch of one.
        infer_imgs = np.array([load_image('./' + str(i) + '.png')])
        result = model(fluid.dygraph.to_variable(infer_imgs))
        lab.append(np.argmax(result.numpy()))
    # `display` is not defined in this file; it is presumably the
    # notebook/IPython built-in.
    display(Image.open('./车牌.png'))
    print('\n车牌识别结果为:',end='')
    for predicted in lab:
        print(LABEL[str(predicted)],end='')
这次训练在有上次手势识别的基础上学习不难,主要还是自己调参遇到的困难居多,比如明明训练测试效果都不错,偏偏图片预测就不准。。。。。。
总之python对于模块的封装效果还是不错的,整个训练过程相对之前也就稍稍微调了一下网络模型,整体代码差异不大。
继续加油 fight!fight!fight!