In the previous section we covered how the dataset is generated; in this section I will walk through the model training process.
Because the data comes in many forms (binary files, text files, encoded image files, and so on), we first need to convert it into the LMDB or LEVELDB format that Caffe accepts, and only then train the model.
I. Data Processing
Here we use convert_imageset.exe from the tool set produced by building Caffe. Run the following commands at the command line:
Generate the chi_sim train set:
convert_imageset --shuffle --gray --resize_height=64 --resize_width=64 C:\myWorkspace\Pycharm\deepLearning_OCR-master\caffe_dataset\chi_sim\ C:\myWorkspace\Pycharm\deepLearning_OCR-master\caffe_dataset\chi_sim\train.txt C:\myWorkspace\Pycharm\deepLearning_OCR-master\caffe_dataset\chi_sim\fn_train_lmdb
Generate the chi_sim test set:
convert_imageset --shuffle --gray --resize_height=64 --resize_width=64 C:\myWorkspace\Pycharm\deepLearning_OCR-master\caffe_dataset\chi_sim\ C:\myWorkspace\Pycharm\deepLearning_OCR-master\caffe_dataset\chi_sim\test.txt C:\myWorkspace\Pycharm\deepLearning_OCR-master\caffe_dataset\chi_sim\fn_test_lmdb
This command takes four arguments:
1. Image format flags
--shuffle: randomly shuffle the order of the images; defaults to false.
--gray: open the images as grayscale. Images are loaded with OpenCV's imread() function; defaults to false.
--resize_height / --resize_width: resize the images. Training requires all images to have the same dimensions, so resizing is usually necessary. Images are scaled with OpenCV's resize() function; both default to 0 (no resizing).
2. The absolute path of the directory holding the images
3. The image list file, usually a .txt file with one image per line (see the example after this list)
4. The output path for the DB files
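For reference, convert_imageset expects each line of the list file to contain an image path (relative to the image root passed as the second argument) followed by an integer class label. The file names and labels below are invented for illustration; a fragment of train.txt might look like this:
00000/0001.png 0
00001/0001.png 1
00002/0001.png 2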
After the commands run, the files shown in the figure are generated:
Each folder contains a data.mdb and a lock.mdb file.
The training and test data for the other fonts are processed in exactly the same way; a scripted sketch follows.
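The following sketch automates this (the executable path and the list of font folders are assumptions modeled on the chi_sim example above; adjust them to your layout):

import subprocess

CONVERT = r"C:\myWorkspace\caffe_tool\caffe-master\Build\x64\Release\convert_imageset.exe"
ROOT = r"C:\myWorkspace\Pycharm\deepLearning_OCR-master\caffe_dataset"
FONTS = ["chi_sim", "digits"]  # hypothetical list of per-font dataset folders

for font in FONTS:
    for split in ("train", "test"):
        # Same flags as the chi_sim commands above: shuffle, grayscale, resize to 64x64.
        subprocess.check_call([
            CONVERT, "--shuffle", "--gray",
            "--resize_height=64", "--resize_width=64",
            ROOT + "\\" + font + "\\",                      # image root directory
            ROOT + "\\" + font + "\\" + split + ".txt",     # image list file
            ROOT + "\\" + font + "\\fn_" + split + "_lmdb", # output LMDB
        ])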
II. Model Creation
Training a Caffe model requires three configuration files:
1. lenet_solver.prototxt, the training hyperparameter (solver) file, where we define the network's base learning rate, momentum, weight decay, maximum number of iterations, and so on.
# Sets the hyperparameter values used while the model trains. The solver alternately runs the forward and backward passes to update the parameters and minimize the loss; it is essentially an iterative optimization algorithm.
# The train/test net protocol buffer definition
net: "E:/WorkSpace/Jupyter/DeepLearning/deepLearning_OCR-master/deepLearning_OCR-master/caffe_dataset/digits/lenet_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# Here the test batch size is 10 (see lenet_train_test.prototxt), so 100 test
# iterations cover 1,000 test images per test pass.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01 # base learning rate
momentum: 0.9 # momentum
weight_decay: 0.0005 # weight decay
# The learning rate policy: inverse decay
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 50000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "E:/WorkSpace/Jupyter/DeepLearning/deepLearning_OCR-master/deepLearning_OCR-master/caffe_dataset/digits/lenet"
# solver mode: CPU or GPU
solver_mode: CPU
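Under the "inv" policy, Caffe computes the effective learning rate as base_lr * (1 + gamma * iter)^(-power). A quick sketch of how the rate configured above decays over the 50,000 iterations:

# Learning-rate schedule for lr_policy "inv", using the values from lenet_solver.prototxt.
base_lr, gamma, power = 0.01, 0.0001, 0.75

def inv_lr(iteration):
    return base_lr * (1.0 + gamma * iteration) ** (-power)

for it in (0, 10000, 50000):
    print(it, round(inv_lr(it), 5))  # 0.01 -> ~0.00595 -> ~0.00261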
2. lenet_train_test.prototxt, the network definition file, used only during training; it describes each layer of the network. On top of the classic LeNet-5 model we add two convolutional layers and two pooling layers, which helps capture features while reducing dimensionality and considerably improves the model's classification accuracy.
name: "LeNet" #数据层
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN # this layer runs only in the TRAIN phase
}
transform_param {
# normalize input pixels to [0, 1]: 1/256 = 0.00390625
scale: 0.00390625
}
data_param {
source: "E:/WorkSpace/Jupyter/DeepLearning/deepLearning_OCR-master/deepLearning_OCR-master/caffe_dataset/digits/fn_train_lmdb"
batch_size: 64 # read 64 images per batch
backend: LMDB
}
}
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST # this layer runs only in the TEST phase
}
transform_param {
scale: 0.00390625
}
data_param {
source: "E:/WorkSpace/Jupyter/DeepLearning/deepLearning_OCR-master/deepLearning_OCR-master/caffe_dataset/digits/fn_test_lmdb"
batch_size: 10
# test batch size: batch_size (10) * test_iter (100) = test set size covered per test pass
backend: LMDB
}
}
layer {
name: "conv11" #conv11(即产生图上 C1数据)层是一个卷积层
type: "Convolution"
bottom: "data"
top: "conv11"
param { # learning-rate multipliers for this layer's learnable parameters
lr_mult: 1 # first: multiplier for the weights
}
param {
lr_mult: 2 # second: multiplier for the bias
}
convolution_param {
num_output: 64 # 64 convolution kernels
pad: 5 # pad the borders; defaults to 0 (no padding)
kernel_size: 11
stride: 1 # convolution stride of 1
weight_filler {
type: "xavier" #使用xavier算法初始化权值
}
bias_filler {
type: "constant" #偏置项的初始化。一般设置为”constant”, 值全为0
}
}
}
layer {
name: "pool11" #pool1(即产生S1数据)是一个降采样层
type: "Pooling"
bottom: "conv11"
top: "pool11"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv7"
type: "Convolution"
bottom: "pool11"
top: "conv7"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 128
pad: 3
kernel_size: 7
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool7"
type: "Pooling"
bottom: "conv7"
top: "pool7"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5"
type: "Convolution"
bottom: "pool7"
top: "conv5"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool5"
top: "conv3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc10000" #全连接层
type: "InnerProduct"
# learning rate and decay multipliers for the weights
param { lr_mult: 1 }
# learning rate and decay multipliers for the biases
param { lr_mult: 2 }
inner_product_param {
num_output: 10000
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
bottom: "pool3"
top: "fc10000"
}
layer {
name: "relu1" #激活函数层
type: "ReLU" #线性修正函数
bottom: "fc10000"
top: "fc10000"
}
layer {
name: "fc6503"
type: "InnerProduct"
# learning rate and decay multipliers for the weights
param { lr_mult: 1 }
# learning rate and decay multipliers for the biases
param { lr_mult: 2 }
inner_product_param {
num_output: 6503
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
bottom: "fc10000"
top: "fc6503"
}
layer {
# accuracy layer: computes the network's classification accuracy against the target labels; it runs only in the TEST phase, hence the include parameter
name: "accuracy"
type: "Accuracy"
bottom: "fc6503"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
# loss layer: SoftmaxWithLoss takes two blobs, the prediction and the label supplied by the data layer
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc6503"
bottom: "label"
top: "loss"
}
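Before moving on, it is worth checking how the feature-map size evolves through this stack. Each convolution's padding is chosen so the spatial size is preserved, and each 2x2/stride-2 max pool halves it. A small sketch (assuming the 1x64x64 grayscale input declared in the deploy file below), using Caffe's convolution output formula floor((in + 2*pad - kernel)/stride) + 1:

def conv_out(size, kernel, pad, stride=1):
    return (size + 2 * pad - kernel) // stride + 1

def pool_out(size, kernel=2, stride=2):
    return (size - kernel) // stride + 1  # exact here, since the sizes stay even

s = 64
for name, k, p in [("conv11", 11, 5), ("conv7", 7, 3), ("conv5", 5, 2), ("conv3", 3, 1)]:
    s = pool_out(conv_out(s, k, p))  # conv preserves the size, pooling halves it
    print(name, "-> pooled size", s)  # 32, 16, 8, 4

# pool3 therefore outputs 512 channels x 4 x 4 = 8192 values, which feed fc10000.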
3. deploy_lenet_train_test.prototxt
The deploy_lenet_train_test.prototxt file is structured slightly differently from lenet_train_test.prototxt: the training-specific parts are gone, as is the loss layer. The two data layers are replaced by a single Input layer, and the accuracy and loss layers give way to a final Softmax layer.
name: "LeNet"
layer {
name: "data"
type: "Input"
top: "data"
input_param { shape: { dim: 1 dim: 1 dim: 64 dim: 64 } } # batch 1, 1 channel, 64x64 pixels
}
layer {
name: "conv11"
type: "Convolution"
bottom: "data"
top: "conv11"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 64
pad: 5
kernel_size: 11
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool11"
type: "Pooling"
bottom: "conv11"
top: "pool11"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv7"
type: "Convolution"
bottom: "pool11"
top: "conv7"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 128
pad: 3
kernel_size: 7
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool7"
type: "Pooling"
bottom: "conv7"
top: "pool7"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5"
type: "Convolution"
bottom: "pool7"
top: "conv5"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool5"
top: "conv3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc10000"
type: "InnerProduct"
# learning rate and decay multipliers for the weights
param { lr_mult: 1 }
# learning rate and decay multipliers for the biases
param { lr_mult: 2 }
inner_product_param {
num_output: 10000
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
bottom: "pool3"
top: "fc10000"
}
layer {
name: "relu1"
type: "ReLU"
bottom: "fc10000"
top: "fc10000"
}
layer {
name: "fc6503"
type: "InnerProduct"
# learning rate and decay multipliers for the weights
param { lr_mult: 1 }
# learning rate and decay multipliers for the biases
param { lr_mult: 2 }
inner_product_param {
num_output: 6503
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
bottom: "fc10000"
top: "fc6503"
}
layer {
name: "prob"
type: "Softmax"
bottom: "fc6503"
top: "prob"
}
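At inference time this deploy file is paired with a trained .caffemodel (produced by the training below). A minimal pycaffe sketch, with hypothetical file names, of classifying a single 64x64 grayscale image:

import caffe

# Network definition (deploy) + trained weights; both file names are assumptions.
net = caffe.Net("deploy_lenet_train_test.prototxt",
                "lenet_iter_50000.caffemodel",
                caffe.TEST)

# load_image returns floats in [0, 1], roughly matching the training-time
# scale: 0.00390625 transform; the image must already be 64x64 pixels.
img = caffe.io.load_image("sample_char.png", color=False)  # shape (64, 64, 1)
net.blobs["data"].data[0, 0] = img[:, :, 0]

out = net.forward()
print("predicted class:", out["prob"].argmax())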
III. Model Training
With these three files ready, we can start training the model. Run the following at the command line:
C:\myWorkspace\caffe_tool\caffe-master\Build\x64\Release\caffe train --solver=C:\myWorkspace\Pycharm\deepLearning_OCR-master\caffe_dataset\chi_sim\lenet_solver.prototxt
The model starts training, as shown in the figure:
The figure shows the iteration count and the current loss; the network runs a test pass every 500 training iterations.
When training completes, the model files shown in the figure are generated:
Earlier I configured a snapshot every 5,000 iterations so that the training state is preserved. If training is interrupted unexpectedly, it can be resumed from the .solverstate file (see the command below). The .caffemodel file stores the parameters learned during training; it is effectively the classifier we will use later for image classification.
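For reference, resuming uses the caffe train command's --snapshot flag; assuming the snapshot taken at iteration 5,000, the call looks like this:
caffe train --solver=lenet_solver.prototxt --snapshot=lenet_iter_5000.solverstate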
With that, model training is complete.