I. Data preparation: generating the LMDB files
Option 1: run the following command directly from cmd:
convert_imageset.exe --resize_height=240 --resize_width=240 --shuffle --backend="lmdb" G:\ G:\caffe_python\label\label_train.txt G:\caffe_python\label\train_lmdb
convert_imageset.exe is produced when Caffe is compiled, typically under \Build\x64\Release. resize_height and resize_width resize the original images to a uniform size. G:\ is the root path of the training images. label_train.txt holds the training annotations, one line per image in the format: filename label. The resulting LMDB is written to G:\caffe_python\label\train_lmdb. The validation LMDB can be generated the same way.
Option 2: generate the LMDB files with a Python script
# -*- coding: utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
sys.path.insert(0, 'C:/Anaconda3/envs/py27/Lib/site-packages/pycaffe')
import caffe
import lmdb
import random
import cv2
import numpy as np
from caffe.proto import caffe_pb2
from sklearn.model_selection import train_test_split
from Bconfig import config
def get_dataset(label_dir):
    with open(label_dir, 'r') as f:
        annotations = f.readlines()
    random.shuffle(annotations)
    dataset = []
    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        data_example = dict()
        data_example['filename'] = annotation[0]
        data_example['label'] = int(annotation[1])
        dataset.append(data_example)
    return dataset

if __name__ == '__main__':
    train_image_path = config.train_image_path
    train_list_path = config.train_list_path
    batch_size = config.BATCH_SIZE
    patchSize = config.PatchSize
    lmdb_file = config.trainlmdb_file  # directory that will hold the LMDB data
    dataset = get_dataset(train_list_path)
    trainDataset, testDataset = train_test_split(dataset, test_size=0.1, random_state=1)
    fw = open(config.lmdb_record, 'w')
    # open the LMDB environment, creating the data file with a maximum map size of 1e12
    lmdb_env = lmdb.open(lmdb_file, map_size=int(1e12))
    lmdb_txn = lmdb_env.begin(write=True)  # create a write transaction handle
    datum = caffe_pb2.Datum()
    for idx, image_example in enumerate(trainDataset):
        filename = image_example['filename']
        label = image_example['label']
        image = cv2.imdecode(np.fromfile(filename, dtype=np.uint8), -1)
        resizeImg = cv2.resize(image, (patchSize, patchSize))
        # save in datum; Caffe expects C*H*W, so transpose the H*W*C image from OpenCV
        datum = caffe.io.array_to_datum(resizeImg.transpose((2, 0, 1)), label)  # every LMDB record is a key-value pair
        key_str = '{:0>8d}'.format(idx)  # fixed-length, monotonically increasing unique key
        lmdb_txn.put(key_str.encode(), datum.SerializeToString())  # write the record via the transaction
        print('{:0>8d}'.format(idx) + ':' + filename)
        # write batch
        if (idx + 1) % batch_size == 0:
            lmdb_txn.commit()
            lmdb_txn = lmdb_env.begin(write=True)
            print(idx + 1)
    # write last batch
    if (idx + 1) % batch_size != 0:
        lmdb_txn.commit()
        print('last batch')
        print(idx + 1)
    lmdb_env.close()  # release resources when finished

    lmdb_env1 = lmdb.open(config.vallmdb_file, map_size=int(1e10))
    lmdb_txn1 = lmdb_env1.begin(write=True)  # create a write transaction handle
    datum = caffe_pb2.Datum()
    for idt, image_example in enumerate(testDataset):
        filename = image_example['filename']
        label = image_example['label']
        image = cv2.imdecode(np.fromfile(filename, dtype=np.uint8), -1)
        resizeImg = cv2.resize(image, (patchSize, patchSize))
        # save in datum; Caffe expects C*H*W, so transpose the H*W*C image from OpenCV
        datum = caffe.io.array_to_datum(resizeImg.transpose((2, 0, 1)), label)  # every LMDB record is a key-value pair
        key_str = '{:0>8d}'.format(idt)  # fixed-length, monotonically increasing unique key
        lmdb_txn1.put(key_str.encode(), datum.SerializeToString())  # write the record via the transaction
        print('{:0>8d}'.format(idt) + ':' + filename)
        # write batch
        if (idt + 1) % batch_size == 0:
            lmdb_txn1.commit()
            lmdb_txn1 = lmdb_env1.begin(write=True)
            print(idt + 1)
    # write last batch
    if (idt + 1) % batch_size != 0:
        lmdb_txn1.commit()
        print('last batch')
        print(idt + 1)
    lmdb_env1.close()  # release resources when finished

    fw.write('trainDataset size: %d, testDataset size: %d' % (idx + 1, idt + 1))
    fw.close()
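To sanity-check the generated LMDB, you can read a record back and decode it. The following is a minimal sketch; the LMDB path is assumed to be the training LMDB produced above.
import lmdb
import numpy as np
from caffe.proto import caffe_pb2

env = lmdb.open('G:/caffe_python/label/train_lmdb', readonly=True)  # assumed path of the LMDB generated above
with env.begin() as txn:
    for key, value in txn.cursor():
        datum = caffe_pb2.Datum()
        datum.ParseFromString(value)
        # datum.data stores the raw uint8 bytes in C*H*W order
        img = np.frombuffer(datum.data, dtype=np.uint8).reshape(
            datum.channels, datum.height, datum.width)
        print(key, datum.label, img.shape)
        break  # inspect only the first record
env.close()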
II. Computing the training-data mean
compute_image_mean --backend="lmdb" train_lmdb mean.binaryproto
compute_image_mean.exe is also produced when Caffe is compiled; only the mean of the training set needs to be computed.
The mean can likewise be handled from Python; the following script converts the binaryproto mean file into a .npy file:
import sys
sys.path.insert(0, 'C:/Anaconda3/envs/py27/Lib/site-packages/pycaffe')
import caffe
import numpy as np
from Bconfig import config
blob = caffe.proto.caffe_pb2.BlobProto()
bin_mean = open(config.mean_binary, 'rb').read()
blob.ParseFromString(bin_mean)
arr = np.array(caffe.io.blobproto_to_array(blob))
npy_mean = arr[0]
np.save(config.mean_npy, npy_mean)
What is produced above is a per-pixel mean: the mean file has the same dimensions as the images, i.e. for M*N*C images the mean file is also M*N*C.
Caffe can also subtract a per-channel mean, where each channel's mean is a single number specified directly as a parameter in the prototxt file, as shown below:
layer {
  name: "InputData"
  type: "Data"
  top: "data"
  top: "label"
  transform_param {
    mirror: false
    crop_size: 240
    mean_value: 78.3
    mean_value: 76.7
    mean_value: 73.2
  }
  data_param {
    ...
  }
}
Regarding the per-channel mean: compute_image_mean.cpp saves the per-pixel mean, and at the end it also prints the channel means:
for (int c = 0; c < channels; ++c) {
  for (int i = 0; i < dim; ++i) {
    mean_values[c] += sum_blob.data(dim * c + i);
  }
  LOG(INFO) << "mean_value channel [" << c << "]: " << mean_values[c] / dim;
}
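If you prefer to obtain the per-channel values in Python, a minimal sketch (assuming the .npy mean saved by the conversion script above, in C*H*W order as returned by blobproto_to_array) is:
import numpy as np
from Bconfig import config

npy_mean = np.load(config.mean_npy)          # shape: C*H*W
channel_means = npy_mean.mean(axis=(1, 2))   # average over H and W for each channel
print(channel_means)                         # values usable as mean_value in the prototxt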
III. Building the network architecture
1. Convolution layer:
import caffe
from caffe import layers as L
from caffe import params as P

n = caffe.NetSpec()
# lmdb, mean_file and batch_size are the LMDB path, mean file and batch size prepared earlier
n.data, n.label = L.Data(source=lmdb, name='InputData', backend=P.Data.LMDB,
                         batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=240, mean_file=mean_file, mirror=False))
n.conv1 = L.Convolution(n.data, kernel_size=3, stride=1, pad=1, num_output=32,
                        weight_filler=dict(type='xavier'),
                        bias_term=True,
                        bias_filler=dict(type='constant'),
                        name='conv')
2. ReLU activation layer:
n.relu = L.ReLU(n.conv1, in_place=True, name='conv_relu')
With in_place set to True, the layer's top and bottom blobs are the same (the activation overwrites its input blob).
3. Pooling layer:
n.pool_max = L.Pooling(n.relu1, pool=P.Pooling.MAX, kernel_size=4, stride=3, pad=0,
                       name='pool_max')
n.pool_ave = L.Pooling(n.conv_out, pool=P.Pooling.AVE, kernel_size=12, stride=1, pad=0,
                       name='pool_ave')
n.global_ave = L.Pooling(n.dense3, pool=P.Pooling.AVE, global_pooling=True,
                         name='pool_global_ave')
In Caffe, the output feature-map size of a convolution layer is computed as:
out_size = floor[ (in_size + 2*pad - kernel_size) / stride ] + 1
while the output feature-map size of a pooling layer is computed as:
out_size = ceil[ (in_size + 2*pad - kernel_size) / stride ] + 1
This differs slightly from TensorFlow, and note that convolution and pooling round differently (floor vs. ceil) in Caffe. For details see the Caffe source files "caffe/layers/conv_layer.cpp" and "caffe/layers/pooling_layer.cpp".
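For example, with a 240*240 input and the conv1 parameters above (kernel_size=3, stride=1, pad=1), out_size = (240 + 2*1 - 3)/1 + 1 = 240, so the spatial size is unchanged; applying the max pooling above (kernel_size=4, stride=3, pad=0) to a 240*240 map gives out_size = ceil[(240 + 0 - 4)/3] + 1 = 79 + 1 = 80.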
4. BatchNorm layer:
n.batchnorm = L.BatchNorm(n.pool1, include=dict(phase=caffe.TRAIN), in_place=True,
                          batch_norm_param=dict(moving_average_fraction=0.9), name='bn')
n.scale_bn = L.Scale(n.batchnorm, scale_param=dict(bias_term=True), in_place=True, name='bn_scale')
5. Fully connected (InnerProduct) layer:
n.innerP = L.InnerProduct(n.scale_bn, num_output=class_num,
                          weight_filler=dict(type='xavier'),
                          bias_filler=dict(type='constant', value=0),
                          name='inner_product')
6. Softmax loss layer:
n.loss = L.SoftmaxWithLoss(n.innerP, n.label)
7. Accuracy layer:
n.acc = L.Accuracy(n.innerP, n.label, accuracy_param=dict(top_k=1))
Write the network definition to a .prototxt file, i.e. a Caffe-format network description. Pasting the generated .prototxt into http://ethereon.github.io/netscope/#/editor lets you visualize the network structure.
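A minimal sketch of writing the NetSpec above out as a prototxt file (the output path is just an example):
train_proto = 'train.prototxt'  # example output path
with open(train_proto, 'w') as f:
    f.write(str(n.to_proto()))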
When data enters the network, the source of L.Data is the LMDB data generated earlier. If you want some extra on-the-fly data augmentation, or want to change the augmentation applied to already-generated LMDB data, you can modify the source code in data_layer.cpp (under .\src\caffe\layers in the Caffe directory) so that each batch is augmented as it is read. A concrete example of such a modification: https://blog.csdn.net/qq295456059/article/details/53494612.
IV. The solver file
The solver file configures the training and test parameters.
from caffe.proto import caffe_pb2
from Bconfig import config

sp = caffe_pb2.SolverParameter()
solver_file = config.solver_file             # where to save the solver file
sp.train_net = config.train_proto            # the training prototxt generated in the previous step
sp.test_net.append(config.val_proto)
sp.test_interval = 1405                      # test every 1405 training iterations
sp.test_iter.append(157)                     # number of test iterations per test pass
sp.max_iter = 210750                         # maximum number of training iterations
sp.base_lr = 0.001                           # base learning rate
sp.momentum = 0.9                            # momentum
sp.weight_decay = 5e-4                       # weight decay
sp.lr_policy = 'step'                        # learning-rate decay policy
sp.stepsize = 70250
sp.gamma = 0.1                               # learning-rate decay factor
sp.display = 1405
sp.snapshot = 1405
sp.snapshot_prefix = './model/BeltClassify'  # prefix of the saved model files
sp.type = "SGD"                              # optimization algorithm
sp.solver_mode = caffe_pb2.SolverParameter.GPU

with open(solver_file, 'w') as f:
    f.write(str(sp))
V. Training the model
import caffe

caffe.set_device(0)
caffe.set_mode_gpu()
solver = caffe.SGDSolver('G:/Belt_CaffeP/prototxt/solver.prototxt')
test_iter = 157
test_interval = 1405
epoch_num = 150
#solver.net.forward()
#solver.solve()
#iter = solver.iter
for i in range(epoch_num):
    for j in range(test_interval):
        solver.step(1)  # single training step that updates the parameters
        loss_train = solver.net.blobs['loss'].data
        acc_train = solver.net.blobs['acc'].data
        print('epoch %d %d/%d: loss_train: %.4f, accuracy_train: %.4f' % (i, j, test_interval, loss_train, acc_train))
    for test_i in range(test_iter):
        solver.test_nets[0].forward()  # run the test net
        loss_test = solver.test_nets[0].blobs['loss'].data
        acc_test = solver.test_nets[0].blobs['acc'].data
        print('epoch %d %d/%d: loss: %.4f, accuracy: %.4f' % (i, test_i, test_iter, loss_test, acc_test))
This gives the training-set loss and accuracy at every iteration and the test-set loss and accuracy after every epoch.
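If you want to keep these curves for later plotting, one option (just a sketch; the output file name is an example) is to collect the values into lists inside the loops and dump them once training finishes:
import numpy as np

train_loss, train_acc, test_loss, test_acc = [], [], [], []
# inside the training loop: train_loss.append(float(loss_train)); train_acc.append(float(acc_train))
# inside the test loop:     test_loss.append(float(loss_test));   test_acc.append(float(acc_test))
np.savez('training_curves.npz',  # example output file
         train_loss=np.array(train_loss), train_acc=np.array(train_acc),
         test_loss=np.array(test_loss), test_acc=np.array(test_acc))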
VI. Testing images
Method 1: feed in the image read by OpenCV directly
import caffe
import cv2
import numpy as np

net = caffe.Net(prototxt, model, caffe.TEST)
# note: the image must already match the network input size (e.g. resize to 240*240 first)
img_bgr = cv2.imdecode(np.fromfile('XXX.jpg', dtype=np.uint8), -1)
image = img_bgr - mean                   # subtract the mean; mean must match the H*W*C layout of img_bgr (or be per-channel BGR values)
input_img = image.transpose((2, 0, 1))   # H*W*C --> C*H*W
net.blobs['data'].data[...] = input_img
output = net.forward()
prob = output['prob'][0]                 # 'prob' is the name of the final output layer in the prototxt
label_pre = prob.argsort()[-1]           # index of the highest-probability class
Method 2: load the image with caffe.io
net = caffe.Net(prototxt, model, caffe.TEST)
image = caffe.io.load_image('XXX.jpg')
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))                   # H*W*C --> C*H*W
transformer.set_raw_scale('data', 255)                         # rescale from [0, 1] back to [0, 255]
transformer.set_mean('data', np.array([mean1, mean2, mean3]))  # mean1/mean2/mean3: per-channel means computed earlier
transformer.set_channel_swap('data', (2, 1, 0))                # RGB --> BGR
net.blobs['data'].data[...] = transformer.preprocess('data', image)
output = net.forward()
prob = output['prob'][0]
label_pre = np.argsort(-prob)[0]
caffe.io.load_image loads images as RGB with float values in [0, 1], whereas Caffe works with BGR images in the range [0, 255], so the channel order must be swapped and the values rescaled to 0-255. The Transformer ignores the order in which the transforms were set; transformer.preprocess applies them in a fixed order: (1) set_transpose, (2) channel_swap, (3) raw_scale, (4) mean subtraction.
Images read by OpenCV are already BGR in the range [0, 255], so no such conversion is needed.