A model trained from the LMDB data had very low accuracy, possibly because something went wrong with the LMDB itself; training from the image files as shown here reaches 99% accuracy.
Pay special attention to paths when generating the configuration files.
The locations and relationships of my folders are as follows:
wang holds all of the data, configuration files, and scripts
wang/mnist holds the two folders of training and test data
wang/mnist/train: training data
wang/mnist/test: test data
The configuration files and image lists also live under the mnist folder.
Below is the directory structure of the scripts, data, and configuration files.
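In outline (a sketch of the layout just described; script names are illustrative, and generated files are shown where they will land):

wang/
    (notebook / scripts)
    mnist/
        train/                    training images, one subfolder per class label
        test/                     test images, same structure
        train.txt, test.txt       generated image lists
        *.prototxt, *.lmdb, train.binaryproto   generated configs and data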
Converting the image data to LMDB format
Generating the image file list (txt)
# Generate the image-list txt file.
# image_path: location of the image folder
# save_path: where to save the image-list txt file
import os

def create_flist(image_path, save_path):
    # 'w' instead of 'a' so that re-running does not append duplicate entries
    with open(save_path, 'w') as f:
        label_list = os.listdir(image_path)
        for label in label_list:
            filenames = os.listdir(image_path + "/" + label)
            for fname in filenames:
                f.write(label + '/' + fname + ' ' + label + '\n')

create_flist('mnist/train', 'mnist/train.txt')
create_flist('mnist/test', 'mnist/test.txt')
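Each line of the generated list is "<label>/<filename> <label>". Assuming the class subfolders are named 0-9, train.txt looks like this (filenames are illustrative):

3/00012.png 3
3/00147.png 3
7/00003.png 7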
Generating the LMDB
caffe/build/tools/convert_imageset converts the images; it is invoked as convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME and supports the following flags:
'''
-gray: whether to load images as grayscale via OpenCV's imread(); default false
-shuffle: whether to randomly shuffle the image order; default false
-backend: which db format to generate, leveldb or lmdb; default lmdb
-resize_width/resize_height: resize the images via OpenCV's resize()
-check_size: check that all entries have the same size; default false (no check)
-encoded: whether to store the encoded original image in the db; default false
-encode_type: used together with the previous flag; the format to encode to: 'png', 'jpg', ...
'''
# the commands module is Python 2 only (removed in Python 3)
import commands

# caffe_root: root directory of the Caffe source tree
# images_path: path to the image folder (relative paths work)
# txt_save_path: location of the image-list file
# lmdb_name: name of the db file to generate
def create_db(caffe_root, images_path, txt_save_path, lmdb_name):
    # where the generated db file is saved
    lmdb_save_path = '/home/jinghui/wang/mnist/' + lmdb_name
    # path to the convert_imageset tool
    convert_imageset_path = caffe_root + 'build/tools/convert_imageset'
    cmd = """%s --shuffle --resize_height=256 --resize_width=256 %s %s %s"""
    status, output = commands.getstatusoutput(cmd % (convert_imageset_path, images_path,
                                                     txt_save_path, lmdb_save_path))
    print output
    if status == 0:
        print "lmdb file generated successfully"
# create train.lmdb and test.lmdb
caffe_root = '/home/jinghui/caffe/'
# the ipynb lives in the same folder as mnist, so relative paths
# also work for the image folder and the image-list file
create_db(caffe_root, 'mnist/train/', 'mnist/train.txt', 'train.lmdb')
I0314 04:55:31.386880 2936 convert_imageset.cpp:86] Shuffling data
I0314 04:55:32.726089 2936 convert_imageset.cpp:89] A total of 60000 images.
I0314 04:55:32.726358 2936 db_lmdb.cpp:35] Opened lmdb /home/hao/wang/mnist/train.lmdb
I0314 04:55:36.342209 2936 convert_imageset.cpp:147] Processed 1000 files.
I0314 04:55:38.617094 2936 convert_imageset.cpp:147] Processed 2000 files.
...
I0314 04:57:44.560613 2936 convert_imageset.cpp:147] Processed 59000 files.
I0314 04:57:46.760392 2936 convert_imageset.cpp:147] Processed 60000 files.
lmdb file generated successfully
create_db(caffe_root, 'mnist/test/', 'mnist/test.txt', 'test.lmdb')
I0314 05:02:23.772184 2951 convert_imageset.cpp:86] Shuffling data
I0314 05:02:25.104369 2951 convert_imageset.cpp:89] A total of 10000 images.
I0314 05:02:25.104758 2951 db_lmdb.cpp:35] Opened lmdb /home/jinghui/wang/mnist/test.lmdb
I0314 05:02:28.693987 2951 convert_imageset.cpp:147] Processed 1000 files.
...
I0314 05:02:46.934710 2951 convert_imageset.cpp:147] Processed 10000 files.
lmdb file generated successfully
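Since the whole motivation here was a suspicion that a bad LMDB was hurting accuracy, it is worth reading one entry back as a sanity check. A minimal sketch, assuming the py-lmdb Python package is installed:

import lmdb
from caffe.proto import caffe_pb2

env = lmdb.open('mnist/train.lmdb', readonly=True)
with env.begin() as txn:
    key, value = next(txn.cursor().iternext())
    datum = caffe_pb2.Datum()
    datum.ParseFromString(value)
    # expect 256x256 entries (the conversion resized them) with labels 0-9
    print key, datum.label, datum.channels, datum.height, datum.width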
Computing the image mean file (.binaryproto)
# caffe_tools: path to caffe/build/tools/
# train_lmdb: the training LMDB
# save_meandb: where to save the mean file
def create_meandb(caffe_tools, train_lmdb, save_meandb):
    compute_image_mean_path = caffe_tools + 'compute_image_mean'
    cmd = """%s %s %s"""
    status, output = commands.getstatusoutput(cmd % (compute_image_mean_path,
                                                     train_lmdb, save_meandb))
    print output
    if status == 0:
        print "train mean file generated successfully"

create_meandb('/home/jinghui/caffe/build/tools/', 'mnist/train.lmdb', 'mnist/train.binaryproto')
I0314 06:19:05.025748 3129 db_lmdb.cpp:35] Opened lmdb mnist/train.lmdb
I0314 06:19:05.034680 3129 compute_image_mean.cpp:70] Starting iteration
I0314 06:19:16.149374 3129 compute_image_mean.cpp:95] Processed 10000 files.
I0314 06:19:27.005234 3129 compute_image_mean.cpp:95] Processed 20000 files.
I0314 06:19:37.893340 3129 compute_image_mean.cpp:95] Processed 30000 files.
I0314 06:19:41.742666 3129 compute_image_mean.cpp:95] Processed 40000 files.
I0314 06:19:43.716142 3129 compute_image_mean.cpp:95] Processed 50000 files.
I0314 06:19:45.689353 3129 compute_image_mean.cpp:95] Processed 60000 files.
I0314 06:19:45.689848 3129 compute_image_mean.cpp:108] Write to mnist/train.binaryproto
I0314 06:19:45.712528 3129 compute_image_mean.cpp:114] Number of channels: 3
I0314 06:19:45.712604 3129 compute_image_mean.cpp:119] mean_value channel [0]: 221.659
I0314 06:19:45.712698 3129 compute_image_mean.cpp:119] mean_value channel [1]: 221.659
I0314 06:19:45.712772 3129 compute_image_mean.cpp:119] mean_value channel [2]: 221.659
train mean file generated successfully
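To inspect the mean file from Python (or reuse it outside Caffe), the .binaryproto can be loaded into a numpy array; a short sketch using pycaffe's caffe.io.blobproto_to_array:

import caffe

blob = caffe.proto.caffe_pb2.BlobProto()
with open('mnist/train.binaryproto', 'rb') as f:
    blob.ParseFromString(f.read())
mean = caffe.io.blobproto_to_array(blob)[0]  # shape: (channels, height, width)
# per-channel means should match the values logged above (~221.659)
print mean.shape, mean.mean(axis=(1, 2))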
Generating the train/test network definition files
from caffe import layers as L, params as P, to_proto

# locations of the training and test LMDB files
train_lmdb = 'mnist/train.lmdb'
test_lmdb = 'mnist/test.lmdb'
# where the train/test network definitions are saved
train_proto = 'mnist/train.prototxt'
test_proto = 'mnist/test.prototxt'
# location of the training-set mean file
train_mean = 'mnist/train.binaryproto'

# lmdb: location of the lmdb file; batch_size: images per training batch
# include_acc: False means no accuracy layer is added
def create_net(lmdb, batch_size, include_acc=False):
    # Data layer: yields both the image data and the label;
    # transform_param applies augmentation (crop, mean subtraction, mirroring)
    data, label = L.Data(source=lmdb, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=40, mean_file=train_mean, mirror=True))
    # Convolution layer
    # data: the image pixels; kernel_size: size of the kernel, i.e. 5x5
    # stride: step the kernel slides by
    # num_output: number of output feature maps (number of kernels)
    # pad: number of padding pixels added to each side of the input
    # weight_filler: weight initialization scheme: xavier
    conv1 = L.Convolution(data, kernel_size=5, stride=1, num_output=16, pad=2,
                          weight_filler=dict(type='xavier'))
    # Activation layer
    # in_place: the bottom and the top blob can be the same, saving memory
    relu1 = L.ReLU(conv1, in_place=True)
    # Pooling layer
    pool1 = L.Pooling(relu1, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    conv2 = L.Convolution(pool1, kernel_size=3, stride=1, num_output=32, pad=1,
                          weight_filler=dict(type='xavier'))
    relu2 = L.ReLU(conv2, in_place=True)
    pool2 = L.Pooling(relu2, pool=P.Pooling.MAX, kernel_size=3, stride=2)
    # Fully connected layer
    fc1 = L.InnerProduct(pool2, num_output=1024, weight_filler=dict(type='xavier'))
    relu3 = L.ReLU(fc1, in_place=True)
    # dropout to reduce overfitting
    drop1 = L.Dropout(relu3, in_place=True)
    fc2 = L.InnerProduct(drop1, num_output=10, weight_filler=dict(type='xavier'))
    # softmax loss
    loss = L.SoftmaxWithLoss(fc2, label)
    # add an accuracy layer only when requested
    if include_acc:
        acc = L.Accuracy(fc2, label)
        return to_proto(loss, acc)
    else:
        return to_proto(loss)

def create_prototxt(save_proto, lmdb, batch_size, acc=False):
    with open(save_proto, 'w') as f:
        f.write(str(create_net(lmdb, batch_size, acc)))

create_prototxt(train_proto, train_lmdb, 64)
create_prototxt(test_proto, test_lmdb, 32, acc=True)
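A quick way to verify the generated prototxt is to load it and list the blob shapes. Note that to_proto auto-generates layer and blob names (Data1, Convolution1, ...), so the exact names may differ:

import caffe
net = caffe.Net(train_proto, caffe.TRAIN)
# prints each blob name with its (batch, channels, height, width) shape
for name, blob in net.blobs.items():
    print name, blob.data.shape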
Generating the solver file
from caffe.proto import caffe_pb2

s = caffe_pb2.SolverParameter()
solver_save_path = 'mnist/solver.prototxt'
# train_net: training network definition
# test_net: test network definition
# test_iter: number of test batches per test pass (test_iter x test batch size
#            should cover the whole test set: 313 x 32 ~ 10000)
# test_interval: run a test pass after every this many training iterations
# base_lr: initial learning rate
# display: print progress every this many iterations
# max_iter: maximum number of training iterations
# lr_policy: how the learning rate changes over time
# stepsize: adjust the lr once every this many iterations
# gamma: factor the learning rate is multiplied by at each step
# momentum: momentum of the weight updates
# weight_decay: weight decay rate
# snapshot: interval between model snapshots
# snapshot_prefix: filename prefix for snapshots
# solver_mode: whether to use the GPU
# type: optimization algorithm
s.train_net = 'mnist/train.prototxt'
s.test_net.append('mnist/test.prototxt')
s.test_iter.append(313)
s.test_interval = 782
s.base_lr = 0.001
s.display = 782
s.max_iter = 78200
s.lr_policy = 'step'
s.stepsize = 26067  # 78200 / 3 ~ 26067, so the lr is adjusted in three stages
s.gamma = 0.1
s.momentum = 0.9
s.weight_decay = 5e-4
s.snapshot = 7820
s.snapshot_prefix = 'snapshot'
s.solver_mode = caffe_pb2.SolverParameter.GPU
s.type = 'SGD'

with open(solver_save_path, 'w') as f:
    f.write(str(s))
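With lr_policy = 'step', Caffe computes lr = base_lr * gamma^floor(iter / stepsize), so over the 78200 iterations the rate takes three values; a quick check:

# lr under the 'step' policy: base_lr * gamma ** (iter // stepsize)
for it in (0, 26067, 52134, 78199):
    print it, 0.001 * 0.1 ** (it // 26067)
# -> 0.001 until iter 26066, then 0.0001, then 1e-05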
Training the model
import caffe
# use GPU 0
caffe.set_device(0)
caffe.set_mode_gpu()
solver = caffe.SGDSolver('mnist/solver.prototxt')
# run the full training loop defined by the solver
solver.solve()
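As an alternative to solver.solve(), the solver can be stepped manually to log the loss as training runs; a minimal sketch (SoftmaxWithLoss1 is the blob name to_proto generates here; adjust if yours differs):

# step the solver by hand and print the training loss every 100 iterations
for it in range(1000):
    solver.step(1)
    if it % 100 == 0:
        print it, solver.net.blobs['SoftmaxWithLoss1'].data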
More to come when I have time.