# coding: utf-8
import os

folderName = '/home/terrence/caffe_case/mnist/'

for i in range(10):
    # list every image file in this digit's training folder
    dirList = os.listdir(folderName + 'train/' + str(i) + '/')
    f = open(folderName + 'train.txt', 'a+')
    for name in dirList:
        print(name)
        f.write(str(i) + '/' + name + ' ' + str(i) + '\n')
    f.close()

for i in range(10):
    # list every image file in this digit's test folder
    dirList = os.listdir(folderName + 'test/' + str(i) + '/')
    f = open(folderName + 'test.txt', 'a+')
    for name in dirList:
        print(name)
        f.write(str(i) + '/' + name + ' ' + str(i) + '\n')
    f.close()
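When the script finishes, train.txt holds one `relative_path label` pair per line, which is exactly the listing format convert_imageset expects (the file names below are illustrative):

0/00001.png 0
0/00002.png 0
...
9/09999.png 9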
#!/usr/bin/env sh
# Create the mnist lmdb inputs.
# Adapted from ~/caffe/examples/imagenet/create_imagenet.sh;
# set the paths to the train + test data dirs below.
set -e

EXAMPLE=/home/terrence/caffe_case/mnist
DATA=/home/terrence/caffe_case/mnist
TOOLS=/home/terrence/caffe/build/tools

TRAIN_DATA_ROOT=/home/terrence/caffe_case/mnist/train/
VAL_DATA_ROOT=/home/terrence/caffe_case/mnist/test/

# Set RESIZE=true to resize the images to 28x28. Leave as false if the images
# already have the right size.
RESIZE=false
if $RESIZE; then
  RESIZE_HEIGHT=28
  RESIZE_WIDTH=28
else
  RESIZE_HEIGHT=0
  RESIZE_WIDTH=0
fi

if [ ! -d "$TRAIN_DATA_ROOT" ]; then
  echo "Error: TRAIN_DATA_ROOT is not a path to a directory: $TRAIN_DATA_ROOT"
  echo "Set the TRAIN_DATA_ROOT variable in create_imagenet.sh to the path" \
       "where the training data is stored."
  exit 1
fi

if [ ! -d "$VAL_DATA_ROOT" ]; then
  echo "Error: VAL_DATA_ROOT is not a path to a directory: $VAL_DATA_ROOT"
  echo "Set the VAL_DATA_ROOT variable in create_imagenet.sh to the path" \
       "where the validation data is stored."
  exit 1
fi

echo "Creating train lmdb..."

GLOG_logtostderr=1 $TOOLS/convert_imageset \
    --resize_height=$RESIZE_HEIGHT \
    --resize_width=$RESIZE_WIDTH \
    --shuffle \
    --gray=true \
    $TRAIN_DATA_ROOT \
    $DATA/train.txt \
    $EXAMPLE/img_train_lmdb

echo "Creating val lmdb..."

GLOG_logtostderr=1 $TOOLS/convert_imageset \
    --resize_height=$RESIZE_HEIGHT \
    --resize_width=$RESIZE_WIDTH \
    --shuffle \
    --gray=true \
    $VAL_DATA_ROOT \
    $DATA/test.txt \
    $EXAMPLE/img_test_lmdb

echo "Done."
Besides updating the paths, we also need to add the `--gray=true \` argument, which tells convert_imageset to treat the images as grayscale. In addition, since all of our source images already share the same size (28*28), no resizing is needed, so we set RESIZE=false.
Running this script generates the two folders img_test_lmdb and img_train_lmdb. With that, the lmdb data sources are ready.
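To sanity-check the result, you can read one entry back from the lmdb. A minimal sketch, assuming the `lmdb` Python package is installed and pycaffe is importable; the first record should report 1 x 28 x 28 with the correct label:

import lmdb
import caffe

# open the training lmdb read-only and decode the first Datum
env = lmdb.open('/home/terrence/caffe_case/mnist/img_train_lmdb', readonly=True)
with env.begin() as txn:
    cursor = txn.cursor()
    cursor.first()
    datum = caffe.proto.caffe_pb2.Datum()
    datum.ParseFromString(cursor.value())
    print('%s: %dx%dx%d, label=%d' % (cursor.key(), datum.channels,
                                      datum.height, datum.width, datum.label))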
#!/usr/bin/env sh
set -e
/home/terrence/caffe/build/tools/caffe train --solver=/home/terrence/caffe_case/mnist/lenet_solver.prototxt "$@"
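Since the solver below snapshots every 5000 iterations, an interrupted run can also be resumed from the saved solver state with caffe's --snapshot flag (the file name follows from the snapshot_prefix shown next):

/home/terrence/caffe/build/tools/caffe train --solver=/home/terrence/caffe_case/mnist/lenet_solver.prototxt --snapshot=/home/terrence/caffe_case/mnist/model/lenet_iter_5000.solverstate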
# The train/test net protocol buffer definition
net: "/home/terrence/caffe_case/mnist/lenet_train_test.prototxt" # the network definition shown below
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "/home/terrence/caffe_case/mnist/model/lenet" # saved as lenet_iter_*.caffemodel / *.solverstate
# solver mode: CPU or GPU
solver_mode: GPU
For the meaning of each solver parameter, see http://blog.csdn.net/terrenceyuu/article/details/76232968.
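In particular, the "inv" policy used here decays the learning rate as lr = base_lr * (1 + gamma * iter)^(-power). A quick sketch of how it evolves over this run:

base_lr, gamma, power = 0.01, 0.0001, 0.75

def inv_lr(it):
    # Caffe's "inv" learning rate policy
    return base_lr * (1 + gamma * it) ** (-power)

for it in (0, 500, 5000, 10000):
    print('iter %5d: lr = %.6f' % (it, inv_lr(it)))
# decays smoothly from 0.010000 at iter 0 to about 0.005946 at iter 10000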
The solver refers to a network definition file, lenet_train_test.prototxt, whose content is:
name: "LeNet"
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
scale: 0.00390625
}
data_param {
source: "/home/terrence/caffe_case/mnist/img_train_lmdb"
batch_size: 64
backend: LMDB
}
}
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
scale: 0.00390625
}
data_param {
source: "/home/terrence/caffe_case/mnist/img_test_lmdb"
batch_size: 100
backend: LMDB
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 20
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 50
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool2"
top: "ip1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 500
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "ip1"
top: "ip1"
}
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 10
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "ip2"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "ip2"
bottom: "label"
top: "loss"
}
For this file, nothing but the lmdb source paths needs changing.
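As a sanity check on the architecture, you can trace the spatial size of each blob with out = (in - kernel) / stride + 1 (no layer here uses padding):

def out_size(in_size, kernel, stride):
    # output side length of an unpadded conv/pool layer
    return (in_size - kernel) // stride + 1

s = 28                 # input:  1 x 28 x 28
s = out_size(s, 5, 1)  # conv1: 20 x 24 x 24
s = out_size(s, 2, 2)  # pool1: 20 x 12 x 12
s = out_size(s, 5, 1)  # conv2: 50 x  8 x  8
s = out_size(s, 2, 2)  # pool2: 50 x  4 x  4, so ip1 sees 50*4*4 = 800 inputs
print(s)               # 4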
Once all of these files are in place, we can run the train_lenet.sh script. When it finishes, the model folder contains snapshots of the trained network at iterations 5000 and 10000; the files ending in .caffemodel hold the weights we will use at test time.
sudo /home/terrence/caffe/build/tools/compute_image_mean ./img_train_lmdb ./mean.binaryproto
This computes the per-pixel mean of the img_train_lmdb training data and stores it in mean.binaryproto. Run the command and the file is produced.
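As an aside, if you ever want the test script below to subtract this real mean rather than zeros, the binaryproto can be converted to .npy with pycaffe; a minimal sketch:

import numpy as np
import caffe

blob = caffe.proto.caffe_pb2.BlobProto()
with open('mean.binaryproto', 'rb') as f:
    blob.ParseFromString(f.read())
# blobproto_to_array returns (num, channels, height, width); take the first item
mean = caffe.io.blobproto_to_array(blob)[0]
np.save('meanfile.npy', mean)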
To test a single image, run the following command:
python classify.py --print_results --force_grayscale --center_only --labels_file ./synset_words.txt ./02972.png resultsfile
Here 02972.png is an image of a handwritten digit, and synset_words.txt maps each class index to a readable name:
0 zero
1 one
2 two
3 three
4 four
5 five
6 six
7 seven
8 eight
9 nine
Finally there is the classify.py file, which specifies the test-time configuration:
#!/usr/bin/env python
# coding: utf-8
"""
classify.py is an out-of-the-box image classifier callable from the command line.

By default it configures and runs the Caffe reference ImageNet model.
"""
caffe_root = '/home/terrence/caffe/'
import sys
sys.path.insert(0, caffe_root + 'python')

import numpy as np
import os
import argparse
import glob
import time
import pandas as pd  # used to parse the labels file

import caffe


def main(argv):
    pycaffe_dir = os.path.dirname(__file__)

    parser = argparse.ArgumentParser()
    # Required arguments: input and output files.
    parser.add_argument(
        "input_file",
        help="Input image, directory, or npy."
    )
    parser.add_argument(
        "output_file",
        help="Output npy filename."
    )
    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join(pycaffe_dir,
                "./deploy.prototxt"),  # path to the deploy.prototxt network definition
        help="Model definition file."
    )
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(pycaffe_dir,
                "./model/lenet_iter_10000.caffemodel"),  # the caffemodel we trained above
        help="Trained model weights file."
    )
    # ---- arguments added for the MNIST test: start ----
    parser.add_argument(
        "--labels_file",
        default=os.path.join(pycaffe_dir,
                "./synset_words.txt"),  # maps class indices to label names
        help="mnist result words file"
    )
    parser.add_argument(
        "--force_grayscale",
        action='store_true',  # LeNet-5 was trained on grayscale images
        help="Converts RGB images down to single-channel grayscale versions, " +
             "useful for single-channel networks like MNIST."
    )
    parser.add_argument(
        "--print_results",
        action='store_true',  # print the results to stdout
        help="Write output text to stdout rather than serializing to a file."
    )
    # ---- arguments added for the MNIST test: end ----
    parser.add_argument(
        "--gpu",
        action='store_true',
        help="Switch for gpu computation."
    )
    parser.add_argument(
        "--center_only",
        action='store_true',
        help="Switch for prediction from center crop alone instead of " +
             "averaging predictions across crops (default)."
    )
    parser.add_argument(
        "--images_dim",
        default='28,28',  # input image height,width
        help="Canonical 'height,width' dimensions of input images."
    )
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                './meanfile.npy'),  # the mean file generated below
        help="Data set image mean of [Channels x Height x Width] dimensions " +
             "(numpy array). Set to '' for no mean subtraction."
    )
    parser.add_argument(
        "--input_scale",
        type=float,
        help="Multiply input features by this scale to finish preprocessing."
    )
    parser.add_argument(
        "--raw_scale",
        type=float,
        default=255.0,
        help="Multiply raw input by this scale before preprocessing."
    )
    parser.add_argument(
        "--channel_swap",
        default='2,1,0',
        help="Order to permute input channels. The default converts " +
             "RGB -> BGR since BGR is the Caffe default by way of OpenCV."
    )
    parser.add_argument(
        "--ext",
        default='jpg',
        help="Image file extension to take as input when a directory " +
             "is given as the input file."
    )
    args = parser.parse_args()

    image_dims = [int(s) for s in args.images_dim.split(',')]

    mean, channel_swap = None, None
    if args.mean_file:
        mean = np.load(args.mean_file).mean(1).mean(1)
    if args.channel_swap:
        channel_swap = [int(s) for s in args.channel_swap.split(',')]

    if args.gpu:
        caffe.set_mode_gpu()
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    # Make classifier. channel_swap is forced to None because the grayscale
    # input has a single channel, so there is nothing to swap.
    classifier = caffe.Classifier(args.model_def, args.pretrained_model,
            image_dims=image_dims, mean=mean,
            input_scale=args.input_scale, raw_scale=args.raw_scale,
            channel_swap=None)

    # Load numpy array (.npy), directory glob (*.jpg), or image file.
    args.input_file = os.path.expanduser(args.input_file)
    if args.input_file.endswith('npy'):
        print("Loading file: %s" % args.input_file)
        inputs = np.load(args.input_file)
    elif os.path.isdir(args.input_file):
        print("Loading folder: %s" % args.input_file)
        inputs = [caffe.io.load_image(im_f)
                  for im_f in glob.glob(args.input_file + '/*.' + args.ext)]
    else:
        print("Loading file: %s" % args.input_file)
        # color=False when --force_grayscale is set, so the image loads single-channel
        inputs = [caffe.io.load_image(args.input_file, not args.force_grayscale)]

    print("Classifying %d inputs." % len(inputs))

    # Classify.
    start = time.time()
    scores = classifier.predict(inputs, not args.center_only).flatten()
    print("Done in %.2f s." % (time.time() - start))

    # ---- print the results to the terminal: start ----
    if args.print_results:
        with open(args.labels_file) as f:
            labels_df = pd.DataFrame([
                {'synset_id': l.strip().split(' ')[0],
                 'name': ' '.join(l.strip().split(' ')[1:]).split(',')[0]}
                for l in f.readlines()])
        labels = labels_df.sort_values('synset_id')['name'].values

        indices = (-scores).argsort()[:5]
        predictions = labels[indices]

        print(predictions)
        print(scores)

        meta = [(p, '%.5f' % scores[i]) for i, p in zip(indices, predictions)]
        print(meta)
    # ---- print the results to the terminal: end ----

    # Save the flattened score vector (defined whether or not --print_results is set).
    print("Saving results into %s" % args.output_file)
    np.save(args.output_file, scores)


if __name__ == '__main__':
    main(sys.argv)
The one thing to watch here is the mean_file argument. Because lenet_train_test.prototxt performs no mean subtraction during training, it is enough to supply an all-zero array of size 28*28*1; note the layout must be [Channels x Height x Width].
The Python code that generates this array:
import numpy as np
zeros = np.zeros((1,28,28), dtype=np.float32)
np.save('meanfile.npy', zeros)
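Continuing in the same session, a quick check that the file has the [Channels x Height x Width] layout classify.py expects, and that its per-channel reduction (the .mean(1).mean(1) in classify.py) is zero:

m = np.load('meanfile.npy')
print(m.shape)            # (1, 28, 28)
print(m.mean(1).mean(1))  # [ 0.]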