在上一篇文章——石头剪刀布之神经网络训练中,利用预训练的MobileNet虽然可以得到很高的精度,而且模型占用的内存很小,只有5M左右,但是在实际应用的时候,在我的笔记本上识别一张图片需要0.5s(使用如下代码),更不用说在树莓派上运行了。究其原因,是MobileNet最少需要128*128的图片输入,运算量仍然不小。因此,为了减少运行时间和模型的大小,本文将通过Tensorflow创建一个比较简单的神经网络。
以下为MobileNet识别单张图片代码:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import PIL.Image as Image
from pylab import *
import time
# Load the frozen model
def load_graph(model_file):
    """Read a serialized GraphDef from ``model_file`` and import it into a new graph.

    Args:
        model_file: Path of the binary protobuf file to read.
    Returns:
        The tf.Graph the GraphDef was imported into.
    """
    frozen_graph = tf.Graph()
    frozen_def = tf.GraphDef()
    with open(model_file, "rb") as pb_file:
        serialized = pb_file.read()
        frozen_def.ParseFromString(serialized)
    with frozen_graph.as_default():
        tf.import_graph_def(frozen_def)
    return frozen_graph
# Recognize the hand gesture in one image
def recognize(jpg_path, pb_file_path, classes):
    """Classify one image with the frozen MobileNet graph and print the result.

    Prints the predicted class name, then its probability together with the
    wall-clock time spent in the single ``sess.run`` call.

    Args:
        jpg_path: Path of the image to classify.
        pb_file_path: Path of the frozen graph file handed to ``load_graph``.
        classes: Sequence of class names, indexed by the model's output index.
    """
    # load_graph builds its own tf.Graph, so no extra default graph is needed here
    graph = load_graph(pb_file_path)
    with tf.Session(graph=graph) as sess:
        # Input and output tensors of the imported graph
        input_x = graph.get_tensor_by_name("import/input:0")
        out = graph.get_tensor_by_name("import/final_result:0")

        # This MobileNet model takes 128*128 three-channel input. Forcing RGB
        # keeps grayscale or RGBA files compatible with the reshape below.
        with Image.open(jpg_path) as src:
            resized = src.convert("RGB").resize((128, 128))
        img = np.asarray(resized, dtype=np.float32)

        # Pre-processing: map pixel values from [0, 255] to roughly [-1, 1)
        img = (img - 128) * 1.0 / 128
        batch = np.reshape(img, [-1, 128, 128, 3])

        t1 = time.time()
        img_out = sess.run(out, feed_dict={input_x: batch})
        t2 = time.time()

        prediction_labels = int(np.argmax(img_out, axis=1)[0])
        print(classes[prediction_labels])
        print('probability: %.3g, running time: %.3g' % (img_out[0][prediction_labels], t2-t1))
if __name__ == "__main__":
    # Sample run: one picture through the frozen graph
    pb_file_path = "model/output_graph.pb"
    jpg_path = "images/scissors/image671.jpg"
    classes = ['scissors', 'others', 'rock', 'paper']
    recognize(jpg_path=jpg_path, pb_file_path=pb_file_path, classes=classes)
本文采用的网络结构如下表所示:
类型 | Kernel尺寸/步长(或注释) | 输入尺寸 |
卷积 | 3*3*16/1 | 28*28*1 |
池化 | 2*2/2 | 28*28*16 |
卷积 | 3*3*32/1 | 14*14*16 |
池化 | 2*2/2 | 14*14*32 |
全连接 | (7*7*32)*256 | 1*(7*7*32) |
Dropout | 注:随机失活 | 1*256 |
全连接 | 256*4 | 1*256 |
Softmax | 分类输出 | 1*4 |
相应的代码如下(model.py):
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
# Width and height that model() reshapes its input to ([-1, w, h, 1])
w = np.int32(28)
h = np.int32(28)
def weight_variable(shape, name):
    '''Create a weight variable.

    The initial values are drawn from a truncated normal distribution with
    a standard deviation of 0.1.

    Args:
        shape: Shape of the weight tensor.
        name: Name given to the variable.
    Returns:
        The tf.Variable holding the weights.
    '''
    initial_value = tf.truncated_normal(shape, stddev=0.1)
    weight = tf.Variable(initial_value, name=name)
    return weight
def bias_variable(shape, name):
    '''Create a bias variable.

    Every element starts at the constant 0.1.

    Args:
        shape: Shape of the bias tensor.
        name: Name given to the variable.
    Returns:
        The tf.Variable holding the biases.
    '''
    initial_value = tf.constant(0.1, shape=shape)
    bias = tf.Variable(initial_value, name=name)
    return bias
def model(x, keep_prob):
    '''Build the classifier: two conv/pool stages followed by two dense layers.

    Args:
        x: Input tensor; it is reshaped to [-1, w, h, 1] before the first
            convolution.
        keep_prob: Keep probability handed to tf.nn.dropout, which is applied
            to the output of the first dense layer.
    Returns:
        y: Softmax output of the second dense layer (4 values per example),
            created with the name "output" inside the "fc2" name scope.
    '''
    unit_stride = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]
    flat_size = 7*7*32

    images = tf.reshape(x, [-1, w, h, 1])

    # Stage 1: 3x3 convolution (1 -> 16 channels), ReLU, 2x2 max pooling
    with tf.name_scope('conv1'):
        kernel1 = weight_variable([3, 3, 1, 16], name="weight")
        offset1 = bias_variable([16], name='bias')
        conv1 = tf.nn.conv2d(images, kernel1, strides=unit_stride,
                             padding="SAME", name='conv')
        act1 = tf.nn.relu(conv1 + offset1)
        pooled1 = tf.nn.max_pool(act1, ksize=pool_window, strides=pool_window,
                                 padding="SAME", name="pool")

    # Stage 2: 3x3 convolution (16 -> 32 channels), ReLU, 2x2 max pooling
    with tf.name_scope('conv2'):
        kernel2 = weight_variable([3, 3, 16, 32], name="weight")
        offset2 = bias_variable([32], name='bias')
        conv2 = tf.nn.conv2d(pooled1, kernel2, strides=unit_stride,
                             padding="SAME", name='conv')
        act2 = tf.nn.relu(conv2 + offset2)
        pooled2 = tf.nn.max_pool(act2, ksize=pool_window, strides=pool_window,
                                 padding="SAME", name="pool")

    # First dense layer: flattened 7*7*32 features -> 256 units
    with tf.name_scope('fc1'):
        kernel3 = weight_variable([flat_size, 256], name="weight")
        offset3 = bias_variable([256], name='bias')
        flattened = tf.reshape(pooled2, [-1, flat_size])
        dense1 = tf.nn.relu(tf.matmul(flattened, kernel3) + offset3)

    # Dropout between the two dense layers
    dropped = tf.nn.dropout(dense1, keep_prob)

    # Second dense layer: 256 units -> 4 class scores, then softmax
    with tf.name_scope('fc2'):
        kernel4 = weight_variable([256, 4], name="weight")
        offset4 = bias_variable([4], name='bias')
        scores = tf.matmul(dropped, kernel4) + offset4
        y = tf.nn.softmax(scores, name="output")
    return y
这样,网络模型就构造完成了。由于我们收集的数据一般是以图片格式存放在各个文件夹中,可以利用以下代码将文件夹中的图片读入内存,等待训练(read_image.py):
from PIL import Image
import numpy as np
from PIL import Image
from pylab import *
import os
import glob
# Input width, height and channel count used for training
# NOTE(review): c is not referenced by the code visible in this script;
# read_img converts every image to single-channel grayscale ('L').
w = 28
h = 28
c = 3
# Convert integer labels to one-hot vectors
def to_one_hot(label, num_classes=None):
    """Return a one-hot matrix for a 1-D sequence of integer class labels.

    Args:
        label: 1-D sequence or array of integer class indices.
        num_classes: Number of columns in the result. When omitted, it is
            inferred as ``max(label) + 1`` (0 for empty input).
    Returns:
        A float array of shape ``(len(label), num_classes)`` in which row
        ``i`` holds a 1 in column ``label[i]`` and 0 elsewhere.
    Raises:
        ValueError: If ``num_classes`` is given and a label does not fit in it.
    """
    labels = np.asarray(label, dtype=np.intp).ravel()
    if num_classes is None:
        # Empty input has no maximum; fall back to zero columns instead of failing
        num_classes = int(labels.max()) + 1 if labels.size else 0
    elif labels.size and labels.max() >= num_classes:
        raise ValueError(
            "label %d does not fit in %d classes" % (labels.max(), num_classes))
    label_one = np.zeros((labels.size, num_classes))
    # Set one entry per row in a single vectorized assignment
    label_one[np.arange(labels.size), labels] = 1
    return label_one
# Read the images and convert them to the training dimensions
def read_img(path):
    """Load every ``*.jpg`` found in the sub-folders of ``path`` and split the data.

    Each sub-folder is one class. Sub-folders are sorted by name so that the
    class index of a folder does not depend on the order in which the
    file system lists it. Images are converted to grayscale, resized to
    (w, h), shuffled, and split 80% / 20%.

    Args:
        path: Directory that contains one sub-folder per class.
    Returns:
        ``(x_train, y_train, x_val, y_val)``. The ``x_*`` arrays are float32
        with one flattened image (``w*h`` values) per row. The ``y_*`` arrays
        are the matching one-hot rows produced by ``to_one_hot``.
    Raises:
        ValueError: If no ``.jpg`` image is found.
    """
    class_dirs = sorted(
        os.path.join(path, entry)
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )
    imgs = []
    labels = []
    for idx, folder in enumerate(class_dirs):
        for im in glob.glob(os.path.join(folder, '*.jpg')):
            print('reading the image: %s' % (im))
            # Read the image, convert it to grayscale and shrink it to (w, h)
            with Image.open(im) as src:
                gray = src.convert('L').resize((w, h))
            imgs.append(np.asarray(gray, dtype=np.float32))
            labels.append(idx)
    if not imgs:
        raise ValueError('no .jpg images found under %s' % path)
    data = np.asarray(imgs, np.float32)
    label = to_one_hot(np.asarray(labels, np.int32))

    # Shuffle images and labels with the same permutation
    num_example = data.shape[0]
    arr = np.arange(num_example)
    np.random.shuffle(arr)
    data = data[arr]
    label = label[arr]

    # 80% for training, 20% for validation
    ratio = 0.8
    # Built-in int: the np.int alias no longer exists in current NumPy releases
    s = int(num_example * ratio)
    x_train = np.reshape(data[:s], [-1, w*h])
    y_train = label[:s]
    x_val = np.reshape(data[s:], [-1, w*h])
    y_val = label[s:]
    return x_train, y_train, x_val, y_val
if __name__ == "__main__":
    # Stand-alone run: load and split the data set under images/
    path = 'images/'
    splits = read_img(path)
    x_train, y_train, x_val, y_val = splits
接下来,就可以进行训练了(train.py)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import argparse
import sys
import model
import read_image
import numpy as np
# Image width and height; train() sizes the "images" placeholder as [None, w*h]
w = 28
h = 28
def main(args):
    """Unpack the parsed command-line options and start training.

    Args:
        args: Namespace providing ``learning_rate``, ``batch_size``,
            ``epoches`` and ``keep_prob``.
    """
    train(
        lr=args.learning_rate,
        batch_size=args.batch_size,
        epoches=args.epoches,
        keep_prob_value=args.keep_prob,
    )
def train(lr, batch_size, epoches, keep_prob_value):
    """Train the network on the images under ``images/`` and save a checkpoint.

    Every 25th epoch (starting with epoch 0) the accuracy on the first batch
    is printed and a checkpoint is written to ``model/model.ckpt``. After the
    last epoch the validation accuracy is printed and the final weights are
    saved to the same checkpoint.

    Args:
        lr: Learning rate for the Adam optimizer.
        batch_size: Number of examples per training step (converted to int).
        epoches: Number of passes over the training set (converted to int).
        keep_prob_value: Dropout keep probability used for training steps.
    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    import os

    # The values may arrive as floats; slicing and range() require ints
    batch_size = int(batch_size)
    epoches = int(epoches)
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Load the images
    path = 'images/'
    x_train, y_train, x_val, y_val = read_image.read_img(path)

    x = tf.placeholder(tf.float32, [None, w*h], name="images")
    y_ = tf.placeholder(tf.float32, [None, 4], name="labels")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    y = model.model(x, keep_prob)

    # Cost function. The probabilities are clipped so that log(0) cannot turn
    # the loss into NaN. The op name (including its spelling) is kept as is,
    # because it is part of the saved graph.
    clipped = tf.clip_by_value(y, 1e-10, 1.0)
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(y_ * tf.log(clipped), reduction_indices=[1]),
        name="corss_entropy")
    train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")
    saver = tf.train.Saver()

    # Make sure the checkpoint directory exists before the first save
    checkpoint_path = "model/model.ckpt"
    checkpoint_dir = os.path.dirname(checkpoint_path)
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Ceiling division: no empty trailing batch when the data size is an
    # exact multiple of batch_size
    num_batches = (len(x_train) + batch_size - 1) // batch_size

    # Start training
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(epoches):
            for j in range(num_batches):
                batch0 = x_train[j*batch_size:(j+1)*batch_size]
                batch1 = y_train[j*batch_size:(j+1)*batch_size]
                # Report and checkpoint once per logged epoch, not once per batch
                if i % 25 == 0 and j == 0:
                    # Dropout is switched off while measuring accuracy
                    train_accuracy = sess.run(accuracy,
                                              feed_dict={x: batch0, y_: batch1,
                                                         keep_prob: 1.0})
                    print("step %d, training accuracy %g" % (i, train_accuracy))
                    # Save model
                    saver_path = saver.save(sess, checkpoint_path)
                    print("Model saved in file:", saver_path)
                sess.run(train_step, feed_dict={x: batch0, y_: batch1,
                                                keep_prob: keep_prob_value})
        test_accuracy = sess.run(accuracy, feed_dict={x: x_val,
                                                      y_: y_val,
                                                      keep_prob: 1.0})
        print("test accuracy %g" % test_accuracy)
        # Save the final weights; otherwise the epochs after the last
        # periodic checkpoint would be lost
        saver_path = saver.save(sess, checkpoint_path)
        print("Model saved in file:", saver_path)
def parse_arguments(argv):
    """Parse the training options from ``argv``.

    ``--batch_size`` and ``--epoches`` are parsed as integers because they are
    used as slice bounds and as a ``range()`` argument.

    Args:
        argv: List of command-line tokens, without the program name.
    Returns:
        The argparse namespace with ``learning_rate``, ``batch_size``,
        ``epoches`` and ``keep_prob``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate', type=float,
                        help="learning rate", default=1e-4)
    parser.add_argument('--batch_size', type=int,
                        help="batch_size", default=50)
    parser.add_argument('--epoches', type=int,
                        help="max iterations", default=50)
    parser.add_argument('--keep_prob', type=float,
                        help="keep prob", default=0.5)
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Entry point: parse the command line (program name dropped) and train
    cli_args = parse_arguments(sys.argv[1:])
    main(cli_args)
运行上述代码,就可以进行训练了。我现在收集的数据大概每类200张左右,训练精度和验证精度都在89%左右,模型大小也在5M左右。下一步可以通过调参进一步提高精度。
接下来可以通过以下代码查看识别一张图片需要多长时间(test.py):
from PIL import Image
from matplotlib.pylab import *
import numpy as np
import argparse
import tensorflow as tf
import time
# Width and height the input image is resized to before it is fed to the network
w = 28
h = 28
# Class names, indexed by the position of the largest network output
# NOTE(review): the order has to match the label indices used during training — confirm
classes = ['others','paper','rock','scissors']
def main(args):
    """Restore the trained checkpoint, classify one image and print the timing.

    Args:
        args: Parsed options providing ``filename`` (the image to classify)
            and ``model_dir`` (the checkpoint path; the graph is read from
            ``model_dir + ".meta"``).
    """
    filename = args.filename
    model_dir = args.model_dir

    # Restore model
    saver = tf.train.import_meta_graph(model_dir + ".meta")
    with tf.Session() as sess:
        saver.restore(sess, model_dir)
        graph = tf.get_default_graph()
        x = graph.get_tensor_by_name("images:0")
        keep_prob = graph.get_tensor_by_name("keep_prob:0")
        y = graph.get_tensor_by_name("fc2/output:0")

        # Read image: grayscale, resized to (w, h), flattened to one row
        with Image.open(filename) as src:
            gray = src.convert('L').resize((w, h))
        pil_im = np.asarray(gray, dtype=np.float32).reshape((1, w*h))

        time1 = time.time()
        # Dropout is disabled for inference
        prediction = sess.run(y, feed_dict={x: pil_im, keep_prob: 1.0})
        index = np.argmax(prediction)
        time2 = time.time()
        print("The classes is: %s. (the probability is %g)" % (classes[index], prediction[0][index]))
        print("Using time %g" % (time2-time1))
def parse_arguments(argv):
    """Parse the options of the single-image test script from ``argv``.

    Args:
        argv: List of command-line tokens, without the program name.
    Returns:
        The argparse namespace with ``filename`` and ``model_dir``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename', type=str,
                        help="The image name", default="images/paper/image104.jpg")
    parser.add_argument('--model_dir', type=str,
                        help="The checkpoint path", default="model/model.ckpt")
    return parser.parse_args(argv)
if __name__ == "__main__":
    # This script's import list does not include sys, so import it here
    # before reading sys.argv
    import sys

    main(parse_arguments(sys.argv[1:]))