I recently implemented an AlexNet model in TensorFlow and trained it on the ImageNet dataset, where it reached close to 71% Top-5 accuracy. I wanted to see whether this trained model could be reused for other image classification tasks, so I picked the Flowers classification data mentioned in the TensorFlow transfer learning tutorial as a test.
The first step is to download the Flowers data; see the TensorFlow documentation for details. After unpacking, the download contains five folders, each holding images of one of five kinds of flowers. I wrote a program that converts the images into TFRecord format to simplify the subsequent processing. The code is as follows:
import os
import cv2
import numpy as np
import tensorflow as tf

# Wrap one JPEG-encoded image and its label into a tf.train.Example.
def make_example(image, label):
    return tf.train.Example(features=tf.train.Features(feature={
        'image' : tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])),
        'label' : tf.train.Feature(bytes_list=tf.train.BytesList(value=[label]))
    }))

flower_classes = {"daisy":0, "dandelion":1, "roses":2, "sunflowers":3, "tulips":4}

# Write one TFRecord file per flower class.
for flower_class in flower_classes.keys():
    writer = tf.python_io.TFRecordWriter(flower_class + ".tfrecord")
    folder_path = flower_class + "/"
    files = os.listdir(folder_path)
    # Use an explicit int64 dtype so the label bytes can be decoded consistently later.
    label = np.array([flower_classes[flower_class]], dtype=np.int64)
    for jpgfile in files:
        img = cv2.imread(os.path.join(folder_path, jpgfile), cv2.IMREAD_COLOR)
        # Re-encode as JPEG bytes so the record stores the compressed image.
        img_jpg = cv2.imencode('.jpg', img)[1].tobytes()
        ex = make_example(img_jpg, label.tobytes())
        writer.write(ex.SerializeToString())
    writer.close()
After the program finishes, there are five TFRecord files, one per flower class.
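If you want to sanity-check the generated files, a small optional snippet (not part of the original workflow) can count the records in each file with tf.python_io.tf_record_iterator:

import tensorflow as tf

# Optional check: count how many examples were written to each TFRecord file.
for flower_class in ["daisy", "dandelion", "roses", "sunflowers", "tulips"]:
    count = sum(1 for _ in tf.python_io.tf_record_iterator(flower_class + ".tfrecord"))
    print("%s.tfrecord: %d examples" % (flower_class, count))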
Now we can use the previously trained AlexNet for transfer learning. I described my AlexNet model in the previous post. The idea is to add a new fully connected layer that replaces the last fully connected layer of the original model, keep the trained parameters of all earlier layers, and train only the new layer on the new image data. To do this, the new images are fed through the original AlexNet, the output of the second-to-last layer is taken, and that output is passed into the new fully connected layer. The parameters of the original AlexNet layers must be marked as non-trainable so that only the new layer is updated. Below is the original AlexNet model code; note that the parameters of the second-to-last layer and every layer before it are declared with trainable=False:
# AlexNet model. All layers up to and including local2 are frozen with
# trainable=False; only the last fully connected layer (local3) is trainable.
def inference(images, dropout_rate=1.0, wd=None):
    with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE):
        kernel = tf.get_variable(initializer=tf.truncated_normal([11,11,3,96], dtype=tf.float32, stddev=1e-1), trainable=False, name='weights')
        conv = tf.nn.conv2d(images, kernel, [1,4,4,1], padding='SAME')
        biases = tf.get_variable(initializer=tf.constant(0.1, shape=[96], dtype=tf.float32), trainable=False, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name='conv1')
        #lrn1 = tf.nn.lrn(conv1, 4, bias=1.0, alpha=0.001/9, beta=0.75, name='lrn1')
        pool1 = tf.nn.max_pool(conv1, ksize=[1,3,3,1], strides=[1,2,2,1], padding='VALID', name='pool1')
    with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE):
        kernel = tf.get_variable(initializer=tf.truncated_normal([5,5,96,256], dtype=tf.float32, stddev=1e-1), trainable=False, name='weights')
        conv = tf.nn.conv2d(pool1, kernel, [1,1,1,1], padding='SAME')
        biases = tf.get_variable(initializer=tf.constant(0.1, shape=[256], dtype=tf.float32), trainable=False, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(bias, name='conv2')
        #lrn2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001/9, beta=0.75, name='lrn2')
        pool2 = tf.nn.max_pool(conv2, ksize=[1,3,3,1], strides=[1,2,2,1], padding='VALID', name='pool2')
    with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE):
        kernel = tf.get_variable(initializer=tf.truncated_normal([3,3,256,384], dtype=tf.float32, stddev=1e-1), trainable=False, name='weights')
        conv = tf.nn.conv2d(pool2, kernel, [1,1,1,1], padding='SAME')
        biases = tf.get_variable(initializer=tf.constant(0.1, shape=[384], dtype=tf.float32), trainable=False, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(bias, name='conv3')
    with tf.variable_scope('conv4', reuse=tf.AUTO_REUSE):
        kernel = tf.get_variable(initializer=tf.truncated_normal([3,3,384,384], dtype=tf.float32, stddev=1e-1), trainable=False, name='weights')
        conv = tf.nn.conv2d(conv3, kernel, [1,1,1,1], padding='SAME')
        biases = tf.get_variable(initializer=tf.constant(0.1, shape=[384], dtype=tf.float32), trainable=False, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv4 = tf.nn.relu(bias, name='conv4')
    with tf.variable_scope('conv5', reuse=tf.AUTO_REUSE):
        kernel = tf.get_variable(initializer=tf.truncated_normal([3,3,384,256], dtype=tf.float32, stddev=1e-1), trainable=False, name='weights')
        conv = tf.nn.conv2d(conv4, kernel, [1,1,1,1], padding='SAME')
        biases = tf.get_variable(initializer=tf.constant(0.1, shape=[256], dtype=tf.float32), trainable=False, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv5 = tf.nn.relu(bias, name='conv5')
        pool5 = tf.nn.max_pool(conv5, ksize=[1,3,3,1], strides=[1,2,2,1], padding='VALID', name='pool5')
    flatten = tf.layers.flatten(inputs=pool5, name='flatten')
    with tf.variable_scope('local1', reuse=tf.AUTO_REUSE):
        weights = tf.get_variable(initializer=tf.truncated_normal([6*6*256,4096], dtype=tf.float32, stddev=1/4096.0), trainable=False, name='weights')
        if wd is not None:
            weights_loss = tf.multiply(tf.nn.l2_loss(weights), wd, name='weight_loss')
            tf.add_to_collection('losses', weights_loss)
        biases = tf.get_variable(initializer=tf.constant(1.0, shape=[4096], dtype=tf.float32), trainable=False, name='biases')
        local1 = tf.nn.relu(tf.nn.xw_plus_b(flatten, weights, biases), name='local1')
        local1 = tf.nn.dropout(local1, dropout_rate)
    with tf.variable_scope('local2', reuse=tf.AUTO_REUSE):
        weights = tf.get_variable(initializer=tf.truncated_normal([4096,4096], dtype=tf.float32, stddev=1/4096.0), trainable=False, name='weights')
        if wd is not None:
            weights_loss = tf.multiply(tf.nn.l2_loss(weights), wd, name='weight_loss')
            tf.add_to_collection('losses', weights_loss)
        biases = tf.get_variable(initializer=tf.constant(1.0, shape=[4096], dtype=tf.float32), trainable=False, name='biases')
        local2 = tf.nn.relu(tf.nn.xw_plus_b(local1, weights, biases), name='local2')
        local2 = tf.nn.dropout(local2, dropout_rate)
    with tf.variable_scope('local3', reuse=tf.AUTO_REUSE):
        weights = tf.get_variable(initializer=tf.truncated_normal([4096,1000], dtype=tf.float32, stddev=1e-3), trainable=True, name='weights')
        biases = tf.get_variable(initializer=tf.constant(1.0, shape=[1000], dtype=tf.float32), trainable=True, name='biases')
        local3 = tf.nn.xw_plus_b(local2, weights, biases, name='local3')
    # Return both the final logits and the second-to-last layer's output,
    # which serves as the feature input for transfer learning.
    return local3, local2
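The new fully connected layer described above does not appear in the model code itself. As a rough sketch of how it could be attached on top of the frozen network (the names transfer_logits and flowers_fc are my own and are not part of the original model), it might look like this:

import tensorflow as tf
import alexnet_model

NUM_CLASSES = 5  # daisy, dandelion, roses, sunflowers, tulips

def transfer_logits(images, dropout_rate=1.0):
    # Run the frozen AlexNet and take the second-to-last layer's output (local2).
    _, local2 = alexnet_model.inference(images, dropout_rate=dropout_rate)
    # New trainable fully connected layer that replaces the original 1000-way local3.
    with tf.variable_scope('flowers_fc', reuse=tf.AUTO_REUSE):
        weights = tf.get_variable(initializer=tf.truncated_normal([4096, NUM_CLASSES], dtype=tf.float32, stddev=1e-3), trainable=True, name='weights')
        biases = tf.get_variable(initializer=tf.constant(0.1, shape=[NUM_CLASSES], dtype=tf.float32), trainable=True, name='biases')
        logits = tf.nn.xw_plus_b(local2, weights, biases, name='logits')
    return logits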
Then we can write the code that loads the previously trained parameters and performs the new training. The code is as follows:
import tensorflow as tf
import alexnet_model

imageWidth = 224
imageHeight = 224
imageDepth = 3
batch_size = 10
resize_min = 256

# Parse the TFRecord format written by the conversion program above.
def _parse_function(example_proto):
    features = {"image": tf.FixedLenFeature((), tf.string, default_value=""),
                "label": tf.FixedLenFeature((), tf.string, default_value="")}
    parsed_features = tf.parse_single_example(example_proto, features)
    image_decoded = tf.image.decode_jpeg(parsed_features["image"], channels=3)
    shape = tf.shape(image_decoded)
    height, width = shape[0], shape[1]
    # Scale the shorter side to resize_min while keeping the aspect ratio, then
    # crop to the 224x224 input size expected by AlexNet (a standard preprocessing
    # choice assumed here).
    resized_height, resized_width = tf.cond(
        height < width,
        lambda: (resize_min,
                 tf.cast(tf.cast(width, tf.float64) * resize_min / tf.cast(height, tf.float64), tf.int32)),
        lambda: (tf.cast(tf.cast(height, tf.float64) * resize_min / tf.cast(width, tf.float64), tf.int32),
                 resize_min))
    image_float = tf.image.convert_image_dtype(image_decoded, tf.float32)
    image_resized = tf.image.resize_images(image_float, [resized_height, resized_width])
    image_cropped = tf.random_crop(image_resized, [imageHeight, imageWidth, imageDepth])
    # The label was stored as the raw bytes of an int64 array, so decode and cast it back.
    label = tf.cast(tf.decode_raw(parsed_features["label"], tf.int64)[0], tf.int32)
    return image_cropped, label
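The remainder of the training script is not shown above. As a rough sketch of how the pieces could fit together, using the hypothetical transfer_logits function from the earlier sketch and a placeholder checkpoint path (the train/test split and evaluation are omitted), it might look like this:

# Input pipeline over the five TFRecord files generated earlier.
filenames = [c + ".tfrecord" for c in ["daisy", "dandelion", "roses", "sunflowers", "tulips"]]
dataset = tf.data.TFRecordDataset(filenames)
dataset = dataset.map(_parse_function).shuffle(3000).batch(batch_size).repeat()
iterator = dataset.make_one_shot_iterator()
images, labels = iterator.get_next()

# Frozen AlexNet features plus the new trainable layer (see the earlier sketch).
logits = transfer_logits(images, dropout_rate=0.5)

# Saver for the pretrained AlexNet variables only. It is built before the
# optimizer so that optimizer slot variables are not part of the restore list.
pretrained_vars = [v for v in tf.global_variables() if not v.name.startswith('flowers_fc')]
saver = tf.train.Saver(var_list=pretrained_vars)

loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
# Explicitly restrict training to the new layer's variables.
new_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='flowers_fc')
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss, var_list=new_vars)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, "alexnet_model.ckpt")  # placeholder path to the pretrained checkpoint
    for step in range(1000):
        _, loss_value = sess.run([train_op, loss])
        if step % 100 == 0:
            print("step %d, loss %.3f" % (step, loss_value))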
After just the first epoch of training, the accuracy on the test set already reaches 60%, which shows that the transferred model does reuse the original network to extract image features effectively and speeds up training. After 50 epochs, the test-set accuracy reaches about 96%, close to the result reported on the TensorFlow website for transfer learning with the Inception V3 model.