Cats vs. Dogs is a classic binary classification problem: the task is to train a model that can tell cats apart from dogs. Many people on Kaggle have already reached very high accuracy on it. I have been working with deep learning for about half a year and can use TensorFlow fairly fluently, but there are still many features I have not touched, so this article uses the Cats vs. Dogs project as an exercise to deepen my understanding of the TensorFlow framework.
Note: throughout this article, each functional module is first shown in full, and then selected details of its code are explained.
The dataset consists of two parts:
test1.zip contains the cat and dog images used for testing; these images carry no labels.
train.zip is the training set; its images are named class.id.jpg, where class is the category of the image (cat or dog).
We first split the training set into 20,000 images for training and 5,000 for validation, i.e. train / val = 4 : 1. The 20,000 training images consist of 10,000 dogs and 10,000 cats, and likewise the 5,000 validation images consist of 2,500 dogs and 2,500 cats. The resulting directory layout is shown below (a small splitting sketch follows the tree):
|-- cats_vs_dogs
|-- train
|-- dogs
|-- cats
|-- val
|-- dogs
|-- cats
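For reference, here is a minimal sketch of how such a split could be done. It assumes the unzipped Kaggle train/ folder (with files named class.id.jpg as described above, ids 0 to 12499 per class) sits next to the script; the folder names and the 10,000 / 2,500 split follow the layout shown in the tree.

# split_data.py (sketch, not part of the original project files)
import os
import shutil

src_dir = 'train/'  # the unzipped Kaggle training set
for split, start, end in [('cats_vs_dogs/train/', 0, 10000),
                          ('cats_vs_dogs/val/', 10000, 12500)]:
    for cls in ['cat', 'dog']:
        dst_dir = split + cls + 's/'          # e.g. cats_vs_dogs/train/cats/
        os.makedirs(dst_dir, exist_ok=True)
        for i in range(start, end):
            src = os.path.join(src_dir, '%s.%d.jpg' % (cls, i))
            shutil.copy(src, dst_dir)          # copy instead of move, to keep the original intact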
We use the pipeline provided by tf.data to load the data efficiently. The full loading function is defined as follows:
# data_load.py
import os
import cv2
import numpy as np
import tensorflow as tf

def load_train_data(train_path, sess, batch_size=32, num_epochs=200):
    """ Load the training set.
    args:
        train_path: directory containing the training images
        sess: a tf.Session() instance
        batch_size: size of one batch
        num_epochs: total number of epochs to iterate
    return:
        train_iter: iterator over the training set
        num_examples: number of training examples
    """
    train_img_paths = []
    train_labels = []
    # collect the training image paths
    for img_path in os.listdir(train_path + 'cats/'):
        train_img_paths.append(train_path + 'cats/' + img_path)
    for img_path in os.listdir(train_path + 'dogs/'):
        train_img_paths.append(train_path + 'dogs/' + img_path)
    num_examples = len(train_img_paths)
    # one-hot labels: the first half of the list are cats ([1, 0]), the second half dogs ([0, 1])
    for i in range(num_examples):
        if i < num_examples / 2:
            train_labels.append([1, 0])
        else:
            train_labels.append([0, 1])
    # build the dataset
    train_img_paths = np.array(train_img_paths)
    train_labels = np.array(train_labels).astype('float32')
    train_img_paths_ph = tf.placeholder(train_img_paths.dtype, train_img_paths.shape)
    train_labels_ph = tf.placeholder(train_labels.dtype, train_labels.shape)
    train_ds = tf.data.Dataset.from_tensor_slices((train_img_paths_ph, train_labels_ph))
    train_ds = train_ds.repeat(num_epochs) \
                       .shuffle(len(train_img_paths)) \
                       .map(lambda train_img_path, train_label: tf.py_func(
                           load_img,
                           [train_img_path, train_label],
                           [tf.float32, tf.float32]
                       ), num_parallel_calls=6) \
                       .map(lambda img, label: process(img, label)) \
                       .batch(batch_size) \
                       .prefetch(batch_size)
    train_iter = train_ds.make_initializable_iterator()
    # train_iter = train_ds.make_one_shot_iterator()
    # initialize the dataset, feeding in the actual paths and labels
    sess.run(train_iter.initializer,
             feed_dict={train_img_paths_ph: train_img_paths, train_labels_ph: train_labels})
    return [train_iter, num_examples]
In this function we collect the paths of the training images (train_img_paths) and give each image a one-hot label (train_labels): a cat is encoded as [1, 0] and a dog as [0, 1].
train_img_paths and train_labels are then wrapped into a Dataset:
train_ds = tf.data.Dataset.from_tensor_slices((train_img_paths_ph, train_labels_ph))
Because the dataset is built on top of placeholders, we use make_initializable_iterator() and feed the real arrays in when running the iterator's initializer; a one-shot iterator would not work here, since it does not support this kind of parameterization.
The dataset is then transformed with map to load and preprocess the images:
train_ds = train_ds.repeat(num_epochs) \
                   .shuffle(len(train_img_paths)) \
                   .map(lambda train_img_path, train_label: tf.py_func(
                       load_img,
                       [train_img_path, train_label],
                       [tf.float32, tf.float32]
                   ), num_parallel_calls=6) \
                   .map(lambda img, label: process(img, label)) \
                   .batch(batch_size) \
                   .prefetch(batch_size)
Here load_img and process are defined as follows:
def load_img(img_path, label):
    img_path = img_path.decode()
    # cv2.imread() returns the image in BGR order; convert it to RGB
    img = cv2.imread(img_path)[:, :, (2, 1, 0)].astype('float32')
    return img, label

def process(img, label):
    img = tf.image.per_image_standardization(img)  # standardize to zero mean and unit variance
    # pad or crop the image so that its shape becomes (224, 224, 3)
    img = tf.image.resize_image_with_crop_or_pad(img, 224, 224)
    return img, label
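load_img runs OpenCV inside tf.py_func, which is convenient but keeps the decoding on the Python side. As a point of comparison, here is a sketch of an equivalent loader that stays entirely inside the TensorFlow graph, using tf.read_file and tf.image.decode_jpeg (both available in TF 1.x). It is not used in this project; it is only an alternative worth knowing about:

# alternative sketch: decode images with TF ops instead of tf.py_func
def load_img_tf(img_path, label):
    raw = tf.read_file(img_path)
    img = tf.image.decode_jpeg(raw, channels=3)   # decoded as RGB, dtype uint8
    img = tf.cast(img, tf.float32)
    return img, label

# the corresponding map call would then simply be:
# train_ds = train_ds.map(load_img_tf, num_parallel_calls=6).map(process) ...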
This completes the preprocessing of the training set. Loading the validation set works in much the same way, so the code is listed here without further comment:
# data_load.py
def load_val_data(val_path, sess, batch_size):
    """ Load the validation set.
    args:
        val_path: directory containing the validation images
        sess: a tf.Session() instance
        batch_size: size of one batch
    returns:
        val_iter: iterator over the validation set
        num_examples: number of validation examples
    """
    val_img_paths = []
    val_labels = []
    # collect the validation image paths
    for img_path in os.listdir(val_path + 'cats/'):
        val_img_paths.append(val_path + 'cats/' + img_path)
    for img_path in os.listdir(val_path + 'dogs/'):
        val_img_paths.append(val_path + 'dogs/' + img_path)
    num_examples = len(val_img_paths)
    for i in range(len(val_img_paths)):
        if i < len(val_img_paths) / 2:
            val_labels.append([1, 0])
        else:
            val_labels.append([0, 1])
    val_img_paths = np.array(val_img_paths)
    val_labels = np.array(val_labels).astype('float32')
    val_img_paths_ph = tf.placeholder(val_img_paths.dtype, val_img_paths.shape)
    val_labels_ph = tf.placeholder(val_labels.dtype, val_labels.shape)
    val_ds = tf.data.Dataset.from_tensor_slices((val_img_paths_ph, val_labels_ph))
    val_ds = val_ds.repeat(1) \
                   .map(lambda val_img_path, val_label: tf.py_func(
                       load_img,
                       [val_img_path, val_label],
                       [tf.float32, tf.float32]
                   ), num_parallel_calls=6) \
                   .map(lambda img, label: process(img, label)) \
                   .batch(batch_size) \
                   .prefetch(batch_size)
    val_iter = val_ds.make_initializable_iterator()
    sess.run(val_iter.initializer,
             feed_dict={val_img_paths_ph: val_img_paths, val_labels_ph: val_labels})
    return [val_iter, num_examples]
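Before wiring the pipeline into a model, it is worth pulling one batch to confirm that the shapes are what we expect. A minimal check (a sketch, assuming the directory layout shown earlier) could look like this:

# quick sanity check of the input pipeline (sketch)
import tensorflow as tf
import data_load

with tf.Session() as sess:
    train_iter, num_examples = data_load.load_train_data('cats_vs_dogs/train/', sess,
                                                         batch_size=4, num_epochs=1)
    imgs, labels = sess.run(train_iter.get_next())
    print(imgs.shape, labels.shape)  # expected: (4, 224, 224, 3) (4, 2)
    print(num_examples)              # expected: 20000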
I plan to use vgg16 as the backbone and attach one or two fully connected layers on top of it to perform the classification. For vgg16 I finetune the pretrained model provided by tensorflow.contrib.slim (the vgg_16.ckpt checkpoint from the TF-Slim model zoo). The slim network itself can be instantiated with just a couple of lines, where inputs is a batch of 224 x 224 x 3 images:
from tensorflow.contrib.slim import nets
logits, end_points = nets.vgg.vgg_16(inputs, num_classes=2)
The full network is defined as follows:
# net.py
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim import nets

def full_net(x, is_training=True):
    pretrained_model_path = 'pretrained_model/vgg_16.ckpt'
    # build the vgg16 graph; only the convolutional part is needed,
    # so the returned logits are ignored and pool5 is fetched by name
    nets.vgg.vgg_16(x, num_classes=2, is_training=is_training)
    x = tf.get_default_graph().get_tensor_by_name('vgg_16/pool5/MaxPool:0')
    # the new classification head lives in its own variable scope,
    # which makes it easy to select only these variables for training later
    with tf.variable_scope('trainable'):
        x = slim.flatten(x)
        x = slim.fully_connected(x, 1024, scope='fc1')
        x = slim.fully_connected(x, 2, activation_fn=None, scope='fc2')
    # restore every variable except the fully connected ones from the pretrained checkpoint
    total_vars = tf.trainable_variables()
    init_vars = []
    for v in total_vars:
        if 'fc' not in v.name:
            init_vars.append(v)
    var_init_op, feed_dict = slim.assign_from_checkpoint(pretrained_model_path, init_vars,
                                                         ignore_missing_vars=True)
    return x, var_init_op, feed_dict
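slim.assign_from_checkpoint only restores variables whose names actually exist in vgg_16.ckpt, so it can be useful to list the checkpoint's contents and compare them against tf.trainable_variables(). A small sketch, assuming the checkpoint has been downloaded to pretrained_model/:

# inspect which variables the pretrained checkpoint provides (sketch)
import tensorflow as tf

for name, shape in tf.train.list_variables('pretrained_model/vgg_16.ckpt'):
    print(name, shape)   # e.g. vgg_16/conv1/conv1_1/weights [3, 3, 3, 64]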
Note that the last fully connected layer has its activation function set to None. This is because the loss will later be computed with tf.nn.softmax_cross_entropy_with_logits(logits=None, labels=None), which applies the softmax itself, so the network should output raw logits.
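In other words, the softmax only appears inside the loss. If class probabilities are needed at inference time, they have to be computed explicitly from the logits; a minimal sketch, where logits is the 2-way output returned by full_net:

# probabilities at inference time (sketch): apply softmax to the raw logits
probs = tf.nn.softmax(logits)       # shape (batch_size, 2), each row sums to 1
pred = tf.argmax(probs, axis=1)     # 0 -> cat, 1 -> dog, given the [1, 0] / [0, 1] encoding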
The full training script is as follows:
# train.py
import os
import tensorflow as tf
import data_load
import net

def main():
    train_path = 'train/'            # path to the training set
    checkpoint_dir = 'checkpoint/'   # where checkpoints are saved and restored
    model_name = 'model.ckpt'        # checkpoint file name
    if not os.path.exists(checkpoint_dir):
        os.mkdir(checkpoint_dir)
    inp = tf.placeholder(tf.float32, (None, 224, 224, 3))
    labels = tf.placeholder(dtype=tf.float32, shape=(None, 2))
    out, var_init_op, var_init_dict = net.full_net(inp)
    global_step = tf.Variable(0, trainable=False)
    batch_size = 32
    with tf.Session() as sess:
        # load the training set
        train_iter, num_train_examples = data_load.load_train_data(
            train_path, sess, batch_size, 100)
        train_img_batch, train_label_batch = train_iter.get_next()
        # collect the variables that will actually be trained (the new head only)
        t_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'trainable')
        print('trainable variables', t_vars)
        # L2 regularization on the trainable variables
        reg = tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(1e-4), t_vars)
        # cross-entropy loss
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=labels)
        loss = tf.reduce_mean(cross_entropy) + reg
        optimizer = tf.train.AdamOptimizer(1e-5).minimize(loss, var_list=t_vars, global_step=global_step)
        # saver for checkpoints
        saver = tf.train.Saver(max_to_keep=2)
        # initialize all global variables
        sess.run(tf.global_variables_initializer())
        # restore an existing checkpoint if there is one, otherwise initialize from the pretrained vgg16
        ckpt = tf.train.latest_checkpoint(checkpoint_dir)
        if ckpt:
            print('loaded ' + ckpt)
            saver.restore(sess, ckpt)
        else:
            sess.run(var_init_op, feed_dict=var_init_dict)
        # number of steps per epoch
        steps_per_epoch = num_train_examples // batch_size + 1
        print('start training......')
        while True:
            try:
                img_batch, label_batch = sess.run([train_img_batch, train_label_batch])
            except tf.errors.OutOfRangeError:
                # the dataset only repeats a fixed number of epochs; stop cleanly when it runs out
                break
            _, cur_loss, cur_step = sess.run([optimizer, loss, global_step],
                                             feed_dict={inp: img_batch, labels: label_batch})
            cur_epoch = cur_step // steps_per_epoch + 1
            # save every 500 steps
            if cur_step % 500 == 0:
                saver.save(sess, checkpoint_dir + model_name, global_step=global_step)
                print('model saved')
            # report every 100 steps
            if cur_step % 100 == 0:
                print('epoch %d, step %d, loss %.4f' % (cur_epoch, cur_step, cur_loss))
This completes the training setup. The validation script is given below:
# val.py
import numpy as np
import tensorflow as tf
import data_load
import net

def main():
    val_path = 'val/'                # path to the validation set
    checkpoint_dir = 'checkpoint/'   # path of the checkpoints to restore
    input_img = tf.placeholder(dtype=tf.float32, shape=(None, 224, 224, 3))
    y_true = tf.placeholder(dtype=tf.float32, shape=(None, 2))
    out_img, _, _ = net.full_net(input_img, is_training=False)
    batch_size = 32
    with tf.Session() as sess:
        val_iter, num_examples = data_load.load_val_data(val_path, sess, batch_size=batch_size)
        print('num_examples {}'.format(num_examples))
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.latest_checkpoint(checkpoint_dir)
        if ckpt:
            print('loaded', ckpt)
            saver.restore(sess, ckpt)
        else:
            print('can not find model')
            return
        img_batch, label_batch = val_iter.get_next()
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out_img, labels=y_true))
        total_loss = []
        cur_num = 0   # number of examples validated so far
        right = 0     # number of correct predictions
        print('start validation......')
        while True:
            try:
                img, labels = sess.run([img_batch, label_batch])
                cur_loss, outs = sess.run([loss, out_img], feed_dict={input_img: img, y_true: labels})
                total_loss.append(cur_loss)
                # a prediction is correct when the largest logit matches the one-hot label
                for out, label in zip(outs, labels):
                    if np.argmax(out) == np.argmax(label):
                        right += 1
                cur_num += len(labels)   # use the real batch size; the last batch may be smaller
                print('process {}/{}'.format(cur_num, num_examples))
            except tf.errors.OutOfRangeError:
                # the validation set only repeats once, so this marks the end of the data
                print('mean loss {} acc {}'.format(np.mean(total_loss), right / cur_num))
                break
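For completeness, here is a sketch of how the restored model could be used to classify a single image outside the tf.data pipeline; the preprocessing mirrors load_img and process above. The image path is only a hypothetical example and should be adjusted to wherever your images live.

# predict.py (sketch): classify one image with the trained model
import cv2
import numpy as np
import tensorflow as tf
import net

img_path = 'val/cats/cat.10000.jpg'   # hypothetical example path, adjust as needed
inp = tf.placeholder(tf.float32, (None, 224, 224, 3))
logits, _, _ = net.full_net(inp, is_training=False)
probs = tf.nn.softmax(logits)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    tf.train.Saver().restore(sess, tf.train.latest_checkpoint('checkpoint/'))
    img = cv2.imread(img_path)[:, :, (2, 1, 0)].astype('float32')        # BGR -> RGB, as in load_img
    img = sess.run(tf.image.resize_image_with_crop_or_pad(
        tf.image.per_image_standardization(img), 224, 224))              # same preprocessing as process()
    p = sess.run(probs, feed_dict={inp: img[np.newaxis]})
    print('cat' if np.argmax(p[0]) == 0 else 'dog', p[0])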
My environment and the final results were as follows:
Operating system: Ubuntu 16.04
CUDA / cuDNN: CUDA 10.0 + cuDNN 7.5
GPU: RTX 2080 Ti
num_epochs: 27
total_steps: 16500
val_acc: 0.968
val_mean_loss: 0.128
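As a quick consistency check on these numbers: with 20,000 training images and batch_size = 32, steps_per_epoch = 20000 // 32 + 1 = 626, so step 16,500 falls in epoch 16500 // 626 + 1 = 27, which matches the num_epochs reported above.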
Feel free to visit my personal blog: hfwang.net.cn
The complete code is available on my github.