在学习了一系列计算机视觉公开课视频之后,我们回到本课程最初的应用:对十类图像进行图像分类。之前的 Assignment 已经通过 kNN 和 SVM 做过基础的分析,这里我们改用卷积神经网络来实现。首先我们下载 cs231n 提供的训练集和测试集(即 CIFAR-10 数据集)。
下载地址:http://www.cs.toronto.edu/~kriz/cifar.html
训练集一共十类,每一类5000张(共50000张),图像尺寸为32×32像素。测试集同样是十类,每一类1000张(共10000张)同等大小的图像。为了最后判断图像分类的准确性,图像文件名以类别编号0~9开头(编号后接下划线),分别标注十类图像。
项目结构如下(无扩展名的为文件夹):
>assignment
>>data(测试集和训练集的图像)
>>>test
>>>train
>>c.py (训练文件)
>>model (存放训练后的模型)
train-test.py
import os
from PIL import Image
import numpy as np
import tensorflow as tf
# Run-mode flag: when True, the session below restores the saved model and
# prints predictions for the loaded images.
test = True
# Directory the images are read from.
test_dir = "./data/test"
# Path handed to the Saver when restoring the trained model.
model_path = "./model"
def read_data(test_dir):
    """Load every image file in a directory into NumPy arrays.

    File names are expected to look like ``<label>_<rest>``: the integer
    before the first underscore is used as the class label.

    Args:
        test_dir: Directory that holds the image files.

    Returns:
        A ``(fpaths, datas, labels)`` tuple: the list of image paths, an
        array with the pixel data divided by 255.0, and an array of integer
        labels. All three are in the same order (sorted by file name).

    Raises:
        ValueError: If a file name does not start with an integer label.
    """
    datas = []
    labels = []
    fpaths = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for fname in sorted(os.listdir(test_dir)):
        fpath = os.path.join(test_dir, fname)
        # Skip sub-directories and other non-file entries instead of crashing.
        if not os.path.isfile(fpath):
            continue
        # The context manager closes the file handle once the pixels have
        # been copied into the array.
        with Image.open(fpath) as image:
            data = np.array(image) / 255.0
        label = int(fname.split("_")[0])
        fpaths.append(fpath)
        datas.append(data)
        labels.append(label)
    datas = np.array(datas)
    labels = np.array(labels)
    print("shape of datas: {}\tshape of labels: {}".format(datas.shape, labels.shape))
    return fpaths, datas, labels
# Load the images, their paths and their labels from the test directory.
fpaths, datas, labels = read_data(test_dir)
# The class count is the number of distinct labels found in the loaded data.
# NOTE(review): the output layer is sized from this value, so the loaded data
# presumably must contain every class the saved model was trained on - confirm.
num_classes = len(np.unique(labels))

# Graph inputs: a batch of 32x32 3-channel images and one integer label each.
datas_placeholder = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
labels_placeholder = tf.placeholder(dtype=tf.int32, shape=[None])

# Graph input carrying the dropout rate.
dropout_placeholdr = tf.placeholder(dtype=tf.float32)
# First convolution: 20 filters, kernel size 5, ReLU activation.
conv0 = tf.layers.conv2d(datas_placeholder, 20, 5, activation=tf.nn.relu)
# Max pooling with a 2x2 window and stride 2.
pool0 = tf.layers.max_pooling2d(conv0, [2, 2], [2, 2])

# Second convolution: 40 filters, kernel size 4, ReLU activation.
conv1 = tf.layers.conv2d(pool0, 40, 4, activation=tf.nn.relu)
# Max pooling with a 2x2 window and stride 2.
pool1 = tf.layers.max_pooling2d(conv1, [2, 2], [2, 2])

# Flatten the pooled feature maps into one vector per example.
flatten = tf.layers.flatten(pool1)

# Fully connected layer with 400 units.
fc = tf.layers.dense(flatten, 400, activation=tf.nn.relu)

# Dropout layer. tf.layers.dropout is an identity op unless training=True,
# so with the default the fed rate would be ignored. The layer is kept
# active here and the placeholder alone decides the behaviour: feed 0 to
# disable dropout (evaluation), a positive rate to enable it (training).
dropout_fc = tf.layers.dropout(fc, rate=dropout_placeholdr, training=True)

# Output layer: one unnormalised score (logit) per class.
logits = tf.layers.dense(dropout_fc, num_classes)

# Predicted class id = index of the largest logit.
# tf.argmax replaces the deprecated tf.arg_max alias.
predicted_labels = tf.argmax(logits, 1)
# Per-example softmax cross-entropy loss. The sparse variant consumes the
# integer class ids directly, so no explicit one-hot encoding is needed, and
# it avoids the deprecated tf.nn.softmax_cross_entropy_with_logits.
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=labels_placeholder,
    logits=logits
)
# Scalar objective: mean loss over the batch.
mean_loss = tf.reduce_mean(losses)
# Optimizer. minimize() is given the scalar mean loss; passing the
# per-example vector would implicitly optimize its sum, making the gradient
# scale depend on the batch size.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(mean_loss)
# Saver object used to restore the model variables.
saver = tf.train.Saver()

with tf.Session() as sess:
    if test:
        print("测试")
        saver.restore(sess, model_path)
        print("从{}载入模型".format(model_path))

        # Class id -> class name; ids are assigned by list position (0..9).
        label_name_dict = dict(enumerate(
            ["飞机", "汽车", "鸟", "猫", "鹿", "狗", "青蛙", "马", "船", "卡车"]
        ))

        # Feed every loaded image in one batch, with a dropout rate of 0.
        test_feed_dict = {
            datas_placeholder: datas,
            labels_placeholder: labels,
            dropout_placeholdr: 0,
        }
        predicted_labels_val = sess.run(predicted_labels, feed_dict=test_feed_dict)

        # One line per image: path, true class name, predicted class name.
        for fpath, real_label, predicted_label in zip(fpaths, labels, predicted_labels_val):
            print("{}\t{} => {}".format(
                fpath,
                label_name_dict[real_label],
                label_name_dict[predicted_label],
            ))