一文了解深度学习实战——分类篇

本文将从两个案例(MNIST手写数字识别、狗的品种识别)入手,让童鞋们从实战角度快速入门深度学习的分类部分!

目录

  • MNIST手写数字识别
    • TensorFlow搭建MLP
    • TensorFlow搭建CNN
    • Keras搭建MLP
    • Keras搭建CNN
  • 狗的品种识别
    • Keras搭建CNN
    • 迁移学习(InceptionV3)

MNIST手写数字识别

TensorFlow搭建MLP

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Load the MNIST dataset (labels one-hot encoded) from the MNIST_data/ directory.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Give every split a short alias so the rest of the script reads naturally.
x_train, y_train = mnist.train.images, mnist.train.labels
x_valid, y_valid = mnist.validation.images, mnist.validation.labels
x_test, y_test = mnist.test.images, mnist.test.labels

# Print the array shape of each split (images first, then labels).
for caption, array in (
    ("训练集图像大小:{}", x_train),
    ("训练集标签大小:{}", y_train),
    ("验证集图像大小:{}", x_valid),
    ("验证集标签大小:{}", y_valid),
    ("测试集图像大小:{}", x_test),
    ("测试集标签大小:{}", y_test),
):
    print(caption.format(array.shape))

# Draw two figures of five digits each: training samples 0-4, then the five
# samples starting at offset 2*12.
for offset in (0, 2 * 12):
    fig = plt.figure(figsize=(10, 10))
    for col in range(5):
        ax = fig.add_subplot(1, 5, col + 1, xticks=[], yticks=[])
        # Every sample is reshaped to 28x28 before being displayed.
        ax.imshow(x_train[offset + col].reshape(28, 28), cmap='gray')

# Render an image together with the numeric value of every pixel.
def visualize_input(img, ax):
    """Show ``img`` on ``ax`` and write each pixel's value on top of it.

    Args:
        img: 2-D array exposing ``shape`` and ``max()``; indexed as
            ``img[row][col]``.
        ax: drawing target providing ``imshow`` and ``annotate``.
    """
    ax.imshow(img, cmap='gray')

    # Unpacking also enforces that the image is exactly two-dimensional.
    n_rows, n_cols = img.shape

    # Pixels darker than this cutoff get white text, the others black text,
    # so the number stays readable against its own background.
    cutoff = img.max() / 2.5

    for row_idx in range(n_rows):
        row = img[row_idx]
        for col_idx in range(n_cols):
            value = row[col_idx]
            if value < cutoff:
                text_color = 'white'
            else:
                text_color = 'black'
            # xy is (column, row) because the x axis runs along the columns.
            ax.annotate(str(round(value, 2)),
                        xy=(col_idx, row_idx),
                        horizontalalignment='center',
                        verticalalignment='center',
                        color=text_color)

# Use the training sample at index 5 as the example: reshape it to 28x28 and
# draw it with every pixel value overlaid.
sample_image = x_train[5].reshape(28, 28)
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1)
visualize_input(sample_image, ax)


import math
# Model construction: a single-layer softmax classifier.
# Hyper-parameters
img_size = 28 * 28
num_classes = 10
learning_rate = 0.1
epochs = 100
batch_size = 128

# x is the input placeholder; it is fed with one mini-batch of flattened
# images at every training step.
x = tf.placeholder(tf.float32, [None, img_size])

# W holds the weights, initialised to zeros, shaped
# (flattened image size, number of classes).
W = tf.Variable(tf.zeros([img_size, num_classes]))

# b is the bias term, one value per class.
b = tf.Variable(tf.zeros([num_classes]))

# Perceptron formula: (x * W) + b. These are the raw, unnormalised scores.
logits = tf.matmul(x, W) + b
# y is the predicted class distribution (softmax over the scores).
y = tf.nn.softmax(logits)

# y_ is the placeholder for the one-hot ground-truth labels.
y_ = tf.placeholder(tf.float32, [None, num_classes])

valid_feed_dict = {x: x_valid, y_: y_valid}
test_feed_dict = {x: x_test, y_: y_test}

# Cross-entropy loss. The op applies softmax internally, so it must receive
# the raw logits; feeding it `y` would apply softmax twice.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
# Plain gradient-descent optimiser.
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# A prediction is correct when the most probable class matches the label.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# Accuracy is the fraction of correct predictions.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Global counter of training steps executed so far.
iteration = 0
# Saver used to write the training checkpoint.
saver = tf.train.Saver()

# Train the model inside a TensorFlow session.
with tf.Session() as sess:

    # Initialise all global variables.
    sess.run(tf.global_variables_initializer())

    # Number of mini-batches per epoch, derived from batch_size so that
    # changing the hyper-parameter keeps the loop consistent.
    batch_count = int(math.ceil(mnist.train.labels.shape[0] / float(batch_size)))

    # One pass over the training set per epoch.
    for e in range(epochs):

        # Run one optimisation step per mini-batch.
        for batch_i in range(batch_count):

            # Start index of this mini-batch (stride of batch_size samples).
            batch_start = batch_i * batch_size
            # Slice out the images of this mini-batch...
            batch_x = mnist.train.images[batch_start:batch_start+batch_size]
            # ...and the matching labels.
            batch_y = mnist.train.labels[batch_start:batch_start+batch_size]
            # One training step; also fetch the loss for logging.
            loss, _ = sess.run([cost, optimizer], feed_dict={x: batch_x, y_: batch_y})

            # Log the training loss every 20 mini-batches.
            if batch_i % 20 == 0:
                print("Epoch: {}/{}".format(e+1, epochs), 
                      "Iteration: {}".format(iteration), 
                      "Training loss: {:.5f}".format(loss))
            iteration += 1

            # Validate every `batch_size` training steps (the step interval
            # reuses the batch_size value). The epoch is shown 1-based, the
            # same way as in the training log above.
            if iteration % batch_size == 0:
                valid_acc = sess.run(accuracy, feed_dict=valid_feed_dict)
                print("Epoch: {}/{}".format(e+1, epochs),
                      "Iteration: {}".format(iteration),
                      "Validation Accuracy: {:.5f}".format(valid_acc))

    # Save a checkpoint of the trained model.
    saver.save(sess, "checkpoints/mnist_mlp_tf.ckpt")

# Measure accuracy on the test set.
saver = tf.train.Saver()
with tf.Session() as sess:
    # Restore the most recent checkpoint found in the checkpoints directory.
    saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))

    # Evaluate accuracy on the test set.
    test_acc = sess.run(accuracy, feed_dict=test_feed_dict)
    print("test accuracy: {:.5f}".format(test_acc))

TensorFlow搭建CNN

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST dataset (labels one-hot encoded) from the MNIST_data/ directory.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)

# Short aliases for the three splits.
x_train, y_train = mnist.train.images, mnist.train.labels
x_valid, y_valid = mnist.validation.images, mnist.validation.labels
x_test, y_test = mnist.test.images, mnist.test.labels

# Print the array shape of each split (images first, then labels).
for caption, array in (
    ("训练集图像大小:{}", x_train),
    ("训练集标签大小:{}", y_train),
    ("验证集图像大小:{}", x_valid),
    ("验证集标签大小:{}", y_valid),
    ("测试集图像大小:{}", x_test),
    ("测试集标签大小:{}", y_test),
):
    print(caption.format(array.shape))

# Hyper-parameters
img_size = 28 * 28
num_classes = 10
learning_rate = 1e-4
epochs = 10
batch_size = 50

# Input placeholder for flattened images, plus a 4-D view of it shaped
# [batch, 28, 28, 1] for the convolution layers.
x = tf.placeholder(tf.float32, shape=[None, img_size])
x_shaped = tf.reshape(x, [-1, 28, 28, 1])

# Placeholder for the one-hot ground-truth labels.
y = tf.placeholder(tf.float32, shape=[None, num_classes])

# Building block: convolution -> bias -> ReLU -> max-pooling.
def create_conv2d(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name):
    """Create one convolution stage and return its pooled output tensor.

    Args:
        input_data: input tensor passed to ``tf.nn.conv2d``.
        num_input_channels: number of channels of ``input_data``.
        num_filters: number of output channels of the convolution.
        filter_shape: (height, width) of the convolution kernel.
        pool_shape: (height, width) of the max-pooling window.
        name: prefix for the variable names (``<name>_W`` and ``<name>_b``).

    Returns:
        The tensor produced by the max-pooling op.
    """
    # Kernel layout is [filter_height, filter_width, in_channels, out_channels];
    # it is drawn from a truncated normal with standard deviation 0.03.
    kernel = tf.Variable(
        tf.truncated_normal(
            [filter_shape[0], filter_shape[1], num_input_channels, num_filters],
            stddev=0.03),
        name=name+"_W")

    # One bias per filter, also drawn from a truncated normal.
    offset = tf.Variable(tf.truncated_normal([num_filters]), name=name+"_b")

    # Stride-1 convolution with SAME padding, then bias and ReLU.
    convolved = tf.nn.conv2d(input_data, kernel, (1, 1, 1, 1), padding="SAME")
    activated = tf.nn.relu(convolved + offset)

    # Max-pooling with a fixed stride of 2 in both spatial dimensions.
    return tf.nn.max_pool(activated,
                          ksize=(1, pool_shape[0], pool_shape[1], 1),
                          strides=(1, 2, 2, 1),
                          padding="SAME")

# First convolution stage: 1 input channel -> 32 filters, 5x5 kernel, 2x2 pooling.
layer1 = create_conv2d(x_shaped, 1, 32, (5, 5), (2, 2), name="layer1")
# Second convolution stage: 32 -> 64 filters.
layer2 = create_conv2d(layer1, 32, 64, (5, 5), (2, 2), name="layer2")
# Flatten the feature maps (two stride-2 poolings turn 28x28 into 7x7).
flattened = tf.reshape(layer2, (-1, 7 * 7 * 64))

# Fully connected hidden layer with 1000 units and ReLU.
wd1 = tf.Variable(tf.truncated_normal((7 * 7 * 64, 1000), stddev=0.03), name="wd1")
bd1 = tf.Variable(tf.truncated_normal([1000], stddev=0.01), name="bd1")
dense_layer1 = tf.add(tf.matmul(flattened, wd1), bd1)
dense_layer1 = tf.nn.relu(dense_layer1)

# Fully connected output layer; dense_layer2 holds the raw class scores.
wd2 = tf.Variable(tf.truncated_normal((1000, num_classes), stddev=0.03), name="wd2")
bd2 = tf.Variable(tf.truncated_normal([num_classes], stddev=0.01), name="bd2")
dense_layer2 = tf.add(tf.matmul(dense_layer1, wd2), bd2)

# Predicted class distribution (softmax over the scores).
y_ = tf.nn.softmax(dense_layer2)

# Cross-entropy loss. The op applies softmax internally, so it must receive
# the raw scores (dense_layer2); feeding it `y_` would apply softmax twice.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=dense_layer2, labels=y))
# Adam optimiser.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# A prediction is correct when the most probable class matches the label.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# Accuracy is the fraction of correct predictions.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Global counter of training steps executed so far.
iteration = 0


import math

# Saver used to write the training checkpoint.
saver = tf.train.Saver()

# Train the model inside a TensorFlow session.
with tf.Session() as sess:

    # Initialise all global variables.
    sess.run(tf.global_variables_initializer())

    # Number of mini-batches needed to cover the training set once.
    batch_count = int(math.ceil(x_train.shape[0] / float(batch_size)))

    # Train for `epochs` epochs.
    for e in range(epochs):
        # One optimisation step per mini-batch.
        for batch_i in range(batch_count):
            # Fetch the next batch_size images and labels.
            batch_x, batch_y = mnist.train.next_batch(batch_size=batch_size)
            # One training step; also fetch the loss for logging.
            _, loss = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})

            # Log the training loss every 20 mini-batches, i.e. after every
            # 20 * batch_size images.
            if batch_i % 20 == 0:
                print("Epoch: {}/{}".format(e+1, epochs), 
                      "Iteration: {}".format(iteration), 
                      "Training loss: {:.5f}".format(loss))
            iteration += 1

            # Validate every `batch_size` training steps (the step interval
            # reuses the batch_size value). The epoch is shown 1-based, the
            # same way as in the training log above.
            if iteration % batch_size == 0:
                valid_acc = sess.run(accuracy, feed_dict={x: x_valid, y: y_valid})
                print("Epoch: {}/{}".format(e+1, epochs),
                      "Iteration: {}".format(iteration),
                      "Validation Accuracy: {:.5f}".format(valid_acc))

    # Save a checkpoint of the trained model.
    saver.save(sess, "checkpoints/mnist_cnn_tf.ckpt")

# Measure accuracy on the test set.
saver = tf.train.Saver()
with tf.Session() as sess:
    # Restore the most recent checkpoint found in the checkpoints directory.
    saver.restore(sess, tf.train.latest_checkpoint('checkpoints/'))

    # Evaluate accuracy on the test set.
    test_acc = sess.run(accuracy, feed_dict={x: x_test, y: y_test})
    print("test accuracy: {:.5f}".format(test_acc))

Keras搭建MLP

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import RMSprop

# Hyper-parameters
batch_size = 128
num_classes = 10
epochs = 20
img_size = 28 * 28

# Load the MNIST arrays through Keras.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Hold out the last 5000 training samples as the validation set.
valid_len = 5000
x_len = x_train.shape[0]
train_len = x_len-valid_len

# The right-hand sides are evaluated before assignment, so both slices are
# taken from the full training arrays.
x_train, x_valid = x_train[:train_len], x_train[train_len:]
y_train, y_valid = y_train[:train_len], y_train[train_len:]

# Flatten every image to a vector, convert to float32 and scale the pixel
# values into the range 0..1.
x_train = x_train.reshape(-1, img_size).astype('float32') / 255
x_valid = x_valid.reshape(-1, img_size).astype('float32') / 255
x_test = x_test.reshape(-1, img_size).astype('float32') / 255

# One-hot encode the labels of all three splits.
y_train, y_valid, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_valid, y_test)
)

# Two hidden layers of 512 ReLU units, each followed by 20% dropout, and a
# softmax output layer.
model = Sequential([
    Dense(512, activation='relu', input_shape=(img_size,)),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])
# Print the model architecture.
model.summary()

# Compile and train, validating on the held-out split after every epoch.
model.compile(loss='categorical_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])
model.fit(x_train, y_train,
          epochs=epochs,
          batch_size=batch_size,
          verbose=1,
          validation_data=(x_valid, y_valid))

# Evaluate on the test set; score is [loss, accuracy].
score = model.evaluate(x_test, y_test, verbose=0)
print('Test accuracy:{}, Test loss: {}, {}'.format(score[1], score[0], score))


#模型预测,画图
import matplotlib.pyplot as plt
import numpy as np
# Take the test image at index 7 (kept as a batch of one).
x_img = x_test[7:8]
# Predict the class probabilities for that single image.
prediction = model.predict(x_img)
class_probabilities = prediction[0]
# Bar chart with one bar per class, labelled 0-9.
x_coordinates = np.arange(prediction.shape[1])
plt.bar(x_coordinates, class_probabilities)
plt.xticks(x_coordinates, np.arange(10))
plt.show()

Keras搭建CNN

import numpy as np
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras import utils

# Hyper-parameters
batch_size = 128
epochs = 15
num_classes = 10

img_width = 28
img_height = 28
img_channels = 1

# Load the MNIST arrays through Keras.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Hold out the last 5000 training samples as the validation set.
valid_len = 5000
x_len = x_train.shape[0]
train_len = x_len-valid_len

# Validation split
x_valid = x_train[train_len:]
y_valid = y_train[train_len:]

# Training split
x_train = x_train[:train_len]
y_train = y_train[:train_len]

# Reshape every split to [batch, height, width, channels].
x_train = x_train.reshape(x_train.shape[0], img_height, img_width, img_channels)
x_valid = x_valid.reshape(x_valid.shape[0], img_height, img_width, img_channels)
x_test = x_test.reshape(x_test.shape[0], img_height, img_width, img_channels)

# Convert all splits to float32.
x_train = x_train.astype(np.float32)
x_valid = x_valid.astype(np.float32)
x_test = x_test.astype(np.float32)

# Normalise the pixel values into the range 0..1.
x_train /= 255
x_valid /= 255
x_test /= 255

# One-hot encode the labels of all three splits.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_valid = keras.utils.to_categorical(y_valid, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Build the model. input_shape follows the (height, width, channels) layout
# used by the reshape above, so the two stay consistent for non-square images.
model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(img_height, img_width, img_channels)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
# Print the model architecture.
model.summary()

# Compile the model.
model.compile(loss=keras.losses.categorical_crossentropy, 
              optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])

# Train, validating on the held-out split after every epoch.
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, 
          verbose=1, validation_data=(x_valid, y_valid))

# Evaluate on the test set; score is [loss, accuracy].
score = model.evaluate(x_test, y_test, verbose=0)
print("Test Loss: {:.5f}, Test Accuracy: {:.5f}".format(score[0], score[1]))

# 单张图像预测
import matplotlib.pyplot as plt

# Take the first test image (kept as a batch of one).
x_img = x_test[0:1]
# Predict the class probabilities for that single image.
prediction = model.predict(x_img)

# Bar chart with one bar per class, labelled 0-9.
x_coordinate = np.arange(prediction.shape[1])
plt.bar(x_coordinate, prediction[0][:])
plt.xticks(x_coordinate, np.arange(10))
plt.show()

# Report the predicted digit: the index of the highest probability. Printing
# y_test here would show the one-hot ground-truth label, not the prediction.
print("预测的图中的数字是{}。".format(np.argmax(prediction[0])))

 

狗的品种识别

狗狗图片数据:
链接:https://pan.baidu.com/s/1cEgg2aqXvAvI58M8EAS9CQ 密码:8ahu

Keras搭建CNN

from sklearn.datasets import load_files       
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
from keras.preprocessing import image   
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
from matplotlib import image
import tqdm

# The dataset contains 120 dog breeds.
num_classes = 120

# Load the file paths and labels of an image directory tree.
def load_dataset(path):
    """Collect image paths and labels from a directory with one folder per class.

    Args:
        path: root directory handed to sklearn's ``load_files``.

    Returns:
        A tuple ``(dog_files, raw_targets, dog_targets)``: the file paths as
        a NumPy array, the integer class index of each file, and the one-hot
        encoding of those indices with ``num_classes`` columns.
    """
    # Only 'filenames' and 'target' are used below, so skip reading the file
    # contents (load_content=False); the images are decoded later, on demand.
    data = load_files(path, load_content=False)
    # File paths as a NumPy array.
    dog_files = np.array(data['filenames'])
    # Integer class index of every file, in the same order as dog_files.
    raw_targets = np.array(data['target'])
    # One-hot encode the class indices.
    dog_targets = np_utils.to_categorical(raw_targets, num_classes)
    return dog_files, raw_targets, dog_targets
  
# Load the paths, integer labels and one-hot labels of every image.
dog_filepaths, dog_raw_targets, dog_targets = load_dataset('Images/')

# Build the list of breed names from the folder names under Images/.
# glob returns the paths matching the pattern; each folder name starts with
# an id prefix of fixed length, which is cut off to leave the breed name.
dogpath_prefix_len = len('Images/n02085620-')
dog_names = [folder[dogpath_prefix_len:] for folder in sorted(glob("Images/*"))]

print('狗狗的品种有{}种。'.format(len(dog_names)))
print('狗狗的图片一共有{}张。\n'.format(len(dog_filepaths)))


# To keep training fast on modest machines only the first 9000 images are
# used. Comment out the next line to train on the whole dataset.
dog_filepaths, dog_targets = dog_filepaths[:9000], dog_targets[:9000]

# Split off 20% of the data, then divide that part evenly into a validation
# half and a test half.
X_train, X_test, y_train, y_test = train_test_split(dog_filepaths, dog_targets, test_size=0.2)

half_test_count = len(X_test) // 2
X_valid, X_test = X_test[:half_test_count], X_test[half_test_count:]
y_valid, y_test = y_test[:half_test_count], y_test[half_test_count:]

# Print the shapes of the three splits.
for split_name, paths, labels in (
    ("train", X_train, y_train),
    ("valid", X_valid, y_valid),
    ("test", X_test, y_test),
):
    print("X_{0}.shape={1}, y_{0}.shape={2}.".format(split_name, paths.shape, labels.shape))

# Use matplotlib's default plotting style.
plt.style.use('default')


# Preview nine randomly chosen training images.
def draw_random_9_dog_images():
    """Plot nine random training images in a 3x3 grid, labelled by breed.

    Reads the module-level ``X_train`` (image paths) and
    ``dogpath_prefix_len``. Indices are drawn with ``np.random.choice``, so
    the same image may appear more than once and every call differs.
    """
    # 3x3 grid of axes inside a 10x9 inch figure.
    fig, axes = plt.subplots(nrows=3, ncols=3)
    fig.set_size_inches(10, 9)

    # Draw nine random indices and look up the matching image paths.
    random_9_nums = np.random.choice(len(X_train), 9)
    random_9_imgs = X_train[random_9_nums]
    print(random_9_imgs)

    # The breed name is the part of the path between the fixed-length folder
    # prefix and the next '/'.
    path_tails = (path[dogpath_prefix_len:] for path in random_9_imgs)
    imgname_list = [tail[:tail.find('/')] for tail in path_tails]

    # axes.flat walks the grid row by row, matching the order of the lists.
    for ax, img_path, breed in zip(axes.flat, random_9_imgs, imgname_list):
        # Read the pixel data and show it with the breed as the x label.
        ax.imshow(image.imread(img_path))
        ax.set_xlabel(breed)

draw_random_9_dog_images()

# Read every image and record its shape; only 3-dimensional shapes
# (rows, columns, channels) are kept in dogs_shape_list.
dogs_shape_list = []
for filepath in dog_filepaths:
    shape = image.imread(filepath).shape
    if len(shape) == 3:
        dogs_shape_list.append(shape)

dogs_shapes = np.asarray(dogs_shape_list)

print("总共{}张。".format(len(dogs_shapes)))
print("随机抽取三张图片的维度是{}。".format(dogs_shapes[np.random.choice(len(dogs_shapes), 3)]))

# An image array is laid out as (rows, columns, channels): axis 0 is the
# height and axis 1 is the width.
dogs_mean_height = np.mean(dogs_shapes[:,0])
dogs_mean_width = np.mean(dogs_shapes[:,1])
print("狗狗的图片的平均宽:{:.1f} * 平均高:{:.1f}。".format(dogs_mean_width, dogs_mean_height))

# Convert one image file into a tensor of the standard size (1, 224, 224, 3).
def path_to_tensor(img_path):
    """Load an image file and return it as a 4-D tensor.

    Args:
        img_path: path of the image file.

    Returns:
        A NumPy array with a leading batch axis of size 1, produced by
        resizing the image to 224x224 and converting it with
        ``img_to_array``.
    """
    # Imported locally under its own name: at module level ``image`` is
    # rebound by ``from matplotlib import image``, and matplotlib's module
    # has no ``load_img``.
    from keras.preprocessing import image as keras_image

    # load_img returns a PIL image; target_size is (height, width).
    img = keras_image.load_img(img_path, target_size=(224, 224))
    # Convert the PIL image to a 3-D array.
    x = keras_image.img_to_array(img)
    # Add the batch axis and return the 4-D tensor.
    return np.expand_dims(x, axis=0)

# Convert a collection of image paths into one stacked tensor.
def paths_to_tensor(img_paths):
    """Load every image in ``img_paths`` and stack the results.

    Args:
        img_paths: iterable of image file paths.

    Returns:
        The per-image tensors from ``path_to_tensor`` stacked along the
        first axis with ``np.vstack``.
    """
    # The file only does ``import tqdm``, which binds the module; the
    # callable progress bar is the ``tqdm`` class inside it.
    from tqdm import tqdm as progress_bar

    # Convert the images one by one while showing a progress bar.
    list_of_tensors = [path_to_tensor(img_path) for img_path in progress_bar(img_paths)]
    # Stack the (1, H, W, C) tensors into a single (N, H, W, C) array.
    return np.vstack(list_of_tensors)


from PIL import ImageFile 
# Allow PIL to load truncated image files instead of raising an IO error.
ImageFile.LOAD_TRUNCATED_IMAGES = True                 

# Convert the training, validation and test paths (in that order) to float32
# tensors and divide by 255 so that the RGB values (0..255) end up in 0..1.
train_tensors, valid_tensors, test_tensors = [
    paths_to_tensor(split_paths).astype(np.float32) / 255
    for split_paths in (X_train, X_valid, X_test)
]


from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

# Build the network as a single Sequential layer list.
model = Sequential([
    # Input stage: the first layer must declare input_shape (the image
    # size); 16 filters, 2x2 kernel, stride 1, 'same' padding.
    Conv2D(filters=16, kernel_size=(2, 2), strides=(1, 1), padding='same',
           activation='relu', input_shape=train_tensors.shape[1:]),
    # 2x2 max-pooling, followed by 20% dropout against over-fitting.
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    # Second stage: 32 filters, same kernel, stride and padding.
    Conv2D(filters=32, kernel_size=(2, 2), strides=(1, 1), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    # Third stage: 64 filters.
    Conv2D(filters=64, kernel_size=(2, 2), strides=(1, 1), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    # Global average pooling, 50% dropout, then one softmax output per breed.
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(num_classes, activation="softmax"),
])

# Print the network architecture.
model.summary()

# Compile the model.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

from keras.callbacks import ModelCheckpoint 

epochs = 20
# Keep only the weights of the best epoch seen so far.
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.from_scratch.hdf5', 
                               verbose=1, 
                               save_best_only=True)

model.fit(train_tensors, 
          y_train, 
          validation_data=(valid_tensors, y_valid),
          epochs=epochs, 
          batch_size=20, 
          callbacks=[checkpointer], 
          verbose=1)

# Load the weights saved by the checkpoint callback.
model.load_weights('saved_models/weights.best.from_scratch.hdf5')

# Predicted breed index for every test image. A single batched predict call
# replaces one predict call per image.
dog_breed_predictions = np.argmax(model.predict(test_tensors), axis=1)

# Test accuracy in percent.
test_accuracy = 100.0 * np.sum(dog_breed_predictions == np.argmax(y_test, axis=1)) / len(dog_breed_predictions)
print('Test Accuracy: {:.4f}'.format(test_accuracy))

# The accuracy turns out to be very low; this is where transfer learning comes in.

 

迁移学习(InceptionV3)


# 导入InceptionV3预训练模型和数据处理模块
from keras.applications.inception_v3 import InceptionV3, preprocess_input, decode_predictions
# 导入构建Keras的Model所需模块
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Dropout
from keras.preprocessing import image
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint  
# 导入图片数据增强生成器
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt


class InceptionV3Retrained:
    """Retrain an ImageNet-pretrained InceptionV3 on a new classification task.

    ``train`` reads the module-level arrays ``train_tensors``, ``y_train``,
    ``valid_tensors`` and ``y_valid``.
    """

    def add_new_last_layers(self, base_model, num_classes):
        """Attach a new classification head to ``base_model``.

        Args:
            base_model: Keras model whose output feeds the new layers.
            num_classes: number of units of the softmax output layer.

        Returns:
            A new Keras ``Model`` going from ``base_model.input`` to the
            softmax predictions.
        """
        # Global spatial average pooling over the base model's output.
        x = base_model.output
        x = GlobalAveragePooling2D()(x)

        # Fully connected layer with 1024 units.
        x = Dense(1024, activation='relu')(x)

        # Output layer: one softmax unit per class.
        predictions = Dense(num_classes, activation='softmax')(x)

        # Keras 2 names these arguments ``inputs``/``outputs``; the singular
        # ``input``/``output`` spelling is a deprecated legacy alias.
        model = Model(inputs=base_model.input, outputs=predictions)
        return model

    def freeze_previous_layers(self, model, base_model):
        """Freeze every layer of ``base_model`` and compile ``model``.

        Only the newly added top layers remain trainable.
        """
        for layer in base_model.layers:
            layer.trainable = False

        # rmsprop with default settings and multi-class cross-entropy.
        model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    def fine_tune_model(self, model):
        """Unfreeze the upper layers of ``model`` and recompile it.

        The first 172 layers stay frozen; every layer after them becomes
        trainable.
        """
        for layer in model.layers[:172]:
            layer.trainable = False
        for layer in model.layers[172:]:
            layer.trainable = True

        # Recompile with SGD and a small learning rate (0.0001) for
        # fine-tuning; the loss stays multi-class cross-entropy.
        model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

    def plot_training(self, history):
        """Plot training and validation accuracy and loss of one fit run.

        Args:
            history: object returned by ``fit_generator``; its ``history``
                dict must hold the accuracy and loss series.
        """
        metrics = history.history
        # The accuracy series is stored under 'acc' or 'accuracy' depending
        # on the Keras version, so accept either spelling.
        acc_key = 'acc' if 'acc' in metrics else 'accuracy'
        acc = metrics[acc_key]
        val_acc = metrics['val_' + acc_key]
        loss = metrics['loss']
        val_loss = metrics['val_loss']
        # One entry per epoch.
        epochs = range(len(acc))

        # Accuracy: training as dots, validation as a line.
        plt.plot(epochs, acc, 'r.', label='training')
        plt.plot(epochs, val_acc, 'r', label='validation')
        plt.title('Training and validation accuracy')
        plt.legend()

        # Loss, in a separate figure.
        plt.figure()
        plt.plot(epochs, loss, 'r.', label='training')
        plt.plot(epochs, val_loss, 'r-', label='validation')
        plt.title('Training and validation loss')
        plt.legend()
        plt.show()

    def train(self, num_classes, batch_size, epochs):
        """Train the new head, then fine-tune, and plot the fine-tuning run.

        Args:
            num_classes: number of output classes.
            batch_size: mini-batch size of both data generators.
            epochs: number of epochs of each of the two training phases.
        """
        # Training generator with augmentation:
        #   preprocessing_function - applied to every input image
        #   rotation_range         - range of random rotations, in degrees
        #   width_shift_range      - range of random horizontal shifts
        #   height_shift_range     - range of random vertical shifts
        #   shear_range            - shear intensity
        #   zoom_range             - range of random zoom
        #   horizontal_flip        - randomly flip images horizontally
        # NOTE(review): train_tensors were already divided by 255 when they
        # were built, while preprocess_input presumably expects raw 0..255
        # pixels. Confirm the intended input scaling.
        train_datagen = ImageDataGenerator(
          preprocessing_function=preprocess_input,
          rotation_range=20,
          width_shift_range=0.2,
          height_shift_range=0.2,
          shear_range=0.2,
          zoom_range=0.2,
          horizontal_flip=True
        )

        # Validation data gets the same preprocessing but no random
        # augmentation, so that validation scores are comparable across epochs.
        valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

        train_generator = train_datagen.flow(train_tensors, y_train, batch_size=batch_size)
        validation_generator = valid_datagen.flow(valid_tensors, y_valid, batch_size=batch_size)

        # Step counts must be whole numbers: round up so that the final,
        # smaller batch is included.
        train_steps = (train_tensors.shape[0] + batch_size - 1) // batch_size
        valid_steps = (valid_tensors.shape[0] + batch_size - 1) // batch_size

        # include_top=False leaves out InceptionV3's final fully connected layer.
        base_model = InceptionV3(weights='imagenet', include_top=False)

        # Attach the new classification head.
        model = self.add_new_last_layers(base_model, num_classes)

        # Freeze all layers of the pretrained base model.
        self.freeze_previous_layers(model, base_model)

        # Checkpoint that keeps only the best weights.
        checkpointer = ModelCheckpoint(filepath='inception_v3.dogs.133.best.weights.h5', 
                                       verbose=1, 
                                       save_best_only=True)

        print("首次训练模型")
        # Phase 1: train only the new head on the new dataset.
        model.fit_generator(train_generator, 
                            steps_per_epoch=train_steps, 
                            validation_steps=valid_steps, 
                            epochs=epochs,
                            verbose=1, 
                            callbacks=[checkpointer], 
                            validation_data=validation_generator)

        # Phase 2: unfreeze the upper layers and train again.
        self.fine_tune_model(model)

        print("微调模型后,再次训练模型")
        history_ft = model.fit_generator(train_generator, 
                                         steps_per_epoch=train_steps, 
                                         validation_steps=valid_steps,
                                         epochs=epochs,
                                         verbose=1, 
                                         callbacks=[checkpointer], 
                                         validation_data=validation_generator)

        # Plot the loss and accuracy of the fine-tuning run.
        self.plot_training(history_ft)

# Mini-batch size of 128 and 5 epochs for each training phase.
batch_size, epochs = 128, 5

# Build the retraining helper and run it on the dog dataset.
incepV3_model = InceptionV3Retrained()
incepV3_model.train(num_classes=num_classes, batch_size=batch_size, epochs=epochs)


# Measure the accuracy of the retrained model.
# Build an InceptionV3 without its top fully connected layer.
test_model = InceptionV3(weights='imagenet', include_top=False, input_shape=test_tensors.shape[1:]) 

# Attach the same classification head that was used for training.
incepV3_model = InceptionV3Retrained()
trained_model = incepV3_model.add_new_last_layers(test_model, num_classes)

# Load the weights saved during training.
trained_model.load_weights("inception_v3.dogs.133.best.weights.h5") 

# Compile the model.
trained_model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

# summary() would print the complete InceptionV3 architecture.
# trained_model.summary()

# The training generators pass every image through preprocess_input, so the
# test images need the same transformation. A copy is used because
# preprocess_input may modify its argument in place.
test_inputs = preprocess_input(np.copy(test_tensors))

# Evaluate the model: the loss is printed as-is, the accuracy as a percentage.
score = trained_model.evaluate(test_inputs, y_test, verbose=1)
print("Test {}: {:.2f}. Test {}: {:.2f}.".format(trained_model.metrics_names[0], 
                                                 score[0], 
                                                 trained_model.metrics_names[1], 
                                                 score[1]*100))

# Predict the breed of the dog in one image.
def predict_dog_breed(model, img_path):
    """Predict the three most likely breeds for an image and display them.

    Prints the top three probabilities and shows the image with the three
    breed names and percentages written beside it. Reads the module-level
    ``dog_names`` list and uses ``path_to_tensor`` to load the image.

    Args:
        model: trained Keras model with one output per breed.
        img_path: path of the image file.
    """
    # Load the image as a (1, 224, 224, 3) tensor and apply the same scaling
    # the training data received: division by 255, then preprocess_input.
    x = preprocess_input(path_to_tensor(img_path).astype(np.float32) / 255)
    # Probabilities for the single image of the batch.
    predictions = model.predict(x)
    prediction_list = predictions[0]

    # Indices of the three largest probabilities, highest first. Sorting the
    # indices of the full vector keeps them valid for dog_names; removing
    # each maximum with np.delete would shift the later indices.
    top_3_argmax = np.argsort(prediction_list)[::-1][:3]
    top_3_max_val = prediction_list[top_3_argmax]
    dog_titles = [dog_names[index] for index in top_3_argmax]

    print('前3个最大值: {}'.format(top_3_max_val))

#     # Uncomment these three lines to display a bar chart as well
#     plt.barh(np.arange(3), top_3_max_val)
#     plt.yticks(np.arange(3), dog_titles)
#     plt.show()

    # Create the figure and set its overall size.
    fig, ax = plt.subplots()
    fig.set_size_inches(5, 5)
    # Multiply by 100 to express the probabilities as percentages.
    top_3_percent = top_3_max_val * 100
    # One "<breed>: <percent>%" line per prediction.
    dog_title = "".join("{}: {:.2f}%\n".format(title, percent)
                        for title, percent in zip(dog_titles, top_3_percent))
    # Write the text just outside the upper-right corner of the axes.
    ax.text(1.01, 0.8, 
            dog_title, 
            horizontalalignment='left', 
            verticalalignment='bottom',
            transform=ax.transAxes)
    # Read the image through pyplot (the bare name ``matplotlib`` is never
    # imported in this file) and show it on the axes.
    img = plt.imread(img_path)
    ax.imshow(img)

你可能感兴趣的:(人工智能,Python,深度学习,tensorflow,人工智能,神经网络,计算机视觉)