Classifying the CIFAR-10 Dataset with a Convolutional Neural Network

Importing the CIFAR dataset

import os
import numpy as np
import pickle as p
'''
The dataset directory holds 5 training files, each containing 10000 samples,
so the CIFAR data is loaded batch by batch, 10000 records at a time.
'''

def load_CIFAR_batch(filename):
    '''Load a single batch of CIFAR-10'''
    with open(filename, 'rb') as f:
        # Each sample consists of a label and the image data
        data_dict = p.load(f, encoding='bytes')
        images = data_dict[b'data']
        labels = data_dict[b'labels']
        
        # Reshape the raw data to NCHW order -- N: batch, C: channels, H: height, W: width
        images = images.reshape(10000, 3, 32, 32)
        # TensorFlow handles image data in NHWC order, so move the channel axis C to the last dimension
        images = images.transpose(0,2,3,1)
        
        labels = np.array(labels)
        
        return images, labels
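
The reshape/transpose step is easy to get wrong. A minimal self-contained check on synthetic data (my own illustration, not from the original post) shows that the channel values do end up on the last axis:

import numpy as np

# One fake "image" whose red channel is all 0s, green all 1s, blue all 2s,
# laid out channel-first the way the CIFAR files store it
raw = np.concatenate([np.full(32 * 32, c) for c in range(3)])  # shape (3072,)
img = raw.reshape(1, 3, 32, 32).transpose(0, 2, 3, 1)          # NCHW -> NHWC
print(img.shape)     # (1, 32, 32, 3)
print(img[0, 0, 0])  # [0 1 2] -- the R, G, B values of the top-left pixel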
    
'''
Loop over all batches to read the full training set.
'''
def load_CIFAR_data(data_dir):
    images_train = []
    labels_train = []
    
    for i in range(5):
        f = os.path.join(data_dir, 'data_batch_%d' % (i+1))
        print('loading', f)
        
        # Call load_CIFAR_batch() to get one batch of images and their labels
        image_batch, label_batch = load_CIFAR_batch(f)
        
        images_train.append(image_batch)
        labels_train.append(label_batch)
        del image_batch, label_batch

    # Concatenate once, after all five batches have been read
    Xtrain = np.concatenate(images_train)
    Ytrain = np.concatenate(labels_train)

    Xtest, Ytest = load_CIFAR_batch(os.path.join(data_dir, 'test_batch'))
    print('finished loading CIFAR-10 data')
    return Xtrain, Ytrain, Xtest, Ytest

data_dir = 'data/cifar-10-batches-py/'
Xtrain,Ytrain,Xtest,Ytest = load_CIFAR_data(data_dir)

Displaying dataset information

print('training data shape:', Xtrain.shape)
print('training labels shape:', Ytrain.shape)
print('test data shape:', Xtest.shape)
print('test labels shape:', Ytest.shape)
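
With the full CIFAR-10 dataset, these print (50000, 32, 32, 3) and (50000,) for the training set, and (10000, 32, 32, 3) and (10000,) for the test set.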

Data preprocessing

Image data preprocessing

# Images have three RGB channels, each with pixel values in the range 0-255,
# so divide by 255 to normalize the values into the range 0-1
Xtrain_normalize = Xtrain.astype('float32')/255.0
Xtest_normalize = Xtest.astype('float32')/255.0

# Inspect the normalized RGB values of the top-left pixel of the first test image
Xtest_normalize[0][0][0]
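
A quick range check (a sanity-check sketch I have added, not part of the original post):

assert Xtrain_normalize.min() >= 0.0 and Xtrain_normalize.max() <= 1.0
print(Xtrain_normalize.min(), Xtrain_normalize.max())  # both should lie in [0.0, 1.0]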

Label data preprocessing

One-hot encoding

  • can encode non-continuous (categorical) numeric features
  • also expands the feature space to some extent

from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder(sparse=False)  # note: scikit-learn >= 1.2 renames this argument to sparse_output

yy = [[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]  # the ten possible CIFAR-10 class ids
encoder.fit(yy)

Ytrain_reshape = Ytrain.reshape(-1, 1)
Ytrain_onehot = encoder.transform(Ytrain_reshape)

Ytest_reshape = Ytest.reshape(-1, 1)
Ytest_onehot = encoder.transform(Ytest_reshape)

# Compare the original label values with their one-hot encoded versions
print(Ytrain[:10])
print(Ytrain_onehot[:10])
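
Since the labels are just the integers 0-9, the same encoding can also be built with plain NumPy, without scikit-learn. A minimal alternative sketch (my addition, not part of the original post):

import numpy as np

# Row i of the 10x10 identity matrix is exactly the one-hot vector for class i
Ytrain_onehot_np = np.eye(10)[Ytrain]
print(np.array_equal(Ytrain_onehot_np, Ytrain_onehot))  # should print True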

Defining shared functions

import tensorflow as tf  # TF 1.x API; under TF 2.x use tf.compat.v1 and disable eager execution
# Define the weights
def weight(shape):
    # tf.Variable creates a variable in the graph;
    # its value is updated continually during training.
    # Initialize the weights with a truncated normal distribution (stddev 0.1)
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name="W")

# Define the biases, initialized to 0.1
def bias(shape):
    return tf.Variable(tf.constant(0.1, shape=shape), name="b")

'''
Define the convolution operation:
stride 1, padding 'SAME'
x is the input data, W is the convolution kernel
'''
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


'''
Define the max-pooling operation:
stride 2, so both the width and the height are halved
'''
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
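
With stride-1 SAME convolutions the spatial size is unchanged, and each 2x2 max-pool with stride 2 halves it, since SAME padding gives output size = ceil(input size / stride). A quick arithmetic check of the sizes this network produces (my own sketch, no TensorFlow needed):

import math

def same_out(size, stride):
    # SAME padding: output size = ceil(input size / stride)
    return math.ceil(size / stride)

s = 32
s = same_out(s, 1)   # conv_1: 32
s = same_out(s, 2)   # pool_1: 16
s = same_out(s, 1)   # conv_2: 16
s = same_out(s, 2)   # pool_2: 8
print(s * s * 64)    # flattened length fed into the fully connected layer: 8*8*64 = 4096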

Defining the network structure

# Input layer
# 32*32 images with 3 channels (RGB)
with tf.name_scope('input_layer'):
    x = tf.placeholder('float', shape=[None, 32, 32, 3], name = "X")

# First convolutional layer
# Input channels: 3, output channels: 32 (32 kernels); with SAME padding the image stays 32*32
with tf.name_scope('conv_1'):
    # kernel width, kernel height, input channels, number of kernels (= output channels)
    W1 = weight([3, 3, 3, 32])
    b1 = bias([32])  # one bias per output channel

    # Convolution; the output is 32*32*32
    conv_1 = conv2d(x, W1) + b1
    conv_1 = tf.nn.relu(conv_1)
    
# First pooling layer
# Shrinks the 32*32 image to 16*16
with tf.name_scope('pool_1'):
    pool_1 = max_pool_2x2(conv_1)
    
# Second convolutional layer
# Input channels: 32, output channels: 64; the image size is unchanged
with tf.name_scope('conv_2'):
    W2 = weight([3, 3, 32, 64])
    b2 = bias([64])
    
    conv_2 = conv2d(pool_1, W2) + b2
    conv_2 = tf.nn.relu(conv_2)
    
# Second pooling layer
# Shrinks the 16*16 image to 8*8
with tf.name_scope('pool_2'):
    pool_2 = max_pool_2x2(conv_2)
    

# Fully connected layer
# Flatten the 8*8*64 output of the second pooling layer into a vector of length 8*8*64 = 4096
# 128 neurons
with tf.name_scope('fc'):
    W3 = weight([4096, 128])
    b3 = bias([128])
    flat = tf.reshape(pool_2, [-1, 4096])
    h = tf.nn.relu(tf.matmul(flat, W3) + b3)
    # tf.nn.dropout() reduces overfitting; it is usually applied to fully connected layers
    # (keep_prob is the TF 1.x argument; TF 2.x's tf.nn.dropout takes rate = 1 - keep_prob)
    h_dropout = tf.nn.dropout(h, keep_prob=0.8)
    
# Output layer: 10 classes, so 10 neurons
with tf.name_scope('output_layer'):
    W4 = weight([128, 10])
    b4 = bias([10])
    forward = tf.matmul(h_dropout, W4) + b4
    pred = tf.nn.softmax(forward)
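
Putting the layers together, the shape flow is: input (None, 32, 32, 3) -> conv_1 (None, 32, 32, 32) -> pool_1 (None, 16, 16, 32) -> conv_2 (None, 16, 16, 64) -> pool_2 (None, 8, 8, 64) -> flat (None, 4096) -> fc (None, 128) -> output (None, 10).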

Building the model

with tf.name_scope('optimizer'):
    # Define the placeholder for the labels
    y = tf.placeholder('float', shape=[None, 10], name='label')

    # Define the loss function
    # (this op applies softmax internally, so it takes the raw logits `forward`, not `pred`)
    loss_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=forward, labels=y))

    # Choose the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss_function)

# Define the accuracy metric
with tf.name_scope('evaluation'):
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
    

Defining hyperparameters and launching the session

import os 
from time import time

train_epochs = 25
batch_size = 50
total_batch = int(len(Xtrain) / batch_size)
display_step = 1
epoch_list = []
accuracy_list = []
loss_list = []

startTime = time()

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

Iterative training

for epoch in range(train_epochs):
    for batch in range(total_batch):
        xs = Xtrain_normalize[batch*batch_size:(batch+1)*batch_size]
        ys = Ytrain_onehot[batch*batch_size:(batch+1)*batch_size]
        sess.run(optimizer, feed_dict={x: xs, y: ys})
    
    # Note: loss and accuracy here are measured on the last mini-batch of the epoch
    loss, acc = sess.run([loss_function, accuracy], feed_dict={x: xs, y: ys})
    
    if (epoch+1) % display_step == 0:
        print("Train Epoch:", "%02d" % (epoch+1), "Loss=", "{:.9f}".format(loss), "Accuracy=", "{:.4f}".format(acc))
 
    
    epoch_list.append(epoch+1)
    loss_list.append(loss)
    accuracy_list.append(acc)

duration = time() - startTime

print("Trianing finished takes:", duration)

Reference video: 深度学习应用开发TensorFlow实践 (Deep Learning Application Development: TensorFlow in Practice)
