Import the required libraries and define the variables we will need:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # must be set before importing tensorflow to take effect
import tensorflow as tf
import numpy as np
import time
import math
num_classes = 10 # there are 10 classes in total
num_examples_pre_epoch_for_train = 50000 # 50,000 samples for training
num_examples_pre_epoch_for_eval = 10000 # 10,000 samples for testing
max_steps = 4000 # train for 4,000 steps
batch_size = 100 # 100 samples per training step
num_examples_for_eval = 10000
data_dir = "C:/Users/Administrator/Desktop/Tensorflow/cifar-10-batches-bin" # path to the downloaded dataset
class CIFAR10Record(object): # an empty class used to hold the CIFAR-10 data we read
    pass
Next, define a read_cifar10() function that reads data from the file queue:
def read_cifar10(file_queue): # file_queue is the queue of data files to read from
    result = CIFAR10Record() # create a CIFAR10Record object
    label_bytes = 1 # the label takes one byte
    result.height = 32 # image height in pixels
    result.width = 32 # image width in pixels
    result.depth = 3 # RGB images, so the depth is 3
    image_bytes = result.height * result.width * result.depth # 3072: the size of one image in bytes
    record_bytes = label_bytes + image_bytes # with the label, one record is 3073 bytes
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes) # FixedLengthRecordReader reads fixed-length byte records (suited to .bin files)
    result.key, value = reader.read(file_queue) # read() returns the next record from the files in the queue
    # value is a string of record_bytes bytes holding one label followed by one image
    record_bytes = tf.decode_raw(value, tf.uint8)
    # decode_raw() parses the string into the corresponding array of pixel bytes
    # strided_slice(input, begin, end) extracts the half-open interval [begin, end) from input
    result.label = tf.cast(tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)
    # take the first element of record_bytes and cast it to int32
    # after stripping the label, the rest is image data; reshape it from [depth*height*width] to [depth, height, width]
    depth_major = tf.reshape(tf.strided_slice(record_bytes, [label_bytes], [label_bytes + image_bytes]),
                             [result.depth, result.height, result.width])
    # convert the [depth, height, width] layout to [height, width, depth]
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])
    return result
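To make the byte layout concrete, here is a minimal NumPy sketch (not part of the pipeline; the local file path is an assumption) that decodes one record the same way the graph ops above do:

import numpy as np

record_bytes = 1 + 32 * 32 * 3 # 3073 bytes: 1 label byte + 3072 image bytes
with open("cifar-10-batches-bin/data_batch_1.bin", "rb") as f: # hypothetical local path
    raw = np.frombuffer(f.read(record_bytes), dtype=np.uint8)

label = int(raw[0]) # first byte: the class label, 0-9
depth_major = raw[1:].reshape(3, 32, 32) # image bytes are stored as [depth, height, width]
image = depth_major.transpose(1, 2, 0) # reorder to [height, width, depth], like tf.transpose above
print(label, image.shape) # e.g. 6 (32, 32, 3)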
Right after read_cifar10() comes the inputs() function, which builds the list of file paths, passes them to read_cifar10() to read samples, and applies data-augmentation processing to the samples it reads back.
def inputs(data_dir, batch_size, distorted): # data_dir: dataset path; batch_size: size of each batch; distorted: whether to augment the samples
    # build the list of file paths: the five training batches when augmenting for training,
    # otherwise the held-out test batch
    if distorted is not None:
        filenames = [os.path.join(data_dir, "data_batch_%d.bin" % i) for i in range(1, 6)]
    else:
        filenames = [os.path.join(data_dir, "test_batch.bin")]
    # create a file queue and read from it with read_cifar10(); files are only actually
    # read once tf.train.start_queue_runners() is called later
    file_queue = tf.train.string_input_producer(filenames)
    read_input = read_cifar10(file_queue)
    # cast the image data to float32 with tf.cast()
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)
    num_examples_per_epoch = num_examples_pre_epoch_for_train
    if distorted is not None:
        # randomly crop the [32, 32, 3] image down to [24, 24, 3]
        cropped_image = tf.random_crop(reshaped_image, [24, 24, 3])
        # randomly flip the image left to right
        flipped_image = tf.image.random_flip_left_right(cropped_image)
        # randomly adjust brightness
        adjusted_brightness = tf.image.random_brightness(flipped_image, max_delta=0.8)
        # randomly adjust contrast
        adjusted_contrast = tf.image.random_contrast(adjusted_brightness, lower=0.2, upper=1.8)
        # standardize the image: subtract the mean and divide by the standard deviation of its pixels
        float_image = tf.image.per_image_standardization(adjusted_contrast)
        # set the static shapes of the image and label
        float_image.set_shape([24, 24, 3])
        read_input.label.set_shape([1])
        min_queue_examples = int(num_examples_pre_epoch_for_eval * 0.4)
        print("Filling queue with %d CIFAR images before starting to train. This will take a few minutes." % min_queue_examples)
        # shuffle_batch() builds batches by randomly shuffling the queued tensors
        images_train, labels_train = tf.train.shuffle_batch([float_image, read_input.label], batch_size=batch_size,
                                                            num_threads=16,
                                                            capacity=min_queue_examples + 3 * batch_size,
                                                            min_after_dequeue=min_queue_examples)
        return images_train, tf.reshape(labels_train, [batch_size])
    # no data augmentation for the evaluation data
    else:
        resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image, 24, 24)
        float_image = tf.image.per_image_standardization(resized_image)
        float_image.set_shape([24, 24, 3])
        read_input.label.set_shape([1])
        min_queue_examples = int(num_examples_per_epoch * 0.4)
        images_test, labels_test = tf.train.batch([float_image, read_input.label], batch_size=batch_size,
                                                  num_threads=16, capacity=min_queue_examples + 3 * batch_size)
        return images_test, tf.reshape(labels_test, [batch_size])
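As a quick check that the pipeline works, the sketch below (an illustration, assuming this file is saved as Cifar10_data.py and the data sits in a local cifar-10-batches-bin directory) pulls a single batch:

import tensorflow as tf
import Cifar10_data

images, labels = Cifar10_data.inputs(data_dir="cifar-10-batches-bin", batch_size=100, distorted=True)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord) # start filling the queues
    image_batch, label_batch = sess.run([images, labels])
    print(image_batch.shape, label_batch.shape) # (100, 24, 24, 3) (100,)
    coord.request_stop()
    coord.join(threads)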
The training script again imports the required libraries, including the Cifar10_data module defined above (save that code as Cifar10_data.py alongside this script), and defines some variables:
import tensorflow as tf
import numpy as np
import time
import math
import Cifar10_data
max_steps = 300000 # maximum number of training steps; in practice the loss is close to 0 by around 150,000 steps, so adjust as needed
batch_size = 32 # reduce this (to 12 or even below 10) if GPU memory is insufficient
data_dir = "/content/drive/My Drive/cifar-10-batches-bin/" # path where the CIFAR-10 data is stored
num_examples_for_eval = 10000 # number of test samples used in the evaluation loop below
def conv_op(input, name, kernel_h, kernel_w, num_out, step_h, step_w, para):
    num_in = input.get_shape()[-1].value # num_in is the input depth, used as the number of input channels of the kernel
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(scope + "w", shape=[kernel_h, kernel_w, num_in, num_out], dtype=tf.float32,
                                 initializer=tf.contrib.layers.xavier_initializer_conv2d())
        conv = tf.nn.conv2d(input, kernel, (1, step_h, step_w, 1), padding="SAME")
        biases = tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[num_out]), trainable=True, name="b")
        activation = tf.nn.relu(tf.nn.bias_add(conv, biases), name=scope)
        para += [kernel, biases]
        return activation
def fc_op(input, name, num_out, para):
    num_in = input.get_shape()[-1].value # num_in is the number of input units
    with tf.name_scope(name) as scope:
        weights = tf.get_variable(scope + "w", shape=[num_in, num_out], dtype=tf.float32,
                                  initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[num_out]), name="b")
        activation = tf.nn.relu_layer(input, weights, biases) # tf.nn.relu_layer() fuses the matrix multiply, bias add, and ReLU activation
        para += [weights, biases]
        return activation
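If tf.nn.relu_layer() is unfamiliar, this short standalone sketch (an illustration, not part of this script) confirms it matches the explicit matmul-add-ReLU form:

import numpy as np
import tensorflow as tf

x = tf.constant(np.random.randn(2, 4), dtype=tf.float32)
w = tf.constant(np.random.randn(4, 3), dtype=tf.float32)
b = tf.constant(np.zeros(3), dtype=tf.float32)
fused = tf.nn.relu_layer(x, w, b) # fused matmul + bias + ReLU
manual = tf.nn.relu(tf.matmul(x, w) + b) # the explicit three-step form
with tf.Session() as sess:
    print(np.allclose(*sess.run([fused, manual]))) # True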
def inference_op_loss(input, keep_prob, y_):
    parameters = []
    # first convolutional block; with the 24*24*3 inputs produced by inputs(), the output is 12*12*64 (batch dimension omitted)
    conv1_1 = conv_op(input, name="conv1_1", kernel_h=3, kernel_w=3, num_out=64, step_h=1, step_w=1, para=parameters)
    conv1_2 = conv_op(conv1_1, name="conv1_2", kernel_h=3, kernel_w=3, num_out=64, step_h=1, step_w=1, para=parameters)
    pool1 = tf.nn.max_pool(conv1_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME", name="pool1")
    print(pool1.op.name, ' ', pool1.get_shape().as_list())
    # second convolutional block, output 6*6*128 (batch dimension omitted)
    conv2_1 = conv_op(pool1, name="conv2_1", kernel_h=3, kernel_w=3, num_out=128, step_h=1, step_w=1, para=parameters)
    conv2_2 = conv_op(conv2_1, name="conv2_2", kernel_h=3, kernel_w=3, num_out=128, step_h=1, step_w=1, para=parameters)
    pool2 = tf.nn.max_pool(conv2_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME", name="pool2")
    print(pool2.op.name, ' ', pool2.get_shape().as_list())
    # third convolutional block, output 3*3*256 (batch dimension omitted)
    conv3_1 = conv_op(pool2, name="conv3_1", kernel_h=3, kernel_w=3, num_out=256, step_h=1, step_w=1, para=parameters)
    conv3_2 = conv_op(conv3_1, name="conv3_2", kernel_h=3, kernel_w=3, num_out=256, step_h=1, step_w=1, para=parameters)
    conv3_3 = conv_op(conv3_2, name="conv3_3", kernel_h=3, kernel_w=3, num_out=256, step_h=1, step_w=1, para=parameters)
    pool3 = tf.nn.max_pool(conv3_3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME", name="pool3")
    print(pool3.op.name, ' ', pool3.get_shape().as_list())
    # fourth convolutional block, output 2*2*512 (batch dimension omitted)
    conv4_1 = conv_op(pool3, name="conv4_1", kernel_h=3, kernel_w=3, num_out=512, step_h=1, step_w=1, para=parameters)
    conv4_2 = conv_op(conv4_1, name="conv4_2", kernel_h=3, kernel_w=3, num_out=512, step_h=1, step_w=1, para=parameters)
    conv4_3 = conv_op(conv4_2, name="conv4_3", kernel_h=3, kernel_w=3, num_out=512, step_h=1, step_w=1, para=parameters)
    pool4 = tf.nn.max_pool(conv4_3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME", name="pool4")
    print(pool4.op.name, ' ', pool4.get_shape().as_list())
    # fifth convolutional block, output 1*1*512 (batch dimension omitted)
    conv5_1 = conv_op(pool4, name="conv5_1", kernel_h=3, kernel_w=3, num_out=512, step_h=1, step_w=1, para=parameters)
    conv5_2 = conv_op(conv5_1, name="conv5_2", kernel_h=3, kernel_w=3, num_out=512, step_h=1, step_w=1, para=parameters)
    conv5_3 = conv_op(conv5_2, name="conv5_3", kernel_h=3, kernel_w=3, num_out=512, step_h=1, step_w=1, para=parameters)
    pool5 = tf.nn.max_pool(conv5_3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME", name="pool5")
    print(pool5.op.name, ' ', pool5.get_shape().as_list())
    # flatten pool5 into a matrix for the fully connected layers
    pool_shape = pool5.get_shape().as_list()
    flattened_shape = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool5, shape=[-1, flattened_shape], name="reshaped")
    print(reshaped.op.name, ' ', reshaped.get_shape().as_list())
    # first fully connected layer
    fc_6 = fc_op(reshaped, name="fc6", num_out=4096, para=parameters)
    fc_6_drop = tf.nn.dropout(fc_6, keep_prob, name="fc6_drop")
    # second fully connected layer
    fc_7 = fc_op(fc_6_drop, name="fc7", num_out=4096, para=parameters)
    fc_7_drop = tf.nn.dropout(fc_7, keep_prob, name="fc7_drop")
    # third fully connected layer: the 10-way output layer, built without ReLU so fc_8 serves as the logits
    fc_8_weights = tf.get_variable("fc8w", shape=[fc_7_drop.get_shape()[-1], 10], dtype=tf.float32,
                                   initializer=tf.contrib.layers.xavier_initializer())
    fc_8_biases = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[10]), name="b")
    fc_8 = tf.add(tf.matmul(fc_7_drop, fc_8_weights), fc_8_biases)
    parameters += [fc_8_weights, fc_8_biases]
    # cross-entropy loss, Adam optimizer, and a top-1 correctness op for evaluation
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=fc_8, labels=tf.cast(y_, tf.int64))
    loss = tf.reduce_mean(cross_entropy, name="Train_cost")
    train_op = tf.train.AdamOptimizer(0.001).minimize(loss)
    top_k_op = tf.nn.in_top_k(fc_8, y_, 1)
    return train_op, loss, top_k_op, parameters
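Because the graph is assembled from small helpers, a handy sanity check (an illustrative sketch of my own, not from the book) is to total the trainable parameters once the graph has been built, e.g. right after calling inference_op_loss():

import numpy as np
import tensorflow as tf

def count_trainable_parameters():
    # sum the element counts of every trainable variable in the current graph
    return sum(int(np.prod(v.get_shape().as_list())) for v in tf.trainable_variables())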
with tf.Graph().as_default():
    images_train, labels_train = Cifar10_data.inputs(data_dir=data_dir, batch_size=batch_size, distorted=True) # training data
    images_test, labels_test = Cifar10_data.inputs(data_dir=data_dir, batch_size=batch_size, distorted=None) # data used to measure accuracy
    x = tf.placeholder(tf.float32, [batch_size, 24, 24, 3]) # inputs() produces 24*24*3 images
y_=tf.placeholder(tf.int32,[batch_size])
    train_op, loss, top_k_op, parameters = inference_op_loss(x, keep_prob=1.0, y_=y_) # keep_prob=1.0 effectively disables dropout; feed a placeholder here to enable it during training
    saver = tf.train.Saver() # used below to checkpoint the model
    config = tf.ConfigProto()
    config.gpu_options.allocator_type = "BFC"
    init_op = tf.global_variables_initializer()
with tf.Session(config=config) as sess:
sess.run(init_op)
        tf.train.start_queue_runners() # start the queue-runner threads that feed the input pipeline
print(sess.run(tf.report_uninitialized_variables()))
for step in range(max_steps):
start_time=time.time()
image_batch,label_batch=sess.run([images_train,labels_train])
_,loss_value=sess.run([train_op,loss],feed_dict={x:image_batch,y_:label_batch})
duration=time.time()-start_time
            if step % 100 == 0: # every 100 steps, print the loss, throughput in examples/sec, and time per batch
                examples_per_sec = batch_size / duration
                sec_per_batch = float(duration)
                print("Step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)" % (step, loss_value, examples_per_sec, sec_per_batch))
            if step % 1000 == 0 and step > 0: # every 1000 steps, evaluate the model's accuracy on the test set
                num_batch = int(math.ceil(num_examples_for_eval / batch_size))
                true_count = 0
                total_sample_count = num_batch * batch_size
                # count all correctly predicted samples in one loop
                for j in range(num_batch):
                    image_batch, label_batch = sess.run([images_test, labels_test])
                    predictions = sess.run([top_k_op], feed_dict={x: image_batch, y_: label_batch})
                    true_count += np.sum(predictions)
                # print the accuracy
                print("accuracy = %.3f%%" % ((true_count / total_sample_count) * 100))
                saver.save(sess, "/content/drive/My Drive/使用VGGNet跑Cifar10数据集/model_path_MovingAverage/model.ckpt", global_step=step)
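To evaluate a saved model later, here is a minimal sketch of restoring the latest checkpoint, assuming the same graph-building code has been run first and using the save directory from above:

with tf.Session() as sess:
    restorer = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint("/content/drive/My Drive/使用VGGNet跑Cifar10数据集/model_path_MovingAverage")
    if ckpt:
        restorer.restore(sess, ckpt) # restore all saved variables into the current graph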
Reference: 《TensorFlow深度学习算法原理与编程实战》, by 蒋子阳.