Preface: the development environment for this article is Windows 10 + CUDA 9.0 + cuDNN + PyCharm + Python 3.5 + TensorFlow 1.9.
Reference video: kevinrush
I. The data:
CIFAR-10 download page:
http://www.cs.toronto.edu/~kriz/cifar.html
The training set has 50,000 images and the test set has 10,000; all are 32*32 and fall into 10 classes (airplane, bird, cat, dog, and so on).
The dataset comes in three versions: Python, MATLAB, and binary. The difference is as follows:
The binary version can be read one record (one image) at a time, as shown later in this article. The Python and MATLAB versions cannot be read image by image; a whole batch file has to be loaded into memory at once. The official site describes how to read them with pickle (cPickle), for example:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict
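For reference, a quick usage sketch of unpickle (the file path below is an assumed local path; in the Python version each batch file unpickles to a dict whose b'data' entry is a 10000x3072 uint8 array and whose b'labels' entry is a list of 10000 integers):

# Usage sketch for unpickle(); 'cifar-10-batches-py/data_batch_1' is an assumed local path.
import numpy as np

batch = unpickle('cifar-10-batches-py/data_batch_1')
data = batch[b'data']        # uint8, shape (10000, 3072): 1024 R + 1024 G + 1024 B values per row
labels = batch[b'labels']    # list of 10000 integer labels in [0, 9]

# Rebuild the first image as a 32x32x3 (H, W, C) array, e.g. for plt.imshow.
img0 = data[0].reshape(3, 32, 32).transpose(1, 2, 0)
print(img0.shape, labels[0])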
This article uses the binary version. Each image occupies 3073 bytes, and the first byte of each record is the image's class label. After extraction there are five training batch files, one test batch file, a readme pointing to the University of Toronto page, and a file listing the label names.
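To see the 3073-byte layout concretely, a single record can also be decoded with plain NumPy. This is just a sketch (the path is an assumed location of the extracted files); the channel-major reshape and transpose mirror what read_cifar10 below does with tf.reshape and tf.transpose:

# Sketch: decode the first record of a binary batch with NumPy only.
# 'cifar-10-batches-bin/data_batch_1.bin' is an assumed local path.
import numpy as np

record_bytes = 1 + 32 * 32 * 3   # 1 label byte + 3072 image bytes = 3073
with open('cifar-10-batches-bin/data_batch_1.bin', 'rb') as f:
    record = np.frombuffer(f.read(record_bytes), dtype=np.uint8)

label = int(record[0])                                    # class id in [0, 9]
image = record[1:].reshape(3, 32, 32).transpose(1, 2, 0)  # CHW -> HWC
print(label, image.shape)                                 # (32, 32, 3)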
II. The code:
1. cifar10_input.py
This file reads the CIFAR-10 binary files. The code in the original video has a few errors; they are pointed out in the comments.
import tensorflow as tf
import numpy as np
import os

#%% Reading data

def read_cifar10(data_dir, is_train, batch_size, shuffle):
    """Read CIFAR10

    Args:
        data_dir: the directory of CIFAR10
        is_train: boolean
        batch_size:
        shuffle:
    Returns:
        label_batch: 2D tensor, [batch_size, n_classes], tf.float32 (one-hot)
        images: 4D tensor, [batch_size, height, width, 3], tf.float32
    """
    img_width = 32
    img_height = 32
    img_depth = 3
    label_bytes = 1
    image_bytes = img_width * img_height * img_depth

    with tf.name_scope('input'):

        if is_train:
            filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % ii)
                         for ii in np.arange(1, 6)]
        else:
            filenames = [os.path.join(data_dir, 'test_batch.bin')]

        filename_queue = tf.train.string_input_producer(filenames)

        reader = tf.FixedLengthRecordReader(label_bytes + image_bytes)

        key, value = reader.read(filename_queue)

        record_bytes = tf.decode_raw(value, tf.uint8)

        label = tf.slice(record_bytes, [0], [label_bytes])
        label = tf.cast(label, tf.int32)

        image_raw = tf.slice(record_bytes, [label_bytes], [image_bytes])
        image_raw = tf.reshape(image_raw, [img_depth, img_height, img_width])
        image = tf.transpose(image_raw, (1, 2, 0))  # convert from D/H/W to H/W/D
        image = tf.cast(image, tf.float32)

        # # data augmentation
        # image = tf.random_crop(image, [24, 24, 3])  # randomly crop the image to 24 x 24
        # image = tf.image.random_flip_left_right(image)
        # image = tf.image.random_brightness(image, max_delta=63)
        # image = tf.image.random_contrast(image, lower=0.2, upper=1.8)

        image = tf.image.per_image_standardization(image)  # subtract the mean and divide by the adjusted stddev

        if shuffle:
            images, label_batch = tf.train.shuffle_batch(
                [image, label],
                batch_size=batch_size,
                num_threads=16,
                capacity=2000,
                min_after_dequeue=1500)
        else:
            images, label_batch = tf.train.batch(
                [image, label],
                batch_size=batch_size,
                num_threads=16,
                capacity=2000)
        # return images, tf.reshape(label_batch, [batch_size])

        ## ONE-HOT
        n_classes = 10
        label_batch = tf.one_hot(label_batch, depth=n_classes)

        return images, tf.reshape(label_batch, [batch_size, n_classes])
#%% TEST
# To test the generated batches of images
# When training the model, DO comment out the following code

# import matplotlib.pyplot as plt
#
# data_dir = r'E:\datasets\cifar-10-batches-bin'  # path to the dataset; change this to your own
# BATCH_SIZE = 10
# image_batch, label_batch = read_cifar10(data_dir,
#                                         is_train=True,
#                                         batch_size=BATCH_SIZE,
#                                         shuffle=True)
#
# with tf.Session() as sess:
#     i = 0
#     coord = tf.train.Coordinator()
#     threads = tf.train.start_queue_runners(coord=coord)
#
#     try:
#         while not coord.should_stop() and i < 1:
#
#             img, label = sess.run([image_batch, label_batch])
#
#             # just test one batch
#             for j in np.arange(BATCH_SIZE):
#                 print('label: %s' % label[j])  # the original video used %d, which now errors because the label is a one-hot vector
#                 plt.imshow(img[j, :, :, :])
#                 plt.show()
#             i += 1
#
#     except tf.errors.OutOfRangeError:
#         print('done!')
#     finally:
#         coord.request_stop()
#     coord.join(threads)
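One note on tf.image.per_image_standardization: it does not divide by the variance; it subtracts the per-image mean and divides by max(stddev, 1/sqrt(number of pixels)), which keeps a constant image from causing a division by zero. A rough NumPy equivalent (a sketch, not the exact TF kernel):

# Rough NumPy sketch of what per_image_standardization computes.
import numpy as np

def standardize(image):
    image = image.astype(np.float32)
    adjusted_stddev = max(image.std(), 1.0 / np.sqrt(image.size))
    return (image - image.mean()) / adjusted_stddev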
2. cifar10_train.py
This file trains the model.
import os
import os.path
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import math
import numpy as np
import tensorflow as tf
import cifar10_input
#%%
BATCH_SIZE = 128
learning_rate = 0.05
MAX_STEP = 10000 # with this setting, it took less than 30 mins on my laptop to train.
#%%
def inference(images):
    '''
    Args:
        images: 4D tensor [batch_size, img_width, img_height, img_channel]
    Notes:
        In each conv layer, the kernel shape is:
        [kernel_size, kernel_size, number of input channels, number of output channels].
        The number of input channels comes from the previous layer; if the previous layer
        is the input layer, it is the number of image channels.
    '''
    # conv1, [3, 3, 3, 96]: the first two dimensions are the patch size,
    # the next is the number of input channels,
    # the last is the number of output channels
    with tf.variable_scope('conv1') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 3, 96],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.05, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[96],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.0))
        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)

    # pool1 and norm1
    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001/9.0,
                          beta=0.75, name='norm1')

    # conv2
    with tf.variable_scope('conv2') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 96, 64],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.05, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[64],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')

    # pool2 and norm2
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001/9.0,
                          beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1],
                               padding='SAME', name='pooling2')

    # local3 (fully connected)
    with tf.variable_scope('local3') as scope:
        reshape = tf.reshape(pool2, shape=[BATCH_SIZE, -1])
        dim = reshape.get_shape()[1].value
        weights = tf.get_variable('weights',
                                  shape=[dim, 384],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.004, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[384],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)

    # local4 (fully connected)
    with tf.variable_scope('local4') as scope:
        weights = tf.get_variable('weights',
                                  shape=[384, 192],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.004, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[192],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')

    # softmax
    with tf.variable_scope('softmax_linear') as scope:
        weights = tf.get_variable('softmax_linear',
                                  shape=[192, 10],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.004, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[10],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')

    return softmax_linear
#%%
def losses(logits, labels):
    with tf.variable_scope('loss') as scope:
        labels = tf.cast(labels, tf.int64)

        # to use this loss function, one-hot encoding is needed!
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='xentropy_per_example')

        # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        #     logits=logits, labels=labels, name='xentropy_per_example')

        loss = tf.reduce_mean(cross_entropy, name='loss')
        tf.summary.scalar(scope.name + '/loss', loss)
    return loss
#%% Train the model on the training data
# you need to change the training data directory below
def train():

    my_global_step = tf.Variable(0, name='global_step', trainable=False)

    data_dir = r'E:\datasets\cifar-10-batches-bin'
    log_dir = './log/train/'

    images, labels = cifar10_input.read_cifar10(data_dir=data_dir,
                                                is_train=True,
                                                batch_size=BATCH_SIZE,
                                                shuffle=True)
    logits = inference(images)
    loss = losses(logits, labels)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.minimize(loss, global_step=my_global_step)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    summary_writer = tf.summary.FileWriter(log_dir, sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            _, loss_value = sess.run([train_op, loss])

            if step % 50 == 0:
                print('Step: %d, loss: %.4f' % (step, loss_value))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()


train()
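Because the summaries and checkpoints are written to ./log/train/, training can be watched with TensorBoard by running tensorboard --logdir=./log/train and opening the URL it prints.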
3. cifar10_elv.py
This part is about evaluating the model. The code below is adapted from the official TensorFlow CIFAR-10 tutorial and depends on the tutorial's cifar10 module.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datetime import datetime
import os.path
import time

import tensorflow.python.platform
from tensorflow.python.platform import gfile

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

import cifar10

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string('train_dir', 'data/train',
                           """Directory where to write event logs """
                           """and checkpoint.""")
tf.app.flags.DEFINE_integer('max_steps', 1000000,
                            """Number of batches to run.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
                            """Whether to log device placement.""")


def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels for CIFAR-10.
        # Preprocesses the input images: random brightness, contrast, flips, etc.
        images, labels = cifar10.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cifar10.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        # Iterate for the configured number of steps.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            # Every 10 steps, print the step, loss, and timing statistics.
            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            # Every 100 steps, record the state of the network in the summaries.
            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically (every 1000 steps).
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)


def main(argv=None):  # pylint: disable=unused-argument
    # Check whether the dataset is present; download and extract it if not.
    cifar10.maybe_download_and_extract()
    # Delete any existing training logs.
    if gfile.Exists(FLAGS.train_dir):
        gfile.DeleteRecursively(FLAGS.train_dir)
    gfile.MakeDirs(FLAGS.train_dir)
    # Train.
    train()


if __name__ == '__main__':
    tf.app.run()
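The evaluation step itself is not spelled out above, so here is a rough sketch only: it assumes the read_cifar10 from section 1 and the inference from section 2 are available in the same file (for example appended to cifar10_train.py in place of the final train() call), that BATCH_SIZE is the same 128 used there, and that checkpoints were saved to ./log/train/:

# Sketch of a test-set evaluation loop; all paths and names follow the code above.
import math

N_TEST = 10000  # size of test_batch.bin

def evaluate():
    with tf.Graph().as_default():
        # is_train=False reads test_batch.bin; labels come back one-hot encoded.
        images, labels = cifar10_input.read_cifar10(data_dir=r'E:\datasets\cifar-10-batches-bin',
                                                    is_train=False,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False)
        logits = inference(images)
        # Compare the predicted class with the argmax of the one-hot label.
        correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))

        saver = tf.train.Saver(tf.global_variables())
        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state('./log/train/')
            saver.restore(sess, ckpt.model_checkpoint_path)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                num_iter = int(math.ceil(N_TEST / BATCH_SIZE))
                true_count, total = 0, 0
                for _ in range(num_iter):
                    true_count += np.sum(sess.run(correct))
                    total += BATCH_SIZE
                print('test accuracy: %.4f' % (true_count / total))
            finally:
                coord.request_stop()
                coord.join(threads)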