Practice exercises for learning TensorFlow, covering the common techniques; kept here as a memo.
1. Using MNIST as the dataset, build a simple network of 1 conv layer + 2 residual blocks + 2 fully connected layers, where the residual blocks use the bottleneck structure (this is for practice; it may not actually help).
conv layer -> [-1,28,28,64], max_pooling -> [-1,14,14,64], res1 -> [-1,14,14,128], res2 -> [-1,7,7,256], fc, fc, softmax (see the shape-check sketch right after this list)
2. Use a learning-rate decay schedule and a moving-average scheme, and show them graphically.
3. Show the loss and the learning rate graphically via summaries.
4. Compute the accuracy and show it graphically.
5. Save the model periodically and support loading it for finetuning.
6. Show the computation graph graphically via summaries.
7. Load the data with a queue.
8. Use BN inside the residual blocks.
9. Use dropout in the fully connected layers.
10. Initialize parameters with Xavier initialization.
11. Apply L2 regularization to the parameters.
12. Shuffle the data when reading it.
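As a quick sanity check on the shapes listed in item 1, here is an illustrative sketch (not part of the original script; it assumes the conv/max_pooling/res helpers defined further down in this file, so it can only be called after they exist) that prints the shape after each stage:
def check_shapes():
    # hypothetical helper for shape checking only; the 'check_*' names are arbitrary
    x = tf.placeholder(tf.float32, [None, 28, 28, 1])
    c1 = conv(x, filters=64, kernel_size=[3, 3], strides=1, name='check_conv')
    p1 = max_pooling(c1, [2, 2], 2, 'check_pool')
    r1 = res(p1, 128, 64, 1, 'check_res1')
    r2 = res(r1, 256, 128, 2, 'check_res2')
    print(c1.get_shape())   # (?, 28, 28, 64)
    print(p1.get_shape())   # (?, 14, 14, 64)
    print(r1.get_shape())   # (?, 14, 14, 128)
    print(r2.get_shape())   # (?, 7, 7, 256)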
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
# global constants
# batch size
BATCH_SIZE=200
# base learning rate; a hyperparameter that interacts with many others (batch size, optimizer, BN, input data) and needs hand tuning
LEARN_RATE_BASE=0.2
# learning-rate decay factor (fraction of the previous rate kept per decay step), typically 0.95; also a hyperparameter to tune
LEARN_RATE_DECAY=0.95
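# For reference, tf.train.exponential_decay (used further below, without staircase=True) computes
#   decayed_rate = LEARN_RATE_BASE * LEARN_RATE_DECAY ** (global_step / decay_steps)
# e.g. with decay_steps=1000, at step 2000 the rate is 0.2 * 0.95**2 = 0.1805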
# λ for the L2 regularization term
REGULARIZATION_RATE=0.0001
# total number of training steps (defined but not used below; the input queue's num_epochs controls how long training runs)
TRAINING_STEP=5000
# total number of epochs (also not used below)
EPOCH=60
# moving-average decay, used in BN
MOVING_AVERAGE_DECAY=0.99
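# For reference, a decay-style moving average is updated roughly as
#   moving_mean = MOVING_AVERAGE_DECAY * moving_mean + (1 - MOVING_AVERAGE_DECAY) * batch_mean
# (and likewise for the variance); tf.contrib.layers.batch_norm maintains these estimates internally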
# dropout keep probability; the fully connected layers in this example have few units, so the keep rate is larger than it would be in real use
KEEP_PROB=tf.Variable(0.8,dtype=tf.float32)
def res(inputs,depth,bottleneck_depth,stride,name,is_training=False):
    '''
    bottleneck block:
    1. 1x1, bottleneck_depth conv
    2. BN
    3. relu
    4. 3x3, bottleneck_depth conv
    5. BN
    6. relu
    7. 1x1, depth conv
    8. BN
    shortcut: 1x1, depth conv + BN
    output: relu(bottleneck branch + shortcut)
    :param inputs: input tensor
    :return: output tensor of the block
    '''
with tf.variable_scope(name) as scope:
bottleneck_conv1 = conv(inputs,bottleneck_depth,[1,1],strides=stride,name='bottleneck_conv1')
bottleneck_conv1_bn = bn(bottleneck_conv1,is_training=is_training)
bottleneck_conv1_relu = tf.nn.relu(bottleneck_conv1_bn)
bottleneck_conv2 = conv(bottleneck_conv1_relu, bottleneck_depth, [3, 3], strides=1, name='bottleneck_conv2')
bottleneck_conv2_bn = bn(bottleneck_conv2, is_training=is_training)
bottleneck_conv2_relu = tf.nn.relu(bottleneck_conv2_bn)
bottleneck_conv3 = conv(bottleneck_conv2_relu,depth,[1,1],strides=1,name='bottleneck_conv3')
bottleneck_conv3_bn = bn(bottleneck_conv3, is_training=is_training)
        # if no resizing is involved (stride=1 and matching depth), an identity shortcut could be used instead
        # another option is to downsample by 1/2, which would use far fewer parameters
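        # Illustrative sketch only (not what this script does): an identity shortcut for the no-resizing case
        #   if stride == 1 and int(inputs.get_shape()[-1]) == depth:
        #       shortcut_bn = inputs
        #   else:
        #       shortcut_bn = bn(conv(inputs, depth, [1, 1], strides=stride, name='shortcut'), is_training=is_training)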
shortcut = conv(inputs,depth,[1,1],strides=stride,name='shortcut')
shortcut_bn = bn(shortcut, is_training=is_training)
layers = tf.nn.relu(bottleneck_conv3_bn+shortcut_bn)
return layers
# BN: during training the moving averages of mean/variance are maintained (updated from batch statistics); at inference they are simply used, no updating
def bn(inputs,is_training=False,name='BN' ):
    # BN depends heavily on batch_size: if batch_size is too small, the batch mean/variance fluctuate wildly
    # and the learned beta/gamma end up worthless
    # one remedy is to reduce how much the per-batch mean/variance change, by keeping a moving (historical)
    # average of the mean and variance; that is in fact what is done, the graph stores these running estimates
    '''
    TF offers two BN helpers, one fully automatic and one semi-automatic:
    tf.contrib.layers.batch_norm
    tf.nn.batch_normalization
    '''
    # is_training here can be a Python bool or a tf.bool tensor; tf.contrib.layers.batch_norm accepts both,
    # so pass it through directly (comparing a placeholder with "== True" would never pick the training branch)
    layers = tf.contrib.layers.batch_norm(inputs,decay=MOVING_AVERAGE_DECAY,is_training=is_training)
return layers
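# A minimal sketch of the "semi-automatic" tf.nn.batch_normalization mentioned in the docstring above.
# Illustrative only and not used by this script: it normalizes with the current batch statistics;
# for inference one would feed stored moving averages for mean/variance instead.
def bn_manual(inputs, name='BN_manual'):
    with tf.variable_scope(name):
        channels = int(inputs.get_shape()[-1])
        beta = tf.get_variable('beta', [channels], initializer=tf.zeros_initializer())   # learned shift
        gamma = tf.get_variable('gamma', [channels], initializer=tf.ones_initializer())  # learned scale
        mean, variance = tf.nn.moments(inputs, axes=[0, 1, 2])  # batch statistics over N,H,W (NHWC)
        return tf.nn.batch_normalization(inputs, mean, variance, beta, gamma, 1e-5)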
# conv block
def conv(inputs,filters,kernel_size,strides,name='conv',activation=None):
    # activation defaults to None so that BN can sit between the convolution and the relu inside the
    # residual blocks (and no relu is applied to the branch before the residual addition)
    return tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        strides=strides,
        kernel_size=kernel_size,
        padding='same',
        activation=activation,
        kernel_initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32),
        #kernel_regularizer=tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE),
        bias_initializer=tf.constant_initializer(0),
        #bias_regularizer=tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE),
        name=name)
# fc block
def fc(inputs,units=50,name='fc',activation=tf.nn.relu):
    return tf.layers.dense(inputs=inputs,
                           units=units,
                           activation=activation,
                           kernel_regularizer=tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE),
                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                           bias_initializer=tf.constant_initializer(0),
                           bias_regularizer=tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE),
                           name=name)
def dropout(inputs,keep_prob,is_training=False):
    # tf.layers.dropout takes the *drop* rate (not the keep probability) and only drops units when training=True
    return tf.layers.dropout(inputs=inputs,rate=1.0-keep_prob,training=is_training)
def max_pooling(inputs, pool_size, strides,name):
return tf.layers.max_pooling2d(inputs, pool_size, strides,name=name)
# loss
def loss(logits,labels):
    # softmax + cross entropy; the sparse variant expects integer class indices rather than one-hot labels
    return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=labels)
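# e.g. (illustrative) for a batch of three images the integer labels fed here look like [7, 2, 1];
# the non-sparse tf.nn.softmax_cross_entropy_with_logits would instead expect one-hot rows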
# helper that yields random mini-batches; mnist.train.next_batch does not shuffle the way we want, so we build our own
# returns a generator that yields one (images, labels) batch at a time
def mini_batch(trainimg,trainlabel):
indices = np.arange(trainimg.shape[0])
np.random.shuffle(indices)
for start_index in range(0,trainimg.shape[0]-BATCH_SIZE+1,BATCH_SIZE):
excerpt = indices[start_index:start_index + BATCH_SIZE]
yield trainimg[excerpt], trainlabel[excerpt]
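# Example usage of the generator above (illustrative):
#   for batch_img, batch_label in mini_batch(trainimg, trainlabel):
#       session.run(train_op, feed_dict={...})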
# fancier version of mini_batch: reads the data through (multi-threaded) queues, is slightly simpler than the
# method above, and can also serve distributed training
def mini_batch_premium(trainimg,trainlabel):
    # input queue built from slices of the data (the analogue of a filename queue)
input_queue = tf.train.slice_input_producer([trainimg, trainlabel], num_epochs=20, shuffle=True, capacity=8)
    # dequeue a batch
x_batch, y_batch = tf.train.batch(input_queue, batch_size=BATCH_SIZE, num_threads=1, capacity=8,
allow_smaller_final_batch=False)
return x_batch, y_batch
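# Note: queue-based inputs only produce data once queue runners are started, e.g.
#   coord = tf.train.Coordinator()
#   threads = tf.train.start_queue_runners(session, coord)
# which is exactly what main_run() below does before its training loop.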
# main function: build the graph, initialize the global variables, and run training
def main_run():
    #=============== input placeholders plus the global step (4 tensors in total) =====================
inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], "x")
labels = tf.placeholder(tf.int32, [None, ], "y")
global_step = tf.Variable(0, trainable=False)
is_training = tf.placeholder(tf.bool, name="is_training")
    #==================== network backbone ================================
    # 3x3, 64-filter conv
    con1 = conv(inputs, filters=64, kernel_size=[3, 3], strides=1, activation=tf.nn.relu, name='con1')
# max_pooling
max_pooling1 = max_pooling(con1, [2, 2], 2, 'pooling1')
    res1 = res(max_pooling1, 128, 64, 1, 'res1', is_training)  # spatial size unchanged
    res2 = res(res1, 256, 128, 2, 'res2', is_training)  # spatial size halved, filter count doubled
    # flatten the feature map into a vector
vec = tf.reshape(res2, [-1, 7 * 7 * 256])
    # fully connected layers
fc1 = fc(vec, 100, 'fc1')
    # dropout (left disabled here)
    #dropout_layer = dropout(fc1, keep_prob=KEEP_PROB, is_training=is_training)
    fc2 = fc(fc1, 50, 'fc2')
    #dropout_layer = dropout(fc2, keep_prob=KEEP_PROB, is_training=is_training)
    # classification layer: no relu on the logits, since softmax cross entropy expects unbounded scores
    logits = fc(fc2, 10, 'logits', activation=None)
cross_entropy = loss(logits, labels)
cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # add the L2 regularization terms collected from the layers
reg_set = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
regularization = tf.add_n(reg_set)
loss_with_reg = cross_entropy_mean+regularization
tf.summary.scalar("loss",loss_with_reg)
learn_rate = tf.train.exponential_decay(LEARN_RATE_BASE, global_step, 1000, LEARN_RATE_DECAY)
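    # also record the learning rate for TensorBoard (goal 3 in the list at the top)
    tf.summary.scalar("learn_rate", learn_rate)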
    # AdamOptimizer generally wants a smaller learning rate
    #train_step = tf.train.AdamOptimizer(learn_rate).minimize(loss_with_reg, global_step=global_step)
    # GradientDescentOptimizer can take a larger learning rate
    # batch_norm registers its moving-average updates in tf.GraphKeys.UPDATE_OPS; they must run together with
    # the train step, otherwise the moving mean/variance used at inference are never updated
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = tf.train.GradientDescentOptimizer(learn_rate).minimize(loss_with_reg, global_step=global_step)
    with tf.control_dependencies([train_step]):
        train_op = tf.no_op(name="train")
    # compute accuracy
correct_prediction = tf.equal(tf.cast(tf.argmax(logits, axis=1), tf.int32), labels)
acc = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
tf.summary.scalar("acc", acc)
merged = tf.summary.merge_all()
    #===================== load the MNIST data ======================================
MNIST_data_folder = "E:\\TestTensorflow\\Data_Set\\MNIST_data"
mnist = input_data.read_data_sets(MNIST_data_folder)
trainimg = mnist.train.images # (55000,784)
    trainlabel = mnist.train.labels  # (55000,) integer class labels (one_hot is not set in read_data_sets)
    saver = tf.train.Saver(max_to_keep=1)  # keeps only the latest checkpoint; set max_to_keep to 0/None to keep every one
image_batch, label_batch = mini_batch_premium(trainimg,trainlabel)
with tf.Session() as session:
session.run(tf.global_variables_initializer())
        # num_epochs inside slice_input_producer is (oddly) a local variable in TF, so the next line is required
        session.run(tf.local_variables_initializer())
        # write the graph (and later the summaries) for TensorBoard
train_writer = tf.summary.FileWriter('log/train/', session.graph)
test_writer = tf.summary.FileWriter('log/test/')
        # if a saved model exists, load it and finetune
#model_file = tf.train.latest_checkpoint('ckpt/')
#if(model_file is not None):
#saver.restore(session, model_file)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(session,coord)
epoch = 0
try:
while not coord.should_stop():
data, label = session.run([image_batch, label_batch])
                # number of batches per epoch = total training samples / BATCH_SIZE
#for inx, val in enumerate(mini_batch(trainimg, trainlabel)):
#image_batch, label_batch = val
_,_,loss_,global_step_,train_merged=session.run([train_op,acc,loss_with_reg,global_step,merged],feed_dict={inputs:np.reshape(data,[-1,28,28,1]),labels:label,is_training:True})
train_writer.add_summary(train_merged, global_step_)
if global_step_ % 20 == 0:
                    # feeding the whole test set blows up my GPU memory, so only a slice is used here
test_feed = {inputs: np.reshape(mnist.test.images[0:500, :], [-1, 28, 28, 1]),
labels: mnist.test.labels[0:500], is_training: False}
validate_feed = {inputs: np.reshape(mnist.validation.images[0:500, :], [-1, 28, 28, 1]),
labels: mnist.validation.labels[0:500], is_training: False}
validate_acc = session.run(acc, feed_dict=validate_feed)
test_acc,test_merged = session.run([acc,merged], feed_dict=test_feed)
print("global_step_ % d ===> validation accuracy|test accuracy|loss %g|%g|%g" % (global_step_,validate_acc,test_acc,loss_))
                    # write the test-set summaries (accuracy, loss) to the test log for TensorBoard
test_writer.add_summary(test_merged,global_step_)
                # periodically save a checkpoint (note: 'epoch' below is incremented once per batch, so this saves every 10 batches)
                if epoch % 10 == 0:
saver.save(session,'ckpt/mnist.ckpt',global_step=global_step)
epoch = epoch + 1
except tf.errors.OutOfRangeError:
print("done")
finally:
coord.request_stop()
coord.join(threads)
if __name__ == "__main__":
main_run()