1. TensorFlow shares variables through **variable scopes (variable_scope)**, which saves storage by letting different parts of the graph refer to the same variables. TensorFlow provides two main functions for variable management:
(1) tf.get_variable(): creates a variable or retrieves an existing one.
(2) tf.variable_scope(): produces a context manager that creates a namespace; namespaces can be nested.
Whether **tf.get_variable()** creates or retrieves a variable is controlled by the reuse argument of tf.variable_scope(). There are two cases:
(1) With reuse=False, get_variable() creates a new variable:
import tensorflow as tf
with tf.variable_scope("foo"):
v=tf.get_variable("v",[1],initializer=tf.constant_initializer(1.0))
v1=tf.get_variable("v",[1])
# ValueError: Variable foo/v already exists, disallowed.
(2) With reuse=True, get_variable() retrieves an existing variable:
import tensorflow as tf
with tf.variable_scope("foo"):
v=tf.get_variable("v",[1],initializer=tf.constant_initializer(1.0))
with tf.variable_scope("foo",reuse=True):
v1=tf.get_variable("v",[1])
print(v1==v) #结果为:True
When reuse=True is set in tf.variable_scope(), calling get_variable() inside the namespace "foo" retrieves the existing variable "v". If that variable has not been created in the namespace yet, the call raises an error, as in the following example:
import tensorflow as tf
with tf.variable_scope("foo",reuse=True):
v1=tf.get_variable("v",[1])
# ValueError: Variable foo/v does not exist, or was not created with tf.get_variable()
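Closely related, and worth noting as an aside: instead of opening a second scope with reuse=True, the scope object returned by tf.variable_scope() can be switched into reuse mode with its reuse_variables() method. A minimal sketch, assuming TensorFlow 1.x:
import tensorflow as tf
with tf.variable_scope("foo") as scope:
    v = tf.get_variable("v", [1], initializer=tf.constant_initializer(1.0))
    scope.reuse_variables()            # from here on, get_variable() retrieves instead of creating
    v1 = tf.get_variable("v", [1])
print(v1 == v)  # True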
With tf.get_variable() and tf.variable_scope(), TensorFlow can build multiple parallel or nested namespaces for storing the weights, biases, learning rate, moving-average decay rate, regularization coefficient and other parameters of a neural network, with each layer's parameters placed in its own namespace (see the sketch below). At the same time, the two error-checking mechanisms shown above, refusing to re-create an existing variable and refusing to read a non-existent one, keep this shared storage safe.
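A minimal sketch of parallel and nested namespaces, assuming TensorFlow 1.x; the scope names, shapes and helper function here are made up purely for illustration:
import tensorflow as tf

def layer_params(scope_name, shape, reuse=False):
    # one namespace per layer: variables are named <outer>/<scope_name>/weight and .../bias
    with tf.variable_scope(scope_name, reuse=reuse):
        w = tf.get_variable("weight", shape,
                            initializer=tf.truncated_normal_initializer(stddev=0.1))
        b = tf.get_variable("bias", [shape[-1]], initializer=tf.constant_initializer(0.0))
    return w, b

with tf.variable_scope("net"):                       # outer namespace
    w1, b1 = layer_params("layer1", [784, 512])      # net/layer1/weight, net/layer1/bias
    w2, b2 = layer_params("layer2", [512, 10])       # net/layer2/weight, net/layer2/bias

with tf.variable_scope("net", reuse=True):           # reuse the same parameters elsewhere
    w1_again, _ = layer_params("layer1", [784, 512], reuse=True)

print(w1 is w1_again)  # True: get_variable() returned the same variable object
This is exactly the pattern the LeNet-5 example below relies on: the network is built once with reuse=False and a second time with reuse=True for the moving-average version, without duplicating any parameters.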
# part1: import the required modules; numpy is used mainly for matrix reshaping and arithmetic
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
# part2: read the data and preprocess it
mnist = input_data.read_data_sets("/home/promise/PycharmProjects/LeNet-5", one_hot = True)
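For orientation, a quick look at what the MNIST helper above returns; the sizes below are the standard MNIST splits and are stated as an assumption about the helper, not as output of the original script. Images come back flattened to 784 values, which is why they are reshaped to (batch_size, 28, 28, 1) before being fed to the network in part7.
print(mnist.train.num_examples)        # 55000 training examples
print(mnist.validation.num_examples)   # 5000 validation examples
images, labels = mnist.train.next_batch(2)
print(images.shape, labels.shape)      # (2, 784) and (2, 10) because one_hot=True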
# part3: initialize the hyperparameters
batch_size = 100
learning_rate = 0.01
learning_rate_decay = 0.99
max_steps = 30000
# part4: define the network architecture
def hidden_layer(input_tensor, regularizer, avg_class, reuse):
    # Layers with trainable parameters (convolutional and fully connected layers) get their own
    # variable scope; variable_scope() is typically paired with get_variable().
    with tf.variable_scope("C1-conv", reuse=reuse):
        # truncated_normal_initializer() is commonly used for trainable weights, usually with stddev=0.1
        conv1_weights = tf.get_variable("weight", [5, 5, 1, 32],
                                        initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable("bias", [32], initializer=tf.constant_initializer(0.0))  # conv biases start at 0.0
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding="SAME")
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))
    with tf.name_scope("S2-max_pool"):  # pooling layers have no trainable variables, so name_scope() is sufficient
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    with tf.variable_scope("C3-conv", reuse=reuse):
        conv2_weights = tf.get_variable("weight", [5, 5, 32, 64],
                                        initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable("bias", [64], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding="SAME")
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
    with tf.name_scope("S4-max_pool"):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
        # Flatten the 4-D tensor into a batch_size x features 2-D matrix
        shape = pool2.get_shape().as_list()
        nodes = shape[1] * shape[2] * shape[3]
        reshaped = tf.reshape(pool2, [shape[0], nodes])
    with tf.variable_scope("layer5-full1", reuse=reuse):
        Full_connection1_weights = tf.get_variable("weight", [nodes, 512],
                                                   initializer=tf.truncated_normal_initializer(stddev=0.1))
        # only the fully connected weights (the bulk of the parameters) are regularized
        tf.add_to_collection("losses", regularizer(Full_connection1_weights))
        Full_connection1_biasses = tf.get_variable("bias", [512], initializer=tf.constant_initializer(0.1))  # FC biases start at 0.1
        if avg_class is None:
            Full_1 = tf.nn.relu(tf.matmul(reshaped, Full_connection1_weights) + Full_connection1_biasses)
        else:
            # the moving-average (shadow) values are applied only in the fully connected layers
            Full_1 = tf.nn.relu(tf.matmul(reshaped, avg_class.average(Full_connection1_weights))
                                + avg_class.average(Full_connection1_biasses))
    with tf.variable_scope("layer6-full2", reuse=reuse):
        Full_connection2_weights = tf.get_variable("weight", [512, 10],
                                                   initializer=tf.truncated_normal_initializer(stddev=0.1))
        tf.add_to_collection("losses", regularizer(Full_connection2_weights))
        Full_connection2_biasses = tf.get_variable("bias", [10], initializer=tf.constant_initializer(0.1))
        if avg_class is None:
            result = tf.nn.relu(tf.matmul(Full_1, Full_connection2_weights) + Full_connection2_biasses)
        else:
            result = tf.nn.relu(tf.matmul(Full_1, avg_class.average(Full_connection2_weights))
                                + avg_class.average(Full_connection2_biasses))
    return result
x = tf.placeholder(tf.float32, [batch_size, 28, 28, 1], name="x-input")  # the first dimension of x is batch_size
y_ = tf.placeholder(tf.float32, [None, 10], name="y-input")  # the last dimension of y_ is the number of classes
# part5: regularization, loss function and optimization method (define the training procedure)
regularizer = tf.contrib.layers.l2_regularizer(0.0001)  # L2 regularization
y = hidden_layer(x, regularizer, avg_class=None, reuse=False)  # plain (non-averaged) forward pass, so the scopes are created here
training_step = tf.Variable(0, trainable=False)
variable_averages = tf.train.ExponentialMovingAverage(0.99, training_step)
variable_averages_op = variable_averages.apply(tf.trainable_variables())  # maintain a moving average for every trainable variable
average_y = hidden_layer(x, regularizer, variable_averages, reuse=True)  # the averaged forward pass reuses the same variables
# the cross entropy compares the plain (non-averaged) predictions with the true labels
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
cross_entropy_mean = tf.reduce_mean(cross_entropy)
loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))  # total loss = cross entropy + L2 terms (fully connected layers)
# with staircase=True the rate decays as learning_rate * learning_rate_decay ** floor(global_step / decay_steps)
learning_rate = tf.train.exponential_decay(learning_rate, training_step,
                                           mnist.train.num_examples / batch_size,
                                           learning_rate_decay, staircase=True)
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=training_step)
with tf.control_dependencies([train_step, variable_averages_op]):  # note: accidentally passing training_step here instead of train_step hurts accuracy
    train_op = tf.no_op(name='train')  # one run of train_op performs backpropagation and updates the shadow variables
# part6: model evaluation
correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))  # evaluate with the moving-average predictions
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
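For reference, tf.train.ExponentialMovingAverage keeps a shadow copy of every variable it is applied to and, each time variable_averages_op runs, updates it as shadow = decay * shadow + (1 - decay) * variable; when a step counter is supplied (as above), the decay is additionally capped at (1 + step) / (10 + step) so the average adapts quickly early in training. A tiny standalone numeric sketch of one update:
# not part of the graph above; plain Python illustrating a single shadow-variable update
decay, step = 0.99, 100
effective_decay = min(decay, (1 + step) / (10 + step))  # about 0.918 at step 100
shadow, value = 0.5, 1.0
shadow = effective_decay * shadow + (1 - effective_decay) * value
print(effective_decay, shadow)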
# part7: run the training
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for i in range(max_steps):
        if i % 1000 == 0:
            # both validation and training images must be reshaped to the network's input format
            x_val, y_val = mnist.validation.next_batch(batch_size)
            reshaped_x2 = np.reshape(x_val, (batch_size, 28, 28, 1))
            validate_feed = {x: reshaped_x2, y_: y_val}
            validate_accuracy = sess.run(accuracy, feed_dict=validate_feed)
            print("After %d training step(s), validation accuracy "
                  "using average model is %g%%" % (i, validate_accuracy * 100))
        x_train, y_train = mnist.train.next_batch(batch_size)
        reshaped_xs = np.reshape(x_train, (batch_size, 28, 28, 1))
        sess.run(train_op, feed_dict={x: reshaped_xs, y_: y_train})
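As a final sanity check on the variable sharing discussed at the top (a sketch, not part of the training script): listing the graph's variables after the two hidden_layer() calls shows each weight and bias exactly once, plus the shadow copies created by the moving average, because the second call reused the existing scopes instead of creating new ones.
for var in tf.global_variables():
    print(var.name)   # e.g. C1-conv/weight:0 appears once; its shadow is C1-conv/weight/ExponentialMovingAverage:0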