Author: chen_h
WeChat & QQ: 862251340
WeChat public account: coderpai
import tensorflow as tf

# Rank 0 tensors (scalars)
fruit = tf.Variable("Orange", dtype=tf.string)
quantity = tf.Variable(2, dtype=tf.int16)
price = tf.Variable(3.23, dtype=tf.float32)
# Rank 1 tensors
strings = tf.Variable(["Fruit", "orange"], dtype=tf.string)
prices = tf.Variable([3.23, 4.02], dtype=tf.float64)
# Rank 2 tensor
answers = tf.Variable([[False, True], [False, False]], dtype=tf.bool)
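A quick way to confirm these ranks is to evaluate tf.rank in a session (a minimal sketch; this check is not part of the original example):

# Sketch: verify the ranks claimed in the comments above.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(tf.rank(price)))    # 0
    print(sess.run(tf.rank(prices)))   # 1
    print(sess.run(tf.rank(answers)))  # 2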
When training a model, we use variables to store the training parameters, such as weights and biases, as well as hyperparameters, such as the learning rate and step count. However, the best way to define a variable is with the tf.get_variable() function: when the network we design is very deep, this API allows some variables to be reused.
import tensorflow as tf
import numpy as np

v1 = tf.get_variable("v1", [5, 5, 3])  # A tensor with shape (5, 5, 3) filled with random values
v2 = tf.get_variable("v2", initializer=tf.constant(2))  # 2, an int32 scalar
v3 = tf.get_variable("v3", initializer=tf.constant([[2, 3], [4, 5]]))  # [[2, 3], [4, 5]]
v4 = tf.get_variable("v4", [3, 2], initializer=tf.zeros_initializer)  # all zeros
v5 = tf.get_variable("v5", [3, 2], initializer=tf.ones_initializer)   # all ones
v6 = tf.get_variable("v6", [3, 2], initializer=tf.constant_initializer([1, 2, 3, 4, 5, 6]))
# v6: [[1. 2.], [3. 4.], [5. 6.]]
W = tf.get_variable("W", [784, 256], initializer=tf.truncated_normal_initializer(stddev=np.sqrt(2.0 / 784)))
Z = tf.get_variable("z", [4, 5], initializer=tf.random_uniform_initializer(-1, 1))
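To see the values these initializers actually produce, the variables can be initialized and evaluated in a session (a minimal sketch, not part of the original snippet):

# Sketch: initialize the variables above and print one of them.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(v6))  # [[1. 2.] [3. 4.] [5. 6.]]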
The following program puts variables to work: we define them, add an op that increments a counter, initialize everything, and read a value back:
import tensorflow as tf
import numpy as np

### Using variables
# Define variables and their initializers
weights = tf.get_variable("W", [784, 256], initializer=tf.truncated_normal_initializer(stddev=np.sqrt(2.0 / 784)))
biases = tf.get_variable("z", [256], initializer=tf.zeros_initializer)
counter = tf.get_variable("counter", initializer=tf.constant(0))

# Add an op to increment the counter
increment = tf.assign(counter, counter + 1)

init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    # Execute the init_op to initialize all variables
    sess.run(init_op)
    # Retrieve the value of a variable
    b = sess.run(biases)
    print(b)
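    # Added sketch (not in the original): run the increment op a few times.
    # Each sess.run(increment) executes tf.assign and adds 1 to the counter.
    for _ in range(3):
        sess.run(increment)
    print(sess.run(counter))  # 3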
During training, our variables can be saved to disk. These variables can then be reloaded into the model to continue training, or used later for inference.
import tensorflow as tf
import numpy as np

# Create some variables
v1 = tf.get_variable("v1", shape=[3], initializer=tf.zeros_initializer)
v2 = tf.get_variable("v2", shape=[5], initializer=tf.zeros_initializer)

# Create the ops
inc_v1 = v1.assign(v1 + 1)
dec_v2 = v2.assign(v2 - 1)

init_op = tf.global_variables_initializer()

# Add ops to save and restore all the variables.
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init_op)
    inc_v1.op.run()
    dec_v2.op.run()
    # Save the variables to disk.
    save_path = saver.save(sess, "/tmp/model.ckpt")
import tensorflow as tf
import numpy as np

# Create some variables.
# We do not need to provide an initializer or init_op if they are restored from a checkpoint.
v1 = tf.get_variable("v1", shape=[3])
v2 = tf.get_variable("v2", shape=[5])

saver = tf.train.Saver()

with tf.Session() as sess:
    # Restore variables from disk.
    saver.restore(sess, "/tmp/model.ckpt")
    # Check the values of the variables
    print("v1 : %s" % v1.eval())
    print("v2 : %s" % v2.eval())
If you only want to save some of the model's variables, you can do the following:
import tensorflow as tf
import numpy as np

v1 = tf.get_variable("v1", [3], initializer=tf.zeros_initializer)
v2 = tf.get_variable("v2", [5], initializer=tf.zeros_initializer)

# Save only v2
saver = tf.train.Saver({"v2": v2})

with tf.Session() as sess:
    # Initialize v1 since the saver will not.
    v1.initializer.run()
    saver.restore(sess, "/tmp/model.ckpt")
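    # Added check (not in the original): v1 keeps its initializer value,
    # while v2 is restored from the checkpoint written earlier (decremented once).
    print("v1 : %s" % v1.eval())  # [0. 0. 0.]
    print("v2 : %s" % v2.eval())  # [-1. -1. -1. -1. -1.]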
The sample code below loads the model from a checkpoint when it starts (if one exists), and saves the model periodically during training.
import tensorflow as tf
import os

def loadmodel(session, saver, checkpoint_dir):
    session.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
        saver.restore(session, os.path.join(checkpoint_dir, ckpt_name))
        return True
    else:
        return False

def save(session, saver, checkpoint_dir, step):
    # Checkpoint files will be named "model-<step>" inside checkpoint_dir.
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    saver.save(session, checkpoint_path, global_step=step)

with tf.Session() as session:
    saver = tf.train.Saver()
    ...
    loadmodel(session, saver, "./checkpoint")
    ...
    for i in range(10000):
        ...
        if i % 1000 == 0:
            save(session, saver, "./checkpoint", i)
In transfer learning, we may load a model from a checkpoint, but some parts of the model may not need further training, which we can control by setting trainable=False.
freezed_W = tf.get_variable('CNN_W1', [5, 5, 3, 32], trainable=False,
                            initializer=tf.truncated_normal_initializer(stddev=0.02))
...
loadmodel(session, saver, "./checkpoint")
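A variable created with trainable=False is left out of the TRAINABLE_VARIABLES collection, so optimizers will not touch it, while it can still be saved and restored. A minimal check (not in the original):

# Sketch: the frozen variable is global but not trainable.
print(freezed_W in tf.trainable_variables())  # False
print(freezed_W in tf.global_variables())     # True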
In some problems, we may need to train several multi-layer deep networks at the same time. For different sets of trainable parameters, we use different optimizers and different loss functions.
import tensorflow as tf

def scope_variables(name):
    with tf.variable_scope(name):
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope=tf.get_variable_scope().name)

# Model parameters for the discriminator network
with tf.variable_scope("discriminator"):
    v1 = tf.get_variable("v1", [3], initializer=tf.zeros_initializer)
    ...

# Model parameters for the generator network
with tf.variable_scope("generator"):
    v2 = tf.get_variable("v2", [2], initializer=tf.zeros_initializer)
    ...

# Get all the trainable parameters for the discriminator
discriminator_variables = scope_variables("discriminator")
# Get all the trainable parameters for the generator
generator_variables = scope_variables("generator")

# Two optimizers, one for each network
train_discriminator = discriminator_optimizer.minimize(d_loss,
                                                       var_list=discriminator_variables)
train_generator = generator_optimizer.minimize(g_loss,
                                               var_list=generator_variables)
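You can verify which parameters each call to scope_variables collects by printing the variable names (a small check, not in the original; assumes only the variables above exist in the graph):

print([v.name for v in discriminator_variables])  # ['discriminator/v1:0', ...]
print([v.name for v in generator_variables])      # ['generator/v2:0', ...]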
We can use scopes to create two different layers so that each has its own parameters. For example, CNN1 and CNN2 below each have their own weight w and bias b.
import tensorflow as tf

def conv2d(input, output_dim, filter_h=5, filter_w=5, stride_h=2, stride_w=2, stddev=0.02):
    w = tf.get_variable('w', [filter_h, filter_w, input.get_shape()[-1], output_dim],
                        initializer=tf.truncated_normal_initializer(stddev=stddev))
    conv = tf.nn.conv2d(input, w, strides=[1, stride_h, stride_w, 1], padding='SAME')
    biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
    conv = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape())
    return conv

input1 = tf.random_normal([1, 10, 10, 32])
input2 = tf.random_normal([1, 20, 20, 32])

with tf.variable_scope("conv1"):
    cnn1 = conv2d(input1, 16)
with tf.variable_scope("conv2"):
    cnn2 = conv2d(input2, 16)
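Each scope owns its own copy of the parameters, which shows up in the variable names (a small check, not in the original; assumes a fresh graph):

print([v.name for v in tf.trainable_variables()])
# ['conv1/w:0', 'conv1/biases:0', 'conv2/w:0', 'conv2/biases:0']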
Before looking at variable sharing, let's first describe how tf.Variable works. tf.Variable always creates a new variable, even when given the same name.
# tf.Variable always create new variable even given the same name.
v1 = tf.Variable(10, name="name1")
v2 = tf.Variable(10, name="name1")
assert v1 is not v2
print(v1.name) # name1:0
print(v2.name) # name1_1:0
If the name name1 already exists, TensorFlow appends _1, _2, and so on to the name to keep it unique.
Therefore, when we call the affine method below twice, we create two different sets of weights and biases; each affine call gets its own weight W and bias b.
def affine(x, shape):
    W = tf.Variable(tf.truncated_normal(shape))
    b = tf.Variable(tf.zeros([shape[1]]))
    model = tf.nn.relu(tf.matmul(x, W) + b)
    return model

x = tf.placeholder(tf.float32, [None, 784])
with tf.variable_scope("n1"):
    n1 = affine(x, [784, 500])
with tf.variable_scope("n1"):
    n2 = affine(x, [784, 500])
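Even though both calls use the scope name "n1", four separate variables end up in the graph (a small check, not in the original; assumes a fresh graph):

print(len(tf.trainable_variables()))  # 4: a separate W and b for each call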
Sometimes, in a complex network, we want to share a layer or its parameters. How can we modify the affine function above so that both calls share the same weight W and bias b?
def affine_reuseable(x, shape):
    W = tf.get_variable("W", shape,
                        initializer=tf.random_normal_initializer())
    b = tf.get_variable("b", [shape[1]],
                        initializer=tf.constant_initializer(0.0))
    model = tf.nn.relu(tf.matmul(x, W) + b)
    return model

x = tf.placeholder(tf.float32, [None, 784])
with tf.variable_scope("n2"):
    nn1 = affine_reuseable(x, [784, 500])
with tf.variable_scope("n2", reuse=True):
    nn2 = affine_reuseable(x, [784, 500])
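This time only one W and one b exist under the "n2" scope, and the second call shares them (a small check, not in the original; assumes a fresh graph):

print([v.name for v in tf.trainable_variables()])  # ['n2/W:0', 'n2/b:0']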
If a variable with the given scope/name already exists, tf.get_variable returns the existing variable instead of creating a new one.
W = tf.get_variable("W", shape, initializer=tf.random_normal_initializer())
So on the second call to the affine function, tf.get_variable returns the existing weight W and bias b.
with tf.variable_scope("n2", reuse=True):
nn2 = affine_reuseable(x, [784, 500])
However, TensorFlow expects developers to know which variables are already in use. Before calling tf.get_variable, a developer must decide explicitly whether the reuse flag should be set. When tf.get_variable is called, the following two cases raise an exception:

- reuse is set to False or None (the default), but the variable already exists;
- reuse is set to True, but the variable does not exist.

For example:
with tf.variable_scope("foo"):
v = tf.get_variable("v", [1])
v1 = tf.get_variable("v")
# Raises ValueError("... v already exists ...").
with tf.variable_scope("foo", reuse=True):
v = tf.get_variable("v")
# Raises ValueError("... v does not exists ...").
The reuse flag should be used like this:
with tf.variable_scope("foo"):
v = tf.get_variable("v2", [1]) # Create a new variable.
with tf.variable_scope("foo", reuse=True):
v1 = tf.get_variable("v2") # reuse/share the variable "foo/v2".
assert v1 == v
with tf.variable_scope("foo") as scope:
v = tf.get_variable("v3", [1])
scope.reuse_variables()
v1 = tf.get_variable("v3")
assert v1 == v
We can also reuse a scope object instead of writing out the scope name again:
with tf.variable_scope("model") as scope:
output1 = my_image_filter(input1)
with tf.variable_scope(scope, reuse=True): # Can use scope instead of "model"
output2 = my_image_filter(input2)
Scopes can be nested:
with tf.variable_scope("foo"):
with tf.variable_scope("bar"):
v = tf.get_variable("v", [1])
assert v.name == "foo/bar/v:0"
Many developers are familiar with tf.name_scope and tf.Variable. However, these APIs are not suitable for sharing variables. For example, tf.get_variable below does not pick up the name created by tf.name_scope.
with tf.name_scope("foo1"):
    v1 = tf.get_variable("v", [1])
    v2 = tf.Variable(1, name="v2")
with tf.variable_scope("foo2"):
    v3 = tf.get_variable("v", [1])
    v4 = tf.Variable(1, name="v2")

print(v1.name)  # v:0 (Unexpected!)
print(v2.name)  # foo1/v2:0
print(v3.name)  # foo2/v:0
print(v4.name)  # foo2/v2:0
To avoid these problems, it is best to:

- avoid tf.name_scope and tf.Variable when defining shared variables;
- use tf.variable_scope to define the scope in which variables are shared;
- use tf.get_variable to create and retrieve shared variables.

with tf.variable_scope("foo"):
    v = tf.get_variable("v2", [1])  # Create a new variable.
with tf.variable_scope("foo", reuse=True):
    v1 = tf.get_variable("v2")  # Reuse a variable created before.
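Finally, a variable can be updated in place with assignment ops such as assign and assign_add; these ops only take effect when they are run in a session: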
v = tf.get_variable("v", shape=(), initializer=tf.zeros_initializer())
v1 = v.assign_add(1)  # An op that adds 1 to v and returns the updated value.
v.assign(v1)          # Defines an assign op; note it is never run below.

with tf.Session() as session:
    tf.global_variables_initializer().run()
    value, value1 = session.run([v, v1])
    print(value, value1)
    # 1.0 1.0
Source: jhui