For the theory behind batch normalization, see the companion post "深度学习: Batch Normalization (归一化)".
Visualizing how the tensors evolve through batch normalization (using a single [1, 4, 4, 1] image as input)
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf

def func_convolution(in_put, in_channel, out_channel, layer_name, is_training=True):
    with tf.variable_scope(layer_name) as scope:
        weights = tf.get_variable(name="weights", shape=[2, 2, in_channel, out_channel],
                                  initializer=tf.contrib.layers.xavier_initializer_conv2d())
        biases = tf.get_variable(name="biases", shape=[out_channel],
                                 initializer=tf.constant_initializer())
        # Collect the L2 weight-decay term for later inclusion in the total loss.
        weight_decay = tf.multiply(tf.nn.l2_loss(weights), 0.00001, name="weight_decay_loss")
        tf.add_to_collection("weight_loss", weight_decay)
        convolution = tf.nn.bias_add(value=tf.nn.conv2d(input=in_put,
                                                        filter=weights,
                                                        strides=[1, 1, 1, 1],
                                                        padding="SAME"),
                                     bias=biases)
        # updates_collections=None: the moving statistics are updated in place
        # as part of the forward pass.
        bn = tf.contrib.layers.batch_norm(inputs=convolution,
                                          decay=0.9,
                                          is_training=is_training,
                                          updates_collections=None)
        output = tf.nn.relu(bn, name=scope.name)
        return in_put, weights, biases, convolution, bn, output

def main():
    with tf.Graph().as_default():
        input_x_1 = tf.placeholder(dtype=tf.float32, shape=[1, 4, 4, 1])
        in_put, weights, biases, convolution, bn, output = func_convolution(input_x_1, 1, 1, 'my', True)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            print '\n\nglobal_variables:\n\n'
            for name, value in zip(tf.global_variables(), sess.run(tf.global_variables())):
                print name, ': \n', value, '\n'
            _in_put, _weights, _biases, _convolution, _bn, _output = sess.run(
                [in_put, weights, biases, convolution, bn, output],
                feed_dict={input_x_1: np.random.uniform(low=0, high=255, size=[1, 4, 4, 1])})
            print
            print '_in_put:'
            print _in_put
            print
            print '_weights:'
            print _weights
            print
            print '_biases:'
            print _biases
            print
            print 'convolution:'
            print _convolution
            print
            print 'bn:'
            print _bn
            print
            print 'output:'
            print _output

if __name__ == "__main__":
    main()
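A note on updates_collections=None in the call above: with this setting, batch_norm updates moving_mean and moving_variance in place during the forward pass, which is why the experiments below see the moving statistics change after a single sess.run. With the default setting, the update ops are instead placed in the tf.GraphKeys.UPDATE_OPS collection and must be run explicitly. A minimal sketch of that standard pattern, assuming a scalar loss defined elsewhere:

# Sketch only: `loss` is a hypothetical scalar loss defined elsewhere.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):  # run the moving-average updates with each step
    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)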
global_variables: # the graph's global variables, as initialized
<tf.Variable 'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> :
[[[[ 0.40765101]]
  [[ 0.06195515]]]
 [[[-0.09728742]]
  [[-0.36299753]]]]

<tf.Variable 'my/biases:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
_in_put: # the randomly generated input batch (batch_size=1, size=4×4, depth=1)
[[[[ 219.29150391]
[ 64.56425476]
[ 193.92094421]
[ 76.09475708]]
[[ 226.17414856]
[ 126.58197021]
[ 63.82304382]
[ 108.58100891]]
[[ 196.27723694]
[ 10.7613945 ]
[ 35.6981163 ]
[ 64.7461319 ]]
[[ 134.63713074]
[ 213.41607666]
[ 193.53451538]
[ 86.11763 ]]]]
_weights: # the Xavier-initialized weights
[[[[ 0.40765101]]
[[ 0.06195515]]]
[[[-0.09728742]]
[[-0.36299753]]]]
_biases: # the biases, initialized to zero by tf.constant_initializer()
[ 0.]
convolution: # the tensor after tf.nn.bias_add(tf.nn.conv2d())
[[[[ 25.4416523 ]
[ 2.85164356]
[ 38.14271164]
[ 20.45653725]]
[[ 77.04086304]
[ 41.55015945]
[ 5.7690196 ]
[ 37.96417236]]
[[ -9.88866806]
[-84.41667175]
[-31.52523041]
[ 18.01566315]]
[[ 68.10718536]
[ 98.98973846]
[ 84.22997284]
[ 35.10593796]]]]
bn: # the tensor after tf.contrib.layers.batch_norm()
[[[[-0.02923918]
[-0.53810263]
[ 0.25686544]
[-0.14153406]]
[[ 1.13308668]
[ 0.33362174]
[-0.47238567]
[ 0.25284362]]
[[-0.82509136]
[-2.50391221]
[-1.31247747]
[-0.19651729]]
[[ 0.93184632]
[ 1.62750816]
[ 1.29502892]
[ 0.18845892]]]]
output: # the tensor after the final tf.nn.relu()
[[[[ 0. ]
[ 0. ]
[ 0.25686544]
[ 0. ]]
[[ 1.13308668]
[ 0.33362174]
[ 0. ]
[ 0.25284362]]
[[ 0. ]
[ 0. ]
[ 0. ]
[ 0. ]]
[[ 0.93184632]
[ 1.62750816]
[ 1.29502892]
[ 0.18845892]]]]
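The bn tensor above can be reproduced by hand. With is_training=True, batch_norm normalizes with the statistics of the current batch; since scale=False by default, gamma is fixed at 1, and beta was initialized to 0, so the result is a plain standardization. A minimal numpy sketch (0.001 is batch_norm's default epsilon):

import numpy as np

def batch_norm_train(x, epsilon=0.001):
    # Normalize with the batch's own mean and variance, per channel.
    mean = x.mean(axis=(0, 1, 2))
    var = x.var(axis=(0, 1, 2))
    return (x - mean) / np.sqrt(var + epsilon)

Feeding the convolution values printed above through this function reproduces the bn values, e.g. the first entry gives (25.4417 - mean) / sqrt(var + 0.001) ≈ -0.0292.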
Investigating the effect of the decay parameter in batch normalization, and how tf.global_variables() changes along the way
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf

def func_convolution(in_put, in_channel, out_channel, layer_name, is_training=True, decay=0.9):  # new parameter: decay
    with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE) as scope:  # reuse=tf.AUTO_REUSE lets the loop below rebuild the layer
        weights = tf.get_variable(name="weights", shape=[2, 2, in_channel, out_channel],
                                  initializer=tf.contrib.layers.xavier_initializer_conv2d())
        biases = tf.get_variable(name="biases", shape=[out_channel],
                                 initializer=tf.constant_initializer())
        weight_decay = tf.multiply(tf.nn.l2_loss(weights), 0.00001, name="weight_decay_loss")
        tf.add_to_collection("weight_loss", weight_decay)
        convolution = tf.nn.bias_add(value=tf.nn.conv2d(input=in_put,
                                                        filter=weights,
                                                        strides=[1, 1, 1, 1],
                                                        padding="SAME"),
                                     bias=biases)
        bn = tf.contrib.layers.batch_norm(inputs=convolution,
                                          decay=decay,
                                          is_training=is_training,
                                          updates_collections=None)
        output = tf.nn.relu(bn, name=scope.name)
        return in_put, weights, biases, convolution, bn, output

def main():
    with tf.Graph().as_default():
        input_x_1 = tf.placeholder(dtype=tf.float32, shape=[1, 4, 4, 1])
        for decay_value in [0, 0.5, 0.9, 1]:  # run the experiment with decay = 0, 0.5, 0.9 and 1
            in_put, weights, biases, convolution, bn, output = func_convolution(input_x_1, 1, 1, 'my', True, decay=decay_value)
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                # Print the global variables before the forward pass ...
                print '\n\nglobal_variables: (decay = {})\n'.format(decay_value)
                for name, value in zip(tf.global_variables(), sess.run(tf.global_variables())):
                    print name, ': \n', value, '\n'
                _in_put, _weights, _biases, _convolution, _bn, _output = sess.run(
                    [in_put, weights, biases, convolution, bn, output],
                    feed_dict={input_x_1: np.random.uniform(low=0, high=255, size=[1, 4, 4, 1])})
                # ... and again after it, to see how the moving statistics changed.
                print '\n\nglobal_variables:\n\n'
                for name, value in zip(tf.global_variables(), sess.run(tf.global_variables())):
                    print name, ': \n', value, '\n'

if __name__ == "__main__":
    main()
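What decay actually controls: after each training-mode forward pass, batch_norm updates its running statistics by an exponential moving average,

    moving_mean     = decay * moving_mean     + (1 - decay) * batch_mean
    moving_variance = decay * moving_variance + (1 - decay) * batch_variance

so decay=0 replaces the running statistics outright with the current batch's, decay=1 freezes them, and intermediate values blend old and new. The four runs below confirm this.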
2017-09-27 22:33:12.399901: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1052] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)
global_variables: (decay = 0) # with decay=0 nothing of the old value is kept: after one forward pass, 'my/BatchNorm/moving_mean:0' and 'my/BatchNorm/moving_variance:0' jump all the way to the batch mean and variance
<tf.Variable 'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> :
[[[[-0.15596229]]
  [[-0.79179955]]]
 [[[ 0.25267524]]
  [[-0.13646322]]]]

<tf.Variable 'my/biases:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
global_variables: # after the forward pass
<tf.Variable 'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> :
[[[[-0.15596229]]
  [[-0.79179955]]]
 [[[ 0.25267524]]
  [[-0.13646322]]]]

<tf.Variable 'my/biases:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[-77.38012695]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 5024.97949219]
2017-09-27 22:33:13.193005: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1052] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)
global_variables: (decay = 0.5) # with decay=0.5 half of the old value is kept: the moving statistics move half of the way toward the batch statistics
<tf.Variable 'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> :
[[[[ 0.83213621]]
  [[ 0.46162659]]]
 [[[-0.55298626]]
  [[ 0.28148931]]]]

<tf.Variable 'my/biases:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
global_variables: # after the forward pass
<tf.Variable 'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> :
[[[[ 0.83213621]]
  [[ 0.46162659]]]
 [[[-0.55298626]]
  [[ 0.28148931]]]]

<tf.Variable 'my/biases:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 76.91065216]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 3560.95825195]
2017-09-27 22:33:13.227898: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1052] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)
global_variables: (decay = 0.9) # with decay=0.9 ninety percent of the old value is kept: the moving statistics move only one tenth of the way toward the batch statistics
<tf.Variable 'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> :
[[[[ 0.03880662]]
  [[ 0.68812126]]]
 [[[ 0.71565622]]
  [[ 0.71868998]]]]

<tf.Variable 'my/biases:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
global_variables: # after the forward pass
<tf.Variable 'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> :
[[[[ 0.03880662]]
  [[ 0.68812126]]]
 [[[ 0.71565622]]
  [[ 0.71868998]]]]

<tf.Variable 'my/biases:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 17.47742271]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1068.61047363]
2017-09-27 22:33:13.262399: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1052] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)
global_variables: (decay = 1) # with decay=1 the old value is kept entirely: 'my/BatchNorm/moving_mean:0' and 'my/BatchNorm/moving_variance:0' do not move at all
<tf.Variable 'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> :
[[[[-0.08102882]]
  [[ 0.76455539]]]
 [[[-0.41627529]]
  [[-0.51934695]]]]

<tf.Variable 'my/biases:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
global_variables: # after the forward pass
<tf.Variable 'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> :
[[[[-0.08102882]]
  [[ 0.76455539]]]
 [[[-0.41627529]]
  [[-0.51934695]]]]

<tf.Variable 'my/biases:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
Process finished with exit code 0
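The decay=0.9 numbers above can be checked against the update rule directly. Since moving_mean started at 0 and moving_variance at 1, the batch statistics are implied by the printed results; a numpy-free sketch of the arithmetic:

decay = 0.9
moving_mean, moving_variance = 0.0, 1.0          # initial values, as printed above
batch_mean, batch_variance = 174.774, 10677.1    # implied by the printed results

moving_mean = decay * moving_mean + (1 - decay) * batch_mean
moving_variance = decay * moving_variance + (1 - decay) * batch_variance
print(moving_mean)      # ~ 17.477, matching 'my/BatchNorm/moving_mean:0'
print(moving_variance)  # ~ 1068.61, matching 'my/BatchNorm/moving_variance:0'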
Investigating what exactly tf.all_variables(), tf.global_variables(), and tf.trainable_variables() cover in batch normalization
# -*- coding: utf-8 -*-
import tensorflow as tf

def func_convolution(in_put, in_channel, out_channel, layer_name, is_training=True):
    with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE) as scope:
        weights = tf.get_variable(name="weights", shape=[2, 2, in_channel, out_channel],
                                  initializer=tf.contrib.layers.xavier_initializer_conv2d())
        biases = tf.get_variable(name="biases", shape=[out_channel],
                                 initializer=tf.constant_initializer())
        weight_decay = tf.multiply(tf.nn.l2_loss(weights), 0.00001, name="weight_decay_loss")
        tf.add_to_collection("weight_loss", weight_decay)
        convolution = tf.nn.bias_add(value=tf.nn.conv2d(input=in_put,
                                                        filter=weights,
                                                        strides=[1, 1, 1, 1],
                                                        padding="SAME"),
                                     bias=biases)
        bn = tf.contrib.layers.batch_norm(inputs=convolution,
                                          decay=0.9,
                                          is_training=is_training,
                                          updates_collections=None)
        output = tf.nn.relu(bn, name=scope.name)
        return in_put, weights, biases, convolution, bn, output

def main():
    with tf.Graph().as_default():
        input_x_1 = tf.placeholder(dtype=tf.float32, shape=[1, 4, 4, 1])
        in_put, weights, biases, convolution, bn, output = func_convolution(input_x_1, 1, 1, 'my', True)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # tf.all_variables() is deprecated; it is called here deliberately
            # to compare it against the other two collections.
            print '\n\nall_variables:\n'
            for name, value in zip(tf.all_variables(), sess.run(tf.all_variables())):
                print name, ': \n', value, '\n'
            print '\n\nglobal_variables:\n'
            for name, value in zip(tf.global_variables(), sess.run(tf.global_variables())):
                print name, ': \n', value, '\n'
            print '\n\ntrainable_variables:\n'
            for name, value in zip(tf.trainable_variables(), sess.run(tf.trainable_variables())):
                print name, ': \n', value, '\n'

if __name__ == "__main__":
    main()
all_variables: # as the WARNING below shows, tf.all_variables() is simply a deprecated alias of tf.global_variables()
WARNING:tensorflow:From /home/user/Desktop/test/15.py:35: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
<tf.Variable 'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> :
[[[[-0.32202744]]
  [[ 0.09379423]]]
 [[[ 0.36457914]]
  [[ 0.84247833]]]]

<tf.Variable 'my/biases:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
global_variables:
<tf.Variable 'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> :
[[[[-0.32202744]]
  [[ 0.09379423]]]
 [[[ 0.36457914]]
  [[ 0.84247833]]]]

<tf.Variable 'my/biases:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
trainable_variables:
"""
trainable_variables 相比 global_variables 少了 'my/BatchNorm/moving_mean:0' 和'my/BatchNorm/moving_variance:0' 这两个项。
因为这两个项是存放临时变量的,不需要被 train 到,所以不是 trainable 的。
"""
<tf.Variable 'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> :
[[[[-0.32202744]]
  [[ 0.09379423]]]
 [[[ 0.36457914]]
  [[ 0.84247833]]]]

<tf.Variable 'my/biases:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]
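A practical consequence: when saving or restoring a model with batch norm, the moving statistics must be included even though they are not trainable. One common idiom is to take the set difference of the two collections; a minimal sketch:

# Global variables that are not trainable -- for this graph, exactly the two
# moving-statistics variables of the BatchNorm layer.
trainable = set(tf.trainable_variables())
for v in tf.global_variables():
    if v not in trainable:
        print v.name  # my/BatchNorm/moving_mean:0, my/BatchNorm/moving_variance:0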
Investigating the role of the is_training parameter in batch normalization.
import numpy as np
import tensorflow as tf

def func(in_put, layer_name, reuse, is_training=True):
    with tf.variable_scope(layer_name, reuse=reuse):
        bn = tf.contrib.layers.batch_norm(inputs=in_put,
                                          decay=0.9,
                                          is_training=is_training,
                                          updates_collections=None)
        return bn

def main():
    bns = [0, 0]
    with tf.Graph().as_default():
        input_x = tf.placeholder(dtype=tf.float32, shape=[1, 4, 4, 1])
        with tf.Session() as sess:
            # Build two batch_norm ops over the same variables (the second call
            # reuses the scope): first in training mode, then in inference mode.
            for i, is_training in enumerate([True, False]):
                bns[i] = func(input_x, 'my', i > 0, is_training=is_training)
            v = np.random.uniform(low=0, high=255, size=[1, 4, 4, 1])  # one input, fed to both runs
            sess.run(tf.global_variables_initializer())
            for i in xrange(2):
                t = sess.run(bns[i], feed_dict={input_x: v})
                print '-------------'
                print t, '\n'
                for (x, y) in zip(tf.global_variables(), sess.run(tf.global_variables())):
                    print '\n', x, ':\n', y

if __name__ == "__main__":
    main()
2017-09-28 16:26:06.485467: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1052] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)
-------------
# with is_training=True: normalized with the batch's own statistics
[[[[-1.70509255]
[ 1.01002324]
[-0.36365438]
[ 0.46331275]]
[[ 1.56672919]
[-1.53579748]
[-1.07274151]
[ 0.3656975 ]]
[[ 0.62969148]
[ 0.97397387]
[-0.55495548]
[ 0.35338986]]
[[ 0.51738369]
[ 0.93780458]
[-1.64297044]
[ 0.05720568]]]]
<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 12.54585934]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 537.48309326]
-------------
# with is_training=False: normalized with the stored moving statistics
[[[[-0.51710552]
[ 8.06164551]
[ 3.7213378 ]
[ 6.3342452 ]]
[[ 9.82062817]
[ 0.01780343]
[ 1.48088753]
[ 6.02581739]]
[[ 6.85993958]
[ 7.94774342]
[ 3.11689758]
[ 5.98692942]]
[[ 6.50508881]
[ 7.83346176]
[-0.32082313]
[ 5.05109835]]]]
<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 12.54585934]

<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 537.48309326]
Process finished with exit code 0
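To sum up what the two runs show: with is_training=True the layer normalizes with the current batch's statistics (and updates the moving averages); with is_training=False it normalizes with the stored moving averages instead. A numpy sketch of both paths, using batch_norm's default epsilon=0.001:

import numpy as np

def bn_forward(x, moving_mean, moving_variance, is_training, epsilon=0.001):
    if is_training:
        mean, variance = x.mean(), x.var()             # batch statistics
    else:
        mean, variance = moving_mean, moving_variance  # stored running statistics
    return (x - mean) / np.sqrt(variance + epsilon)

After the training-mode run above, moving_mean ~ 12.546 and moving_variance ~ 537.483; feeding the same [0, 255] input again with is_training=False therefore yields values in roughly [-0.5, 10] (shifted by 12.546, scaled by sqrt(537.483) ~ 23.2), which matches the second printout.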