tensorflow: bn层

Introduction

具体见 深度学习: Batch Normalization (归一化)

Experiment

实验一

可视化 batch normalization 过程中的 tensor 演化(以输入一张 shape 为 [1, 4, 4, 1] 的图片为例)

# -*- coding: utf-8 -*-

import tensorflow as tf


def func_convolution(in_put, in_channel, out_channel, layer_name, is_training=True):
    """Build a 2x2 conv -> bias -> batch-norm -> ReLU stack inside ``layer_name``.

    Args:
        in_put: Input tensor handed to ``tf.nn.conv2d``.
        in_channel: Input-channel count used in the kernel shape.
        out_channel: Output-channel count (kernel shape and bias length).
        layer_name: Name of the variable scope that holds the layer's variables.
        is_training: Forwarded unchanged to ``tf.contrib.layers.batch_norm``.

    Returns:
        The tuple ``(in_put, weights, biases, convolution, bn, output)`` so a
        caller can fetch and inspect every intermediate tensor.
    """
    with tf.variable_scope(layer_name) as scope:
        kernel_shape = [2, 2, in_channel, out_channel]
        weights = tf.get_variable(
            name="weights",
            shape=kernel_shape,
            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        biases = tf.get_variable(
            name="biases",
            shape=[out_channel],
            initializer=tf.constant_initializer())

        # Scaled L2 penalty on the kernel, registered under "weight_loss".
        l2_penalty = tf.multiply(tf.nn.l2_loss(weights), 0.00001, name="weight_decay_loss")
        tf.add_to_collection("weight_loss", l2_penalty)

        conv_raw = tf.nn.conv2d(input=in_put,
                                filter=weights,
                                strides=[1, 1, 1, 1],
                                padding="SAME")
        convolution = tf.nn.bias_add(value=conv_raw, bias=biases)

        # Moving-average decay is fixed at 0.9 in this experiment.
        bn = tf.contrib.layers.batch_norm(
            inputs=convolution,
            decay=0.9,
            is_training=is_training,
            updates_collections=None)
        output = tf.nn.relu(bn, name=scope.name)

    return in_put, weights, biases, convolution, bn, output


def main():
    with tf.Graph().as_default():
        input_x_1 = tf.placeholder(dtype=tf.float32, shape=[1, 4, 4, 1])

        in_put, weights, biases, convolution, bn, output = func_convolution(input_x_1, 1, 1, 'my', True)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            print '\n\nglobal_variables:\n\n'
            for name, value in zip(tf.global_variables(), sess.run(tf.global_variables())):
                print name, ': \n', value, '\n'
            import numpy as np
            _in_put, _weights, _biases, _convolution, _bn, _output = sess.run([in_put, weights, biases, convolution, bn, output], feed_dict={input_x_1:np.random.uniform(low=0, high=255, size=[1, 4, 4, 1])})
            print
            print '_in_put:'
            print _in_put
            print
            print '_weights:'
            print _weights
            print
            print '_biases:'
            print _biases
            print
            print 'convolution:'
            print _convolution
            print
            print 'bn:'
            print _bn
            print
            print 'output:'
            print _output

if __name__ == "__main__":
    main()

global_variables: # 全局变量。

'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> : 
[[[[ 0.40765101]]

  [[ 0.06195515]]]


 [[[-0.09728742]]

  [[-0.36299753]]]] 

'my/biases:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> : 
[ 1.] 


_in_put: # 随机初始化生成的随机批图片(batch_size=1, size=4×4, depth=1)
[[[[ 219.29150391]
   [  64.56425476]
   [ 193.92094421]
   [  76.09475708]]

  [[ 226.17414856]
   [ 126.58197021]
   [  63.82304382]
   [ 108.58100891]]

  [[ 196.27723694]
   [  10.7613945 ]
   [  35.6981163 ]
   [  64.7461319 ]]

  [[ 134.63713074]
   [ 213.41607666]
   [ 193.53451538]
   [  86.11763   ]]]]

_weights: # 随机初始化生成的weights
[[[[ 0.40765101]]

  [[ 0.06195515]]]


 [[[-0.09728742]]

  [[-0.36299753]]]]

_biases: # 由 tf.constant_initializer() 初始化得到的 biases(初始值为 0,并非随机生成)
[ 0.]

convolution: # 经过 tf.nn.bias_add(tf.nn.conv2d()) 之后的tensor
[[[[ 25.4416523 ]
   [  2.85164356]
   [ 38.14271164]
   [ 20.45653725]]

  [[ 77.04086304]
   [ 41.55015945]
   [  5.7690196 ]
   [ 37.96417236]]

  [[ -9.88866806]
   [-84.41667175]
   [-31.52523041]
   [ 18.01566315]]

  [[ 68.10718536]
   [ 98.98973846]
   [ 84.22997284]
   [ 35.10593796]]]]

bn: # 接着经过 tf.contrib.layers.batch_norm() 之后的tensor
[[[[-0.02923918]
   [-0.53810263]
   [ 0.25686544]
   [-0.14153406]]

  [[ 1.13308668]
   [ 0.33362174]
   [-0.47238567]
   [ 0.25284362]]

  [[-0.82509136]
   [-2.50391221]
   [-1.31247747]
   [-0.19651729]]

  [[ 0.93184632]
   [ 1.62750816]
   [ 1.29502892]
   [ 0.18845892]]]]

output: # 最后经过 tf.nn.relu() 的tensor
[[[[ 0.        ]
   [ 0.        ]
   [ 0.25686544]
   [ 0.        ]]

  [[ 1.13308668]
   [ 0.33362174]
   [ 0.        ]
   [ 0.25284362]]

  [[ 0.        ]
   [ 0.        ]
   [ 0.        ]
   [ 0.        ]]

  [[ 0.93184632]
   [ 1.62750816]
   [ 1.29502892]
   [ 0.18845892]]]]


实验二

探究 batch normalization 过程中的 decay的具体影响,以及 tf.global_variables() 的具体变化

# -*- coding: utf-8 -*-

import tensorflow as tf


def func_convolution(in_put, in_channel, out_channel, layer_name, is_training=True, decay=0.9):
    """Build a 2x2 conv -> bias -> batch-norm -> ReLU stack inside ``layer_name``.

    The variable scope is opened with ``reuse=tf.AUTO_REUSE`` so the function
    can be called repeatedly with the same ``layer_name``.

    Args:
        in_put: Input tensor handed to ``tf.nn.conv2d``.
        in_channel: Input-channel count used in the kernel shape.
        out_channel: Output-channel count (kernel shape and bias length).
        layer_name: Name of the variable scope that holds the layer's variables.
        is_training: Forwarded unchanged to ``tf.contrib.layers.batch_norm``.
        decay: Forwarded as the ``decay`` argument of
            ``tf.contrib.layers.batch_norm``.

    Returns:
        The tuple ``(in_put, weights, biases, convolution, bn, output)``.
    """
    with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE) as scope:
        kernel_shape = [2, 2, in_channel, out_channel]
        weights = tf.get_variable(
            name="weights",
            shape=kernel_shape,
            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        biases = tf.get_variable(
            name="biases",
            shape=[out_channel],
            initializer=tf.constant_initializer())

        # Scaled L2 penalty on the kernel, registered under "weight_loss".
        l2_penalty = tf.multiply(tf.nn.l2_loss(weights), 0.00001, name="weight_decay_loss")
        tf.add_to_collection("weight_loss", l2_penalty)

        conv_raw = tf.nn.conv2d(input=in_put,
                                filter=weights,
                                strides=[1, 1, 1, 1],
                                padding="SAME")
        convolution = tf.nn.bias_add(value=conv_raw, bias=biases)

        bn = tf.contrib.layers.batch_norm(
            inputs=convolution,
            decay=decay,
            is_training=is_training,
            updates_collections=None)
        output = tf.nn.relu(bn, name=scope.name)

    return in_put, weights, biases, convolution, bn, output


def main():
    with tf.Graph().as_default():
        input_x_1 = tf.placeholder(dtype=tf.float32, shape=[1, 4, 4, 1])

        for decay_value in [0, 0.5, 0.9, 1]: # 将decay分别取值0、0.5、0.9和1进行实验
            in_put, weights, biases, convolution, bn, output = func_convolution(input_x_1, 1, 1, 'my', True, decay=decay_value)
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                print '\n\nglobal_variables:  (decay = {})\n'.format(decay_value)
                for name, value in zip(tf.global_variables(), sess.run(tf.global_variables())):
                    print name, ': \n', value, '\n'
                import numpy as np
                _in_put, _weights, _biases, _convolution, _bn, _output = sess.run([in_put, weights, biases, convolution, bn, output],
                                                                                  feed_dict={input_x_1:np.random.uniform(low=0, high=255, size=[1, 4, 4, 1])})
                print '\n\nglobal_variables:\n\n'
                for name, value in zip(tf.global_variables(), sess.run(tf.global_variables())):
                    print name, ': \n', value, '\n'

if __name__ == "__main__":
    main()
2017-09-27 22:33:12.399901: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1052] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)


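global_variables:  (decay = 0) # decay=0:更新规则为 moving = decay * moving + (1 - decay) * 当前 batch 统计量,旧值完全不保留,'my/BatchNorm/moving_mean:0' 和 'my/BatchNorm/moving_variance:0' 直接变成当前 batch 的均值和方差(本实验没有训练步骤,'my/BatchNorm/beta:0' 始终为 0)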

'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> : 
[[[[-0.15596229]]

  [[-0.79179955]]]


 [[[ 0.25267524]]

  [[-0.13646322]]]] 

'my/biases:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> : 
[ 1.] 



global_variables:

'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> : 
[[[[-0.15596229]]

  [[-0.79179955]]]


 [[[ 0.25267524]]

  [[-0.13646322]]]] 

'my/biases:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> : 
[-77.38012695] 

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> : 
[ 5024.97949219] 

2017-09-27 22:33:13.193005: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1052] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)


global_variables:  (decay = 0.5) # decay=0.5:旧值保留五成,'my/BatchNorm/moving_mean:0' 和 'my/BatchNorm/moving_variance:0' 都只向当前 batch 的均值/方差移动了一半('my/BatchNorm/beta:0' 保持不变)

'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> : 
[[[[ 0.83213621]]

  [[ 0.46162659]]]


 [[[-0.55298626]]

  [[ 0.28148931]]]] 

'my/biases:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> : 
[ 1.] 



global_variables:

'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> : 
[[[[ 0.83213621]]

  [[ 0.46162659]]]


 [[[-0.55298626]]

  [[ 0.28148931]]]] 

'my/biases:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> : 
[ 76.91065216] 

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> : 
[ 3560.95825195] 

2017-09-27 22:33:13.227898: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1052] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)


global_variables:  (decay = 0.9) # decay=0.9:旧值保留九成,'my/BatchNorm/moving_mean:0' 和 'my/BatchNorm/moving_variance:0' 都只向当前 batch 的均值/方差移动了一成('my/BatchNorm/beta:0' 保持不变)

'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> : 
[[[[ 0.03880662]]

  [[ 0.68812126]]]


 [[[ 0.71565622]]

  [[ 0.71868998]]]] 

'my/biases:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> : 
[ 1.] 



global_variables:

'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> : 
[[[[ 0.03880662]]

  [[ 0.68812126]]]


 [[[ 0.71565622]]

  [[ 0.71868998]]]] 

'my/biases:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> : 
[ 17.47742271] 

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> : 
[ 1068.61047363] 

2017-09-27 22:33:13.262399: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1052] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)


global_variables:  (decay = 1) # decay=1:旧值完全保留,'my/BatchNorm/moving_mean:0' 和 'my/BatchNorm/moving_variance:0' 都不发生更新('my/BatchNorm/beta:0' 同样保持不变)

'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> : 
[[[[-0.08102882]]

  [[ 0.76455539]]]


 [[[-0.41627529]]

  [[-0.51934695]]]] 

'my/biases:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> : 
[ 1.] 



global_variables:

'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> : 
[[[[-0.08102882]]

  [[ 0.76455539]]]


 [[[-0.41627529]]

  [[-0.51934695]]]] 

'my/biases:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> : 
[ 1.] 


Process finished with exit code 0


实验三

探究 batch normalization 过程中的 tf.all_variables() 、tf.global_variables() 、tf.trainable_variables() 的具体范围

# -*- coding: utf-8 -*-

import tensorflow as tf


def func_convolution(in_put, in_channel, out_channel, layer_name, is_training=True):
    """Build a 2x2 conv -> bias -> batch-norm -> ReLU stack inside ``layer_name``.

    The variable scope is opened with ``reuse=tf.AUTO_REUSE``.

    Args:
        in_put: Input tensor handed to ``tf.nn.conv2d``.
        in_channel: Input-channel count used in the kernel shape.
        out_channel: Output-channel count (kernel shape and bias length).
        layer_name: Name of the variable scope that holds the layer's variables.
        is_training: Forwarded unchanged to ``tf.contrib.layers.batch_norm``.

    Returns:
        The tuple ``(in_put, weights, biases, convolution, bn, output)``.
    """
    with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE) as scope:
        kernel_shape = [2, 2, in_channel, out_channel]
        weights = tf.get_variable(
            name="weights",
            shape=kernel_shape,
            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        biases = tf.get_variable(
            name="biases",
            shape=[out_channel],
            initializer=tf.constant_initializer())

        # Scaled L2 penalty on the kernel, registered under "weight_loss".
        l2_penalty = tf.multiply(tf.nn.l2_loss(weights), 0.00001, name="weight_decay_loss")
        tf.add_to_collection("weight_loss", l2_penalty)

        conv_raw = tf.nn.conv2d(input=in_put,
                                filter=weights,
                                strides=[1, 1, 1, 1],
                                padding="SAME")
        convolution = tf.nn.bias_add(value=conv_raw, bias=biases)

        # Moving-average decay is fixed at 0.9 in this experiment.
        bn = tf.contrib.layers.batch_norm(
            inputs=convolution,
            decay=0.9,
            is_training=is_training,
            updates_collections=None)
        output = tf.nn.relu(bn, name=scope.name)

    return in_put, weights, biases, convolution, bn, output


def main():
    with tf.Graph().as_default():
        input_x_1 = tf.placeholder(dtype=tf.float32, shape=[1, 4, 4, 1])

        in_put, weights, biases, convolution, bn, output = func_convolution(input_x_1, 1, 1, 'my', True)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            print '\n\nall_variables:\n'
            for name, value in zip(tf.all_variables(), sess.run(tf.all_variables())):
                print name, ': \n', value, '\n'
            print '\n\nglobal_variables:\n'
            for name, value in zip(tf.global_variables(), sess.run(tf.global_variables())):
                print name, ': \n', value, '\n'
            print '\n\ntrainable_variables:\n'
            for name, value in zip(tf.trainable_variables(), sess.run(tf.trainable_variables())):
                print name, ': \n', value, '\n'

if __name__ == "__main__":
    main()
all_variables: # 根据下面的 WARNING 易知, tf.all_variables() == tf.global_variables()

WARNING:tensorflow:From /home/user/Desktop/test/15.py:35: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
WARNING:tensorflow:From /home/user/Desktop/test/15.py:35: all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Please use tf.global_variables instead.
'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> : 
[[[[-0.32202744]]

  [[ 0.09379423]]]


 [[[ 0.36457914]]

  [[ 0.84247833]]]] 

'my/biases:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> : 
[ 1.] 



global_variables:

'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> : 
[[[[-0.32202744]]

  [[ 0.09379423]]]


 [[[ 0.36457914]]

  [[ 0.84247833]]]] 

'my/biases:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> : 
[ 1.] 



trainable_variables:
"""
trainable_variables 相比 global_variables 少了 'my/BatchNorm/moving_mean:0' 和'my/BatchNorm/moving_variance:0' 这两个项。
因为这两个项保存的是均值/方差的滑动平均统计量:它们由滑动平均更新,而不是通过梯度下降训练(is_training=False 时用它们做归一化),所以不是 trainable 的。
"""

'my/weights:0' shape=(2, 2, 1, 1) dtype=float32_ref> : 
[[[[-0.32202744]]

  [[ 0.09379423]]]


 [[[ 0.36457914]]

  [[ 0.84247833]]]] 

'my/biases:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 

'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> : 
[ 0.] 


实验四

探究 batch normalization 过程中的 is_training 参数项 的作用。

import tensorflow as tf


def func(in_put, layer_name, reuse, is_training=True):
    """Apply ``tf.contrib.layers.batch_norm`` to ``in_put`` inside ``layer_name``.

    Args:
        in_put: Tensor to normalise.
        layer_name: Name of the variable scope to open.
        reuse: Passed as ``reuse`` to ``tf.variable_scope``.
        is_training: Forwarded unchanged to ``tf.contrib.layers.batch_norm``.

    Returns:
        The tensor produced by ``tf.contrib.layers.batch_norm`` (decay 0.9).
    """
    bn_options = dict(decay=0.9,
                      is_training=is_training,
                      updates_collections=None)
    with tf.variable_scope(layer_name, reuse=reuse):
        return tf.contrib.layers.batch_norm(inputs=in_put, **bn_options)


import numpy as np
def main():
    bns = [0,0]
    vs = [0,0]
    with tf.Graph().as_default():
        input_x = tf.placeholder(dtype=tf.float32, shape=[1, 4, 4, 1])
        with tf.Session() as sess:
            for i,is_training in enumerate([True, False]):
                vs[i] = np.random.uniform(low=0, high=255, size=[1, 4, 4, 1])
                bns[i] = func(input_x, 'my', i>0, is_training=is_training)

            vs[1] = vs[0]
            sess.run(tf.global_variables_initializer())
            for i in xrange(2):
                t= sess.run(bns[i], feed_dict={input_x:vs[0]})
                #print '\n\n_bn:  (is_training = {})\n'.format(is_training)
                print '-------------'
                #print 'vs',vs[i]
                print t, '\n'
                for (x, y) in zip(tf.global_variables(), sess.run(tf.global_variables())):
                    print '\n', x, ':\n', y

if __name__ == "__main__":
    main()
2017-09-28 16:26:06.485467: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1052] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)
-------------
# is_training=True 时:用当前 batch 自身的均值/方差做归一化,同时按 decay 更新 moving_mean / moving_variance
[[[[-1.70509255]
   [ 1.01002324]
   [-0.36365438]
   [ 0.46331275]]

  [[ 1.56672919]
   [-1.53579748]
   [-1.07274151]
   [ 0.3656975 ]]

  [[ 0.62969148]
   [ 0.97397387]
   [-0.55495548]
   [ 0.35338986]]

  [[ 0.51738369]
   [ 0.93780458]
   [-1.64297044]
   [ 0.05720568]]]] 


'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 12.54585934]

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 537.48309326]
-------------
# is_training=False 时:用已保存的 moving_mean / moving_variance 做归一化,且不再更新它们(下方打印的两个值与上一次运行后完全相同)
[[[[-0.51710552]
   [ 8.06164551]
   [ 3.7213378 ]
   [ 6.3342452 ]]

  [[ 9.82062817]
   [ 0.01780343]
   [ 1.48088753]
   [ 6.02581739]]

  [[ 6.85993958]
   [ 7.94774342]
   [ 3.11689758]
   [ 5.98692942]]

  [[ 6.50508881]
   [ 7.83346176]
   [-0.32082313]
   [ 5.05109835]]]] 


'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]

'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 12.54585934]

'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 537.48309326]

Process finished with exit code 0


你可能感兴趣的:(TensorFlow,TensorFlow,框架)