1. 初始化为常量
tf.constant_initializer
__init__(
value=0, #指定的常量
dtype=tf.float32, #数据类型
verify_shape=False #是否可以调整tensor的形状,默认可以调整
)
# Constant initialization with tf.constant_initializer.
# The 8 values are laid out row by row into the requested [2, 4] shape.
value = [0, 1, 2, 3, 4, 5, 6, 7]
print('fitting shape:')
# Build the variable in a dedicated graph so the name 'x' cannot collide with
# a variable of the same name created in the default graph elsewhere.
with tf.Graph().as_default(), tf.Session():
    x = tf.get_variable(
        'x', shape=[2, 4], initializer=tf.constant_initializer(value))
    x.initializer.run()
    print(x.eval())
# output:
# fitting shape:
# [[0. 1. 2. 3.]
#  [4. 5. 6. 7.]]
tf提供了 tf.zeros_initializer() 和 tf.ones_initializer() 类,分别用来初始化全0和全1的tensor对象:
tf.initializers.zeros
__init__(dtype=tf.float32)
tf.initializers.ones
__init__(dtype=tf.float32)
# All-zeros / all-ones initialization.
# A dedicated graph is used because 'x' may already exist in the default
# graph; tf.get_variable raises ValueError on a duplicate name without reuse.
with tf.Graph().as_default(), tf.Session() as sess:
    x = tf.get_variable('x', shape=[8], initializer=tf.zeros_initializer())
    # Pass an initializer instance, consistently with zeros_initializer() above.
    y = tf.get_variable('y', shape=[8], initializer=tf.ones_initializer())
    x.initializer.run()
    y.initializer.run()
    print(x.eval())
    print(y.eval())
# output:
# [ 0. 0. 0. 0. 0. 0. 0. 0.]
# [ 1. 1. 1. 1. 1. 1. 1. 1.]
2. 初始化为均匀分布
tf.random_uniform_initializer
__init__(
minval=0,
maxval=None,
seed=None,
dtype=tf.float32
)
minval: A python scalar or a scalar tensor. 要生成的随机值范围的下限。
maxval: A python scalar or a scalar tensor. 要生成的随机值范围的上限。浮点类型默认为1。
seed: A Python integer. Used to create random seeds. See tf.set_random_seed for behavior.
dtype: The data type.
均匀分布生成的随机数并不是从小到大或者从大到小均匀分布的,这里均匀分布的意义是每次从一组服从均匀分布的数里边随机抽取一个数。
tf.uniform_unit_scaling_initializer
__init__(
factor=1.0,
seed=None,
dtype=tf.float32
)
tf.uniform_unit_scaling_initializer 跟 tf.random_uniform_initializer 不同的地方是前者不需要指定最大最小值,是通过公式计算出来的:
max_val = math.sqrt(3 / input_size) * factor
min_val = -max_val
input_size是输入的大小(实现中取 shape 除最后一维之外各维的乘积,且最小为1),factor是系数。
# Uniform-distribution initialization, comparing:
#   - tf.random_uniform_initializer        (explicit [minval, maxval) range)
#   - tf.uniform_unit_scaling_initializer  (range derived from the shape)
init_uniform = tf.random_uniform_initializer(
    minval=0,
    maxval=10,
    seed=None,
    dtype=tf.float32,
)
init_uniform_unit = tf.uniform_unit_scaling_initializer(
    factor=1.0,
    seed=None,
    dtype=tf.float32,
)

with tf.Session() as sess:
    x_uni = tf.get_variable(
        'x_uni', shape=[10], initializer=init_uniform)
    x_uni_scale = tf.get_variable(
        'x_uni_scale', shape=[10], initializer=init_uniform_unit)

    # Initialize each variable, then fetch and print its value.
    sess.run(x_uni.initializer)
    sess.run(x_uni_scale.initializer)
    print(sess.run(x_uni))
    print(sess.run(x_uni_scale))

# Sample output for x_uni (values are random and differ per run):
# [5.804309 7.8211856 4.7346306 6.652793 7.749939 5.932355 5.0870695
#  5.6576195 2.7801192 8.98697 ]
# Sample output for x_uni_scale:
# [ 1.5332981 0.16584563 1.4062299 -0.3267752 -0.8763422 0.8975915
#  -1.3831029 0.573007 0.01714456 -0.43182337]
3. 初始化为正态分布
tf.random_normal_initializer
生成一组符合正态分布的tensor(默认参数 mean=0.0、stddev=1.0 时即标准正态分布)
__init__(
mean=0.0,
stddev=1.0,
seed=None,
dtype=tf.float32
)
mean: a python scalar or a scalar tensor. 要生成的随机值的平均值
stddev: a python scalar or a scalar tensor. 要生成的随机值的标准差
seed: A Python integer. Used to create random seeds. See tf.set_random_seed for behavior.
dtype: The data type. 仅支持浮点类型
tf.truncated_normal_initializer
生成一组符合截断正态分布的tensor
__init__(
mean=0.0,
stddev=1.0,
seed=None,
dtype=tf.float32
)
# Normal and truncated-normal initialization.
init_random = tf.random_normal_initializer(
    mean=0.0, stddev=1.0, seed=None, dtype=tf.float32)
init_truncated = tf.truncated_normal_initializer(
    mean=0.0, stddev=1.0, seed=None, dtype=tf.float32)
# The names 'x' and 'y' may already exist in the default graph, in which case
# tf.get_variable would raise ValueError; build them in a dedicated graph.
with tf.Graph().as_default(), tf.Session() as sess:
    x = tf.get_variable('x', shape=[10], initializer=init_random)
    y = tf.get_variable('y', shape=[10], initializer=init_truncated)
    x.initializer.run()
    y.initializer.run()
    print(x.eval())
    print(y.eval())
# Sample output (values are random and differ per run):
# [-0.40236568 -0.35864913 -0.94253045 -0.40153521 0.1552504 1.16989613
#  0.43091929 -0.31410623 0.70080078 -0.9620409 ]
# [ 0.18356581 -0.06860946 -0.55245203 1.08850253 -1.13627422 -0.1006074
#  0.65564936 0.03948414 0.86558545 -0.4964745 ]
4. 初始化为变尺度正态、均匀分布
tf.variance_scaling_initializer()
可以生成截断正态分布和均匀分布的tensor,增加了更多的控制参数
__init__(
scale=1.0,
mode='fan_in',
distribution='truncated_normal',
seed=None,
dtype=tf.float32
)
scale: 缩放尺度(positive float).
mode: 有3个值可选:"fan_in", "fan_out", "fan_avg",用于控制计算标准差 stddev 的值
distribution: 可选 "normal"、"uniform",定义生成的tensor的分布是截断正态分布还是均匀分布(较新的 TF 1.x 版本中默认值写作 "truncated_normal","normal" 与其等价)
seed: A Python integer. Used to create random seeds. See tf.set_random_seed for behavior.
dtype: The data type. 仅支持浮点类型
distribution选 'normal' 的时候,生成的是截断正态分布,标准差 stddev = sqrt(scale / n),n的取值根据mode的不同设置而不同:
mode = "fan_in" 时,n 为权重tensor的输入单元数;
mode = "fan_out" 时,n 为输出单元数;
mode = "fan_avg" 时,n 为输入单元数和输出单元数的平均值。
distribution选 ‘uniform’,生成均匀分布的随机数tensor,最大值 max_value和 最小值 min_value 的计算公式:
max_value = sqrt(3 * scale / n)
min_value = -max_value
# Variance-scaling initialization: truncated-normal and uniform variants.
init_variance_scaling_normal = tf.variance_scaling_initializer(
    scale=1.0, mode="fan_in", distribution="normal",
    seed=None, dtype=tf.float32)
init_variance_scaling_uniform = tf.variance_scaling_initializer(
    scale=1.0, mode="fan_in", distribution="uniform",
    seed=None, dtype=tf.float32)
with tf.Session() as sess:
    x_var_nor = tf.get_variable(
        'x_var_nor', shape=[10], initializer=init_variance_scaling_normal)
    y_var_uni = tf.get_variable(
        'y_var_uni', shape=[10], initializer=init_variance_scaling_uniform)
    x_var_nor.initializer.run()
    y_var_uni.initializer.run()
    print(x_var_nor.eval())
    print(y_var_uni.eval())
# Sample output (values are random and differ per run). Kept as comments so
# the snippet stays valid Python.
# [-0.47120634 -0.12735353 -0.21091501 -0.4237936 0.22056238 0.53884083
#  0.03047677 -0.11111113 -0.12246209 -0.27407783]
# [-0.5104643 -0.14123923 -0.5194504 0.43422425 -0.1864321 -0.49164253
#  -0.03280574 -0.1064662 0.5060091 0.3518808 ]
5. 其他初始化方式
tf.orthogonal_initializer
初始化为正交矩阵的随机数,形状最少需要是二维的
__init__(
gain=1.0,
seed=None,
dtype=tf.float32
)
gain: 应用于正交矩阵的乘法因子
seed: A Python integer. Used to create random seeds. See tf.set_random_seed for behavior.
dtype: The data type.
tf.glorot_uniform_initializer()
也称之为Xavier uniform initializer,生成与输入输出节点数相关的均匀分布随机数
__init__(
seed=None,
dtype=tf.float32
)
假设均匀分布的区间是[-limit, limit],则 limit=sqrt(6 / (fan_in + fan_out))
tf.glorot_normal_initializer
也称之为 Xavier normal initializer. 生成与输入输出节点数相关的截断正态分布随机数
__init__(
seed=None,
dtype=tf.float32
)
stddev = sqrt(2 / (fan_in + fan_out)),其中的fan_in和fan_out分别表示输入单元的结点数和输出单元的结点数。
# Orthogonal and Glorot (Xavier) uniform / normal initialization.
init_orthogonal = tf.orthogonal_initializer(
    gain=1.0, seed=None, dtype=tf.float32)
init_glorot_uniform = tf.glorot_uniform_initializer()
init_glorot_normal = tf.glorot_normal_initializer()
with tf.Session() as sess:
    # The orthogonal initializer needs a shape with at least two dimensions.
    x_orth = tf.get_variable(
        'x_orth', shape=[4, 4], initializer=init_orthogonal)
    y_glo_uni = tf.get_variable(
        'y_glo_uni', shape=[10], initializer=init_glorot_uniform)
    z_glo_nor = tf.get_variable(
        'z_glo_nor', shape=[10], initializer=init_glorot_normal)
    x_orth.initializer.run()
    y_glo_uni.initializer.run()
    z_glo_nor.initializer.run()
    print(x_orth.eval())
    print(y_glo_uni.eval())
    print(z_glo_nor.eval())
# Sample output (values are random and differ per run). Kept as comments so
# the snippet stays valid Python.
# [[-0.23597395 0.66629636 0.70593053 -0.04502723]
#  [-0.5642902 0.04045057 -0.27636054 -0.77689457]
#  [ 0.16582792 -0.64549994 0.64033526 -0.38183987]
#  [-0.7735592 -0.37113696 0.12352219 0.49860382]]
# [ 0.09983057 0.2719695 -0.44999748 -0.15994337 -0.51829576 0.0245778
#  -0.09081429 -0.05701229 -0.51071364 0.07084149]
# [ 0.10609829 0.15208444 0.5383526 -0.42126465 0.0384669 0.37158582
#  0.07740056 -0.40733275 0.09609934 -0.08710001]