This post collects a few small training demos: parameter initialization, dropout and batch_norm, and gradient clipping. Dropout and batch_norm can each be used on their own; whether they work well together needs to be tested.
Reading other people's blog posts, I noticed one parameter-initialization method that is rarely mentioned; try it if you need it:
w1 = tf.get_variable('w1', [2, 2], tf.float32, xavier_initializer())  # xavier_initializer comes from tensorflow.contrib.layers
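If you would rather avoid tf.contrib, newer TF 1.x releases ship an equivalent core initializer; a minimal sketch, assuming a TF version that provides tf.glorot_uniform_initializer ('w1_alt' is only an illustrative name):

# Same Glorot/Xavier uniform initialization, without tf.contrib
w1_alt = tf.get_variable('w1_alt', [2, 2], tf.float32,
                         tf.glorot_uniform_initializer())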
# Batch norm on the hidden pre-activations, followed by dropout
h_prev = tf.matmul(x, w1) + b1
# Per-feature mean and variance over the batch axis
mean_, variance_ = tf.nn.moments(h_prev, axes=[0])
h = tf.nn.relu(tf.nn.batch_normalization(
    h_prev,
    mean=mean_,
    variance=variance_,
    offset=None,
    scale=None,
    variance_epsilon=0.001))
h = tf.nn.dropout(h, keep_prob=0.8)
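With offset=None and scale=None, tf.nn.batch_normalization reduces to plain standardization of each hidden unit over the batch. A minimal sketch of the same computation written out by hand, only for comparison (h_manual is a name introduced here):

# Equivalent to the batch_normalization call above when offset and scale are None
h_manual = (h_prev - mean_) / tf.sqrt(variance_ + 0.001)

Also note that tf.nn.dropout scales the surviving activations by 1/keep_prob, so the expected activation is unchanged.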
# Define the loss (cost) function
loss = tf.reduce_mean(tf.square(out - y))
# Use the Adam adaptive optimizer
adam = tf.train.AdamOptimizer(0.05)
# Split the (gradient, variable) pairs, clip the gradients, then apply them
gradients, v = zip(*adam.compute_gradients(loss))
gradients, _ = tf.clip_by_global_norm(gradients, 1)
updates = adam.apply_gradients(zip(gradients, v))
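tf.clip_by_global_norm treats all gradients as one long vector and rescales them together only when their joint L2 norm exceeds the threshold (1 here), which preserves the overall gradient direction. A minimal sketch of the rule it applies, assuming dense gradient tensors (clipped_manual is an illustrative name):

global_norm = tf.global_norm(gradients)      # sqrt of the summed squared norms
scale = 1.0 / tf.maximum(global_norm, 1.0)   # shrink only when the norm exceeds 1
clipped_manual = [g * scale for g in gradients]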
The full runnable demo, fitting the XOR function with a single hidden layer:
import tensorflow as tf
import numpy as np
from tensorflow.contrib.layers import xavier_initializer
# Define the inputs and target values (the XOR truth table)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])
# Placeholders that take the inputs and targets row by row
x = tf.placeholder(tf.float32, [None, 2])
y = tf.placeholder(tf.float32, [None, 1])
# Initialize the weights from a normal distribution;
# w1 is the input-to-hidden weight matrix, w2 the hidden-to-output weights
# w1 = tf.Variable(tf.random_normal([2,2]))
with tf.variable_scope("scope1"):
    w1 = tf.get_variable('w11', [2, 2], tf.float32, xavier_initializer())
w2 = tf.Variable(tf.random_normal([2,1]))
# Biases: b1 for the hidden layer, b2 for the output layer
b1 = tf.Variable([0.01, 0.01])
b2 = tf.Variable(0.1)
# Compute the hidden-layer output through the activation function
h_prev = tf.matmul(x, w1) + b1
mean_, variance_ = tf.nn.moments(h_prev, axes=[0])
h = tf.nn.relu(tf.nn.batch_normalization(h_prev,
                                         mean=mean_,
                                         variance=variance_,
                                         offset=None,
                                         scale=None,
                                         variance_epsilon=0.001))  # dropout must be omitted here (see the note at the top)
# h = tf.nn.dropout(h, keep_prob=0.8)
# Compute the output-layer value
out = tf.matmul(h, w2) + b2
# Define the loss (cost) function
loss = tf.reduce_mean(tf.square(out - y))
# Use the Adam adaptive optimizer
adam = tf.train.AdamOptimizer(0.05)
gradients, v = zip(*adam.compute_gradients(loss))
gradients, _ = tf.clip_by_global_norm(gradients, 1)
updates = adam.apply_gradients(zip(gradients, v))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(4000):
        sess.run(updates, feed_dict={x: X, y: Y})
        loss_ = sess.run(loss, feed_dict={x: X, y: Y})
        if i % 200 == 0:
            print("step: %d, loss: %.3f" % (i, loss_))
    print("X: %r" % X)
    print("pred: %r" % sess.run(out, feed_dict={x: X}))