正则化
这一课突然间难度直接暴涨,毫无心理准备的我一脸懵逼地听了三遍,还有好多没有理解的地方
正则化用于缓解过拟合:在损失函数中引入模型复杂度指标,即给权重 w 的惩罚项乘上一个系数 REGULARIZER 后加入总损失(loss = loss(y, y_) + REGULARIZER * loss(w)),从而弱化训练数据中噪声的影响(一般只正则化 w,不正则化偏置 b)
核心代码:
def get_weight(shape, regularizer):
    """Create a weight variable and register its L2 penalty in 'losses'.

    Args:
        shape: Shape of the weight variable, e.g. ``[2, 11]``.
        regularizer: L2 regularization scale (a float). ``None`` or ``0.0``
            means "no regularization": nothing is added to the collection.

    Returns:
        The ``tf.Variable`` holding the weights, initialised with
        ``tf.random_normal(shape)``.
    """
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    if regularizer is not None:
        # A scale of 0.0 disables the regularizer, in which case it yields
        # None instead of a tensor; None must not reach tf.add_n later on.
        penalty = tf.contrib.layers.l2_regularizer(regularizer)(w)
        if penalty is not None:
            # Add this weight's penalty to the 'losses' collection so it can
            # be summed into the total loss.
            tf.add_to_collection('losses', penalty)
    return w
# Sum every per-weight L2 penalty stored in the 'losses' collection and add
# it to the mean-squared-error data loss.
regularization_loss = tf.add_n(tf.get_collection('losses'))
loss_total = loss_mse + regularization_loss

# Minimise the regularized loss with Adam at a fixed learning rate of 1e-4.
optimizer = tf.train.AdamOptimizer(0.0001)
train_step = optimizer.minimize(loss_total)
对比有、无正则化的两份反向传播代码:二者的差别只在 loss 的计算上(是否加上 tf.add_n(tf.get_collection('losses')) 这一项),但实际的拟合效果却有显著提升
模块化的神经网络搭建八股
我竟然都复现成功啦,虽然debug了半个小时,但实在是太高兴了,美中不足的是对numpy和matplotlib两个库实在不太了解
forward.py(需保存为 opt4_8_forward.py,才能与 backward.py 中的 import opt4_8_forward as qian 对应)
import tensorflow as tf
def get_weight(shape, regularizer):
    """Return a new weight variable, recording its L2 penalty when enabled.

    Args:
        shape: Shape of the weight variable to create.
        regularizer: Scale of the L2 penalty (a float). Pass ``None`` or
            ``0.0`` to create the weight without any regularization term.

    Returns:
        A float32 ``tf.Variable`` initialised with ``tf.random_normal(shape)``.
    """
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    if regularizer is None:
        return w

    # With a scale of 0.0 the regularizer is disabled and produces None;
    # storing None in the collection would make tf.add_n fail downstream.
    penalty = tf.contrib.layers.l2_regularizer(regularizer)(w)
    if penalty is not None:
        tf.add_to_collection('losses', penalty)
    return w
def get_bias(shape):
    """Create a bias variable of the given shape with every entry set to 0.01."""
    initial_value = tf.constant(0.01, shape=shape)
    return tf.Variable(initial_value)
def forward(x, regularizer):
    """Build the forward pass of a 2-11-1 fully connected network.

    Args:
        x: Input tensor; it is multiplied by a ``[2, 11]`` weight matrix, so
            its last dimension must be 2.
        regularizer: L2 regularization scale forwarded to ``get_weight`` for
            both weight matrices.

    Returns:
        The output tensor of the second (linear) layer; no activation is
        applied to it.
    """
    hidden_units = 11

    # Hidden layer: affine transform of the 2 input features, then ReLU.
    hidden_w = get_weight([2, hidden_units], regularizer)
    hidden_b = get_bias([hidden_units])
    hidden_out = tf.nn.relu(tf.add(tf.matmul(x, hidden_w), hidden_b))

    # Output layer: a single linear unit.
    out_w = get_weight([hidden_units, 1], regularizer)
    out_b = get_bias([1])
    return tf.add(tf.matmul(hidden_out, out_w), out_b)
generateds.py(需保存为 opt4_8_generateds.py,才能与 backward.py 中的 import opt4_8_generateds as ge 对应)
import tensorflow as tf
import numpy as np
# Default seed of the pseudo-random generator; keeps the data set reproducible.
seed = 2


def generateds(n_samples=300, random_seed=None):
    """Generate a 2-D toy classification data set.

    Points are drawn from a standard normal distribution; a point is labelled
    1 when ``x0**2 + x1**2 < 2`` and 0 otherwise.

    Args:
        n_samples: Number of points to generate (default 300).
        random_seed: Seed for ``np.random.RandomState``. When ``None`` the
            module-level ``seed`` is used.

    Returns:
        A tuple ``(X, Y_, Y_c)`` where
        ``X`` is a float array of shape ``(n_samples, 2)`` with the points,
        ``Y_`` is an integer array of shape ``(n_samples, 1)`` with the 0/1
        labels, and ``Y_c`` is a list of one-element lists, ``['red']`` for
        label 1 and ``['blue']`` for label 0, usable as plot colours.
    """
    rdm = np.random.RandomState(seed if random_seed is None else random_seed)
    X = rdm.randn(n_samples, 2)

    # Vectorized label computation over all rows at once.
    inside = X[:, 0] * X[:, 0] + X[:, 1] * X[:, 1] < 2
    Y_ = inside.astype(int).reshape(-1, 1)
    Y_c = [['red' if flag else 'blue'] for flag in inside]
    return X, Y_, Y_c
backward.py
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import opt4_8_forward as qian
import opt4_8_generateds as ge
# Training hyper-parameters used by backward().
STEPS = 40000        # number of training iterations
BATCH_SIZE = 30      # samples fed to the network per training step
LR_BASE = 1e-3       # initial learning rate for the exponential decay schedule
LR_DECAY = 0.999     # decay rate of the learning-rate schedule
REGULARIZER = 1e-2   # L2 regularization scale passed to the forward pass
def backward():
    """Train the network on the generated data and plot its decision boundary.

    Builds the graph (placeholders, forward pass, regularized MSE loss, Adam
    optimizer with an exponentially decaying learning rate), runs ``STEPS``
    mini-batch updates while printing the full-data loss every 2000 steps,
    then evaluates the network on a grid over [-3, 3) x [-3, 3) and draws the
    0.5 contour on top of the scattered training points.
    """
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))

    X, Y_, Y_c = ge.generateds()
    # Derive the data set size from the data instead of hard-coding it.
    sample_count = len(X)

    y = qian.forward(x, REGULARIZER)

    global_step = tf.Variable(0, trainable=False)
    # The learning rate decays once per pass over the data set.
    learning_rate = tf.train.exponential_decay(
        LR_BASE,
        global_step,
        sample_count / BATCH_SIZE,
        LR_DECAY,
        staircase=True)

    loss_mse = tf.reduce_mean(tf.square(y_ - y))
    # Ignore None entries (a disabled regularizer yields None) and tolerate an
    # empty collection, both of which would make tf.add_n fail.
    reg_losses = [p for p in tf.get_collection('losses') if p is not None]
    if reg_losses:
        loss_total = loss_mse + tf.add_n(reg_losses)
    else:
        loss_total = loss_mse

    # global_step must be handed to minimize(); otherwise it is never
    # incremented and the learning rate stays at LR_BASE forever.
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(
        loss_total, global_step=global_step)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(STEPS):
            # Cycle through the data set in consecutive mini-batches.
            start = (i * BATCH_SIZE) % sample_count
            end = start + BATCH_SIZE
            sess.run(train_step,
                     feed_dict={x: X[start:end], y_: Y_[start:end]})
            if i % 2000 == 0:
                loss_v = sess.run(loss_total, feed_dict={x: X, y_: Y_})
                print("After %d steps,the loss is %f" % (i, loss_v))

        # Evaluate the trained network on a dense grid for the contour plot.
        xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
        grid = np.c_[xx.ravel(), yy.ravel()]
        probs = sess.run(y, feed_dict={x: grid})
        probs = probs.reshape(xx.shape)

    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    # The 0.5 level of the network output is drawn as the decision boundary.
    plt.contour(xx, yy, probs, levels=[.5])
    plt.show()
# Run the training and plotting only when this file is executed as a script.
if __name__ == "__main__":
    backward()