Tensorflow学习笔记(三)

正则化

这一课突然间难度直接暴涨,毫无心理准备的我一脸懵逼地听了三遍,还有好多没有理解的地方
正则化用于缓解过拟合:在损失函数中引入模型复杂度指标,通过给w加权值来弱化训练数据的噪声(一般不正则化b)

loss新的计算方法:loss = loss(y, y_) + REGULARIZER * loss(w),即原来的损失(如均方误差)加上按正则化系数REGULARIZER加权的参数w的惩罚项

L1正则化和L2正则化

核心代码:

def get_weight(shape, regularizer):
    """Create a weight variable and register its L2 penalty.

    Args:
        shape: Shape of the weight tensor to create.
        regularizer: Scale passed to the L2 regularizer for this weight.

    Returns:
        A float32 variable initialized from ``tf.random_normal``.
    """
    initial_value = tf.random_normal(shape)
    w = tf.Variable(initial_value, dtype=tf.float32)
    # Add this weight's penalty to the 'losses' collection so it can be
    # summed into the total loss later.
    l2_penalty = tf.contrib.layers.l2_regularizer(regularizer)
    tf.add_to_collection('losses', l2_penalty(w))
    return w

# Total loss = data loss (loss_mse) plus the sum of every penalty term that
# was added to the 'losses' collection.
loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))

# Minimize the regularized loss with Adam at a fixed learning rate of 0.0001.
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_total)

对比两种反向传播方式,没有正则化和有正则化的代码差别只在loss的计算上,但实际的优化效果却提升显著

模块化的神经网络搭建八股

我竟然都复现成功啦,虽然debug了半个小时,但实在是太高兴了,美中不足的是对numpy和matplotlib两个库实在不太了解

forward.py

import tensorflow as tf

def get_weight(shape, regularizer):
    """Return a new weight variable whose L2 penalty is tracked in 'losses'.

    Args:
        shape: Shape of the weight tensor to create.
        regularizer: Scale passed to ``tf.contrib.layers.l2_regularizer``.

    Returns:
        A float32 variable initialized from ``tf.random_normal``.
    """
    weights = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    # Register the penalty as a side effect; the caller only gets the variable.
    penalty_fn = tf.contrib.layers.l2_regularizer(regularizer)
    tf.add_to_collection('losses', penalty_fn(weights))
    return weights

def get_bias(shape):
    """Create a bias variable with every element initialized to 0.01.

    Args:
        shape: Shape of the bias tensor to create.

    Returns:
        The new bias variable.
    """
    initial_value = tf.constant(0.01, shape=shape)
    return tf.Variable(initial_value)

def forward(x, regularizer):
    """Build the forward pass of a two-layer fully connected network.

    The input is multiplied by a 2x11 weight matrix, shifted by a bias and
    passed through ReLU; the hidden result is then mapped to one value per
    row by an 11x1 weight matrix plus bias, with no output activation.

    Args:
        x: Input tensor; it is matrix-multiplied with a [2, 11] weight, so
            its last dimension must be 2.
        regularizer: Regularization scale forwarded to ``get_weight`` for
            both weight matrices.

    Returns:
        The output tensor of the network.
    """
    # Hidden layer: 2 inputs -> 11 units with ReLU.
    hidden_weights = get_weight([2, 11], regularizer)
    hidden_bias = get_bias([11])
    hidden_out = tf.nn.relu(tf.matmul(x, hidden_weights) + hidden_bias)

    # Output layer: 11 units -> 1 value, left linear.
    output_weights = get_weight([11, 1], regularizer)
    output_bias = get_bias([1])
    return tf.matmul(hidden_out, output_weights) + output_bias

generateds.py

import tensorflow as tf
import numpy as np

# Fixed seed so every call produces the same data set.
seed = 2


def generateds():
    """Generate a reproducible two-class toy data set of 300 2-D points.

    Points are drawn from a standard normal distribution using a
    ``RandomState`` seeded with the module-level ``seed``. A point is
    labelled 1 when ``x0**2 + x1**2 < 2`` and 0 otherwise.

    Returns:
        A tuple ``(X, Y_, Y_c)`` where ``X`` is a (300, 2) float array of
        points, ``Y_`` is a (300, 1) integer array of 0/1 labels, and ``Y_c``
        is a list of 300 one-element lists holding ``'red'`` for label 1 and
        ``'blue'`` for label 0.
    """
    rng = np.random.RandomState(seed)
    points = rng.randn(300, 2)

    # Squared distance from the origin, compared column-wise.
    first, second = points[:, 0], points[:, 1]
    inside = first * first + second * second < 2

    labels = inside.astype(int).reshape(-1, 1)
    colors = [['red'] if flag else ['blue'] for flag in inside]

    return points, labels, colors

backward.py

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import opt4_8_forward as qian
import opt4_8_generateds as ge

# Hyper-parameters used by backward().
# Number of training iterations.
STEPS=40000
# Number of samples fed to each training step.
BATCH_SIZE = 30
# Initial learning rate passed to tf.train.exponential_decay.
LR_BASE = 0.001
# Decay rate passed to tf.train.exponential_decay.
LR_DECAY = 0.999
# Regularization scale passed to the forward pass.
REGULARIZER = 0.01

def backward():
    """Train the regularized network on the generated data and plot the result.

    Builds the graph (placeholders, forward pass, exponentially decayed
    learning rate, mean squared error plus the penalties stored in the
    'losses' collection), runs STEPS Adam updates on mini-batches of
    BATCH_SIZE samples, and prints the loss over the full data set every
    2000 steps. Afterwards the samples are drawn together with the 0.5
    contour of the network output over the square [-3, 3) x [-3, 3).
    """
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))

    X, Y_, Y_c = ge.generateds()
    # Take the sample count from the data instead of repeating a literal 300,
    # so the decay schedule and the batch window always match the data set.
    n_samples = len(X)

    y = qian.forward(x, REGULARIZER)

    # Step counter that drives the decay; it is not a trainable parameter.
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(
        LR_BASE,
        global_step,
        n_samples / BATCH_SIZE,  # decay once per pass over the data
        LR_DECAY,
        staircase=True,
    )

    loss_mse = tf.reduce_mean(tf.square(y_ - y))
    loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))

    # Pass global_step so the optimizer increments it after every update;
    # without it the counter stays at 0 and the learning rate never decays.
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(
        loss_total, global_step=global_step
    )

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(STEPS):
            # Slide a BATCH_SIZE window over the data, wrapping at the end.
            start = (i * BATCH_SIZE) % n_samples
            end = start + BATCH_SIZE
            sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
            if i % 2000 == 0:
                loss_v = sess.run(loss_total, feed_dict={x: X, y_: Y_})
                print("After %d steps,the loss is %f" % (i, loss_v))

        # Evaluate the trained network on a dense grid so the decision
        # boundary can be drawn as a contour line.
        xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
        grid = np.c_[xx.ravel(), yy.ravel()]
        probs = sess.run(y, feed_dict={x: grid})
        probs = probs.reshape(xx.shape)

    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    plt.contour(xx, yy, probs, levels=[.5])
    plt.show()

# Run training and plotting only when this file is executed as a script.
if __name__=='__main__':
    backward()

你可能感兴趣的:(Tensorflow学习笔记(三))