https://mlnotebook.github.io/post/CNN1/ — this "Convolutional Neural Networks - Basics" post is excellent, but it ended up in the wrong place; please ignore it here. Haha.
Building a modular neural network template: this version uses learning-rate decay and L2 regularization.
I also tried cross-entropy and a moving average, but neither gave good results; those attempts failed, and I am not sure whether the problem was the parameter settings or something else.
#coding:utf-8
#0. Import modules; generate the simulated data set
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import opt4_8_generateds
import opt4_8_forward

STEPS = 40000
BATCH_SIZE = 30
LEARNING_RATE_BASE = 0.001
LEARNING_RATE_DECAY = 0.999
REGULARIZER = 0.01

def backward():
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))

    X, Y_, Y_c = opt4_8_generateds.generateds()
    y = opt4_8_forward.forward(x, REGULARIZER)

    global_step = tf.Variable(0, trainable=False)

    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        300 / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    # Define the loss function: MSE plus the L2 terms collected in 'losses'
    loss_mse = tf.reduce_mean(tf.square(y - y_))
    loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))

    # Define the backpropagation method (with regularization).
    # global_step must be passed to minimize(), otherwise the step counter
    # never advances and the learning rate never actually decays.
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_total, global_step=global_step)

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for i in range(STEPS):
            start = (i * BATCH_SIZE) % 300
            end = start + BATCH_SIZE
            sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
            if i % 2000 == 0:
                loss_v = sess.run(loss_total, feed_dict={x: X, y_: Y_})
                print("After %d steps, loss is: %f" % (i, loss_v))

        # Evaluate the network on a grid to draw the decision boundary
        xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
        grid = np.c_[xx.ravel(), yy.ravel()]
        probs = sess.run(y, feed_dict={x: grid})
        probs = probs.reshape(xx.shape)

    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    plt.contour(xx, yy, probs, levels=[.5])
    plt.show()

if __name__ == '__main__':
    backward()
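A quick sanity check on the schedule above (my own note, not part of the course code): with staircase=True, tf.train.exponential_decay lowers the rate in discrete steps, once every 300/BATCH_SIZE = 10 global steps. A minimal sketch, assuming the documented formula:

# Hypothetical helper mirroring tf.train.exponential_decay(staircase=True)
def decayed_lr(global_step, base=0.001, decay=0.999, decay_steps=10):
    # staircase: the exponent only increases every decay_steps steps
    return base * decay ** (global_step // decay_steps)

print(decayed_lr(0))      # 0.001
print(decayed_lr(40000))  # 0.001 * 0.999**4000 ≈ 1.8e-05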
#coding:utf-8
#Define the network's inputs, parameters and outputs, and the forward pass
import tensorflow as tf

def get_weight(shape, regularizer):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    # Add this weight's L2 penalty to the 'losses' collection
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w

def get_bias(shape):  # shape is simply the number of biases in the layer
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b

def forward(x, regularizer):
    w1 = get_weight([2, 11], regularizer)
    b1 = get_bias([11])
    y1 = tf.nn.relu(tf.matmul(x, w1) + b1)

    w2 = get_weight([11, 1], regularizer)
    b2 = get_bias([1])
    y = tf.matmul(y1, w2) + b2  # no activation on the output layer
    return y
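A side note on what lands in the 'losses' collection (my understanding of the API, not from the course notes): tf.contrib.layers.l2_regularizer(scale)(w) evaluates to scale * sum(w**2) / 2, so loss_total in the main script is the MSE plus REGULARIZER times half the squared norm of each weight matrix. Checking the arithmetic by hand:

import numpy as np

w = np.array([[0.5, -1.0],
              [2.0,  0.0]])  # made-up stand-in for one weight matrix
REGULARIZER = 0.01
# Same value tf.contrib.layers.l2_regularizer(REGULARIZER)(w) would add:
l2_penalty = REGULARIZER * np.sum(w ** 2) / 2.0
print(l2_penalty)  # 0.02625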
#coding:utf-8
#0. Import modules; generate the simulated data set
import numpy as np
import matplotlib.pyplot as plt

seed = 2

def generateds():
    # Seed the random number generator so the data set is reproducible
    rdm = np.random.RandomState(seed)
    # Draw a 300x2 matrix: 300 coordinate pairs (x0, x1) as the input data set
    X = rdm.randn(300, 2)
    # Label each row: 1 if the squared coordinates sum to less than 2
    # (the point lies inside the circle), else 0. These labels are the
    # "correct answers" for the input data set.
    Y_ = [int(x0 * x0 + x1 * x1 < 2) for (x0, x1) in X]
    # Map each label to a color: 'red' for 1, 'blue' for 0, so a human can
    # tell the classes apart at a glance in the plot
    Y_c = [['red' if y else 'blue'] for y in Y_]
    # Reshape X and Y_: the -1 means "infer the number of rows from the
    # column count"; X ends up with 2 columns, Y_ with 1
    X = np.vstack(X).reshape(-1, 2)
    Y_ = np.vstack(Y_).reshape(-1, 1)
    return X, Y_, Y_c

# print(X)
# print(Y_)
# print(Y_c)
# plt.scatter draws each row's point (x0, x1) from columns 0 and 1 of X,
# colored by the corresponding Y_c value (c is short for color):
# plt.scatter(X[:,0], X[:,1], c=np.squeeze(Y_c))
# plt.show()
You might want to try it yourself and see whether your result matches.
What is a moving average? I used one here but could not get the result I wanted.
It tracks, over a window of recent training, the average value of every parameter w and b in the model. Using these moving averages can improve the model's generalization.
Expressed with TensorFlow functions:
√ ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
where MOVING_AVERAGE_DECAY is the moving-average decay rate, usually set close to 1, and global_step is the number of training steps completed so far.
√ ema_op = ema.apply(tf.trainable_variables())
ema.apply() computes a moving average for each variable in the bracketed list; tf.trainable_variables() collects every trainable parameter into that list.
√ with tf.control_dependencies([train_step, ema_op]):
      train_op = tf.no_op(name='train')
This construct makes the moving-average update run together with each training step.
To inspect a parameter's moving average, use ema.average().
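Concretely (per the TensorFlow documentation for ExponentialMovingAverage; the helper below is my own sketch, not course code), each run of ema_op applies:

def ema_update(shadow, variable, num_updates, moving_average_decay=0.99):
    # When global_step is passed in, TensorFlow caps the decay early in
    # training so the shadow value can catch up quickly:
    decay = min(moving_average_decay, (1.0 + num_updates) / (10.0 + num_updates))
    # shadow <- decay * shadow + (1 - decay) * variable
    return decay * shadow + (1.0 - decay) * variable

print(ema_update(0.0, 1.0, num_updates=0))  # 0.9, since decay is capped at 1/10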
Only the main function changed; the other modules stayed the same.
#coding:utf-8
#0. Import modules; generate the simulated data set
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import opt4_8_generateds
import opt4_8_forward

STEPS = 40000                # number of training iterations
BATCH_SIZE = 30              # batch size
LEARNING_RATE_BASE = 0.001   # initial learning rate
LEARNING_RATE_DECAY = 0.999  # learning-rate decay rate
REGULARIZER = 0.01           # hyperparameter: weight of the w terms in the total loss, i.e. the regularization strength
MOVING_AVERAGE_DECAY = 0.99

def backward():
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))

    X, Y_, Y_c = opt4_8_generateds.generateds()
    y = opt4_8_forward.forward(x, REGULARIZER)

    global_step = tf.Variable(0, trainable=False)

    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        300 / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    # Define the loss function
    loss_mse = tf.reduce_mean(tf.square(y - y_))
    # ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    # loss_mse = tf.reduce_mean(ce)
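    # Note (my analysis, not from the course): this cross-entropy attempt
    # cannot work here. y has a single output unit, so softmax over one
    # logit is always 1 and the loss is constantly zero; tf.argmax(y_, 1)
    # on a (None, 1) label tensor is also always 0. A binary cross-entropy
    # would need tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=y),
    # or two output units with one-hot labels.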
    loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))

    # Define the backpropagation method (with regularization)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_total, global_step=global_step)

    # Moving average over all trainable variables
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    # with tf.control_dependencies([train_step, ema_op]):
    #     train_op = tf.no_op(name='train')

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for i in range(STEPS):
            start = (i * BATCH_SIZE) % 300
            end = start + BATCH_SIZE
            sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
            sess.run(ema_op)
            # print("current global_step:", sess.run(global_step))
            if i % 2000 == 0:
                loss_v = sess.run(loss_total, feed_dict={x: X, y_: Y_})
                print("After %d steps, loss is: %f" % (i, loss_v))

        xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
        grid = np.c_[xx.ravel(), yy.ravel()]
        probs = sess.run(y, feed_dict={x: grid})
        probs = probs.reshape(xx.shape)

    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    plt.contour(xx, yy, probs, levels=[.5])
    plt.show()

if __name__ == '__main__':
    backward()
Result: the moving-average run is on the left, the run without it on the right. Comparing the two, the moving average did not bring better generalization: the decision boundary is more jagged, and the loss did not decrease as smoothly either.
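One possible reason (my own guess, not from the course notes): the script above only maintains the shadow variables; inference still runs on the raw weights, so the plotted boundary never actually uses the averages. The usual TF 1.x pattern is to checkpoint the trained model and then restore the shadow values into the ordinary variables via ema.variables_to_restore(). A minimal sketch, with a made-up checkpoint path:

# Inside the training session, after the loop (hypothetical path):
#     tf.train.Saver().save(sess, './model/opt4_8_ema')

# Then, after rebuilding the same graph, load the shadow values INTO the weights:
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
restorer = tf.train.Saver(ema.variables_to_restore())
with tf.Session() as sess:
    restorer.restore(sess, './model/opt4_8_ema')
    # sess.run(y, feed_dict={x: grid}) now uses the averaged weights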
For how the moving average behaves step by step, see the example below.
#coding:utf-8
import tensorflow as tf

#1. Define the variable and the moving-average class
# w1 is the parameter being trained and optimized; the moving average
# maintains a "shadow" copy of w1.
w1 = tf.Variable(0, dtype=tf.float32)
# num_updates (the training step counter), initialized to 0, not trainable
global_step = tf.Variable(0, trainable=False)
# Instantiate the moving-average class with decay rate 0.99 and the current step
MOVING_AVERAGE_DECAY = 0.99
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
# ema.apply() takes the list of variables to track; each sess.run(ema_op)
# updates the moving average of every variable in that list.
# In practice, tf.trainable_variables() collects all trainable parameters.
# ema_op = ema.apply([w1])
ema_op = ema.apply(tf.trainable_variables())

#2. Watch how the values change across iterations.
with tf.Session() as sess:
    # Initialize
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    # ema.average(w1) fetches w1's moving average; to fetch several nodes
    # at once, list them together inside a single sess.run().
    # Print the current w1 and its moving average:
    print("current global_step:", sess.run(global_step))
    print("current w1", sess.run([w1, ema.average(w1)]))

    # Assign 1 to parameter w1
    sess.run(tf.assign(w1, 1))
    sess.run(ema_op)
    print("current global_step:", sess.run(global_step))
    print("current w1", sess.run([w1, ema.average(w1)]))

    # Simulate step 100 with w1 updated to 10. From here on, global_step
    # stays at 100; every run of the moving-average op updates the shadow.
    sess.run(tf.assign(global_step, 100))
    sess.run(tf.assign(w1, 10))
    sess.run(ema_op)
    print("current global_step:", sess.run(global_step))
    print("current w1:", sess.run([w1, ema.average(w1)]))

    # Each further sess.run(ema_op) updates w1's moving average once more
    for _ in range(6):
        sess.run(ema_op)
        print("current global_step:", sess.run(global_step))
        print("current w1:", sess.run([w1, ema.average(w1)]))

# Change MOVING_AVERAGE_DECAY to 0.1 to see how quickly the shadow follows w1.
"""
current global_step: 0
current w1 [0.0, 0.0]
current global_step: 0
current w1 [1.0, 0.9]
current global_step: 100
current w1: [10.0, 1.6445453] # once w1 has been updated to 10.0, the moving average is 1.6445453
current global_step: 100
current w1: [10.0, 2.3281732]
current global_step: 100
current w1: [10.0, 2.955868]
current global_step: 100
current w1: [10.0, 3.532206]
current global_step: 100
current w1: [10.0, 4.061389]
current global_step: 100
current w1: [10.0, 4.547275]
current global_step: 100
current w1: [10.0, 4.9934072]
"""