TensorFlow北大公开课学习笔记4.5-神经网络搭建八股

https://mlnotebook.github.io/post/CNN1/  这个Convolutional Neural Networks - Basics 很优秀,但是放错地方了。大家忽略。

哈哈哈

搭建模块化的神经网络八股:使用了学习率衰减、L2正则化

我还尝试了交叉熵和滑动平均,但都没有得到很好的效果,算是失败了,不确定是不是参数设置的问题。

TensorFlow北大公开课学习笔记4.5-神经网络搭建八股_第1张图片

#coding:utf-8
#0导入模块 ,生成模拟数据集
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import opt4_8_generateds
import opt4_8_forward

# Hyperparameters read by backward() below.
STEPS = 40000  # number of training iterations
BATCH_SIZE = 30  # samples fed per training step
LEARNING_RATE_BASE = 0.001  # initial learning rate passed to exponential_decay
LEARNING_RATE_DECAY = 0.999  # decay rate passed to exponential_decay
REGULARIZER = 0.01  # regularisation weight handed to forward()

def backward():
    """Train the network from ``opt4_8_forward`` and plot its decision boundary.

    The loss is the mean squared error plus every term stored in the
    ``'losses'`` collection, minimised with Adam under an exponentially
    decaying learning rate.  The total loss over the whole dataset is
    printed every 2000 steps.  After training, the 0.5 contour of the
    network output over a grid on [-3, 3) x [-3, 3) is drawn on top of the
    data points.
    """
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))

    X, Y_, Y_c = opt4_8_generateds.generateds()
    # Derive the dataset size instead of repeating a literal 300.
    num_samples = len(X)

    y = opt4_8_forward.forward(x, REGULARIZER)

    global_step = tf.Variable(0, trainable=False)

    # One decay step per pass over the dataset (num_samples / BATCH_SIZE batches).
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        num_samples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    # Loss: mean squared error plus the collected regularisation terms.
    loss_mse = tf.reduce_mean(tf.square(y - y_))
    loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))

    # global_step must be handed to minimize(); otherwise it is never
    # incremented and the learning-rate schedule above stays at its base value.
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(
        loss_total, global_step=global_step)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(STEPS):
            # Walk through the dataset in consecutive batches, wrapping around.
            start = (i * BATCH_SIZE) % num_samples
            end = start + BATCH_SIZE
            sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
            if i % 2000 == 0:
                loss_v = sess.run(loss_total, feed_dict={x: X, y_: Y_})
                print("After %d steps, loss is: %f" % (i, loss_v))

        # Evaluate the trained network on a dense grid for the contour plot.
        xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
        grid = np.c_[xx.ravel(), yy.ravel()]
        probs = sess.run(y, feed_dict={x: grid})
        probs = probs.reshape(xx.shape)

    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    plt.contour(xx, yy, probs, levels=[.5])
    plt.show()
	
# Run the training only when this file is executed as a script.
if __name__=='__main__':
	backward()

 

#coding:utf-8
# 0. Import modules
import tensorflow as tf

#定义神经网络的输入、参数和输出,定义前向传播过程 
def get_weight(shape, regularizer):
    """Create a weight variable and register its L2 penalty.

    The variable is initialised from ``tf.random_normal(shape)``.  Its L2
    regularisation term, scaled by ``regularizer``, is added to the
    ``'losses'`` collection.  Returns the variable.
    """
    initial_value = tf.random_normal(shape)
    weight = tf.Variable(initial_value, dtype=tf.float32)
    l2_penalty = tf.contrib.layers.l2_regularizer(regularizer)(weight)
    tf.add_to_collection('losses', l2_penalty)
    return weight

def get_bias(shape):
    """Return a bias variable of the given shape, filled with 0.01.

    ``shape`` is effectively the number of biases in the layer.
    """
    initial_value = tf.constant(0.01, shape=shape)
    return tf.Variable(initial_value)
	
def forward(x, regularizer):
    """Build the forward pass: 2 inputs -> 11 ReLU units -> 1 linear output.

    ``regularizer`` is forwarded to ``get_weight`` for both weight matrices.
    Returns the output tensor of the second layer (no activation applied).
    """
    hidden_units = 11

    # Hidden layer.
    w1 = get_weight([2, hidden_units], regularizer)
    b1 = get_bias([hidden_units])
    hidden_pre_activation = tf.matmul(x, w1) + b1
    y1 = tf.nn.relu(hidden_pre_activation)

    # Output layer.
    w2 = get_weight([hidden_units, 1], regularizer)
    b2 = get_bias([1])
    return tf.matmul(y1, w2) + b2

 

#coding:utf-8
#0导入模块 ,生成模拟数据集
import numpy as np
import matplotlib.pyplot as plt
seed = 2


def generateds():
    """Generate the toy dataset: 300 random 2-D points with circle labels.

    Returns a tuple ``(X, Y_, Y_c)``:
      * ``X``   -- array of shape (300, 2) drawn from ``RandomState(seed).randn``.
      * ``Y_``  -- integer array of shape (300, 1); 1 where x0^2 + x1^2 < 2, else 0.
      * ``Y_c`` -- list of one-element lists, ``['red']`` for label 1 and
                   ``['blue']`` for label 0, used as scatter-plot colours.
    """
    rng = np.random.RandomState(seed)
    X = rng.randn(300, 2)

    # Label each point by whether it lies inside the circle x0^2 + x1^2 = 2.
    x0, x1 = X[:, 0], X[:, 1]
    inside_circle = x0 * x0 + x1 * x1 < 2

    Y_ = inside_circle.astype(int).reshape(-1, 1)
    Y_c = [['red'] if inside else ['blue'] for inside in inside_circle]

    return X, Y_, Y_c
	
#print X
#print Y_
#print Y_c
#用plt.scatter画出数据集X各行中第0列元素和第1列元素的点即各行的(x0,x1),用各行Y_c对应的值表示颜色(c是color的缩写) 
#plt.scatter(X[:,0], X[:,1], c=np.squeeze(Y_c)) 
#plt.show()

                                    TensorFlow北大公开课学习笔记4.5-神经网络搭建八股_第2张图片

                                       TensorFlow北大公开课学习笔记4.5-神经网络搭建八股_第3张图片 

你不妨试一试,看和你的一样吗?

什么叫滑动平均?我试用之后发现没有得到想要的结果。

       记录了一段时间内模型中所有参数 w 和 b 各自的平均值。利用滑动平均值可以增强模型的泛化能力。

      TensorFlow北大公开课学习笔记4.5-神经网络搭建八股_第4张图片

用 TensorFlow 函数表示为:
√ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
其中, MOVING_AVERAGE_DECAY 表示滑动平均衰减率,一般会赋接近 1 的值, global_step 表示当前
训练了多少轮。
√ema_op = ema.apply(tf.trainable_variables())
其中, ema.apply()函数实现对括号内参数求滑动平均, tf.trainable_variables()函数实现把所有
待训练参数汇总为列表。
√with tf.control_dependencies([train_step, ema_op]):
    train_op = tf.no_op(name='train')
其中,该函数实现将滑动平均和训练过程同步运行。
查看模型中参数的平均值,可以用 ema.average()函数。

  • 想要用滑动平均.....

就主函数变了,其余都没有变。

#coding:utf-8
#0导入模块 ,生成模拟数据集
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import opt4_8_generateds
import opt4_8_forward

STEPS = 40000  # number of training iterations
BATCH_SIZE = 30  # batch size
LEARNING_RATE_BASE = 0.001  # initial learning rate
LEARNING_RATE_DECAY = 0.999  # learning-rate decay rate
REGULARIZER = 0.01 # regularisation weight: the share of the penalty on w in the total loss
MOVING_AVERAGE_DECAY = 0.99  # decay passed to tf.train.ExponentialMovingAverage

def backward():
    """Train with Adam plus an exponential moving average (EMA) of the weights.

    The loss is the mean squared error plus every term stored in the
    ``'losses'`` collection, minimised under an exponentially decaying
    learning rate.  Each training step also updates the EMA shadow copy of
    every trainable variable.  The total loss printed every 2000 steps is
    computed with the raw (non-averaged) weights.  After training, the
    averaged values are copied into the variables, so the plotted 0.5
    contour over [-3, 3) x [-3, 3) reflects the moving-averaged model.
    """
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))

    X, Y_, Y_c = opt4_8_generateds.generateds()
    # Derive the dataset size instead of repeating a literal 300.
    num_samples = len(X)

    y = opt4_8_forward.forward(x, REGULARIZER)

    global_step = tf.Variable(0, trainable=False)

    # One decay step per pass over the dataset (num_samples / BATCH_SIZE batches).
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        num_samples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    # Loss: mean squared error plus the collected regularisation terms.
    loss_mse = tf.reduce_mean(tf.square(y - y_))
    loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))

    # Back-propagation step; also increments global_step.
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(
        loss_total, global_step=global_step)

    # Chain the EMA update after the optimiser step so that one sess.run
    # performs both, in that order.
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    trainable_vars = tf.trainable_variables()
    with tf.control_dependencies([train_step]):
        train_op = ema.apply(trainable_vars)

    # Overwrites every trained variable with its shadow (averaged) value.
    # Without this the averages are maintained but never influence the output.
    load_averages = tf.group(
        *[tf.assign(var, ema.average(var)) for var in trainable_vars])

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(STEPS):
            # Walk through the dataset in consecutive batches, wrapping around.
            start = (i * BATCH_SIZE) % num_samples
            end = start + BATCH_SIZE
            sess.run(train_op, feed_dict={x: X[start:end], y_: Y_[start:end]})
            if i % 2000 == 0:
                loss_v = sess.run(loss_total, feed_dict={x: X, y_: Y_})
                print("After %d steps, loss is: %f" % (i, loss_v))

        # Switch to the averaged weights before evaluating the grid.
        sess.run(load_averages)

        xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
        grid = np.c_[xx.ravel(), yy.ravel()]
        probs = sess.run(y, feed_dict={x: grid})
        probs = probs.reshape(xx.shape)

    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    plt.contour(xx, yy, probs, levels=[.5])
    plt.show()
	
# Run the training only when this file is executed as a script.
if __name__=='__main__':
	backward()

结果:左图是使用滑动平均的结果,右图是没有使用滑动平均的结果。对比两者发现,滑动平均并没有带来更好的泛化性,分界线比较尖锐,而且 loss 下降得也不平滑。

TensorFlow北大公开课学习笔记4.5-神经网络搭建八股_第5张图片TensorFlow北大公开课学习笔记4.5-神经网络搭建八股_第6张图片

TensorFlow北大公开课学习笔记4.5-神经网络搭建八股_第7张图片TensorFlow北大公开课学习笔记4.5-神经网络搭建八股_第8张图片

参考下面的例子

#coding:utf-8
import tensorflow as tf

# 1. Define the variable and the moving-average object.
# w1 is a float32 variable starting at 0.  The script keeps changing w1 and
# the moving average maintains a "shadow" copy that trails behind it.
w1 = tf.Variable(0, dtype=tf.float32)
# Step counter (num_updates) starting at 0; marked non-trainable.
global_step = tf.Variable(0, trainable=False)
# Moving-average object with decay 0.99, tied to global_step.
MOVING_AVERAGE_DECAY = 0.99
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
# ema.apply takes the list of variables to average; every sess.run(ema_op)
# updates their shadow values.  tf.trainable_variables() collects all
# trainable variables (here that is just w1, equivalent to ema.apply([w1])).
ema_op = ema.apply(tf.trainable_variables())

# 2. Watch how w1 and its moving average change across updates.
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    # Initial state: print w1 together with its shadow value, which is
    # fetched through ema.average(w1).
    print("current global_step:", sess.run(global_step))
    print("current w1", sess.run([w1, ema.average(w1)]))

    # Set w1 to 1 and apply one moving-average update.
    sess.run(tf.assign(w1, 1))
    sess.run(ema_op)
    print("current global_step:", sess.run(global_step))
    print("current w1", sess.run([w1, ema.average(w1)]))

    # Simulate step 100 with w1 = 10.  global_step stays at 100 from here on;
    # each further ema_op run moves the shadow value closer to w1.
    sess.run(tf.assign(global_step, 100))
    sess.run(tf.assign(w1, 10))
    for _ in range(7):
        sess.run(ema_op)
        print("current global_step:", sess.run(global_step))
        print("current w1:", sess.run([w1, ema.average(w1)]))

#更改MOVING_AVERAGE_DECAY 为 0.1  看影子追随速度

"""

current global_step: 0
current w1 [0.0, 0.0] 
current global_step: 0
current w1 [1.0, 0.9]
current global_step: 100
current w1: [10.0, 1.6445453] #当w1更新到10.0时 滑动平均等于1.6445453
current global_step: 100
current w1: [10.0, 2.3281732]
current global_step: 100
current w1: [10.0, 2.955868]
current global_step: 100
current w1: [10.0, 3.532206]
current global_step: 100
current w1: [10.0, 4.061389]
current global_step: 100
current w1: [10.0, 4.547275]
current global_step: 100
current w1: [10.0, 4.9934072]

"""
  • 想用交叉熵还是......

TensorFlow北大公开课学习笔记4.5-神经网络搭建八股_第9张图片TensorFlow北大公开课学习笔记4.5-神经网络搭建八股_第10张图片
 

你可能感兴趣的:(Tensorflow,python)