-
Simple linear regression
```python
import tensorflow as tf
import numpy

# Generate training data: y = 0.1 * x + 0.3
x_data = numpy.random.rand(100).astype(numpy.float32)
y_data = x_data * 0.1 + 0.3
print(x_data, y_data)

Weights = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
biases = tf.Variable(tf.zeros([1]))

y = Weights * x_data + biases
loss = tf.reduce_mean(tf.square(y - y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(loss)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

for step in range(201):
    sess.run(train)
    if step % 20 == 0:
        print(step, sess.run(Weights), sess.run(biases))
```
-
Matrix multiplication, and two ways to use Session()
```python
import tensorflow as tf

# Create two matrices
matrix1 = tf.constant([[3, 3]])
matrix2 = tf.constant([[2], [2]])
product = tf.matmul(matrix1, matrix2)
# Up to this point we have only described the computation; nothing has actually run

# First way to launch a session and compute
sess = tf.Session()
result = sess.run(product)
print(result)
sess.close()

# Second way: the session closes itself when the block exits
with tf.Session() as sess:
    result = sess.run(product)
    print(result)
```
-
Defining variables, constants, steps, and operations; how a Session holds state
```python
# In TensorFlow, variables must be declared explicitly and added to the graph.
# Basic syntax: state = tf.Variable(...)
import tensorflow as tf

# Define a variable
state = tf.Variable(0, name='counter')
# Define a constant
one = tf.constant(1)
# Define the computation step
new_value = tf.add(state, one)
# Define the assignment operation
update = tf.assign(state, new_value)

# Once variables are defined, initializing them is mandatory
init = tf.global_variables_initializer()

# Launch the Session
with tf.Session() as sess:
    sess.run(init)
    for _ in range(3):
        sess.run(update)
        print(sess.run(state))
```
-
placeholder
```python
# placeholder: sometimes we do not want to bake a value into the graph as a
# constant at definition time, but instead feed it in at computation time.
# A placeholder reserves that spot in the graph.
import tensorflow as tf

# In TensorFlow a placeholder needs a type; float32 is the usual choice
input1 = tf.placeholder(tf.float32)
input2 = tf.placeholder(tf.float32)

# tf.multiply multiplies input1 by input2 and yields output
output = tf.multiply(input1, input2)

with tf.Session() as sess:
    print(sess.run(output, feed_dict={input1: [7.], input2: [2.]}))
```
Activation functions are the oddly shaped functions the AI field has sought out so that networks can adapt to a complex, ever-changing real world. Their defining requirements: 1. they must be nonlinear, because the problems to fit are nonlinear; 2. they must be differentiable, because backpropagation relies on differentiability to pass the error backwards.
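As a quick illustration (an addition, not part of the original notes): TensorFlow 1.x ships the common activations under tf.nn, and the sketch below simply evaluates a few of them on the same inputs:

```python
# A minimal sketch: a few built-in TF 1.x activations evaluated on the same inputs.
import tensorflow as tf
import numpy as np

x = tf.constant(np.linspace(-5, 5, 11), dtype=tf.float32)

with tf.Session() as sess:
    print(sess.run(tf.nn.relu(x)))     # max(0, x): nonlinear, differentiable except at 0
    print(sess.run(tf.nn.sigmoid(x)))  # smooth squash to (0, 1)
    print(sess.run(tf.nn.tanh(x)))     # smooth squash to (-1, 1)
```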
-
A layer-adding function
```python
# Building a neural network layer by hand.
# Newer TensorFlow versions ship ready-made layers (e.g. tf.layers), so this is for learning.
import tensorflow as tf

def add_layer(inputs, in_size, out_size, activation_function=None):
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    # Biases start slightly positive; note the shape [1, out_size] and that the
    # +0.1 belongs inside the Variable, otherwise biases would not be a Variable
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    return outputs
```
-
Data visualization
```python
# Visualizing results: turning data into a plot with matplotlib
import numpy as np
import matplotlib.pyplot as plt

x_data = np.linspace(-1, 1, 300, dtype=np.float32)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape).astype(np.float32)
y_data = np.square(x_data) - 0.5 + noise

plt.figure(1, figsize=(8, 6))
plt.subplot(111)
plt.plot(x_data, y_data, c='red', label='data')
plt.ylim((-1, 5))
plt.legend(loc='best')
plt.show()
```
Animating the training process
```python
# Building and training the network, with live plotting
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

def add_layer(inputs, in_size, out_size, activation_function=None):
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    return outputs

x_data = np.linspace(-1, 1, 300, dtype=np.float32)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape).astype(np.float32)
y_data = np.square(x_data) - 0.5 + noise

xs = tf.placeholder(tf.float32, [None, 1])
ys = tf.placeholder(tf.float32, [None, 1])

# Build the network:
# hidden layer with 1 input, 10 outputs, activation tf.nn.relu
l1 = add_layer(xs, 1, 10, activation_function=tf.nn.relu)
# output layer
prediction = add_layer(l1, 10, 1, activation_function=None)

# Loss: squared differences, summed then averaged
loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))
# Learning rate parameter, between 0 and 1
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

# Initialize variables
init = tf.global_variables_initializer()

# Prepare the plot
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x_data, y_data)
plt.ion()
plt.show()

# Launch the Session and train
with tf.Session() as sess:
    sess.run(init)
    for i in range(1000):
        sess.run(train_step, feed_dict={xs: x_data, ys: y_data})
        # Report every 50 steps to visualize the improvement
        if i % 50 == 0:
            try:
                ax.lines.remove(lines[0])
            except Exception:
                pass
            prediction_value = sess.run(prediction, feed_dict={xs: x_data})
            # plot the prediction
            lines = ax.plot(x_data, prediction_value, 'r-', lw=5)
            plt.pause(0.1)
```
-
Speeding up neural network training. Common schemes include the following (see the sketch after this list):
- Stochastic Gradient Descent (SGD)
- Momentum
- AdaGrad
- RMSProp
- Adam
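In TF 1.x all of these share the same minimize() interface, so switching schemes is a one-line change. A minimal sketch (the toy loss here is my own, just to make the snippet self-contained):

```python
# A minimal sketch: each training scheme is just a different optimizer class.
import tensorflow as tf

w = tf.Variable([0.0])
loss = tf.reduce_mean(tf.square(1.0 - w))  # toy quadratic loss, for illustration only

train_sgd      = tf.train.GradientDescentOptimizer(0.1).minimize(loss)         # SGD
train_momentum = tf.train.MomentumOptimizer(0.1, momentum=0.9).minimize(loss)  # Momentum
train_adagrad  = tf.train.AdagradOptimizer(0.1).minimize(loss)                 # AdaGrad
train_rmsprop  = tf.train.RMSPropOptimizer(0.1).minimize(loss)                 # RMSProp
train_adam     = tf.train.AdamOptimizer(0.001).minimize(loss)                  # Adam
```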
-
Graph visualization (TensorBoard)
```python
# TensorFlow ships a graph visualizer, TensorBoard, which can display the defined graph.
# Build and train the network as before, but wrap the pieces in name scopes.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

def add_layer(inputs, in_size, out_size, activation_function=None):
    # Put everything in this layer inside one name scope
    with tf.name_scope('layer'):
        with tf.name_scope('weights'):
            Weights = tf.Variable(tf.random_normal([in_size, out_size]), name='W')
        with tf.name_scope('biases'):
            biases = tf.Variable(tf.zeros([1, out_size]) + 0.1, name='b')
        with tf.name_scope('Wx_plus_b'):
            Wx_plus_b = tf.add(tf.matmul(inputs, Weights), biases)
        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b)
        return outputs

x_data = np.linspace(-1, 1, 300, dtype=np.float32)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape).astype(np.float32)
y_data = np.square(x_data) - 0.5 + noise

# Layer the graph structure: group both placeholders into one box
with tf.name_scope('inputs'):
    # Give the placeholders names; the earlier examples omitted the name parameter
    xs = tf.placeholder(tf.float32, [None, 1], name='x_in')
    ys = tf.placeholder(tf.float32, [None, 1], name='y_in')

# Build the network: hidden layer with 1 input, 10 outputs, activation tf.nn.relu
l1 = add_layer(xs, 1, 10, activation_function=tf.nn.relu)
# Output layer
prediction = add_layer(l1, 10, 1, activation_function=None)

with tf.name_scope('loss'):
    # Loss: squared differences, summed then averaged
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))

with tf.name_scope('train'):
    # Learning rate parameter, between 0 and 1
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

init = tf.global_variables_initializer()

# Prepare the plot
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x_data, y_data)
plt.ion()
plt.show()

# Launch the Session and train
with tf.Session() as sess:
    sess.run(init)
    # Create the logs folder manually; once the run finishes without errors,
    # launch: tensorboard --logdir logs
    writer = tf.summary.FileWriter("logs/", sess.graph)
    for i in range(1000):
        sess.run(train_step, feed_dict={xs: x_data, ys: y_data})
        # Report every 50 steps
        if i % 50 == 0:
            try:
                ax.lines.remove(lines[0])
            except Exception:
                pass
            prediction_value = sess.run(prediction, feed_dict={xs: x_data})
            # plot the prediction
            lines = ax.plot(x_data, prediction_value, 'r-', lw=5)
            plt.pause(0.1)
```
-
Training visualization: recording variables inside the graph
Record a tensor's distribution with `tf.summary.histogram(layer_name + '/weights', Weights)` and a scalar with `tf.summary.scalar('loss', loss)`. After the Session is initialized, `merged = tf.summary.merge_all()` collects every summary op (in effect the summaries' own initialization); each logging step then runs `rs = sess.run(merged, feed_dict={xs: x_data, ys: y_data})` followed by `writer.add_summary(rs, i)` to record that step.

```python
# TensorBoard can also chart recorded summaries, not just the graph structure.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

def add_layer(inputs, in_size, out_size, layer_n, activation_function=None):
    # Put everything in this layer inside one name scope
    layer_name = 'layer%s' % layer_n
    with tf.name_scope('layer'):
        with tf.name_scope('weights'):
            Weights = tf.Variable(tf.random_normal([in_size, out_size]), name='W')
        with tf.name_scope('biases'):
            biases = tf.Variable(tf.zeros([1, out_size]) + 0.1, name='b')
        with tf.name_scope('Wx_plus_b'):
            Wx_plus_b = tf.add(tf.matmul(inputs, Weights), biases)
        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b)
        # Record analysis data
        tf.summary.histogram(layer_name + '/weights', Weights)
        tf.summary.histogram(layer_name + '/biases', biases)
        tf.summary.histogram(layer_name + '/outputs', outputs)
        return outputs

x_data = np.linspace(-1, 1, 300, dtype=np.float32)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape).astype(np.float32)
y_data = np.square(x_data) - 0.5 + noise

# Group both placeholders into one box in the graph
with tf.name_scope('inputs'):
    xs = tf.placeholder(tf.float32, [None, 1], name='x_in')
    ys = tf.placeholder(tf.float32, [None, 1], name='y_in')

# Hidden layer: 1 input, 10 outputs, activation tf.nn.relu
l1 = add_layer(xs, 1, 10, 1, activation_function=tf.nn.relu)
# Output layer
prediction = add_layer(l1, 10, 1, 2, activation_function=None)

with tf.name_scope('loss'):
    loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))
    # Record analysis data: the loss scalar
    tf.summary.scalar('loss', loss)

with tf.name_scope('train'):
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

init = tf.global_variables_initializer()

# Prepare the plot
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x_data, y_data)
plt.ion()
plt.show()

# Launch the Session and train
with tf.Session() as sess:
    sess.run(init)
    # Gather all summary ops
    merged = tf.summary.merge_all()
    # Create the logs folder manually, then run: tensorboard --logdir logs
    writer = tf.summary.FileWriter("logs/", sess.graph)
    for i in range(1000):
        sess.run(train_step, feed_dict={xs: x_data, ys: y_data})
        if i % 50 == 0:
            # Record the summaries for this step
            rs = sess.run(merged, feed_dict={xs: x_data, ys: y_data})
            writer.add_summary(rs, i)
            try:
                ax.lines.remove(lines[0])
            except Exception:
                pass
            prediction_value = sess.run(prediction, feed_dict={xs: x_data})
            # plot the prediction
            lines = ax.plot(x_data, prediction_value, 'r-', lw=5)
            plt.pause(0.1)
```
-
Classifier: build and test a classifier on the MNIST data. The main new points here are 1. using the MNIST dataset, 2. the cross-entropy function as the optimization objective, and 3. training with gradient descent.
```python
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

def add_layer(inputs, in_size, out_size, layer_n, activation_function=None):
    # Put everything in this layer inside one name scope
    layer_name = 'layer%s' % layer_n
    with tf.name_scope('layer'):
        with tf.name_scope('weights'):
            Weights = tf.Variable(tf.random_normal([in_size, out_size]), name='W')
        with tf.name_scope('biases'):
            biases = tf.Variable(tf.zeros([1, out_size]) + 0.1, name='b')
        with tf.name_scope('Wx_plus_b'):
            Wx_plus_b = tf.add(tf.matmul(inputs, Weights), biases)
        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b)
        # Record analysis data
        tf.summary.histogram(layer_name + '/weights', Weights)
        tf.summary.histogram(layer_name + '/biases', biases)
        tf.summary.histogram(layer_name + '/outputs', outputs)
        return outputs

def compute_accuracy(v_xs, v_ys):
    global prediction
    y_pre = sess.run(prediction, feed_dict={xs: v_xs})
    correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(v_ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    result = sess.run(accuracy, feed_dict={xs: v_xs, ys: v_ys})
    return result

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

xs = tf.placeholder(tf.float32, [None, 784])
ys = tf.placeholder(tf.float32, [None, 10])

prediction = add_layer(xs, 784, 10, 1, activation_function=tf.nn.softmax)

# The loss (optimization objective) is the cross-entropy, which measures how
# similar the prediction and the truth are; identical values give zero cross-entropy
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction),
                                              reduction_indices=[1]))
# The train method (optimization algorithm) is gradient descent
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys})
        if i % 50 == 0:
            print(compute_accuracy(mnist.test.images, mnist.test.labels))
```
-
Overfitting (over-learning). In real-world problems the data source is uncontrollable, so data hostile to the network will always show up, mainly from measurement error, cultural context, and outside interference; in short, data the network was never designed to handle. To counter overfitting, a few ways of improving the network have been found:
- Increase the amount of data. Machine learning draws its results from data, so once the dataset is large enough, the odd anomalous point no longer matters, or opposing samples appear to balance it out (low-probability data). Note that this does not improve the quality of the network itself.
- Regularization. Modify the error function so the network receives feedback of a different kind. The original cost is computed as cost = (prediction - truth)². If W grows too large, we let the cost grow with it, as a penalty, so W itself is folded into the cost. Using abs(W), the absolute value, gives L1 regularization; L2 is the same idea with the absolute value replaced by the square, and L3, L4, and so on use the cube, fourth power, etc. These methods keep the learned curve from bending too wildly. (Adapted from https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-02-A-overfitting/) A sketch of the penalty follows this list.
- Dropout. A regularization method specific to neural networks: dropout denies the network any chance to over-rely on particular nodes, so the information lives in the network as a whole rather than in a few key nodes.
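As a rough sketch of the penalty idea (my addition, not the notes' code; `lambda_` is a hypothetical penalty strength, and `tf.nn.l2_loss` computes sum(W²)/2):

```python
# A minimal sketch of L1/L2 regularization added to a squared-error cost.
import tensorflow as tf

Weights = tf.Variable(tf.random_normal([1, 10]))
prediction = tf.matmul(tf.ones([5, 1]), Weights)  # toy linear model
target = tf.zeros([5, 10])

lambda_ = 0.01  # hypothetical regularization strength (hyperparameter)
mse = tf.reduce_mean(tf.square(target - prediction))
l2_cost = mse + lambda_ * tf.nn.l2_loss(Weights)          # L2: penalize sum of squares
l1_cost = mse + lambda_ * tf.reduce_sum(tf.abs(Weights))  # L1: penalize sum of |W|
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(l2_cost)
```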
-
Comparing the effect of overfitting versus dropout: dropout is very effective on data the network has not seen before, but when data is limited and training runs too long, the benefit can reverse.
```python
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.set_random_seed(1)
np.random.seed(1)

# Hyperparameters
N_SAMPLES = 20
N_HIDDEN = 300
LR = 0.01

# training data
x = np.linspace(-1, 1, N_SAMPLES)[:, np.newaxis]
y = x + 0.3 * np.random.randn(N_SAMPLES)[:, np.newaxis]

# test data
test_x = x.copy()
test_y = test_x + 0.3 * np.random.randn(N_SAMPLES)[:, np.newaxis]

# show data
plt.scatter(x, y, c='magenta', s=50, alpha=0.5, label='train')
plt.scatter(test_x, test_y, c='cyan', s=50, alpha=0.5, label='test')
plt.legend(loc='upper left')
plt.ylim((-2.5, 2.5))
plt.show()

# tf placeholders
tf_x = tf.placeholder(tf.float32, [None, 1])
tf_y = tf.placeholder(tf.float32, [None, 1])
tf_is_training = tf.placeholder(tf.bool, None)  # to control dropout when training and testing

# overfitting net
o1 = tf.layers.dense(tf_x, N_HIDDEN, tf.nn.relu)
o2 = tf.layers.dense(o1, N_HIDDEN, tf.nn.relu)
o_out = tf.layers.dense(o2, 1)
o_loss = tf.losses.mean_squared_error(tf_y, o_out)
o_train = tf.train.AdamOptimizer(LR).minimize(o_loss)

# dropout net
d1 = tf.layers.dense(tf_x, N_HIDDEN, tf.nn.relu)
d1 = tf.layers.dropout(d1, rate=0.5, training=tf_is_training)  # drop out 50% of inputs
d2 = tf.layers.dense(d1, N_HIDDEN, tf.nn.relu)
d2 = tf.layers.dropout(d2, rate=0.5, training=tf_is_training)  # drop out 50% of inputs
d_out = tf.layers.dense(d2, 1)
d_loss = tf.losses.mean_squared_error(tf_y, d_out)
d_train = tf.train.AdamOptimizer(LR).minimize(d_loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

plt.ion()  # interactive plotting

for t in range(5000):
    sess.run([o_train, d_train], {tf_x: x, tf_y: y, tf_is_training: True})  # train, is_training=True

    if t % 50 == 0:
        # plotting
        plt.cla()
        o_loss_, d_loss_, o_out_, d_out_ = sess.run(
            [o_loss, d_loss, o_out, d_out],
            {tf_x: test_x, tf_y: test_y, tf_is_training: False}  # test, is_training=False
        )
        plt.scatter(x, y, c='magenta', s=50, alpha=0.3, label='train')
        plt.scatter(test_x, test_y, c='cyan', s=50, alpha=0.3, label='test')
        plt.plot(test_x, o_out_, 'r-', lw=3, label='overfitting')
        plt.plot(test_x, d_out_, 'b--', lw=3, label='dropout(50%)')
        plt.text(0, -1.2, 'overfitting loss=%.4f' % o_loss_, fontdict={'size': 20, 'color': 'red'})
        plt.text(0, -1.5, 'dropout loss=%.4f' % d_loss_, fontdict={'size': 20, 'color': 'blue'})
        plt.legend(loc='upper left')
        plt.ylim((-2.5, 2.5))
        plt.pause(0.1)

plt.ioff()
plt.show()
```
-
Convolutional neural networks are very demanding on compute; a PC already feels slow here. Reference: https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-03-A-CNN/
```python
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt

tf.set_random_seed(1)
np.random.seed(1)

BATCH_SIZE = 50
LR = 0.001  # learning rate

mnist = input_data.read_data_sets('./mnist', one_hot=True)  # images have been normalized to range (0,1)
test_x = mnist.test.images[:2000]
test_y = mnist.test.labels[:2000]

# plot one example
print(mnist.train.images.shape)  # (55000, 28 * 28)
print(mnist.train.labels.shape)  # (55000, 10)
plt.imshow(mnist.train.images[0].reshape((28, 28)), cmap='gray')
plt.title('%i' % np.argmax(mnist.train.labels[0]))
plt.show()

tf_x = tf.placeholder(tf.float32, [None, 28*28]) / 255.
image = tf.reshape(tf_x, [-1, 28, 28, 1])  # (batch, height, width, channel)
tf_y = tf.placeholder(tf.int32, [None, 10])  # input y

# CNN
conv1 = tf.layers.conv2d(       # shape (28, 28, 1)
    inputs=image,
    filters=16,
    kernel_size=5,
    strides=1,
    padding='same',
    activation=tf.nn.relu
)                               # -> (28, 28, 16)
pool1 = tf.layers.max_pooling2d(
    conv1,
    pool_size=2,
    strides=2,
)                               # -> (14, 14, 16)
conv2 = tf.layers.conv2d(pool1, 32, 5, 1, 'same', activation=tf.nn.relu)  # -> (14, 14, 32)
pool2 = tf.layers.max_pooling2d(conv2, 2, 2)                              # -> (7, 7, 32)
flat = tf.reshape(pool2, [-1, 7*7*32])                                    # -> (7*7*32, )
output = tf.layers.dense(flat, 10)                                        # output layer

loss = tf.losses.softmax_cross_entropy(onehot_labels=tf_y, logits=output)  # compute cost
train_op = tf.train.AdamOptimizer(LR).minimize(loss)

accuracy = tf.metrics.accuracy(  # returns (acc, update_op) and creates 2 local variables
    labels=tf.argmax(tf_y, axis=1),
    predictions=tf.argmax(output, axis=1),)[1]

sess = tf.Session()
init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())  # the local vars are for the accuracy op
sess.run(init_op)  # initialize vars in graph

# the following function (plot_with_labels) is for visualization; skip it if not interested
from matplotlib import cm
try:
    from sklearn.manifold import TSNE
    HAS_SK = True
except ImportError:
    HAS_SK = False
    print('\nPlease install sklearn for layer visualization\n')

def plot_with_labels(lowDWeights, labels):
    plt.cla()
    X, Y = lowDWeights[:, 0], lowDWeights[:, 1]
    for x, y, s in zip(X, Y, labels):
        c = cm.rainbow(int(255 * s / 9))
        plt.text(x, y, s, backgroundcolor=c, fontsize=9)
    plt.xlim(X.min(), X.max())
    plt.ylim(Y.min(), Y.max())
    plt.title('Visualize last layer')
    plt.show()
    plt.pause(0.01)

plt.ion()
for step in range(600):
    b_x, b_y = mnist.train.next_batch(BATCH_SIZE)
    _, loss_ = sess.run([train_op, loss], {tf_x: b_x, tf_y: b_y})
    if step % 50 == 0:
        accuracy_, flat_representation = sess.run([accuracy, flat], {tf_x: test_x, tf_y: test_y})
        print('Step:', step, '| train loss: %.4f' % loss_, '| test accuracy: %.2f' % accuracy_)

        if HAS_SK:
            # Visualization of the trained flatten layer (T-SNE)
            tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
            plot_only = 500
            low_dim_embs = tsne.fit_transform(flat_representation[:plot_only, :])
            labels = np.argmax(test_y, axis=1)[:plot_only]
            plot_with_labels(low_dim_embs, labels)
plt.ioff()

# print 10 predictions from test data
test_output = sess.run(output, {tf_x: test_x[:10]})
pred_y = np.argmax(test_output, 1)
print(pred_y, 'prediction number')
print(np.argmax(test_y[:10], 1), 'real number')
```
-
Saving and restoring a network.
1. Saving, which is essentially saving the session's variables.

```python
import tensorflow as tf
import numpy as np

## Save to file
# remember to define the same dtype and shape when you restore
W = tf.Variable([[1, 2, 3], [3, 4, 5]], dtype=tf.float32, name='weights')
b = tf.Variable([[1, 2, 3]], dtype=tf.float32, name='biases')

# use this instead of the deprecated tf.initialize_all_variables()
init = tf.global_variables_initializer()

saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    save_path = saver.save(sess, "my_net/save_net.ckpt")
    print("Save to path: ", save_path)
```
2. Restoring, which recovers the session's variables.
```python
import tensorflow as tf
import numpy as np

# First build containers for W and b with the same shape and dtype as when saved
W = tf.Variable(np.arange(6).reshape((2, 3)), dtype=tf.float32, name="weights")
b = tf.Variable(np.arange(3).reshape((1, 3)), dtype=tf.float32, name="biases")

# No initialization step is needed here: restore() loads the saved values
saver = tf.train.Saver()
with tf.Session() as sess:
    # restore the variables
    saver.restore(sess, "my_net/save_net.ckpt")
    print("weights:", sess.run(W))
    print("biases:", sess.run(b))
```
Recurrent neural networks. Reference: https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-07-A-RNN/
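The notes stop at the reference, but as a minimal sketch of the TF 1.x RNN API (my addition; the sizes are hypothetical, loosely matching one MNIST row per time step):

```python
# A minimal sketch of an LSTM unrolled with tf.nn.dynamic_rnn; sizes are hypothetical.
import tensorflow as tf

TIME_STEPS, INPUT_SIZE, CELL_SIZE, N_CLASSES = 28, 28, 64, 10

x = tf.placeholder(tf.float32, [None, TIME_STEPS, INPUT_SIZE])
cell = tf.nn.rnn_cell.BasicLSTMCell(CELL_SIZE)
# outputs: (batch, time, CELL_SIZE); final_state: the cell state after the last step
outputs, final_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
# a classifier typically feeds the last time step's output into a dense layer
logits = tf.layers.dense(outputs[:, -1, :], N_CLASSES)
```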