Posting the code directly. This is chapter 3; the original tutorial is here: http://blog.csdn.net/jerr__y/article/category/6747409. A big thank-you to 永永夜.
The dataset and notebooks come from the same author: https://github.com/yongyehuang/Tensorflow-Tutorial
One difference: the author's version runs on a GPU, while mine is the CPU version running inside a virtual machine.
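Since the original tutorial assumes a GPU, it can help to pin the session to the CPU explicitly when running in a VM. A minimal sketch (my addition, not part of the original code), assuming TensorFlow 1.x:

import tensorflow as tf

# Hide all GPUs from this session so every op is placed on the CPU
# (assumption: TF 1.x; the original tutorial does not configure this).
config = tf.ConfigProto(device_count={'GPU': 0})
sess = tf.InteractiveSession(config=config)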
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)  # load the MNIST dataset
sess = tf.InteractiveSession()  # initialize the session

print(mnist.test.labels.shape)
print(mnist.train.labels.shape)  # input/output sizes of the data

# weight-initialization helper
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    # tf.truncated_normal(shape, mean, stddev): shape is the shape of the generated tensor,
    # mean the mean, stddev the standard deviation. It samples a truncated normal distribution:
    # any draw that differs from the mean by more than two standard deviations is discarded and
    # redrawn. So unlike an ordinary normal sampler, whose draws can land anywhere, values from
    # this function never lie more than two stddevs from the mean.
    # Reference: http://blog.csdn.net/u013713117/article/details/65446361
    return tf.Variable(initial)
    # if tf.Variable is unclear, see: http://blog.csdn.net/gg_18826075157/article/details/78368924

# bias-initialization helper
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

# convolution layer
def conv2d(x, W):
    # by default strides[0] = strides[3] = 1; strides[1] is the stride along x, strides[2] along y
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', use_cudnn_on_gpu=False)
    # if tf.nn.conv2d is unclear, see: https://www.cnblogs.com/qggg/p/6832342.html

# pooling layer
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    # pooling explained: http://blog.csdn.net/mao_xiao_feng/article/details/53453926

X_ = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])

# reshape X into the form the convolution expects
X = tf.reshape(X_, [-1, 28, 28, 1])

# first conv layer: 32 kernels of size 5x5x1, i.e. [5,5,1,32]; h_conv1.shape = [-1, 28, 28, 32]
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(X, W_conv1) + b_conv1)

# first pooling layer: [-1, 28, 28, 32] -> [-1, 14, 14, 32]
h_pool1 = max_pool_2x2(h_conv1)

# second conv layer: 64 kernels of size 5x5x32, i.e. [5,5,32,64]; h_conv2.shape = [-1, 14, 14, 64]
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

# second pooling layer: [-1, 14, 14, 64] -> [-1, 7, 7, 64]
h_pool2 = max_pool_2x2(h_conv2)

# flatten layer: [-1, 7, 7, 64] -> [-1, 7*7*64], i.e. one 7*7*64-dimensional vector per sample
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])

# fc1
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# dropout: the output has the same shape as h_fc1, except that a random subset of values is zeroed
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# tf.nn.dropout: https://www.cnblogs.com/qggg/p/6849881.html

# output layer
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

# Aside: while working through this example I once hit a "resources exhausted" error. Why?
# -> Because train_acc used to be evaluated on all 55,000 training images at once, which blew up the (GPU) memory.
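The comments above claim that tf.truncated_normal redraws any sample farther than two standard deviations from the mean. A quick sanity check (my addition, reusing the InteractiveSession created above) confirms this and the layer shapes promised by the comments:

samples = tf.truncated_normal([10000], mean=0.0, stddev=0.1).eval()
print(np.abs(samples).max() <= 2 * 0.1)   # True: every draw lies within two stddevs of the mean

print(h_conv1.get_shape())       # (?, 28, 28, 32)
print(h_pool1.get_shape())       # (?, 14, 14, 32)
print(h_pool2.get_shape())       # (?, 7, 7, 64)
print(h_pool2_flat.get_shape())  # (?, 3136), i.e. 7*7*64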
# 1. loss function: cross_entropy
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))

# 2. optimizer: AdamOptimizer
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# tf.train.AdamOptimizer: http://blog.csdn.net/xierhacker/article/details/53174558

# 3. tallying correct predictions
# a prediction is correct when the position of the max value in the prediction (the predicted class)
# equals the position of the 1 in the one-hot label; argmax() returns the index of the maximum
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# evaluating the whole test set in one go may use a lot of memory,
# so accuracy can also be measured with smaller test batches
test_acc_sum = tf.Variable(0.0)
batch_acc = tf.placeholder(tf.float32)
new_test_acc_sum = tf.add(test_acc_sum, batch_acc)
update = tf.assign(test_acc_sum, new_test_acc_sum)

# variables must be initialized once defined, e.g. with the line below
sess.run(tf.global_variables_initializer())
# or a single variable can be initialized by itself, e.g.:
# x.initializer.run()

# training
for i in range(5000):
    X_batch, y_batch = mnist.train.next_batch(batch_size=50)
    if i % 500 == 0:
        train_accuracy = accuracy.eval(feed_dict={X_: X_batch, y_: y_batch, keep_prob: 1.0})
        print("step %d, training acc %g" % (i, train_accuracy))
    train_step.run(feed_dict={X_: X_batch, y_: y_batch, keep_prob: 0.5})

# test only after training finishes, with batch_size=100
for i in range(100):
    X_batch, y_batch = mnist.test.next_batch(batch_size=100)
    test_acc = accuracy.eval(feed_dict={X_: X_batch, y_: y_batch, keep_prob: 1.0})
    update.eval(feed_dict={batch_acc: test_acc})
    if (i+1) % 20 == 0:
        print("testing step %d, test_acc_sum %g" % (i+1, test_acc_sum.eval()))
print(" test_accuracy %g" % (test_acc_sum.eval() / 100.0))
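Two hedged variations on the code above (my assumptions, not from the original tutorial). First, -tf.reduce_sum(y_ * tf.log(y_conv)) turns into NaN if y_conv ever contains an exact zero; TF 1.x ships a fused op that computes softmax and cross-entropy from the logits in one numerically stable step. Second, the tf.assign counter for test accuracy can be replaced by plain Python accumulation:

# (a) A numerically stable loss. The fused op below would replace cross_entropy
#     before train_step is built (assumption: TF 1.x API).
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
cross_entropy_stable = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))

# (b) Accumulating batch accuracies in Python instead of via tf.assign,
#     which avoids adding extra nodes to the graph:
batch_accs = []
for _ in range(100):
    X_batch, y_batch = mnist.test.next_batch(batch_size=100)
    batch_accs.append(accuracy.eval(feed_dict={X_: X_batch, y_: y_batch, keep_prob: 1.0}))
print("test_accuracy %g" % np.mean(batch_accs))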