牛逼的初始化(必学)
初始化非常重要:有人用正态分布(normal)初始化CNN的参数,最终准确率(acc)只能到70%多;仅仅把初始化方式改成Xavier,准确率就可以达到98%。
1.修改初始化过程
导入
from tensorflow.contrib.layers import xavier_initializer
原来的
# Original approach: draw the initial weights from a truncated normal
# distribution with a small standard deviation.
initial_weights = tf.truncated_normal(shape=[n_hiddens, n_classes], stddev=0.01)
Weights = tf.Variable(initial_value=initial_weights, dtype=tf.float32, name='W')
改成xavier(xavier_initializer()默认uniform=True,即均匀分布;传入uniform=False时才是高斯分布)
# Xavier variant: let tf.get_variable create the weights with the Xavier
# initializer instead of passing an explicit initial value.
Weights = tf.get_variable(
    name='W',
    shape=[n_hiddens, n_classes],
    dtype=tf.float32,
    initializer=xavier_initializer(),
)
2.然后在会话(Session)中运行变量初始化操作
# Initialization: build the op that initializes every global variable,
# then run it once inside a session.
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
3.完整代码
转自:代码地址
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# created by fhqplzj on 2017/07/07 下午3:22
import random
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib.layers import xavier_initializer
from tensorflow.examples.tutorials.mnist import input_data
# Training hyperparameters.
learning_rate = 0.001
training_epochs = 15
batch_size = 100

# Load the MNIST data sets from the given directory, with one-hot labels.
mnist = input_data.read_data_sets(
    '/Users/fhqplzj/PycharmProjects/tensorflow_examples/tutorials/rnn/translate',
    one_hot=True,
)

# Set the graph-level seed before any random op is created below.
tf.set_random_seed(777)

# Graph inputs. The leading None leaves the batch dimension open.
X = tf.placeholder(tf.float32, shape=[None, 784])  # input features
Y = tf.placeholder(tf.float32, shape=[None, 10])   # one-hot labels
# Dropout keep probability; fed as 0.7 for training and 1 for evaluation.
keep_prob = tf.placeholder(tf.float32)
def _dense_relu_dropout(inputs, weight_name, n_in, n_out):
    """Build one hidden layer: dropout(relu(inputs x W + b)).

    The ops are created in the same order as a hand-unrolled layer
    (weights, bias, matmul/add/relu, dropout), so variable naming and
    seeded initial values are unaffected by using this helper.

    Args:
        inputs: tensor feeding the layer.
        weight_name: name passed to tf.get_variable for the weight matrix.
        n_in: number of input units (rows of the weight matrix).
        n_out: number of output units (columns of the weight matrix).

    Returns:
        A (weights, bias, output) tuple, where output is the layer
        activation after dropout.
    """
    weights = tf.get_variable(
        name=weight_name,
        shape=[n_in, n_out],
        dtype=tf.float32,
        initializer=xavier_initializer(),
    )
    bias = tf.Variable(tf.random_normal([n_out]))
    activated = tf.nn.relu(tf.add(tf.matmul(inputs, weights), bias))
    return weights, bias, tf.nn.dropout(activated, keep_prob=keep_prob)


# Four hidden layers of 512 units each: 784 -> 512 -> 512 -> 512 -> 512.
W1, b1, L1 = _dense_relu_dropout(X, 'W1', 784, 512)
W2, b2, L2 = _dense_relu_dropout(L1, 'W2', 512, 512)
W3, b3, L3 = _dense_relu_dropout(L2, 'W3', 512, 512)
W4, b4, L4 = _dense_relu_dropout(L3, 'W4', 512, 512)

# Output layer: a plain affine map to 10 logits (no activation, no dropout).
W5 = tf.get_variable(
    name='W5',
    shape=[512, 10],
    dtype=tf.float32,
    initializer=xavier_initializer(),
)
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.add(tf.matmul(L4, W5), b5)
# Loss: softmax cross-entropy between the logits and the one-hot labels,
# averaged over the batch.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=hypothesis)
cost = tf.reduce_mean(per_example_loss)

# `optimizer` is the training op returned by minimize(); running it
# performs one Adam update step.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of examples whose arg-max logit matches the arg-max label.
true_class = tf.argmax(Y, 1)
predicted_class = tf.argmax(hypothesis, 1)
correct_prediction = tf.equal(true_class, predicted_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Train the network, report test accuracy, then show one random test digit
# together with its label and the model's prediction.
#
# Every print below is a single-argument call, so the same text is emitted
# on Python 2 and Python 3 (the original print statements are Python 2 only).
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch = mnist.train.num_examples // batch_size
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Dropout is active during training: keep 70% of the units.
            c, _ = sess.run([cost, optimizer], feed_dict={
                X: batch_xs,
                Y: batch_ys,
                keep_prob: 0.7
            })
            # Accumulate the mean of the per-batch costs for this epoch.
            avg_cost += c / total_batch
        print('Epoch: {:04d} cost = {:.9f}'.format(epoch + 1, avg_cost))

    print('Learning Finished!')

    # Evaluate on the whole test set with dropout disabled (keep_prob = 1).
    test_accuracy = sess.run(accuracy, feed_dict={
        X: mnist.test.images,
        Y: mnist.test.labels,
        keep_prob: 1
    })
    print('Accuracy: {}'.format(test_accuracy))

    # Pick one random test example and compare label with prediction.
    r = random.randint(0, mnist.test.num_examples - 1)
    label = sess.run(tf.argmax(mnist.test.labels[r:r + 1], 1))
    # Two spaces after the colon reproduce the original output, where the
    # literal 'Label: ' was followed by print's own separator space.
    print('Label:  {}'.format(label))
    prediction = sess.run(tf.argmax(hypothesis, 1), feed_dict={
        X: mnist.test.images[r:r + 1],
        keep_prob: 1
    })
    print('Prediction:  {}'.format(prediction))

    # The flat 784-vector is reshaped to 28x28 for display.
    plt.imshow(mnist.test.images[r:r + 1].reshape(28, 28), cmap='Greys', interpolation='nearest')
    plt.show()