为了防止过拟合的发生,通常会使用正则化法或者dropout法来解决。
首先给出使用dropout法防止过拟合的程序,该方法是随机选取一些神经元并将它们的激活值设置为0(即不参与之后的运算),同时维度保持不变,使得最终的参数数据由多个子训练集共同求出:
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.preprocessing import LabelBinarizer

# train_test_split lives in sklearn.model_selection in current scikit-learn;
# the old sklearn.cross_validation module was removed in release 0.20.
# Try the current location first and fall back only on very old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
# Load the scikit-learn digits dataset; the labels are binarized into one
# indicator column per class so they match the 10-wide label placeholder.
digits = load_digits()
X, y = digits.data, LabelBinarizer().fit_transform(digits.target)

# Hold out 30% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=.3,
)
def add_layer(inputs, in_size, out_size, layer_name, activation_function=None):
    """Append one fully connected layer with dropout and return its output.

    Args:
        inputs: Tensor fed into the layer; it is matrix-multiplied with an
            ``[in_size, out_size]`` weight matrix.
        in_size: Number of input units.
        out_size: Number of output units.
        layer_name: Prefix of the histogram summary tag
            (``layer_name + '/outputs'``).
        activation_function: Optional callable applied after dropout. When
            ``None`` the layer output is the (dropped-out) linear term.

    Returns:
        The output tensor of the layer.

    Note:
        Reads the module-level ``keep_prob`` placeholder, so that name must
        be defined before this function is called.
    """
    weights = tf.Variable(tf.random_normal([in_size, out_size]))
    bias = tf.Variable(tf.zeros([1, out_size]) + 0.1)

    # Dropout is applied to the pre-activation values.
    pre_activation = tf.nn.dropout(tf.matmul(inputs, weights) + bias, keep_prob)

    outputs = (
        pre_activation
        if activation_function is None
        else activation_function(pre_activation)
    )
    tf.summary.histogram(layer_name + '/outputs', outputs)
    return outputs
# Placeholders: dropout keep probability, input features and binarized labels.
keep_prob = tf.placeholder(tf.float32)
xs = tf.placeholder(tf.float32, [None, 64])
ys = tf.placeholder(tf.float32, [None, 10])

# Network: 64 inputs, a 50-unit tanh hidden layer, and a 10-way softmax output.
l1 = add_layer(xs, 64, 50, 'l1', activation_function=tf.nn.tanh)
prediction = add_layer(l1, 50, 10, 'l2', activation_function=tf.nn.softmax)

# Cross-entropy averaged over the batch. The softmax output is clipped away
# from zero because log(0) is -inf and 0 * -inf would turn the loss into NaN.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(
        ys * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)),
        reduction_indices=[1],
    )
)
tf.summary.scalar('loss', cross_entropy)
train_step = tf.train.GradientDescentOptimizer(0.6).minimize(cross_entropy)

# TensorFlow releases before 0.12 only provide initialize_all_variables().
tf_major, tf_minor = (int(part) for part in tf.__version__.split('.')[:2])
if tf_minor < 12 and tf_major < 1:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()

with tf.Session() as sess:
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('logs/train', sess.graph)
    test_writer = tf.summary.FileWriter('logs/test', sess.graph)
    try:
        sess.run(init)
        for step in range(500):
            # Training step: keep each unit with probability 0.5.
            sess.run(train_step, feed_dict={xs: X_train, ys: y_train, keep_prob: 0.5})
            if step % 50 == 0:
                # Record the loss with dropout disabled (keep_prob = 1.0);
                # leaving dropout on during evaluation makes the logged
                # train/test curves noisy and not comparable.
                train_result = sess.run(
                    merged, feed_dict={xs: X_train, ys: y_train, keep_prob: 1.0}
                )
                test_result = sess.run(
                    merged, feed_dict={xs: X_test, ys: y_test, keep_prob: 1.0}
                )
                train_writer.add_summary(train_result, step)
                test_writer.add_summary(test_result, step)
    finally:
        # Close the writers so pending events are flushed to disk.
        train_writer.close()
        test_writer.close()
接下来给出使用正则化法防止过拟合的程序,该方法是在损失函数中为所有的参数数据加上惩罚项,惩罚力度的大小由正则化项中的参数lambda控制,从而控制模型的复杂程度,进而防止过拟合的发生:
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.preprocessing import LabelBinarizer

# train_test_split lives in sklearn.model_selection in current scikit-learn;
# the old sklearn.cross_validation module was removed in release 0.20.
# Try the current location first and fall back only on very old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
def regularizer(parameter, scale=0.1):
    """Register an L2 penalty on ``parameter`` in the ``"losses"`` collection.

    Args:
        parameter: Tensor or variable whose L2 penalty is added to the loss.
        scale: Regularization strength (the lambda of the penalty term),
            passed to ``tf.contrib.layers.l2_regularizer``. Defaults to
            ``0.1``, the value that used to be hard-coded here.

    Returns:
        None. The penalty is only recorded in the ``"losses"`` collection.
    """
    penalty = tf.contrib.layers.l2_regularizer(scale)(parameter)
    # A disabled regularizer (scale of 0.0) yields None instead of a tensor;
    # adding None to the collection would break the later tf.add_n call.
    if penalty is not None:
        tf.add_to_collection("losses", penalty)
# Load the scikit-learn digits dataset; the labels are binarized into one
# indicator column per class so they match the 10-wide label placeholder.
digits = load_digits()
X, y = digits.data, LabelBinarizer().fit_transform(digits.target)

# Hold out 30% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=.3,
)
def add_layer(inputs, in_size, out_size, layer_name, activation_function=None):
    """Append one fully connected, L2-regularized layer and return its output.

    Both the weight matrix and the bias are passed to ``regularizer`` so
    their penalties are registered before the loss is assembled.

    Args:
        inputs: Tensor fed into the layer; it is matrix-multiplied with an
            ``[in_size, out_size]`` weight matrix.
        in_size: Number of input units.
        out_size: Number of output units.
        layer_name: Prefix of the histogram summary tag
            (``layer_name + '/outputs'``).
        activation_function: Optional callable applied to the linear term.
            When ``None`` the layer is purely linear.

    Returns:
        The output tensor of the layer.
    """
    weights = tf.Variable(tf.random_normal([in_size, out_size]))
    regularizer(weights)
    bias = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    regularizer(bias)

    linear = tf.matmul(inputs, weights) + bias
    outputs = linear if activation_function is None else activation_function(linear)

    tf.summary.histogram(layer_name + '/outputs', outputs)
    return outputs
# Placeholders for the input features and the binarized labels.
xs = tf.placeholder(tf.float32, [None, 64])
ys = tf.placeholder(tf.float32, [None, 10])

# Network: 64 inputs, a 50-unit tanh hidden layer, and a 10-way softmax output.
l1 = add_layer(xs, 64, 50, 'l1', activation_function=tf.nn.tanh)
prediction = add_layer(l1, 50, 10, 'l2', activation_function=tf.nn.softmax)

# Cross-entropy averaged over the batch. The softmax output is clipped away
# from zero because log(0) is -inf and 0 * -inf would turn the loss into NaN.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(
        ys * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)),
        reduction_indices=[1],
    )
)
# Total loss = data term + every penalty stored in the "losses" collection.
tf.add_to_collection("losses", cross_entropy)
cross_entropy = tf.add_n(tf.get_collection("losses"))
tf.summary.scalar('loss', cross_entropy)
train_step = tf.train.GradientDescentOptimizer(0.6).minimize(cross_entropy)

# TensorFlow releases before 0.12 only provide initialize_all_variables().
tf_major, tf_minor = (int(part) for part in tf.__version__.split('.')[:2])
if tf_minor < 12 and tf_major < 1:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()

with tf.Session() as sess:
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('logs/train', sess.graph)
    test_writer = tf.summary.FileWriter('logs/test', sess.graph)
    try:
        sess.run(init)
        for step in range(500):
            # One gradient step on the whole training set.
            sess.run(train_step, feed_dict={xs: X_train, ys: y_train})
            if step % 50 == 0:
                # Log the summaries for both splits every 50 steps.
                train_result = sess.run(merged, feed_dict={xs: X_train, ys: y_train})
                test_result = sess.run(merged, feed_dict={xs: X_test, ys: y_test})
                train_writer.add_summary(train_result, step)
                test_writer.add_summary(test_result, step)
    finally:
        # Close the writers so pending events are flushed to disk.
        train_writer.close()
        test_writer.close()