dropout解决过拟合问题

过度拟合是机器学习中常见的问题,为了解决这个问题,可以使用dropout方法。
不使用dropout处理时的效果:模型对训练数据的拟合程度优于测试数据,存在过拟合(overfitting):
dropout解决过拟合问题_第1张图片

使用dropout之后,测试数据上的效果得到了改善:
dropout解决过拟合问题_第2张图片

from __future__ import print_function
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# Prepare the data: load the scikit-learn digits dataset, binarize the class
# labels into indicator vectors, and hold out 30% of the samples for testing.
digits = load_digits()
x = digits.data
y = LabelBinarizer().fit_transform(digits.target)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

def add_layer(inputs, in_size, out_size, layer_name, activation_function=None):
    """Build one fully connected layer with dropout and return its output.

    Args:
        inputs: Tensor fed into the layer; it is matrix-multiplied by an
            ``[in_size, out_size]`` weight variable.
        in_size: Number of input units.
        out_size: Number of output units.
        layer_name: Prefix of the histogram summary tag
            (``layer_name + '/outputs'``).
        activation_function: Optional callable applied after dropout. When
            ``None`` the layer output is the (dropped-out) linear term.

    Returns:
        The layer's output tensor.

    Note:
        Dropout reads the module-level ``keep_prob``, so that name must be
        defined before this function is called.
    """
    weights = tf.Variable(tf.random_normal([in_size, out_size]))
    bias = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    # keep_prob is the probability of keeping a unit; with keep_prob=1
    # everything is kept, i.e. dropout has no effect.
    pre_activation = tf.nn.dropout(tf.matmul(inputs, weights) + bias, keep_prob)
    outputs = (
        pre_activation
        if activation_function is None
        else activation_function(pre_activation)
    )
    tf.summary.histogram(layer_name + '/outputs', outputs)
    return outputs

# Graph inputs, supplied through feed_dict at run time.
# Dropout keep probability.
keep_prob = tf.placeholder(dtype=tf.float32)
# Input features: any number of rows, 64 values per row.
xs = tf.placeholder(dtype=tf.float32, shape=[None, 64])
# Target labels: any number of rows, 10 values per row.
ys = tf.placeholder(dtype=tf.float32, shape=[None, 10])

# Hidden layer: 64 inputs -> 50 units with tanh activation.
l1 = add_layer(xs, 64, 50, 'l1', activation_function=tf.nn.tanh)
# Output layer: 50 inputs -> 10 units with softmax activation.
prediction = add_layer(l1, 50, 10, 'l2', activation_function=tf.nn.softmax)
# Loss (the optimization objective): cross-entropy, which measures how close
# the predictions are to the true labels and is zero when they match exactly.
# Softmax can underflow to exactly 0; log(0) is -inf and 0 * -inf is NaN,
# which would turn the whole loss into NaN. Clip away from zero before the log.
clipped_prediction = tf.clip_by_value(prediction, 1e-10, 1.0)
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(ys * tf.log(clipped_prediction), reduction_indices=[1]))

tf.summary.scalar('loss', cross_entropy)
# Training step (the optimizer): gradient descent with learning rate 0.5.
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

sess = tf.Session()
merged = tf.summary.merge_all()

# Separate writers so train and test curves can be compared side by side.
train_writer = tf.summary.FileWriter("logs/train", sess.graph)
test_writer = tf.summary.FileWriter("logs/test", sess.graph)

try:
    init = tf.global_variables_initializer()
    sess.run(init)
    for i in range(500):
        # Train with keep_prob=0.5, i.e. drop 50% of the units.
        sess.run(train_step,
                 feed_dict={xs: x_train, ys: y_train, keep_prob: 0.5})
        if i % 50 == 0:
            # Record summaries every 50 steps with keep_prob=1 so that
            # dropout is disabled while measuring.
            train_result = sess.run(
                merged, feed_dict={xs: x_train, ys: y_train, keep_prob: 1})
            test_result = sess.run(
                merged, feed_dict={xs: x_test, ys: y_test, keep_prob: 1})
            train_writer.add_summary(train_result, i)
            test_writer.add_summary(test_result, i)
finally:
    # Close the writers so buffered events are flushed to disk, then release
    # the session's resources, even if training raised.
    train_writer.close()
    test_writer.close()
    sess.close()

感谢莫烦大神,又让我学到了新的知识

你可能感兴趣的:(机器学习)