【深度学习】【python】实现多层感知机及跑一遍MNIST 中文注释版

【深度学习】【python】实现多层感知机并实验MNIST 中文注释版


  • python3.5
  • tensorflow 1.4
  • pytorch 0.2.0


#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import tensorflow as tf
import input_data
from logisticRegression import LogisticRegression

class HiddenLayer(object):
    """Fully connected hidden layer: output = activation(inpt . W + b)."""

    # NOTE(review): the original signature was truncated after `b=None,`.
    # The `activation` parameter is required by the body and documented
    # below; the sigmoid default is reconstructed from the 4x Glorot bound
    # used for W (the sigmoid-specific scaling) -- confirm against upstream.
    def __init__(self, inpt, n_in, n_out, W=None, b=None,
                 activation=tf.nn.sigmoid):
        """Create the layer's parameters and build its output tensor.

        Args:
            inpt: tf.Tensor of shape [n_examples, n_in].
            n_in: int, dimensionality of the input.
            n_out: int, number of hidden units.
            W, b: tf.Tensor, weights and bias; created here when None.
            activation: tf op applied to the affine output; when None the
                layer output is the plain affine transform.
        """
        # W not supplied: draw it uniformly from [-bound_val, bound_val].
        if W is None:
            bound_val = 4.0*np.sqrt(6.0/(n_in + n_out))
            W = tf.Variable(tf.random_uniform([n_in, n_out], minval=-bound_val, maxval=bound_val),
                            dtype=tf.float32, name="W")
        # b not supplied: start from zeros.
        if b is None:
            b = tf.Variable(tf.zeros([n_out,]), dtype=tf.float32, name="b")

        self.W = W
        self.b = b
        # Affine transform followed by the optional non-linearity.
        sum_W = tf.matmul(inpt, self.W) + self.b
        self.output = activation(sum_W) if activation is not None else sum_W
        # Parameters of this layer, in [W, b] order.
        self.params = [self.W, self.b]

class MLP(object):
    """Multilayer perceptron: one HiddenLayer feeding a LogisticRegression layer."""

    def __init__(self, inpt, n_in, n_hidden, n_out):
        """Wire the two layers together and expose cost/accuracy/regularizers.

        Args:
            inpt: tf.Tensor of shape [n_examples, n_in].
            n_in: int, dimensionality of the input.
            n_hidden: int, number of hidden units.
            n_out: int, dimensionality of the output.
        """
        # Hidden layer.
        self.hiddenLayer = HiddenLayer(inpt, n_in=n_in, n_out=n_hidden)
        # Output layer (logistic layer) fed by the hidden layer's output.
        self.outputLayer = LogisticRegression(self.hiddenLayer.output, n_in=n_hidden,
                                              n_out=n_out)
        # NOTE(review): the second term of L1/L2 was truncated in the source;
        # it is reconstructed assuming LogisticRegression exposes its weight
        # matrix as `W` -- confirm against logisticRegression.py.
        # L1 regularization term.
        self.L1 = tf.reduce_sum(tf.abs(self.hiddenLayer.W)) + \
            tf.reduce_sum(tf.abs(self.outputLayer.W))
        # L2 regularization term.
        self.L2 = tf.reduce_sum(tf.square(self.hiddenLayer.W)) + \
            tf.reduce_sum(tf.square(self.outputLayer.W))
        # Cross-entropy cost function, delegated to the output layer.
        self.cost = self.outputLayer.cost
        # Accuracy function, delegated to the output layer. The attribute is
        # spelled `accuarcy` here; presumably that matches the name defined
        # by LogisticRegression -- verify before renaming.
        self.accuracy = self.outputLayer.accuarcy

        # All trainable parameters: hidden layer first, then output layer.
        self.params = self.hiddenLayer.params + self.outputLayer.params
        # Keep a handle on the input tensor.
        self.input = inpt

if __name__ == "__main__":
    # Train the MLP on MNIST and print a few test predictions.
    # Load the MNIST dataset with one-hot labels.
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    # Placeholders for the input images and the target labels.
    x = tf.placeholder(tf.float32, shape=[None, 784])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])
    # Build the MLP model.
    mlp_classifier = MLP(inpt=x, n_in=784, n_hidden=500, n_out=10)
    # Training cost: model cost plus a weighted L2 penalty.
    l2_reg = 0.0001
    cost = mlp_classifier.cost(y_) + l2_reg*mlp_classifier.L2
    # Accuracy op.
    accuracy = mlp_classifier.accuracy(y_)
    # Predictions produced by the model's output layer.
    predictor = mlp_classifier.outputLayer.y_pred
    # Optimizer restricted to the model's own parameters.
    train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(
        cost, var_list=mlp_classifier.params)
    # Op that initializes all variables (including the optimizer's slots).
    init = tf.global_variables_initializer()
    # Training hyper-parameters.
    training_epochs = 10
    batch_size = 100
    display_step = 1
    # Start training.
    print("Start to train...")
    with tf.Session() as sess:
        # Run the initializer: the original built `init` but never executed
        # it, so the first training step would hit uninitialized variables.
        sess.run(init)
        # Loop over epochs.
        for epoch in range(training_epochs):
            avg_cost = 0.0
            batch_num = mnist.train.num_examples // batch_size
            # Loop over mini-batches.
            for i in range(batch_num):
                # Fetch the samples of the current batch.
                x_batch, y_batch = mnist.train.next_batch(batch_size)
                feed = {x: x_batch, y_: y_batch}
                # One optimization step.
                sess.run(train_op, feed_dict=feed)
                # Accumulate the (post-update) batch cost into the epoch mean.
                avg_cost += sess.run(cost, feed_dict=feed) / batch_num
            # Report progress on the validation split.
            if epoch % display_step == 0:
                val_acc = sess.run(accuracy, feed_dict={x: mnist.validation.images,
                                                       y_: mnist.validation.labels})
                print("Epoch {0} cost: {1}, validation accuacy: {2}".format(epoch,
                                                                            avg_cost, val_acc))
        # Training finished: compare predictions with the true labels on the
        # first ten test samples.
        test_x = mnist.test.images[:10]
        test_y = mnist.test.labels[:10]
        print("Ture lables:")
        print("  ", np.argmax(test_y, 1))
        print("  ", sess.run(predictor, feed_dict={x: test_x}))
