[Deep Learning Notes 1.1] Artificial Neural Networks (with an Introduction to Model Saving and Restoring)

Linear Threshold Units

Linear threshold unit (LTU): the inputs and the output are numbers (rather than binary on/off values), and each input connection is associated with a weight. The LTU computes a weighted sum of its inputs (z = w1·x1 + w2·x2 + … + wn·xn = wᵀ·x), then applies a step function to that sum and outputs the result: h_w(x) = step(z) = step(wᵀ·x) [1]. A single LTU can be used for simple linear binary classification [2].
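To make the formula concrete, here is a minimal NumPy sketch of a single LTU; the weights and inputs are made-up illustrative values, not a trained model:

import numpy as np

def ltu(x, w):
    # weighted sum z = w1*x1 + ... + wn*xn = w^T x
    z = np.dot(w, x)
    # Heaviside step function: 1 if z >= 0, else 0
    return 1 if z >= 0 else 0

x = np.array([1.4, 0.2])   # e.g. petal length and width
w = np.array([-0.5, 1.0])  # illustrative weights
print(ltu(x, w))           # -> 0, since z = -0.5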

Code Example 1

sklearn provides a Perceptron class, which implements a network of a single LTU [2].

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
iris = load_iris()
X = iris.data[:, (2, 3)]  # petal length, petal width
y = (iris.target == 0).astype(int)  # 1 = Iris setosa, 0 = otherwise
per_clf = Perceptron(random_state=42)
per_clf.fit(X, y)
y_pred = per_clf.predict([[2, 0.5]])
print(y_pred)
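On these two features, Iris setosa is linearly separable from the other two classes, so the trained perceptron should output [1] (setosa) for a flower with a 2 cm long, 0.5 cm wide petal.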

Perceptrons

The perceptron is a type of artificial neural network invented by Frank Rosenblatt in 1957 while he was working at the Cornell Aeronautical Laboratory [1]. A perceptron is simply composed of a single layer of LTUs, with each neuron connected to all of the inputs [2].

Multi-Layer Perceptrons

It turns out that some of the limitations of perceptrons can be eliminated by stacking multiple perceptrons. The resulting artificial neural network is called a Multi-Layer Perceptron (MLP) [2].

An MLP is composed of one (pass-through) input layer, one or more layers of LTUs called hidden layers, and one final layer of LTUs called the output layer (see Figure 10-7 in [2]). Every layer except the output layer includes a bias neuron and is fully connected to the next layer. When an artificial neural network has two or more hidden layers, it is called a deep neural network (DNN).
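For example, the MLP built in Code Example 3 below has 28×28 = 784 inputs, two hidden layers of 300 and 100 neurons respectively, and an output layer of 10 neurons, which makes it a DNN by this definition.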

Code Example 2

import tensorflow as tf
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

iris = load_iris()
X_train = iris.data
y_train = iris.target
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)  # infer numeric feature columns from the data
dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300, 100], n_classes=3, feature_columns=feature_columns)  # MLP with two hidden layers
dnn_clf.fit(x=X_train, y=y_train, batch_size=50, steps=40)

y_predicted = list(dnn_clf.predict(X_train))
print(accuracy_score(y_train, y_predicted))
print(dnn_clf.evaluate(X_train, y_train))

print('end')

A Simple TensorFlow-Based DNN Application

The simplest way to train an MLP with TensorFlow is to use the high-level API tf.contrib.learn, which is quite similar to sklearn's API. But if you want finer control over the network architecture, you may prefer TensorFlow's lower-level Python API [2]. For now we will focus mainly on this lower-level API.
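(Note that tf.contrib.learn was later deprecated in favor of tf.estimator, and tf.contrib was removed entirely in TensorFlow 2.x; all of the code in these notes targets TensorFlow 1.x.)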

Code Example 3

import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

datapath = "/home/***/res/MNIST_data"
mnist = input_data.read_data_sets(datapath, validation_size=0, one_hot=True)

n_epochs = 10
batch_size = 50
learning_rate = 0.01

n_inputs = 28*28  # MNIST
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None, n_outputs), name="y")  # one-hot labels
yy = tf.placeholder(tf.int64, shape=(None, ), name="yy")  # class-index labels, used for evaluation


def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)  # scale the random init by fan-in to help convergence
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="weights")
        b = tf.Variable(tf.zeros([n_neurons]), name="biases")
        z = tf.matmul(X, W) + b
        if activation == "relu":
            return tf.nn.relu(z)
        else:
            return z


# Method 1 for building the DNN layers:
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, "hidden1", activation="relu")
    hidden2 = neuron_layer(hidden1, n_hidden2, "hidden2", activation="relu")
    logits = neuron_layer(hidden2, n_outputs, "outputs")

# Method 2 for building the DNN layers (commented out; an alternative to method 1):
'''
from tensorflow.contrib.layers import fully_connected
with tf.name_scope("dnn"):
    hidden1 = fully_connected(X, n_hidden1, scope="hidden1")
    hidden2 = fully_connected(hidden1, n_hidden2, scope="hidden2")
    logits = fully_connected(hidden2, n_outputs, scope="outputs", activation_fn=None)
'''

# Method 3 for building the DNN layers (commented out; an alternative to method 1):
'''
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

    #prediction = tf.layers.dense(hidden2, n_outputs)
    #logits = tf.identity(prediction, name='outputs')
'''


with tf.name_scope("loss"):
    # sparse_softmax_cross_entropy_with_logits expects class-index labels rather than
    # one-hot labels, hence the tf.argmax over the one-hot placeholder y [3]
    # xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y, 1), logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, yy, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))


init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        yy_batch = np.argmax(y_batch, 1)  # convert one-hot labels back to class indices
        acc_train = accuracy.eval(feed_dict={X: X_batch, yy: yy_batch})
        yy_batch = np.argmax(mnist.test.labels, 1)
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images, yy: yy_batch})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

    save_path = saver.save(sess, "./my_model_final.ckpt")

Code tip:
The two placeholders y and yy in the code above can actually be reduced to the single placeholder yy, which requires the following changes to the code.

# Cross-entropy computation:
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=yy, logits=logits)

# During both training and prediction, replace y_batch with np.argmax(y_batch, axis=1), for example:
sess.run(training_op, feed_dict={X: X_batch, yy: np.argmax(y_batch, axis=1)})
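For reference, the dense and sparse cross-entropy ops compute the same loss; the sparse variant simply takes class indices instead of one-hot vectors. A minimal sketch with made-up logits (run in a fresh graph) verifying this:

import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.1]])
onehot_labels = tf.constant([[1.0, 0.0, 0.0]])  # class 0 in one-hot form
sparse_labels = tf.constant([0])                # class 0 as an index

dense_xent = tf.nn.softmax_cross_entropy_with_logits(labels=onehot_labels, logits=logits)
sparse_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=sparse_labels, logits=logits)

with tf.Session() as sess:
    print(sess.run([dense_xent, sparse_xent]))  # both print the same value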

Model Restoration
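The code below restores the model saved above and evaluates it on the test set. The helper get_model_filenames locates the metagraph (.meta) file and the checkpoint file in a model directory, and load_model handles either a model directory (metagraph plus checkpoint) or a single protobuf file containing a frozen graph.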

import os
import re
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile
import tensorflow.examples.tutorials.mnist.input_data as input_data


def get_model_filenames(model_dir):
    files = os.listdir(model_dir)
    meta_files = [s for s in files if s.endswith('.meta')]
    if len(meta_files) == 0:
        raise ValueError('No meta file found in the model directory (%s)' % model_dir)
    elif len(meta_files) > 1:
        raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
    meta_file = meta_files[0]
    ckpt = tf.train.get_checkpoint_state(model_dir)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_file = os.path.basename(ckpt.model_checkpoint_path)
        return meta_file, ckpt_file

    # Fallback: no checkpoint state file, so pick the .ckpt file with the largest step number
    meta_files = [s for s in files if '.ckpt' in s]
    max_step = -1
    for f in files:
        step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
        if step_str is not None and len(step_str.groups()) >= 2:
            step = int(step_str.groups()[1])
            if step > max_step:
                max_step = step
                ckpt_file = step_str.groups()[0]
    return meta_file, ckpt_file


def load_model(model, input_map=None):
    # Check if the model is a model directory (containing a metagraph and a checkpoint file)
    #  or if it is a protobuf file with a frozen graph
    model_exp = os.path.expanduser(model)
    if (os.path.isfile(model_exp)):
        print('Model filename: %s' % model_exp)
        with gfile.FastGFile(model_exp, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, input_map=input_map, name='')
    else:
        print('Model directory: %s' % model_exp)
        meta_file, ckpt_file = get_model_filenames(model_exp)

        print('Metagraph file: %s' % meta_file)
        print('Checkpoint file: %s' % ckpt_file)

        saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file), input_map=input_map)
        saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))


datapath = "/home/***/res/MNIST_data"
mnist = input_data.read_data_sets(datapath, validation_size=0, one_hot=True)

with tf.Graph().as_default():
    with tf.Session() as sess:
        # load_model('./my_model_final_no_dnnscope')  # OK

        saver = tf.train.import_meta_graph(
            '/home/***/devstu/handml/my_model_final/my_model_final.ckpt.meta')
        saver.restore(sess, tf.train.latest_checkpoint('./my_model_final'))

        # If you do not know a tensor's name for get_tensor_by_name below, you can
        # find it from this printout of all node names in the graph.
        for op in tf.get_default_graph().as_graph_def().node:
            print(op.name)

        X_placeholder = tf.get_default_graph().get_tensor_by_name('X:0')
        yy = tf.get_default_graph().get_tensor_by_name('yy:0')
        # logits = tf.get_default_graph().get_tensor_by_name('outputs:0')  # error: no tensor with that name
        # neuron_layer never names its output op explicitly, so the final add op ends up at 'dnn/outputs/add'
        logits = tf.get_default_graph().get_tensor_by_name('dnn/outputs/add:0')
        accuracy = tf.get_default_graph().get_tensor_by_name('eval/Mean:0')

        X_batch = mnist.test.images
        y_batch = mnist.test.labels
        yy_batch = np.argmax(y_batch, 1)
        acc_rate = accuracy.eval(feed_dict={X_placeholder: X_batch, yy: yy_batch})

        # The evaluation above uses the accuracy metric defined at training time;
        # in practice you can also predict directly from the logits, as follows
        predict_array = sess.run(logits, feed_dict={X_placeholder: X_batch})
        # predict_array = logits.eval(feed_dict={X_placeholder: X_new_scaled})  # equivalent to the line above
        y_pred = np.argmax(predict_array, axis=1)
        acc_rate = np.mean(np.equal(y_pred, yy_batch))
        print(y_pred)
        print(acc_rate)

print('end')

References

[1] Baidu Baike: Perceptron (neural network model)
[2] hands_on_Ml_with_Sklearn_and_TF, Chapter 9: Introduction to Artificial Neural Networks
[3] Corrections to some errors in 《TensorFlow实战实例》: ValueError: Only call sparse_softmax_cross_entropy_with_logits with named arguments
