Linear Threshold Unit (LTU): the inputs and the output are numbers (rather than binary on/off values), and each input connection is associated with a weight. The LTU computes a weighted sum of its inputs (z = w1·x1 + w2·x2 + … + wn·xn = wᵀ·x), then applies a step function to that sum and outputs the result: hw(x) = step(z) = step(wᵀ·x) [1]. A single LTU can be used for simple linear binary classification [2].
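To make the formula concrete, here is a minimal numpy sketch of a single LTU; the weights and inputs are made-up values for illustration, not taken from the text:

import numpy as np

def ltu(x, w):
    # weighted sum z = w^T x, then a heaviside step: output 1 if z >= 0, else 0
    z = np.dot(w, x)
    return int(z >= 0)

w = np.array([0.5, -0.2])            # hypothetical weights
print(ltu(np.array([1.0, 1.0]), w))  # -> 1, since 0.5 - 0.2 >= 0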
sklearn provides a Perceptron class, which implements a single-LTU network [2].
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

iris = load_iris()
X = iris.data[:, (2, 3)]            # petal length, petal width
y = (iris.target == 0).astype(int)  # 1 if Iris setosa, else 0
per_clf = Perceptron(random_state=42)
per_clf.fit(X, y)
y_pred = per_clf.predict([[2, 0.5]])
print(y_pred)
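Incidentally, sklearn's Perceptron class is equivalent to an SGDClassifier with loss="perceptron", learning_rate="constant", eta0=1, and penalty=None [2].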
The Perceptron is an artificial neural network invented by Frank Rosenblatt in 1957 while he was working at the Cornell Aeronautical Laboratory [1]. A Perceptron is simply composed of a single layer of LTUs, with each neuron connected to all of the inputs [2].
It turns out that some of the limitations of Perceptrons can be eliminated by stacking multiple Perceptrons. The resulting artificial neural network is called a Multi-Layer Perceptron (MLP) [2].
An MLP is composed of one input layer, one or more layers of LTUs called hidden layers, and one final layer of LTUs called the output layer (see Figure 10-7 in [2]). Every layer except the output layer includes a bias neuron and is fully connected to the next layer. When an artificial neural network has two or more hidden layers, it is called a deep neural network (DNN).
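A classic illustration of why stacking helps: a single LTU cannot represent XOR, but a two-layer network of step units can. The weights below are hand-picked for illustration (one standard construction, not taken from the text):

import numpy as np

def step(z):
    return (z >= 0).astype(int)

def xor_mlp(x):
    # hidden layer: first unit computes OR(x1, x2), second computes AND(x1, x2)
    W1 = np.array([[1.0, 1.0], [1.0, 1.0]])
    b1 = np.array([-0.5, -1.5])
    h = step(x @ W1 + b1)
    # output layer: OR AND (NOT AND), i.e. exactly one input is 1
    w2 = np.array([1.0, -1.0])
    b2 = -0.5
    return step(h @ w2 + b2)

for x in ([0, 0], [0, 1], [1, 0], [1, 1]):
    print(x, xor_mlp(np.array(x, dtype=float)))  # -> 0, 1, 1, 0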
import tensorflow as tf
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

iris = load_iris()
X_train = iris.data
y_train = iris.target
# Two hidden layers (300 and 100 units) and a softmax output layer with 3 classes.
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)
dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300, 100], n_classes=3,
                                         feature_columns=feature_columns)
dnn_clf.fit(x=X_train, y=y_train, batch_size=50, steps=40)
# Note: for simplicity this evaluates on the training data itself.
y_predicted = list(dnn_clf.predict(X_train))
print(accuracy_score(y_train, y_predicted))
print(dnn_clf.evaluate(X_train, y_train))
print('end')
The simplest way to train an MLP with TensorFlow is to use the high-level API tf.contrib.learn, whose interface is very similar to sklearn's. But if you want finer control over the network architecture, you will probably prefer TensorFlow's lower-level Python API [2], which is what the rest of this section focuses on.
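As an aside, tf.contrib.learn has since been deprecated in favor of tf.estimator. A rough equivalent of the DNNClassifier code above under that API might look as follows (a sketch assuming TF ≥ 1.4, not code from [2]):

import tensorflow as tf
from sklearn.datasets import load_iris

iris = load_iris()
X_train = iris.data.astype('float32')
y_train = iris.target

# Declare the 4 iris features as a single numeric feature column named 'x'.
feature_columns = [tf.feature_column.numeric_column('x', shape=[4])]
dnn_clf = tf.estimator.DNNClassifier(hidden_units=[300, 100], n_classes=3,
                                     feature_columns=feature_columns)
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': X_train}, y=y_train, batch_size=50, num_epochs=None, shuffle=True)
dnn_clf.train(input_fn=input_fn, steps=40)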
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

datapath = "/home/***/res/MNIST_data"
# validation_size=0 keeps all training images in mnist.train; labels are one-hot.
mnist = input_data.read_data_sets(datapath, validation_size=0, one_hot=True)

n_epochs = 10
batch_size = 50
learning_rate = 0.01
n_inputs = 28*28  # MNIST images are 28x28 pixels
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None, n_outputs), name="y")  # one-hot labels
yy = tf.placeholder(tf.int64, shape=(None, ), name="yy")         # class-index labels
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        # Truncated-normal initialization with stddev 2/sqrt(n_inputs) helps
        # gradient descent converge faster [2].
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="weights")
        b = tf.Variable(tf.zeros([n_neurons]), name="biases")
        z = tf.matmul(X, W) + b
        if activation == "relu":
            return tf.nn.relu(z)
        else:
            return z
# Method 1 for building the DNN layers (using the neuron_layer() function above):
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, "hidden1", activation="relu")
    hidden2 = neuron_layer(hidden1, n_hidden2, "hidden2", activation="relu")
    logits = neuron_layer(hidden2, n_outputs, "outputs")
# Method 2 for building the DNN layers (tf.contrib.layers; ReLU is the default activation):
'''
from tensorflow.contrib.layers import fully_connected
with tf.name_scope("dnn"):
    hidden1 = fully_connected(X, n_hidden1, scope="hidden1")
    hidden2 = fully_connected(hidden1, n_hidden2, scope="hidden2")
    logits = fully_connected(hidden2, n_outputs, scope="outputs", activation_fn=None)
'''
# Method 3 for building the DNN layers (tf.layers.dense, the modern replacement):
'''
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")
    # Alternative: give the logits tensor an explicit name via tf.identity:
    # prediction = tf.layers.dense(hidden2, n_outputs)
    # logits = tf.identity(prediction, name='outputs')
'''
with tf.name_scope("loss"):
# xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y, 1), logits=logits) # [3]
loss = tf.reduce_mean(xentropy, name="loss")
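# Editorial aside (not from [2]): tf.nn.sparse_softmax_cross_entropy_with_logits
# expects integer class indices of shape [batch], whereas
# tf.nn.softmax_cross_entropy_with_logits expects one-hot labels of shape
# [batch, n_classes] with a float dtype. Since y was loaded one-hot, the
# non-sparse variant would also work here:
#   xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=tf.cast(y, tf.float32), logits=logits)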
with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, yy, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Evaluate on the last training batch and on the test set once per epoch.
        yy_batch = np.argmax(y_batch, 1)
        acc_train = accuracy.eval(feed_dict={X: X_batch, yy: yy_batch})
        yy_batch = np.argmax(mnist.test.labels, 1)
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images, yy: yy_batch})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
    save_path = saver.save(sess, "./my_model_final.ckpt")
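For reference, saver.save() with the path ./my_model_final.ckpt writes several files: my_model_final.ckpt.meta (the graph definition), my_model_final.ckpt.data-00000-of-00001 and my_model_final.ckpt.index (the variable values), plus a checkpoint file recording the latest checkpoint path. The model-loading code further below depends on exactly this layout.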
Code tip:
The two placeholders y and yy above could actually be merged into just yy, which requires the following changes.
# Cross-entropy computation:
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=yy, logits=logits)
# When training and predicting, replace y_batch with np.argmax(y_batch, axis=1), e.g.:
sess.run(training_op, feed_dict={X: X_batch, yy: np.argmax(y_batch, axis=1)})
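Simpler still, the np.argmax calls disappear entirely if the labels are loaded as plain class indices in the first place. A minimal sketch of that variant (an editorial suggestion, not part of the original code):

# Load labels as integer class indices instead of one-hot vectors:
mnist = input_data.read_data_sets(datapath, validation_size=0, one_hot=False)
yy = tf.placeholder(tf.int64, shape=(None,), name="yy")
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=yy, logits=logits)
# ...and feed the batch labels directly:
# sess.run(training_op, feed_dict={X: X_batch, yy: y_batch})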
import os
import re
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile
import tensorflow.examples.tutorials.mnist.input_data as input_data
def get_model_filenames(model_dir):
    files = os.listdir(model_dir)
    meta_files = [s for s in files if s.endswith('.meta')]
    if len(meta_files) == 0:
        raise ValueError('No meta file found in the model directory (%s)' % model_dir)
    elif len(meta_files) > 1:
        raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
    meta_file = meta_files[0]
    ckpt = tf.train.get_checkpoint_state(model_dir)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_file = os.path.basename(ckpt.model_checkpoint_path)
        return meta_file, ckpt_file
    # Fallback: pick the checkpoint with the largest step number in its file name.
    meta_files = [s for s in files if '.ckpt' in s]
    max_step = -1
    for f in files:
        step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
        if step_str is not None and len(step_str.groups()) >= 2:
            step = int(step_str.groups()[1])
            if step > max_step:
                max_step = step
                ckpt_file = step_str.groups()[0]
    return meta_file, ckpt_file
def load_model(model, input_map=None):
    # Check if the model is a model directory (containing a metagraph and a checkpoint file)
    # or if it is a protobuf file with a frozen graph.
    model_exp = os.path.expanduser(model)
    if os.path.isfile(model_exp):
        print('Model filename: %s' % model_exp)
        with gfile.FastGFile(model_exp, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, input_map=input_map, name='')
    else:
        print('Model directory: %s' % model_exp)
        meta_file, ckpt_file = get_model_filenames(model_exp)
        print('Metagraph file: %s' % meta_file)
        print('Checkpoint file: %s' % ckpt_file)
        saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file), input_map=input_map)
        saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
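# Usage sketch (hypothetical path; load_model must run inside an active session,
# since it calls tf.get_default_session()):
#   with tf.Session() as sess:
#       load_model('./my_model_final')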
datapath = "/home/***/res/MNIST_data"
mnist = input_data.read_data_sets(datapath, validation_size=0, one_hot=True)

with tf.Graph().as_default():
    with tf.Session() as sess:
        # load_model('./my_model_final_no_dnnscope')  # OK
        saver = tf.train.import_meta_graph(
            '/home/***/devstu/handml/my_model_final/my_model_final.ckpt.meta')
        saver.restore(sess, tf.train.latest_checkpoint('./my_model_final'))
        # If you don't know the tensor names for get_tensor_by_name below,
        # you can discover them by printing every node in the graph:
        for op in tf.get_default_graph().as_graph_def().node:
            print(op.name)
        X_placeholder = tf.get_default_graph().get_tensor_by_name('X:0')
        yy = tf.get_default_graph().get_tensor_by_name('yy:0')
        # logits = tf.get_default_graph().get_tensor_by_name('outputs:0')  # error
        logits = tf.get_default_graph().get_tensor_by_name('dnn/outputs/add:0')
        accuracy = tf.get_default_graph().get_tensor_by_name('eval/Mean:0')
        X_batch = mnist.test.images
        y_batch = mnist.test.labels
        yy_batch = np.argmax(y_batch, 1)
        acc_rate = accuracy.eval(feed_dict={X_placeholder: X_batch, yy: yy_batch})
        # The accuracy metric above was defined at training time; at inference
        # time you can also predict directly from the logits:
        predict_array = sess.run(logits, feed_dict={X_placeholder: X_batch})
        # predict_array = logits.eval(feed_dict={X_placeholder: X_batch})  # same effect
        y_pred = np.argmax(predict_array, axis=1)
        acc_rate = np.mean(np.equal(y_pred, yy_batch))
        print(y_pred)
        print(acc_rate)

print('end')
[1] Baidu Baike: Perceptron (neural network model)
[2] hands_on_Ml_with_Sklearn_and_TF, Chapter 9: Introduction to Artificial Neural Networks
[3] Corrections to errors in《TensorFlow实战实例》: ValueError: Only call sparse_softmax_cross_entropy_with_logits with named arguments