Implementing a 3-layer BP (backpropagation) neural network on the MNIST dataset with NumPy.
Note: Python's zip() function takes iterable objects as arguments, packs their corresponding elements into tuples, and returns an iterator over those tuples (in Python 3, wrap it in list() to materialize them).
zip syntax: zip([iterable, ...]); the result is truncated to the shortest input. For example:
>>> a = [1, 2, 3]
>>> b = [4, 5, 6]
>>> c = [4, 5, 6, 7, 8]
>>> zipped = list(zip(a, b))   # pack corresponding elements into a list of tuples
>>> zipped
[(1, 4), (2, 5), (3, 6)]
>>> list(zip(a, c))            # length follows the shortest iterable
[(1, 4), (2, 5), (3, 6)]
>>> list(zip(*zipped))         # the inverse of zip: *zipped unpacks, "unzipping" back into rows
[(1, 2, 3), (4, 5, 6)]
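load_data_wrapper below uses exactly this pattern, zipping reshaped images with their label vectors. A minimal sketch with made-up sample data (illustrative only, not part of the loader):

import numpy as np

# two fake samples: 784-dimensional inputs and 10-dimensional one-hot labels
inputs = [np.zeros((784, 1)), np.ones((784, 1))]
labels = [np.eye(10)[3].reshape(10, 1), np.eye(10)[7].reshape(10, 1)]

training_data = list(zip(inputs, labels))   # [(x0, y0), (x1, y1)]
x, y = training_data[0]
print(x.shape, y.shape)                     # (784, 1) (10, 1)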
Data file: mnist.pkl.gz (the loader below reads it from data/mnist.pkl.gz).
"""
mnist_loader
~~~~~~~~~~~~
A library to load the MNIST image data. For details of the data
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
"""
#### Libraries
# Standard library
import pickle
import gzip
# Third-party libraries
import numpy as np
def load_data():
"""
Return the MNIST data as a tuple containing the training data,
the validation data, and the test data.
The ``training_data`` is returned as a tuple with two entries.
The first entry contains the actual training images. This is a
numpy ndarray with 50,000 entries. Each entry is, in turn, a
numpy ndarray with 784 values, representing the 28 * 28 = 784
pixels in a single MNIST image.
The second entry in the ``training_data`` tuple is a numpy ndarray
containing 50,000 entries. Those entries are just the digit
values (0...9) for the corresponding images contained in the first
entry of the tuple.
The ``validation_data`` and ``test_data`` are similar, except
each contains only 10,000 images.
This is a nice data format, but for use in neural networks it's
helpful to modify the format of the ``training_data`` a little.
That's done in the wrapper function ``load_data_wrapper()``, see
below.
:return:
"""
f = gzip.open('data/mnist.pkl.gz', 'rb')
training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
f.close()
return (training_data, validation_data, test_data)
def load_data_wrapper():
"""
Return a tuple containing ``(training_data, validation_data,
test_data)``. Based on ``load_data``, but the format is more
convenient for use in our implementation of neural networks.
In particular, ``training_data`` is a list containing 50,000
2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
containing the input image. ``y`` is a 10-dimensional
numpy.ndarray representing the unit vector corresponding to the
correct digit for ``x``.
``validation_data`` and ``test_data`` are lists containing 10,000
2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
    numpy.ndarray containing the input image, and ``y`` is the
corresponding classification, i.e., the digit values (integers)
corresponding to ``x``.
Obviously, this means we're using slightly different formats for
the training data and the validation / test data. These formats
turn out to be the most convenient for use in our neural network
code.
:return:
"""
tr_d, va_d, te_d = load_data()
training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
training_results = [vectorized_result(y) for y in tr_d[1]]
training_data = list(zip(training_inputs, training_results))
validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
validation_data = list(zip(validation_inputs, va_d[1]))
test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
test_data = list(zip(test_inputs, te_d[1]))
return (training_data, validation_data, test_data)
def vectorized_result(j):
"""
Return a 10-dimensional unit vector with a 1.0 in the jth
position and zeroes elsewhere. This is used to convert a digit
(0...9) into a corresponding desired output from the neural
network.
:param j:
:return:
"""
e = np.zeros((10, 1))
e[j] = 1.0
return e
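# Illustrative example (not part of the loader itself): vectorized_result(3)
# returns a (10, 1) column vector that is zero everywhere except index 3:
#   [[0.], [0.], [0.], [1.], [0.], [0.], [0.], [0.], [0.], [0.]]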
# Quick shape check (debugging aid): inspect the raw arrays returned by load_data()
# and the reshaped samples built the same way as in load_data_wrapper().
if __name__ == '__main__':
    tr_d, va_d, te_d = load_data()
    print(tr_d[0].shape, tr_d[1].shape)        # (50000, 784) (50000,)
    print(va_d[0].shape, va_d[1].shape)        # (10000, 784) (10000,)
    print(te_d[0].shape, te_d[1].shape)        # (10000, 784) (10000,)
    print(tr_d[0][0].shape, tr_d[1][0].shape)  # (784,) ()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    print(training_inputs[0].shape)            # (784, 1)
    training_results = [vectorized_result(y) for y in tr_d[1]]
    print(training_results[0].shape)           # (10, 1)
The stochastic gradient descent training code (network.py) is shown below:
"""
network.py
~~~~~~~~~~
A module to implement the stochastic gradient descent learning
algorithm for a feedforward neural network. Gradients are calculated
using backpropagation. Note that I have focused on making the code
simple, easily readable, and easily modifiable. It is not optimized,
and omits many desirable features.
"""
#### Libraries
# Standard library
import random
# Third-party libraries
import numpy as np
#### Miscellaneous functions
def sigmoid(z):
"""
The sigmoid function.
"""
return 1.0 / (1.0 + np.exp(-z))
def sigmoid_prime(z):
"""Derivative of the sigmoid function."""
return sigmoid(z) * (1 - sigmoid(z))
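# Sanity check (illustrative, not part of the module): the analytic derivative
# agrees with a central finite difference, e.g. for z = 0.5:
#   (sigmoid(0.5 + 1e-5) - sigmoid(0.5 - 1e-5)) / 2e-5  ~  sigmoid_prime(0.5)  ~ 0.2350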
# A class representing the 3-layer neural network.
class Network:
def __init__(self, sizes):
"""
The list ``sizes`` contains the number of neurons in the
respective layers of the network. For example, if the list
was [2, 3, 1] then it would be a three-layer network, with the
first layer containing 2 neurons, the second layer 3 neurons,
and the third layer 1 neuron. The biases and weights for the
network are initialized randomly, using a Gaussian
distribution with mean 0, and variance 1. Note that the first
layer is assumed to be an input layer, and by convention we
won't set any biases for those neurons, since biases are only
ever used in computing the outputs from later layers.
        :param sizes: a list giving the number of neurons in each layer, e.g. [784, 30, 10]
"""
self.num_layers = len(sizes)
# sizes: [784, 30, 10]
self.sizes = sizes
        # b: [ch_out, 1] bias vector, one per layer after the input
self.biases = [np.random.randn(ch_out, 1) for ch_out in sizes[1:]]
        # w: [ch_out, ch_in] weight matrix, one per layer after the input
self.weights = [np.random.randn(ch_out, ch_in) for ch_in, ch_out in zip(sizes[:-1], sizes[1:])]
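        # Illustrative shape check for sizes = [784, 30, 10]:
        #   self.biases  -> shapes [(30, 1), (10, 1)]
        #   self.weights -> shapes [(30, 784), (10, 30)]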
def forward(self, x):
"""
        Return the output of the network for input ``x``.
        :param x: [784, 1] input vector
:return: [10, 1]
"""
for b, w in zip(self.biases, self.weights):
# [30, 784] @ [784, 1]=> [30, 1] + [30, 1] => [30, 1]
# [10, 30] @ [30, 1] + [10, 1] => [10, 1]
z = np.dot(w, x) + b
# [30, 1]
# [10, 1]
x = sigmoid(z)
return x
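    # Usage sketch (illustrative): for net = Network([784, 30, 10]) and an input
    # x of shape (784, 1), net.forward(x) returns a (10, 1) vector of sigmoid
    # activations; np.argmax of that vector is the predicted digit (see evaluate()).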
def train(self, training_data, epochs, batchsz, lr, test_data=None):
"""
Train the neural network using mini-batch stochastic gradient descent.
The ``training_data`` is a list of tuples
``(x, y)`` representing the training inputs and the desired
outputs. The other non-optional parameters are self-explanatory. If
``test_data`` is provided then the network will be evaluated against
the test data after each epoch, and partial progress printed out.
This is useful for tracking progress, but slows things down substantially.
"""
if test_data:
n_test = len(test_data)
n = len(training_data)
for j in range(epochs):
random.shuffle(training_data)
mini_batches = [training_data[k:k+batchsz] for k in range(0, n, batchsz)]
# for every (x,y)
for mini_batch in mini_batches:
                loss = self.update_mini_batch(mini_batch, lr)  # average loss of this mini-batch (the value printed below is from the last batch of each epoch)
if test_data:
print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test), 'loss: ', loss)
else:
print("Epoch {0} complete".format(j) )
def update_mini_batch(self, batch, lr):
"""
Update the network's weights and biases by applying
gradient descent using backpropagation to a single mini batch.
        The ``batch`` is a list of tuples ``(x, y)``, and ``lr``
        is the learning rate.
"""
nabla_b = [np.zeros(b.shape) for b in self.biases]
nabla_w = [np.zeros(w.shape) for w in self.weights]
        loss = 0  # accumulated loss over the current batch
# for every sample in current batch
for x, y in batch:
# list of every w, b gradient
            delta_nabla_b, delta_nabla_w, loss_ = self.backprop(x, y)  # gradients and loss for the current sample
            # each sample yields a full list of per-layer gradients (e.g. [w1, w2, w3]);
            # across the batch we accumulate them element-wise and average afterwards
            nabla_b = [accu + cur for accu, cur in zip(nabla_b, delta_nabla_b)]
            nabla_w = [accu + cur for accu, cur in zip(nabla_w, delta_nabla_w)]  # accu: running sum, cur: current sample
            loss += loss_  # accumulate the loss
        # average the accumulated gradients for w and b (element-wise division,
        # matching the element-wise accumulation above)
nabla_w = [w / len(batch) for w in nabla_w]
nabla_b = [b / len(batch) for b in nabla_b]
        # SGD update of the weights w and biases b
# w = w - lr * nabla_w
self.weights = [w - lr * nabla for w, nabla in zip(self.weights, nabla_w)]
self.biases = [b - lr * nabla for b, nabla in zip(self.biases, nabla_b)]
        loss = loss / len(batch)  # average loss over the batch
return loss
def backprop(self, x, y):
"""
        Return a tuple ``(nabla_b, nabla_w, loss)`` representing the
        gradient of the cost function C_x for this sample, plus the sample's
        loss. ``nabla_b`` and ``nabla_w`` are layer-by-layer lists of numpy
        arrays, similar to ``self.biases`` and ``self.weights``.
        :param x: [784, 1]
        :param y: [10, 1], one-hot encoding
:return:
"""
nabla_b = [np.zeros(b.shape) for b in self.biases]
nabla_w = [np.zeros(w.shape) for w in self.weights]
# 1. forward
        # Why run a forward pass here as well? Because backprop needs the per-layer
        # z and activation values recorded below in order to compute the gradients.
        # The separate forward() method is kept for inference: at test time no
        # backward pass is needed.
activation = x
activations = [x] # list to store all the activations, layer by layer
# w*x = z => sigmoid => x/activation
zs = [] # list to store all the z vectors, layer by layer
for b, w in zip(self.biases, self.weights):
# https://stackoverflow.com/questions/34142485/difference-between-numpy-dot-and-python-3-5-matrix-multiplication
# np.dot vs np.matmul = @ vs element-wise *
z = np.dot(w, activation) + b
zs.append(z)
activation = sigmoid(z)
activations.append(activation)
        # value of the quadratic loss for this sample
        loss = np.power(activations[-1] - y, 2).sum()
# 2. backward pass
        # output-layer delta: (Ok - tk) * Ok * (1 - Ok), the formula for the last layer
        # 2.1 compute the gradient on the output layer first
        # [10, 1] * [10, 1] => [10, 1]
        # equivalently: delta = (activations[-1] - y) * sigmoid_prime(zs[-1])
delta = activations[-1] * (1 - activations[-1]) * (activations[-1] - y)
nabla_b[-1] = delta
# delta: [10, 1]
# activations[-2]: [30, 1]
# [10, 1] @ [1, 30] => [10, 30]
nabla_w[-1] = np.dot(delta, activations[-2].transpose())
# Note that the variable l in the loop below is used a little
# differently to the notation in Chapter 2 of the book. Here,
# l = 1 means the last layer of neurons, l = 2 is the
# second-last layer, and so on. It's a renumbering of the
# scheme in the book, used here to take advantage of the fact
# that Python can use negative indices in lists.
# 2.2 compute hidden gradient
for l in range(2, self.num_layers):
# [30, 1]
z = zs[-l]
sp = sigmoid_prime(z)
            # formula for the hidden-layer delta_j
            # [10, 30].T @ [10, 1] => [30, 10] @ [10, 1] => [30, 1] * [30, 1] => [30, 1]
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp  # backprop formula
nabla_b[-l] = delta
# [30, 1] @ [784, 1].T => [30, 784]
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())  # matrix product
return (nabla_b, nabla_w, loss)
def evaluate(self, test_data):
"""
Return the number of test inputs for which the neural
network outputs the correct result. Note that the neural
network's output is assumed to be the index of whichever
neuron in the final layer has the highest activation.
:param test_data: list of [x, y]
:return:
"""
        # the parentheses around (x, y) in the comprehension are optional
test_results = [(np.argmax(self.forward(x)), y) for (x, y) in test_data]
correct = sum(int(pred == y) for pred, y in test_results)
return correct
def main():
import mnist_loader
# Loading the MNIST data
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
print(len(training_data), training_data[0][0].shape, training_data[0][1].shape)
print(len(test_data), test_data[0][0].shape, test_data[0][1].shape)
print(test_data[0][1])
# Set up a Network with 30 hidden neurons
net = Network([784, 30, 10])
    # Use mini-batch stochastic gradient descent to learn from the MNIST training_data
    # over 500 epochs, with a mini-batch size of 10 and a learning rate of 0.1
net.train(training_data, 500, 10, 0.1, test_data=test_data)
if __name__ == '__main__':
main()
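For reference, the quantities computed in backprop above are the standard backpropagation equations for the quadratic cost $C = \tfrac{1}{2}\lVert a^{L} - y \rVert^{2}$ (this is only a summary of what the code does; note the printed loss omits the factor $\tfrac{1}{2}$):

$$
\begin{aligned}
\delta^{L} &= (a^{L} - y) \odot \sigma'(z^{L}), \\
\delta^{l} &= \big((W^{l+1})^{T}\,\delta^{l+1}\big) \odot \sigma'(z^{l}), \\
\nabla_{b^{l}} C &= \delta^{l}, \qquad
\nabla_{W^{l}} C = \delta^{l}\,(a^{l-1})^{T}.
\end{aligned}
$$

update_mini_batch averages these gradients over the mini-batch and then applies $W \leftarrow W - \eta\,\bar{\nabla}_{W} C$ and $b \leftarrow b - \eta\,\bar{\nabla}_{b} C$. Running the script produces the following output: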
C:\Anaconda3\envs\tf2\python.exe E:/Codes/MyCodes/TF2/TF2_5/network.py
50000 (784, 1) (10, 1)
10000 (784, 1) ()
7
Epoch 0: 6790 / 10000 loss: 1.2717699678849692
Epoch 1: 7916 / 10000 loss: 1.397263838572778
Epoch 2: 8433 / 10000 loss: 1.3628234379683601
Epoch 3: 8713 / 10000 loss: 1.5095953646561449
Epoch 4: 8821 / 10000 loss: 1.2249536061966981
Epoch 5: 8903 / 10000 loss: 1.5677245800293518
Epoch 6: 8960 / 10000 loss: 1.276735236529393
Epoch 7: 9002 / 10000 loss: 1.331073749968803
Epoch 8: 9047 / 10000 loss: 1.5302422968215337
Epoch 9: 9065 / 10000 loss: 1.4312348092663052
Epoch 10: 9089 / 10000 loss: 1.7322775984815209
Epoch 11: 9119 / 10000 loss: 1.6721216908194372
Epoch 12: 9123 / 10000 loss: 1.3733733412862665
Epoch 13: 9136 / 10000 loss: 1.7441048794605307
Epoch 14: 9151 / 10000 loss: 1.5243845736976125
Epoch 15: 9187 / 10000 loss: 1.1566135242955142
Epoch 16: 9207 / 10000 loss: 1.7391568735262681
Epoch 17: 9221 / 10000 loss: 1.2911041159782035
Epoch 18: 9217 / 10000 loss: 1.5401371908739707
Epoch 19: 9233 / 10000 loss: 1.6740181554857625
Epoch 20: 9243 / 10000 loss: 1.2672687649233518
Epoch 21: 9260 / 10000 loss: 1.1402616448074525
Epoch 22: 9262 / 10000 loss: 1.5355428114108485
Epoch 23: 9272 / 10000 loss: 2.2567888575387025
Epoch 24: 9275 / 10000 loss: 1.63390026971472
Epoch 25: 9289 / 10000 loss: 1.4642996974852398
Epoch 26: 9283 / 10000 loss: 1.7163730312764676
Epoch 27: 9294 / 10000 loss: 1.5675447190660474
Epoch 28: 9301 / 10000 loss: 1.4295239290240425
Epoch 29: 9311 / 10000 loss: 1.1059822188771817
Epoch 30: 9307 / 10000 loss: 2.2337759985132926
Epoch 31: 9318 / 10000 loss: 1.3430972588880217
Epoch 32: 9315 / 10000 loss: 1.802715387262949
Epoch 33: 9333 / 10000 loss: 1.9715957489803078
Epoch 34: 9330 / 10000 loss: 2.0008817386724744
Epoch 35: 9338 / 10000 loss: 1.491349621247504
Epoch 36: 9337 / 10000 loss: 1.6088625907016274
Epoch 37: 9323 / 10000 loss: 1.279351041477705
Epoch 38: 9335 / 10000 loss: 1.6142982000934092
Epoch 39: 9343 / 10000 loss: 2.0628572025393495
Epoch 40: 9349 / 10000 loss: 1.3685968429962663
Epoch 41: 9344 / 10000 loss: 1.5065851810437505
Epoch 42: 9346 / 10000 loss: 1.2730524348312353
Epoch 43: 9341 / 10000 loss: 1.2922570671487508
Epoch 44: 9345 / 10000 loss: 1.4456940118797863
Epoch 45: 9353 / 10000 loss: 1.7682293300106937
Epoch 46: 9346 / 10000 loss: 1.1462436907819187
Epoch 47: 9363 / 10000 loss: 1.898640531855814
Epoch 48: 9354 / 10000 loss: 1.816733854798791
Epoch 49: 9357 / 10000 loss: 2.068485504439674
Epoch 50: 9356 / 10000 loss: 1.9343630465518862
Epoch 51: 9352 / 10000 loss: 2.065367704843483
Epoch 52: 9357 / 10000 loss: 1.4581586339434816
Epoch 53: 9362 / 10000 loss: 1.8463656795711696
Epoch 54: 9362 / 10000 loss: 1.822759841599951
Epoch 55: 9363 / 10000 loss: 1.41461072229652
Epoch 56: 9360 / 10000 loss: 1.7609602644152533
Epoch 57: 9362 / 10000 loss: 1.9060203636458284
Epoch 58: 9362 / 10000 loss: 1.5475415977795641
Epoch 59: 9364 / 10000 loss: 1.4961981726477205
Epoch 60: 9368 / 10000 loss: 1.463688794207377
Epoch 61: 9368 / 10000 loss: 1.232086460928041
Epoch 62: 9365 / 10000 loss: 1.9020362532508082
Epoch 63: 9370 / 10000 loss: 1.9499993101300925
Epoch 64: 9378 / 10000 loss: 2.0434362917697046
Epoch 65: 9377 / 10000 loss: 2.140470828837837
Epoch 66: 9376 / 10000 loss: 1.436130493182695
Epoch 67: 9386 / 10000 loss: 1.6176064532674141
Epoch 68: 9387 / 10000 loss: 1.8594372245159712
Epoch 69: 9387 / 10000 loss: 1.5882995264684434
Epoch 70: 9384 / 10000 loss: 1.9925233799326378
Epoch 71: 9379 / 10000 loss: 1.1496289920380847
Epoch 72: 9386 / 10000 loss: 1.7817216479865234
Epoch 73: 9378 / 10000 loss: 1.9764447445441562
Epoch 74: 9387 / 10000 loss: 1.9112415189740517
Epoch 75: 9388 / 10000 loss: 1.5919455914765441
Epoch 76: 9388 / 10000 loss: 1.222452310018744
Epoch 77: 9393 / 10000 loss: 1.462052865166651
Epoch 78: 9389 / 10000 loss: 1.4055952002079557
Epoch 79: 9397 / 10000 loss: 1.4939061626124406
Epoch 80: 9395 / 10000 loss: 2.156465815321301
Epoch 81: 9393 / 10000 loss: 1.962730391540312
Epoch 82: 9404 / 10000 loss: 2.2707301775321533
Epoch 83: 9410 / 10000 loss: 1.866332244940262
Epoch 84: 9405 / 10000 loss: 1.5448187043312271
Epoch 85: 9401 / 10000 loss: 1.6454633591256256
Epoch 86: 9405 / 10000 loss: 1.7812180457218523
Epoch 87: 9405 / 10000 loss: 2.444235659832826
Epoch 88: 9393 / 10000 loss: 2.228215193214038