Algorithm 1. Standard BP algorithm
----
Input: training set D, learning rate η.
Process:
1. Randomly initialize all connection weights and thresholds (ω, θ).
2. Repeat:
3.   for (x_k, y_k) in D:
4.     compute the error E_k of the sample under the current parameters.
5.     compute the stochastic gradient term g_k from the update formulas.
6.     update (ω, θ) accordingly.
7.   end for
8. until the stopping condition is met
Output: (ω, θ), i.e. the corresponding multi-layer feed-forward network.
----
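For reference, the formulas behind steps 4-6 (the ones the BackPropagate method below implements, assuming a sigmoid activation on both layers; b_h denotes a hidden-layer output and ŷ_j a network output):

E_k = \frac{1}{2} \sum_j (\hat{y}_j - y_j)^2
g_j = \hat{y}_j (1 - \hat{y}_j)(y_j - \hat{y}_j)
e_h = b_h (1 - b_h) \sum_j w_{hj} g_j
\Delta w_{hj} = \eta g_j b_h, \quad \Delta \theta_j = -\eta g_j
\Delta v_{ih} = \eta e_h x_i, \quad \Delta \gamma_h = -\eta e_h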
class BP_network:

    def __init__(self):
        '''
        initialize variables
        '''
        # number of nodes in each layer
        self.i_n = 0
        self.h_n = 0
        self.o_n = 0

        # output value of each layer
        self.i_v = []
        self.h_v = []
        self.o_v = []

        # parameters (w, t)
        self.ih_w = []  # weight of each link
        self.ho_w = []
        self.h_t = []   # threshold of each neuron
        self.o_t = []

        # alternative activation functions and their derivatives
        self.fun = {
            'Sigmoid': Sigmoid,
            'SigmoidDerivate': SigmoidDerivate,
            'Tanh': Tanh,
            'TanhDerivate': TanhDerivate,
            # for more, add here
        }
    def CreateNN(self, ni, nh, no, actfun):
        '''
        build a BP network structure and initialize its parameters
        @param ni, nh, no: the number of neurons in each layer
        @param actfun: string, the name of the activation function
        '''
        # dependent packages
        import numpy as np

        # assignment of node numbers
        self.i_n = ni
        self.h_n = nh
        self.o_n = no

        # initial value of output for each layer
        self.i_v = np.zeros(self.i_n)
        self.h_v = np.zeros(self.h_n)
        self.o_v = np.zeros(self.o_n)

        # initial weights for each link (random initialization)
        self.ih_w = np.zeros([self.i_n, self.h_n])
        self.ho_w = np.zeros([self.h_n, self.o_n])
        for i in range(self.i_n):
            for h in range(self.h_n):
                self.ih_w[i][h] = rand(0, 1)
        for h in range(self.h_n):
            for j in range(self.o_n):
                self.ho_w[h][j] = rand(0, 1)

        # initial threshold for each neuron
        self.h_t = np.zeros(self.h_n)
        self.o_t = np.zeros(self.o_n)
        for h in range(self.h_n): self.h_t[h] = rand(0, 1)
        for j in range(self.o_n): self.o_t[j] = rand(0, 1)

        # initial activation function and its derivative
        self.af = self.fun[actfun]
        self.afd = self.fun[actfun + 'Derivate']
    def Pred(self, x):
        '''
        forward propagation through the network
        @param x: the input array for the input layer
        '''
        # activate input layer
        for i in range(self.i_n):
            self.i_v[i] = x[i]

        # activate hidden layer
        for h in range(self.h_n):
            total = 0.0
            for i in range(self.i_n):
                total += self.i_v[i] * self.ih_w[i][h]
            self.h_v[h] = self.af(total - self.h_t[h])

        # activate output layer
        for j in range(self.o_n):
            total = 0.0
            for h in range(self.h_n):
                total += self.h_v[h] * self.ho_w[h][j]
            self.o_v[j] = self.af(total - self.o_t[j])
    def BackPropagate(self, x, y, lr):
        '''
        one step of the BP algorithm on a single sample
        @param x, y: array, input and output of the data sample
        @param lr: float, the learning rate of the gradient descent iteration
        '''
        # dependent packages
        import numpy as np

        # get current network output
        self.Pred(x)

        # calculate the gradient terms based on the output
        o_grad = np.zeros(self.o_n)
        for j in range(self.o_n):
            o_grad[j] = (y[j] - self.o_v[j]) * self.afd(self.o_v[j])

        h_grad = np.zeros(self.h_n)
        for h in range(self.h_n):
            for j in range(self.o_n):
                h_grad[h] += self.ho_w[h][j] * o_grad[j]
            h_grad[h] = h_grad[h] * self.afd(self.h_v[h])

        # update the parameters
        for h in range(self.h_n):
            for j in range(self.o_n):
                self.ho_w[h][j] += lr * o_grad[j] * self.h_v[h]
        for i in range(self.i_n):
            for h in range(self.h_n):
                self.ih_w[i][h] += lr * h_grad[h] * self.i_v[i]
        for j in range(self.o_n):
            self.o_t[j] -= lr * o_grad[j]
        for h in range(self.h_n):
            self.h_t[h] -= lr * h_grad[h]
    def TrainStandard(self, data_in, data_out, lr=0.05):
        '''
        standard BP training (one pass over the training set)
        @param data_in, data_out: the training set inputs and outputs
        @param lr: learning rate, default 0.05
        @return: e, mean error over the pass
        @return: e_k, error of each step
        '''
        e_k = []
        for k in range(len(data_in)):
            x = data_in[k]
            y = data_out[k]
            self.BackPropagate(x, y, lr)

            # error on the training sample after this step
            y_delta2 = 0.0
            for j in range(self.o_n):
                y_delta2 += (self.o_v[j] - y[j]) * (self.o_v[j] - y[j])
            e_k.append(y_delta2 / 2)

        # mean error over the whole pass
        e = sum(e_k) / len(e_k)
        return e, e_k
    def PredLabel(self, X):
        '''
        predict class labels through the network
        @param X: the input sample set for the input layer
        @return: y, array of predicted labels (0/1), thresholding the output at 0.5
        '''
        import numpy as np

        y = []
        for m in range(len(X)):
            self.Pred(X[m])
            if self.o_v[0] > 0.5:
                y.append(1)
            else:
                y.append(0)
            # winner-takes-all variant for multi-class outputs:
            # max_y = self.o_v[0]
            # label = 0
            # for j in range(1, self.o_n):
            #     if max_y < self.o_v[j]:
            #         max_y = self.o_v[j]
            #         label = j
            # y.append(label)
        return np.array(y)
'''
definitions of the activation functions
'''
def Sigmoid(x):
    '''
    sigmoid function
    '''
    from math import exp
    return 1.0 / (1.0 + exp(-x))

def SigmoidDerivate(y):
    # derivative of sigmoid, expressed in terms of the output y = Sigmoid(x)
    return y * (1 - y)

def Tanh(x):
    '''
    tanh function
    '''
    from math import tanh
    return tanh(x)

def TanhDerivate(y):
    # derivative of tanh, expressed in terms of the output y = Tanh(x)
    return 1 - y * y
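Note that the derivative functions take the layer's activated output y rather than the pre-activation x: since σ'(x) = σ(x)(1 - σ(x)), passing y = σ(x) gives y(1 - y), and likewise tanh'(x) = 1 - tanh²(x). A quick numeric check of this convention (my own snippet, not from the original):

from math import exp

x = 0.7
y = Sigmoid(x)
# analytic sigmoid derivative at x: exp(-x) / (1 + exp(-x))^2
analytic = exp(-x) / (1.0 + exp(-x)) ** 2
print(SigmoidDerivate(y), analytic)  # the two values should agree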
'''
definition of the random function
'''
def rand(a, b):
    '''
    random value generation for parameter initialization
    @param a, b: the lower and upper bounds of the random value
    '''
    from random import random
    return (b - a) * random() + a
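A minimal smoke test for the class above (my own sketch; the XOR data, network size, epoch count, and learning rate are arbitrary choices, and convergence is not guaranteed on every random initialization):

# toy XOR data (hypothetical, not from the original post)
X = [[0, 0], [0, 1], [1, 0], [1, 1]]
Y = [[0], [1], [1], [0]]

nn = BP_network()
nn.CreateNN(2, 4, 1, 'Sigmoid')   # 2 inputs, 4 hidden neurons, 1 output

for epoch in range(5000):
    e, e_k = nn.TrainStandard(X, Y, lr=0.5)

print('mean error after training:', e)
print('predicted labels:', nn.PredLabel(X))   # a good run yields [0 1 1 0]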
A matrix-based BP implementation (NumPy)
import numpy
# scipy.special for the sigmoid function expit()
import scipy.special
# neural network class definition
class neuralNetwork:

    # initialise the neural network
    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        # set number of nodes in each input, hidden, output layer
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes

        # link weight matrices, wih and who
        # weights inside the arrays are w_i_j, where the link is from node i
        # to node j in the next layer:
        #   w11 w21
        #   w12 w22 etc
        self.wih = numpy.random.normal(0.0, pow(self.hnodes, -0.5), (self.hnodes, self.inodes))
        self.who = numpy.random.normal(0.0, pow(self.onodes, -0.5), (self.onodes, self.hnodes))

        # learning rate
        self.lr = learningrate

        # activation function is the sigmoid function
        self.activation_function = lambda x: scipy.special.expit(x)

    # train the neural network
    def train(self, inputs_list, targets_list):
        # convert inputs list to 2d array (column vectors)
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T

        # calculate signals into hidden layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        # calculate the signals emerging from hidden layer
        hidden_outputs = self.activation_function(hidden_inputs)

        # calculate signals into final output layer
        final_inputs = numpy.dot(self.who, hidden_outputs)
        # calculate the signals emerging from final output layer
        final_outputs = self.activation_function(final_inputs)

        # output layer error is (target - actual)
        output_errors = targets - final_outputs
        # hidden layer error is the output_errors, split by weights, recombined at hidden nodes
        hidden_errors = numpy.dot(self.who.T, output_errors)

        # update the weights for the links between the hidden and output layers
        self.who += self.lr * numpy.dot((output_errors * final_outputs * (1.0 - final_outputs)), numpy.transpose(hidden_outputs))
        # update the weights for the links between the input and hidden layers
        self.wih += self.lr * numpy.dot((hidden_errors * hidden_outputs * (1.0 - hidden_outputs)), numpy.transpose(inputs))

    # query the neural network
    def query(self, inputs_list):
        # convert inputs list to 2d array (column vector)
        inputs = numpy.array(inputs_list, ndmin=2).T

        # calculate signals into hidden layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        # calculate the signals emerging from hidden layer
        hidden_outputs = self.activation_function(hidden_inputs)

        # calculate signals into final output layer
        final_inputs = numpy.dot(self.who, hidden_outputs)
        # calculate the signals emerging from final output layer
        final_outputs = self.activation_function(final_inputs)

        return final_outputs
# number of input, hidden and output nodes
input_nodes = 3
hidden_nodes = 3
output_nodes = 3
# learning rate is 0.3
learning_rate = 0.3
# create instance of neural network
n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)
# test query (doesn't mean anything useful yet)
n.query([1.0, 0.5, -1.5])
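To see one training step in action, here is a hypothetical single update (my own snippet, not in the original): feed the same toy input with a made-up target and watch the output drift toward it.

# hypothetical single training step on the toy 3-3-3 network
target = [0.9, 0.1, 0.1]          # made-up target vector
before = n.query([1.0, 0.5, -1.5])
n.train([1.0, 0.5, -1.5], target)
after = n.query([1.0, 0.5, -1.5])
print(before.ravel(), '->', after.ravel())  # outputs should move toward the target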
Full-version test (MNIST)
# numpy provides arrays and useful functions for working with them
import numpy
# scipy.special for the sigmoid function expit()
import scipy.special
# scipy.ndimage for rotating image arrays
import scipy.ndimage
# neural network class definition
class neuralNetwork:

    # initialise the neural network
    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        # set number of nodes in each input, hidden, output layer
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes

        # link weight matrices, wih and who
        # weights inside the arrays are w_i_j, where the link is from node i
        # to node j in the next layer:
        #   w11 w21
        #   w12 w22 etc
        self.wih = numpy.random.normal(0.0, pow(self.hnodes, -0.5), (self.hnodes, self.inodes))
        self.who = numpy.random.normal(0.0, pow(self.onodes, -0.5), (self.onodes, self.hnodes))

        # learning rate
        self.lr = learningrate

        # activation function is the sigmoid function
        self.activation_function = lambda x: scipy.special.expit(x)

    # train the neural network
    def train(self, inputs_list, targets_list):
        # convert inputs list to 2d array (column vectors)
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T

        # calculate signals into hidden layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        # calculate the signals emerging from hidden layer
        hidden_outputs = self.activation_function(hidden_inputs)

        # calculate signals into final output layer
        final_inputs = numpy.dot(self.who, hidden_outputs)
        # calculate the signals emerging from final output layer
        final_outputs = self.activation_function(final_inputs)

        # output layer error is (target - actual)
        output_errors = targets - final_outputs
        # hidden layer error is the output_errors, split by weights, recombined at hidden nodes
        hidden_errors = numpy.dot(self.who.T, output_errors)

        # update the weights for the links between the hidden and output layers
        self.who += self.lr * numpy.dot((output_errors * final_outputs * (1.0 - final_outputs)), numpy.transpose(hidden_outputs))
        # update the weights for the links between the input and hidden layers
        self.wih += self.lr * numpy.dot((hidden_errors * hidden_outputs * (1.0 - hidden_outputs)), numpy.transpose(inputs))

    # query the neural network
    def query(self, inputs_list):
        # convert inputs list to 2d array (column vector)
        inputs = numpy.array(inputs_list, ndmin=2).T

        # calculate signals into hidden layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        # calculate the signals emerging from hidden layer
        hidden_outputs = self.activation_function(hidden_inputs)

        # calculate signals into final output layer
        final_inputs = numpy.dot(self.who, hidden_outputs)
        # calculate the signals emerging from final output layer
        final_outputs = self.activation_function(final_inputs)

        return final_outputs
# number of input, hidden and output nodes
input_nodes = 784
hidden_nodes = 200
output_nodes = 10
# learning rate
learning_rate = 0.01
# create instance of neural network
n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)
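Because the weight matrices are stored as (next layer × current layer), numpy.dot(w, inputs) produces the next layer's input signals directly. A quick shape check (my own snippet):

print(n.wih.shape)  # (200, 784): hidden_nodes x input_nodes
print(n.who.shape)  # (10, 200): output_nodes x hidden_nodes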
# load the mnist training data CSV file into a list
training_data_file = open("mnist_dataset/mnist_train.csv", 'r')
training_data_list = training_data_file.readlines()
training_data_file.close()
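Each CSV line is a label followed by 784 pixel values in 0..255, which is the layout the training loop below relies on. A quick peek at the first record (my own check, assuming the standard MNIST CSV format):

first = training_data_list[0].split(',')
print('label:', first[0])
print('pixel count:', len(first) - 1)   # expect 784 for a 28x28 image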
# train the neural network

# epochs is the number of times the training data set is used for training
epochs = 10

for e in range(epochs):
    # go through all records in the training data set
    for record in training_data_list:
        # split the record by the ',' commas
        all_values = record.split(',')
        # scale and shift the inputs from 0..255 to 0.01..1.00
        inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
        # create the target output values (all 0.01, except the desired label which is 0.99)
        targets = numpy.zeros(output_nodes) + 0.01
        # all_values[0] is the target label for this record
        targets[int(all_values[0])] = 0.99
        n.train(inputs, targets)

        ## create rotated variations
        # rotated anticlockwise by 10 degrees
        inputs_plusx_img = scipy.ndimage.rotate(inputs.reshape(28, 28), 10, cval=0.01, order=1, reshape=False)
        n.train(inputs_plusx_img.reshape(784), targets)
        # rotated clockwise by 10 degrees
        inputs_minusx_img = scipy.ndimage.rotate(inputs.reshape(28, 28), -10, cval=0.01, order=1, reshape=False)
        n.train(inputs_minusx_img.reshape(784), targets)
# load the mnist test data CSV file into a list
test_data_file = open("mnist_dataset/mnist_test.csv", 'r')
test_data_list = test_data_file.readlines()
test_data_file.close()
# test the neural network

# scorecard for how well the network performs, initially empty
scorecard = []

# go through all the records in the test data set
for record in test_data_list:
    # split the record by the ',' commas
    all_values = record.split(',')
    # correct answer is the first value
    correct_label = int(all_values[0])
    # scale and shift the inputs
    inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
    # query the network
    outputs = n.query(inputs)
    # the index of the highest value corresponds to the label
    label = numpy.argmax(outputs)
    # append correct or incorrect to the scorecard
    if label == correct_label:
        # network's answer matches the correct answer
        scorecard.append(1)
    else:
        # network's answer doesn't match the correct answer
        scorecard.append(0)
# calculate the performance score, the fraction of correct answers
scorecard_array = numpy.asarray(scorecard)
print("performance =", scorecard_array.sum() / scorecard_array.size)