The BP Algorithm

Algorithm 1. Standard BP
----
    Input: training set D, learning rate η.
    Process:
        1. Randomly initialize the connection weights and thresholds (ω, θ).
        2. Repeat:
        3.   for (x_k, y_k) in D:
        4.     Compute the error E_k of the current network on the sample.
        5.     Compute the stochastic gradient terms g_k from the update formulas below.
        6.     Update (ω, θ).
        7.   end for
        8. until the stopping condition is met
    Output: (ω, θ), i.e. the corresponding multi-layer feed-forward network.
----
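
Steps 5-6 refer to the standard gradient formulas for a one-hidden-layer sigmoid network, which the code below implements loop by loop. Here ω collects all the weights ($v_{ih}$ input-to-hidden, $w_{hj}$ hidden-to-output) and θ all the thresholds ($\gamma_h$, $\theta_j$); $b_h$ are the hidden outputs and $\hat{y}_j$ the network outputs:

$$g_j = \hat{y}_j(1-\hat{y}_j)(y_j-\hat{y}_j), \qquad e_h = b_h(1-b_h)\sum_j w_{hj}\,g_j$$

$$\Delta w_{hj} = \eta\,g_j b_h, \quad \Delta\theta_j = -\eta\,g_j, \quad \Delta v_{ih} = \eta\,e_h x_i, \quad \Delta\gamma_h = -\eta\,e_h$$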

 

class BP_network: 

    def __init__(self):
        
        '''
        initialize instance variables
        '''
        # number of nodes in each layer
        self.i_n = 0           
        self.h_n = 0   
        self.o_n = 0

        # output value for each layer
        self.i_v = []       
        self.h_v = []
        self.o_v = []

        # parameters (w, t)
        self.ih_w = []    # weight for each link
        self.ho_w = []
        self.h_t  = []    # threshold for each neuron
        self.o_t  = []

        # available activation functions and their derivatives, registered by name
        self.fun = {
            'Sigmoid': Sigmoid, 
            'SigmoidDerivate': SigmoidDerivate,
            'Tanh': Tanh, 
            'TanhDerivate': TanhDerivate,
            
            # for more, add here
            }
        
        
    def CreateNN(self, ni, nh, no, actfun):
        '''
        build the BP network structure and initialize its parameters
        @param ni, nh, no: the number of neurons in the input, hidden and output layers
        @param actfun: string, the name of the activation function
        '''
        
        # dependent packages
        import numpy as np       
               
        # assignment of node number
        self.i_n = ni
        self.h_n = nh
        self.o_n = no
        
        # initial value of output for each layer
        self.i_v = np.zeros(self.i_n)
        self.h_v = np.zeros(self.h_n)
        self.o_v = np.zeros(self.o_n)

        # initial weights for each link (random initialization)
        self.ih_w = np.zeros([self.i_n, self.h_n])
        self.ho_w = np.zeros([self.h_n, self.o_n])
        for i in range(self.i_n):  
            for h in range(self.h_n): 
                self.ih_w[i][h] = rand(0, 1)
        for h in range(self.h_n):  
            for j in range(self.o_n): 
                self.ho_w[h][j] = rand(0, 1)
                
        # initial threshold for each neuron
        self.h_t = np.zeros(self.h_n)
        self.o_t = np.zeros(self.o_n)
        for h in range(self.h_n): self.h_t[h] = rand(0, 1)
        for j in range(self.o_n): self.o_t[j] = rand(0, 1)
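
        # note: uniform (0,1) initialization is enough for this small demo; a common
        # alternative (used by the NumPy version later in this post) is to sample the
        # weights from a zero-mean normal so they start small and of mixed sign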

        # select the activation function and its derivative
        self.af  = self.fun[actfun]
        self.afd = self.fun[actfun+'Derivate']


    def Pred(self, x):
        '''
        forward pass: propagate the input x through the network
        (each neuron computes f(sum of weighted inputs - threshold))
        @param x: the input array for the input layer
        '''
        
        # activate input layer
        for i in range(self.i_n):
            self.i_v[i] = x[i]
            
        # activate hidden layer
        for h in range(self.h_n):
            total = 0.0
            for i in range(self.i_n):
                total += self.i_v[i] * self.ih_w[i][h]
            self.h_v[h] = self.af(total - self.h_t[h])
            
        # activate output layer
        for j in range(self.o_n):
            total = 0.0
            for h in range(self.h_n):
                total += self.h_v[h] * self.ho_w[h][j]
            self.o_v[j] = self.af(total - self.o_t[j])
        
        
    def BackPropagate(self, x, y, lr):
        '''
        one step of the BP algorithm on a single sample

        @param x, y: array, input and target output of the data sample
        @param lr: float, the learning rate of the gradient descent iteration
        '''
        
        # dependent packages
        import numpy as np 

        # get current network output
        self.Pred(x)
        
        # gradient term of each output neuron: g_j = (y_j - ŷ_j) * f'(ŷ_j)
        o_grad = np.zeros(self.o_n)
        for j in range(self.o_n):
            o_grad[j] = (y[j] - self.o_v[j]) * self.afd(self.o_v[j])

        # gradient term of each hidden neuron: e_h = f'(b_h) * Σ_j w_hj * g_j
        h_grad = np.zeros(self.h_n)
        for h in range(self.h_n):
            for j in range(self.o_n):
                h_grad[h] += self.ho_w[h][j] * o_grad[j]
            h_grad[h] = h_grad[h] * self.afd(self.h_v[h])

        # update the weights: Δw = η * gradient * input of the link
        for h in range(self.h_n):
            for j in range(self.o_n):
                self.ho_w[h][j] += lr * o_grad[j] * self.h_v[h]

        for i in range(self.i_n):
            for h in range(self.h_n):
                self.ih_w[i][h] += lr * h_grad[h] * self.i_v[i]

        # update the thresholds: Δθ = -η * gradient
        for j in range(self.o_n):
            self.o_t[j] -= lr * o_grad[j]

        for h in range(self.h_n):
            self.h_t[h] -= lr * h_grad[h]
   
   
    def TrainStandard(self, data_in, data_out, lr=0.05):
        '''
        standard (per-sample) BP training: one parameter update per sample
        @param lr: learning rate, default 0.05
        @return: e, mean error over the training set
        @return: e_k, error of each sample, computed from the pre-update output
        '''
        e_k = []
        for k in range(len(data_in)):
            x = data_in[k]
            y = data_out[k]
            self.BackPropagate(x, y, lr)
            
            # squared error E_k = ½ Σ_j (ŷ_j - y_j)² of this sample
            y_delta2 = 0.0
            for j in range(self.o_n):
                y_delta2 += (self.o_v[j] - y[j]) * (self.o_v[j] - y[j])  
            e_k.append(y_delta2/2)

        # mean error over the training set
        e = sum(e_k)/len(e_k)
        
        return e, e_k
        
    def PredLabel(self, X):
        '''
        predict class labels through the network

        @param X: the input sample set for the input layer
        @return: y, array of 0/1 labels obtained by thresholding the single
                 output at 0.5 (the commented code below sketches
                 winner-takes-all for multi-output networks)
        '''
        import numpy as np

        y = []

        for m in range(len(X)):
            self.Pred(X[m])
            if self.o_v[0] > 0.5:  y.append(1)
            else: y.append(0)
#             # winner-takes-all: pick the output neuron with the largest value
#             max_y = self.o_v[0]
#             label = 0
#             for j in range(1, self.o_n):
#                 if max_y < self.o_v[j]:
#                     max_y = self.o_v[j]
#                     label = j
#             y.append(label)

        return np.array(y)
            

'''
the definition of activation functions
'''
def Sigmoid(x):
    '''
    definition of the sigmoid function
    '''
    from math import exp
    return 1.0 / (1.0 + exp(-x))
def SigmoidDerivate(y):
    return y * (1 - y)

def Tanh(x):
    '''
    definition of the tanh function
    '''
    from math import tanh
    return tanh(x)
def TanhDerivate(y):
    return 1 - y*y
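
Note that both derivative helpers take the function's output y rather than its input: since σ'(x) = σ(x)(1 - σ(x)) and tanh'(x) = 1 - tanh²(x), BackPropagate can pass the already-computed activations self.o_v[j] and self.h_v[h] directly instead of re-evaluating the function.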

'''
the definition of the random initialization helper
'''
def rand(a, b):
    '''
    generate a random value for parameter initialization
    @param a, b: the lower and upper bounds of the random value
    '''
    from random import random
    return (b - a) * random() + a
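
To tie the pieces together, here is a minimal usage sketch (not part of the original post) that trains the network on a hypothetical XOR-style toy set:

# minimal usage sketch, assuming the BP_network class above is in scope
import numpy as np

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)  # XOR inputs
Y = np.array([[0], [1], [1], [0]], dtype=float)              # XOR targets

nn = BP_network()
nn.CreateNN(2, 4, 1, 'Sigmoid')       # 2 inputs, 4 hidden neurons, 1 output

for epoch in range(10000):
    e, e_k = nn.TrainStandard(X, Y, lr=0.5)

print(e)                # mean squared error, small once training has converged
print(nn.PredLabel(X))  # ideally [0 1 1 0]; convergence depends on the random init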

BP with NumPy matrices


import numpy
# scipy.special for the sigmoid function expit()
import scipy.special




# neural network class definition
class neuralNetwork:
    
    
    # initialise the neural network
    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        # set number of nodes in each input, hidden, output layer
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes
        
        # link weight matrices, wih and who
        # weights inside the arrays are w_i_j, where link is from node i to node j in the next layer
        # w11 w21
        # w12 w22 etc 
        self.wih = numpy.random.normal(0.0, pow(self.hnodes, -0.5), (self.hnodes, self.inodes))
        self.who = numpy.random.normal(0.0, pow(self.onodes, -0.5), (self.onodes, self.hnodes))

        # learning rate
        self.lr = learningrate
        
        # activation function is the sigmoid function
        self.activation_function = lambda x: scipy.special.expit(x)
        
        pass

    
    # train the neural network
    def train(self, inputs_list, targets_list):
        # convert inputs list to 2d array
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T
        
        # calculate signals into hidden layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        # calculate the signals emerging from hidden layer
        hidden_outputs = self.activation_function(hidden_inputs)
        
        # calculate signals into final output layer
        final_inputs = numpy.dot(self.who, hidden_outputs)
        # calculate the signals emerging from final output layer
        final_outputs = self.activation_function(final_inputs)
        
        # output layer error is the (target - actual)
        output_errors = targets - final_outputs
        # hidden layer error is the output_errors, split by weights, recombined at hidden nodes
        hidden_errors = numpy.dot(self.who.T, output_errors) 
        
        # update the weights for the links between the hidden and output layers
        self.who += self.lr * numpy.dot((output_errors * final_outputs * (1.0 - final_outputs)), numpy.transpose(hidden_outputs))
        
        # update the weights for the links between the input and hidden layers
        self.wih += self.lr * numpy.dot((hidden_errors * hidden_outputs * (1.0 - hidden_outputs)), numpy.transpose(inputs))
        
        pass

    
    # query the neural network
    def query(self, inputs_list):
        # convert inputs list to 2d array
        inputs = numpy.array(inputs_list, ndmin=2).T
        
        # calculate signals into hidden layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        # calculate the signals emerging from hidden layer
        hidden_outputs = self.activation_function(hidden_inputs)
        
        # calculate signals into final output layer
        final_inputs = numpy.dot(self.who, hidden_outputs)
        # calculate the signals emerging from final output layer
        final_outputs = self.activation_function(final_inputs)
        
        return final_outputs
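
Note how train() collapses the per-neuron loops of the first implementation into matrix operations: output_errors * final_outputs * (1.0 - final_outputs) is the elementwise gradient term (out * (1 - out) being the sigmoid derivative written in terms of the output), and its outer product with the previous layer's outputs yields the whole weight-update matrix in one step. Also note that this version learns weights only; it has no explicit thresholds/biases.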




# number of input, hidden and output nodes
input_nodes = 3
hidden_nodes = 3
output_nodes = 3

# learning rate is 0.3
learning_rate = 0.3

# create instance of neural network
n = neuralNetwork(input_nodes,hidden_nodes,output_nodes, learning_rate)




# test query (doesn't mean anything useful yet)
n.query([1.0, 0.5, -1.5])
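
With freshly initialized random weights this simply returns a 3×1 column of sigmoid activations, one per output node; the values are meaningless until the network is trained.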

Full version: training and testing on MNIST

# numpy provides arrays and useful functions for working with them
import numpy
# scipy.special for the sigmoid function expit()
import scipy.special
# scipy.ndimage for rotating image arrays
import scipy.ndimage


# the neuralNetwork class definition is identical to the one above and is omitted here




# number of input, hidden and output nodes
input_nodes = 784
hidden_nodes = 200
output_nodes = 10

# learning rate
learning_rate = 0.01

# create instance of neural network
n = neuralNetwork(input_nodes,hidden_nodes,output_nodes, learning_rate)




# load the mnist training data CSV file into a list
training_data_file = open("mnist_dataset/mnist_train.csv", 'r')
training_data_list = training_data_file.readlines()
training_data_file.close()
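
readlines() pulls the whole CSV into memory at once, which is fine for MNIST (the training file is on the order of 100 MB) but worth replacing with line-by-line iteration for larger datasets.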




# train the neural network

# epochs is the number of times the training data set is used for training
epochs = 10

for e in range(epochs):
    # go through all records in the training data set
    for record in training_data_list:
        # split the record by the ',' commas
        all_values = record.split(',')
        # scale and shift the inputs
        inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
        # create the target output values (all 0.01, except the desired label which is 0.99)
        targets = numpy.zeros(output_nodes) + 0.01
        # all_values[0] is the target label for this record
        targets[int(all_values[0])] = 0.99
        n.train(inputs, targets)
        
        ## create rotated variations for data augmentation
        ## (scipy.ndimage.rotate; the old scipy.ndimage.interpolation
        ##  submodule has been removed from recent SciPy releases)
        # rotated anticlockwise by 10 degrees
        inputs_plusx_img = scipy.ndimage.rotate(inputs.reshape(28,28), 10, cval=0.01, order=1, reshape=False)
        n.train(inputs_plusx_img.reshape(784), targets)
        # rotated clockwise by 10 degrees
        inputs_minusx_img = scipy.ndimage.rotate(inputs.reshape(28,28), -10, cval=0.01, order=1, reshape=False)
        n.train(inputs_minusx_img.reshape(784), targets)

        pass
    pass
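
Each record is therefore trained on three times per epoch: the original image plus versions rotated by ±10 degrees. reshape=False keeps the rotated output at 28×28, order=1 uses cheap bilinear interpolation, and cval=0.01 fills the exposed corners with the same value that the input scaling maps blank pixels to.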




# load the mnist test data CSV file into a list
test_data_file = open("mnist_dataset/mnist_test.csv", 'r')
test_data_list = test_data_file.readlines()
test_data_file.close()




# test the neural network

# scorecard for how well the network performs, initially empty
scorecard = []

# go through all the records in the test data set
for record in test_data_list:
    # split the record by the ',' commas
    all_values = record.split(',')
    # correct answer is first value
    correct_label = int(all_values[0])
    # scale and shift the inputs
    inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
    # query the network
    outputs = n.query(inputs)
    # the index of the highest value corresponds to the label
    label = numpy.argmax(outputs)
    # append correct or incorrect to list
    if (label == correct_label):
        # network's answer matches correct answer, add 1 to scorecard
        scorecard.append(1)
    else:
        # network's answer doesn't match correct answer, add 0 to scorecard
        scorecard.append(0)
        pass
    
    pass




# calculate the performance score, the fraction of correct answers
scorecard_array = numpy.asarray(scorecard)
print("performance = ", scorecard_array.sum() / scorecard_array.size)
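
As a quick sanity check (not part of the original post), one can display a single test digit and compare the network's prediction with its label; matplotlib is assumed to be available:

import matplotlib.pyplot as plt

# take the first test record and split off the label and the 784 pixel values
all_values = test_data_list[0].split(',')
image = numpy.asarray(all_values[1:], dtype=float).reshape(28, 28)

# show the digit
plt.imshow(image, cmap='Greys', interpolation='none')
plt.show()

# query the trained network using the same scaling as during training
outputs = n.query((image.flatten() / 255.0 * 0.99) + 0.01)
print("label:", all_values[0], " prediction:", numpy.argmax(outputs))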

 

 
