Implementing a BP Neural Network in Python

First, the complete code; a detailed walkthrough may follow when time allows.

import numpy as np
import random

class neru_bp():
    def __init__(self, learning_rate=0.2, n_iterations=4000):
        self.learning_rate = learning_rate   # backpropagation learning rate
        self.n_iterations = n_iterations     # number of mini-batch training iterations
        self.bp_gredient = []                # per-layer residuals (deltas) kept during backprop
        self.train_weights = []              # trainable weights
        self.train_weights_grad = []         # weight gradients
        self.train_bias = []                 # trainable biases
        self.train_bias_grad = []            # bias gradients
        self.feedforward_a = []              # intermediate activations from the forward pass
        self.error_term = []                 # residuals
        self.predict_a = []                  # intermediate activations for single-sample prediction
        self.lambda1 = 0.0                   # L2 regularization strength (0 disables it)
        self.mini_batch_size = 10            # samples per mini-batch for gradient descent
        self.architecture = [64, 100, 10]    # layer sizes: 64 inputs (matching the data dimension), 100 hidden units, 10 outputs
    def init_value(self):
        # Allocate weights, biases, and work buffers. Biases, gradients, activations,
        # and residuals exist only from the second layer onward, so list index i-1
        # corresponds to layer i of the architecture.
        for i in range(1, len(self.architecture)):
            w = np.random.rand(self.architecture[i], self.architecture[i-1]) * 0.01
            self.train_weights.append(w)
            self.train_weights_grad.append(np.zeros([self.architecture[i], self.architecture[i-1]]))
            self.train_bias.append(np.random.rand(self.architecture[i], 1))
            self.train_bias_grad.append(np.zeros([self.architecture[i], 1]))
            self.feedforward_a.append(np.zeros([self.architecture[i], self.mini_batch_size]))
            e = np.zeros([self.architecture[i], self.mini_batch_size])
            self.error_term.append(e)
            self.bp_gredient.append(np.zeros_like(e))
            self.predict_a.append(np.zeros([self.architecture[i], 1]))
    def get_batch_bias(self, bias):
        # Tile a (n, 1) bias column across the mini-batch so it can be added
        # to a (n, mini_batch_size) pre-activation matrix.
        return np.tile(bias.reshape(-1, 1), (1, self.mini_batch_size))
    def sigmoid(self,z):
        return 1/(1+np.exp(-z))
    def feedforward(self, X):
        # Forward pass over a mini-batch; X is the input layer, which is not
        # stored in feedforward_a.
        for i in range(len(self.feedforward_a)):
            prev_a = X if i == 0 else self.feedforward_a[i-1]
            z = np.dot(self.train_weights[i], prev_a) + self.get_batch_bias(self.train_bias[i])
            self.feedforward_a[i] = self.sigmoid(z)
        return self.feedforward_a
    def sig_bg(self, a):
        # Derivative of the sigmoid expressed via its output: if a = sigmoid(z),
        # then sigmoid'(z) = a * (1 - a). Callers pass activations, not pre-activations,
        # so applying sigmoid again here (as the original did) would be wrong.
        return a * (1 - a)
    def back_forward(self, X, train_labels):
        # Backpropagation over one mini-batch: run the forward pass, then walk
        # the layers in reverse, computing residuals (deltas) and gradients.
        train_labels = train_labels.reshape(train_labels.shape[0], self.mini_batch_size)
        X = X.reshape(X.shape[0], self.mini_batch_size)
        bk_len = list(range(len(self.architecture)))
        bk_len.reverse()
        bk_len.pop()  # layer 0 is the input X, which has no residual
        self.feedforward(X)
        for i in bk_len:
            if i == len(self.architecture) - 1:
                # Output layer: delta = (a - y) * sigmoid'(z) for the squared-error loss.
                er1 = (self.feedforward_a[i-1] - train_labels) * self.sig_bg(self.feedforward_a[i-1])
                self.bp_gredient[i-1] = er1
                w_g = np.dot(self.bp_gredient[i-1], self.feedforward_a[i-2].T) / self.mini_batch_size
            elif i == 1:
                # First hidden layer: propagate the residual back through the next
                # layer's weights; the previous activation is the input X itself.
                er1 = (np.dot(self.bp_gredient[i].T, self.train_weights[i])).T * self.sig_bg(self.feedforward_a[i-1])
                self.bp_gredient[i-1] = er1
                w_g = np.dot(self.bp_gredient[i-1], X.T) / self.mini_batch_size
            else:
                # Middle hidden layers: same as above, but with stored activations.
                er1 = (np.dot(self.bp_gredient[i].T, self.train_weights[i])).T * self.sig_bg(self.feedforward_a[i-1])
                self.bp_gredient[i-1] = er1
                w_g = np.dot(self.bp_gredient[i-1], self.feedforward_a[i-2].T) / self.mini_batch_size
            # Bias gradient: average the residual over the mini-batch.
            b_g = np.mean(self.bp_gredient[i-1], axis=1)
            b_g = np.reshape(b_g, newshape=self.train_bias_grad[i-1].shape)
            self.train_weights_grad[i-1] = w_g
            self.train_bias_grad[i-1] = b_g
    def get_minibatch(self, X, y):
        # Sample a random mini-batch of columns (samples are stored column-wise).
        m = X.shape[1]
        sample_num = random.sample(range(m), self.mini_batch_size)
        return X[:, sample_num], y[:, sample_num]
    def predict(self, X):
        # Predict a single sample; X is a 1-D feature vector.
        n = X.shape[0]
        X = X.reshape([n, 1])
        for i in range(len(self.predict_a)):
            prev_a = X if i == 0 else self.predict_a[i-1]
            self.predict_a[i] = self.sigmoid(np.dot(self.train_weights[i], prev_a) + self.train_bias[i])
        return self.predict_a[-1]
        
    def loss(self, X, y, pred_y):
        # Mean squared error over the batch plus the L2 weight penalty.
        m = y.shape[1]
        loss_square = np.sum((y - pred_y) ** 2) / m
        loss_regular = 0
        for w in self.train_weights:
            loss_regular += np.sum(w * w)
        loss_regular *= self.lambda1 / 2
        return loss_square + loss_regular
    def evaluate(self, test_X, test_y, onehot):
        # Accuracy over a test set stored column-wise.
        cnt = 0
        for i in range(test_X.shape[1]):
            pred_y = self.predict(test_X[:, i])
            if onehot:
                # Compare the argmax of the one-hot label with the argmax of the prediction.
                if np.argmax(test_y[:, i], axis=0) == np.argmax(pred_y, axis=0)[0]:
                    cnt += 1
            else:
                # Scalar output: threshold the sigmoid at 0.5 rather than testing
                # exact float equality, which would almost never hold.
                if (pred_y[0, 0] > 0.5) == (test_y[0, i] > 0.5):
                    cnt += 1
        return cnt / test_y.shape[1]
                      
    def train(self, X, y):
        for i in range(self.n_iterations):
            X_batch, y_batch = self.get_minibatch(X, y)
            self.back_forward(X_batch, y_batch)
            # Gradient-descent update, with L2 weight decay applied to the weights.
            for j in range(len(self.train_weights)):
                self.train_weights[j] -= self.learning_rate * (self.train_weights_grad[j] + self.lambda1 * self.train_weights[j])
                self.train_bias[j] -= self.learning_rate * self.train_bias_grad[j]
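For reference, these are the residual formulas that back_forward implements, where a_l is the activation of layer l, y is the one-hot label matrix for the mini-batch, and ⊙ denotes elementwise multiplication:

delta_L = (a_L − y) ⊙ a_L ⊙ (1 − a_L)   for the output layer L
delta_l = (W_{l+1}^T · delta_{l+1}) ⊙ a_l ⊙ (1 − a_l)   for hidden layers
∂J/∂W_l = delta_l · a_{l−1}^T / mini_batch_size, and ∂J/∂b_l is the mean of delta_l over the batch.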

Next, train and predict on handwritten-digit image data (MNIST):
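A minimal usage sketch follows. The dataset choice and preprocessing here are my assumptions, not from the original post: I use scikit-learn's 8×8 digits dataset, since its 64 features match the 64-unit input layer above, with one-hot encoded labels.

import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Assumed data source: scikit-learn's digits (8x8 images, 64 features, 10 classes).
digits = load_digits()
X = digits.data / 16.0                 # scale pixel values into [0, 1]
y = np.eye(10)[digits.target]          # one-hot encode the labels

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

net = neru_bp()
net.init_value()
net.train(X_train.T, y_train.T)        # the class stores samples column-wise
print("test accuracy:", net.evaluate(X_test.T, y_test.T, onehot=True))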

The accuracy reached 80%. It was very low at first; after checking the code I found that some of the formulas had been written incorrectly, and it took quite a while to fix them. A reminder to be more careful when writing code like this.
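One way to catch formula mistakes like these early is numerical gradient checking: perturb a weight, re-evaluate the loss, and compare the finite-difference slope against the analytic gradient from back_forward. Below is a sketch of the idea (my addition, not part of the original code). It assumes the squared-error objective J = sum((a − y)^2) / (2 · mini_batch_size) that the residual formulas above correspond to, and that X and y hold exactly mini_batch_size samples as columns:

import numpy as np

def batch_loss(net, X, y):
    # Squared-error objective consistent with the gradients in back_forward.
    a = net.feedforward(X)[-1]          # output-layer activations
    return np.sum((a - y) ** 2) / (2.0 * net.mini_batch_size)

def check_gradient(net, X, y, layer=0, eps=1e-5, n_checks=5):
    net.back_forward(X, y)              # fills net.train_weights_grad
    W = net.train_weights[layer]
    for _ in range(n_checks):
        r = np.random.randint(W.shape[0])
        c = np.random.randint(W.shape[1])
        old = W[r, c]
        W[r, c] = old + eps
        loss_plus = batch_loss(net, X, y)
        W[r, c] = old - eps
        loss_minus = batch_loss(net, X, y)
        W[r, c] = old                   # restore the weight
        numeric = (loss_plus - loss_minus) / (2 * eps)
        analytic = net.train_weights_grad[layer][r, c]
        print("numeric %.6e  analytic %.6e" % (numeric, analytic))

Before training, the two columns should agree to several significant digits; a large mismatch points at the residual formulas.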
