Andrew Ng's Neural Network for Digit Recognition: Forward and Backward Propagation Combined, a Python Implementation

Main function

If anything is unclear, feel free to copy-paste the code and step through it yourself.

import time
from Four_Week.Regularized import regularized_cost, regularized_gradient
from Four_Week.Tool import random_init, serialize, deserialize, accuracy
from Three_Week.dispaly_Data import display_data
from Three_Week.predict import predict
import numpy as np
import scipy.io as scio
import scipy.optimize as opt
picture_DataFile = 'ex3data1.mat'
picture_Data = scio.loadmat(picture_DataFile)
X = picture_Data['X']  # X: 5000x400. Each image is a 20x20 grayscale image unrolled into a 1-D vector; with 5000 images this gives 5000x400


y = picture_Data['y'].flatten()  # the digit label for each row of X; after flatten, y: (5000,)
print(y)
ylabel=[]
for i in range(y.size):
    zeros = np.zeros(10)
    zeros[y[i]-1]=1
    ylabel.append(zeros)
yarray = np.array(ylabel)  # yarray: 5000x10, one-hot labels for the multi-class output
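# Side note (a sketch): the loop above is equivalent to one line of NumPy fancy
# indexing, since the labels run 1..10 with 10 standing for the digit 0:
assert np.array_equal(yarray, np.eye(10)[y - 1])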
init_theta = random_init(10285)  # 25*401 + 10*26
res = opt.minimize(fun=regularized_cost,
                   x0=init_theta,  # theta1 and theta2 unrolled into a single 1-D vector, as the optimizer requires
                   args=(X, yarray),  # X: 5000x400, yarray: 5000x10
                   method='TNC',
                   jac=regularized_gradient,
                   options={'maxiter': 400})  # x0 and args are both passed to fun and jac; x0 holds the parameters being trained

theta1, theta2 = deserialize(res.x)
rand_indices = np.random.permutation(range(5000))  # 5000 shuffled indices covering 0-4999
accuracy(res.x, X, y,res)
for i in range(5000):
    example = X[rand_indices[i]]
    example = example.reshape((1, example.size)) #example:1x400
    display_data(example)
    pred = predict(theta1, theta2, example)
    print('Neural network prediction is: {} (digit {})'.format(pred, np.mod(pred, 10)))
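
The predict helper is imported from the Week 3 exercise and not shown in this post. A minimal sketch consistent with the shapes and 1-based labels used above, assuming it simply runs one forward pass and takes the argmax:

def predict(theta1, theta2, X):
    # Hypothetical re-implementation, not the original Three_Week.predict:
    # X: m x 400, theta1: 25x401, theta2: 10x26
    m = X.shape[0]
    a1 = np.c_[np.ones(m), X]                 # add the bias column
    a2 = 1 / (1 + np.exp(-a1.dot(theta1.T)))  # hidden-layer activations
    a2 = np.c_[np.ones(m), a2]
    a3 = 1 / (1 + np.exp(-a2.dot(theta2.T)))  # output-layer activations
    return np.argmax(a3, axis=1) + 1          # labels are 1..10 (10 stands for digit 0)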

Utility functions

def costFunction(theta, X, yarray):  # X: 5000x400, yarray: 5000x10
    theta1, theta2 = deserialize(theta)#theta1:25x401,theta2:10x26
    m = X.shape[0]
    X = np.c_[np.ones(m), X]  # X:5000x401
    z2 = X.dot(theta1.T)  # 5000x401 * 401x25
    a2 = 1 / (1 + np.exp(-z2))#a2:5000x25
    a2 = np.c_[np.ones(m), a2]#a2:5000x26
    z3 = a2.dot(theta2.T)
    a3 = 1 / (1 + np.exp(-z3))  # a3:5000x10
    total = 0
    for i in range(m):
        # sum the cross-entropy cost over all m examples
        first = (-yarray[i]).dot(np.log(a3[i]))
        second = (1 - yarray[i]).dot(np.log(1 - a3[i]))
        total = total + (first - second)

    return total / m
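
The summation loop above can also be written as a single vectorized expression. A minimal sketch, reusing the feed_forward helper defined further down:

def cost_vectorized(theta, X, yarray):
    # Sketch: the same cross-entropy cost as costFunction, without the Python loop.
    _, _, _, _, a3 = feed_forward(theta, X)
    m = X.shape[0]
    return np.sum(-yarray * np.log(a3) - (1 - yarray) * np.log(1 - a3)) / m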


def gradient(theta, X, yarray):
    '''partial derivatives for backpropagation (the gradient the optimizer descends)'''
    theta1, theta2 = deserialize(theta)  # recover the two weight matrices: theta1: 25x401, theta2: 10x26
    a1, z2, a2, z3, h = feed_forward(theta, X)  # z2: 5000x25
    delta3 = h - yarray  # delta3: 5000x10; no delta1, since the input layer has no error term
    sg = 1 / (1 + np.exp(-z2))  # sigmoid(z2): 5000x25
    delta2 = delta3.dot(theta2[:, 1:]) * sg * (1 - sg)  # (5000, 25); theta2's bias column is dropped, i.e. no delta for the layer-2 bias unit
    D1 = delta2.T.dot(a1)  # 25x5000 * 5000x401 -> (25, 401)
    D2 = delta3.T.dot(a2)  # (10, 26); only D1 and D2 exist, the output layer has no outgoing weights
    D = (1 / len(X)) * serialize(D1, D2)  # (10285,); theta1/theta2 gradients unrolled so the optimizer can consume them
    return D
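
A standard way to verify this backpropagation is numerical gradient checking: nudge one parameter by a small epsilon in each direction and compare the finite-difference slope with the analytic gradient. A minimal sketch (the helper name and the idea of sampling only a few entries are mine, to keep it cheap):

def check_gradient(theta, X, yarray, epsilon=1e-4, n_checks=10):
    # Hypothetical helper: compare gradient() against centered finite
    # differences of costFunction() on a few randomly chosen parameters.
    analytic = gradient(theta, X, yarray)
    for i in np.random.choice(theta.size, n_checks, replace=False):
        plus, minus = theta.copy(), theta.copy()
        plus[i] += epsilon
        minus[i] -= epsilon
        numeric = (costFunction(plus, X, yarray) - costFunction(minus, X, yarray)) / (2 * epsilon)
        print(i, numeric, analytic[i])  # the two values should agree to several decimals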


import numpy as np
from Four_Week.CostFunction import *
from Four_Week.Gradient import gradient
from Four_Week.Tool import serialize,deserialize
def regularized_cost(theta, X, y, l=1):
    '''regularization skips each layer's bias term, i.e. the first column of each parameter matrix'''
    theta1, theta2 = deserialize(theta)  # theta1: 25x401, theta2: 10x26
    reg = np.sum(theta1[:, 1:] ** 2) + np.sum(theta2[:, 1:] ** 2)  # or use np.power(a, 2); the first columns are the bias columns, so they are excluded
    return l / (2 * len(X)) * reg + costFunction(theta, X, y)


def regularized_gradient(theta, X, y, l=1):
    """do not penalize the bias-unit parameters"""
    theta1_d, theta2_d = deserialize(gradient(theta, X, y))
    theta1, theta2 = deserialize(theta)  # theta1: 25x401, theta2: 10x26
    theta1 = theta1.copy()  # copy first: deserialize returns views into theta, and zeroing
    theta2 = theta2.copy()  # in place would corrupt the optimizer's parameter vector
    theta1[:, 0] = 0
    theta2[:, 0] = 0
    reg_d1 = theta1_d + (l / len(X)) * theta1
    reg_d2 = theta2_d + (l / len(X)) * theta2

    return serialize(reg_d1, reg_d2)


def serialize(a, b):
    '''unroll the two parameter matrices into one 1-D vector'''
    return np.r_[a.flatten(), b.flatten()]


def deserialize(seq):
    '''recover the two parameter matrices from the unrolled vector'''
    return seq[:25 * 401].reshape(25, 401), seq[25 * 401:].reshape(10, 26)
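
A quick round-trip check of these two helpers, as a usage sketch with dummy matrices:

t1, t2 = np.zeros((25, 401)), np.zeros((10, 26))
flat = serialize(t1, t2)  # shape (10285,) = 25*401 + 10*26
r1, r2 = deserialize(flat)
assert r1.shape == (25, 401) and r2.shape == (10, 26)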

def random_init(size):
    '''randomly initialize: draw `size` values from a uniform distribution over [-0.12, 0.12]'''
    return np.random.uniform(-0.12, 0.12, size)
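
The 0.12 bound matches the heuristic from the course exercise, epsilon = sqrt(6) / sqrt(L_in + L_out). A layer-aware variant might look like this (the function name is my own):

def random_init_layer(l_in, l_out):
    # For l_in=400, l_out=25 this gives epsilon ~ 0.118, which rounds to the 0.12 above.
    epsilon = np.sqrt(6) / np.sqrt(l_in + l_out)
    return np.random.uniform(-epsilon, epsilon, (l_out, l_in + 1))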


def feed_forward(theta, X):
    '''forward propagation'''
    theta1, theta2 = deserialize(theta)  # theta1: 25x401, theta2: 10x26
    m = X.shape[0]
    X = np.c_[np.ones(m), X]  # X: 5000x401, bias column added
    a1 = X
    z2 = X.dot(theta1.T)  # 5000x401 * 401x25
    a2 = 1 / (1 + np.exp(-z2))  # a2: 5000x25
    a2 = np.c_[np.ones(m), a2]  # a2: 5000x26
    z3 = a2.dot(theta2.T)
    a3 = 1 / (1 + np.exp(-z3))  # a3: 5000x10
    return a1, z2, a2, z3, a3
