If any of this is unclear, you can simply copy-paste the code and work through it. The listing below is the main script, followed by the helper modules Four_Week/CostFunction.py, Four_Week/Gradient.py, Four_Week/Regularized.py, and Four_Week/Tool.py.
import time
from Four_Week.Regularized import regularized_cost, regularized_gradient
from Four_Week.Tool import random_init, serialize, deserialize, accuracy
from Three_Week.dispaly_Data import display_data
from Three_Week.predict import predict
import numpy as np
import scipy.io as scio
import scipy.optimize as opt
picture_DataFile = 'ex3data1.mat'
picture_Data = scio.loadmat(picture_DataFile)
X = picture_Data['X']  # X: 5000x400. Each image is a 20x20 grayscale image unrolled into a 400-dim vector; there are 5000 images.
y = picture_Data['y'].flatten()  # y: 5000, the digit label for each row of X
print(y)
ylabel = []
for i in range(y.size):
    zeros = np.zeros(10)
    zeros[y[i] - 1] = 1  # labels run 1..10 (10 stands for digit 0), so label k maps to index k-1
    ylabel.append(zeros)
yarray = np.array(ylabel)  # yarray: 5000x10, one-hot targets, since the network has 10 outputs
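# A vectorized one-hot encoding would produce the same yarray (a sketch,
# relying on the labels in ex3data1.mat running from 1 to 10):
#   yarray = (y.reshape(-1, 1) == np.arange(1, 11)).astype(float)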
init_theta = random_init(10285) # 25*401 + 10*26
res = opt.minimize(fun=regularized_cost,
                   x0=init_theta,  # theta1 and theta2 unrolled into a single 1-D vector, as the optimizer requires
                   args=(X, yarray),  # X: 5000x400, yarray: 5000x10
                   method='TNC',
                   jac=regularized_gradient,
                   options={'maxiter': 400})  # x0 and args are both passed through to fun and jac; x0 holds the parameters being trained
theta1, theta2 = deserialize(res.x)
rand_indices = np.random.permutation(range(5000))  # a random permutation of the 5000 indices 0..4999
accuracy(res.x, X, y, res)
for i in range(5000):
    example = X[rand_indices[i]]
    example = example.reshape((1, example.size))  # example: 1x400
    display_data(example)
    pred = predict(theta1, theta2, example)
    print('Neural network prediction is: {} (digit {})'.format(pred, np.mod(pred, 10)))
# Four_Week/CostFunction.py
import numpy as np
from Four_Week.Tool import deserialize

def costFunction(theta, X, yarray):  # X: 5000x400, yarray: 5000x10
    theta1, theta2 = deserialize(theta)  # theta1: 25x401, theta2: 10x26
    m = X.shape[0]
    X = np.c_[np.ones(m), X]  # X: 5000x401 after adding the bias column
    z2 = X.dot(theta1.T)  # 5000x401 * 401x25
    a2 = 1 / (1 + np.exp(-z2))  # a2: 5000x25
    a2 = np.c_[np.ones(m), a2]  # a2: 5000x26 after adding the bias column
    z3 = a2.dot(theta2.T)
    a3 = 1 / (1 + np.exp(-z3))  # a3: 5000x10
    total = 0
    for i in range(m):
        '''accumulate the cost over all examples'''
        first = (-yarray[i]).dot(np.log(a3[i]))
        second = (1 - yarray[i]).dot(np.log(1 - a3[i]))
        total = total + (first - second)
    return total / m
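For reference, the loop above computes the standard cross-entropy cost for a 10-class network:

$$J(\Theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{10}\left[-y_k^{(i)}\log\left(h_\Theta(x^{(i)})_k\right) - \left(1-y_k^{(i)}\right)\log\left(1-h_\Theta(x^{(i)})_k\right)\right]$$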
# Four_Week/Gradient.py
import numpy as np
from Four_Week.Tool import serialize, deserialize, feed_forward

def gradient(theta, X, yarray):
    '''partial derivatives for gradient descent (backpropagation)'''
    theta1, theta2 = deserialize(theta)  # unpack the current weight matrices
    a1, z2, a2, z3, h = feed_forward(theta, X)  # z2: 5000x25
    delta3 = h - yarray  # delta3: 5000x10; there is no delta1, the input layer has no error term
    sig = 1 / (1 + np.exp(-z2))
    delta2 = delta3.dot(theta2[:, 1:]) * sig * (1 - sig)  # (5000,10)x(10,25) -> (5000,25); drop theta2's bias column, i.e. remove delta_0^(2)
    D1 = delta2.T.dot(a1)  # 25x5000 * 5000x401 -> (25, 401)
    D2 = delta3.T.dot(a2)  # (10, 26); only D1 and D2 exist, the output layer has no outgoing weights
    D = (1 / len(X)) * serialize(D1, D2)  # (10285,), the unrolled theta1 and theta2 gradients, for the optimizer
    return D
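Backpropagation code is easy to break silently, so it is worth spot-checking the analytic gradient against a finite-difference estimate. Below is a minimal sketch; gradient_check is a hypothetical helper, and it assumes the regularized_cost and regularized_gradient defined in Four_Week/Regularized.py (listed next).

import numpy as np
from Four_Week.Regularized import regularized_cost, regularized_gradient

def gradient_check(theta, X, yarray, epsilon=1e-4, num_checks=5):
    # Compare a few entries of the analytic gradient against a
    # two-sided finite-difference estimate of the cost.
    analytic = regularized_gradient(theta, X, yarray)
    rng = np.random.default_rng(0)
    for idx in rng.integers(0, theta.size, num_checks):
        plus, minus = theta.copy(), theta.copy()
        plus[idx] += epsilon
        minus[idx] -= epsilon
        numeric = (regularized_cost(plus, X, yarray)
                   - regularized_cost(minus, X, yarray)) / (2 * epsilon)
        print('index {}: analytic={:.6e}, numeric={:.6e}'.format(idx, analytic[idx], numeric))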
# Four_Week/Regularized.py
import numpy as np
from Four_Week.CostFunction import *
from Four_Week.Gradient import gradient
from Four_Week.Tool import serialize, deserialize
def regularized_cost(theta, X, y, l=1):
    '''regularization skips each layer's bias terms, i.e. the first column of each parameter matrix'''
    theta1, theta2 = deserialize(theta)  # theta1: 25x401, theta2: 10x26
    reg = np.sum(theta1[:, 1:] ** 2) + np.sum(theta2[:, 1:] ** 2)  # or use np.power(a, 2); the first column of theta1 (and of theta2) is the bias column, excluded here
    return l / (2 * len(X)) * reg + costFunction(theta, X, y)
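With the bias columns excluded, the term added above is the standard weight-decay penalty (columns indexed from 1, so column 1 is the bias):

$$J_{reg}(\Theta) = J(\Theta) + \frac{\lambda}{2m}\left[\sum_{j=1}^{25}\sum_{k=2}^{401}\left(\Theta^{(1)}_{j,k}\right)^2 + \sum_{j=1}^{10}\sum_{k=2}^{26}\left(\Theta^{(2)}_{j,k}\right)^2\right]$$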
def regularized_gradient(theta, X, y, l=1):
    """do not penalize the bias-unit parameters"""
    theta1_d, theta2_d = deserialize(gradient(theta, X, y))
    theta1, theta2 = deserialize(theta)  # theta1: 25x401, theta2: 10x26
    theta1 = theta1.copy()  # deserialize returns views into theta, so copy before zeroing
    theta2 = theta2.copy()
    theta1[:, 0] = 0  # zero the bias columns so they receive no regularization
    theta2[:, 0] = 0
    reg_d1 = theta1_d + (l / len(X)) * theta1
    reg_d2 = theta2_d + (l / len(X)) * theta2
    return serialize(reg_d1, reg_d2)
# Four_Week/Tool.py
import numpy as np

def serialize(a, b):
    '''unroll the two parameter matrices into one 1-D vector'''
    return np.r_[a.flatten(), b.flatten()]

def deserialize(seq):
    '''recover the two parameter matrices from the unrolled vector'''
    return seq[:25 * 401].reshape(25, 401), seq[25 * 401:].reshape(10, 26)
def random_init(size):
    '''randomly initialize the parameters: draw size values uniformly from [-0.12, 0.12]'''
    return np.random.uniform(-0.12, 0.12, size)
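The 0.12 above is not arbitrary: a common heuristic (the one suggested in the original Coursera exercise) picks the interval half-width as sqrt(6)/sqrt(L_in + L_out). A quick sketch showing where the number comes from (epsilon_for_layer is a hypothetical helper, not part of the original code):

import numpy as np

def epsilon_for_layer(l_in, l_out):
    # Heuristic half-width for uniform init: sqrt(6) / sqrt(L_in + L_out).
    return np.sqrt(6) / np.sqrt(l_in + l_out)

print(epsilon_for_layer(400, 25))  # ~0.118, which rounds to the 0.12 used above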
def feed_forward(theta, X):
    '''forward propagation'''
    theta1, theta2 = deserialize(theta)  # theta1: 25x401, theta2: 10x26
    m = X.shape[0]
    X = np.c_[np.ones(m), X]  # X: 5000x401 after adding the bias column
    a1 = X
    z2 = X.dot(theta1.T)  # 5000x401 * 401x25
    a2 = 1 / (1 + np.exp(-z2))  # a2: 5000x25
    a2 = np.c_[np.ones(m), a2]  # a2: 5000x26 after adding the bias column
    z3 = a2.dot(theta2.T)
    a3 = 1 / (1 + np.exp(-z3))  # a3: 5000x10
    return a1, z2, a2, z3, a3
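A quick shape smoke test for the forward pass (a sketch; the zero input is just an arbitrary dummy batch of 5 examples):

import numpy as np

theta = random_init(10285)  # 25*401 + 10*26 unrolled parameters
a1, z2, a2, z3, h = feed_forward(theta, np.zeros((5, 400)))
assert a1.shape == (5, 401)  # input plus bias column
assert z2.shape == (5, 25) and a2.shape == (5, 26)  # hidden layer, then bias added
assert z3.shape == (5, 10) and h.shape == (5, 10)   # one output per digit class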