git reference (course + code + assignments)
The code does not include the plotting parts.
import numpy as np
import scipy.io as sio

# Neural network: classify (recognize) handwritten digit images
np.set_printoptions(threshold=np.inf)  # so that print() shows all the data
data = sio.loadmat('ex4data1.mat')
# data contains two matrices, X and y: X is 5000 x 400 and y is a vector of length 5000;
# X holds 5000 examples, each a 20*20-pixel grayscale image, i.e. each row is one handwritten digit;
# y holds the corresponding labels 1-10 (10 stands for the digit 0)
X = data['X']
Y = data['y'][:, 0]  # [:, 0] turns the 5000 x 1 column into a 1-D vector
data = sio.loadmat('ex4weights.mat')
# print(data)
theta1 = data['Theta1']
theta2 = data['Theta2']
nn_params = np.concatenate((theta1.flatten(), theta2.flatten()))
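The two weight matrices are unrolled into a single vector so the optimizer can treat all parameters as one argument. A quick round-trip check (my addition) confirms that the reshape used inside the cost function recovers them exactly:

t1 = nn_params[:25*401].reshape(25, 401)   # Theta1 is 25 x 401 (hidden x (input+1))
t2 = nn_params[25*401:].reshape(10, 26)    # Theta2 is 10 x 26 (output x (hidden+1))
print(np.array_equal(t1, theta1), np.array_equal(t2, theta2))  # True True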
#-------------------------------------------------------------------------------
# Forward propagation: compute the cost function from the given (pre-trained) weights
def sigmoid(z):
    return 1/(1+np.exp(-z))
# Derivative of the sigmoid: g'(z) = g(z)*(1-g(z))
def sigmoidGradient(z):
    return sigmoid(z)*(1-sigmoid(z))
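A quick numerical sanity check (my addition): the sigmoid gradient peaks at z = 0 with the value 0.25 and falls off symmetrically on both sides:

print(sigmoidGradient(np.array([-1.0, 0.0, 1.0])))
# [0.19661193 0.25       0.19661193]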
def nnCostFunction(params, input_layer_size, hidden_layer_size, num_labels, x, y, lam):
    # Recover the two weight matrices from the unrolled parameter vector
    theta1 = params[0:hidden_layer_size*(input_layer_size+1)].reshape((hidden_layer_size, input_layer_size+1))
    theta2 = params[(hidden_layer_size*(input_layer_size+1)):].reshape((num_labels, hidden_layer_size+1))
    m = np.size(y)
    # Forward propagation
    a1 = np.column_stack((np.ones(m), x))    # shape=(5000, 401)
    z2 = a1@(theta1.T)                       # shape=(5000, 25)
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones(m), a2))   # shape=(5000, 26)
    z3 = a2@(theta2.T)
    a3 = sigmoid(z3)                         # shape=(5000, 10)
    # One-hot encode the labels
    yt = np.zeros((m, num_labels))           # shape=(5000, 10)
    yt[np.arange(m), y-1] = 1
    # i.e. yt[0, y[0]-1] = 1 ; yt[1, y[1]-1] = 1 ...
    # the 10 columns of a3 correspond to the digits 1,2,...,9,0
    j_first = np.sum(-yt*np.log(a3) - (1-yt)*np.log(1-a3)) /m
    j_second = np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2))
    j_second = lam/2/m * j_second
    # the bias weights theta_0 are not regularized
    return j_first + j_second
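Written out, the quantity computed above is the regularized cross-entropy cost from the course notes:

$$J(\Theta)=\frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\Big[-y_k^{(i)}\log\big((a_3)_k^{(i)}\big)-\big(1-y_k^{(i)}\big)\log\big(1-(a_3)_k^{(i)}\big)\Big]+\frac{\lambda}{2m}\Big[\sum_{j,k}(\Theta_1)_{jk}^2+\sum_{j,k}(\Theta_2)_{jk}^2\Big]$$

where the regularization sums skip the first (bias) column of each weight matrix.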
input_layer_size = 400
hidden_layer_size = 25
num_labels = 10
lamb = 0  # no regularization
cost_ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, Y, lamb)
print(cost_)
# 0.2876291651613189
lamb = 1  # with regularization
cost_ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, Y, lamb)
print(cost_)
# 0.38376985909092365
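The provided weights can also be used for prediction directly: run a forward pass and take the argmax of each row of a3. A minimal sketch (my addition; note that in this script the columns correspond to labels 1-10, so 1 is added to the argmax):

a1_ = np.column_stack((np.ones(np.size(Y)), X))
a2_ = np.column_stack((np.ones(np.size(Y)), sigmoid(a1_@(theta1.T))))
pred_ = np.argmax(sigmoid(a2_@(theta2.T)), axis=1) + 1
print(np.mean(pred_ == Y))  # the course reports roughly 97.5% for these pre-trained weights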
import numpy as np
import scipy.io as sio
import scipy.optimize as op

# Neural network: classify (recognize) handwritten digit images
np.set_printoptions(threshold=np.inf)  # so that print() shows all the data
data = sio.loadmat('ex4data1.mat')
# data contains two matrices, X and y: X is 5000 x 400 and y is a vector of length 5000;
# X holds 5000 examples, each a 20*20-pixel grayscale image, i.e. each row is one handwritten digit;
# y holds the corresponding labels 1-10 (10 stands for the digit 0)
X = data['X']
Y = data['y'][:, 0]  # [:, 0] turns the 5000 x 1 column into a 1-D vector
#-------------------------------------------------------------------------------
# Forward propagation to compute the cost; backpropagation to compute the gradient
def sigmoid(z):
    return 1/(1+np.exp(-z))
# Derivative of the sigmoid: g'(z) = g(z)*(1-g(z))
def sigmoidGradient(z):
    return sigmoid(z)*(1-sigmoid(z))
# Section 9.6 of the course: random initialization (symmetry breaking)
def randInitializeWeight(lin, lout):
    # draw each weight uniformly from [-epsilon_init, epsilon_init]
    epsilon_init = 0.12
    w = np.random.rand(lout, lin+1)*2*epsilon_init-epsilon_init
    return w
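The hard-coded epsilon_init = 0.12 follows the exercise's heuristic epsilon_init = sqrt(6)/sqrt(L_in + L_out); checking that arithmetic for the 400-unit input layer (my addition):

print(np.sqrt(6/(400+25)))  # about 0.1188, which rounds to the 0.12 used above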
def nnCost(params, input_layer_size, hidden_layer_size, num_labels, x, y, lam):
    theta1 = params[0:hidden_layer_size*(input_layer_size+1)].reshape((hidden_layer_size, input_layer_size+1))
    theta2 = params[(hidden_layer_size*(input_layer_size+1)):].reshape((num_labels, hidden_layer_size+1))
    # Forward propagation
    m = np.size(y)
    a1 = np.column_stack((np.ones(m), x))    # shape=(5000, 401)
    z2 = a1@(theta1.T)
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones(m), a2))   # shape=(5000, 26)
    z3 = a2@(theta2.T)
    a3 = sigmoid(z3)                         # shape=(5000, 10)
    # One-hot encode the labels; y%10 maps label 10 (the digit 0) to column 0
    yt = np.zeros((m, num_labels))           # shape=(5000, 10)
    yt[np.arange(m), y%10] = 1
    # i.e. yt[0, y[0]%10] = 1 ; yt[1, y[1]%10] = 1 ...
    # so the 10 columns of a3 correspond to the digits 0,1,2,...,9
    j_first = np.sum(-yt*np.log(a3) - (1-yt)*np.log(1-a3)) /m
    j_second = np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2))
    j_second = lam/2/m * j_second
    j = j_first + j_second
    return j
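The fancy-indexing one-hot construction above is equivalent to selecting rows of an identity matrix; a tiny illustration with made-up labels (my addition):

y_demo = np.array([10, 3, 7])     # labels as stored in ex4data1.mat (10 means digit 0)
print(np.eye(10)[y_demo % 10])    # rows with a 1 in columns 0, 3 and 7 respectively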
def nnGrad(params, input_layer_size, hidden_layer_size, num_labels, x, y, lamb):
    theta1 = params[0:hidden_layer_size*(input_layer_size+1)].reshape((hidden_layer_size, input_layer_size+1))
    theta2 = params[(hidden_layer_size*(input_layer_size+1)):].reshape((num_labels, hidden_layer_size+1))
    # Forward propagation
    m = np.size(y)
    a1 = np.column_stack((np.ones(m), x))    # shape=(5000, 401)
    z2 = a1@(theta1.T)
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones(m), a2))   # shape=(5000, 26)
    z3 = a2@(theta2.T)
    a3 = sigmoid(z3)                         # shape=(5000, 10)
    yt = np.zeros((m, num_labels))           # shape=(5000, 10)
    yt[np.arange(m), y%10] = 1
    # Backpropagation
    delta3 = a3-yt                           # shape=(5000, 10)
    l2 = np.size(z2, 0)
    z2 = np.column_stack((np.ones(l2), z2))  # prepend a dummy bias column, shape=(5000, 26)
    delta2 = delta3.dot(theta2) * sigmoidGradient(z2)  # shape=(5000, 26)
    theta2_grad = delta3.T.dot(a2)/m
    theta2_grad[:, 1:] = theta2_grad[:, 1:]+lamb/m*theta2[:, 1:]  # regularize, skipping the bias column
    theta1_grad = delta2[:, 1:].T.dot(a1)/m  # drop the bias error term before propagating
    theta1_grad[:, 1:] = theta1_grad[:, 1:]+lamb/m*theta1[:, 1:]
    grad = np.concatenate((theta1_grad.flatten(), theta2_grad.flatten()))
    return grad
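The course pairs backpropagation with gradient checking: compare nnGrad against a central-difference approximation of nnCost. A minimal sketch (the helper name and the tiny 3-5-3 test network are my own choices; the labels are kept in 0..2 so the y%10 indexing inside nnCost/nnGrad stays in range):

def checkGradient(eps=1e-4):
    il, hl, nl, mm = 3, 5, 3, 10               # tiny network keeps the O(n) check cheap
    t1 = randInitializeWeight(il, hl)
    t2 = randInitializeWeight(hl, nl)
    p = np.concatenate((t1.flatten(), t2.flatten()))
    xs = np.random.rand(mm, il)
    ys = np.random.randint(0, nl, mm)
    ana = nnGrad(p, il, hl, nl, xs, ys, 1)     # analytic gradient from backprop
    num = np.zeros_like(p)
    for i in range(p.size):
        d = np.zeros_like(p)
        d[i] = eps
        num[i] = (nnCost(p+d, il, hl, nl, xs, ys, 1) - nnCost(p-d, il, hl, nl, xs, ys, 1))/(2*eps)
    print(np.linalg.norm(num-ana)/np.linalg.norm(num+ana))  # should be on the order of 1e-9
# checkGradient()  # uncomment to verify nnGrad before training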
input_layer_size = 400
hidden_layer_size = 25
num_labels = 10
# Random initialization
init_theta1 = randInitializeWeight(input_layer_size, hidden_layer_size)
init_theta2 = randInitializeWeight(hidden_layer_size, num_labels)
init_nn_params = np.concatenate((init_theta1.flatten(), init_theta2.flatten()))
lamb = 1
# As long as the model does not overfit, a smaller lamb together with more iterations gives better results
param = op.fmin_cg(nnCost, init_nn_params, fprime=nnGrad, \
                   args=(input_layer_size, hidden_layer_size, num_labels, X, Y, lamb), maxiter=50)
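fmin_cg is scipy's legacy interface; the same run can be expressed with scipy.optimize.minimize. An equivalent sketch, left commented out because the script already trains with fmin_cg above:

# res = op.minimize(nnCost, init_nn_params, jac=nnGrad,
#                   args=(input_layer_size, hidden_layer_size, num_labels, X, Y, lamb),
#                   method='CG', options={'maxiter': 50})
# param = res.x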
theta1 = param[0: hidden_layer_size*(input_layer_size+1)].reshape(hidden_layer_size, input_layer_size+1)
theta2 = param[hidden_layer_size*(input_layer_size+1):].reshape(num_labels, hidden_layer_size+1)
# Prediction function
def predict(theta1, theta2, x):
    m = np.size(x, 0)                       # use the arguments, not the globals X and Y
    x = np.column_stack((np.ones(m), x))
    z2 = x@(theta1.T)
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones(m), a2))
    z3 = a2@(theta2.T)
    a3 = sigmoid(z3)
    p = np.argmax(a3, axis=1)               # column index = predicted digit 0-9
    return p
pred = predict(theta1, theta2, X)  # comparing z3 directly would also work, since sigmoid is monotonic
print('Training Set Accuracy: ', np.sum(pred == Y%10)/np.size(Y))
# the exact result varies with the random initialization, but it is usually above 95%
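To see which digits the trained network handles worst, a per-class breakdown can be printed (a small sketch, my addition):

for d in range(10):                # digits 0-9
    mask = (Y % 10) == d
    print(d, np.mean(pred[mask] == d))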