Andrew Ng Machine Learning, Assignment 4 (Python)

Git reference (course + code + assignments)

The code does not include the plotting parts.

Forward Propagation
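
For reference, nnCostFunction below implements the regularized cost from ex4. With m training examples, K output units, and hypothesis h_Theta(x) produced by the forward pass,

$$J(\Theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\Big[-y_k^{(i)}\log\big(h_\Theta(x^{(i)})\big)_k - \big(1-y_k^{(i)}\big)\log\Big(1-\big(h_\Theta(x^{(i)})\big)_k\Big)\Big] + \frac{\lambda}{2m}\Big[\sum_{j,k}\big(\Theta^{(1)}_{j,k}\big)^2 + \sum_{j,k}\big(\Theta^{(2)}_{j,k}\big)^2\Big]$$

where the regularization sums skip each layer's bias column (hence the [:, 1:] slices in the code).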

import numpy as np
import matplotlib.pylab as plt
import scipy.io as sio
import math
import scipy.optimize as op
# Neural network:
# classify (recognize) images of handwritten digits

np.set_printoptions(threshold=np.inf) # let print() display entire arrays without truncation
data = sio.loadmat('ex4data1.mat')
# data contains two matrices, X and y: X is 5000 x 400 and y is a vector of length 5000;
# each of X's 5000 rows is one 20x20-pixel grayscale image of a handwritten digit;
# y holds the corresponding labels 1-10 (10 stands for the digit 0)
X = data['X']
Y = data['y'][:, 0] # [:, 0] flattens the (5000, 1) column into a 1-D vector

data = sio.loadmat('ex4weights.mat')
theta1 = data['Theta1'] # shape=(25, 401)
theta2 = data['Theta2'] # shape=(10, 26)
nn_params = np.concatenate((theta1.flatten(), theta2.flatten())) # unroll both into one flat vector

#-------------------------------------------------------------------------------
# Forward propagation: compute the cost given the pretrained weights

def sigmoid(z):
    return 1/(1+np.exp(-z))

# derivative of the sigmoid: g'(z) = g(z)*(1 - g(z))
def sigmoidGradient(z):
    return sigmoid(z)*(1-sigmoid(z))

def nnCostFunction(params, input_layer_size, hidden_layer_size, num_labels, x, y, lam):
    # recover the two weight matrices from the flat parameter vector
    theta1 = params[0:hidden_layer_size*(input_layer_size+1)].reshape((hidden_layer_size, input_layer_size+1))
    theta2 = params[(hidden_layer_size*(input_layer_size+1)):].reshape((num_labels, hidden_layer_size+1))
    
    m = np.size(y)
    a1 = np.column_stack((np.ones(m), x)) # shape=(5000, 401)
    z2 = a1@(theta1.T) # shape=(5000, 25)
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones(m), a2)) # shape=(5000, 26)
    z3 = a2@(theta2.T)
    a3 = sigmoid(z3) # shape=(5000, 10)
    yt = np.zeros((m, num_labels)) # shape=(5000, 10)
    yt[np.arange(m), y-1] = 1 # one-hot encode: label k -> column k-1
    # yt[0, y[0]-1] = 1 ; yt[1, y[1]-1] = 1 ...
    # yt[0, 9] = 1 ;      yt[1, 9] = 1      ... (for label 10, i.e. digit 0)
    # the 10 columns of a3 therefore correspond to the digits 1,2,...,9,0 (the pretrained weights' convention)

    j_first = np.sum(-yt*np.log(a3) - (1-yt)*np.log(1-a3)) /m
    j_second = np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2))
    j_second = lam/2/m * j_second
    # the bias weights theta_0 (the first column of each theta) are not regularized
    return j_first + j_second

input_layer_size = 400
hidden_layer_size = 25
num_labels = 10
lamb = 0 # no regularization
cost_=nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, Y, lamb)
print(cost_)
#0.2876291651613189

lamb = 1 # with regularization
cost_=nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, Y, lamb)
print(cost_)
#0.38376985909092365

Forward + Backward Propagation + fmin_cg
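
Two things change relative to the forward-only version above. First, the label convention: the pretrained weights in ex4weights.mat map the 10 output columns to the digits 1,2,...,9,0, so the cost there one-hot encoded with y-1; here the network is trained from a random initialization, so the code is free to map digit d directly to column d via y%10. Second, backpropagation supplies the gradient. In the row-per-example layout used below, the error terms are

$$\delta^{(3)} = a^{(3)} - y_{\text{onehot}}, \qquad \delta^{(2)} = \big(\delta^{(3)}\,\Theta^{(2)}\big) \circ g'(z^{(2)}),$$

and the regularized gradients are

$$\frac{\partial J}{\partial \Theta^{(l)}} = \frac{1}{m}\,(\delta^{(l+1)})^{T} a^{(l)} + \frac{\lambda}{m}\,\Theta^{(l)},$$

with the bias column of each Theta excluded from the lambda term. (In the code, a bias entry is temporarily prepended to z2 so the shapes align, then dropped again with delta2[:, 1:].)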

import numpy as np
import matplotlib.pylab as plt
import scipy.io as sio
import math
import scipy.optimize as op
# Neural network:
# classify (recognize) images of handwritten digits

np.set_printoptions(threshold=np.inf) # let print() display entire arrays without truncation
data = sio.loadmat('ex4data1.mat')
# data contains two matrices, X and y: X is 5000 x 400 and y is a vector of length 5000;
# each of X's 5000 rows is one 20x20-pixel grayscale image of a handwritten digit;
# y holds the corresponding labels 1-10 (10 stands for the digit 0)
X = data['X']
Y = data['y'][:, 0] # [:, 0] flattens the (5000, 1) column into a 1-D vector

#-------------------------------------------------------------------------------
# forward propagation: compute the cost
# backpropagation: compute the gradient

# g(z): the sigmoid activation
def sigmoid(z):
    return 1/(1+np.exp(-z))

# derivative of the sigmoid, i.e. g'(z) = g(z)*(1 - g(z))
def sigmoidGradient(z):
    return sigmoid(z)*(1-sigmoid(z))

# Random initialization (course section 9.6): break symmetry by sampling each
# weight uniformly from [-epsilon_init, epsilon_init]; epsilon_init = 0.12
# follows the ex4 heuristic sqrt(6)/sqrt(L_in + L_out) for this architecture
def randInitializeWeight(lin, lout):
    epsilon_init = 0.12
    w = np.random.rand(lout, lin+1)*2*epsilon_init-epsilon_init
    return w

def nnCost(params, input_layer_size, hidden_layer_size, num_labels, x, y, lam):
    # recover the two weight matrices from the flat parameter vector
    theta1 = params[0:hidden_layer_size*(input_layer_size+1)].reshape((hidden_layer_size, input_layer_size+1))
    theta2 = params[(hidden_layer_size*(input_layer_size+1)):].reshape((num_labels, hidden_layer_size+1))
    
    # Forward propagation
    m = np.size(y)
    a1 = np.column_stack((np.ones(m), x)) # shape=(5000, 401)
    z2 = a1@(theta1.T)
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones(m), a2)) # shape=(5000, 26)
    z3 = a2@(theta2.T)
    a3 = sigmoid(z3) # shape=(5000, 10)
    yt = np.zeros((m, num_labels)) # shape=(5000, 10)
    yt[np.arange(m), y%10] = 1 # one-hot encode: digit d -> column d (label 10 % 10 == 0)
    # yt[0, y[0]%10] = 1 ; yt[1, y[1]%10] = 1 ...
    # yt[0, 0] = 1 ;       yt[1, 0] = 1      ...
    # the 10 columns of a3 correspond to the digits 0,1,...,9

    j_first = np.sum(-yt*np.log(a3) - (1-yt)*np.log(1-a3)) /m
    j_second = np.sum(np.power(theta1[:, 1:], 2)) + np.sum(np.power(theta2[:, 1:], 2))
    j_second = lam/2/m * j_second
    j = j_first + j_second

    return j

def nnGrad(params, input_layer_size, hidden_layer_size, num_labels, x, y, lamb):
    # recover the two weight matrices from the flat parameter vector
    theta1 = params[0:hidden_layer_size*(input_layer_size+1)].reshape((hidden_layer_size, input_layer_size+1))
    theta2 = params[(hidden_layer_size*(input_layer_size+1)):].reshape((num_labels, hidden_layer_size+1))
    
    # Forward propagation (identical to nnCost)
    m = np.size(y)
    a1 = np.column_stack((np.ones(m), x)) # shape=(5000, 401)
    z2 = a1@(theta1.T)
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones(m), a2)) # shape=(5000, 26)
    z3 = a2@(theta2.T)
    a3 = sigmoid(z3) # shape=(5000, 10)
    yt = np.zeros((m, num_labels)) # shape=(5000, 10)
    yt[np.arange(m), y%10] = 1 # same one-hot encoding as in nnCost

    # Backpropagation
    delta3 = a3-yt # shape=(5000, 10)
    l2 = np.size(z2, 0) # = m
    z2 = np.column_stack((np.ones(l2), z2)) # prepend a bias column so the shapes align; shape=(5000, 26)
    delta2 = delta3.dot(theta2) * sigmoidGradient(z2) # shape=(5000, 26); the bias entry is dropped below

    theta2_grad = delta3.T.dot(a2)/m # shape=(10, 26)
    theta2_grad[:, 1:] = theta2_grad[:, 1:]+lamb/m*theta2[:, 1:] # regularize all but the bias column
    theta1_grad = delta2[:, 1:].T.dot(a1)/m # drop the bias error term; shape=(25, 401)
    theta1_grad[:, 1:] = theta1_grad[:, 1:]+lamb/m*theta1[:, 1:]
    grad = np.concatenate((theta1_grad.flatten(), theta2_grad.flatten())) # unroll back into a flat vector

    return grad
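
# Optional sanity check, following ex4's gradient-checking step: compare the
# analytic gradient from nnGrad with a two-sided finite-difference
# approximation on a tiny network. This is a minimal sketch; the helper name
# computeNumericalGradient and all the _dbg sizes are arbitrary choices.
def computeNumericalGradient(costFunc, params, eps=1e-4):
    numgrad = np.zeros_like(params)
    for i in range(np.size(params)):
        perturb = np.zeros_like(params)
        perturb[i] = eps
        # one parameter at a time: (J(p + eps) - J(p - eps)) / (2 * eps)
        numgrad[i] = (costFunc(params+perturb) - costFunc(params-perturb)) / (2*eps)
    return numgrad

il_dbg, hl_dbg, nl_dbg, m_dbg = 3, 5, 3, 6
t1_dbg = randInitializeWeight(il_dbg, hl_dbg)
t2_dbg = randInitializeWeight(hl_dbg, nl_dbg)
p_dbg = np.concatenate((t1_dbg.flatten(), t2_dbg.flatten()))
x_dbg = np.random.rand(m_dbg, il_dbg)
y_dbg = np.arange(m_dbg) % nl_dbg # labels 0..2 keep the y%10 indexing in range
numgrad = computeNumericalGradient(
    lambda t: nnCost(t, il_dbg, hl_dbg, nl_dbg, x_dbg, y_dbg, 1.0), p_dbg)
anagrad = nnGrad(p_dbg, il_dbg, hl_dbg, nl_dbg, x_dbg, y_dbg, 1.0)
print('gradient check, max abs diff:', np.max(np.abs(anagrad - numgrad))) # ~1e-9 expected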

input_layer_size = 400
hidden_layer_size = 25
num_labels = 10

# Random initialization
init_theta1 = randInitializeWeight(input_layer_size, hidden_layer_size)
init_theta2 = randInitializeWeight(hidden_layer_size, num_labels)
init_nn_params = np.concatenate((init_theta1.flatten(), init_theta2.flatten()))

lamb = 1
# provided the model does not overfit, a smaller lamb and more iterations give better results
param = op.fmin_cg(nnCost, init_nn_params, fprime=nnGrad, \
                    args=(input_layer_size, hidden_layer_size, num_labels, X, Y, lamb), maxiter=50)
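
# The same optimization can also be run through scipy's newer generic
# interface if preferred; an equivalent alternative call would be:
# res = op.minimize(nnCost, init_nn_params, jac=nnGrad, method='CG',
#                   args=(input_layer_size, hidden_layer_size, num_labels, X, Y, lamb),
#                   options={'maxiter': 50})
# param = res.x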

theta1 = param[0: hidden_layer_size*(input_layer_size+1)].reshape(hidden_layer_size, input_layer_size+1)
theta2 = param[hidden_layer_size*(input_layer_size+1):].reshape(num_labels, hidden_layer_size+1)

# Prediction: forward pass, then pick the class with the highest probability
def predict(theta1, theta2, x):
    m = np.size(x, 0)
    x = np.column_stack((np.ones(m), x)) # use the x argument, not the globals X and Y
    z2 = x@(theta1.T)
    a2 = sigmoid(z2)
    a2 = np.column_stack((np.ones(m), a2))
    z3 = a2@(theta2.T)
    a3 = sigmoid(z3)
    p = np.argmax(a3, axis=1) # column index = predicted digit (0-9)
    return p

pred = predict(theta1, theta2, X) # comparing z3 directly also works, since sigmoid is monotonic
print('Training Set Accuracy: ', np.sum(pred == Y%10)/np.size(Y))
# the result varies with the random initialization, but is usually above 95%
