吴恩达机器学习作业4(Python实现)

利用神经网络预测手写数字

import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
import scipy.optimize as opt
from sklearn.metrics import classification_report  # 这个包是评价报告
from sklearn.preprocessing import OneHotEncoder


def expand_y(y):
    """
    One-hot encode digit labels into rows of length 10.

    Each label ``k`` becomes a vector of ten zeros with a 1 stored at index
    ``k - 1``; for example, 6 maps to [0, 0, 0, 0, 0, 1, 0, 0, 0, 0].

    Returns a numpy array holding one encoded row per label in ``y``.
    """
    def _one_hot(label):
        # Build the indicator row for a single label.
        row = np.zeros(10)
        row[label - 1] = 1
        return row

    return np.array([_one_hot(label) for label in y])

def load_mat(path):
    """
    Load a training set from a MATLAB ``.mat`` file.

    Args:
        path: Location of the ``.mat`` file. The file must contain the
            variables ``X`` and ``y``.

    Returns:
        A tuple ``(X, y)``: ``X`` is the array stored under ``'X'`` and
        ``y`` is the array stored under ``'y'`` flattened to one dimension.
    """
    # Honour the caller's path instead of a hard-coded file name.
    data = loadmat(path)  # loadmat returns a dict keyed by variable name
    X = data['X']  # pixel values (5000 x 400 for ex4data1.mat per the original note)
    y = data['y'].flatten()  # collapse the label column into a 1-D array
    return X, y

def load_weight(path):
    """
    Read the two weight matrices from a MATLAB ``.mat`` file.

    Returns the arrays stored under ``'Theta1'`` and ``'Theta2'`` as a
    ``(Theta1, Theta2)`` tuple.
    """
    weights = loadmat(path)
    theta1 = weights['Theta1']
    theta2 = weights['Theta2']
    return theta1, theta2

def sigmoid(z):
    """
    Logistic function ``1 / (1 + exp(-z))``, evaluated with numpy so it
    applies element-wise to arrays.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def feed_forward(theta, X):
    """
    Run one forward pass and return every layer's input and output.

    ``theta`` is the flat parameter vector; ``deserialize`` splits it into
    the first-to-second-layer matrix and the second-to-third-layer matrix.
    ``X`` is used directly as the first-layer activation (no bias column is
    added to it here).

    Returns the tuple ``(a1, z2, a2, z3, a3)``.
    """
    theta1, theta2 = deserialize(theta)
    a1 = X  # input activations
    z2 = a1 @ theta1.T
    hidden = sigmoid(z2)
    a2 = np.insert(hidden, 0, 1, axis=1)  # prepend the bias unit a0 = 1
    z3 = a2 @ theta2.T
    return a1, z2, a2, z3, sigmoid(z3)

def cost(theta, X, y):
    """
    Unregularized cross-entropy cost of the network.

    The output activations from ``feed_forward`` are compared element-wise
    with ``y``; the summed loss is divided by ``len(X)``.
    """
    *_, h = feed_forward(theta, X)
    positive_term = -y * np.log(h)  # element-wise product, not a matrix product
    negative_term = (1 - y) * np.log(1 - h)
    loss = positive_term - negative_term
    return loss.sum() / len(X)

def regularized_cost(theta, X, y, l=1):
    """
    Cross-entropy cost plus an L2 penalty on the weights.

    The first column of each parameter matrix (the bias terms) is left out
    of the penalty. ``l`` is the regularization strength.
    """
    theta1, theta2 = deserialize(theta)
    # Sum of squared weights, skipping the bias column of each matrix.
    penalty = np.sum(theta1[:, 1:] ** 2) + np.sum(theta2[:, 1:] ** 2)
    reg_term = l / (2 * len(X)) * penalty
    return reg_term + cost(theta, X, y)

def deserialize(seq):
    """
    Split a flat parameter vector back into the two weight matrices.

    The first 25*401 entries are reshaped to (25, 401) and the remaining
    entries to (10, 26). Returns them as a ``(theta1, theta2)`` tuple.
    """
    split_at = 25 * 401
    theta1 = seq[:split_at].reshape(25, 401)
    theta2 = seq[split_at:].reshape(10, 26)
    return theta1, theta2

def serialize(a, b):
    """
    Unroll two arrays into a single flat vector.

    ``a`` is flattened first, followed by ``b``.
    """
    flat_a = a.ravel()
    flat_b = b.ravel()
    return np.concatenate((flat_a, flat_b))

def sigmoid_gradient(z):
    """
    Derivative of the sigmoid function: ``sigmoid(z) * (1 - sigmoid(z))``.
    """
    activation = sigmoid(z)
    return activation * (1 - activation)

def random_init(size):
    """
    Draw ``size`` values from a uniform distribution over [-0.12, 0.12).
    """
    low, high = -0.12, 0.12
    return np.random.uniform(low=low, high=high, size=size)


def gradient(theta, X, y):
    """
    Unregularized backpropagation gradient.

    There is no error term for the input layer. The result is a flat vector
    laid out like ``theta`` (first-matrix gradient followed by the
    second-matrix gradient), so each gradient entry lines up with the
    parameter it belongs to.
    """
    _, theta2 = deserialize(theta)
    a1, z2, a2, _, h = feed_forward(theta, X)

    delta3 = h - y  # output-layer error, (5000, 10) for the full data set
    # Propagate the error back, skipping the bias column of theta2.
    delta2 = (delta3 @ theta2[:, 1:]) * sigmoid_gradient(z2)  # (5000, 25)

    grad1 = delta2.T @ a1  # (25, 401)
    grad2 = delta3.T @ a2  # (10, 26)
    return (1 / len(X)) * serialize(grad1, grad2)  # (10285,)


def gradient_checking(theta, X, y, e=0.0001):
    """
    Compare the backpropagation gradient with a numerical estimate.

    For every parameter a central difference of ``regularized_cost`` with
    step ``e`` is computed, then the relative difference between that
    numerical gradient and ``regularized_gradient`` is printed. One pair of
    cost evaluations is made per parameter, so this is slow.
    """
    def _central_difference(index):
        # Perturb a single parameter on copies so the caller's theta is untouched.
        upper = theta.copy()
        lower = theta.copy()
        upper[index] = upper[index] + e
        lower[index] = lower[index] - e
        return (regularized_cost(upper, X, y) - regularized_cost(lower, X, y)) / (e * 2)

    numeric_grad = np.array([_central_difference(i) for i in range(len(theta))])
    analytic_grad = regularized_gradient(theta, X, y)

    numerator = np.linalg.norm(numeric_grad - analytic_grad)
    denominator = np.linalg.norm(numeric_grad + analytic_grad)
    diff = numerator / denominator

    message = (
        'If your backpropagation implementation is correct,\n'
        'the relative difference will be smaller than 10e-9 (assume epsilon=0.0001).\n'
        'Relative Difference: {}\n'
    )
    print(message.format(diff))


def regularized_gradient(theta, X, y, l=1):
    """
    Backpropagation gradient with L2 regularization.

    The bias weights (first column of each parameter matrix) are not
    penalized, matching ``regularized_cost``.

    Args:
        theta: Flat parameter vector the gradient is evaluated at.
        X: Input matrix passed through to ``gradient``.
        y: Target matrix passed through to ``gradient``.
        l: Regularization strength.

    Returns:
        A flat gradient vector laid out like ``theta``.
    """
    # Take the weights from the theta being evaluated, not from module-level
    # variables: the penalty must follow the optimizer's current parameters.
    t1, t2 = deserialize(theta)
    D1, D2 = deserialize(gradient(theta, X, y))
    scale = l / len(X)

    # deserialize returns views, so t1/t2 alias the caller's theta and must
    # not be modified. Add the penalty to copies of the gradients, touching
    # only the non-bias columns.
    reg_D1 = D1.copy()
    reg_D2 = D2.copy()
    reg_D1[:, 1:] += scale * t1[:, 1:]
    reg_D2[:, 1:] += scale * t2[:, 1:]
    return serialize(reg_D1, reg_D2)

def nn_training(X, y):
    """
    Train the network from a random starting point.

    Minimizes ``regularized_cost`` (regularization strength 1) with scipy's
    TNC method, supplying ``regularized_gradient`` as the Jacobian and
    capping the run with ``maxiter`` 400. Returns the result object from
    ``opt.minimize``.
    """
    initial_theta = random_init(10285)  # 25*401 + 10*26 parameters

    return opt.minimize(
        fun=regularized_cost,
        x0=initial_theta,
        args=(X, y, 1),
        method='TNC',
        jac=regularized_gradient,
        options={'maxiter': 400},
    )

def accuracy(theta, X, y):
    """
    Print a classification report for the network defined by ``theta``.

    Args:
        theta: Flat parameter vector to evaluate.
        X: Input matrix fed to ``feed_forward``.
        y: True labels, compared against the predicted labels.
    """
    # Evaluate the parameters that were passed in rather than a global
    # optimization result.
    _, _, _, _, h = feed_forward(theta, X)
    # Column index of the strongest output, shifted by one so predictions
    # are 1-based like the labels produced by expand_y's inverse mapping.
    y_pred = np.argmax(h, axis=1) + 1
    print(classification_report(y, y_pred))



# def plot_100_images(X):
#     """随机画100个数字"""
#     index = np.random.choice(range(5000), 100)  # 随机选择索引
#     images = X[index]            # 选择随机索引的像素值
#     fig, ax_array = plt.subplots(10, 10, sharey=True, sharex=True, figsize=(8, 8))  # 画出10列10行的图像 设定尺寸
#     for r in range(10):   # 遍历10行10列
#         for c in range(10):
#             ax_array[r, c].matshow(images[r*10 + c].reshape(20,20), cmap='gray_r')    # 每一幅图的尺寸都是20*20 像素点存在images中
#     plt.xticks([])  # 取消x轴坐标
#     plt.yticks([])
#     plt.show()



# -------------- 1. Load the data --------------
# The data file is read once; raw_X / raw_y keep the untouched pixels and labels.
raw_X, raw_y = load_mat('ex4data1.mat')
# plot_100_images(raw_X)

# -------------- 2. Model representation --------------
# Three-layer neural network with 400 input units.
# -------------- 2.1 Prepare the data --------------
X = np.insert(raw_X, 0, 1, axis=1)  # prepend the bias column x0 -> (5000, 401)
y = expand_y(raw_y)  # one-hot labels -> (5000, 10)

# -------------- 2.2 Load the provided weights --------------
# The second layer has 25 units.
t1, t2 = load_weight('ex4weights.mat')  # ((25, 401), (10, 26))

# -------------- 2.3 Unroll the parameters --------------
theta = serialize(t1, t2)  # flat vector, 25*401 + 10*26 = 10285

# -------------- 3. Train the neural network --------------
# gradient_checking(theta, X, y)  # very slow; run with care
res = nn_training(X, y)  # slow
print(res)

# -------------- 4. Evaluate --------------
accuracy(res.x, X, raw_y)



你可能感兴趣的:(机器学习)