Andrew Ng's Machine Learning course (Coursera), Programming Assignment 4 (Neural Network Back Propagation): Python implementation

This post is a Python implementation of the fourth programming assignment of Andrew Ng's Machine Learning course: Neural Network Back Propagation.

 

ex4.py is the main entry point of the program.

The assignment files and training data can be downloaded from: https://github.com/toanoyx/MachineLearning-AndrewNg-coursera-python/tree/master/ex4%20NN%20back%20propagation

 

The source code of each file is listed below:

ex4.py

import scipy.optimize as opt  # opt.minimize(..., method='TNC') below comes from scipy.optimize
from sklearn.metrics import classification_report
from loadData import *
from displayData import *
from feedForward import *
from nnCostFunction import *
from computeNumericalGradient import *
from checkNNGradients import *


""" 第1部分 可视化数据集 """
X, _ = loadData('ex4data1.mat')
displayData(X)
plt.show()

""" 第2部分 模型表示 """
X_raw, y_raw = loadData('ex4data1.mat', transpose=False)
X = np.insert(X_raw, 0, np.ones(X_raw.shape[0]), axis=1)


def expand_y(y):
    res = []
    for i in y:
        y_array = np.zeros(10)
        y_array[i - 1] = 1
        res.append(y_array)
    return np.array(res)
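
# expand_y one-hot encodes the 1-10 labels: e.g. 3 -> [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
# and the label 10 (which stands for the digit 0 in ex4data1.mat) maps to index 9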


y = expand_y(y_raw)


def load_weight(path):
    data = sio.loadmat(path)
    return data['Theta1'], data['Theta2']


t1, t2 = load_weight('ex4weights.mat')

""" 第3部分 前向传播和代价函数 """
theta = np.concatenate((np.ravel(t1), np.ravel(t2)))
_, _, _, _, h = feedForward(theta, X)
print("cost function: " + str(nnCostFunction(theta, X, y)) + "(this should be 0.287629)")

""" 第4部分 正则化代价函数 """
t1, t2 = deserialize(theta)
m = X.shape[0]
l = 1
reg_t1 = (l / (2 * m)) * np.power(t1[:, 1:], 2).sum()
reg_t2 = (l / (2 * m)) * np.power(t2[:, 1:], 2).sum()
regularizedCost = nnCostFunction(theta, X, y) + reg_t1 + reg_t2
print("regularized cost function: " + str(regularizedCost) + "(this should be 0.383770)")

""" 第5部分 反向传播 """
print("sigmoid gradient at 0: " + str(sigmoid_gradient(0)) + " (this should be 0.25)")
d1, d2 = deserialize(computeNumericalGradient(theta, X, y))

checkNNGradients(theta, X, y, epsilon=0.0001)

checkNNGradients(theta, X, y, epsilon=0.0001, regularized=True)

""" 第6部分 """


def random_init(size):
    # break symmetry: draw initial weights uniformly from [-epsilon_init, epsilon_init], epsilon_init = 0.12
    return np.random.uniform(-0.12, 0.12, size)


def nn_training(X, y):
    init_theta = random_init(10285)  # 25*401 + 10*26

    res = opt.minimize(fun=regularized_cost,
                       x0=init_theta,
                       args=(X, y, 1),
                       method='TNC',
                       jac=regularized_gradient,
                       options={'maxiter': 400})
    return res


res = nn_training(X, y)
print(str(res))
_, y_answer = loadData('ex4data1.mat')
print(str(y_answer[:20]))
final_theta = res.x


def show_accuracy(theta, X, y):
    _, _, _, _, h = feedForward(theta, X)
    y_pred = np.argmax(h, axis=1) + 1
    print(classification_report(y, y_pred))
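
show_accuracy(final_theta, X, y_answer)  # print per-class precision/recall of the trained network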


def plot_hidden_layer(theta):
    final_theta1, _ = deserialize(theta)
    hidden_layer = final_theta1[:, 1:]  # get rid of the bias term

    fig, ax_array = plt.subplots(nrows=5, ncols=5, sharey=True, sharex=True, figsize=(5, 5))

    for r in range(5):
        for c in range(5):
            ax_array[r, c].matshow(hidden_layer[5 * r + c].reshape((20, 20)),
                                   cmap=matplotlib.cm.binary)
            plt.xticks(np.array([]))
            plt.yticks(np.array([]))


plot_hidden_layer(final_theta)
plt.show()

 

checkNNGradients.py
from nnCostFunction import *
from computeNumericalGradient import *


def checkNNGradients(theta, X, y, epsilon, regularized=False):
    def a_numeric_grad(plus, minus, regularized=False):
        if regularized:
            return (regularized_cost(plus, X, y) - regularized_cost(minus, X, y)) / (epsilon * 2)
        else:
            return (nnCostFunction(plus, X, y) - nnCostFunction(minus, X, y)) / (epsilon * 2)

    theta_matrix = expand_array(theta)  # expand to (10285, 10285)
    epsilon_matrix = np.identity(len(theta)) * epsilon

    plus_matrix = theta_matrix + epsilon_matrix
    minus_matrix = theta_matrix - epsilon_matrix

    numeric_grad = np.array([a_numeric_grad(plus_matrix[i], minus_matrix[i], regularized)
                             for i in range(len(theta))])

    analytic_grad = regularized_gradient(theta, X, y) if regularized else computeNumericalGradient(theta, X, y)
    diff = np.linalg.norm(numeric_grad - analytic_grad) / np.linalg.norm(numeric_grad + analytic_grad)

    print('If your backpropagation implementation is correct,\n'
          'the relative difference will be smaller than 1e-9 (assuming epsilon = 0.0001).\n'
          'Relative Difference: {}\n'.format(diff))


def regularized_cost(theta, X, y, l=1):
    t1, t2 = deserialize(theta)
    m = X.shape[0]
    reg_t1 = (l / (2 * m)) * np.power(t1[:, 1:], 2).sum()  # this is how you ignore first col
    reg_t2 = (l / (2 * m)) * np.power(t2[:, 1:], 2).sum()
    return nnCostFunction(theta, X, y) + reg_t1 + reg_t2


def expand_array(arr):
    # replicate the flat theta vector into a (len(arr), len(arr)) matrix, one row per parameter
    return np.tile(arr, (arr.shape[0], 1))


def regularized_gradient(theta, X, y, l=1):
    m = X.shape[0]
    delta1, delta2 = deserialize(computeNumericalGradient(theta, X, y))
    t1, t2 = deserialize(theta)

    # work on copies: deserialize returns views into theta, so zeroing the bias
    # column in place would otherwise modify the caller's parameter vector
    t1 = t1.copy()
    t1[:, 0] = 0
    reg_term_d1 = (l / m) * t1
    delta1 = delta1 + reg_term_d1

    t2 = t2.copy()
    t2[:, 0] = 0
    reg_term_d2 = (l / m) * t2
    delta2 = delta2 + reg_term_d2

    return np.concatenate((np.ravel(delta1), np.ravel(delta2)))
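
checkNNGradients above compares the backpropagation gradient with a central-difference approximation, (J(theta + epsilon) - J(theta - epsilon)) / (2 * epsilon), applied to each parameter in turn. The following standalone sketch (a hypothetical helper, not part of the assignment files) shows the same idea on a scalar function: the numeric estimate of the derivative of x^2 at x = 3 comes out very close to the analytic value 6.

def numeric_derivative(f, x, epsilon=1e-4):
    # central difference: (f(x + eps) - f(x - eps)) / (2 * eps)
    return (f(x + epsilon) - f(x - epsilon)) / (2 * epsilon)


print(numeric_derivative(lambda x: x ** 2, 3.0))  # ~6.0, matches the analytic derivative 2x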

 

computeNumericalGradient.py
from feedForward import *


def computeNumericalGradient(theta, X, y):
    # despite the file name, this function computes the analytic (backpropagation)
    # gradient; the finite-difference check itself is done in checkNNGradients.py
    t1, t2 = deserialize(theta)  # t1: (25, 401), t2: (10, 26)
    m = X.shape[0]

    delta1 = np.zeros(t1.shape)  # (25, 401)
    delta2 = np.zeros(t2.shape)  # (10, 26)

    a1, z2, a2, z3, h = feedForward(theta, X)

    for i in range(m):
        a1i = a1[i, :]  # (401,)
        z2i = z2[i, :]  # (25,)
        a2i = a2[i, :]  # (26,)

        hi = h[i, :]  # (10,)
        yi = y[i, :]  # (10,)

        d3i = hi - yi  # (10,)

        z2i = np.insert(z2i, 0, np.ones(1))  # prepend the bias unit -> (26,) so d2i can be computed
        d2i = np.multiply(t2.T @ d3i, sigmoid_gradient(z2i))  # (26,)

        # careful with np vector transpose
        delta2 += np.matrix(d3i).T @ np.matrix(a2i)  # (1, 10).T @ (1, 26) -> (10, 26)
        delta1 += np.matrix(d2i[1:]).T @ np.matrix(a1i)  # (1, 25).T @ (1, 401) -> (25, 401)

    delta1 = delta1 / m
    delta2 = delta2 / m
    return np.concatenate((np.ravel(delta1), np.ravel(delta2)))

 

displayData.py
import numpy as np
import matplotlib.pyplot as plt
import matplotlib


def displayData(X):
    size = int(np.sqrt(X.shape[1]))

    sample_idx = np.random.choice(np.arange(X.shape[0]), 100)  # 100*400
    sample_images = X[sample_idx, :]

    fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(8, 8))

    for r in range(10):
        for c in range(10):
            ax_array[r, c].matshow(sample_images[10 * r + c].reshape((size, size)),
                                   cmap=matplotlib.cm.binary)
            plt.xticks(np.array([]))
            plt.yticks(np.array([]))

 

feedForward.py
import numpy as np
from sigmoid import *


def feedForward(theta, X):
    t1, t2 = deserialize(theta)
    m = X.shape[0]
    a1 = X

    z2 = a1 @ t1.T
    a2 = np.insert(sigmoid(z2), 0, np.ones(m), axis=1)

    z3 = a2 @ t2.T
    h = sigmoid(z3)

    return a1, z2, a2, z3, h


def deserialize(seq):
    return seq[:25 * 401].reshape(25, 401), seq[25 * 401:].reshape(10, 26)
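
As a quick sanity check (a minimal sketch, assuming ex4data1.mat and ex4weights.mat are in the working directory; the original dataset has 5000 examples of 20x20 images), the shapes returned by feedForward should be:

import numpy as np
import scipy.io as sio
from loadData import loadData
from feedForward import feedForward

X_raw, _ = loadData('ex4data1.mat', transpose=False)
X = np.insert(X_raw, 0, np.ones(X_raw.shape[0]), axis=1)  # prepend the bias column

weights = sio.loadmat('ex4weights.mat')
theta = np.concatenate((np.ravel(weights['Theta1']), np.ravel(weights['Theta2'])))

a1, z2, a2, z3, h = feedForward(theta, X)
print(a1.shape, z2.shape, a2.shape, z3.shape, h.shape)
# expected: (5000, 401) (5000, 25) (5000, 26) (5000, 10) (5000, 10)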

 

loadData.py
import scipy.io as sio
import numpy as np


def loadData(path, transpose=True):
    data = sio.loadmat(path)
    y = data.get('y')
    y = y.reshape(y.shape[0])
    X = data.get('X')

    if transpose:
        # the .mat file stores each 20x20 image in MATLAB's column-major order;
        # transposing each image makes displayData render the digits upright
        X = np.array([im.reshape((20, 20)).T for im in X])
        X = np.array([im.reshape(400) for im in X])

    return X, y

 

nnCostFunction.py
from feedForward import *


def nnCostFunction(theta, X, y):
    m = X.shape[0]
    _, _, _, _, h = feedForward(theta, X)

    pair_computation = -np.multiply(y, np.log(h)) - np.multiply((1 - y), np.log(1 - h))

    return pair_computation.sum() / m
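
For a single label/prediction pair the summand is -y * log(h) - (1 - y) * log(1 - h); a minimal sketch with hypothetical values (not taken from the dataset):

import numpy as np

y_i = np.array([0, 1, 0])        # one-hot label for a 3-class toy example
h_i = np.array([0.1, 0.8, 0.2])  # hypothetical network outputs
cost_i = (-y_i * np.log(h_i) - (1 - y_i) * np.log(1 - h_i)).sum()
print(cost_i)  # ~0.552 = -log(0.9) - log(0.8) - log(0.8)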

 

sigmoid.py
import numpy as np


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def sigmoid_gradient(z):
    return np.multiply(sigmoid(z), 1 - sigmoid(z))
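
A quick check of both functions; the sigmoid gradient at 0 equals 0.25, the value ex4.py prints as a sanity check:

from sigmoid import *

print(sigmoid(0))           # 0.5
print(sigmoid_gradient(0))  # 0.25, i.e. 0.5 * (1 - 0.5)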

 
