Implementing Backpropagation in a Multi-Layer Neural Network

The script below implements a small fully connected network in plain NumPy (a 2-4-1 architecture: tanh hidden layer, sigmoid output) and trains it with gradient descent on the planar "flower" dataset provided by planar_utils.py.

from planar_utils import *
import matplotlib.pyplot as plt
import numpy as np
import copy
def layer_build():
    layers = [2, 4, 1]   # input dimension, hidden units, output dimension
    return layers

def data_preprocess(X):
    # standardize the inputs: zero mean, unit variance
    X = (X - np.mean(X)) / np.std(X)
    return X

def init_parameter():
    np.random.seed(1)
    layers = layer_build()
    W = [0]         # placeholder for layer 0's weights; not used in computation
    B = [0]         # placeholder for layer 0's biases; not used in computation
    for i in range(1, len(layers)):
        w = np.random.randn(layers[i], layers[i-1])*0.01   # small random init breaks symmetry
        W.append(w)
        b = np.zeros(shape=(layers[i], 1))                 # biases can safely start at zero
        B.append(b)
    parameter = {'W':W, 'B':B}
    return parameter, layers

def sigmoid(z):
    return 1/(1+np.exp(-z))
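
# Forward pass, layer by layer:
#   Z[l] = np.dot(W[l], A[l-1]) + B[l]
#   A[l] = tanh(Z[l]) for hidden layers, sigmoid(Z[l]) for the output layer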

def forward_propagation(layers, A, Z, W, B):
    for layer in range(1, len(layers)):
        Z[layer] = np.dot(W[layer], A[layer - 1]) + B[layer]
        if layer < len(layers) - 1:
            A[layer] = np.tanh(Z[layer])       # hidden layers use tanh
        else:
            A[layer] = sigmoid(Z[layer])       # output layer uses sigmoid
    return A, Z
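
# Backward pass for cross-entropy loss with a sigmoid output:
#   dZ[L] = A[L] - Y                                 (output layer)
#   dZ[l] = (W[l+1].T @ dZ[l+1]) * (1 - A[l]**2)     (tanh'(z) = 1 - tanh(z)^2)
#   dW[l] = dZ[l] @ A[l-1].T / m
#   db[l] = row-wise mean of dZ[l] over the m examples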

def back_propagation(m, layers, A, W, dW, db, dZ, Y):
    for layer in range(len(layers) - 1, 0, -1):
        if layer == len(layers) - 1:
            dZ[layer] = A[-1] - Y
        else:
            dZ[layer] = np.multiply(np.dot(W[layer + 1].T, dZ[layer + 1]), 1 - A[layer] ** 2)
        dW[layer] = np.dot(dZ[layer], A[layer - 1].T) / m
        db[layer] = np.sum(dZ[layer], axis=1, keepdims=True) / m
    return dW, db

def relu(z):
    # alternative activation; defined for reference but not used in this script
    return np.maximum(0, z)

def training():
    X, Y = load_planar_dataset()
    X = data_preprocess(X)
    x_shape = X.shape
    y_shape = Y.shape
    m = x_shape[1]              # number of training examples
    parameter, layers = init_parameter()
    W = parameter['W']
    B = parameter['B']
    learning_rate = 0.01
    A = [None] * len(layers)       # per-layer activations; A[0] holds the input
    Z = copy.deepcopy(A)           # per-layer pre-activations
    dZ = copy.deepcopy(A)
    dW = copy.deepcopy(A)
    db = copy.deepcopy(A)
    A[0] = X
    iter_times = 50000
    loss = []
    for i in range(iter_times):
        # forward propagation
        A, Z = forward_propagation(layers, A, Z, W, B)
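        # cross-entropy cost averaged over the m training examples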
        J = -1/m*np.sum((Y*np.log(A[-1]) + (1-Y)*np.log(1-A[-1])))
        if i % 1000 == 0:
            loss.append(J)
            print('Loss at iteration {0}: {1}'.format(i, J))
        # backward propagation
        dW, db = back_propagation(m, layers, A, W, dW, db, dZ, Y)
        # update parameters by gradient descent
        for layer in range(1, len(layers)):
            W[layer] -= learning_rate*dW[layer]
            B[layer] -= learning_rate*db[layer]

    # predict on the training set and report accuracy
    A, Z = forward_propagation(layers, A, Z, W, B)
    predict = A[-1] > 0.5
    correct = np.sum(predict == Y)
    print('correct: {0}, accuracy: {1}'.format(correct, correct / m))
    plt.plot(loss)
    plt.show()
    # for layer in range(1, len(layers)):
    #     print('W{0}:{1}'.format(layer,W[layer]))
    #     print('B{0}:{1}'.format(layer,B[layer]))

if __name__ == '__main__':
    training()
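
A quick way to sanity-check the analytic gradients is numerical gradient checking: perturb one weight, recompute the cost, and compare the centered difference with the corresponding entry of dW. The sketch below is an illustrative addition (grad_check, its eps, and the entry indices are not part of the original script); it reuses forward_propagation from above.

def grad_check(layers, W, B, X, Y, dW, layer=1, i=0, j=0, eps=1e-7):
    # compare analytic dW[layer][i, j] against a centered finite difference
    m = X.shape[1]
    A = [None] * len(layers)
    Z = [None] * len(layers)
    A[0] = X
    def cost():
        A_out, _ = forward_propagation(layers, A, Z, W, B)
        return -np.sum(Y * np.log(A_out[-1]) + (1 - Y) * np.log(1 - A_out[-1])) / m
    W[layer][i, j] += eps
    J_plus = cost()
    W[layer][i, j] -= 2 * eps
    J_minus = cost()
    W[layer][i, j] += eps                  # restore the original weight
    numeric = (J_plus - J_minus) / (2 * eps)
    print('analytic: {0}, numeric: {1}'.format(dW[layer][i, j], numeric))

Call it right after back_propagation (before the parameter update) so dW matches the current W; the analytic and numeric values should agree to several decimal places.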

Save the following helper module as planar_utils.py:

import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model

def plot_decision_boundary(model, X, y):
    # Set min and max values and give it some padding
    x_min, x_max = X[0, :].min() - 1, X[0, :].max() + 1
    y_min, y_max = X[1, :].min() - 1, X[1, :].max() + 1
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    Z = model(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(X[0, :], X[1, :], c=y, cmap=plt.cm.Spectral)


def sigmoid(x):
    s = 1/(1+np.exp(-x))
    return s

def load_planar_dataset():
    np.random.seed(1)
    m = 400 # number of examples
    N = int(m/2) # number of points per class
    D = 2 # dimensionality
    X = np.zeros((m,D)) # data matrix where each row is a single example
    Y = np.zeros((m,1), dtype='uint8') # labels vector (0 for red, 1 for blue)
    a = 4 # maximum radius of the flower petals

    for j in range(2):
        ix = range(N*j,N*(j+1))
        t = np.linspace(j*3.12,(j+1)*3.12,N) + np.random.randn(N)*0.2 # theta
        r = a*np.sin(4*t) + np.random.randn(N)*0.2 # radius
        X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
        Y[ix] = j

    X = X.T
    Y = Y.T
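    # after the transpose, X has shape (2, m) and Y has shape (1, m): examples are columns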

    return X, Y

def load_extra_datasets():  
    N = 200
    noisy_circles = sklearn.datasets.make_circles(n_samples=N, factor=.5, noise=.3)
    noisy_moons = sklearn.datasets.make_moons(n_samples=N, noise=.2)
    blobs = sklearn.datasets.make_blobs(n_samples=N, random_state=5, n_features=2, centers=6)
    gaussian_quantiles = sklearn.datasets.make_gaussian_quantiles(mean=None, cov=0.5, n_samples=N, n_features=2, n_classes=2, shuffle=True, random_state=None)
    no_structure = np.random.rand(N, 2), np.random.rand(N, 2)

    return noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure
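
plot_decision_boundary expects model to be a callable that maps an (n_points, 2) array of grid coordinates to predicted labels. A hedged sketch of how the trained network could be plugged in (make_predict_fn is an illustrative wrapper, not part of either file, and it assumes forward_propagation from the training script is importable):

def make_predict_fn(layers, W, B):
    def predict_fn(points):
        A = [None] * len(layers)
        Z = [None] * len(layers)
        A[0] = points.T                    # forward_propagation expects examples as columns
        A, _ = forward_propagation(layers, A, Z, W, B)
        return (A[-1] > 0.5).astype(int).ravel()
    return predict_fn

# usage, after training:
# plot_decision_boundary(make_predict_fn(layers, W, B), X, Y.ravel())
# plt.show()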

 
