Andrew Ng, Course 1 - Neural Networks and Deep Learning - Week 3 Assignment - Planar Data Classification with One Hidden Layer

import numpy as np
import matplotlib.pyplot as plt
import time
  • load_planar_dataset() - generates the flower-shaped dataset; returns X, Y

def load_planar_dataset():
    np.random.seed(1)
    m = 400 # number of examples
    N = int(m/2) # number of points per class
    D = 2 # dimensionality
    X = np.zeros((m,D)) # data matrix where each row is a single example
    Y = np.zeros((m,1), dtype='uint8') # labels vector (0 for red, 1 for blue)
    a = 4 # maximum ray of the flower

    for j in range(2):
        ix = range(N*j,N*(j+1))
        t = np.linspace(j*3.12,(j+1)*3.12,N) + np.random.randn(N)*0.2 # theta
        r = a*np.sin(4*t) + np.random.randn(N)*0.2 # radius
        X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
        Y[ix] = j

    X = X.T
    Y = Y.T

    return X, Y
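To check the returned shapes and visualize the flower-shaped dataset, an optional snippet like the following can be run (not part of the original assignment code; it only reuses load_planar_dataset and matplotlib):

X, Y = load_planar_dataset()
print(X.shape, Y.shape)   # (2, 400) (1, 400): 2 features per column, 400 examples

# Scatter plot of the two classes (the "flower" pattern)
plt.scatter(X[0, :], X[1, :], c=Y.squeeze(), s=40, cmap=plt.cm.Spectral)
plt.show()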
  • init_parameter(num_of_unit, X) - randomly initializes W; zero-initializes B; computes M and L; returns W, B, M, L

def init_parameter(num_of_unit, X):
    # index 0 is a dummy placeholder so that layer l maps directly to W[l], B[l]
    W = []
    W.append(np.empty(1))
    for i in range(1, len(num_of_unit)):
        np.random.seed(1)  # fixed seed so every run starts from the same weights
        W.append(np.random.randn(num_of_unit[i], num_of_unit[i - 1]) / np.sqrt(num_of_unit[i - 1]))
    
    B = []
    B.append(np.empty(1))
    for i in range(1, len(num_of_unit)):
        B.append(np.zeros((num_of_unit[i], 1)))
    
    M = X.shape[1]            # number of training examples
    L = len(num_of_unit) - 1  # number of layers, excluding the input layer
    
    return W, B, M, L
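As an illustration (the X_demo and Y_demo names are used only for this check and do not appear in the script below), initializing a [2, 4, 1] network gives the following shapes:

X_demo, Y_demo = load_planar_dataset()
W, B, M, L = init_parameter([2, 4, 1], X_demo)
print(W[1].shape, B[1].shape)   # (4, 2) (4, 1) -- hidden layer
print(W[2].shape, B[2].shape)   # (1, 4) (1, 1) -- output layer
print(M, L)                     # 400 2: number of examples, number of layers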
  • derivative_of_activation(A, tag) - returns the derivative of the activation function selected by tag

def derivative_of_activation(A, tag):
    # derivatives are expressed in terms of the activation output A, not Z
    if tag == "sigmoid":
        return A * (1 - A)
    elif tag == "tanh":
        return 1 - A * A
    elif tag == "relu":
        return np.clip((A > 0), 0, 1)      # 1 where Z > 0, else 0
    else:
        return np.clip((A > 0), 0.01, 1)   # leaky relu: 1 where Z > 0, else 0.01
  • Activation functions

def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

def relu(Z):
    return np.maximum(np.zeros(Z.shape), Z)

def leaky_relu(Z):
    return np.maximum(0.01 * Z, Z)
  • activation_function(Z, tag) - applies the activation function selected by tag

def activation_function(Z, tag):
    if tag == "sigmoid":
        return sigmoid(Z)
    elif tag == 'tanh':
        return np.tanh(Z)
    elif tag == 'relu':
        return relu(Z)
    else:
        return leaky_relu(Z)
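As a quick sanity check (not part of the original assignment), the analytic derivatives above, which are written in terms of the activation output A, can be compared against a central finite difference; the check_derivative helper below is introduced only for this purpose:

# Finite-difference check: derivative_of_activation(A, tag) vs. numerical d(activation)/dZ
def check_derivative(tag, eps=1e-6):
    Z = np.array([[-1.5, -0.2, 0.3, 2.0]])   # test points away from Z = 0
    A = activation_function(Z, tag)
    numeric = (activation_function(Z + eps, tag) - activation_function(Z - eps, tag)) / (2 * eps)
    analytic = derivative_of_activation(A, tag)
    print(tag, np.max(np.abs(numeric - analytic)))   # should be (near) zero

for tag in ["sigmoid", "tanh", "relu", "leaky_relu"]:
    check_derivative(tag)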
  • predict(A, Y) - computes and returns the prediction accuracy

def predict(A, Y):
    return (1 - np.sum(np.abs(1 * (A > 0.5) - Y)) / Y.shape[1]) * 100
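For example, with a hypothetical toy input where three of the four thresholded predictions match the labels, the function returns 75.0:

# Toy check of the accuracy formula: 3 of 4 predictions correct -> 75.0
A_toy = np.array([[0.9, 0.3, 0.6, 0.2]])   # predicted probabilities
Y_toy = np.array([[1, 0, 0, 0]])           # true labels
print(predict(A_toy, Y_toy))               # 75.0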
  • gradient_descent(X, Y, learning_rate, num_of_unit, iteration_times, tag) - forward propagation to compute the outputs; backward propagation to compute the gradients; gradient-descent parameter updates; records the cost every 100 iterations; returns the list of costs

def gradient_descent(X, Y, learning_rate, num_of_unit, iteration_times, tag):
    W, B, M, L = init_parameter(num_of_unit, X)
    costs = []
    start = time.time()
    
    for iteration in range(iteration_times):
        
        '''Forward Propagation'''
        A = []
        A.append(X)
        for i in range(1, L + 1):
            Z = W[i] @ A[i - 1] + B[i]
            assert(Z.shape == (num_of_unit[i], M))
            if i == L:
                A.append(sigmoid(Z))
            else:
                A.append(activation_function(Z, tag))
            assert(A[i].shape == (num_of_unit[i], M))
        '''******************'''
        
        if iteration % 100 == 0:
            # cross-entropy cost: J = -(1/M) * sum(Y*log(A) + (1-Y)*log(1-A))
            cost = (Y @ np.log(A[L].T) + (1 - Y) @ np.log(1 - A[L].T)).squeeze() / -M
            costs.append(cost)
        
        dW = {}
        dB = {}
        
        '''Backward Propagation'''
        for i in range(L, 0, -1):
            if i == L:
                dZ = A[L] - Y
            else:
                dA = W[i + 1].T @ dZ
                dZ = dA * derivative_of_activation(A[i], tag)
            dW[str(i)] = dZ @ A[i - 1].T / M
            dB[str(i)] = dZ.sum(axis=1, keepdims=True) / M
        '''********************'''
        
        '''*******Update*******'''
        for i in range(1, L + 1):
            W[i] -= learning_rate * dW[str(i)]
            B[i] -= learning_rate * dB[str(i)]
        '''********************'''
        
    print("Activation Function: " + tag.title() + 
          " | Learning Rate: " + str(learning_rate) + 
          "\nAccuracy: " + str(round(predict(A[L], Y), 2)) + "%" + 
          " | Runing Time: " + str(round(time.time() - start, 2)) + "seconds"
          "\n---------------------------------------------------------------")
    return costs
  • Main program

X, Y = load_planar_dataset()
structures = [[2, 4, 1], [2, 8, 1], [2, 4, 2, 1], [2, 8, 4, 1]]
learning_rates = [[5.3, 4.6, 0.0075, 0.0075], [4.95, 4.3, 0.0075, 0.0075], 
                  [5.6, 1.91, 0.0075, 0.0075], [3.88, 0.74, 0.0075, 0.0075]]
activations = ["sigmoid", "tanh", "relu", "leaky_relu"]
iteration_times = 10000
Costs = []

for i in range(4): 
    cost_temp = []
    print("#Neural Network Structure: " + str(structures[i]) + "\n")
    for j in range(4):
        cost_temp.append(gradient_descent(X, Y, learning_rates[i][j], structures[i], iteration_times, activations[j]))
    Costs.append(cost_temp)

fig = plt.figure(figsize=(16, 9))
colors = ['r', 'g', 'b', 'c']
for i in range(4):
    ax = fig.add_subplot(2, 2, i + 1)
    ax.set_title("Structure:" + str(structures[i]))
    ax.set_xlabel("Iterations")
    ax.set_ylabel("Cost")
    ax.axis([0, 10000, 0, 0.8])
    for j in range(4):
        ax.plot(np.arange(0, 10000, 100), Costs[i][j], color = colors[j], linewidth = 3, label = activations[j])

fig.tight_layout()
plt.legend(bbox_to_anchor=(1.05, 0), loc=3, borderaxespad=0)
plt.show()

Output:

    #Neural Network Structure: [2, 4, 1]
    
    Activation Function: Sigmoid | Learning Rate: 5.3
    Accuracy: 91.25% | Running Time: 1.0 seconds
    ---------------------------------------------------------------
    Activation Function: Tanh | Learning Rate: 4.6
    Accuracy: 91.0% | Running Time: 1.0 seconds
    ---------------------------------------------------------------
    Activation Function: Relu | Learning Rate: 0.0075
    Accuracy: 59.0% | Running Time: 0.93 seconds
    ---------------------------------------------------------------
    Activation Function: Leaky_Relu | Learning Rate: 0.0075
    Accuracy: 58.0% | Running Time: 0.81 seconds
    ---------------------------------------------------------------
    #Neural Network Structure: [2, 8, 1]
    
    Activation Function: Sigmoid | Learning Rate: 4.95
    Accuracy: 90.75% | Running Time: 1.33 seconds
    ---------------------------------------------------------------
    Activation Function: Tanh | Learning Rate: 4.3
    Accuracy: 91.25% | Running Time: 1.43 seconds
    ---------------------------------------------------------------
    Activation Function: Relu | Learning Rate: 0.0075
    Accuracy: 73.25% | Running Time: 1.14 seconds
    ---------------------------------------------------------------
    Activation Function: Leaky_Relu | Learning Rate: 0.0075
    Accuracy: 73.0% | Running Time: 1.14 seconds
    ---------------------------------------------------------------
    #Neural Network Structure: [2, 4, 2, 1]
    
    Activation Function: Sigmoid | Learning Rate: 5.6
    Accuracy: 91.5% | Running Time: 1.49 seconds
    ---------------------------------------------------------------
    Activation Function: Tanh | Learning Rate: 1.91
    Accuracy: 92.0% | Running Time: 1.54 seconds
    ---------------------------------------------------------------
    Activation Function: Relu | Learning Rate: 0.0075
    Accuracy: 76.5% | Running Time: 1.23 seconds
    ---------------------------------------------------------------
    Activation Function: Leaky_Relu | Learning Rate: 0.0075
    Accuracy: 76.25% | Running Time: 1.31 seconds
    ---------------------------------------------------------------
    #Neural Network Structure: [2, 8, 4, 1]
    
    Activation Function: Sigmoid | Learning Rate: 3.88
    Accuracy: 91.0% | Running Time: 2.08 seconds
    ---------------------------------------------------------------
    Activation Function: Tanh | Learning Rate: 0.74
    Accuracy: 91.5% | Running Time: 2.28 seconds
    ---------------------------------------------------------------
    Activation Function: Relu | Learning Rate: 0.0075
    Accuracy: 88.5% | Running Time: 1.5 seconds
    ---------------------------------------------------------------
    Activation Function: Leaky_Relu | Learning Rate: 0.0075
    Accuracy: 87.75% | Running Time: 1.61 seconds
    ---------------------------------------------------------------

[Figure 1: cost vs. iterations for each network structure, one curve per activation function]

Analysis:
1. With few layers, sigmoid and tanh perform similarly.
2. The rectified linear-unit activations fit this nonlinear dataset less well; on this dataset, relu and leaky relu perform similarly at the same learning rate.
3. Moderately increasing the number of units per layer improves prediction accuracy, but convergence becomes slower.
4. The learning rate is a key hyperparameter: if the cost curve oscillates strongly, reduce it somewhat; if convergence is slow, increase it (see the sketch below).
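As a starting point for that kind of tuning, here is a minimal sketch that reuses gradient_descent to compare a few candidate learning rates on one configuration; the [2, 4, 1]/tanh choice and the candidate values are only illustrative, not the tuned rates reported above:

# Illustrative learning-rate sweep on a single structure/activation pair
candidate_rates = [0.5, 1.0, 2.0, 4.0]   # example values, chosen only for illustration
for lr in candidate_rates:
    costs = gradient_descent(X, Y, lr, [2, 4, 1], iteration_times, "tanh")
    plt.plot(np.arange(0, iteration_times, 100), costs, label="lr = " + str(lr))
plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.legend()
plt.show()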
