CS224d Assignment1 part2(Neural Network Basics)代码部分

在这里插入图片描述

import numpy as np

def sigmoid(x):
    """
    Compute the element-wise logistic sigmoid, 1 / (1 + exp(-x)).

    The value is evaluated in a numerically stable form: only
    exp(-|x|) is ever computed, so large-magnitude inputs cannot
    overflow (the naive formula overflows in exp(-x) for very
    negative x and emits a RuntimeWarning).

    Arguments:
    x -- a real-valued scalar, sequence or numpy array.

    Returns:
    f -- sigmoid of x with the same shape as x. Floating-point inputs
         keep their dtype; integer/bool inputs are computed in float64.
         A scalar input yields a numpy scalar.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer (or bool) inputs must be promoted before exponentiation.
        x = x.astype(np.float64)

    # decay is always in (0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0:  1 / (1 + e^-x)          (direct definition)
    # x <  0:  e^x / (1 + e^x)         (same value, multiplied by e^x / e^x)
    f = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    # Indexing with () turns a 0-d result back into a numpy scalar and
    # leaves arrays with one or more dimensions unchanged.
    return f[()]

def sigmoid_grad(f):
    """
    Return the derivative of the sigmoid, expressed through its output.

    The argument is NOT the original input x: it must already be the
    sigmoid value s = sigmoid(x). The derivative then has the closed
    form s * (1 - s), which is what this function evaluates.
    """
    # Simplified derivative of the sigmoid in terms of its own value.
    complement = 1 - f
    return f * complement

def test_sigmoid_basic():
    """
    Some simple tests to get you started.
    Warning: these are not exhaustive.

    Evaluates sigmoid and sigmoid_grad on a small 2x2 input and compares
    each result against hard-coded reference values with a tolerance of
    1e-6 on the largest absolute element-wise error.
    """
    print("Running basic tests...")
    x = np.array([[1, 2], [-1, -2]])
    f = sigmoid(x)
    g = sigmoid_grad(f)

    expected_f = np.array([[0.73105858, 0.88079708],
                           [0.26894142, 0.11920292]])
    expected_g = np.array([[0.19661193, 0.10499359],
                           [0.19661193, 0.10499359]])

    print(f)
    # The absolute value is essential: without it, an arbitrarily large
    # negative error would still satisfy "max difference <= 1e-6".
    assert np.amax(np.abs(f - expected_f)) <= 1e-6
    print(g)
    assert np.amax(np.abs(g - expected_g)) <= 1e-6
    print("You should verify these results!\n")


# Run the basic sigmoid tests only when this file is executed as a script.
if __name__ == "__main__":
    test_sigmoid_basic()

在这里插入图片描述

梯度校验:
$$f'(x)=\lim_{h\to 0}\frac{f(x+h)-f(x-h)}{2h}$$

import numpy as np
import random

# First implement a gradient checker by filling in the following functions
def gradcheck_naive(f, x):
    """
    Gradient check for a function f

    Compares the analytic gradient returned by f with a central-difference
    estimate (f(x + h) - f(x - h)) / (2 * h), one coordinate of x at a time,
    and prints whether the check passed or where it first failed.

    - f should be a function that takes a single argument and outputs the
      cost and its gradients
    - x is the point (numpy array) to check the gradient at. It is perturbed
      in place during the check, so it must be a writable floating-point
      array; every entry is restored to its original value before the
      function returns.

    Returns None; the outcome is reported via print.
    """
    # Snapshot the RNG state so that every evaluation of f below sees the
    # same random numbers; this makes cost functions with built-in
    # randomness checkable.
    rndstate = random.getstate()
    random.setstate(rndstate)
    _, grad = f(x)  # Analytic gradient at the original point
    h = 1e-4

    # Iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        # Remember the exact original value so it can be written back
        # bit-for-bit (adding and subtracting h is not exact in floats).
        original = x[ix]

        x[ix] = original + h
        random.setstate(rndstate)
        f_plus = f(x)[0]  # f returns (cost, gradient); only the cost is needed

        x[ix] = original - h
        random.setstate(rndstate)
        f_minus = f(x)[0]

        # Restore the coordinate before moving on; otherwise later
        # coordinates would be checked at a shifted point and the caller's
        # array would be left modified.
        x[ix] = original

        # The cost may come back as a vector or matrix, so reduce it to a
        # scalar by summing.
        numgrad = np.sum((f_plus - f_minus) / (2 * h))

        # Compare gradients: grad[ix] is the analytic value at this index,
        # numgrad the central-difference estimate.
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-5:
            print("Gradient check failed.")
            print("First gradient error found at index %s" % str(ix))
            print("Your gradient: %f \t Numerical gradient: %f" % (grad[ix], numgrad))
            return

        it.iternext()  # Step to next dimension

    print("Gradient check passed!")

def sanity_check():
    """
    Exercise gradcheck_naive on a quadratic cost at points of rank 0, 1 and 2.
    """
    def quad(x):
        # Cost sum(x^2) together with its analytic gradient 2x.
        return np.sum(x ** 2), x * 2

    # Each test point is built lazily, right before its check runs, so the
    # random draws occur in the same order as the checks.
    point_factories = (
        lambda: np.array(123.456),        # scalar test
        lambda: np.random.randn(3,),      # 1-D test
        lambda: np.random.randn(4, 5),    # 2-D test
    )

    print("Running sanity checks...")
    for make_point in point_factories:
        gradcheck_naive(quad, make_point())




# Run the gradient-checker sanity checks only when executed as a script.
if __name__ == "__main__":
    sanity_check()

在这里插入图片描述

import numpy as np
import random

from q1_softmax import softmax
from q2_sigmoid import sigmoid, sigmoid_grad
from q2_gradcheck import gradcheck_naive

def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- input matrix with one example per row and dimensions[0] columns
    labels -- target matrix with one row per example and dimensions[2]
              columns (e.g. one-hot rows)
    params -- flat vector holding W1, b1, W2, b2 in that order
    dimensions -- sequence (Dx, H, Dy): input, hidden and output sizes

    Returns:
    cost -- cross-entropy cost averaged over the rows of data
    grad -- flat gradient vector laid out in the same order as params
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    n_examples = data.shape[0]

    # Forward pass: affine -> sigmoid -> affine -> softmax.
    # NOTE(review): softmax comes from q1_softmax and is assumed to
    # normalise each row independently -- confirm against that module.
    z1 = np.matmul(data, W1) + b1
    h = sigmoid(z1)
    z2 = np.matmul(h, W2) + b2
    y = softmax(z2)
    cost = np.sum(-labels * np.log(y)) / n_examples

    # Backward pass. For softmax followed by cross entropy the gradient
    # with respect to the output pre-activation is (y - labels); dividing
    # by n_examples matches the averaging used in the cost.
    delta2 = (y - labels) / n_examples
    gradW2 = np.matmul(h.T, delta2)
    gradb2 = np.sum(delta2, axis=0, keepdims=True)

    # Propagate through W2, then through the sigmoid; sigmoid_grad takes
    # the sigmoid output h, not the pre-activation z1.
    delta1 = sigmoid_grad(h) * np.matmul(delta2, W2.T)
    gradW1 = np.matmul(data.T, delta1)
    gradb1 = np.sum(delta1, axis=0, keepdims=True)

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 
        gradW2.flatten(), gradb2.flatten()))
    
    return cost, grad

def sanity_check():
    """
    Gradient-check forward_backward_prop on randomly generated inputs.

    Builds fake data, one-hot labels and a random parameter vector for a
    small network, then hands the cost function to gradcheck_naive.
    """
    print("Running sanity check...")

    N = 20                      # number of data rows
    dimensions = [10, 5, 10]    # input, hidden and output sizes
    Dx, H, Dy = dimensions

    data = np.random.randn(N, Dx)   # each row will be a datum
    labels = np.zeros((N, Dy))
    for row in labels:
        # Set exactly one randomly chosen entry per row to build a
        # one-hot target matrix.
        row[random.randint(0, Dy - 1)] = 1

    # One weight matrix plus one bias row per layer.
    n_params = (Dx + 1) * H + (H + 1) * Dy
    params = np.random.randn(n_params, )

    def cost_and_grad(p):
        return forward_backward_prop(data, labels, p, dimensions)

    gradcheck_naive(cost_and_grad, params)



# Run the network gradient sanity check only when executed as a script.
if __name__ == "__main__":
    sanity_check()

你可能感兴趣的:(CS224)