Ultra-simple neural network: a single layer of nodes trained on 5 coordinate points

ILearnDeepLearning.py/Numpy deep neural network.ipynb at master · SkalskiP/ILearnDeepLearning.py · GitHub
I extracted this ultra-simple neural network, a single layer of nodes trained on 5 coordinate points, from the code in the notebook above.
Points in the second quadrant belong to class 0; points in the first quadrant belong to class 1.

import numpy as np


def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def sigmoid_backward(Z):
    # derivative of sigmoid evaluated at Z: sigmoid(Z) * (1 - sigmoid(Z))
    sig = sigmoid(Z)
    return sig * (1 - sig)

# Points whose first coordinate is negative belong to class 0; points whose first coordinate is positive belong to class 1
X = np.array( [  [3.,5.], [4.,2.], [1.,1.], [-5.,4.], [-4.,2.]  ] )
Y = np.array( [ 1,1,1,0,0 ] )
if __name__ == '__main__':
    n_samples = X.shape[0]
    print(X.T.shape)  # sanity check: samples are fed to the layer as columns
    W = np.array([[1.0, 1.0]])
    b = np.array([[1.0]])

    for i in range(100):
        # forward pass
        Z = np.dot(W, X.T) + b
        A = sigmoid(Z)
        Y_hat = A
        # backward pass: gradients of the mean squared error
        dA = 2 * (Y_hat - Y)
        dZ = dA * sigmoid_backward(Z)
        dW = np.dot(dZ, X) / n_samples
        db = np.sum(dZ, axis=1, keepdims=True) / n_samples

        # 0.01 is the learning rate: the step size of each gradient-descent update
        W -= 0.01 * dW
        b -= 0.01 * db

    # Test data: the first point is class 1, the second is class 0.
    # The closer a point's first coordinate is to 0, the easier it is to misclassify.
    X = np.array([[30., 5.],    # class 1
                  [-40., 2.]])  # class 0
    Z = np.dot(W, X.T) + b
    A = sigmoid(Z)
    Y_hat = A
    print(Y_hat)
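
The training loop above computes the gradient of a mean squared error, so it can be sanity-checked with the same finite-difference idea used later in this post. Below is a minimal sketch of such a check (mse_loss is a helper name introduced here, not part of the original code): it perturbs one weight by a small eps and compares the numerical slope against the analytic dW.

import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def sigmoid_backward(Z):
    sig = sigmoid(Z)
    return sig * (1 - sig)

X = np.array([[3., 5.], [4., 2.], [1., 1.], [-5., 4.], [-4., 2.]])
Y = np.array([1, 1, 1, 0, 0])

def mse_loss(W, b):
    # the loss whose gradient the training loop computes
    return np.mean((sigmoid(np.dot(W, X.T) + b) - Y) ** 2)

W = np.array([[1.0, 1.0]])
b = np.array([[1.0]])

# analytic gradient, exactly as in the training loop
Z = np.dot(W, X.T) + b
dZ = 2 * (sigmoid(Z) - Y) * sigmoid_backward(Z)
dW = np.dot(dZ, X) / X.shape[0]

# numerical slope for W[0, 0]
eps = 1e-6
W_plus = W.copy()
W_plus[0, 0] += eps
numerical = (mse_loss(W_plus, b) - mse_loss(W, b)) / eps
print(dW[0, 0], numerical)  # the two values should agree closely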

Programming expressions differ from the mathematical formulas; if you code purely from the math notation, it is hard to get correct predictions.
1. Given Z'(A)dA = dZ(A): when coding gradient descent, the factor written as dA stands for the local derivative Z'(A), with dZ(A) treated as 1; do not plug in the prediction-minus-label error (Ẑ − Z) there. That is, dA = Z'(A) * 1, and you only multiply by the learning rate at the end.
Also, do not write dA as dZ(A)/Z'(A), nor as (Ẑ − Z)/Z'(A), nor as Z'(A) * (Ẑ − Z).
2. Given W*X + B = Z, the gradient is dW = dZ * X.T, where X.T is the transpose of X. X is usually an n-row, 1-column vector (see the shape sketch below).
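
A minimal shape check for rule 2 (the upstream gradient dz below is just a placeholder of ones, to show the bookkeeping):

import numpy as np

x = np.array([[3.0], [5.0]])  # one sample as an n-row, 1-column vector
W = np.ones((2, 2))
b = np.ones((2, 1))

z = W @ x + b                 # shape (2, 1)
dz = np.ones((2, 1))          # placeholder upstream gradient dZ
dW = dz @ x.T                 # (2, 1) @ (1, 2) -> (2, 2), same shape as W
print(dW.shape)               # (2, 2)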

Below is my code after applying these corrections. I found that replacing the analytic derivative with a slope computed numerically with Δ = 0.000001 also gives similar predictions:

import numpy as np


def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def sigmoid_backward(Z):
    # analytic derivative of sigmoid
    sig = sigmoid(Z)
    return np.multiply(sig, (1 - sig))

def square(x):
    return np.multiply(x, x)

def derivative(func, x, d=0.000001):
    # forward-difference slope: approximates func'(x) with step d
    return (func(x + d) - func(x)) / d

# 8 samples as 2x1 columns; one-hot labels: [1, 0] = class 1, [0, 1] = class 0
x = [ [[3.],[5.]], [[4.],[2.]], [[2.],[3.]], [[1.],[1.]], [[-1.],[5.]], [[-5.],[4.]], [[-4.],[2.]], [[-3.2],[2.5]] ]
labels = [ [[1.],[0.]], [[1.],[0.]], [[1.],[0.]], [[1.],[0.]], [[0.],[1.]], [[0.],[1.]], [[0.],[1.]], [[0.],[1.]] ]

# overridden with just two samples for a quick test
x = [ [[3.],[5.]], [[-1.],[5.]] ]
labels = [ [[1.],[0.]], [[0.],[1.]] ]

# weight matrix and bias for a layer with 2 inputs and 2 outputs
A = [ [1., 1.], [1., 1.] ]
B = [ [1.], [1.] ]

if __name__ == '__main__':
    count = len(x)
    xList = x
    yList = labels

    for i in range(10000):
        oList = []
        ypList = []
        for xi in xList:
            xi = np.matrix(xi)           # one sample as a 2x1 column
            o = np.matrix(A) * xi + B    # linear layer: o = A*x + B
            oList.append(o)
            yp = sigmoid(o)
            ypList.append(yp)

        E = 0
        M = 0
        for index, yp in enumerate(ypList):
            # chain rule assembled from numerical slopes; the analytic
            # equivalents are 2*(yp - yList[index]) and sigmoid_backward(oList[index])
            k = np.multiply(derivative(square, yp - yList[index], 0.0001),
                            derivative(sigmoid, oList[index], 0.0001))
            m = k
            M = M + m                               # accumulate the gradient w.r.t. B
            E = E + m * np.matrix(xList[index]).T   # dZ * x.T accumulates the gradient w.r.t. A

        E = E / count
        M = M / count

        # 0.1 is the learning rate; note A and B are rebound to np.matrix by the first update
        A -= np.multiply(0.1, E)
        B -= np.multiply(0.1, M)

    # final forward pass over the training points
    oList = []
    ypList = []
    for xi in xList:
        xi = np.matrix(xi)
        o = A * xi + B
        oList.append(o)
        yp = sigmoid(o)
        ypList.append(yp)

    print(ypList)
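
To see why the Δ-slope substitution works, compare the forward-difference slope of sigmoid directly against the analytic sigmoid_backward; with a small Δ the two are nearly indistinguishable. A quick check, reusing the helpers defined above:

z = np.array([-2.0, 0.0, 2.0])
print(sigmoid_backward(z))               # analytic: sigmoid(z) * (1 - sigmoid(z))
print(derivative(sigmoid, z, 0.000001))  # forward-difference slope, nearly identical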
