ILearnDeepLearning.py/Numpy deep neural network.ipynb at master · SkalskiP/ILearnDeepLearning.py · GitHub

Based on the code in that article, I extracted a super-simple neural network with a single layer of one node that trains on 5 coordinate points: points in the second quadrant are class 0, points in the first quadrant are class 1.

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def sigmoid_backward(Z):
    sig = sigmoid(Z)
    return sig * (1 - sig)

# points whose first coordinate is negative are class 0,
# points whose first coordinate is positive are class 1
X = np.array([
    [3., 5.],
    [4., 2.],
    [1., 1.],
    [-5., 4.],
    [-4., 2.]
])
Y = np.array([1, 1, 1, 0, 0])

if __name__ == '__main__':
    n_samples = X.shape[0]
    print(X.T.shape)
    W = np.array([[1.0, 1.0]])
    b = np.array([[1.0]])
    for i in range(100):
        # forward pass
        Z = np.dot(W, X.T) + b
        A = sigmoid(Z)
        Y_hat = A
        # backward pass
        dA = 2 * (Y_hat - Y)
        dZ = dA * sigmoid_backward(Z)
        dW = np.dot(dZ, X) / n_samples
        db = np.sum(dZ, axis=1, keepdims=True) / n_samples
        # 0.01 is the learning rate; I think of it as the speed of convergence
        W -= 0.01 * dW
        b -= 0.01 * db
        #print(dA)

    # test data: the first point is class 1, the second is class 0;
    # the closer a point's first coordinate is to 0, the easier it is to misclassify
    X = np.array([
        [30., 5.],   # class 1
        [-40., 2.]   # class 0
    ])
    Z = np.dot(W, X.T) + b
    A = sigmoid(Z)
    Y_hat = A
    print(Y_hat)
```
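To check that the backward pass above is consistent, one can compare the analytic dW with a numerical slope of the loss. This is a minimal sketch, assuming the mean squared error loss implied by dA = 2*(Y_hat - Y); it perturbs W[0, 0] and compares the two gradients:

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

X = np.array([[3., 5.], [4., 2.], [1., 1.], [-5., 4.], [-4., 2.]])
Y = np.array([1, 1, 1, 0, 0])
W = np.array([[1.0, 1.0]])
b = np.array([[1.0]])

def loss(W, b):
    # mean squared error, the loss implied by dA = 2*(Y_hat - Y)
    Y_hat = sigmoid(np.dot(W, X.T) + b)
    return np.mean((Y_hat - Y) ** 2)

# analytic gradient, exactly as in the training loop above
Z = np.dot(W, X.T) + b
A = sigmoid(Z)
dZ = 2 * (A - Y) * A * (1 - A)
dW = np.dot(dZ, X) / X.shape[0]

# numerical gradient of W[0, 0] by central difference
eps = 1e-6
Wp, Wm = W.copy(), W.copy()
Wp[0, 0] += eps
Wm[0, 0] -= eps
num = (loss(Wp, b) - loss(Wm, b)) / (2 * eps)
print(dW[0, 0], num)  # the two numbers should agree to ~6 decimals
```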
There is a gap between how things are expressed in code and how they are written as math formulas; if you program strictly from the math formulas, it is hard to get correct predictions.

1. Given Z'(A)·dA = dZ(A): when coding gradient descent, express dA as Z'(A) and treat dZ(A) as 1; do not use the prediction minus the label, (Ẑ − Z). That is, dA = Z'(A)·1, and at the end you simply multiply by the learning rate. Also, do not write dA as dZ(A)/Z'(A), nor as (Ẑ − Z)/Z'(A), nor as Z'(A)·(Ẑ − Z).
2. If W·X + B = Z, then dW = dZ·X.T, where X.T is the transpose of X. X is generally an n-row, 1-column vector; see the shape check below.
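As a quick shape check for point 2, here is a minimal sketch; the dZ below is a made-up upstream gradient and the sizes are only illustrative:

```python
import numpy as np

# For one sample: x is (n, 1), W is (m, n), Z = W @ x + B is (m, 1),
# so dW = dZ @ x.T is (m, 1) @ (1, n) -> (m, n), the same shape as W.
n, m = 2, 2
x = np.array([[3.], [5.]])      # n rows, 1 column
W = np.ones((m, n))
B = np.ones((m, 1))
dZ = np.array([[0.1], [-0.2]])  # hypothetical upstream gradient dL/dZ
dW = dZ @ x.T
print(dW.shape == W.shape)      # True
```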
Below is my code after those corrections. I also found that replacing the derivative function with a slope computed over a step Δ of 0.000001 gives similar predictions:
```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def sigmoid_backward(Z):
    sig = sigmoid(Z)
    return np.multiply(sig, (1 - sig))

def square(x):
    return np.multiply(x, x)

# slope over a small step d, used in place of the analytic derivative
def derivative(func, x, d=0.000001):
    return (func(x + d) - func(x)) / d

x = [
    [[3.], [5.]], [[4.], [2.]], [[2.], [3.]], [[1.], [1.]],
    [[-1.], [5.]], [[-5.], [4.]], [[-4.], [2.]], [[-3.2], [2.5]]
]
labels = [
    [[1.], [0.]], [[1.], [0.]], [[1.], [0.]], [[1.], [0.]],
    [[0.], [1.]], [[0.], [1.]], [[0.], [1.]], [[0.], [1.]]
]
# keep only two samples for a quick test
x = [[[3.], [5.]], [[-1.], [5.]]]
labels = [[[1.], [0.]], [[0.], [1.]]]
A = [[1., 1.], [1., 1.]]
B = [[1.], [1.]]

if __name__ == '__main__':
    count = len(x)
    xList = x
    yList = labels
    for i in range(10000):
        # forward pass over every sample
        oList = []
        ypList = []
        for x in xList:
            x = np.matrix(x)
            o = np.matrix(A) * x + B
            oList.append(o)
            yp = sigmoid(o)
            ypList.append(yp)
        # accumulate the gradients for A (in E) and B (in M)
        E = 0
        M = 0
        for index, yp in enumerate(ypList):
            k = np.multiply(
                derivative(square, yp - yList[index], 0.0001),
                #2*(yp - yList[index]),
                derivative(sigmoid, oList[index], 0.0001))
                #sigmoid_backward(oList[index]))
            m = k  #np.multiply(k, yList[index] - yp)
            M = M + m
            E = E + (m * (np.matrix(xList[index]).T))
        E = E / count
        M = M / count
        # 0.1 is the learning rate
        A -= np.multiply(0.1, E)
        B -= np.multiply(0.1, M)
        #print(ypList)

    # re-run the forward pass with the trained A and B
    oList = []
    ypList = []
    for x in xList:
        x = np.matrix(x)
        o = A * x + B
        oList.append(o)
        yp = sigmoid(o)
        ypList.append(yp)
    print(ypList)
```
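To see why the Δ-slope substitution works, here is a quick comparison (a minimal sketch reusing the functions defined above) of derivative(sigmoid, Z) against the analytic sigmoid_backward(Z):

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def sigmoid_backward(Z):
    sig = sigmoid(Z)
    return np.multiply(sig, (1 - sig))

def derivative(func, x, d=0.000001):
    return (func(x + d) - func(x)) / d

Z = np.linspace(-5., 5., 11)
# the forward-difference slope matches the analytic derivative
# to roughly 1e-7, so either one drives gradient descent the same way
print(np.max(np.abs(derivative(sigmoid, Z) - sigmoid_backward(Z))))
```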