Backpropagation Derivatives

(Textbook derivation)

[Equation figures omitted: the textbook's per-element backpropagation formulas, where * denotes the element-wise product, followed by their vectorized forms.]

(My own derivations follow.)
ReLU:
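
For reference, the ReLU forward pass and its gradient, written in the same notation as the layers below (this summary is added here; it is the standard derivative, not from the original post):

$$
forward: Y = Y(X) = \max(0, X) \\
backward: dX = dX(dY, X) = dY * \mathbf{1}(X > 0)
$$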

class Relu(AbstractLayer):
    def __init__(self):
        self.mask = None

    def forward(self, X):
        # Remember which entries were clipped to zero; their gradients are zeroed in backward.
        self.mask = (X <= 0)
        out = X.copy()
        out[self.mask] = 0
        return out

    def backward(self, dY):
        # Copy so the caller's upstream gradient is not modified in place.
        dX = dY.copy()
        dX[self.mask] = 0
        return dX

Sigmoid:
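
Likewise, the sigmoid's gradient can be expressed through its own output Y, which is why the layer caches Y in forward (again a standard identity, added here for reference):

$$
forward: Y = Y(X) = \frac{1}{1 + e^{-X}} \\
backward: dX = dX(dY, Y) = dY * Y * (1 - Y)
$$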

import numpy as np

class Sigmoid(AbstractLayer):
    def __init__(self):
        self.Y = None

    def forward(self, X):
        # Cache the output; the backward pass is expressed in terms of it.
        self.Y = 1. / (1. + np.exp(-X))
        return self.Y

    def backward(self, dY):
        # sigma'(x) = sigma(x) * (1 - sigma(x)), using the cached output.
        return dY * (1 - self.Y) * self.Y

Affine:
$$
init: W \in R^{h \times n},\ b \in R^{h \times 1} \\
forward: Y \in R^{h \times m} = Y(W, b, X \in R^{n \times m}) = W \cdot X + b \\
backward: \left\{\begin{array}{l}
dW = dW(dY, X) = dY \cdot X^T \\
db = db(dY) = \sum_{axis=1} dY \\
dX = dX(W, dY) = W^T \cdot dY
\end{array}\right.
$$

class Affine(AbstractLayer):
    def __init__(self, W, b):
        self.W = W                  # (h, n)
        self.b = b                  # (h, 1), broadcast across the m columns
        self.Y = None
        self.X = None
        self.m = None

    def forward(self, X):
        self.X = X                  # cache the input for backward
        self.m = X.shape[1]
        self.Y = np.dot(self.W, X) + self.b
        return self.Y

    def backward(self, dY):
        dW = np.dot(dY, self.X.T)                 # (h, m) x (m, n) -> (h, n)
        db = np.sum(dY, axis=1, keepdims=True)    # sum over the batch axis
        dX = np.dot(self.W.T, dY)                 # (n, h) x (h, m) -> (n, m)
        return dW, db, dX
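
A quick sanity check on the Affine formulas is to push a random batch through the layer and confirm that each gradient has the same shape as the quantity it differentiates. The sizes below are arbitrary, and the sketch assumes AbstractLayer has already been defined (e.g. as an empty base class); neither is part of the original post.

import numpy as np

# Assumes something like `class AbstractLayer: pass` was defined before Affine.
n, h, m = 4, 3, 5                                # input dim, output dim, batch size (arbitrary)
layer = Affine(np.random.randn(h, n), np.zeros((h, 1)))

Y = layer.forward(np.random.randn(n, m))         # (h, m)
dW, db, dX = layer.backward(np.ones_like(Y))     # upstream gradient of all ones, just for shapes

print(Y.shape, dW.shape, db.shape, dX.shape)     # (3, 5) (3, 4) (3, 1) (4, 5)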

LogCost:
$$
forward: L = L(\hat{Y}, Y) = -\frac{1}{m}\sum_{i,j}\left(Y^{(i,j)} \log \hat{Y}^{(i,j)} + (1 - Y^{(i,j)}) \log (1 - \hat{Y}^{(i,j)})\right) \\
backward: d\hat{Y} = d\hat{Y}(\hat{Y}, Y) = -\frac{1}{m}\left(\frac{Y}{\hat{Y}} - \frac{1 - Y}{1 - \hat{Y}}\right)
$$

class LogCost(CostLayer):

    def forward(self, Y_hat, Y):
        # Binary cross-entropy averaged over the m examples (one per column).
        m = Y.shape[1]
        return -(1. / m) * np.sum(Y * np.log(Y_hat) + (1. - Y) * np.log(1. - Y_hat))

    def backward(self, Y_hat, Y):
        # Gradient of the cost with respect to the predictions Y_hat.
        m = Y.shape[1]
        return -(1. / m) * (Y / Y_hat - (1. - Y) / (1. - Y_hat))
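
To check that these backward passes really compute the gradient of the cost, a numerical sketch like the one below can be used: chain Affine -> Sigmoid -> LogCost on random data and compare the analytic dW against a central-difference estimate. It again assumes AbstractLayer and CostLayer are plain base classes not shown here; the dimensions, seed, and the choice to perturb a single weight are illustrative, not from the original post.

import numpy as np

np.random.seed(0)
n, m = 4, 5                                       # input dim and batch size (arbitrary)
W = np.random.randn(1, n) * 0.1                   # single output unit for binary labels
b = np.zeros((1, 1))
X = np.random.randn(n, m)
Y = (np.random.rand(1, m) > 0.5).astype(float)    # hypothetical 0/1 labels

affine, sigmoid, cost = Affine(W, b), Sigmoid(), LogCost()

def loss(W_):
    # Recompute the cost with a (possibly perturbed) weight matrix.
    affine.W = W_
    return cost.forward(sigmoid.forward(affine.forward(X)), Y)

# Analytic gradient via the backward passes defined above.
Y_hat = sigmoid.forward(affine.forward(X))
dY_hat = cost.backward(Y_hat, Y)
dW, db, dX = affine.backward(sigmoid.backward(dY_hat))

# Central-difference estimate for one entry of W.
eps = 1e-6
W_plus, W_minus = W.copy(), W.copy()
W_plus[0, 0] += eps
W_minus[0, 0] -= eps
numerical = (loss(W_plus) - loss(W_minus)) / (2 * eps)

print(dW[0, 0], numerical)                        # the two values should agree closely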
