BP神经网络公式推导

BP神经网络.PNG

正向传播

第1层
公式1

公式2
\begin{bmatrix} {w_{1,1}^1}&{w_{1,2}^1}&{\cdots}&{w_{1,n_0}^1}&{b_1^1}\\ {w_{2,1}^1}&{w_{2,2}^1}&{\cdots}&{w_{2,n_0}^1}&{b_2^1}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ {w_{n_1,1}^1}&{w_{n_1,2}^1}&{\cdots}&{w_{n_1,n_0}^1}&{b_{n_1}^1}\\ 0&0&{\cdots}&0&1\\ \end{bmatrix} \begin{bmatrix} {h_1^0}\\{h_2^0}\\{\vdots}\\{h_{n_0}^0}\\1\\ \end{bmatrix} = \begin{bmatrix} {i_{1}^{1}}\\{i_{2}^{1}}\\{\vdots}\\{i_{n_{1}}^{1}}\\1\\ \end{bmatrix}
公式3

公式4

第2层
公式5

公式6
\begin{bmatrix} {w_{1,1}^2}&{w_{1,2}^2}&{\cdots}&{w_{1,n_1}^2}&{b_1^2}\\ {w_{2,1}^2}&{w_{2,2}^2}&{\cdots}&{w_{2,n_1}^2}&{b_2^2}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ {w_{n_2,1}^2}&{w_{n_2,2}^2}&{\cdots}&{w_{n_2,n_1}^2}&{b_{n_2}^2}\\ 0&0&{\cdots}&0&1\\ \end{bmatrix} \begin{bmatrix} {h_1^1}\\{h_2^1}\\{\vdots}\\{h_{n_1}^1}\\1\\ \end{bmatrix} = \begin{bmatrix} {i_{1}^2}\\{i_2^2}\\{\vdots}\\{i_{n_2}^2}\\1\\ \end{bmatrix}
公式7

公式8

第m层
公式9

公式10
\begin{bmatrix} {w_{1,1}^m}&{w_{1,2}^m}&{\cdots}&{w_{1,n_{m-1}}^m}&{b_1^m}\\ {w_{2,1}^m}&{w_{2,2}^m}&{\cdots}&{w_{2,n_{m-1}}^m}&{b_2^m}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ {w_{n_m,1}^m}&{w_{n_m,2}^m}&{\cdots}&{w_{n_m,n_{m-1}}^m}&{b_{n_m}^m}\\ 0&0&{\cdots}&0&1\\ \end{bmatrix} \begin{bmatrix} {h_1^{m-1}}\\{h_2^{m-1}}\\{\vdots}\\{h_{n_{m-1}}^{m-1}}\\1\\ \end{bmatrix} = \begin{bmatrix} {i_{1}^m}\\{i_2^m}\\{\vdots}\\{i_{n_m}^m}\\1\\ \end{bmatrix}
公式11

公式12

反向传播

期望输出

公式13

误差

公式14

公式15

公式16
\frac{\partial H_{n_m}^m}{\partial I_{n_m}^m} = \begin{bmatrix} {\frac{\partial h_1^m}{\partial i_1^m}}&0&0&0\\ 0&\frac{\partial h_2^m}{\partial i_2^m}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{\frac{\partial h_{n_m}^m}{\partial i_{n_m}^m}}\\ \end{bmatrix} = \begin{bmatrix} {h_1^m(1-h_1^m)}&0&0&0\\ 0&{h_2^m(1-h_2^m)}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{h_{n_m}^m(1-h_{n_m}^m)}\\ \end{bmatrix}

权重偏导

公式17
\begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \begin{bmatrix} \frac{\partial E_{total}}{\partial w_{1,1}^m}&\frac{\partial E_{total}}{\partial w_{1,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial w_{1,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial b_1^m}\\ \frac{\partial E_{total}}{\partial w_{2,1}^m}&\frac{\partial E_{total}}{\partial w_{2,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial w_{2,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial b_2^m}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ \frac{\partial E_{total}}{\partial w_{n_m,1}^m}&\frac{\partial E_{total}}{\partial w_{n_m,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial w_{n_m,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial b_{n_m}^m}\\ \end{bmatrix}
公式18
\begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \begin{bmatrix} \frac{\partial E_{total}}{\partial i_1^m}\frac{\partial i_1^m}{\partial w_{1,1}^m}&\frac{\partial E_{total}}{\partial i_1^m}\frac{\partial i_1^m}{\partial w_{1,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial i_1^m}\frac{\partial i_1^m}{\partial w_{1,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial i_1^m}\frac{\partial i_1^m}{\partial b_1^m}\\ \frac{\partial E_{total}}{\partial i_2^m}\frac{\partial i_2^m}{\partial w_{2,1}^m}&\frac{\partial E_{total}}{\partial i_2^m}\frac{\partial i_2^m}{\partial w_{2,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial i_2^m}\frac{\partial i_2^m}{\partial w_{2,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial i_2^m}\frac{\partial i_2^m}{\partial b_2^m}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ \frac{\partial E_{total}}{\partial i_{n_m}^m}\frac{\partial i_{n_m}^m}{\partial w_{n_m,1}^m}&\frac{\partial E_{total}}{\partial i_{n_m}^m}\frac{\partial i_{n_m}^m}{\partial w_{n_m,2}^m}&{\cdots}&\frac{\partial E_{total}}{\partial i_{n_m}^m}\frac{\partial i_{n_m}^m}{\partial w_{n_m,n_{m-1}}^m}&\frac{\partial E_{total}}{\partial i_{n_m}^m}\frac{\partial i_{n_m}^m}{\partial b_{n_m}^m}\\ \end{bmatrix}
公式19
\begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \begin{bmatrix} \frac{\partial E_{total}}{\partial i_1^m}h_1^{m-1}&\frac{\partial E_{total}}{\partial i_1^m}h_2^{m-1}&{\cdots}&\frac{\partial E_{total}}{\partial i_1^m}h_{n_{m-1}}^{m-1}&\frac{\partial E_{total}}{\partial i_1^m}1\\ \frac{\partial E_{total}}{\partial i_2^m}h_1^{m-1}&\frac{\partial E_{total}}{\partial i_2^m}h_2^{m-1}&{\cdots}&\frac{\partial E_{total}}{\partial i_2^m}h_{n_{m-1}}^{m-1}&\frac{\partial E_{total}}{\partial i_2^m}1\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}&{\vdots}\\ \frac{\partial E_{total}}{\partial i_{n_m}^m}h_1^{m-1}&\frac{\partial E_{total}}{\partial i_{n_m}^m}h_2^{m-1}&{\cdots}&\frac{\partial E_{total}}{\partial i_{n_m}^m}h_{n_{m-1}}^{m-1}&\frac{\partial E_{total}}{\partial i_{n_m}^m}1\\ \end{bmatrix}
公式20
\begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \begin{bmatrix} \frac{\partial E_{total}}{\partial i_1^m}\\ \frac{\partial E_{total}}{\partial i_2^m}\\ {\vdots}\\ \frac{\partial E_{total}}{\partial i_{n_m}^m}\\ \end{bmatrix} \begin{bmatrix} {h_1^{m-1}}&{h_2^{m-1}}&{\dots}&{h_{n_{m-1}}^{m-1}}&1\\ \end{bmatrix}
公式21
\begin{bmatrix} \Delta W_{n_m,n_{m-1}}^m&\Delta B_{n_m}^m \end{bmatrix} = \frac{\partial E_{total}}{\partial I_{n_m}^m} \begin{bmatrix} [H_{n_{m-1}}^{m-1}]^T&1\\ \end{bmatrix} = \frac{\partial H_{n_m}^m}{\partial I_{n_m}^m} \frac{\partial E_{total}}{\partial H_{n_m}^m} \begin{bmatrix} [H_{n_{m-1}}^{m-1}]^T&1\\ \end{bmatrix}
公式22
\frac{\partial I_{n_m}^m}{\partial H_{n_{m-1}}^{m-1}} = \begin{bmatrix} {\frac{\partial i_1^m}{\partial h_1^{m-1}}}&\frac{\partial i_2^m}{\partial h_1^{m-1}}&{\dots}&{\frac{\partial i_{n_m}^m}{\partial h_1^{m-1}}}\\ {\frac{\partial i_1^m}{\partial h_2^{m-1}}}&\frac{\partial i_2^m}{\partial h_2^{m-1}}&{\dots}&{\frac{\partial i_{n_m}^m}{\partial h_2^{m-1}}}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ {\frac{\partial i_1^m}{\partial h_{n_{m-1}}^{m-1}}}&\frac{\partial i_2^m}{\partial h_{n_{m-1}}^{m-1}}&{\dots}&{\frac{\partial i_{n_m}^m}{\partial h_{n_{m-1}}^{m-1}}}\\ \end{bmatrix}_{n_{m-1},n_m} = \begin{bmatrix} {w_{1,1}^m}&{w_{1,2}^m}&{\cdots}&{w_{1,n_{m-1}}^m}\\ {w_{2,1}^m}&{w_{2,2}^m}&{\cdots}&{w_{2,n_{m-1}}^m}\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ {w_{n_m,1}^m}&{w_{n_m,2}^m}&{\cdots}&{w_{n_m,n_{m-1}}^m}\\ \end{bmatrix}^T =[W_{n_m,n_{m-1}}^m]^T
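公式23
\frac{\partial E_{total}}{\partial H_{n_{m-1}}^{m-1}} = \frac{\partial I_{n_m}^m}{\partial H_{n_{m-1}}^{m-1}} \frac{\partial H_{n_m}^m}{\partial I_{n_m}^m} \frac{\partial E_{total}}{\partial H_{n_m}^m}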

公式24
\frac{\partial H_{n_{m-1}}^{m-1}}{\partial I_{n_{m-1}}^{m-1}} = \begin{bmatrix} {h_1^{m-1}(1-h_1^{m-1})}&0&0&0\\ 0&{h_2^{m-1}(1-h_2^{m-1})}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{h_{n_{m-1}}^{m-1}(1-h_{n_{m-1}}^{m-1})}\\ \end{bmatrix}
公式25
\begin{bmatrix} \Delta W_{n_{m-1},n_{m-2}}^{m-1}&\Delta B_{n_{m-1}}^{m-1} \end{bmatrix} = \frac{\partial H_{n_{m-1}}^{m-1}}{\partial I_{n_{m-1}}^{m-1}} \frac{\partial E_{total}}{\partial H_{n_{m-1}}^{m-1}} \begin{bmatrix} [H_{n_{m-2}}^{m-2}]^T&1\\ \end{bmatrix}
公式26
\frac{\partial I_{n_{m-1}}^{m-1}}{\partial H_{n_{m-2}}^{m-2}} = [W_{n_{m-1},n_{m-2}}^{m-1}]^T
公式27
\frac{\partial E_{total}}{\partial H_{n_{m-2}}^{m-2}} = \frac{\partial I_{n_{m-1}}^{m-1}}{\partial H_{n_{m-2}}^{m-2}} \frac{\partial H_{n_{m-1}}^{m-1}}{\partial I_{n_{m-1}}^{m-1}} \frac{\partial E_{total}}{\partial H_{n_{m-1}}^{m-1}}


公式28
\frac{\partial H_{n_{m-2}}^{m-2}}{\partial I_{n_{m-2}}^{m-2}} = \begin{bmatrix} {\frac{\partial h_1^{m-2}}{\partial i_1^{m-2}}}&0&0&0\\ 0&\frac{\partial h_2^{m-2}}{\partial i_2^{m-2}}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{\frac{\partial h_{n_{m-2}}^{m-2}}{\partial i_{n_{m-2}}^{m-2}}}\\ \end{bmatrix} = \begin{bmatrix} {h_1^{m-2}(1-h_1^{m-2})}&0&0&0\\ 0&{h_2^{m-2}(1-h_2^{m-2})}&0&0\\ {\vdots}&{\vdots}&{\ddots}&{\vdots}\\ 0&0&0&{h_{n_{m-2}}^{m-2}(1-h_{n_{m-2}}^{m-2})}\\ \end{bmatrix}
公式29
\begin{bmatrix} \Delta W_{n_{m-2},n_{m-3}}^{m-2}&\Delta B_{n_{m-2}}^{m-2} \end{bmatrix} = \frac{\partial H_{n_{m-2}}^{m-2}}{\partial I_{n_{m-2}}^{m-2}} \frac{\partial E_{total}}{\partial H_{n_{m-2}}^{m-2}} \begin{bmatrix} [H_{n_{m-3}}^{m-3}]^T&1\\ \end{bmatrix}


你可能感兴趣的:(BP神经网络公式推导)