Convolution and Pooling Operations in CNNs

Zero-Padding

X is a 4-D tensor of shape (m, n_H, n_W, n_C); pad the n_H and n_W dimensions with zeros and return X_pad.

import numpy as np

def zero_pad(X, pad):
    # Pad only the spatial dimensions (n_H, n_W); batch and channel dimensions are untouched
    X_pad = np.pad( X, ((0, 0),(pad, pad),(pad, pad),(0, 0)), 'constant', constant_values=0 )
    return X_pad
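
A quick shape check of zero_pad (the sizes here are arbitrary):

X = np.random.randn(4, 3, 3, 2)
X_pad = zero_pad( X, 2 )
print( X.shape, X_pad.shape )    # (4, 3, 3, 2) (4, 7, 7, 2)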

Convolution Forward Propagation

Operation: Z = conv( A_prev, W ) + b

A_prev.shape = (m, n_H_prev, n_W_prev, n_C_prev)
W.shape = (f, f, n_C_prev, n_C) # n_C filters in total
b.shape = (1, 1, 1, n_C)
Z.shape = (m, n_H, n_W, n_C)
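
The spatial dimensions of the output follow the standard convolution size formula:

$$n_H = \left\lfloor \frac{n_{H_{prev}} + 2 \cdot pad - f}{stride} \right\rfloor + 1, \qquad n_W = \left\lfloor \frac{n_{W_{prev}} + 2 \cdot pad - f}{stride} \right\rfloor + 1$$

For example, n_H_prev = 5, f = 3, pad = 1, stride = 2 gives n_H = floor((5 + 2 - 3) / 2) + 1 = 3.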

def conv_forward( A_prev, W, b, hparameters ):

    # Retrieve the dimensions of the tensors
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    (f, f, n_C_prev, n_C) = W.shape

    # Retrieve the hyperparameters
    stride = hparameters['stride']
    pad = hparameters['pad']

    # Compute the spatial dimensions of the output
    n_H = int( ( n_H_prev + 2 * pad - f ) / stride ) + 1
    n_W = int( ( n_W_prev + 2 * pad - f ) / stride ) + 1

    # Initialize the output volume
    Z = np.zeros( (m, n_H, n_W, n_C) )

    # Apply zero padding
    A_prev_temp = A_prev    # keep the unpadded input for backprop
    A_prev = zero_pad( A_prev, pad )

    for i in range(m):          # loop over the m samples

        for h in range(n_H):    # loop over every spatial position of the output
            for w in range(n_W):

                for c in range(n_C):    # loop over the n_C filters

                    # Locate the slice of A_prev
                    h_start, w_start = h * stride, w * stride
                    h_end, w_end = h_start + f, w_start + f

                    A_prev_slice = A_prev[i, h_start:h_end, w_start:w_end, :]

                    # Apply the convolution step: elementwise product, sum, then add the bias once
                    # A_prev_slice and W[:, :, :, c] are both 3-D tensors
                    Z[i, h, w, c] = np.sum( A_prev_slice * W[:, :, :, c] ) + float( b[0, 0, 0, c] )


    cache = (A_prev_temp, W, b, hparameters)    # cache the unpadded input for backprop

    return Z, cache
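
A minimal smoke test of conv_forward (the shapes below are chosen arbitrarily for illustration):

np.random.seed(1)
A_prev = np.random.randn(2, 5, 7, 4)    # 2 samples, 5x7 spatial, 4 channels
W = np.random.randn(3, 3, 4, 8)         # 8 filters of size 3x3x4
b = np.random.randn(1, 1, 1, 8)
Z, cache = conv_forward( A_prev, W, b, {'stride': 2, 'pad': 1} )
print( Z.shape )    # (2, 3, 4, 8)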

Pooling Forward Propagation

Operation: A = pool( A_prev )

A_prev.shape = (m, n_H_prev, n_W_prev, n_C_prev)
A.shape = (m, n_H, n_W, n_C)
Pooling hyperparameters: an f x f window and a stride

Note: viewing the pooling operation as Z = pool( A_prev ) (playing the same role as Z = W * A_prev + b) followed by A = Z (equivalent to a linear activation function) helps in understanding backpropagation.

def pool_forward( A_prev, hparameters, mode='max' ):

    # Retrieve the dimensions of A_prev
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape

    # Retrieve the hyperparameters
    f = hparameters['f']
    stride = hparameters['stride']

    # Compute the dimensions of the output
    n_H = int( ( n_H_prev - f ) / stride ) + 1
    n_W = int( ( n_W_prev - f ) / stride ) + 1
    n_C = n_C_prev

    # Initialize the output
    A = np.zeros( (m, n_H, n_W, n_C) )

    for i in range(m):          # loop over the m samples

        for h in range(n_H):    # loop over every spatial position of the output
            for w in range(n_W):

                for c in range(n_C):    # loop over the n_C channels

                    # Locate the slice of A_prev
                    h_start, w_start = h * stride, w * stride
                    h_end, w_end = h_start + f, w_start + f

                    A_prev_slice = A_prev[i, h_start:h_end, w_start:w_end, c]

                    # Apply the pooling step
                    if mode == 'max':
                        A[i, h, w, c] = np.max( A_prev_slice )
                    elif mode == 'average':
                        A[i, h, w, c] = np.mean( A_prev_slice )


    cache = (A_prev, hparameters)   # cache for backprop

    return A, cache
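
A minimal smoke test of pool_forward (arbitrary shapes, for illustration only):

np.random.seed(1)
A_prev = np.random.randn(2, 4, 4, 3)
A, cache = pool_forward( A_prev, {'f': 2, 'stride': 2}, mode='max' )
print( A.shape )    # (2, 2, 2, 3)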

[Tips] In the convolution operation, A_prev_slice = A_prev[i, h_start:h_end, w_start:w_end, :] selects all channels, whereas in the pooling operation, A_prev_slice = A_prev[i, h_start:h_end, w_start:w_end, c] selects only the current channel.

Convolution Backward Propagation

Forward propagation key line: Z[i, h, w, c] = np.sum( A_prev_slice * W[:, :, :, c] ) + float( b[0, 0, 0, c] )

Backward propagation key line: dA_prev_slice += W[:, :, :, c] * dZ[i, h, w, c]

Parameter gradient key lines:
dW[:, :, :, c] += A_prev_slice * dZ[i, h, w, c]
db[:, :, :, c] += dZ[i, h, w, c]

All three gradients are accumulated with +=: each weight and bias contributes to every output position, and when windows overlap (stride < f) each input position also contributes to several outputs, so the gradients are summed over all of them.

# Forward propagation: Z = conv( A_prev, W ) + b
# Backward propagation: given dZ, propagate back to dA_prev and compute the parameter gradients dW, db
def conv_backward( dZ, cache ):

    (A_prev, W, b, hparameters) = cache

    # Retrieve the dimensions of the tensors
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    (f, f, n_C_prev, n_C) = W.shape
    (m, n_H, n_W, n_C) = dZ.shape

    # Retrieve the hyperparameters
    stride = hparameters['stride']
    pad = hparameters['pad']

    # Initialize dA_prev, dW, db with the same shapes as A_prev, W, b
    dA_prev = np.zeros_like( A_prev )
    dW = np.zeros_like( W )
    db = np.zeros_like( b )

    # Apply zero padding to both A_prev and dA_prev
    A_prev = zero_pad( A_prev, pad )
    dA_prev = zero_pad( dA_prev, pad )

    for i in range(m):      # loop over the m samples

        for h in range(n_H):            # loop over every spatial position of the output
            for w in range(n_W):

                for c in range(n_C):    # loop over the n_C filters

                    # Locate the slices of A_prev and dA_prev
                    h_start, w_start = h * stride, w * stride
                    h_end, w_end = h_start + f, w_start + f

                    A_prev_slice = A_prev[i, h_start:h_end, w_start:w_end, :]
                    dA_prev_slice = dA_prev[i, h_start:h_end, w_start:w_end, :]

                    # Backward step (accumulated with +=); dA_prev_slice is a view
                    # into dA_prev, so this in-place update propagates to dA_prev
                    dA_prev_slice += W[:, :, :, c] * dZ[i, h, w, c]

                    # Accumulate the parameter gradients at the same time
                    dW[:, :, :, c] += A_prev_slice * dZ[i, h, w, c]
                    db[:, :, :, c] += dZ[i, h, w, c]


    # Remove the zero padding (pad > 0 is assumed above; with pad == 0
    # the slice pad:-pad would be empty, so guard against it)
    if pad > 0:
        dA_prev = dA_prev[:, pad:-pad, pad:-pad, :]

    return dA_prev, dW, db
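
A quick check that the gradient shapes match the forward tensors; dZ is set to all ones purely for illustration:

np.random.seed(1)
A_prev = np.random.randn(2, 5, 7, 4)
W = np.random.randn(3, 3, 4, 8)
b = np.random.randn(1, 1, 1, 8)
Z, cache = conv_forward( A_prev, W, b, {'stride': 2, 'pad': 1} )
dA_prev, dW, db = conv_backward( np.ones_like(Z), cache )
print( dA_prev.shape, dW.shape, db.shape )    # (2, 5, 7, 4) (3, 3, 4, 8) (1, 1, 1, 8)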

Pooling Backward Propagation

For max pooling, backpropagation only needs to remember which position the maximum came from, so we design a create_mask function implementing the following mapping:

$$A = \begin{bmatrix} 1 & 3 \\ 4 & 2 \end{bmatrix} \;\rightarrow\; M = \begin{bmatrix} 0 & 0 \\ 1 & 0 \end{bmatrix}$$

def create_mask( A ):
    # True at the position(s) holding the maximum of A, False elsewhere
    mask = A == A.max()
    return mask
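
Applied to the example above:

A = np.array( [[1, 3],
               [4, 2]] )
print( create_mask(A) )
# [[False False]
#  [ True False]]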

For average pooling, we need a matrix that distributes 1 evenly over the window; for a 2x2 pooling operation, this is the following 2x2 matrix:

$$M = \begin{bmatrix} \frac{1}{4} & \frac{1}{4} \\ \frac{1}{4} & \frac{1}{4} \end{bmatrix}$$

def create_matrix( shape ):
    # Spread a unit value evenly over an n_H x n_W window
    (n_H, n_W) = shape
    matrix = np.ones( shape ) / (n_H * n_W)
    return matrix
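
For the 2x2 case this reproduces the matrix above:

print( create_matrix( (2, 2) ) )
# [[0.25 0.25]
#  [0.25 0.25]]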

Forward propagation key lines:
A[i, h, w, c] = np.max( A_prev_slice )
A[i, h, w, c] = np.mean( A_prev_slice )

Backward propagation key lines:
mask = create_mask( A_prev_slice )
dA_prev_slice += mask * dA[i, h, w, c]

matrix = create_matrix( (f, f) )
dA_prev_slice += matrix * dA[i, h, w, c]

# Forward propagation: A = pool( A_prev )
# Backward propagation: given dA, propagate back to dA_prev
def pool_backward( dA, cache, mode='max' ):

    (A_prev, hparameters) = cache

    # Retrieve the dimensions of the tensors
    (m, n_H, n_W, n_C) = dA.shape
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape

    # Retrieve the hyperparameters
    stride = hparameters['stride']
    f = hparameters['f']

    # Initialize dA_prev
    dA_prev = np.zeros_like( A_prev )

    for i in range(m):          # loop over the m samples

        for h in range(n_H):        # loop over every spatial position of the output
            for w in range(n_W):

                for c in range(n_C):    # loop over the n_C channels

                    # Locate the slices of A_prev and dA_prev
                    h_start, w_start = h * stride, w * stride
                    h_end, w_end = h_start + f, w_start + f

                    A_prev_slice = A_prev[i, h_start:h_end, w_start:w_end, c]
                    dA_prev_slice = dA_prev[i, h_start:h_end, w_start:w_end, c]    # a view, so += updates dA_prev

                    if mode == 'max':
                        mask = create_mask( A_prev_slice )
                        dA_prev_slice += mask * dA[i, h, w, c]

                    elif mode == 'average':
                        matrix = create_matrix( (f, f) )
                        dA_prev_slice += matrix * dA[i, h, w, c]


    return dA_prev
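
A quick shape check of pool_backward, again with dA set to all ones for illustration:

np.random.seed(1)
A_prev = np.random.randn(2, 4, 4, 3)
A, cache = pool_forward( A_prev, {'f': 2, 'stride': 2}, mode='max' )
dA_prev = pool_backward( np.ones_like(A), cache, mode='max' )
print( dA_prev.shape )    # (2, 4, 4, 3)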
