Deep Residual Learning for Image Recognition中算法(resnet-34)的实现

# 实现一个残差块
def residual_block(x, num_filters):
    """Build one residual block (two 3x3 convolutions plus a shortcut) on ``x``.

    The static size of the last (channel) dimension of ``x`` selects the mode:

    * ``num_filters == channels``: both convolutions use stride 1 and the
      shortcut is ``x`` itself.
    * ``num_filters == 2 * channels``: the first convolution uses stride 2,
      and the shortcut is ``x`` after a 2x2, stride-2 average pooling,
      zero-padded along the channel axis up to ``num_filters`` channels.
      This follows the VGG-style rule kept by ResNet: every subsampling step
      comes with an increase in channel count.

    Args:
        x: Input tensor whose last dimension is the channel axis. The padding
            spec used for the shortcut has four entries, so a rank-4 tensor
            is expected.
        num_filters: Number of output channels of both convolutions.

    Returns:
        The element-wise sum of the shortcut and the second convolution.

    Raises:
        ValueError: If the channel dimension of ``x`` is not statically known,
            or if ``num_filters`` is neither equal to nor twice that
            dimension. (``ValueError`` is a subclass of ``Exception``, so
            callers that catch ``Exception`` keep working.)
    """
    x_channel = x.get_shape().as_list()[-1]
    if x_channel is None:
        # Without a static channel count the block mode cannot be chosen.
        raise ValueError(
            "residual_block requires a statically known channel dimension "
            "on its input")

    if x_channel * 2 == num_filters:
        increase_dim = True
        strides = (2, 2)
    elif x_channel == num_filters:
        increase_dim = False
        strides = (1, 1)
    else:
        raise ValueError(
            "num_filters must equal the input channel count or twice it; "
            "got num_filters=%r for %r input channels"
            % (num_filters, x_channel))

    # NOTE(review): ReLU is applied to conv1 *before* the addition and no
    # activation follows the sum. The ResNet paper appears to apply the second
    # ReLU after the addition -- confirm whether this ordering is intentional.
    conv0 = tf.layers.conv2d(x,
                             num_filters,
                             (3, 3),
                             strides=strides,
                             activation=tf.nn.relu,
                             padding='same',
                             name='conv0')
    conv1 = tf.layers.conv2d(conv0,
                             num_filters,
                             (3, 3),
                             strides=(1, 1),
                             activation=tf.nn.relu,
                             padding='same',
                             name='conv1')

    # Two ways exist to make the shortcut match a wider output:
    #   1. zero-pad the channel axis (adds no extra parameters);
    #   2. project with a 1x1 convolution.
    # Option 1 is the one implemented here.
    if increase_dim:
        pool_x = tf.layers.average_pooling2d(x,
                                             (2, 2),
                                             (2, 2),
                                             padding='same',
                                             name='pool_x')
        # Pad exactly the number of missing channels. Splitting the total as
        # (before, total - before) keeps the original symmetric layout for
        # even channel counts and still reaches num_filters for odd ones,
        # where two halves of ``x_channel // 2`` would fall one channel short.
        extra_channels = num_filters - x_channel
        pad_before = extra_channels // 2
        pad_after = extra_channels - pad_before
        pad_x = tf.pad(pool_x,
                       [[0, 0],
                        [0, 0],
                        [0, 0],
                        [pad_before, pad_after]])
    else:
        pad_x = x

    out_x = pad_x + conv1
    return out_x

# 残差结构的实现
def res_net(x, num_filter_block, num_filter_base, class_num):
    """Assemble the network: stem, stacked residual stages, classifier head.

    Args:
        x: Input tensor fed to the first convolution.
        num_filter_block: Number of residual blocks in each stage, for
            example ``[3, 4, 6, 3]``. Stage ``i`` is built with
            ``num_filter_base * 2 ** i`` filters.
        num_filter_base: Filter count of the stem convolution and of stage 0.
        class_num: Number of units of the final dense layer.

    Returns:
        The output tensor of the final dense layer.
    """
    # Stem: a 7x7 stride-2 convolution, then an overlapping 3x3 stride-2
    # max pooling.
    with tf.variable_scope('conv2'):
        stem = tf.layers.conv2d(x,
                                num_filter_base,
                                (7, 7),
                                strides=(2, 2),
                                activation=tf.nn.relu,
                                padding='same',
                                name='conv0')
        net = tf.layers.max_pooling2d(stem,
                                      (3, 3),
                                      (2, 2),
                                      name='pool0')

    # Residual stages: each block consumes the output of the previous one,
    # so a single running tensor is enough to chain them.
    for stage, block_count in enumerate(num_filter_block):
        stage_filters = num_filter_base * (2 ** stage)
        for block in range(block_count):
            with tf.variable_scope("conv%d_%d" % (stage, block)):
                net = residual_block(net, stage_filters)

    # Head: average over axes 1 and 2 (leaving one value per channel for a
    # rank-4 input), then project to ``class_num`` outputs.
    with tf.variable_scope('fc'):
        pooled = tf.reduce_mean(net, [1, 2])
        logits = tf.layers.dense(pooled, class_num)

    return logits

你可能感兴趣的:(Deep Residual Learning for Image Recognition中算法(resnet-34)的实现)