TensorFlow 2.0: training DenseNet121 on CIFAR-100

from tensorflow.keras import layers, regularizers, Sequential, optimizers
import tensorflow as tf
import numpy as np

def regularized_padded_conv2d(*args, **kwargs):
    ''' Conv2D with 'same' padding, L2 regularization and Glorot-normal initialization '''

    return layers.Conv2D(
        *args, **kwargs,
        padding='same',
        kernel_regularizer=regularizers.l2(5e-5),
        bias_regularizer=regularizers.l2(5e-5),
        kernel_initializer='glorot_normal'
    )

def load_cifar100_with_DataAugmentation():
    ''' Load CIFAR-100 and build an 8x augmented training set (noise, flips, rotations) '''
    (train_x, train_y), (test_x, test_y) = tf.keras.datasets.cifar100.load_data()

    # Scale pixel values to [0, 1]; float32 keeps the 8x augmented set at a manageable size
    train_x = train_x.astype(np.float32) / 255.0
    train_y = tf.keras.utils.to_categorical(train_y)

    test_x = test_x.astype(np.float32) / 255.0
    test_y = tf.keras.utils.to_categorical(test_y)

    # Add small Gaussian noise to the training images
    train_noise = tf.random.normal(shape=train_x.shape, mean=0.0, stddev=0.05)
    train_x = tf.add(train_x, train_noise)

    # Deterministic and random flips
    flip_1 = tf.image.flip_up_down(train_x)
    flip_2 = tf.image.flip_left_right(train_x)
    flip_3 = tf.image.random_flip_up_down(train_x)
    flip_4 = tf.image.random_flip_left_right(train_x)

    # 90 / 180 / 270 degree rotations
    rot_90 = tf.image.rot90(train_x, k=1)
    rot_180 = tf.image.rot90(train_x, k=2)
    rot_270 = tf.image.rot90(train_x, k=3)

    # Stack the original and the 7 augmented copies: 8 x 50000 = 400000 samples
    train_x = np.concatenate([train_x, flip_1, flip_2, flip_3, flip_4, rot_90, rot_180, rot_270])
    train_y = np.concatenate([train_y] * 8)

    return (train_x, train_y), (test_x, test_y)
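
A quick shape check of what the loader returns (a minimal sketch; the 400000 figure is simply the 8x augmentation of the 50000 CIFAR-100 training images):

# Illustrative sanity check of the augmented dataset, not part of the training script
(x, y), (tx, ty) = load_cifar100_with_DataAugmentation()
print(x.shape, y.shape)    # expected: (400000, 32, 32, 3) (400000, 100)
print(tx.shape, ty.shape)  # expected: (10000, 32, 32, 3) (10000, 100)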

class _DenseNet_BottleNeck(layers.Layer):
    ''' DenseNet bottleneck layer: BN-ReLU-1x1 conv, BN-ReLU-3x3 conv, dropout, then concatenate with the input '''

    def __init__(self, growth_rate, dropout_rate):
        super(_DenseNet_BottleNeck, self).__init__()
        self.BN1 = layers.BatchNormalization(trainable=True)
        self.Act1 = layers.Activation('relu')

        self.Conv1 = regularized_padded_conv2d(
            filters=growth_rate * 4,
            kernel_size=(1, 1),
            strides=1
        )
        self.BN2 = layers.BatchNormalization(trainable=True)
        self.Act2 = layers.Activation('relu')

        self.Conv2 = regularized_padded_conv2d(
            filters=growth_rate,
            kernel_size=(3, 3),
            strides=1
        )
        self.dropout = layers.Dropout(rate=dropout_rate)

    def call(self, inputs, training=None, **kwargs):
        x = self.BN1(inputs, training=training)
        x = self.Act1(x)
        x = self.Conv1(x)
        x = self.BN2(x, training=training)
        x = self.Act2(x)
        x = self.Conv2(x)
        x = self.dropout(x, training=training)
        # Dense connectivity: concatenate the input with the new feature maps
        x = tf.concat([inputs, x], axis=-1)
        return x

class _DenseBlock(layers.Layer):
    ''' Dense block: a stack of num_layers bottleneck layers '''

    def __init__(self, num_layers, growth_rate, dropout_rate):
        super(_DenseBlock, self).__init__()
        self.Alllayers = []
        for _ in range(num_layers):
            self.Alllayers.append(_DenseNet_BottleNeck(growth_rate=growth_rate, dropout_rate=dropout_rate))

    def call(self, inputs, training=None, **kwargs):
        x = inputs
        for layer in self.Alllayers:
            x = layer(x, training=training)
        return x
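
Each bottleneck appends growth_rate feature maps, so a dense block turns C input channels into C + num_layers * growth_rate output channels. A minimal sketch with a dummy tensor (the shapes here are illustrative only):

# Illustrative channel-growth check for a dense block
block = _DenseBlock(num_layers=6, growth_rate=32, dropout_rate=0.2)
out = block(tf.zeros((2, 8, 8, 64)), training=False)
print(out.shape)  # expected: (2, 8, 8, 64 + 6 * 32) = (2, 8, 8, 256)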

class _TransitionLayer(layers.Layer):
    ''' Transition layer between dense blocks: a 1x1 conv compresses the channels and pooling halves the spatial size '''

    def __init__(self, out_channels):
        super(_TransitionLayer, self).__init__()
        self.BN = layers.BatchNormalization(trainable=True)
        self.Conv = regularized_padded_conv2d(
            filters=out_channels,
            kernel_size=(1, 1),
            strides=1
        )
        self.Act = layers.Activation('relu')
        self.Pool = layers.MaxPool2D(pool_size=(2, 2), strides=2, padding="same")

    def call(self, inputs, training=None, **kwargs):
        x = self.BN(inputs, training=training)
        x = self.Act(x)
        x = self.Conv(x)
        x = self.Pool(x)
        return x

class _PretreatLayer(layers.Layer):
    ''' Input stem: 7x7 stride-2 conv, ReLU, BN, then 3x3 stride-2 max-pooling '''

    def __init__(self, num_init_features):
        super(_PretreatLayer, self).__init__()
        self.Conv = regularized_padded_conv2d(
            filters=num_init_features,
            kernel_size=(7, 7),
            strides=2
        )
        self.Act = layers.Activation('relu')
        self.BN = layers.BatchNormalization(trainable=True)
        self.Pool = layers.MaxPool2D(pool_size=(3, 3), strides=2, padding="same")

    def call(self, inputs, training=None, **kwargs):
        x = self.Conv(inputs)
        x = self.Act(x)
        x = self.BN(x, training=training)
        x = self.Pool(x)
        return x
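
On 32x32 CIFAR inputs this stem already shrinks the feature maps to 8x8 (stride-2 conv, then stride-2 pooling); the three transition layers later reduce them to 1x1. A quick check (the input shape is illustrative):

# Illustrative shape check of the stem on a CIFAR-sized input
stem = _PretreatLayer(num_init_features=64)
print(stem(tf.zeros((1, 32, 32, 3)), training=False).shape)  # expected: (1, 8, 8, 64)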

class DenseNet(layers.Layer):
    ''' DenseNet builder; the default configuration corresponds to DenseNet-121 '''

    def __init__(self,
                 num_init_features=64,
                 growth_rate=32,
                 block_layers=[6, 12, 24, 16],
                 compression_rate=0.5,
                 dropout_rate=0.2
                 ):
        super(DenseNet, self).__init__()
        self.pretreat = _PretreatLayer(num_init_features=num_init_features)
        num_channels = num_init_features
        self.AllBlocks = []

        for index, num_layer in enumerate(block_layers):
            self.AllBlocks.append(_DenseBlock(num_layers=num_layer,
                                              growth_rate=growth_rate,
                                              dropout_rate=dropout_rate))
            if index != len(block_layers) - 1:
                num_channels += growth_rate * num_layer
                num_channels *= compression_rate
                self.AllBlocks.append(_TransitionLayer(out_channels=int(num_channels)))

        self.avgpool = layers.GlobalAveragePooling2D()

    def call(self, inputs, training=None, mask=None):
        x = self.pretreat(inputs, training=training)
        for block in self.AllBlocks:
            x = block(x, training=training)
        x = self.avgpool(x)
        return x
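
With the DenseNet-121 configuration (64 initial channels, growth rate 32, blocks [6, 12, 24, 16], compression 0.5) the channel bookkeeping is: 64 + 6*32 = 256 -> 128, 128 + 12*32 = 512 -> 256, 256 + 24*32 = 1024 -> 512, and finally 512 + 16*32 = 1024 features after global average pooling. A minimal sketch to confirm the backbone output size (growth_rate is passed explicitly to pin the DenseNet-121 value):

# Illustrative shape check of the backbone output
backbone = DenseNet(growth_rate=32)
features = backbone(tf.zeros((2, 32, 32, 3)), training=False)
print(features.shape)  # expected: (2, 1024)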


if __name__ == "__main__":

    (x, y), (tx, ty) = load_cifar100_with_DataAugmentation()

    batch_size = 1000
    epochs = 10

    model = Sequential([
        DenseNet(),
        layers.Dense(units=100, activation=tf.nn.softmax)
    ])

    model.build(input_shape=(None, 32, 32, 3))

    # model.load_weights('./cifar-100-densenet121.h5')

    optimizer = optimizers.SGD(learning_rate=2e-4)

    num_samples = x.shape[0]              # 400000 after the 8x augmentation
    steps_per_epoch = num_samples // batch_size

    for e in range(epochs):
        loss_sum = 0.0
        for i in range(steps_per_epoch):
            start = i * batch_size
            end = min((i + 1) * batch_size, num_samples)
            batch_x = x[start:end]
            batch_y = y[start:end]

            with tf.GradientTape() as tape:
                # training=True so BatchNormalization and Dropout run in training mode
                loss = tf.losses.categorical_crossentropy(y_true=batch_y, y_pred=model(batch_x, training=True))

            # Compute and apply gradients outside the tape context
            grad = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grad, model.trainable_variables))

            loss_sum += float(tf.reduce_mean(loss))

        loss_mean = loss_sum / steps_per_epoch

        print('epoch: {}/{}, loss: {}'.format(e + 1, epochs, loss_mean))

    model.save_weights('./cifar-100-densenet121.h5')
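
After training, test-set accuracy can be checked against the held-out data the loader already returned (a minimal sketch; batching just keeps memory bounded):

    # Illustrative evaluation loop on the CIFAR-100 test set
    correct = 0
    for i in range(0, tx.shape[0], batch_size):
        logits = model(tx[i:i + batch_size], training=False)
        pred = tf.argmax(logits, axis=-1)
        true = tf.argmax(ty[i:i + batch_size], axis=-1)
        correct += int(tf.reduce_sum(tf.cast(pred == true, tf.int32)))
    print('test accuracy: {:.4f}'.format(correct / tx.shape[0]))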
