import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
模型的创建是通过指定输入层和输出层来构建出整个网络结构,因此可以通过利用已定义的层,而不同的输入层和输出层的组合方式来构建不同的网络结构。
# Encoder: map a 28x28x1 image to a 16-dim embedding via stacked convolutions
# and a global max pool.
encoder_input = keras.Input(shape=(28,28,1), name='img')
x = layers.Conv2D(16, 3, activation='relu')(encoder_input)
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.MaxPool2D(3)(x)
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.Conv2D(16, 3, activation='relu')(x)
encoder_output = layers.GlobalMaxPooling2D()(x)
# Build the first model by specifying encoder_input as the input
# and encoder_output as the output.
encoder = keras.Model(encoder_input, encoder_output, name='encoder')
encoder.summary()
# Decoder half: reshape the 16-dim embedding to 4x4x1, then upsample back to
# 28x28x1 with transposed convolutions (4 -> 6 -> 8 -> 24 -> 26 -> 28).
x = layers.Reshape((4, 4, 1))(encoder_output)
x = layers.Conv2DTranspose(16, 3, activation='relu')(x)
x = layers.Conv2DTranspose(32, 3, activation='relu')(x)
x = layers.UpSampling2D(3)(x)
x = layers.Conv2DTranspose(16, 3, activation='relu')(x)
decoder_output = layers.Conv2DTranspose(1, 3, activation='relu')(x)
# Build another model by specifying encoder_input as the input
# and decoder_output as the output (it shares the encoder's layers/weights).
autoencoder = keras.Model(encoder_input, decoder_output, name='autoencoder')
autoencoder.summary()
您可以通过调用另一层的输入或输出,将任何模型视为一层。通过调用模型,您不仅重用了模型的体系结构,还重用了它的权重。
如下举例与上不同,此处定义了一个编码模型,一个解码模型,并将两者相连到同一个模型,构成自动编码模型
# Second variant: define a standalone encoder model and a standalone decoder
# model, then chain them (models are callable, like layers) into one autoencoder.
encoder_input = keras.Input(shape=(28, 28, 1), name='original_img')
x = layers.Conv2D(16, 3, activation='relu')(encoder_input)
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.MaxPooling2D(3)(x)
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.Conv2D(16, 3, activation='relu')(x)
encoder_output = layers.GlobalMaxPooling2D()(x)
# Encoder model
encoder = keras.Model(encoder_input, encoder_output, name='encoder')
encoder.summary()
# The decoder takes a 16-dim vector (the encoder's output shape) as its input.
decoder_input = keras.Input(shape=(16,), name='encoded_img')
x = layers.Reshape((4, 4, 1))(decoder_input)
x = layers.Conv2DTranspose(16, 3, activation='relu')(x)
x = layers.Conv2DTranspose(32, 3, activation='relu')(x)
x = layers.UpSampling2D(3)(x)
x = layers.Conv2DTranspose(16, 3, activation='relu')(x)
decoder_output = layers.Conv2DTranspose(1, 3, activation='relu')(x)
# Decoder model
decoder = keras.Model(decoder_input, decoder_output, name='decoder')
decoder.summary()
# Calling a model reuses both its architecture and its weights.
autoencoder_input = keras.Input(shape=(28, 28, 1), name='img')
encoded_img = encoder(autoencoder_input)
decoded_img = decoder(encoded_img)
autoencoder = keras.Model(autoencoder_input, decoded_img, name='autoencoder')
正如您所看到的,模型可以被嵌套:模型可以包含子模型(因为模型就像一个层)。模型嵌套的一个常见用例是集成学习。例如,下面是如何将一组模型集成到一个单独的模型中,并将各小模型的输出进行平均
def get_model():
    """Return a tiny model mapping a 128-dim input to a single Dense unit.

    Fix: the original body was not indented under ``def``, which raises
    ``IndentationError`` when the file is executed.
    """
    inputs = keras.Input(shape=(128,))
    outputs = layers.Dense(1)(inputs)
    return keras.Model(inputs, outputs)
# Ensembling via model nesting: three identically-shaped sub-models share one
# input, and their outputs are averaged into a single prediction.
model1 = get_model()
model2 = get_model()
model3 = get_model()
inputs = keras.Input(shape=(128, ))
y1 = model1(inputs)
y2 = model2(inputs)
y3 = model3(inputs)
outputs = layers.average([y1, y2, y3])
ensemble_model = keras.Model(inputs, outputs)
# Render the ensemble's graph to an image (writes to ./temp/test.png).
keras.utils.plot_model(ensemble_model, './temp/test.png', show_shapes=True)
构建的模型有不同的输入层和输出层
如下举例:构建一个系统,按照优先级对客户提交的问题工单进行排序,并将它们分配到正确的部门。模型有三个输入:工单标题(文本输入)、工单正文(文本输入)、用户添加的标签(长度为 num_tags 的二值向量)。
模型的两个输出:优先级得分(priority,单个标量 logit)和应处理该工单的部门(department,num_departments 维 logit 向量)。
num_tags = 12 # number of distinct issue tags
num_words = 10000 # vocabulary size obtained when preprocessing the text
num_departments = 4 # number of departments tickets can be routed to
# Three inputs: variable-length title text, variable-length body text,
# and a fixed-size binary tag vector.
title_input = keras.Input(shape=(None,), name='title')
body_input = keras.Input(shape=(None,), name='body')
tags_input = keras.Input(shape=(num_tags,), name='tags')
# Embed each word id into a 64-dim vector, then summarize each sequence
# with an LSTM.
title_features = layers.Embedding(num_words, 64)(title_input)
body_features = layers.Embedding(num_words, 64)(body_input)
title_features = layers.LSTM(128)(title_features)
body_features = layers.LSTM(32)(body_features)
# Merge all features and branch into the two heads (outputs are logits).
x = layers.concatenate([title_features, body_features, tags_input])
priority_pred = layers.Dense(1, name='priority')(x)
department_pred = layers.Dense(num_departments, name='department')(x)
model = keras.Model(inputs=[title_input, body_input, tags_input],
outputs=[priority_pred, department_pred])
keras.utils.plot_model(model, './temp/mutilModel.png', show_shapes=True)
当编译此模型时,可以指定不同的损失函数,还可以为不同的损失设定不一样的权重,来调节它们对总训练损失的比重
# Losses are matched to outputs positionally (same order as passed to
# keras.Model); loss_weights scales each loss's share of the total loss.
model.compile(optimizer=keras.optimizers.RMSprop(1e-3),
loss=[keras.losses.BinaryCrossentropy(from_logits=True),
keras.losses.CategoricalCrossentropy(from_logits=True)],
loss_weights=[1., 0.2])
因为输出层有不同的名字,可以作出如下指定
# Because the output layers have distinct names, losses can be assigned by
# name. The weights are also given by name here: the original mixed a
# name-keyed loss dict with a positional loss_weights list, which silently
# relies on output ordering — keying both by name is unambiguous.
model.compile(optimizer=keras.optimizers.RMSprop(1e-3),
              loss={'priority': keras.losses.BinaryCrossentropy(from_logits=True),
                    'department': keras.losses.CategoricalCrossentropy(from_logits=True)},
              loss_weights={'priority': 1.0, 'department': 0.2})
创建一些随机数据来训练模型
# Input data: random word ids for title/body, random multi-hot tag vectors.
title_data = np.random.randint(num_words, size=(1288,10))
body_data = np.random.randint(num_words, size=(1288,100))
tags_data = np.random.randint(2, size=(1288, num_tags)).astype('float32')
# Target data: priority in [0, 1), department as a random multi-hot vector.
priority_targets = np.random.random(size=(1288, 1))
dept_targets = np.random.randint(2, size=(1288, num_departments))
# Inputs and targets are passed as dicts keyed by the Input/output layer names.
model.fit({'title':title_data, 'body':body_data, 'tags':tags_data},
{'priority':priority_targets, 'department':dept_targets},
epochs=2, batch_size=32)
Epoch 1/2
41/41 [==============================] - 4s 109ms/step - loss: 1.3079 - priority_loss: 0.7075 - department_loss: 3.0022
Epoch 2/2
41/41 [==============================] - 4s 100ms/step - loss: 1.2808 - priority_loss: 0.7001 - department_loss: 2.9036
如下举例:小型的ResNet模型
# Toy ResNet: two residual blocks (layers.add skip connections) on 32x32x3
# images, ending in a 10-way logit head.
inputs = keras.Input(shape=(32, 32, 3), name='img')
x = layers.Conv2D(32, 3, activation='relu')(inputs)
x = layers.Conv2D(64, 3, activation='relu')(x)
block_1_output = layers.MaxPooling2D(3)(x)
# 'same' padding keeps the spatial size so the skip addition shapes match.
x = layers.Conv2D(64, 3, activation='relu', padding='same')(block_1_output)
x = layers.Conv2D(64, 3, activation='relu', padding='same')(x)
block_2_output = layers.add([x, block_1_output])
x = layers.Conv2D(64, 3, activation='relu', padding='same')(block_2_output)
x = layers.Conv2D(64, 3, activation='relu', padding='same')(x)
block_3_output = layers.add([x, block_2_output])
x = layers.Conv2D(64, 3, activation='relu')(block_3_output)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.5)(x)
# No softmax here: the loss below is configured with from_logits=True.
outputs = layers.Dense(10)(x)
model = keras.Model(inputs, outputs, name='toy_resnet')
# Load CIFAR-10, scale pixels to [0, 1], and one-hot encode the labels to
# match CategoricalCrossentropy.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
model.compile(optimizer=keras.optimizers.RMSprop(1e-3),
loss=keras.losses.CategoricalCrossentropy(from_logits=True),
metrics=['acc'])
# Train for one epoch, holding out 20% of the training set for validation.
model.fit(x_train, y_train,
batch_size=64,
epochs=1,
validation_split=0.2)
625/625 [==============================] - 165s 264ms/step - loss: 1.8920 - acc: 0.2807 - val_loss: 1.5289 - val_acc: 0.4314
共享层是指在同一个模型中多次重用的层实例。
共享层通常用于对来自类似空间的输入进行编码(例如,具有类似词汇表的两个不同文本片段)。它们能够通过不同的输入数据共享信息,使得训练此类模型更为容易,如果一个给定的词汇在一个输入数据中出现,通过共享层将有助于其他输入数据的处理
# A shared layer: the same Embedding instance (and thus the same weights)
# encodes two different text inputs.
shared_embedding = layers.Embedding(1000, 128)
text_input_a = keras.Input(shape=(None,), dtype='int32')
text_input_b = keras.Input(shape=(None,), dtype='int32')
encoded_input_a = shared_embedding(text_input_a)
encoded_input_b = shared_embedding(text_input_b)
因为我们构建的网络层是一个静态数据结构,所以可以访问和检查它,这就是为什么可以将网络结构绘制成一张图片显示的原因。
这也意味着可以访问中间层(网络中“节点”)的激活值,并且将它重用在其他地方,这对于特征提取之类的任务非常有用。
让我们看一个例子。举例上面提到的小型的ResNet模型
通过查询网络层结构,来获得中间层激活函数的值
# Collect the symbolic output tensor of every layer in the toy ResNet model.
features_list = [layer.output for layer in model.layers]
使用这些特征创建一个新的特征提取模型,返回中间层激活的值:
# Feature-extraction model: same input as the ResNet, but it returns every
# intermediate layer activation.
feat_extraction_model = keras.Model(inputs=model.input, outputs=features_list)
# A batch of 1000 random fake 32x32 RGB images.
img = np.random.random((1000, 32, 32, 3)).astype('float32')
extracted_features = feat_extraction_model(img)
# feat_extraction_model.summary()
extracted_features[3].shape
# Output: TensorShape([1000, 9, 9, 64])