本文将介绍:
常规的卷及操作如上图所示:参数量共计3 x 3 x 3 x 4 = 108.
常规卷积,卷积核是作用于所有的depth方向的. depthwise conv对不同channel用不同的卷积核做卷积
depthwise卷积没有考虑到同一位置不同channel上的数据之间的关联.所以我们用一个1 x
1的卷积核对depthwise得到的[3,64,64]输出再做卷积.这里做的就是常规卷积. 得到[1,64,64].
用4个这样的1x1卷积核去做常规卷积. 得到输出[4,64,64].
这一步参数量为3 x 1 x 1 x 4 = 12.
深度可分离卷积一共耗费参数量:27 + 12 = 39个.
所以可以看出同样是得到[4,64,64]的feature map. 相比普通卷积的108个参数量,深度可分离卷积的方式大大减少了参数量.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras
my_seed = 666
np.random.seed(my_seed)
import random
random.seed(my_seed)
import tensorflow as tf
tf.random.set_seed(my_seed)
# 1,实现tensorflow动态按需分配GPU
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
# 打印使用的python库的版本信息
print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
print(module.__name__, module.__version__)
# 2,从tf.keras.datasets中取数据
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()
x_valid, x_train = x_train_all[:5000], x_train_all[5000:]
y_valid, y_train = y_train_all[:5000], y_train_all[5000:]
print(x_valid.shape, y_valid.shape)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
# 3,将数据整合为标准化数据
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# 整合数据为1通道数据
x_train_scaled = scaler.fit_transform(
x_train.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28, 1)
x_valid_scaled = scaler.transform(
x_valid.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28, 1)
x_test_scaled = scaler.transform(
x_test.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28, 1)
# 4,构建CNN模型
model = keras.models.Sequential()
model.add(keras.layers.Conv2D(filters=32, kernel_size=3,
padding='same',
activation='selu',
input_shape=(28, 28, 1)))
model.add(keras.layers.SeparableConv2D(filters=32, kernel_size=3,
padding='same',
activation='selu'))
model.add(keras.layers.MaxPool2D(pool_size=2))
model.add(keras.layers.SeparableConv2D(filters=64, kernel_size=3,
padding='same',
activation='selu'))
model.add(keras.layers.SeparableConv2D(filters=64, kernel_size=3,
padding='same',
activation='selu'))
model.add(keras.layers.MaxPool2D(pool_size=2))
model.add(keras.layers.SeparableConv2D(filters=128, kernel_size=3,
padding='same',
activation='selu'))
model.add(keras.layers.SeparableConv2D(filters=128, kernel_size=3,
padding='same',
activation='selu'))
model.add(keras.layers.MaxPool2D(pool_size=2))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation='selu'))
model.add(keras.layers.Dense(10, activation="softmax"))
model.compile(loss="sparse_categorical_crossentropy",
optimizer = "sgd",
metrics = ["accuracy"])
# 5,查看模型层级和参数
model.summary()
# 6,定义callback 并 训练模型
logdir = './separable-cnn-selu-callbacks'
if not os.path.exists(logdir):
os.mkdir(logdir)
output_model_file = os.path.join(logdir,
"fashion_mnist_model.h5")
callbacks = [
keras.callbacks.TensorBoard(logdir),
keras.callbacks.ModelCheckpoint(output_model_file,
save_best_only = True),
keras.callbacks.EarlyStopping(patience=5, min_delta=1e-3),
]
history = model.fit(x_train_scaled, y_train, epochs=30,
validation_data=(x_valid_scaled, y_valid),
callbacks = callbacks)
# 7,打印训练曲线
def plot_learning_curves(history):
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 3)
plt.show()
plot_learning_curves(history)
# 8,打印估计器结果
print(model.evaluate(x_test_scaled, y_test, verbose = 0))