【参考资料】
【1】https://www.kaggle.com/phoenigs/u-net-dropout-augmentation-stratification
【2】https://blog.csdn.net/u012931582/article/details/70215756
【3】http://www.cnblogs.com/gujianhan/p/6030639.html
- 输入三通道RGB图像: 3 * 512 * 512
- 目标分为10类,则输出为10 * 512 * 512,表示每个像素的分类概率
- 第一步: 利用16 * 3 * 3 * 3 卷积核,步长为1操作,输出为16 * 512 * 512; (即16次用3 * 3 * 3的卷积核对3 * 512 * 512 图像进行卷积操作)
- 第二步: 利用32 * 16 * 3 * 3 卷积核,步长为1操作,输出 32 * 512 * 512;
- 最后一步: 利用10 * 32 * 3 * 3 卷积核,步长为1操作,输出 10 * 512 * 512
采用反卷积的方式形成一种图像分类,个人理解就是先做卷积进行特征分类,然后再用插值的方式将这种分类扩大化;
要注意的是,FCN不是将最后一层卷积输出做分类,而是要利用中间的输出进行融合以获取更多的细节, 如下图:
##### 代码实现
备注:该代码参考kaggle竞赛中《TGS Salt Identification Challenge》的一个kernel,见【参考资料】【1】。基本调通,并增加了注释,但实际测试训练只有一次,个人MAC实在太慢了。。。。
https://www.kaggle.com/c/tgs-salt-identification-challenge
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras import Model
from keras.preprocessing.image import load_img
from keras.layers import Input, Conv2D, Conv2DTranspose, MaxPooling2D, concatenate, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from skimage.transform import resize
img_size_ori = 101 # side length of the original images/masks (101x101)
img_size_target = 128 # side length fed to the network (resized to 128x128)
"""
将输入【0, 1】按照每隔0.1进行分类,返回分类值
"""
def cov_to_class(val):
for i in range(0, 11):
if val * 10 <= i :
return i
"""
将图像调整到128*128
"""
def upsample(img):
if img_size_ori == img_size_target:
return img
return resize(img, (img_size_target, img_size_target), mode='constant', preserve_range=True)
"""
将图像回到101 * 101
"""
def downsample(img):
if img_size_ori == img_size_target:
return img
return resize(img, (img_size_ori, img_size_ori), mode='constant', preserve_range=True)
def build_unet_model(input_layer, start_neurons):
    """Assemble a U-Net and return its output layer.

    Encoder: four (Conv-Conv-MaxPool-Dropout) stages halving the spatial
    size 128 -> 64 -> 32 -> 16 -> 8 while doubling the filter count
    (start_neurons is the filter count of the first stage).
    Decoder: mirrored Conv2DTranspose upsampling; each upsampled map is
    concatenated with the matching encoder feature map (the FCN/U-Net skip
    connection) to recover spatial detail lost during pooling.
    Output: a single-channel sigmoid map — per-pixel foreground probability.
    """
    def double_conv(x, filters):
        # Two stacked 3x3 same-padding ReLU convolutions.
        x = Conv2D(filters, (3, 3), activation="relu", padding="same")(x)
        return Conv2D(filters, (3, 3), activation="relu", padding="same")(x)

    # ---- encoder: 128 -> 64 -> 32 -> 16 -> 8 ----
    skips = []
    x = input_layer
    for mult, drop_rate in zip((1, 2, 4, 8), (0.25, 0.5, 0.5, 0.5)):
        features = double_conv(x, start_neurons * mult)
        skips.append(features)          # saved for the decoder skip connection
        x = MaxPooling2D((2, 2))(features)
        x = Dropout(drop_rate)(x)

    # ---- bottleneck at 8x8 ----
    x = double_conv(x, start_neurons * 16)

    # ---- decoder: 8 -> 16 -> 32 -> 64 -> 128 ----
    for mult, skip in zip((8, 4, 2, 1), reversed(skips)):
        upsampled = Conv2DTranspose(start_neurons * mult, (3, 3),
                                    strides=(2, 2), padding="same")(x)
        # Fuse fine-grained encoder features with the upsampled coarse ones.
        x = concatenate([upsampled, skip])
        x = Dropout(0.5)(x)
        x = double_conv(x, start_neurons * mult)

    # 1x1 convolution collapses the feature maps to one probability per pixel.
    output_layer = Conv2D(1, (1, 1), padding="same", activation="sigmoid")(x)
    return output_layer
if __name__ == "__main__":
"""
测试全连接卷积神经网络u-net
参考: https://www.kaggle.com/phoenigs/u-net-dropout-augmentation-stratification
"""
train_df = pd.read_csv("../01.data/all/train.csv", index_col="id", usecols=[0])
depths_df = pd.read_csv("../01.data/all/depths.csv", index_col="id")
#该join方法类似数据库中的left join,若depth_df中的index在train_df中不存在,则数据被抛弃
#在本例中depths_df中有22000行,join操作后train_df为4000行 id + 深度
train_df = train_df.join(depths_df)
#该方法取反,即取出上面代码中22000 - 4000 = 18000行数据
test_df = depths_df[~depths_df.index.isin(train_df.index)]
train_df["images"] = [np.array(load_img("../01.data/all/train/images/{}.png".format(idx), color_mode="grayscale")) / 255
for idx in train_df.index]
train_df["masks"] = [np.array(load_img("../01.data/all/train/masks/{}.png".format(idx), color_mode="grayscale")) / 255
for idx in train_df.index]
"""
print("id = {}, depth = {}, iamges = {}, masks = {}".format(train_df.index[0], train_df["z"][0],
train_df["images"][0], train_df["masks"][0]))
id = 575d24d81d, depth = 843, iamges = [[0.5254902 0.51372549 0.5254902 ... 0.5254902 0.53333333 0.53333333]
[0.48627451 0.48235294 0.4745098 ... 0.52156863 0.48627451 0.4745098 ]
[0.44705882 0.4627451 0.45490196 ... 0.5372549 0.50196078 0.47843137]
...
[0.69803922 0.69803922 0.68627451 ... 0.47058824 0.47058824 0.49411765]
[0.70588235 0.70196078 0.69411765 ... 0.42745098 0.43921569 0.46666667]
[0.71372549 0.69803922 0.69803922 ... 0.37647059 0.38823529 0.41176471]], masks = [[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]
"""
#计算覆盖率,mask图像为1的像素求和/原始图像的尺寸
train_df["coverage"] = train_df.masks.map(np.sum) / pow(img_size_ori, 2)
#通过覆盖率的值将其分成11个种类,属于[0,1,2,3,4,5,6,7,8,9,10]
train_df["coverage_class"] = train_df.coverage.map(cov_to_class)
"""
**创建测试数据**
在train_test_split函数中,前四个参数的值被按照0.2的比例被划分成ids、x、y、conv、depth五组训练/测试数据
"""
ids_train, ids_valid, x_train, x_valid, y_train, y_valid, cov_train, cov_test, depth_train, depth_test\
= train_test_split(
train_df.index.values,
np.array(train_df.images.map(upsample).tolist()).reshape(-1, img_size_target, img_size_target, 1),
np.array(train_df.masks.map(upsample).tolist()).reshape(-1, img_size_target, img_size_target, 1),
train_df.coverage.values,
train_df.z.values,
test_size=0.2, stratify=train_df.coverage_class, random_state=1337)
"""
**构建模型**
"""
input_layer = Input((img_size_target, img_size_target, 1))
output_layer = build_unet_model(input_layer, 16)
model = Model(input_layer, output_layer)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()
"""
**训练网络**
EarlyStopping: 防止过拟合,当监视的量(如val_loss)被激活后,即训练后并无下降,则patience后停止训练,verbose为信息的展示模式
ModelCheckpoint: 保存模型,save_best_only存储验证集下最好的模型在"./keras.model"下
ReduceLROnPlateau: 当patience个周期后监视的量(如val_loss)不在提升的时候,以factor为因子降低学习率
"""
early_stopping = EarlyStopping(patience=10, verbose=1)
model_checkpoint = ModelCheckpoint("./keras.model", save_best_only=True, verbose=1)
reduce_lr = ReduceLROnPlateau(factor=0.1, patience=5, min_lr=0.00001, verbose=1)
epochs = 1 #训练的次数,我的破MAC直接跑不动:(
batch_size = 32
history = model.fit(x_train, y_train,
validation_data=[x_valid, y_valid],
epochs=epochs,
batch_size=batch_size,
callbacks=[early_stopping, model_checkpoint, reduce_lr])
"""
**预测**
显示max_images张图像,对比预测结果和实际结果
"""
preds_valid = model.predict(x_valid).reshape(-1, img_size_target, img_size_target)
preds_valid = np.array([downsample(x) for x in preds_valid])
y_valid_ori = np.array([train_df.loc[idx].masks for idx in ids_valid])
max_images = 20
grid_width = 10
grid_height = int(max_images / grid_width)
fig, axs = plt.subplots(grid_height, grid_width, figsize=(grid_width, grid_height))
for i, idx in enumerate(ids_valid[:max_images]):
img = train_df.loc[idx].images
mask = train_df.loc[idx].masks
pred = preds_valid[i]
ax = axs[int(i / grid_width), i % grid_width]
ax.imshow(img, cmap="Greys") #原始图像
ax.imshow(mask, alpha=0.3, cmap="Greens") #原始图像的mask
ax.imshow(pred, alpha=0.5, cmap="OrRd") #预测结果的mask
ax.text(1, img_size_ori-1, train_df.loc[idx].z, color="black")
ax.text(img_size_ori - 1, 1, round(train_df.loc[idx].coverage, 2), color="black", ha="right", va="top")
ax.text(1, 1, train_df.loc[idx].coverage_class, color="black", ha="left", va="top")
ax.set_yticklabels([])
ax.set_xticklabels([])
plt.suptitle("Green: salt, Red: prediction. Top-left: coverage class, top-right: salt coverage, bottom-left: depth")
plt.show()