将VGG提取的feature map直接用作回归目标检测有多差?

网上总是说,直接用VGG输出的feature map做回归来预测目标的xy坐标非常难,效果也非常差,那到底有多差呢?

在PET数据集上测试,结果如图,平均IOU=46.0%

将VGG提取的feature map直接用作回归目标检测有多差?_第1张图片

from keras.layers import Dense, Activation, Flatten, Convolution2D, Dropout, MaxPooling2D
from keras.optimizers import SGD, Adadelta, Adagrad
from keras.models import Sequential
import numpy as np
import tensorflow as tf
from keras import backend as K
from PIL import Image
import matplotlib.pyplot as plt

# Fix the NumPy and TensorFlow random seeds, and run TensorFlow with a single
# intra-op thread and a single inter-op thread.
np.random.seed(42)
tf.set_random_seed(42)
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1,
)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
# Make Keras use the session configured above.
K.set_session(sess)

def _draw_box(image, box, color):
    """Draw the outline of ``box`` onto ``image`` in place.

    ``box`` is ``[xmin, xmax, ymin, ymax]``; x indexes axis 0 of the image and
    y indexes axis 1. Each min/max pair is sorted and clipped to the image
    bounds, so out-of-range or negative values cannot raise IndexError or
    wrap around to the opposite edge.
    """
    rows, cols = image.shape[0], image.shape[1]
    x_lo, x_hi = sorted((int(box[0]), int(box[1])))
    y_lo, y_hi = sorted((int(box[2]), int(box[3])))
    x_lo, x_hi = min(max(x_lo, 0), rows - 1), min(max(x_hi, 0), rows - 1)
    y_lo, y_hi = min(max(y_lo, 0), cols - 1), min(max(y_hi, 0), cols - 1)
    # Two edges along axis 1, then two edges along axis 0 (corners included).
    image[x_lo, y_lo:y_hi + 1, :] = color
    image[x_hi, y_lo:y_hi + 1, :] = color
    image[x_lo:x_hi + 1, y_lo, :] = color
    image[x_lo:x_hi + 1, y_hi, :] = color


def show_numpy_images(np_array_image,pred_y=None,true_y=None):
    """Show up to 28 images in a 4x7 grid with optional box overlays.

    Args:
        np_array_image: array indexed as ``[image, axis0, axis1, channel]``.
        pred_y: optional per-image predicted boxes ``[xmin, xmax, ymin, ymax]``,
            drawn in green.
        true_y: optional per-image ground-truth boxes in the same layout,
            drawn in white.

    When both ``pred_y`` and ``true_y`` are given, each subplot is titled with
    the IOU of the two boxes as a percentage. The boxes are drawn on copies,
    so ``np_array_image`` itself is left unmodified.
    """
    plt.figure(figsize=(8, 8))
    # Do not assume there are at least 28 images available.
    shown = min(28, len(np_array_image))
    for im in range(shown):
        plt.subplot(4, 7, im + 1)
        # Copy so that drawing does not write through to the caller's array.
        image = np_array_image[im, :, :, :].copy()

        if true_y is not None:
            _draw_box(image, true_y[im, :4], [255, 255, 255])

        if pred_y is not None:
            _draw_box(image, pred_y[im, :4], [0, 255, 0])

        if pred_y is not None and true_y is not None:
            iou = IOU(true_y[im,:],pred_y[im,:])*100.0
            plt.title("%.2f"%(iou))
        plt.imshow(image)
        plt.axis('off')
    plt.show()

def IOU(Reframe, GTframe, size=224):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is ``[xmin, xmax, ymin, ymax]``. Coordinates are truncated to
    integers and treated as half-open pixel ranges on a ``size`` x ``size``
    canvas, as in the original mask-based computation.

    Args:
        Reframe: first box.
        GTframe: second box (the result is symmetric in the two boxes).
        size: side length of the square canvas the boxes are clipped to.

    Returns:
        IOU as a float in ``[0, 1]``; ``0.0`` when both boxes are empty, so
        the result is never NaN.
    """
    def _span(a, b):
        # Sort so a reversed min/max pair still describes the same box, then
        # clip to the canvas so negative values cannot wrap around.
        lo, hi = sorted((int(a), int(b)))
        return min(max(lo, 0), size), min(max(hi, 0), size)

    ax0, ax1 = _span(Reframe[0], Reframe[1])
    ay0, ay1 = _span(Reframe[2], Reframe[3])
    bx0, bx1 = _span(GTframe[0], GTframe[1])
    by0, by1 = _span(GTframe[2], GTframe[3])

    area_a = (ax1 - ax0) * (ay1 - ay0)
    area_b = (bx1 - bx0) * (by1 - by0)
    overlap_x = max(0, min(ax1, bx1) - max(ax0, bx0))
    overlap_y = max(0, min(ay1, by1) - max(ay0, by0))
    intersection = overlap_x * overlap_y
    union = area_a + area_b - intersection

    if union == 0:
        return 0.0
    return intersection / float(union)


# Load the data.
# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\d" or "\P".
train_photo = np.load(r"E:\学习\cs\数据集\PET\dataset_x.npy")
train_data_x = np.load(r"E:\学习\cs\数据集\PET\vgg_dataset_x.npy")
train_data_y = np.load(r"E:\学习\cs\数据集\PET\dataset_y.npy")
print(train_data_x.shape,train_data_y.shape)  # e.g. (3686, 7, 7, 512) (3686, 5)
# Flatten each sample's feature map into a single vector.
train_data_x = np.reshape(train_data_x,[train_data_x.shape[0], -1])
# First four columns are the box coordinates used as regression targets.
train_data_pos = train_data_y[:,:4]
# Fifth column; presumably the class label (not used by the code below).
train_data_lab = train_data_y[:,4]
print(train_data_x.shape,train_data_y.shape,train_data_lab.shape)  # e.g. (3686, 25088) (3686, 5) (3686,)

# Build the model: a stack of fully connected layers that regresses the four
# box coordinates directly from the flattened feature vector.
model = Sequential()
# Take the input width from the data rather than hard-coding 25088.
model.add(Dense(2048,activation="relu",input_shape=(train_data_x.shape[1],)))
model.add(Dropout(0.25))
model.add(Dense(1024,activation="relu"))
model.add(Dropout(0.25))
model.add(Dense(512,activation="relu"))
model.add(Dropout(0.25))
model.add(Dense(64))
model.add(Dense(4))
model.summary()
# Classification accuracy is meaningless for coordinate regression, so report
# mean absolute error alongside the MSE loss instead.
model.compile(loss='mse',optimizer='adadelta',metrics=['mae'])
# `epochs` is the current name of the deprecated `nb_epoch` argument.
# Original author's note, kept verbatim: 15:4% 20:42% 25:7%
model.fit(x=train_data_x,y=train_data_pos,batch_size=128,epochs=20,verbose=1)

# Inspect the final result: predict on the same data the model was fitted on,
# display the boxes, then report the mean IOU over all samples.
pred_y = model.predict(train_data_x)  # one row of 4 predicted values per sample
show_numpy_images(train_photo,pred_y,true_y=train_data_pos)

ious = [IOU(true_box, pred_box) for true_box, pred_box in zip(train_data_pos, pred_y)]
print("平均IOU",np.average(ious))

你可能感兴趣的:(计算机视觉)