网上总是说,直接用VGG输出的feature map做回归检测、预测目标的xy坐标非常难,效果也非常差,那到底有多差呢?
在PET数据集上测试,结果如图,平均IOU=46.0%
from keras.layers import Dense, Activation, Flatten, Convolution2D, Dropout, MaxPooling2D
from keras.optimizers import SGD, Adadelta, Adagrad
from keras.models import Sequential
import numpy as np
import tensorflow as tf
from keras import backend as K
from PIL import Image
import matplotlib.pyplot as plt
# Fix the random seeds (NumPy and TensorFlow).
np.random.seed(42)
# Restrict TensorFlow to a single thread for both intra-op and inter-op
# parallelism (presumably to reduce run-to-run nondeterminism — TODO confirm).
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
tf.set_random_seed(42)
# Create a session on the default graph with that config and register it as
# the session used by the Keras backend.
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
def _draw_box(image, box, color):
    """Draw the outline of ``box`` onto ``image`` in place.

    ``box`` is ``[xmin, xmax, ymin, ymax]`` where x indexes the first image
    axis and y the second (the convention used throughout this file). Each
    min/max pair is put in order and clamped to the image bounds so that
    out-of-range coordinates (e.g. raw regression outputs) neither wrap
    around nor raise ``IndexError``.
    """
    height, width = image.shape[0], image.shape[1]
    if height == 0 or width == 0:
        return

    x_lo, x_hi = min(box[0], box[1]), max(box[0], box[1])
    y_lo, y_hi = min(box[2], box[3]), max(box[2], box[3])

    # Truncate to pixel indices, then clamp to the last valid index per axis.
    x_lo = min(max(int(x_lo), 0), height - 1)
    x_hi = min(max(int(x_hi), 0), height - 1)
    y_lo = min(max(int(y_lo), 0), width - 1)
    y_hi = min(max(int(y_hi), 0), width - 1)

    # Two edges along the second axis, then two edges along the first axis.
    image[x_lo, y_lo:y_hi, :] = color
    image[x_hi, y_lo:y_hi, :] = color
    image[x_lo:x_hi, y_lo, :] = color
    image[x_lo:x_hi, y_hi, :] = color


def show_numpy_images(np_array_image, pred_y=None, true_y=None):
    """Show up to 28 images in a 4x7 grid, optionally with boxes drawn on them.

    Args:
        np_array_image: array indexed as ``[image, axis0, axis1, channel]``.
        pred_y: optional per-image predicted boxes ``[xmin, xmax, ymin, ymax]``,
            drawn in green.
        true_y: optional per-image ground-truth boxes in the same layout,
            drawn in white.

    When both ``pred_y`` and ``true_y`` are given, each subplot is titled with
    the IOU of the two boxes as a percentage.

    Boxes are drawn on a copy of each image, so ``np_array_image`` itself is
    left unmodified.
    """
    real_color = [255, 255, 255]
    pred_color = [0, 255, 0]

    plt.figure(figsize=(8, 8))
    # The grid has 28 cells; show fewer when the batch is smaller.
    shown = min(28, len(np_array_image))
    for im in range(shown):
        plt.subplot(4, 7, im + 1)
        # Copy so drawing does not write through the view into the dataset.
        image = np_array_image[im, :, :, :].copy()
        if true_y is not None:
            _draw_box(image, true_y[im, :4], real_color)
        if pred_y is not None:
            # Drawn second, so the prediction overwrites the ground truth
            # wherever the two outlines coincide.
            _draw_box(image, pred_y[im, :4], pred_color)
        if pred_y is not None and true_y is not None:
            iou = IOU(true_y[im, :], pred_y[im, :]) * 100.0
            plt.title("%.2f" % (iou))
        plt.imshow(image)
        plt.axis('off')
    plt.show()
def _clip_box(frame, image_size):
    """Return ``frame``'s four coordinates as ints clamped to ``[0, image_size]``."""
    return [min(max(int(frame[k]), 0), image_size) for k in range(4)]


def IOU(Reframe, GTframe, image_size=224):
    """Compute the intersection-over-union of two axis-aligned rectangles.

    Args:
        Reframe: first box as ``[xmin, xmax, ymin, ymax]``.
        GTframe: second box in the same layout.
        image_size: side length of the square pixel canvas the boxes live
            on; coordinates are clamped to ``[0, image_size]``.

    Returns:
        The IOU as a float in ``[0, 1]``. Returns ``0.0`` when both boxes are
        empty (instead of dividing by zero and yielding NaN).

    Coordinates are truncated to integer pixel indices, and each box covers
    the half-open range ``[min, max)`` on each axis. A box whose min is not
    below its max after truncation is treated as empty. Negative coordinates
    are clamped to 0 rather than being interpreted as wrap-around indices.
    """
    ax0, ax1, ay0, ay1 = _clip_box(Reframe, image_size)
    bx0, bx1, by0, by1 = _clip_box(GTframe, image_size)

    area_a = max(0, ax1 - ax0) * max(0, ay1 - ay0)
    area_b = max(0, bx1 - bx0) * max(0, by1 - by0)

    # Overlap extent on each axis; a non-positive value means no overlap.
    overlap_x = max(0, min(ax1, bx1) - max(ax0, bx0))
    overlap_y = max(0, min(ay1, by1) - max(ay0, by0))
    intersection = overlap_x * overlap_y

    union = area_a + area_b - intersection
    if union == 0:
        return 0.0
    return float(intersection) / union
# Load the data: images, VGG features and labels (per the file names).
# Raw strings keep the Windows backslashes literal, avoiding invalid escape
# sequences; the resulting paths are identical to the originals.
train_photo = np.load(r"E:\学习\cs\数据集\PET\dataset_x.npy")
train_data_x = np.load(r"E:\学习\cs\数据集\PET\vgg_dataset_x.npy")
train_data_y = np.load(r"E:\学习\cs\数据集\PET\dataset_y.npy")
print(train_data_x.shape, train_data_y.shape)  # (3686, 7, 7, 512) (3686, 5)

# Flatten each feature map into one vector per sample.
train_data_x = np.reshape(train_data_x, [train_data_x.shape[0], -1])
# The first four label columns are the box [xmin, xmax, ymin, ymax]; the fifth
# is kept separately as `train_data_lab` (presumably the class label — TODO confirm).
train_data_pos = train_data_y[:, :4]
train_data_lab = train_data_y[:, 4]
print(train_data_x.shape, train_data_pos.shape, train_data_lab.shape)  # (3686, 25088) (3686, 4) (3686,)

# Build the model: a fully connected regressor ending in 4 box coordinates.
model = Sequential()
model.add(Dense(2048, activation="relu", input_shape=(25088,)))
model.add(Dropout(0.25))
model.add(Dense(1024, activation="relu"))
model.add(Dropout(0.25))
model.add(Dense(512, activation="relu"))
model.add(Dropout(0.25))
model.add(Dense(64))
model.add(Dense(4))
model.summary()
# Classification accuracy is meaningless for coordinate regression, so report
# mean absolute error alongside the MSE loss instead.
model.compile(loss='mse', optimizer='adadelta', metrics=['mae'])
# NOTE(review): `nb_epoch` is the Keras 1 spelling (newer Keras uses `epochs`);
# left unchanged because the installed Keras version is not visible here.
# Original author's note, presumably a result per epoch count:
# 15 -> 4%, 20 -> 42%, 25 -> 7% — TODO confirm which metric this refers to.
model.fit(x=train_data_x, y=train_data_pos, batch_size=128, nb_epoch=20, verbose=1)

# Look at the final result.
# NOTE(review): predictions are made on the same data the model was fitted on,
# so the figures below are training-set results, not held-out ones.
pred_y = model.predict(train_data_x)  # (3686, 4)
show_numpy_images(train_photo, pred_y, true_y=train_data_pos)
ious = [IOU(true_box, pred_box) for true_box, pred_box in zip(train_data_pos, pred_y)]
print("平均IOU", np.average(ious))