转自:Getting Started With Bounding Box Regression In TensorFlow
数据集来自 Kaggle,共有 373 张图像,分为三类(黄瓜、茄子、蘑菇),每张图像的边框信息存放在对应的 XML 文件中
如图所示,一个边框可以用元组 (xmin, ymin, xmax, ymax) 表示
可以借助LabelImg 软件,对指定的图片添加边框,会自动生成xml格式的边框数据文件
下载 image-localization-dataset.zip 文件并解压;使用 glob 包筛选出 jpg 文件
import numpy as np
# Side length in pixels that every training image is resized to; the
# bounding-box coordinates are divided by this same value further down.
input_dim = 228
from PIL import Image , ImageDraw
import os
import glob
# Load every training image, resize it to input_dim x input_dim and scale the
# pixel values from 0..255 down to 0..1.
images = []
image_paths = glob.glob( 'training_images/*.jpg' )
for imagefile in image_paths:
    # The context manager releases the file handle as soon as the resized
    # copy has been produced.
    with Image.open( imagefile ) as raw_image:
        # Force three channels: a grayscale or CMYK JPEG would otherwise give
        # an array of a different shape and break np.array( images ) later.
        image = raw_image.convert( 'RGB' ).resize( ( input_dim , input_dim ) )
    image = np.asarray( image ) / 255.0
    images.append( image )
处理边框 XML 文件,使用 xmltodict 包将该类型文件转为 Python 字典
import xmltodict
import os
# Read one bounding box per training image from its XML annotation file.
bboxes = []
# Derive each annotation path from the matching image path instead of running
# a second, independent glob: glob.glob() returns files in arbitrary order, so
# two separate listings are not guaranteed to pair image i with box i.
# A missing annotation now fails loudly (FileNotFoundError) instead of
# silently shifting every following box onto the wrong image.
annotations_paths = [ os.path.splitext( imagefile )[0] + '.xml' for imagefile in image_paths ]
for xmlfile in annotations_paths:
    # Close the file handle deterministically once it has been parsed.
    with open( xmlfile , 'rb' ) as xml_handle:
        x = xmltodict.parse( xml_handle )
    annotated_object = x[ 'annotation' ][ 'object' ]
    # xmltodict turns repeated <object> elements into a list; this pipeline
    # regresses a single box per image, so keep the first one.
    if isinstance( annotated_object , list ):
        annotated_object = annotated_object[0]
    bndbox = annotated_object[ 'bndbox' ]
    bndbox = np.array([ int(bndbox[ 'xmin' ]) , int(bndbox[ 'ymin' ]) , int(bndbox[ 'xmax' ]) , int(bndbox[ 'ymax' ]) ])
    # NOTE(review): dividing by input_dim assumes the annotated source images
    # are input_dim pixels wide and high - confirm against the dataset;
    # otherwise normalize by the image size recorded in the annotation.
    bboxes.append( bndbox / input_dim )
划分训练集与测试集
from sklearn.model_selection import train_test_split
# Stack the per-image lists into arrays. The boxes are reshaped to
# ( N , 1 , 1 , 4 ), the same layout the prediction loop indexes later.
X = np.array( images )
Y = np.array( bboxes ).reshape( ( -1 , 1 , 1 , 4 ) )
for stacked in ( X , Y ):
    print( stacked.shape )
# Hold out 10% of the samples for validation.
x_train, x_test, y_train, y_test = train_test_split( X, Y, test_size=0.1 )
先定义损失函数,同时使用均方误差 (MSE) 和交并比 Intersection over Union (IoU)。
交并比是两个边框重合部分(交集)的面积与两者合并后覆盖区域(并集)面积的比值
import tensorflow as tf
# Shape of one network input: input_dim x input_dim pixels, 3 channels.
input_shape = (input_dim, input_dim, 3)
# NOTE(review): input_shape, dropout_rate and alpha are not referenced by any
# other code visible in this file - confirm whether they are still needed.
dropout_rate, alpha = 0.5, 0.2
def calculate_iou( target_boxes , pred_boxes ):
    """Return the element-wise Intersection over Union of two box tensors.

    Both arguments store boxes as ( xmin , ymin , xmax , ymax ) along the
    last axis; every leading axis is carried through unchanged.

    Returns a tensor without the last axis holding intersection / union for
    each pair of boxes, and 0 where the union area is zero.
    """
    # Top-left corner of the overlap: the larger of the two xmin / ymin.
    xA = tf.math.maximum( target_boxes[ ... , 0], pred_boxes[ ... , 0] )
    yA = tf.math.maximum( target_boxes[ ... , 1], pred_boxes[ ... , 1] )
    # Bottom-right corner of the overlap: the SMALLER of the two xmax / ymax.
    # Taking the maximum here would measure the far corner of the enclosing
    # box instead and overstate the intersection.
    xB = tf.math.minimum( target_boxes[ ... , 2], pred_boxes[ ... , 2] )
    yB = tf.math.minimum( target_boxes[ ... , 3], pred_boxes[ ... , 3] )
    # Clamp at zero so disjoint boxes contribute no overlap.
    interArea = tf.math.maximum( 0.0 , xB - xA ) * tf.math.maximum( 0.0 , yB - yA )
    boxAArea = (target_boxes[ ... , 2] - target_boxes[ ... , 0]) * (target_boxes[ ... , 3] - target_boxes[ ... , 1])
    boxBArea = (pred_boxes[ ... , 2] - pred_boxes[ ... , 0]) * (pred_boxes[ ... , 3] - pred_boxes[ ... , 1])
    unionArea = boxAArea + boxBArea - interArea
    # divide_no_nan yields 0 instead of NaN when both boxes are degenerate,
    # which keeps a single bad sample from poisoning the loss.
    iou = tf.math.divide_no_nan( interArea , unionArea )
    return iou
def custom_loss( y_true , y_pred ):
    """Training loss: mean squared error plus the IoU shortfall ( 1 - IoU )."""
    iou_penalty = 1 - calculate_iou( y_true , y_pred )
    squared_error = tf.losses.mean_squared_error( y_true , y_pred )
    return squared_error + iou_penalty
def iou_metric( y_true , y_pred ):
    """Metric wrapper: the IoU between the true and the predicted boxes."""
    overlap_ratio = calculate_iou( y_true , y_pred )
    return overlap_ratio
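创建 CNN 模型,堆叠部分 Conv2D 卷积层,将其输出展开并通过全连接 Dense 层。
为避免过拟合,在全连接层使用 Dropout 和 LeakyReLU 激活函数。
(注:下方代码实际只堆叠了 Conv2D 与 BatchNormalization 层,并由最后一个 sigmoid 激活的 Conv2D 直接输出 4 个边框坐标,未包含 Dense、Dropout 或 LeakyReLU。)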
import tensorflow.keras as keras
def _conv( filters , kernel=3 , stride=1 , activation='relu' , **extra ):
    """Build one Conv2D layer with a square kernel."""
    return keras.layers.Conv2D(
        filters ,
        kernel_size=( kernel , kernel ) ,
        strides=stride ,
        activation=activation ,
        **extra
    )


# ( filters , number of Conv2D-Conv2D-BatchNormalization groups ) per stage.
_conv_stages = ( ( 256 , 3 ) , ( 128 , 3 ) , ( 64 , 2 ) , ( 32 , 4 ) )

# Stem: two stride-2 convolutions followed by batch normalization.
model_layers = [
    _conv( 256 , stride=2 , input_shape=( input_dim , input_dim , 3 ) ),
    _conv( 256 , stride=2 ),
    keras.layers.BatchNormalization(),
]
# Body: stride-1 convolution pairs, each pair followed by batch normalization.
for stage_filters , group_count in _conv_stages:
    for _ in range( group_count ):
        model_layers += [
            _conv( stage_filters ),
            _conv( stage_filters ),
            keras.layers.BatchNormalization(),
        ]
# Head: a 16-filter pair without normalization, then three 2x2 convolutions
# with 4 filters; the last one uses a sigmoid activation.
model_layers += [
    _conv( 16 ),
    _conv( 16 ),
    _conv( 4 , kernel=2 ),
    _conv( 4 , kernel=2 ),
    _conv( 4 , kernel=2 , activation='sigmoid' ),
]
# Assemble the network from the layer list and configure training with the
# combined MSE + IoU loss, reporting IoU as a metric.
model = keras.Sequential( model_layers )
model.compile(
    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer=keras.optimizers.Adam( learning_rate=0.0001 ),
    loss=custom_loss,
    metrics=[ iou_metric ]
)
model.summary()
# Train for 100 epochs in mini-batches of 3, validating on the held-out
# split after every epoch, then write the trained model to disk.
fit_options = dict(
    validation_data=( x_test , y_test ),
    epochs=100,
    batch_size=3,
)
model.fit( x_train , y_train , **fit_options )
model.save( 'model.h5' )
输出结果
import shutil

# Start from an empty output directory. These calls replace the notebook-only
# `!rm -rf` / `!mkdir` shell commands so the cell also runs as plain Python.
shutil.rmtree( 'inference_images' , ignore_errors=True )
os.makedirs( 'inference_images' )
# Predict a box for every held-out image and save the image with the box drawn.
boxes = model.predict( x_test )
for i , ( box , pixels ) in enumerate( zip( boxes , x_test ) , start=1 ):
    # Scale the normalized prediction back to pixel coordinates and hand PIL
    # plain Python floats rather than a NumPy array.
    b = ( box[ 0 , 0 , 0 : 4 ] * input_dim ).tolist()
    # Round before casting: astype( np.uint8 ) truncates, so a value such as
    # 254.99999 produced by the /255.0 * 255 round trip would become 254.
    img = np.rint( pixels * 255 )
    source_img = Image.fromarray( img.astype( np.uint8 ) , 'RGB' )
    draw = ImageDraw.Draw( source_img )
    draw.rectangle( b , outline="black" )
    source_img.save( 'inference_images/image_{}.png'.format( i ) , 'png' )