PyTorch 和 TensorFlow 都内置了实现 NMS(非极大值抑制)的函数。这里给出两种基于 TensorFlow/Keras 的实现:第一种来自 EfficientDet,是 GPU 版本,同时支持正框和斜框。
from tensorflow import keras
import tensorflow as tf
def filter_detections(
        boxes,
        classification,
        alphas=None,
        ratios=None,
        class_specific_filter=True,
        nms=True,
        score_threshold=0.01,
        max_detections=100,
        nms_threshold=0.5,
        detect_quadrangle=False,
):
    """
    Filter detections using the boxes and classification values.

    Candidates are first thresholded on score, optionally reduced with
    non maximum suppression, and finally cut down to the ``max_detections``
    highest-scoring entries. Outputs always have a fixed leading dimension of
    ``max_detections``; unused slots are padded with -1.

    Args
        boxes: Tensor of shape (num_boxes, 4) containing the boxes in (x1, y1, x2, y2) format.
        classification: Tensor of shape (num_boxes, num_classes) containing the classification scores.
        alphas: Tensor of shape (num_boxes, 4) that is filtered along with the boxes.
            Only used (and then required) when detect_quadrangle is True.
        ratios: Tensor of shape (num_boxes,) that is filtered along with the boxes.
            Only used (and then required) when detect_quadrangle is True.
        class_specific_filter: Whether to perform filtering per class, or take the best scoring class and filter those.
        nms: Flag to enable/disable non maximum suppression.
        score_threshold: Threshold used to prefilter the boxes with.
        max_detections: Maximum number of detections to keep.
        nms_threshold: Threshold for the IoU value to determine when a box should be suppressed.
        detect_quadrangle: Whether to also filter and return alphas and ratios.

    Returns
        [boxes, scores, labels] when detect_quadrangle is False, otherwise
        [boxes, scores, alphas, ratios, labels].
        boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2) of the non-suppressed boxes.
        scores is shaped (max_detections,) and contains the scores of the predicted class.
        labels is shaped (max_detections,), has dtype int32 and contains the predicted label.
        alphas is shaped (max_detections, 4) and ratios is shaped (max_detections,).
        In case there are less than max_detections detections, the tensors are padded with -1's.

    Raises
        ValueError: If detect_quadrangle is True but alphas or ratios is None.
    """
    # Fail early with a clear message instead of an opaque error from the
    # gather calls at the end of the function.
    if detect_quadrangle and (alphas is None or ratios is None):
        raise ValueError('alphas and ratios must be provided when detect_quadrangle is True')

    def _filter_detections(scores_, labels_):
        """Return (num_keeps, 2) rows of [box_index, label] surviving threshold and NMS."""
        # threshold based on score
        # tf.where on a 1-D mask yields one index per row: (num_score_keeps, 1)
        indices_ = tf.where(tf.greater(scores_, score_threshold))

        if nms:
            # (num_score_keeps, 4)
            filtered_boxes = tf.gather_nd(boxes, indices_)
            # gathering with (num_score_keeps, 1) indices keeps the trailing
            # axis, so drop it to get a flat (num_score_keeps,) score vector
            filtered_scores = tf.gather(scores_, indices_)[:, 0]

            # perform NMS
            # tf.image.non_max_suppression documents boxes as (y1, x1, y2, x2);
            # IoU is unchanged when x and y are swapped for every box, so the
            # (x1, y1, x2, y2) boxes are passed through without reordering.
            nms_indices = tf.image.non_max_suppression(
                filtered_boxes,
                filtered_scores,
                max_output_size=max_detections,
                iou_threshold=nms_threshold,
            )

            # filter indices based on NMS
            # (num_score_nms_keeps, 1)
            indices_ = tf.gather(indices_, nms_indices)

        # look up the label of every kept box
        # (num_score_nms_keeps, )
        labels_ = tf.gather_nd(labels_, indices_)
        # (num_score_nms_keeps, 2)
        indices_ = tf.stack([indices_[:, 0], labels_], axis=1)

        return indices_

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(int(classification.shape[1])):
            scores = classification[:, c]
            # int64 so the labels can be stacked with the int64 output of tf.where
            labels = c * tf.ones((tf.shape(scores)[0],), dtype=tf.int64)
            all_indices.append(_filter_detections(scores, labels))

        # concatenate indices to single tensor
        # (concatenated_num_score_nms_keeps, 2)
        indices = tf.concat(all_indices, axis=0)
    else:
        scores = tf.reduce_max(classification, axis=1)
        labels = tf.argmax(classification, axis=1)
        indices = _filter_detections(scores, labels)

    # select top k
    scores = tf.gather_nd(classification, indices)
    labels = indices[:, 1]
    scores, top_indices = tf.nn.top_k(scores, k=tf.minimum(max_detections, tf.shape(scores)[0]))

    # filter input using the final set of indices
    indices = tf.gather(indices[:, 0], top_indices)
    boxes = tf.gather(boxes, indices)
    labels = tf.gather(labels, top_indices)

    # pad the outputs with -1 up to max_detections entries
    pad_size = tf.maximum(0, max_detections - tf.shape(scores)[0])
    boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
    labels = tf.pad(labels, [[0, pad_size]], constant_values=-1)
    labels = tf.cast(labels, tf.int32)

    # set shapes, since we know what they are
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])

    if detect_quadrangle:
        # alphas / ratios are selected with the same box indices as the boxes
        alphas = tf.gather(alphas, indices)
        ratios = tf.gather(ratios, indices)
        alphas = tf.pad(alphas, [[0, pad_size], [0, 0]], constant_values=-1)
        ratios = tf.pad(ratios, [[0, pad_size]], constant_values=-1)
        alphas.set_shape([max_detections, 4])
        ratios.set_shape([max_detections])
        return [boxes, scores, alphas, ratios, labels]

    return [boxes, scores, labels]
第二种是 CPU 版本,支持正框,参见:
https://blog.csdn.net/m0_37605642/article/details/98358864