Find the location of each target in an image and determine its class.
Files to prepare: the YOLOv3 weights file and network configuration file, plus a text file listing the names of all classes in the COCO dataset.
Netdisk link: https://pan.baidu.com/s/1cZUwpvDJUhchxIRGQBMZUQ
Extraction code: 0fch
import numpy as np
import cv2
# Map class index -> class name; coco_name.txt holds one class name per line
coco_name_txt = open('./dnn/coco_name.txt', 'r').readlines()
coco_category_names = {i: name.strip() for i, name in enumerate(coco_name_txt)}
print(coco_category_names)
image = cv2.imread('./dnn/dog.jpg')
img_height, img_width = image.shape[:2]
# Normalize pixel values, resize to (416, 416), and swap BGR -> RGB
blob_img = cv2.dnn.blobFromImage(image, scalefactor=1.0/255.0, size=(416, 416),
                                 mean=0, swapRB=True, crop=False)
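As a quick sanity check (an optional sketch, not part of the original tutorial), blobFromImage returns an NCHW tensor matching the size requested above:

# The blob is NCHW: (batch, channels, height, width)
print(blob_img.shape)   # expected: (1, 3, 416, 416)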
# Load the YOLOv3 configuration file and weights file
net = cv2.dnn.readNetFromDarknet('./dnn/yolov3.cfg', './dnn/yolov3.weights')
# Run the forward pass on the CPU; GPU acceleration is also possible
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
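If your OpenCV build includes CUDA support (an assumption -- the standard pip wheels are CPU-only, and OpenCV >= 4.2 is required), the two lines above can be swapped for a GPU target:

# GPU variant -- only works when OpenCV was compiled with CUDA enabled
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)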
# Feed the image into the network, run the forward pass, and collect the outputs
net.setInput(blob_img)
outNames = net.getUnconnectedOutLayersNames()
outs = net.forward(outNames)
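YOLOv3 predicts at three scales, so for a 416x416 input outs should contain three arrays whose rows are 85-dimensional: 4 box coordinates + 1 objectness score + 80 class scores. A quick inspection sketch (the exact row counts assume the standard yolov3.cfg):

# 13*13*3 = 507, 26*26*3 = 2028, 52*52*3 = 8112 candidate boxes per scale
for name, out in zip(outNames, outs):
    print(name, out.shape)   # e.g. (507, 85), (2028, 85), (8112, 85)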
# Decode the network output into real boxes, class labels, and confidences
ConThreshold = 0.5   # confidence threshold
NMSThreshold = 0.5   # overlap (IoU) threshold for non-maximum suppression
all_boxes = []
all_confidences = []
all_categories = []
for out in outs:
    # Each output row holds 4 box coordinates, 1 objectness score, 80 class scores
    box, confidence, category = np.split(out, [4, 5], axis=-1)
    for i in range(len(confidence)):
        if confidence[i] > ConThreshold and np.max(category[i]) > ConThreshold:
            # Box is encoded as center point plus width and height, normalized to [0, 1]
            x_center, y_center, w, h = box[i]
            x_center = x_center * img_width
            y_center = y_center * img_height
            w = w * img_width
            h = h * img_height
            # Convert to top-left/bottom-right form; coordinates must be int
            xmin = int(x_center - w / 2)
            ymin = int(y_center - h / 2)
            xmax = int(x_center + w / 2)
            ymax = int(y_center + h / 2)
            all_boxes.append([xmin, ymin, xmax, ymax])
            all_categories.append(int(np.argmax(category[i])))
            all_confidences.append(float(np.max(category[i])))  # must be float
After extracting every box whose confidence exceeds the threshold, draw them all:
colors = np.random.randint(0, 256, (80, 3)).tolist()  # one random color per class
for i in range(len(all_boxes)):
    category_name = coco_category_names[all_categories[i]]
    confidence = all_confidences[i]
    xmin, ymin, xmax, ymax = all_boxes[i]
    color = colors[all_categories[i]]
    scalar = (color[0], color[1], color[2])
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), scalar, 2)
    text = category_name + ' ' + str(round(confidence, 2))
    cv2.putText(image, text, (xmin, ymin), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 255, 0), 1)
cv2.namedWindow('result', cv2.WINDOW_AUTOSIZE)
cv2.imshow('result', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
From the result in 2.3 you can see that each target ends up with several overlapping boxes. Non-maximum suppression (NMS) resolves this: when the overlap (IoU) of two boxes exceeds the threshold, they are assumed to cover the same target, and only the box with the highest confidence is kept as the final result.
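To make that rule concrete, here is a minimal pure-NumPy NMS sketch (for illustration only; the OpenCV call below is what this tutorial actually uses). It assumes boxes in (xmin, ymin, xmax, ymax) form:

def nms(boxes, scores, iou_threshold):
    # Greedy NMS: keep the highest-scoring box, drop every remaining box
    # whose IoU with it exceeds iou_threshold, then repeat.
    boxes = np.asarray(boxes, dtype=np.float32)
    order = np.argsort(scores)[::-1]   # candidate indices, best score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        # Intersection of box i with all remaining candidates
        x1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
        y1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
        x2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
        y2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
        inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        areas = (boxes[order[1:], 2] - boxes[order[1:], 0]) * \
                (boxes[order[1:], 3] - boxes[order[1:], 1])
        iou = inter / (area_i + areas - inter)
        order = order[1:][iou <= iou_threshold]   # keep only low-overlap boxes
    return keep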
When using OpenCV's NMS implementation, note that cv2.dnn.NMSBoxes expects each box as an integer (x, y, width, height) rectangle rather than the top-left/bottom-right form stored above, and scores must be a list of floats.
# Convert (xmin, ymin, xmax, ymax) boxes to the (x, y, w, h) form NMSBoxes expects
nms_boxes = [[x1, y1, x2 - x1, y2 - y1] for x1, y1, x2, y2 in all_boxes]
indices = cv2.dnn.NMSBoxes(nms_boxes, all_confidences, ConThreshold, NMSThreshold)
image = cv2.imread('./dnn/dog.jpg')
colors = np.random.randint(0, 256, (80, 3)).tolist()  # one random color per class
for i in np.array(indices).flatten():  # index shape differs across OpenCV versions
    category_name = coco_category_names[all_categories[i]]
    confidence = all_confidences[i]
    xmin, ymin, xmax, ymax = all_boxes[i]
    color = colors[all_categories[i]]
    scalar = (color[0], color[1], color[2])
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), scalar, 2)
    text = category_name + ' ' + str(round(confidence, 2))
    cv2.putText(image, text, (xmin, ymin), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 255, 0), 1)
cv2.namedWindow('result', cv2.WINDOW_AUTOSIZE)
cv2.imshow('result', image)
cv2.waitKey(0)
cv2.destroyAllWindows()