基于Opencv深度学习的目标检测器,图片从本地导入

*Write a real-time object detector using deep learning and OpenCV*
基于Opencv深度学习的目标检测器,图片从本地导入

# USAGE: run the following in the PyCharm terminal:
# python RealTimeObjectDetection.py
# --prototxt MobileNetSSD_deploy.prototxt.txt
# --model MobileNetSSD_deploy.caffemodel
# --image  .\DADA\ILSVRC2017_test_00005476.JPEG


# 1. 导入包
from imutils.video import VideoStream  # VideoStream用来读取本地摄像头视频流
from imutils.video import FPS  # VideoStream用来计算本地摄像头视频帧数
import numpy as np
import argparse
import imutils
import time
import cv2
import os

# 2. Build the command-line parser and resolve the model file locations.
#
# `path` is the project directory that holds the model files and the image
# data. It supplies the defaults for --prototxt / --model, so running the
# script without any options behaves exactly as before, while the options
# shown in the USAGE header are now accepted instead of being rejected.
path = r"F:\python offer works\Project"

ap = argparse.ArgumentParser()
ap.add_argument("-c", "--confidence", type=float, default=0.2,
                help="minimum probability to filter weak detections")
ap.add_argument("-p", "--prototxt",
                default=os.path.join(path, 'MobileNetSSD_deploy.prototxt.txt'),
                help="path to the Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model",
                default=os.path.join(path, 'MobileNetSSD_deploy.caffemodel'),
                help="path to the Caffe pre-trained model file")

# vars() converts the parsed Namespace into a plain dict, so the rest of the
# script reads args["confidence"], args["prototxt"] and args["model"].
# args["image"] is not a command-line option: the detection loop assigns it
# for every frame it processes.
args = vars(ap.parse_args())

# 3. Class labels, indexed by the class id the detector reports, plus one
#    random drawing colour per class.
#    Labels must not carry surrounding whitespace: they are rendered verbatim
#    into the on-image caption (the original list contained " bottle").
CLASS = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle",
         "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
         "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
# One row of three channel values per class, drawn uniformly from [0, 255).
COLORS = np.random.uniform(0, 255, size=(len(CLASS), 3))


# 4. Load the Caffe network from the configured prototxt / caffemodel pair.
print("[INFO] loading model....")
net = cv2.dnn.readNetFromCaffe(
    args["prototxt"],
    args["model"],
)

# Treat every image file under the frame directory as one frame of a "video"
# and run the detector on each of them in turn.
# The path components are joined individually; the former single literal
# 'DATA\Basketball\img' relied on invalid escape sequences.
frames_root = os.path.join(path, 'DATA', 'Basketball', 'img')
stop_requested = False

# os.walk yields (directory path, sub-directory names, file names).
for dirpath, dirnames, filenames in os.walk(frames_root):
    # Listing order is arbitrary; sort so directories and frames are visited
    # in a deterministic (name) order. Sorting dirnames in place steers the
    # traversal order of os.walk.
    dirnames.sort()
    for filename in sorted(filenames):
        args["image"] = os.path.join(dirpath, filename)

        # 5. Load the frame and prepare the input blob.
        image = cv2.imread(args["image"])
        if image is None:
            # cv2.imread returns None for files it cannot decode
            # (non-image or corrupt files); skip them instead of crashing.
            print("[WARN] skipping unreadable image: {}".format(args["image"]))
            continue
        (h, w) = image.shape[:2]
        # The frame is resized to 300x300 and handed to blobFromImage with
        # scale factor 0.007843 and mean 127.5.
        blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)),
                                     0.007843, (300, 300), 127.5)

        # 6. Feed the blob through the network and fetch the detections.
        net.setInput(blob)
        # As indexed below: axis 2 enumerates the candidate boxes; along the
        # last axis, element 1 is the class id, element 2 the confidence and
        # elements 3:7 the box corners as fractions of the frame size.
        detections = net.forward()

        # 7. Draw a box and a label for every sufficiently confident detection.
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence <= args["confidence"]:
                continue

            idx = int(detections[0, 0, i, 1])
            # Scale the fractional corners to pixel coordinates.
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            # Plain Python ints / floats keep the drawing calls independent
            # of how a given OpenCV build converts numpy scalars.
            (startX, startY, endX, endY) = box.astype("int").tolist()
            color = COLORS[idx].tolist()

            label = "{}:{:.2f}%".format(CLASS[idx],
                                        confidence * 100)
            cv2.rectangle(image, (startX, startY), (endX, endY), color, 2)
            # Put the label above the box unless that would run off the top
            # of the frame, in which case put it just inside the box.
            y = startY - 15 if startY - 15 > 15 else startY + 15
            cv2.putText(image, label, (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # 8. Show the annotated frame; 'q' stops the playback.
        cv2.imshow("Frame", image)
        # Mask to the low byte so the comparison also works on platforms
        # where waitKey reports extra modifier bits.
        key = cv2.waitKey(25) & 0xFF
        if key == ord('q'):
            stop_requested = True
            break

    # Leave the directory walk as well, otherwise playback would resume with
    # the next directory after 'q' was pressed.
    if stop_requested:
        break

cv2.destroyAllWindows()

你可能感兴趣的:(深度学习-检测与跟踪)