opencv dnn模块 示例(1) 图像分类 classification

1、网络结构

squeezenet_v1.1.prototxt
官方标题为 SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <1MB model size。
输入data层的blob shape为[1,3,227,227],输出prob的shape为[1,1000]。
opencv dnn模块 示例(1) 图像分类 classification_第1张图片

2、示例代码

c++代码和python代码分别如下

(1) c++代码如下

#include <fstream>
#include <sstream>
#include <string>
#include <vector>

#include <opencv2/dnn.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>

using namespace cv;
using namespace dnn;

// Class names read from the classes file, one entry per line; main() looks up
// the predicted class id in this vector to obtain the label drawn on the frame.
std::vector<std::string> classes;

int main(int argc, char** argv)
{
	float scale{1.0};
	Scalar mean{ 0, 0, 0 };
	bool swapRB{false};
	int inpWidth = 227;
	int inpHeight = 227;
	String model = "../../data/testdata/dnn/squeezenet_v1.1.prototxt";
	String config = "../../data/testdata/dnn/squeezenet_v1.1.caffemodel";
	String framework = "";
	int backendId = cv::dnn::DNN_BACKEND_OPENCV;  
	int targetId = cv::dnn::DNN_TARGET_OPENCL;		

	String classesFile = "../../data/dnn/classification_classes_ILSVRC2012.txt";

	// Open file with classes names.
	if (!classesFile.empty()) {
		const std::string& file = classesFile;
		std::ifstream ifs(file.c_str());
		if (!ifs.is_open())
			CV_Error(Error::StsError, "File " + file + " not found");
		std::string line;
		while (std::getline(ifs, line)) {
			classes.push_back(line);
		}
	}

	CV_Assert(!model.empty());

	//! [Read and initialize network]
	Net net = readNet(model, config, framework);
	//Net net = readNetFromCaffe(model, config); //明确框架类型
	net.setPreferableBackend(backendId);
	net.setPreferableTarget(targetId);
	//! [Read and initialize network]

	// Create a window
	static const std::string kWinName = "Deep learning image classification in OpenCV";
	namedWindow(kWinName, WINDOW_NORMAL);

	//! [Open a video file or an image file or a camera stream]
	VideoCapture cap;
	cap.open(0);
	//! [Open a video file or an image file or a camera stream]

	// Process frames.
	Mat frame, blob;
	while (waitKey(1) < 0) {
		cap >> frame;
		if (frame.empty()) {
			waitKey();
			break;
		}

		//! [Create a 4D blob from a frame]
		blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false);
		//! [Create a 4D blob from a frame]

		//! [Set input blob]
		net.setInput(blob);
		//! [Set input blob]
		//! [Make forward pass]
		Mat prob = net.forward();
		//! [Make forward pass]

		//! [Get a class with a highest score]
		Point classIdPoint;
		double confidence;
		minMaxLoc(prob.reshape(1, 1), 0, &confidence, 0, &classIdPoint);
		int classId = classIdPoint.x;
		//! [Get a class with a highest score]

		// Put efficiency information.
		std::vector<double> layersTimes;
		double freq = getTickFrequency() / 1000;
		double t = net.getPerfProfile(layersTimes) / freq;
		std::string label = format("Inference time: %.2f ms", t);
		putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0),2);

		// Print predicted class.
		label = format("%s: %.4f", (classes.empty() ? format("Class #%d", classId).c_str() :
			classes[classId].c_str()),
			confidence);
		putText(frame, label, Point(0, 40), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0),2);

		imshow(kWinName, frame);
	}
	return 0;
}

(2) python代码如下

import cv2 as cv
import numpy as np

from common import *

#backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV)
#targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD)

class MyArgs:
    """Empty attribute container; the settings below are attached to an instance."""


args = MyArgs()

# Preprocessing handed to blobFromImage: scale factor, mean to subtract,
# and whether to swap the R and B channels.
args.scale = 1.0
args.mean = (0, 0, 0)
args.rgb = False

# Capture source; a falsy value makes the script open camera 0.
args.input = None

# Network definition, weights and the class-name list.
args.model = "../data/testdata/dnn/squeezenet_v1.1.prototxt"
args.config = "../data/testdata/dnn/squeezenet_v1.1.caffemodel"
args.classes = "../data/dnn/classification_classes_ILSVRC2012.txt"

# Computation backend and target device for the dnn module.
args.backend = cv.dnn.DNN_BACKEND_DEFAULT
args.target = cv.dnn.DNN_TARGET_OPENCL

# Read the class names: one per line, with trailing newlines stripped first.
classes = None
if args.classes:
    with open(args.classes, 'rt') as names_file:
        names_text = names_file.read()
    classes = names_text.rstrip('\n').split('\n')

# Build the network; args.model holds the prototxt and args.config the caffemodel.
#net = cv.dnn.readNet(args.model, args.config, args.framework)
net = cv.dnn.readNetFromCaffe(args.model, args.config)
net.setPreferableBackend(args.backend)
net.setPreferableTarget(args.target)

# Resizable window that the annotated frames are shown in.
winName = 'Deep learning image classification in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

# Open the configured input, or camera 0 when none is set.
cap = cv.VideoCapture(args.input or 0)

# Network input size: the data blob of squeezenet_v1.1 is [1, 3, 227, 227],
# so every frame is resized to this size rather than fed at camera resolution.
inpWidth = 227
inpHeight = 227

# Classify frames until a key is pressed or the capture stops delivering frames.
while cv.waitKey(1) < 0:
    hasFrame, frame = cap.read()
    if not hasFrame:
        cv.waitKey(1)
        break

    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False)

    # Run a model
    net.setInput(blob)
    out = net.forward()

    # Get a class with a highest score.
    out = out.flatten()

    #classId = np.argmax(out)
    #confidence = out[classId]
    # The class index is the second coordinate of the maximum location
    # reported for the flattened score vector.
    _, confidence, _, maxLoc = cv.minMaxLoc(out)
    classId = maxLoc[1]

    # Put efficiency information.
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

    # Print predicted class. Fall back to the numeric id when the class list
    # is missing or has no entry for this index.
    if classes and 0 <= classId < len(classes):
        className = classes[classId]
    else:
        className = 'Class #%d' % classId
    label = '%s: %.4f' % (className, confidence)
    cv.putText(frame, label, (0, 40), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

    cv.imshow(winName, frame)

分类代码流程:

  1. 读取网络结构、模型文件创建Net对象
  2. 使用blobFromImage将img转换为blob
  3. 将blob输入到Net
  4. 执行Net的forward进行inference
  5. 读取Net输出根据分数拿到类别

3、演示

使用摄像头实时采集的画面进行测试,机器配置为 i7-7700K、GTX 1080 Ti。
target使用cv::dnn::DNN_TARGET_OPENCL比cv::dnn::DNN_TARGET_CPU快5倍以上。
opencv dnn模块 示例(1) 图像分类 classification_第2张图片
opencv dnn模块 示例(1) 图像分类 classification_第3张图片
opencv dnn模块 示例(1) 图像分类 classification_第4张图片


想尝试DNN模块使用cuda加速,参考博客opencv dnn模块 示例(15) opencv4.2版本dnn支持cuda加速。

你可能感兴趣的:(深度神经网络,OpenCV,opencv实例源码演示)