squeezenet_v1.1.prototxt
官方标题为 SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <1MB model size。
输入data层的blob shape为[1,3,227,227],输出prob的shape为[1,1000]。
C++代码和Python代码分别如下:
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
// Bring the OpenCV core and dnn namespaces into scope for the sample below.
using namespace cv;
using namespace dnn;
// Class names, one per line of the classes file; main() fills this vector and
// indexes it with the predicted class id.
std::vector<std::string> classes;
int main(int argc, char** argv)
{
float scale{1.0};
Scalar mean{ 0, 0, 0 };
bool swapRB{false};
int inpWidth = 227;
int inpHeight = 227;
String model = "../../data/testdata/dnn/squeezenet_v1.1.prototxt";
String config = "../../data/testdata/dnn/squeezenet_v1.1.caffemodel";
String framework = "";
int backendId = cv::dnn::DNN_BACKEND_OPENCV;
int targetId = cv::dnn::DNN_TARGET_OPENCL;
String classesFile = "../../data/dnn/classification_classes_ILSVRC2012.txt";
// Open file with classes names.
if (!classesFile.empty()) {
const std::string& file = classesFile;
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
while (std::getline(ifs, line)) {
classes.push_back(line);
}
}
CV_Assert(!model.empty());
//! [Read and initialize network]
Net net = readNet(model, config, framework);
//Net net = readNetFromCaffe(model, config); //明确框架类型
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
//! [Read and initialize network]
// Create a window
static const std::string kWinName = "Deep learning image classification in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
//! [Open a video file or an image file or a camera stream]
VideoCapture cap;
cap.open(0);
//! [Open a video file or an image file or a camera stream]
// Process frames.
Mat frame, blob;
while (waitKey(1) < 0) {
cap >> frame;
if (frame.empty()) {
waitKey();
break;
}
//! [Create a 4D blob from a frame]
blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false);
//! [Create a 4D blob from a frame]
//! [Set input blob]
net.setInput(blob);
//! [Set input blob]
//! [Make forward pass]
Mat prob = net.forward();
//! [Make forward pass]
//! [Get a class with a highest score]
Point classIdPoint;
double confidence;
minMaxLoc(prob.reshape(1, 1), 0, &confidence, 0, &classIdPoint);
int classId = classIdPoint.x;
//! [Get a class with a highest score]
// Put efficiency information.
std::vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
std::string label = format("Inference time: %.2f ms", t);
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0),2);
// Print predicted class.
label = format("%s: %.4f", (classes.empty() ? format("Class #%d", classId).c_str() :
classes[classId].c_str()),
confidence);
putText(frame, label, Point(0, 40), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0),2);
imshow(kWinName, frame);
}
return 0;
}
import cv2 as cv
import numpy as np
from common import *
# Backend / target choices offered by cv.dnn, kept for reference:
#backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV)
#targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD)
class MyArgs:
    """Bare attribute holder used in place of parsed command-line arguments."""


args = MyArgs()
args.model = "../data/testdata/dnn/squeezenet_v1.1.prototxt"
args.config = "../data/testdata/dnn/squeezenet_v1.1.caffemodel"
args.classes = "../data/dnn/classification_classes_ILSVRC2012.txt"
args.backend = cv.dnn.DNN_BACKEND_DEFAULT
args.target = cv.dnn.DNN_TARGET_OPENCL
args.scale = 1.0
args.mean = (0,0,0)
args.rgb = False
# Network input size; 227x227 matches the C++ version of this sample.
args.width = 227
args.height = 227
args.input = None  # None selects camera 0

# Load names of classes
classes = None
if args.classes:
    with open(args.classes, 'rt') as f:
        classes = f.read().rstrip('\n').split('\n')

# Load a network
#net = cv.dnn.readNet(args.model, args.config, args.framework)
net = cv.dnn.readNetFromCaffe(args.model, args.config)
net.setPreferableBackend(args.backend)
net.setPreferableTarget(args.target)

winName = 'Deep learning image classification in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

cap = cv.VideoCapture(args.input if args.input else 0)
if not cap.isOpened():
    # Report the problem instead of exiting silently on the first read.
    raise SystemExit('Cannot open video source')

try:
    # Run until a key is pressed or the source stops delivering frames.
    while cv.waitKey(1) < 0:
        hasFrame, frame = cap.read()
        if not hasFrame:
            cv.waitKey(1)
            break

        # Create a 4D blob from a frame, resized to the network input size.
        blob = cv.dnn.blobFromImage(frame, args.scale, (args.width, args.height), args.mean, args.rgb, crop=False)

        # Run a model
        net.setInput(blob)
        out = net.forward()

        # Get a class with a highest score.
        out = out.flatten()
        #classId = np.argmax(out)
        #confidence = out[classId]
        _, confidence, _, maxLoc = cv.minMaxLoc(out)
        # The original takes element [1] of the returned max location as the
        # class index; that is kept here.
        classId = maxLoc[1]

        # Put efficiency information.
        t, _ = net.getPerfProfile()
        label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
        cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

        # Print predicted class. Fall back to the numeric id when no name is
        # available for it (no classes file, or a file with too few lines).
        if classes and 0 <= classId < len(classes):
            name = classes[classId]
        else:
            name = 'Class #%d' % classId
        label = '%s: %.4f' % (name, confidence)
        cv.putText(frame, label, (0, 40), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

        cv.imshow(winName, frame)
finally:
    # Always give the camera and the window back.
    cap.release()
    cv.destroyAllWindows()
分类代码流程:
使用摄像头随机读取进行测试,机器配置i7-7700k,GTX 1080ti。
target使用cv::dnn::DNN_TARGET_OPENCL比cv::dnn::DNN_TARGET_CPU快5倍以上。
如果想尝试让DNN模块使用CUDA加速,可参考博客《opencv dnn模块 示例(15) opencv4.2版本dnn支持cuda加速》。