重要参考链接:
使用VS2015新建 空项目。
新建源文件 main.cpp,内容如下:
#include
#include
#include
#include
#include
#include
using namespace std;
using namespace cv;
// Label table for the SSD detector below. main() indexes this array with the
// class id read from column 1 of each detection row, so every entry must stay
// at its current position. "background" appears at several positions
// (0, 12, 26, 29, 30, 45, 66, 68, 69, 71, 83); presumably these are ids the
// model's label map does not use -- confirm against the label map.
const char* classNames[] = {
    /*  0- 9 */ "background", "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    /* 10-19 */ "traffic light", "fire hydrant", "background", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
    /* 20-29 */ "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "background", "backpack", "umbrella", "background",
    /* 30-39 */ "background", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
    /* 40-49 */ "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "background", "wine glass", "cup", "fork", "knife",
    /* 50-59 */ "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza",
    /* 60-69 */ "donut", "cake", "chair", "couch", "potted plant", "bed", "background", "dining table", "background", "background",
    /* 70-79 */ "toilet", "background", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
    /* 80-89 */ "toaster", "sink", "refrigerator", "background", "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
    /* 90    */ "toothbrush"
};
int main()
{
String weights = "models/frozen_inference_graph.pb";
String prototxt = "models/ssd_mobilenet_v1_coco.pbtxt";
const size_t width = 300;
const size_t height = 300;
VideoCapture capture;
capture.open(0);
namedWindow("input", CV_WINDOW_AUTOSIZE);
int w = capture.get(CAP_PROP_FRAME_WIDTH);
int h = capture.get(CAP_PROP_FRAME_HEIGHT);
printf("frame width : %d, frame height : %d", w, h);
// set up net
dnn::Net net = cv::dnn::readNetFromTensorflow(weights, prototxt);
Mat frame;
/*
while (1) // 模式1:测试单张图像
{
frame = imread("models/car.jpg");
imshow("input", frame);
*/
while (capture.read(frame)) // 模式2:调用摄像头
{
//预测
cv::Mat inputblob = cv::dnn::blobFromImage(frame, 1. / 255, Size(width, height));
net.setInput(inputblob);
Mat output = net.forward();
//检测
Mat detectionMat(output.size[2], output.size[3], CV_32F, output.ptr<float>());
float confidence_threshold = 0.5;
for (int i = 0; i < detectionMat.rows; i++) {
float confidence = detectionMat.at<float>(i, 2);
if (confidence > confidence_threshold) {
size_t objIndex = (size_t)(detectionMat.at<float>(i, 1));
float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
float br_x = detectionMat.at<float>(i, 5) * frame.cols;
float br_y = detectionMat.at<float>(i, 6) * frame.rows;
Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
rectangle(frame, object_box, Scalar(0, 255, 0), 2, 8, 0);
putText(frame, format("%s", classNames[objIndex]), Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 255, 0), 2);
}
}
imshow("ssd-video-demo", frame);
char c = waitKey(5);
if (c == 27)
{ // ESC退出
break;
}
}
capture.release();
waitKey(0);
return 0;
}
在该项目下新建 models 文件夹,放入测试图像和模型。
重要参考链接:
使用VS2015新建 空项目。
新建源文件 main.cpp,内容如下:
//use opencv_dnn module for image classification by using GoogLeNet trained network
#include
#include
#include
using namespace cv;
using namespace cv::dnn;
using namespace std;
// GoogLeNet network definition and trained weights; main() passes both to readNetFromCaffe.
String modelTxt = "models/bvlc_googlenet.prototxt";
String modelBin = "models/bvlc_googlenet.caffemodel";
// Text file with the class labels, opened by readClasslabels().
String labelFile = "models/synset_words.txt";
// Loads the class labels from labelFile; defined after main().
vector<String> readClasslabels();
int main(int argc, char** argv) {
Mat testImage = imread("models/car.jpg");
if (testImage.empty()) {
printf("could not load image...\n");
return -1;
}
// create googlenet with caffemodel text and bin
Net net = dnn::readNetFromCaffe(modelTxt, modelBin);
if (net.empty())
{
std::cerr << "Can't load network by using the following files: " << std::endl;
std::cerr << "prototxt: " << modelTxt << std::endl;
std::cerr << "caffemodel: " << modelBin << std::endl;
return -1;
}
// 读取分类数据
vector<String> labels = readClasslabels();
//GoogLeNet accepts only 224x224 RGB-images
Mat inputBlob = blobFromImage(testImage, 1, Size(224, 224), Scalar(104, 117, 123));//mean: Scalar(104, 117, 123)
// 支持1000个图像分类检测
Mat prob;
// 循环10+
for (int i = 0; i < 10; i++)
{
// 输入
net.setInput(inputBlob, "data");
// 分类预测
prob = net.forward("prob");
}
// 读取分类索引,最大与最小值
Mat probMat = prob.reshape(1, 1); //reshape the blob to 1x1000 matrix // 1000个分类
Point classNumber;
double classProb;
minMaxLoc(probMat, NULL, &classProb, NULL, &classNumber); // 可能性最大的一个
int classIdx = classNumber.x; // 分类索引号
printf("\n current image classification : %s, possible : %.2f \n", labels.at(classIdx).c_str(), classProb);
putText(testImage, labels.at(classIdx), Point(20, 20), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 0, 255), 2, 8);
imshow("Image Category", testImage);
waitKey(0);
return 0;
}
/* 读取图像的1000个分类标记文本数据 */
/**
 * Reads the class labels listed in labelFile, one label per line.
 *
 * For every non-empty line, everything up to and including the first space is
 * dropped and the remainder is stored; a line without any space is stored
 * whole. Empty lines are skipped.
 *
 * Terminates the process with exit(-1) if the file cannot be opened.
 *
 * @return The labels in file order.
 */
vector<String> readClasslabels() {
    std::vector<String> classNames;
    std::ifstream fp(labelFile);
    if (!fp.is_open())
    {
        std::cerr << "File with classes labels not found: " << labelFile << std::endl;
        exit(-1);
    }
    std::string name;
    // Loop on the read itself: testing eof() before reading never terminates
    // when the stream fails without reaching end-of-file.
    while (std::getline(fp, name))
    {
        // Drop a trailing CR so files with CRLF line endings give clean labels.
        if (!name.empty() && name[name.size() - 1] == '\r')
            name.erase(name.size() - 1);
        if (name.empty())
            continue;
        // Handle "no space" explicitly instead of relying on npos + 1 wrapping to 0.
        const std::string::size_type space = name.find(' ');
        classNames.push_back(space == std::string::npos ? name : name.substr(space + 1));
    }
    // fp is closed by its destructor.
    return classNames;
}
下载模型文件
caffemodel文件:http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel
bvlc_googlenet.prototxt文件:https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/bvlc_googlenet.prototxt
synset_words.txt: http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz 中的 synset_words.txt。
运行程序main.cpp
输出结果:
重要参考链接:
使用VS2015新建 空项目。
新建源文件 main.cpp,内容如下:
#include
#include
#include
using namespace cv;
using namespace cv::dnn;
using namespace std;
// Network input size; main() passes these to blobFromImage as Size(width, height).
const size_t width = 300;
const size_t height = 300;
// Passed by main() as the mean and scale-factor arguments of blobFromImage.
const float meanVal = 127.5f;
const float scaleFactor = 0.007843f;
// Label table indexed by the class id that main() reads from column 1 of each
// detection row; every entry must stay at its current position.
const char* classNames[] = {
    "background",   //  0
    "aeroplane",    //  1
    "bicycle",      //  2
    "bird",         //  3
    "boat",         //  4
    "bottle",       //  5
    "bus",          //  6
    "car",          //  7
    "cat",          //  8
    "chair",        //  9
    "cow",          // 10
    "diningtable",  // 11
    "dog",          // 12
    "horse",        // 13
    "motorbike",    // 14
    "person",       // 15
    "pottedplant",  // 16
    "sheep",        // 17
    "sofa",         // 18
    "train",        // 19
    "tvmonitor"     // 20
};
// MobileNet-SSD Caffe weights and network definition; main() passes both to
// readNetFromCaffe (definition first, weights second).
String modelFile = "models/MobileNetSSD_deploy.caffemodel";
String model_text_file = "models/MobileNetSSD_deploy.prototxt";
int main()
{
VideoCapture capture;
capture.open(0);
namedWindow("input", CV_WINDOW_AUTOSIZE);
int w = capture.get(CAP_PROP_FRAME_WIDTH);
int h = capture.get(CAP_PROP_FRAME_HEIGHT);
printf("frame width : %d, frame height : %d", w, h);
// set up net
Net net = readNetFromCaffe(model_text_file, modelFile);
Mat frame;
while (capture.read(frame)) //注意:这里提供了两种模式,调用摄像头的时候把该句取消注释即可
while (1)
{
frame = imread("models/car.jpg");
imshow("input", frame);
//while (capture.read(frame))
//{
//预测
Mat inputblob = blobFromImage(frame, scaleFactor, Size(width, height), meanVal, false);
net.setInput(inputblob, "data");
Mat detection = net.forward("detection_out");
//检测
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
float confidence_threshold = 0.3;
for (int i = 0; i < detectionMat.rows; i++) {
float confidence = detectionMat.at<float>(i, 2);
if (confidence > confidence_threshold) {
size_t objIndex = (size_t)(detectionMat.at<float>(i, 1));
float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
float br_x = detectionMat.at<float>(i, 5) * frame.cols;
float br_y = detectionMat.at<float>(i, 6) * frame.rows;
Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
rectangle(frame, object_box, Scalar(0, 0, 255), 2, 8, 0);
putText(frame, format("%s", classNames[objIndex]), Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2);
}
}
imshow("ssd-video-demo", frame);
char c = waitKey(5);
if (c == 27)
{ // ESC退出
break;
}
}
capture.release();
waitKey(0);
return 0;
}
caffemodel文件:https://drive.google.com/file/d/0B3gersZ2cHIxRm5PMWRoTkdHdHc/view?resourcekey=0-1Lpfs4EvGDeCQz12AF64hQ
MobileNetSSD_deploy.prototxt文件:
https://raw.githubusercontent.com/chuanqi305/MobileNet-SSD/daef68a6c2f5fbb8c88404266aa28180646d17e0/MobileNetSSD_deploy.prototxt
运行 main.cpp,测试结果:
重要参考链接:
使用VS2015新建 空项目。
新建源文件 main.cpp,内容如下:
//use opencv_dnn module for Segmentation
#include
#include
#include
#include
#include
#include
using namespace cv;
using namespace dnn;
// Class names filled in by main() from the classes file; showLegend() uses
// them as legend captions.
std::vector<std::string> classes;
// One color per class: read from the optional color file in main(), or
// generated by colorizeSegmentation() when still empty.
std::vector<Vec3b> colors;
// Shows the color/class-name legend window; defined after main().
void showLegend();
// Turns the network score blob into a per-pixel color image; defined after main().
void colorizeSegmentation(const Mat &score, Mat &segm);
int main(int argc, char** argv) try {
// VGG - based FCN(semantical segmentation network)
// ENet(lightweight semantical segmentation network)
// 根据选择的检测模型文件进行配置
float confThreshold, nmsThreshold, scale;
cv::Scalar mean;
bool swapRB;
int inpWidth, inpHeight;
String modelPath, configPath, classesFile;
int modelType = 0; // 0-fcn 1-enet
if (modelType == 0){
confThreshold = 0.5;
nmsThreshold = 0.4;
scale = 1.0;
mean = Scalar{ 0,0,0 };
swapRB = false;
inpWidth = 500;
inpHeight = 500;
modelPath = "models/fcn8s-heavy-pascal.caffemodel";
configPath = "models/fcn8s-heavy-pascal.prototxt";
classesFile = "models/object_detection_classes_pascal_voc.txt";
}
else if (modelType == 1){
confThreshold = 0.5;
nmsThreshold = 0.4;
scale = 0.00392;
mean = Scalar{ 0,0,0 };
swapRB = false;
inpWidth = 512;
inpHeight = 256;
modelPath = "models/Enet-model-best.net";
configPath = "";
classesFile = "models/enet-classes.txt";
}
String colorFile = "";
String framework = "";
int backendId = cv::dnn::DNN_BACKEND_OPENCV;
int targetId = cv::dnn::DNN_TARGET_CPU;
// Open file with classes names.
if (!classesFile.empty()) {
const std::string file = classesFile;
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
if (modelType == 0)
classes.push_back("background"); //使用的是object_detection_classes,需要增加背景; enet不需要,注释该行
while (std::getline(ifs, line)) {
classes.push_back(line);
}
}
if (!colorFile.empty()) {
const std::string file = colorFile;
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
while (std::getline(ifs, line)) {
std::istringstream colorStr(line.c_str());
Vec3b color;
for (int i = 0; i < 3 && !colorStr.eof(); ++i)
colorStr >> color[i];
colors.push_back(color);
}
}
CV_Assert(!modelPath.empty());
//! [Read and initialize network]
Net net = readNet(modelPath, configPath, framework);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
//! [Read and initialize network]
// Create a window
static const std::string kWinName = "Deep learning semantic segmentation in OpenCV";
namedWindow(kWinName, WINDOW_AUTOSIZE);
//! [Open a video file or an image file or a camera stream]
VideoCapture cap;
//cap.open("../../data/image/person.jpg"); // pascal voc
cap.open("models/car.jpg"); // enet Cityscapes
if (!cap.isOpened()) {
std::cout << "VideoCapture open failed." << std::endl;
return 0;
}
//! [Open a video file or an image file or a camera stream]
// Process frames.
Mat frame, blob;
while (waitKey(1) < 0) {
cap >> frame;
if (frame.empty()) {
waitKey();
break;
}
//! [Create a 4D blob from a frame]
blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false);
//! [Create a 4D blob from a frame]
//! [Set input blob]
net.setInput(blob);
//! [Set input blob]
//! [Make forward pass]
Mat score = net.forward();
//! [Make forward pass]
Mat segm;
colorizeSegmentation(score, segm);
resize(segm, segm, frame.size(), 0, 0, INTER_NEAREST);
addWeighted(frame, 0.5, segm, 0.5, 0.0, frame);
// Put efficiency information.
std::vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
std::string label = format("Inference time: %.2f ms", t);
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
imshow(kWinName, frame);
if (!classes.empty())
showLegend();
}
return 0;
}
catch (std::exception & e) {
std::cerr << e.what() << std::endl;
}
/**
 * Converts a network score blob into a color image with one color per class.
 *
 * score is read as a 4-D float blob whose dimension 1 is the class channel and
 * whose dimensions 2 and 3 are rows and columns. For every pixel the channel
 * with the highest score is selected and the matching entry of the global
 * colors table is written to segm. If colors is empty it is filled here with
 * one generated color per channel (channel 0 gets the default Vec3b).
 *
 * @param score Network output; left unmodified.
 * @param segm  Receives the rows x cols CV_8UC3 color image.
 * @throws cv::Exception (via CV_Error) if colors is non-empty and its size
 *         differs from the number of channels.
 */
void colorizeSegmentation(const Mat &score, Mat &segm)
{
    const int rows = score.size[2];
    const int cols = score.size[3];
    const int chns = score.size[1];
    if (colors.empty()) {
        // Generate colors.
        colors.push_back(Vec3b());
        for (int i = 1; i < chns; ++i) {
            Vec3b color;
            for (int j = 0; j < 3; ++j)
                color[j] = static_cast<uchar>((colors[i - 1][j] + rand() % 256) / 2);
            colors.push_back(color);
        }
    }
    else if (chns != (int)colors.size()) {
        CV_Error(Error::StsError, format("Number of output classes does not match "
            "number of colors (%d != %zu)", chns, colors.size()));
    }

    // Index of the best channel per pixel; starts at channel 0. It is stored
    // in 8 bits, so channel indices above 255 would wrap.
    Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);
    // Running maximum, seeded with the channel-0 scores. Clone the plane:
    // wrapping score.data directly would make the updates below overwrite the
    // caller's const input.
    Mat maxVal = Mat(rows, cols, CV_32FC1, score.data).clone();
    for (int ch = 1; ch < chns; ch++) {
        for (int row = 0; row < rows; row++) {
            const float *ptrScore = score.ptr<float>(0, ch, row);
            uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row);
            float *ptrMaxVal = maxVal.ptr<float>(row);
            for (int col = 0; col < cols; col++) {
                if (ptrScore[col] > ptrMaxVal[col]) {
                    ptrMaxVal[col] = ptrScore[col];
                    ptrMaxCl[col] = (uchar)ch;
                }
            }
        }
    }

    // Map every class index to its color.
    segm.create(rows, cols, CV_8UC3);
    for (int row = 0; row < rows; row++) {
        const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
        Vec3b *ptrSegm = segm.ptr<Vec3b>(row);
        for (int col = 0; col < cols; col++) {
            ptrSegm[col] = colors[ptrMaxCl[col]];
        }
    }
}
void showLegend()
{
static const int kBlockHeight = 30;
static Mat legend;
if (legend.empty()) {
const int numClasses = (int)classes.size();
if ((int)colors.size() != numClasses) {
CV_Error(Error::StsError, format("Number of output classes does not match "
"number of labels (%zu != %zu)", colors.size(), classes.size()));
}
legend.create(kBlockHeight * numClasses, 200, CV_8UC3);
for (int i = 0; i < numClasses; i++) {
Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight);
block.setTo(colors[i]);
putText(block, classes[i], Point(0, kBlockHeight / 2), FONT_HERSHEY_SIMPLEX, 0.5, Vec3b(255, 255, 255));
}
namedWindow("Legend", WINDOW_AUTOSIZE);
imshow("Legend", legend);
}
}
模型文件下载地址,caffemodel和prototxt的名字必须对应:
caffemodel:http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel
prototxt文件:https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/fcn8s-heavy-pascal.prototxt
类别文件:https://github.com/opencv/opencv/blob/master/samples/data/dnn/object_detection_classes_pascal_voc.txt
运行程序 main.cpp,测试结果为:
上述模型基本可以从下面两个链接中下载