本次笔记要整理记录的内容是关于三个caffe模型的联合调用,分别是人脸检测模型、性别检测模型和年龄检测模型。基于这三个模型来实现实时的性别年龄检测效果。
这个效果实现的思路如下:
(1)加载预先下载好的三个caffe网络模型;
(2)打开摄像头加载视频流;
(3)对从摄像头中获取每一帧图像进行人脸检测;
(4)对检测到的人脸进行性别与年龄检测;
(5)解析检测结果;
(6)显示检测结果。
下面通过代码来逐步整理。
首先,我们需要加载这三个模型,并设置计算后台和目标设备。由于是对视频流进行实时处理,对运算速度有更高的要求,因此这里使用 OpenVINO(Inference Engine)作为计算后台,在 CPU 上进行推理加速。
//加载人脸检测模型
string face_model_path = "D:\\opencv_c++\\opencv_tutorial\\data\\models\\face_detector\\res10_300x300_ssd_iter_140000_fp16.caffemodel";
string face_config_path = "D:\\opencv_c++\\opencv_tutorial\\data\\models\\face_detector\\deploy.prototxt";
Net face_net = readNetFromCaffe(face_config_path, face_model_path);
face_net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
face_net.setPreferableTarget(DNN_TARGET_CPU);
//加载年龄预测模型
string age_model_path = "D:/opencv_c++/opencv_tutorial/data/models/cnn_age_gender_models/age_net.caffemodel";
string age_config_path = "D:/opencv_c++/opencv_tutorial/data/models/cnn_age_gender_models/age_deploy.prototxt";
Net age_net = readNetFromCaffe(age_config_path, age_model_path);
age_net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
age_net.setPreferableTarget(DNN_TARGET_CPU);
//加载性别预测模型
string gender_model_path = "D:\\opencv_c++\\opencv_tutorial\\data\\models\\cnn_age_gender_models\\gender_net.caffemodel";
string gender_config_path = "D:\\opencv_c++\\opencv_tutorial\\data\\models\\cnn_age_gender_models\\gender_deploy.prototxt";
Net gender_net = readNetFromCaffe(gender_config_path, gender_model_path);
gender_net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
gender_net.setPreferableTarget(DNN_TARGET_CPU);
然后我们需要分别定义性别检测模型和年龄检测模型各自的标签集,使得检测结果能够索引到正确的标签。这里的年龄检测模型将检测年龄段分为了8个区间,每次检测将会得到其中一个区间值,可能是因为太精确的年龄检测起来很困难,所以才采取了分区间的方法吧。至于性别检测模型就不用多说了,分为“男”、“女”两类。
// Class labels for the two classifiers. The age model predicts one of
// eight age brackets (exact-age prediction is hard, so the model works
// on ranges); the gender model distinguishes two classes.
string age_labels[] = {
	"0-2", "4-6", "8-12", "15-20",
	"25-32", "38-43", "48-53", "60-100"
};
string gender_labels[] = { "Man", "Woman" };
然后调用摄像头,并判断是否能够正常打开摄像头
// Open the default camera through the DirectShow backend and bail out
// if it cannot be opened.
VideoCapture capture;
if (!capture.open(0, CAP_DSHOW))
{
	cout << "can't open camera" << endl;
	exit(-1);
}
当我们对某一帧图像进行检测的时候,首先需要通过人脸检测模型来定位到人脸区域,并且获取这个人脸区域。这里可以设置一个阈值,用来判断检测到的对象是人脸的置信度有多高。
而且由于后续对人脸进行性别年龄检测的时候,我们需要把人脸区域给截出来,所以在定位人脸的时候我们必须注意到人脸检测框不可以超出图像的范围,一旦截取的人脸检测框超出了图像范围就会导致访问越界,显示内存出错。
所以我们在进行人脸区域截取之前,必须先对检测框进行限制,避免它出现越界。
// Per-frame face detection (fragment: the enclosing loop/braces continue
// past this excerpt).
flip(frame, frame, 1);
// The SSD face detector expects a 300x300, mean-subtracted, RB-swapped blob.
Mat inputBlob = blobFromImage(frame, 1, Size(300, 300), Scalar(104, 117, 123), true, false);
face_net.setInput(inputBlob);
Mat face_prob = face_net.forward();
// The forward output is 4D ([1,1,N,7] for SSD-style detectors); reinterpret it
// as an N x 7 matrix where each row holds
// (image_id, label, confidence, x1, y1, x2, y2) with normalized coordinates.
Mat face_detection = Mat(face_prob.size[2], face_prob.size[3], CV_32F, face_prob.ptr<float>());
// Minimum confidence for a detection to be treated as a face.
float confidence_thresh = 0.6;
for (int row = 0;row < face_detection.rows;row++)
{
float confidence = face_detection.at<float>(row, 2);
if (confidence > confidence_thresh)
{
// Scale the normalized top-left corner back to pixel coordinates.
int top_left_x = int(face_detection.at<float>(row, 3) * frame.cols);
int top_left_y = int(face_detection.at<float>(row, 4) * frame.rows);
if (top_left_x >= frame.cols || top_left_y >= frame.rows)
{
continue; // skip this detection: its box starts outside the frame
}
// Bottom-right corner in pixel coordinates.
int button_right_x = int(face_detection.at<float>(row, 5) * frame.cols);
int button_right_y = int(face_detection.at<float>(row, 6) * frame.rows);
int width = button_right_x - top_left_x;
int height = button_right_y - top_left_y;
Rect face_box;
// Expand the box by ~10px on each side (clamped at 0) so the crop includes
// some context around the face.
face_box.x = max(0, top_left_x - 10);
face_box.y = max(0, top_left_y - 10);
width = min(width + 20, frame.cols - 1);
height = min(height + 20, frame.rows - 1);
// How far the expanded box overflows the image on each axis (>= 0 means
// it overflows); shrink it so frame(face_box) cannot access out of bounds.
int outsize[2] = { face_box.x + width - frame.cols, face_box.y + height - frame.rows };
if (outsize[0] >= 0 || outsize[1] >= 0)
{
// NOTE(review): this subtracts both overflows whenever either axis
// overflows; a per-axis clamp would be tighter — confirm intent.
face_box.width = width - outsize[0];
face_box.height = height - outsize[1];
}
else
{
face_box.width = width;
face_box.height = height;
}
// Draw the face box on the frame.
rectangle(frame, face_box, Scalar(0, 255, 0), 1, 8);
到这里我们就可以定位到人脸区域了,接下来就是将得到的人脸区域分别进行性别、年龄检测。
我们先将人脸区域转换成blob,便于后续的操作
// Crop the detected face region (deep copy, so later drawing on the frame
// does not affect it) and convert it into a 227x227 mean-subtracted blob
// for the age/gender classifiers.
Mat face = frame(face_box).clone();
Mat face_blob = blobFromImage(face, 1, Size(227, 227), Scalar(78.4263377603, 87.7689143744, 114.895847746), false, false);
然后进行年龄检测。年龄检测模型前向传播后得到的矩阵,可以看作一个 1×8 的 Mat 对象,也就是单通道、1行8列的 CV_32F 类型的 Mat 对象。其中每一列都代表一个年龄区间的置信度,我们只需要找到置信度最大的那一列,该列的列号(x 坐标)就是对应年龄区间的类别 ID。
// Age prediction: forward the face blob through the age net. The output is
// a single-channel CV_32F Mat with 8 columns of per-bracket confidences;
// the column index of the maximum is the age-bracket class ID.
age_net.setInput(face_blob);
Mat age_prob = age_net.forward();
double age_maxConfidence;
Point age_maxConfidence_pt;
minMaxLoc(age_prob, nullptr, &age_maxConfidence, nullptr, &age_maxConfidence_pt);
int age_classID = age_maxConfidence_pt.x;
string age = "age: "+age_labels[age_classID];
putText(frame, age, Point(face_box.x, face_box.y+30), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 200), 1, 8);
// FIX: removed the original `age_prob.~Mat();`. Explicitly invoking the
// destructor is undefined behavior — the destructor runs again when
// age_prob goes out of scope (double release). Mat is RAII-managed and
// needs no manual cleanup.
然后再进行性别检测,性别检测模型的前向传播结果和年龄检测模型的前向传播结果的结构是一样的,每一列都代表一个分类的置信度。所以性别检测模型的前向传播结果是一个1行2列的Mat对象,我们同样找到其中置信度最大的类别,就可以获得它的类别ID了。
// Gender prediction: same output structure as the age net, but with only
// two columns (Man / Woman); the column of the maximum confidence is the
// gender class ID.
gender_net.setInput(face_blob);
Mat gender_prob = gender_net.forward();
double gender_maxConfidence;
Point gender_maxConfidence_pt;
minMaxLoc(gender_prob, nullptr, &gender_maxConfidence, nullptr, &gender_maxConfidence_pt);
int gender_classID = gender_maxConfidence_pt.x;
string gender = "gender: " + gender_labels[gender_classID];
putText(frame, gender, Point(face_box.x, face_box.y-10), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 200), 1, 8);
// FIX: removed the original `gender_prob.~Mat();` — calling the destructor
// explicitly on an automatic object is undefined behavior (it is destroyed
// again at end of scope). Rely on RAII instead.
到这里我们就实现了对人物图像的性别年龄检测了,完整的代码演示如下:
//------------------------------------------------------------------
// Full pipeline: load the three Caffe models, open the camera, then
// for every frame detect faces and classify each face's age/gender.
// FIX vs. original: removed all four explicit destructor calls
// (age_prob/gender_prob/face/face_blob.~Mat()) — invoking a destructor
// on an automatic object is undefined behavior because it is destroyed
// again at end of scope. Mat is RAII-managed; no manual cleanup needed.
//------------------------------------------------------------------
// Load the face detection model (SSD, 300x300 input).
string face_model_path = "D:\\opencv_c++\\opencv_tutorial\\data\\models\\face_detector\\res10_300x300_ssd_iter_140000_fp16.caffemodel";
string face_config_path = "D:\\opencv_c++\\opencv_tutorial\\data\\models\\face_detector\\deploy.prototxt";
Net face_net = readNetFromCaffe(face_config_path, face_model_path);
face_net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
face_net.setPreferableTarget(DNN_TARGET_CPU);
// Load the age prediction model (8 age brackets).
string age_model_path = "D:/opencv_c++/opencv_tutorial/data/models/cnn_age_gender_models/age_net.caffemodel";
string age_config_path = "D:/opencv_c++/opencv_tutorial/data/models/cnn_age_gender_models/age_deploy.prototxt";
Net age_net = readNetFromCaffe(age_config_path, age_model_path);
age_net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
age_net.setPreferableTarget(DNN_TARGET_CPU);
// Load the gender prediction model (binary classifier).
string gender_model_path = "D:\\opencv_c++\\opencv_tutorial\\data\\models\\cnn_age_gender_models\\gender_net.caffemodel";
string gender_config_path = "D:\\opencv_c++\\opencv_tutorial\\data\\models\\cnn_age_gender_models\\gender_deploy.prototxt";
Net gender_net = readNetFromCaffe(gender_config_path, gender_model_path);
gender_net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
gender_net.setPreferableTarget(DNN_TARGET_CPU);
// Class labels for the two classifiers.
string age_labels[8] = { "0-2","4-6","8-12","15-20","25-32","38-43","48-53","60-100" };
string gender_labels[2] = { "Man","Woman" };
// Open the default camera via the DirectShow backend.
VideoCapture capture;
capture.open(0,CAP_DSHOW);
if (!capture.isOpened())
{
	cout << "can't open camera" << endl;
	exit(-1);
}
Mat frame;
// Run detection on every captured frame.
while (capture.read(frame))
{
	flip(frame, frame, 1); // mirror horizontally for a natural selfie view
	// The face detector expects a 300x300, mean-subtracted, RB-swapped blob.
	Mat inputBlob = blobFromImage(frame, 1, Size(300, 300), Scalar(104, 117, 123), true, false);
	face_net.setInput(inputBlob);
	Mat face_prob = face_net.forward();
	// Reinterpret the 4D output as an N x 7 matrix; each row is
	// (image_id, label, confidence, x1, y1, x2, y2), coordinates normalized.
	Mat face_detection = Mat(face_prob.size[2], face_prob.size[3], CV_32F, face_prob.ptr<float>());
	float confidence_thresh = 0.6;
	for (int row = 0; row < face_detection.rows; row++)
	{
		float confidence = face_detection.at<float>(row, 2);
		// Detections below the threshold are simply skipped (the original
		// `else { continue; }` branch was redundant and has been dropped).
		if (confidence > confidence_thresh)
		{
			// Top-left corner in pixel coordinates.
			int top_left_x = int(face_detection.at<float>(row, 3) * frame.cols);
			int top_left_y = int(face_detection.at<float>(row, 4) * frame.rows);
			if (top_left_x >= frame.cols || top_left_y >= frame.rows)
			{
				continue; // box starts outside the frame — skip this detection
			}
			// Bottom-right corner in pixel coordinates.
			int button_right_x = int(face_detection.at<float>(row, 5) * frame.cols);
			int button_right_y = int(face_detection.at<float>(row, 6) * frame.rows);
			int width = button_right_x - top_left_x;
			int height = button_right_y - top_left_y;
			Rect face_box;
			// Expand the box by ~10px per side (clamped at 0) for context.
			face_box.x = max(0, top_left_x - 10);
			face_box.y = max(0, top_left_y - 10);
			width = min(width + 20, frame.cols - 1);
			height = min(height + 20, frame.rows - 1);
			// Shrink the box if it overflows the image, so frame(face_box)
			// never reads out of bounds.
			int outsize[2] = { face_box.x + width - frame.cols, face_box.y + height - frame.rows };
			if (outsize[0] >= 0 || outsize[1] >= 0)
			{
				face_box.width = width - outsize[0];
				face_box.height = height - outsize[1];
			}
			else
			{
				face_box.width = width;
				face_box.height = height;
			}
			// Draw the face box.
			rectangle(frame, face_box, Scalar(0, 255, 0), 1, 8);
			// Crop the face (deep copy) and build a 227x227 blob for the
			// age/gender classifiers.
			Mat face;
			face = frame(face_box).clone();
			Mat face_blob = blobFromImage(face, 1, Size(227, 227), Scalar(78.4263377603, 87.7689143744, 114.895847746), false, false);
			// Age: the 1x8 confidence row's argmax column is the class ID.
			age_net.setInput(face_blob);
			Mat age_prob = age_net.forward();
			double age_maxConfidence;
			Point age_maxConfidence_pt;
			minMaxLoc(age_prob, nullptr, &age_maxConfidence, nullptr, &age_maxConfidence_pt);
			int age_classID = age_maxConfidence_pt.x;
			string age = "age: "+age_labels[age_classID];
			putText(frame, age, Point(face_box.x, face_box.y+30), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 200), 1, 8);
			// Gender: same structure, 1x2 confidences.
			gender_net.setInput(face_blob);
			Mat gender_prob = gender_net.forward();
			double gender_maxConfidence;
			Point gender_maxConfidence_pt;
			minMaxLoc(gender_prob, nullptr, &gender_maxConfidence, nullptr, &gender_maxConfidence_pt);
			int gender_classID = gender_maxConfidence_pt.x;
			string gender = "gender: " + gender_labels[gender_classID];
			putText(frame, gender, Point(face_box.x, face_box.y-10), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 200), 1, 8);
			// (No manual Mat cleanup — RAII releases age_prob, gender_prob,
			// face and face_blob when they go out of scope.)
		}
	}
	imshow("result", frame);
	char ch = cv::waitKey(1);
	if (ch == 27) // ESC quits
	{
		break;
	}
}
capture.release();
那么到这里就实现了在实时视频流中进行性别年龄检测的功能,我就不进行效果演示啦哈哈哈~
但从检测效果上来看,这个并没有特别的令人满意,尤其是角度变换的时候也可能导致不同的检测结果,可能这是因为所用的模型都是开源网络模型的缘故吧。。。和商用网络模型的正确率还是没得比的,不过当做日常娱乐还是可以的哈哈哈。好的那本次笔记也就到此结束啦~
PS:本人的注释比较杂,既有自己的心得体会也有网上查阅资料时摘抄下的知识内容,所以如有雷同,纯属我向前辈学习的致敬,如果有前辈觉得我的笔记内容侵犯了您的知识产权,请和我联系,我会将涉及到的博文内容删除,谢谢!