人类具有一种视觉注意机制,即当面对一个场景时,会选择性地忽略不感兴趣的区域,聚焦于感兴趣的区域。这些感兴趣的区域称为显著性区域。视觉显著性检测(Visual Saliency Detection,VSD)则是一种模拟人类视觉并从图像中提取显著性区域的智能算法。如下面左边的图所示,人眼在观看该图片时会首先注意其中的小狗,自动忽略背景区域,小狗所在区域就是显著性区域。通过计算机视觉算法对左边的图像进行视觉显著性检测能够得到下图右边的结果,其中黑色区域为不显著区域,白色为显著区域,显著性检测在机器人领域、目标检测领域和图像识别领域有大量应用。
本文主要介绍基于OpenCV contrib库中的saliency模块实现视觉显著性检测算法,OpenCV contrib库中的saliency模块官方仓库见saliency。关于视觉显著性检测算法更多详细介绍见:图像显著性检测论文及代码汇总
本文需要OpenCV contrib库,OpenCV contrib库的编译安装见:
OpenCV_contrib库在windows下编译使用指南
本文所有代码见:
OpenCV-Practical-Exercise
OpenCV contrib库中的saliency模块提供四种显著性检测算法。本节主要介绍这四种方法的相关信息。
调用接口如下:
C++
// Create the spectral-residual static saliency detector.
saliencyAlgorithm = StaticSaliencySpectralResidual::create();
// Compute the saliency map of `image` into `saliencyMap`; the returned flag is stored in `success`.
bool success = saliencyAlgorithm->computeSaliency(image, saliencyMap);
Python
# Create the spectral-residual static saliency detector.
saliencyAlgorithm = cv2.saliency.StaticSaliencySpectralResidual_create()
# Compute saliency for `image`; the call yields a success flag and the saliency map.
success, saliencyMap = saliencyAlgorithm.computeSaliency(image)
调用接口如下:
C++
// Create the fine-grained static saliency detector.
saliencyAlgorithm = StaticSaliencyFineGrained::create();
// Compute the saliency map of `image` into `saliencyMap`; the returned flag is stored in `success`.
bool success = saliencyAlgorithm->computeSaliency(image, saliencyMap);
Python
# Create the fine-grained static saliency detector.
saliencyAlgorithm = cv2.saliency.StaticSaliencyFineGrained_create()
# Compute saliency for `image`; the call yields a success flag and the saliency map.
success, saliencyMap = saliencyAlgorithm.computeSaliency(image)
调用接口如下:
C++
// Create the BING objectness detector.
saliencyAlgorithm = ObjectnessBING::create();
// BING reports its result as a list of candidate boxes rather than an image;
// each Vec4i is later read as (x1, y1, x2, y2) corner coordinates.
vector<Vec4i> saliencyMap;
// Tell the detector where the trained model files are located.
// The setter only exists on the concrete ObjectnessBING type, hence the cast.
saliencyAlgorithm.dynamicCast<ObjectnessBING>()->setTrainingPath(training_path);
// Store the detector's result files in the "Results" folder.
saliencyAlgorithm.dynamicCast<ObjectnessBING>()->setBBResDir("Results");
// Compute the candidate boxes for `image`.
bool success = saliencyAlgorithm->computeSaliency(image, saliencyMap);
Python
# Create the BING objectness detector.
saliencyAlgorithm = cv2.saliency.ObjectnessBING_create()
# Tell the detector where the trained model files are located.
saliencyAlgorithm.setTrainingPath(training_path)
# Store the detector's result files in the "Results" folder.
saliencyAlgorithm.setBBResDir("Results")
# Compute saliency for `image`; the call yields a success flag and the detection result.
success, saliencyMap = saliencyAlgorithm.computeSaliency(image)
调用接口如下:
C++
// Create the BinWangApr2014 motion saliency detector.
saliencyAlgorithm = MotionSaliencyBinWangApr2014::create();
// Size the detector's internal data structures from the image width and height.
// The setter only exists on the concrete type, hence the cast.
saliencyAlgorithm.dynamicCast<MotionSaliencyBinWangApr2014>()->setImagesize(image.cols, image.rows);
// Initialise the detector; must follow setImagesize().
saliencyAlgorithm.dynamicCast<MotionSaliencyBinWangApr2014>()->init();
// Compute the saliency map for the current video frame.
saliencyAlgorithm->computeSaliency(frame, saliencyMap);
Python
# Create the BinWangApr2014 motion saliency detector.
saliencyAlgorithm = cv2.saliency.MotionSaliencyBinWangApr2014_create()
# Size the detector's internal data structures from the image width and height.
saliencyAlgorithm.setImagesize(image.shape[1], image.shape[0])
# Initialise the detector.
saliencyAlgorithm.init()
# Compute saliency for the current frame; the call yields a success flag and the saliency map.
success, saliencyMap = saliencyAlgorithm.computeSaliency(frame)
显著性检测算法与目标检测算法大大不同。显著性检测算法,只是判断图中有显著目标的区域,这些区域可能包含目标也可能不包含目标,因方法而异。类比人眼的观察方式,显著性检测算法是许多计算机视觉任务的第一步,检测出显著性区域后,对这些显著性区域进行进一步判断和预测。显著性检测算法通常检测速度较快,某些计算量大的算法如深度学习图像分类算法,可以只在显著性区域上运行,以缩小检测范围,加快检测速度,提高检测精度。
OpenCV saliency模块提供了四种不同的显著性检测方法,但是按方法类别只有三种。OpenCV saliency模块的类关系如下图所示:
OpenCV saliency模块提供的三种不同方法类别模块介绍如下:
更多关于OpenCV saliency模块的介绍可以见:OpenCV中的显著性检测(Saliency Detection)和OpenCV—python 显着性检测二
本文所提供的代码可以对视频或者图像进行显著性检测,BinWangApr2014只能对视频进行显著性检测。本文提供C++和Python代码实现,代码如下:
C++
#include <algorithm>
#include <iostream>
#include <vector>

#include <opencv2/opencv.hpp>
#include <opencv2/saliency.hpp>
using namespace std;
using namespace cv;
using namespace saliency;
int main()
{
// 显著性检测算法
// 可选:SPECTRAL_RESIDUAL,FINE_GRAINED,BING,BinWangApr2014
String saliency_algorithm = "FINE_GRAINED";
// 检测视频或者图像
String video_name = "video/vtest.avi";
// String video_name = "video/dog.jpg";
// 起始帧
int start_frame = 0;
// 模型路径
String training_path = "ObjectnessTrainedModel";
// 如果算法名和视频名为空,停止检测
if (saliency_algorithm.empty() || video_name.empty())
{
cout << "Please set saliency_algorithm and video_name";
return -1;
}
// open the capture
VideoCapture cap;
// 打开视频
cap.open(video_name);
// 设置视频起始帧
cap.set(CAP_PROP_POS_FRAMES, start_frame);
// 输入图像
Mat frame;
// instantiates the specific Saliency
// 实例化saliencyAlgorithm结构
Ptr saliencyAlgorithm;
// 二值化检测结果
Mat binaryMap;
// 检测图像
Mat image;
// 读图
cap >> frame;
if (frame.empty())
{
return 0;
}
frame.copyTo(image);
// 根据输入的方法确定检测类型
// StaticSaliencySpectralResidual
if (saliency_algorithm.find("SPECTRAL_RESIDUAL") == 0)
{
// 检测结果,白色区域表示显著区域
Mat saliencyMap;
saliencyAlgorithm = StaticSaliencySpectralResidual::create();
// 计算显著性
double start = static_cast(getTickCount());
bool success = saliencyAlgorithm->computeSaliency(image, saliencyMap);
double duration = ((double)getTickCount() - start) / getTickFrequency();
cout << "computeSaliency cost time is: " << duration * 1000 << "ms" << endl;
if (success)
{
StaticSaliencySpectralResidual spec;
// 二值化图像
double start = static_cast(getTickCount());
spec.computeBinaryMap(saliencyMap, binaryMap);
double duration = ((double)getTickCount() - start) / getTickFrequency();
cout << "computeBinaryMap cost time is: " << duration * 1000 << "ms" << endl;
imshow("Original Image", image);
imshow("Saliency Map", saliencyMap);
imshow("Binary Map", binaryMap);
// 转换格式才能保存图片
saliencyMap.convertTo(saliencyMap, CV_8UC3, 256);
imwrite("Results/SPECTRAL_RESIDUAL_saliencyMap.jpg", saliencyMap);
imwrite("Results/SPECTRAL_RESIDUAL_binaryMap.jpg", binaryMap);
waitKey(0);
}
}
// StaticSaliencyFineGrained
else if (saliency_algorithm.find("FINE_GRAINED") == 0)
{
Mat saliencyMap;
saliencyAlgorithm = StaticSaliencyFineGrained::create();
// 计算显著性
double start = static_cast(getTickCount());
bool success = saliencyAlgorithm->computeSaliency(image, saliencyMap);
double duration = ((double)getTickCount() - start) / getTickFrequency();
cout << "computeSaliency cost time is: " << duration * 1000 << "ms" << endl;
if (success)
{
StaticSaliencyFineGrained spec;
// 二值化图像
// 二值化图像
double start = static_cast(getTickCount());
spec.computeBinaryMap(saliencyMap, binaryMap);
double duration = ((double)getTickCount() - start) / getTickFrequency();
cout << "computeBinaryMap cost time is: " << duration * 1000 << "ms" << endl;
imshow("Saliency Map", saliencyMap);
imshow("Original Image", image);
imshow("Binary Map", binaryMap);
// 转换格式才能保存图片
saliencyMap.convertTo(saliencyMap, CV_8UC3, 256);
imwrite("Results/FINE_GRAINED_saliencyMap.jpg", saliencyMap);
imwrite("Results/FINE_GRAINED_binaryMap.jpg", binaryMap);
waitKey(0);
}
}
// ObjectnessBING
else if (saliency_algorithm.find("BING") == 0)
{
// 判断模型是否存在
if (training_path.empty())
{
cout << "Path of trained files missing! " << endl;
return -1;
}
else
{
saliencyAlgorithm = ObjectnessBING::create();
vector saliencyMap;
// 提取模型文件参数
saliencyAlgorithm.dynamicCast()->setTrainingPath(training_path);
// 将算法检测结果保存在Results文件夹内
saliencyAlgorithm.dynamicCast()->setBBResDir("Results");
// 计算显著性
double start = static_cast(getTickCount());
bool success = saliencyAlgorithm->computeSaliency(image, saliencyMap);
double duration = ((double)getTickCount() - start) / getTickFrequency();
cout << "computeSaliency cost time is: " << duration * 1000 << "ms" << endl;
if (success)
{
// saliencyMap获取检测到的目标个数
int ndet = int(saliencyMap.size());
std::cout << "Objectness done " << ndet << std::endl;
// The result are sorted by objectness. We only use the first maxd boxes here.
// 目标按可能性从大到小排列,maxd为显示前5个目标,step设置颜色,jitter设置矩形框微调
int maxd = 5, step = 255 / maxd, jitter = 9;
Mat draw = image.clone();
for (int i = 0; i < std::min(maxd, ndet); i++)
{
// 获得矩形框坐标点
Vec4i bb = saliencyMap[i];
// 设定颜色
Scalar col = Scalar(((i*step) % 255), 50, 255 - ((i*step) % 255));
// 矩形框微调
Point off(theRNG().uniform(-jitter, jitter), theRNG().uniform(-jitter, jitter));
// 画矩形
rectangle(draw, Point(bb[0] + off.x, bb[1] + off.y), Point(bb[2] + off.x, bb[3] + off.y), col, 2);
// mini temperature scale
// 颜色标注
rectangle(draw, Rect(20, 20 + i * 10, 10, 10), col, -1);
}
imshow("BING", draw);
// 保存图片
imwrite("Results/BING_draw.jpg", draw);
waitKey();
}
else
{
std::cout << "No saliency found for " << video_name << std::endl;
}
}
}
// BinWangApr2014
else if (saliency_algorithm.find("BinWangApr2014") == 0)
{
saliencyAlgorithm = MotionSaliencyBinWangApr2014::create();
// 设置数据结构大小
saliencyAlgorithm.dynamicCast()->setImagesize(image.cols, image.rows);
// 初始化
saliencyAlgorithm.dynamicCast()->init();
bool paused = false;
for (;; )
{
if (!paused)
{
cap >> frame;
if (frame.empty())
{
return 0;
}
cvtColor(frame, frame, COLOR_BGR2GRAY);
Mat saliencyMap;
// 计算
double start = static_cast(getTickCount());
saliencyAlgorithm->computeSaliency(frame, saliencyMap);
double duration = ((double)getTickCount() - start) / getTickFrequency();
cout << "computeSaliency cost time is: " << duration * 1000 << "ms" << endl;
imshow("image", frame);
// 显示
imshow("saliencyMap", saliencyMap * 255);
}
char c = (char)waitKey(2);
if (c == 'q')
break;
if (c == 'p')
paused = !paused;
}
}
destroyAllWindows();
return 0;
}
Python
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 15 19:22:58 2020
@author: luohenyueji
"""
import cv2
import random
def _elapsed_ms(start_ticks):
    """Return the milliseconds elapsed since ``start_ticks`` (a cv2.getTickCount() value)."""
    return (cv2.getTickCount() - start_ticks) / cv2.getTickFrequency() * 1000


def _run_static_saliency(saliencyAlgorithm, image, tag):
    """Run a static saliency detector on ``image``, then show and save the results.

    The saliency map and its binarized version are saved as
    ``Results/<tag>_saliencyMap.jpg`` and ``Results/<tag>_binaryMap.jpg``.
    Blocks until a key is pressed. Does nothing further if detection fails.
    """
    # Compute saliency; white areas of the map are the salient regions
    start = cv2.getTickCount()
    success, saliencyMap = saliencyAlgorithm.computeSaliency(image)
    print("computeSaliency cost time is: {} ms".format(_elapsed_ms(start)))
    if not success:
        return

    # Binarize the saliency map
    start = cv2.getTickCount()
    _, binaryMap = saliencyAlgorithm.computeBinaryMap(saliencyMap)
    print("computeBinaryMap cost time is: {} ms".format(_elapsed_ms(start)))

    cv2.imshow("Saliency Map", saliencyMap)
    cv2.imshow("Original Image", image)
    cv2.imshow("Binary Map", binaryMap)
    # The map has to be rescaled before it can be saved as an image
    saliencyMap = (saliencyMap * 255)
    # NOTE: cv2.imwrite does not create missing directories; "Results" must exist
    cv2.imwrite("Results/{}_saliencyMap.jpg".format(tag), saliencyMap)
    cv2.imwrite("Results/{}_binaryMap.jpg".format(tag), binaryMap)
    cv2.waitKey(0)


def main():
    """Run the selected saliency algorithm on a video or an image."""
    # Saliency algorithm to run
    # One of: SPECTRAL_RESIDUAL, FINE_GRAINED, BING, BinWangApr2014
    saliency_algorithm = "FINE_GRAINED"
    # Video or image to process
    video_name = "video/vtest.avi"
    # video_name = "video/dog.jpg";
    # First frame to process
    start_frame = 0
    # Folder holding the trained BING model
    training_path = "ObjectnessTrainedModel"

    # Nothing to do without an algorithm name and an input (None or empty)
    if not saliency_algorithm or not video_name:
        print("Please set saliency_algorithm and video_name")
        return

    # Open the input
    cap = cv2.VideoCapture(video_name)
    try:
        # Jump to the first frame to process
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        # Read the first frame; the static detectors work on this frame only
        _, frame = cap.read()
        if frame is None:
            print("Could not read a frame from {}".format(video_name))
            return
        image = frame.copy()

        # Dispatch on the algorithm name (prefix match)
        if saliency_algorithm.find("SPECTRAL_RESIDUAL") == 0:
            _run_static_saliency(
                cv2.saliency.StaticSaliencySpectralResidual_create(),
                image, "SPECTRAL_RESIDUAL")

        elif saliency_algorithm.find("FINE_GRAINED") == 0:
            _run_static_saliency(
                cv2.saliency.StaticSaliencyFineGrained_create(),
                image, "FINE_GRAINED")

        elif saliency_algorithm.find("BING") == 0:
            # The trained model is mandatory for this detector
            if not training_path:
                print("Path of trained files missing! ")
                return
            saliencyAlgorithm = cv2.saliency.ObjectnessBING_create()
            # Location of the trained model files
            saliencyAlgorithm.setTrainingPath(training_path)
            # Store the detector's result files in the "Results" folder
            saliencyAlgorithm.setBBResDir("Results")
            # Compute the candidate boxes
            start = cv2.getTickCount()
            success, saliencyMap = saliencyAlgorithm.computeSaliency(image)
            print("computeSaliency cost time is: {} ms".format(_elapsed_ms(start)))
            if success:
                # Number of detected candidate boxes
                ndet = saliencyMap.shape[0]
                print("Objectness done ", ndet)
                # The results are sorted by objectness, so only the first maxd
                # boxes are drawn. step spreads the box colours, jitter
                # slightly offsets each box.
                maxd = 5
                # Integer division keeps the colour components integral
                step = 255 // maxd
                jitter = 9
                draw = image.copy()
                for i in range(0, min(maxd, ndet)):
                    # Box corner coordinates as plain ints for cv2.rectangle
                    bb = [int(v) for v in saliencyMap[i][0]]
                    # Box colour
                    shade = (i * step) % 255
                    col = shade, 50, 255 - shade
                    # Random offset applied to the box
                    off = (random.randint(-jitter, jitter),
                           random.randint(-jitter, jitter))
                    # Draw the box
                    cv2.rectangle(draw, (bb[0] + off[0], bb[1] + off[1]),
                                  (bb[2] + off[0], bb[3] + off[1]), col, 2)
                    # Mini colour legend, one swatch per box
                    cv2.rectangle(draw, (20, 20 + i * 10, 10, 10), col, -1)
                # NOTE: cv2.imwrite does not create missing directories
                cv2.imwrite("Results/BING_draw.jpg", draw)
                cv2.imshow("BING", draw)
                cv2.waitKey(0)
            else:
                print("No saliency found for {}".format(video_name))

        # The motion detector builds its model from successive frames
        elif saliency_algorithm.find("BinWangApr2014") == 0:
            saliencyAlgorithm = cv2.saliency.MotionSaliencyBinWangApr2014_create()
            # Size the internal data structures, then initialise
            saliencyAlgorithm.setImagesize(image.shape[1], image.shape[0])
            saliencyAlgorithm.init()
            paused = False
            while True:
                if not paused:
                    _, frame = cap.read()
                    if frame is None:
                        break
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    # Compute saliency for the current frame
                    start = cv2.getTickCount()
                    success, saliencyMap = saliencyAlgorithm.computeSaliency(frame)
                    print("computeSaliency cost time is: {} ms".format(_elapsed_ms(start)))
                    cv2.imshow("image", frame)
                    # Scale the map so that it is visible on screen
                    cv2.imshow("saliencyMap", saliencyMap * 255)
                # Mask to the low byte; "no key" (-1) becomes 255 and matches nothing
                key = cv2.waitKey(2) & 0xFF
                if key == ord('q'):
                    break
                if key == ord('p'):
                    paused = not paused

        else:
            print("Unknown saliency_algorithm: {}".format(saliency_algorithm))
    finally:
        # Always free the capture and close any window that was opened
        cap.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
对单张图片进行显著性检测后的结果如下所示:
类型 | 图片(487X365) | 单帧处理时间/ms |
---|---|---|
原图 | (图略) | - |
StaticSaliencySpectralResidual | (图略) | 2.8 |
StaticSaliencyFineGrained | (图略) | 53.7 |
ObjectnessBING | (图略) | 411.7 |
此外,为了改善可视化效果,对StaticSaliencySpectralResidual和StaticSaliencyFineGrained的显著性检测结果图片进行了二值化,主要通过StaticSaliency::computeBinaryMap实现,即先聚类然后阈值分割。结果如下所示:
类型 | 图片 | 单帧处理时间/ms |
---|---|---|
StaticSaliencySpectralResidual | (图略) | 48.4 |
StaticSaliencyFineGrained | (图略) | 52.4 |
对视频进行检测,StaticSaliencySpectralResidual,StaticSaliencyFineGrained,ObjectnessBING就是对每帧进行检测;MotionSaliencyBinWangApr2014就是对每帧图片进行运动建模和显著性检测。取视频第100帧显著性检测结果,结果如下所示:
类型 | 图片(768X576) | 单帧处理时间/ms |
---|---|---|
原图 | (图略) | - |
StaticSaliencySpectralResidual | (图略) | 3.2 |
StaticSaliencyFineGrained | (图略) | 119.2 |
ObjectnessBING | (图略) | 986.5 |
MotionSaliencyBinWangApr2014 | (图略) | 65.1 |
二值化结果如下所示:
类型 | 图片 | 单帧处理时间/ms |
---|---|---|
StaticSaliencySpectralResidual | (图略) | 120.125 |
StaticSaliencyFineGrained | (图略) | 138.783 |
可以很认真地说,OpenCV contrib库中的saliency模块所提供的视觉显著性检测算法效果都很差。这里所说的效果差主要是就实际落地应用而言,读者可以自行进行大规模测试验证。具体来说,OpenCV提供的视觉显著性检测算法都只能应用于简单场景,在复杂场景下很容易出错,而且精度和速度都不如直接使用目标识别算法。当然,现有的视觉显著性检测算法效果都很差,都无法实用,具体介绍可见:视觉图像显著性检测综述
如果非要对OpenCV contrib库中的saliency模块所提供的视觉显著性检测算法进行评价,个人感觉如下: