box filter的作用很简单,即对局部区域求平均,并把值赋给某个点,一般我们赋给区域中心。用公式表达如下:
$$result(row, col) = \cfrac{1}{n^2} \sum_i^{patch} {\sum_j^{patch} {image(i,j)}}$$
其中 $patch$ 是以 $(row, col)$ 为中心的一块区域,$n$ 为该区域的边长(即 $n^2$ 为区域内的像素个数)。
为了跟后面的公式及程序对应,我们做如下定义:
外层两个循环是关于完整图像 $(row, col)$ 的循环,内层两个循环是关于图像patch $(i, j)$ 的循环。
注意:如果图像是多通道的话,实际上还有一个通道维度的循环,但是通道数不是本文优化的重心,所以本文不再赘述这个因素,后文也不再提,并且在计算量的估计中也会把这个因素省略掉。
这个实现比较简单,需要做的计算有:
patch的平均可以进行行列分离,也就是先对行方向做平均,并缓存结果,再对缓存的结果做列方向的平均。以公式的形式表达如下:
$$\cfrac{1}{n^2} \sum_i {\sum_j {image(i,j)}} = \cfrac{1}{n}\sum_i {\cfrac{1}{n} \sum_j {image(i,j)}}$$
举个例子展开写会容易理解,比如3*3的patch,共9个数:
$$
\begin{aligned}
mean(a_{ij}) &= \cfrac{1}{9} (a_{00} + a_{01} + a_{02} + a_{10} + a_{11} + a_{12} + a_{20} + a_{21} + a_{22}) \\[2ex]
&= \cfrac{1}{3} (\cfrac{1}{3} (a_{00} + a_{01} + a_{02}) + \cfrac{1}{3} (a_{10} + a_{11} + a_{12}) + \cfrac{1}{3} (a_{20} + a_{21} + a_{22})) \\[2ex]
&= \cfrac{1}{3} (\cfrac{1}{3} \sum_j {a_{0j}} + \cfrac{1}{3} \sum_j {a_{1j}} + \cfrac{1}{3} \sum_j {a_{2j}}) \\[2ex]
&= \cfrac{1}{3} \sum_i (\cfrac{1}{3} \sum_j {a_{ij}})
\end{aligned}
$$
这种方式的计算量:
第二种实现可以对求和做进一步优化。在单个维度做求和时,可以对当前一维patch的和做一个缓存,当中心点移动后,减去弹出像素的值,加上新增像素的值,这样就避免了重复性求和操作。
这种方案需要对patch的和做一个初始化和缓存,该方案的计算量为:
上面做计算量估计的时候没有考虑边界条件,在具体代码实现的时候需要仔细处理边界,防止数组访问越界。
代码同时跟opencv做了个效果和性能的对比,第三种方式虽然仍然比opencv慢,但性能基本处于同一量级了,opencv可能还做了一些其他跟算法无关的优化,比如指令集、并行化之类的。
注意:下面为了方便比较,opencv `boxFilter` 的边界处理参数选择 `BORDER_CONSTANT`。即使是边界处patch不满覆盖的情况下,opencv仍然除以 $n^2$,也就是说除以的数字有点大了,所以边界会逐渐发黑,特别是kernel_size(对应于radius)比较大的时候视觉效果更明显。
#include <algorithm>
#include <ctime>
#include <iostream>
#include <stdexcept>
#include <vector>
#include <opencv2/opencv.hpp>
// File-scope using-directives: every unqualified std:: and cv:: name below
// (cout, vector, max/min, Mat, Vec3f, imread, ...) resolves through these two.
using namespace std;
using namespace cv;
// Forward declarations of the three box-filter variants defined after main().
// Each takes the source image and the window radius (the window spans
// 2 * radius + 1 pixels per axis before clipping at the image border) and
// returns the filtered image as a new Mat.
// Variant 1: direct sum over the full 2-D window for every pixel.
Mat BoxFilter_1(const Mat& image, int radius);
// Variant 2: separable version, a row-wise mean followed by a column-wise mean.
Mat BoxFilter_2(const Mat& image, int radius);
// Variant 3: separable version that maintains running sums while the window slides.
Mat BoxFilter_3(const Mat& image, int radius);
/// Demo driver: loads "lena_std.bmp", filters it with OpenCV's boxFilter and
/// with the three hand-written variants, prints the elapsed clock() ticks of
/// each, and displays all results plus an amplified difference image.
///
/// @return 0 on success, 1 if the input image could not be loaded.
int main()
{
    // IMREAD_COLOR always yields an 8-bit, 3-channel image, which is what the
    // CV_32FC3 / Vec3f code in the BoxFilter_* functions requires.
    // IMREAD_UNCHANGED could instead return a grayscale, 4-channel or 16-bit
    // image, because convertTo() changes only the depth, never the channel count.
    Mat image = imread("lena_std.bmp", IMREAD_COLOR);
    if (image.empty()) {
        // imread reports failure by returning an empty Mat rather than throwing.
        std::cerr << "failed to load image: lena_std.bmp" << std::endl;
        return 1;
    }

    // Work in float with values normalised to [0, 1].
    image.convertTo(image, CV_32FC3);
    image /= 255.0f;

    const int radius = 9;
    const int ksize = radius * 2 + 1;  // full window width/height

    // Reference result from OpenCV, timed the same way as the variants below.
    Mat image_box_filter_cv;
    const clock_t time_beg = clock();
    boxFilter(image, image_box_filter_cv, -1, Size(ksize, ksize), Point(-1, -1), true, BORDER_CONSTANT);
    const clock_t time_end = clock();
    std::cout << "box-filter-cv time cost: " << time_end - time_beg << std::endl;

    // Each BoxFilter_* prints its own timing.
    Mat image_box_filter_1 = BoxFilter_1(image, radius);
    Mat image_box_filter_2 = BoxFilter_2(image, radius);
    Mat image_box_filter_3 = BoxFilter_3(image, radius);

    // Window flag 1 is the same value the original code passed to namedWindow.
    namedWindow("original_image", 1);
    imshow("original_image", image);
    namedWindow("cv_box_filter", 1);
    imshow("cv_box_filter", image_box_filter_cv);
    namedWindow("box_filter-1", 1);
    imshow("box_filter-1", image_box_filter_1);
    namedWindow("box_filter-2", 1);
    imshow("box_filter-2", image_box_filter_2);
    namedWindow("box_filter-3", 1);
    imshow("box_filter-3", image_box_filter_3);

    // Difference between the separable and the sliding-window variants,
    // scaled by 50 so that small discrepancies become visible.
    Mat diff;
    cv::absdiff(image_box_filter_2, image_box_filter_3, diff);
    namedWindow("diff", 1);
    imshow("diff", 50 * diff);

    waitKey(0);
    destroyAllWindows();
    return 0;
}
/// Brute-force box filter: for every pixel, averages all pixels inside the
/// (2 * radius + 1)-wide square window centred on it, clipped to the image.
///
/// The divisor is the number of pixels actually inside the clipped window, so
/// borders are not darkened. (Dividing by the full window area
/// (2 * radius + 1)^2 instead would mimic OpenCV's BORDER_CONSTANT result.)
/// The output is clamped to [0, 1] and the elapsed clock() ticks are printed.
///
/// @param image   Source image; must be CV_32FC3 unless it is empty.
/// @param radius  Window radius in pixels; must be >= 0.
/// @return        Filtered CV_32FC3 image of the same size as @p image.
/// @throws std::invalid_argument if @p radius is negative or a non-empty
///         @p image is not CV_32FC3.
Mat BoxFilter_1(const Mat& image, int radius)
{
    if (radius < 0) {
        // A negative radius would produce empty windows and a 0/0 division.
        throw std::invalid_argument("BoxFilter_1: radius must be non-negative");
    }
    if (!image.empty() && image.type() != CV_32FC3) {
        // All pixel access below goes through Vec3f.
        throw std::invalid_argument("BoxFilter_1: image must be of type CV_32FC3");
    }

    constexpr int kChannels = 3;
    const int cols = image.cols;
    const int rows = image.rows;
    const int row_bound = rows - 1;
    const int col_bound = cols - 1;
    // Windows are clipped to the image anyway, so a radius beyond the image
    // extent changes nothing; clamping avoids signed overflow in row/col + r.
    const int r = std::min(radius, std::max(rows, cols));

    Mat result(rows, cols, CV_32FC3);
    const clock_t time_beg = clock();
    for (int row = 0; row < rows; ++row) {
        const int row_beg = std::max(row - r, 0);
        const int row_end = std::min(row + r, row_bound);
        for (int col = 0; col < cols; ++col) {
            const int col_beg = std::max(col - r, 0);
            const int col_end = std::min(col + r, col_bound);

            // Fixed-size accumulator: no heap allocation per pixel.
            float sums[kChannels] = {0.0f, 0.0f, 0.0f};
            for (int i = row_beg; i <= row_end; ++i) {
                for (int j = col_beg; j <= col_end; ++j) {
                    const Vec3f& pixel = image.at<Vec3f>(i, j);
                    for (int k = 0; k < kChannels; ++k) {
                        sums[k] += pixel[k];
                    }
                }
            }

            // Number of pixels inside the clipped window.
            const int count = (row_end - row_beg + 1) * (col_end - col_beg + 1);
            Vec3f& out = result.at<Vec3f>(row, col);
            for (int k = 0; k < kChannels; ++k) {
                out[k] = sums[k] / static_cast<float>(count);
            }
        }
    }
    result = cv::max(cv::min(result, 1.0), 0.0);
    const clock_t time_end = clock();
    std::cout << "box-filter-1 time cost: " << time_end - time_beg << std::endl;
    return result;
}
/// Separable box filter: first averages along each row into an intermediate
/// image, then averages that intermediate image along each column.
///
/// Both passes clip the 1-D window to the image and divide by the number of
/// samples actually inside it. Because the horizontal sample count depends
/// only on the column, the mean of the row means equals the mean over the
/// clipped 2-D window. The output is clamped to [0, 1] and the elapsed
/// clock() ticks are printed.
///
/// @param image   Source image; must be CV_32FC3 unless it is empty.
/// @param radius  Window radius in pixels; must be >= 0.
/// @return        Filtered CV_32FC3 image of the same size as @p image.
/// @throws std::invalid_argument if @p radius is negative or a non-empty
///         @p image is not CV_32FC3.
Mat BoxFilter_2(const Mat& image, int radius)
{
    if (radius < 0) {
        // A negative radius would produce empty windows and a 0/0 division.
        throw std::invalid_argument("BoxFilter_2: radius must be non-negative");
    }
    if (!image.empty() && image.type() != CV_32FC3) {
        // All pixel access below goes through Vec3f.
        throw std::invalid_argument("BoxFilter_2: image must be of type CV_32FC3");
    }

    constexpr int kChannels = 3;
    const int cols = image.cols;
    const int rows = image.rows;
    const int row_bound = rows - 1;
    const int col_bound = cols - 1;
    // Windows are clipped to the image anyway, so a radius beyond the image
    // extent changes nothing; clamping avoids signed overflow in row/col + r.
    const int r = std::min(radius, std::max(rows, cols));

    Mat result(rows, cols, CV_32FC3);
    const clock_t time_beg = clock();

    // Pass 1: horizontal mean of the source image.
    Mat row_result(rows, cols, CV_32FC3);
    for (int row = 0; row < rows; ++row) {
        for (int col = 0; col < cols; ++col) {
            const int col_beg = std::max(col - r, 0);
            const int col_end = std::min(col + r, col_bound);

            // Fixed-size accumulator: no heap allocation per pixel.
            float sums[kChannels] = {0.0f, 0.0f, 0.0f};
            for (int j = col_beg; j <= col_end; ++j) {
                const Vec3f& pixel = image.at<Vec3f>(row, j);
                for (int k = 0; k < kChannels; ++k) {
                    sums[k] += pixel[k];
                }
            }

            const int count = col_end - col_beg + 1;
            Vec3f& out = row_result.at<Vec3f>(row, col);
            for (int k = 0; k < kChannels; ++k) {
                out[k] = sums[k] / static_cast<float>(count);
            }
        }
    }

    // Pass 2: vertical mean of the horizontal means.
    for (int col = 0; col < cols; ++col) {
        for (int row = 0; row < rows; ++row) {
            const int row_beg = std::max(row - r, 0);
            const int row_end = std::min(row + r, row_bound);

            float sums[kChannels] = {0.0f, 0.0f, 0.0f};
            for (int i = row_beg; i <= row_end; ++i) {
                const Vec3f& pixel = row_result.at<Vec3f>(i, col);
                for (int k = 0; k < kChannels; ++k) {
                    sums[k] += pixel[k];
                }
            }

            const int count = row_end - row_beg + 1;
            Vec3f& out = result.at<Vec3f>(row, col);
            for (int k = 0; k < kChannels; ++k) {
                out[k] = sums[k] / static_cast<float>(count);
            }
        }
    }

    result = cv::max(cv::min(result, 1.0), 0.0);
    const clock_t time_end = clock();
    std::cout << "box-filter-2 time cost: " << time_end - time_beg << std::endl;
    return result;
}
/// Separable box filter with running sums: like the row/column two-pass
/// scheme, but each 1-D window sum is updated incrementally as the centre
/// moves, by subtracting the sample that leaves the window and adding the one
/// that enters it, instead of being recomputed from scratch.
///
/// Windows are clipped to the image and each mean is divided by the number of
/// samples currently inside the window. The output is clamped to [0, 1] and
/// the elapsed clock() ticks are printed.
///
/// @param image   Source image; must be CV_32FC3 unless it is empty.
/// @param radius  Window radius in pixels; must be >= 0.
/// @return        Filtered CV_32FC3 image of the same size as @p image.
/// @throws std::invalid_argument if @p radius is negative or a non-empty
///         @p image is not CV_32FC3.
Mat BoxFilter_3(const Mat& image, int radius)
{
    if (radius < 0) {
        // With a negative radius the "leaving" index col - radius - 1 would
        // run past the end of the row and the sample count would reach zero.
        throw std::invalid_argument("BoxFilter_3: radius must be non-negative");
    }
    if (!image.empty() && image.type() != CV_32FC3) {
        // All pixel access below goes through Vec3f.
        throw std::invalid_argument("BoxFilter_3: image must be of type CV_32FC3");
    }

    constexpr int kChannels = 3;
    const int cols = image.cols;
    const int rows = image.rows;
    // A radius at least as large as the axis length already covers the whole
    // axis, so clamping leaves the result unchanged while bounding the priming
    // loops and avoiding signed overflow in col/row + radius.
    const int h_radius = std::min(radius, cols);
    const int v_radius = std::min(radius, rows);

    Mat result(rows, cols, CV_32FC3);
    const clock_t time_beg = clock();

    // Pass 1: horizontal running mean of the source image.
    Mat row_result(rows, cols, CV_32FC3);
    for (int row = 0; row < rows; ++row) {
        float sums[kChannels] = {0.0f, 0.0f, 0.0f};
        int count = 0;
        // Prime the window with columns [0, h_radius); the first loop step
        // below then adds column h_radius, completing the window for col 0.
        for (int col = 0; col < h_radius; ++col) {
            ++count;
            const Vec3f& pixel = image.at<Vec3f>(row, col);
            for (int k = 0; k < kChannels; ++k) {
                sums[k] += pixel[k];
            }
        }
        for (int col = 0; col < cols; ++col) {
            const int left = col - h_radius - 1;  // sample leaving the window
            const int right = col + h_radius;     // sample entering the window
            if (left >= 0) {
                --count;
                const Vec3f& pixel = image.at<Vec3f>(row, left);
                for (int k = 0; k < kChannels; ++k) {
                    sums[k] -= pixel[k];
                }
            }
            if (right < cols) {
                ++count;
                const Vec3f& pixel = image.at<Vec3f>(row, right);
                for (int k = 0; k < kChannels; ++k) {
                    sums[k] += pixel[k];
                }
            }
            Vec3f& out = row_result.at<Vec3f>(row, col);
            for (int k = 0; k < kChannels; ++k) {
                out[k] = sums[k] / static_cast<float>(count);
            }
        }
    }

    // Pass 2: vertical running mean of the horizontal means.
    for (int col = 0; col < cols; ++col) {
        float sums[kChannels] = {0.0f, 0.0f, 0.0f};
        int count = 0;
        // Prime the window with rows [0, v_radius).
        for (int row = 0; row < v_radius; ++row) {
            ++count;
            const Vec3f& pixel = row_result.at<Vec3f>(row, col);
            for (int k = 0; k < kChannels; ++k) {
                sums[k] += pixel[k];
            }
        }
        for (int row = 0; row < rows; ++row) {
            const int up = row - v_radius - 1;  // sample leaving the window
            const int down = row + v_radius;    // sample entering the window
            if (up >= 0) {
                --count;
                const Vec3f& pixel = row_result.at<Vec3f>(up, col);
                for (int k = 0; k < kChannels; ++k) {
                    sums[k] -= pixel[k];
                }
            }
            if (down < rows) {
                ++count;
                const Vec3f& pixel = row_result.at<Vec3f>(down, col);
                for (int k = 0; k < kChannels; ++k) {
                    sums[k] += pixel[k];
                }
            }
            Vec3f& out = result.at<Vec3f>(row, col);
            for (int k = 0; k < kChannels; ++k) {
                out[k] = sums[k] / static_cast<float>(count);
            }
        }
    }

    result = cv::max(cv::min(result, 1.0), 0.0);
    const clock_t time_end = clock();
    std::cout << "box-filter-3 time cost: " << time_end - time_beg << std::endl;
    return result;
}