dlib是一个很不错的视觉库。相比opencv,dlib里面的很多算法更接近工业实际,当然这只是个人的感觉。opencv也很不错,涉及图像处理的方方面面,为图像开发者带来了很多便利。最近一段时间在做hog检测算法的研究,发现opencv实现的hog检测效果不如dlib的检测效果。一方面,dlib采用的并非最原始的hog,而是fhog(Felzenszwalb HOG):fhog是以cell为单元提取最终的特征向量,而hog是以block为单元提取最终的特征向量;另一方面,dlib在目标检测的训练过程中,考虑了目标检测框到特征空间的映射,以及从特征空间反映射回来的框与原始框的IOU,也就是在训练过程中考虑了缩放层级的问题,从而使得提取的特征更为准确。这一优势也使得dlib对标签制作的要求比较严格:dlib训练要求所有层级中mapped_rect与truth_rect的IOU一定要大于0.5,不然会报错。其实这也很容易理解:如果mapped_rect与truth_rect的IOU小于0.5,那么提取的特征肯定会比较差。好了,说了一大堆文字,没看过源码的读者可能会有些不理解,望见谅。本文主要实现的是dlib目标检测的前向过程,代码中做了一些修改,实际效果和原始的fhog基本一致。
首先是fhog特征提取
#ifndef _FHOG_H_
#define _FHOG_H_
#include "common.h"
#include "image.h"
#include "point.h"
// Builds the FHOG feature image for `img` and returns it as a newly allocated
// image; the caller is responsible for releasing the returned image.
image_t* extract_fhog(image_t *img);
// Maps a point given in FHOG feature-map coordinates back to image pixel
// coordinates. Both padding arguments default to 12 when omitted.
point_t fhog_to_image(point_t p, int filter_rows_padding = 12, int filter_cols_padding = 12);
#endif
#include "fhog.h"
//quadrant
// Returns the quadrant number (1..4) of the gradient (dx, dy), or 0 when
// dx == 0. A zero dy is grouped with the non-negative side, so (dx>0, dy==0)
// is quadrant 1 and (dx<0, dy==0) is quadrant 2.
static int quadrant(int dx, int dy)
{
    if (dx == 0)
        return 0;

    const bool upper = (dy >= 0);
    if (dx > 0)
        return upper ? 1 : 4;
    return upper ? 2 : 3;
}
// Returns the smaller of the two values; when they compare equal (or are
// unordered) the first argument is returned.
static float cmp_min(float a, float b)
{
    return (a > b) ? b : a;
}
image_t* extract_fhog(image_t *img)
{
//cell num
int cell_nr = (int)((float)img->nr / (float)(cell_size)+0.5);
int cell_nc = (int)((float)img->nc / (float)(cell_size)+0.5);
if (cell_nr == 0 || cell_nc == 0)
printf("cell num invalid\n");
int bins = 8;
//hist设置和初始化
float *hist = (float*)malloc(cell_nr * cell_nc * 2 * bins * sizeof(float));
float *norm = (float*)malloc(cell_nr * cell_nc * sizeof(float));
for (int r = 0; r < cell_nr; r++)
{
for (int c = 0; c < cell_nc; c++)
{
norm[r * cell_nc + c] = 0;
for (int k = 0; k < 2 * bins; k++)
{
hist[(r * cell_nc + c) * 2 * bins + k] = 0;
}
}
}
const int hog_nr = max(cell_nr - 2, 0);
const int hog_nc = max(cell_nc - 2, 0);
if (hog_nr == 0 || hog_nc == 0)
printf("hog size invalid\n");
const int padding_rows_offset = (cell_per_win - 1) / 2;
const int padding_cols_offset = (cell_per_win - 1) / 2;
const int hog_h = hog_nr + cell_per_win - 1;
const int hog_w = hog_nc + cell_per_win - 1;
image_t* hog = image_alloc(hog_w, hog_h, dims, IM_32FC1);
float* feat = hog->data_f32;
for (int o = 0; o < dims; o++)
{
for (int y = 0; y < hog_nr + cell_per_win - 1; y++)
{
for (int x = 0; x < hog_nc + cell_per_win - 1; x++)
{
feat[o * (hog_nr + cell_per_win - 1) * (hog_nc + cell_per_win - 1) + y * (hog_nc + cell_per_win - 1) + x] = 0;
}
}
}
const int visible_nr = (min(cell_nr * cell_size, img->nr)) - 1;
const int visible_nc = (min(cell_nc * cell_size, img->nc)) - 1;
int best_o = 0, flag = 0;
int tempx = 0, tempy = 0;
float absx = 0, absy = 0;
float Dtan22 = 0, Dtan45 = 0, Dtan67 = 0, Dy = 0;
float v = 0;
float sum_data = 0;
float shift_n = 8;
int count_num = 0;
for (int y = 1; y < visible_nr; y++)
{
for (int x = 1; x < visible_nc; x++)
{
tempx = img->data_u8[y * img->nc + x + 1] - img->data_u8[y * img->nc + x - 1];//((int)get_pixel_intensity(img[y][x + 1]) - (int)get_pixel_intensity(img[y][x - 1]));
tempy = img->data_u8[(y + 1) * img->nc + x] - img->data_u8[(y - 1) * img->nc + x];
absx = fabs((float)tempx);
absy = fabs((float)tempy);
v = absx + absy;
Dtan22 = absx * tan22;
Dtan45 = absx * tan45;
Dtan67 = absx * tan67;
Dy = absy;
flag = 0;
if (tempx == 0)
{
if (tempy == 0)
best_o = 0;
else
best_o = 3;
}
else
{
if (Dy <= Dtan22)
{
flag = quadrant(tempx, tempy);
if (flag == 1 || flag == 3)
best_o = 0;
else if (flag == 2 || flag == 4)
best_o = 7;
}
else if (Dy > Dtan22 && Dy <= Dtan45)
{
flag = quadrant(tempx, tempy);
if (flag == 1 || flag == 3)
best_o = 1;
else if (flag == 2 || flag == 4)
best_o = 6;
}
else if (Dy > Dtan45 && Dy <= Dtan67)
{
flag = quadrant(tempx, tempy);
if (flag == 1 || flag == 3)
best_o = 2;
else if (flag == 2 || flag == 4)
best_o = 5;
}
else if (Dy > Dtan67)
{
flag = quadrant(tempx, tempy);
if (flag == 1 || flag == 3)
best_o = 3;
else if (flag == 2 || flag == 4)
best_o = 4;
}
}
if (flag == 3 || flag == 4)
best_o = best_o + bins;
float xp = ((float)x + 0.5) / (float)bins - 0.5;
float yp = ((float)y + 0.5) / (float)bins - 0.5;
int ixp = (int)std::floor(xp);
int iyp = (int)std::floor(yp);
float vx0 = xp - ixp;
float vy0 = yp - iyp;
float vx1 = 1.0 - vx0;
float vy1 = 1.0 - vy0;
if (ixp >= 0 && iyp >= 0)
{
*(hist + iyp * cell_nc + ixp + best_o * cell_nr * cell_nc) += ((vx1 * vy1 * v));
}
if (ixp + 1 < cell_nc && iyp >= 0 && ixp + 1 >= 0)
{
*(hist + iyp * cell_nc + (ixp + 1) + best_o * cell_nr * cell_nc) += ((vx0 * vy1 * v));
}
if (ixp >= 0 && iyp + 1 < cell_nr && iyp + 1 >= 0)
{
*(hist + (iyp + 1) * cell_nc + ixp + best_o * cell_nr * cell_nc) += ((vx1 * vy0 * v));
}
if (ixp + 1 < cell_nc && iyp + 1 < cell_nr && ixp + 1 >= 0 && iyp + 1 >= 0)
{
*(hist + (ixp + 1) + (iyp + 1) * cell_nc + best_o * cell_nr * cell_nc) += ((vx0 * vy0 * v));
}
}
}
//norm
for (int o = 0; o < bins; o++)
{
f32 *p1 = hist + o * cell_nr * cell_nc;
f32 *p2 = hist + (o + bins) * cell_nr * cell_nc;
f32 *dst = norm;
f32 *end = norm + cell_nr * cell_nc;
while (dst < end)
{
*(dst++) += (*p1 + *p2) * (*p1 + *p2);
p1++;
p2++;
}
}
////compute features
float up_limit = 0.2;
float temp_value[16];
for (int y = 0; y < hog_nr; y++)
{
const int yy = y + padding_rows_offset;
for (int x = 0; x < hog_nc; x++)
{
const int xx = x + padding_cols_offset;
float *dst = feat + yy * hog_w + xx;
float *src, *p;
float n1, n2, n3, n4;
p = norm + (y + 1) * cell_nc + x + 1;
n1 = 1. / sqrt(*p + *(p + 1) + *(p + cell_nc) + *(p + cell_nc + 1));
p = norm + (y + 1) * cell_nc + x;
n2 = 1. / sqrt(*p + *(p + 1) + *(p + cell_nc) + *(p + cell_nc + 1));
p = norm + y * cell_nc + x + 1;
n3 = 1. / sqrt(*p + *(p + 1) + *(p + cell_nc) + *(p + cell_nc + 1));
p = norm + y * cell_nc + x;
n4 = 1. / sqrt(*p + *(p + 1) + *(p + cell_nc) + *(p + cell_nc + 1));
//16 directions feature
float sum_data = 0;
src = hist + (y + 1) * cell_nc + x + 1;
for (int o = 0; o < 2 * bins; o++)
{
//clip data
float h1 = min(*src * n1, up_limit);
float h2 = min(*src * n2, up_limit);
float h3 = min(*src * n3, up_limit);
float h4 = min(*src * n4, up_limit);
sum_data = (h1 + h2 + h3 + h4) * 0.5;
*dst = sum_data;
temp_value[o] = sum_data;
dst += hog_w * hog_h;
src += cell_nr * cell_nc;
}
//8 directions feature
src = hist + (y + 1) * cell_nc + x + 1;
for (int o = 0; o < bins; o++)
{
f32 sum = temp_value[o] + temp_value[o + bins];
float h1 = min(sum, up_limit);
float h2 = min(sum, up_limit);
float h3 = min(sum, up_limit);
float h4 = min(sum, up_limit);
sum_data = (h1 + h2 + h3 + h4) * 0.5;
*dst = sum_data;
dst += hog_w * hog_h;
src += cell_nc * cell_nr;
}
}
}
free(hist);
free(norm);
return hog;
}
//hog空间坐标映射回image空间
// Converts a point from FHOG feature-map coordinates to image pixel
// coordinates: shift by one cell, remove the filter padding, scale by the
// cell size, shift by one pixel and finally move half a cell away from zero
// along each axis.
point_t fhog_to_image(point_t p, int filter_rows_padding, int filter_cols_padding)
{
    point_t unit = point_init(1, 1);
    point_t pad = point_init((filter_cols_padding - 1) / 2, (filter_rows_padding - 1) / 2);

    p = point_add(p, unit);
    p = point_sub(p, pad);
    p = point_mul(p, cell_size);
    p = point_add(p, unit);

    // Half-cell offset whose sign follows the sign of each coordinate
    // (zero counts as non-negative).
    point_t half_cell = point_init(p.x >= 0 ? cell_size / 2 : -cell_size / 2,
                                   p.y >= 0 ? cell_size / 2 : -cell_size / 2);
    return point_add(p, half_cell);
}
其次就是用svm系数对特征图进行滤波(加权求和)
#ifndef _FILTER_H_
#define _FILTER_H_
#include "common.h"
#include "image.h"
// Correlates every channel of in_img with the matching channel of filter and
// adds the per-pixel sum over all channels into out_img. Returns the rectangle
// of positions where the filter fits completely inside in_img.
rect_t spatially_filter_image(image_t* in_img, image_t* out_img, image_t *filter);
#endif
#include "filter.h"
// Correlates each of the `dims` planes of in_img with the matching plane of
// filter and accumulates the responses into out_img.
//
// in_img  : planar float image, nr rows by nc columns per plane.
// out_img : float image addressed with in_img's row stride; responses are
//           ADDED to its current contents, so the caller must zero it first.
// filter  : planar float filter; its own nr/nc are used both for the loop
//           bounds and for addressing its coefficients.
// returns : the region of out_img that was written, with l/t inclusive and
//           r/b exclusive. It is empty when the filter is larger than the image.
rect_t spatially_filter_image(image_t* in_img, image_t* out_img, image_t *filter)
{
    const int w = in_img->nc;
    const int h = in_img->nr;
    const int f_rows = filter->nr;
    const int f_cols = filter->nc;
    const int f_plane = f_rows * f_cols;

    // The response is stored at the filter's centre position, so only centres
    // for which the whole filter lies inside the image are evaluated.
    const int first_row = f_rows / 2;
    const int first_col = f_cols / 2;
    const int last_row = h - ((f_rows - 1) / 2);
    const int last_col = w - ((f_cols - 1) / 2);

    rect_t non_border;
    non_border.l = first_col;
    non_border.t = first_row;
    non_border.r = last_col;
    non_border.b = last_row;

    for (int ch = 0; ch < dims; ch++)
    {
        const f32 *img_plane = in_img->data_f32 + ch * w * h;
        const f32 *flt_plane = filter->data_f32 + ch * f_plane;
        for (int r = first_row; r < last_row; r++)
        {
            for (int c = first_col; c < last_col; c++)
            {
                // Three independent accumulators (columns unrolled by 3);
                // they are combined once per output pixel.
                f32 acc0 = 0, acc1 = 0, acc2 = 0;
                for (int m = 0; m < f_rows; m++)
                {
                    const f32 *img_row = img_plane + (r - first_row + m) * w + (c - first_col);
                    const f32 *flt_row = flt_plane + m * f_cols;
                    int n = 0;
                    for (; n < f_cols - 2; n += 3)
                    {
                        acc0 += img_row[n] * flt_row[n];
                        acc1 += img_row[n + 1] * flt_row[n + 1];
                        acc2 += img_row[n + 2] * flt_row[n + 2];
                    }
                    // Remaining one or two columns.
                    for (; n < f_cols; n++)
                    {
                        acc0 += img_row[n] * flt_row[n];
                    }
                }
                acc0 += acc1 + acc2;
                out_img->data_f32[r * w + c] += acc0;
            }
        }
    }
    return non_border;
}
然后就是检测函数
#ifndef _DETECT_H_
#define _DETECT_H_
#include "common.h"
#include "image.h"
#include "fhog.h"
#include "filter.h"
#include "point.h"
// Runs the multi-scale detector on img using the weights in filter, writes the
// surviving boxes into rects and returns how many were written.
int detection(image_t *img, image_t *filter, rect_detection_t *rects);
#endif
#include "detect.h"
// Sorts rects[0..num) in place by detection_confidence, highest first, using
// a simple exchange sort.
static void sort_box(rect_detection_t *rects,int num)
{
    for (int lead = 0; lead < num; ++lead)
    {
        // Comparing an element with itself never swaps, so start one past it.
        for (int probe = lead + 1; probe < num; ++probe)
        {
            if (rects[lead].detection_confidence < rects[probe].detection_confidence)
            {
                rect_detection_t held = rects[lead];
                rects[lead] = rects[probe];
                rects[probe] = held;
            }
        }
    }
}
//box的iou
// Overlap test between two detections. Returns non-zero when either box is
// degenerate (top > bottom or left > right), or when the intersection area
// relative to the common bounding box exceeds match_thresh, or relative to
// either box's own area exceeds overlap_thresh. Returns 0 otherwise.
static int boxes_overlap(rect_detection_t a, rect_detection_t b)
{
    const bool a_invalid = (a.box.top > a.box.bottom) || (a.box.left > a.box.right);
    const bool b_invalid = (b.box.top > b.box.bottom) || (b.box.left > b.box.right);
    if (a_invalid || b_invalid)
        return 1;

    // Intersection rectangle (coordinates are inclusive, hence the +1 below).
    box_t common;
    common.left = max(a.box.left, b.box.left);
    common.top = max(a.box.top, b.box.top);
    common.right = min(a.box.right, b.box.right);
    common.bottom = min(a.box.bottom, b.box.bottom);
    if (common.top > common.bottom || common.left > common.right)
        return 0;
    float inner_area = (common.right - common.left + 1) * (common.bottom - common.top + 1);

    // Smallest rectangle that contains both boxes.
    box_t hull;
    hull.left = min(a.box.left, b.box.left);
    hull.top = min(a.box.top, b.box.top);
    hull.right = max(a.box.right, b.box.right);
    hull.bottom = max(a.box.bottom, b.box.bottom);
    float outer_area = (hull.right - hull.left + 1) * (hull.bottom - hull.top + 1);

    float a_area = (a.box.right - a.box.left + 1) * (a.box.bottom - a.box.top + 1);
    float b_area = (b.box.right - b.box.left + 1) * (b.box.bottom - b.box.top + 1);

    if (inner_area / outer_area > match_thresh)
        return 1;
    if (inner_area / a_area > overlap_thresh)
        return 1;
    if (inner_area / b_area > overlap_thresh)
        return 1;
    return 0;
}
// Returns 1 if rect overlaps (per boxes_overlap) any of the first num entries
// of final_rects, otherwise 0.
static int overlaps_any_box(rect_detection_t *final_rects, rect_detection_t rect, int num)
{
    int idx = 0;
    while (idx < num)
    {
        if (boxes_overlap(final_rects[idx], rect))
            return 1;
        ++idx;
    }
    return 0;
}
// Multi-scale sliding-window detection.
//
// img         : 8-bit single-channel input image.
// filter      : detector weights, passed straight to spatially_filter_image().
// final_rects : output array for the boxes that survive non-maximum
//               suppression. At most 2000 entries are written, so the caller
//               must provide at least that much room.
// returns     : number of boxes written to final_rects (0 on allocation
//               failure or when the image is smaller than the minimum layer).
//
// Level 0 is the original image; level i > 0 is the image scaled by (5/6)^i.
// Every position whose filter response is >= thresh becomes a candidate box,
// candidates are sorted by score and greedily kept unless they overlap an
// already kept box.
int detection(image_t *img, image_t *filter, rect_detection_t *final_rects)
{
    const int max_candidates = 2000;   // capacity of the candidate buffer
    const int w = img->nc;
    const int h = img->nr;

    float *scale = (float*)malloc(max_pyramid_levels * sizeof(float));
    rect_detection_t* rects = (rect_detection_t*)malloc(max_candidates * sizeof(rect_detection_t));
    if (scale == NULL || rects == NULL)
    {
        printf("detection: out of memory\n");
        free(scale);
        free(rects);
        return 0;
    }

    // Count the pyramid levels: a level exists while the previous layer is
    // still at least the minimum size. scale[k] is the factor of level k + 1.
    int levels = 0;
    float scalef = 5. / 6;
    float tempw = w, temph = h;
    while (tempw >= min_pyramid_layer_width && temph >= min_pyramid_layer_height && levels < max_pyramid_levels)
    {
        tempw = w * scalef;
        temph = h * scalef;
        scale[levels] = scalef;
        levels++;
        scalef *= 5. / 6;
    }

    // Side length, in feature cells, of the box reported for a hit.
    const int win = cell_per_win - 2 * padding;

    int count = 0;
    bool buffer_full = false;
    for (int i = 0; i < levels && !buffer_full; i++)
    {
        image_t *feat = NULL;
        if (i == 0)
        {
            feat = extract_fhog(img);
        }
        else
        {
            // Rescale the input for this level, then extract its features.
            const int scalew = (int)((float)w * scale[i - 1]);
            const int scaleh = (int)((float)h * scale[i - 1]);
            image_t *scale_img = image_alloc(scalew, scaleh, 1, IM_8UC1);
            if (scale_img == NULL)
                continue;
            scale_bilinear(img, scale_img);
            feat = extract_fhog(scale_img);
            // Released the same way this function releases its other
            // image_alloc() results (data buffer first, then the struct).
            free(scale_img->data_u8);
            free(scale_img);
        }
        if (feat == NULL)
            continue;

        image_t *out_img = image_alloc(feat->nc, feat->nr, 1, IM_32FC1);
        if (out_img == NULL)
        {
            free(feat->data_f32);
            free(feat);
            continue;
        }
        // spatially_filter_image() accumulates, so start from zero.
        for (int k = 0; k < feat->nr * feat->nc; k++)
            out_img->data_f32[k] = 0;
        const rect_t area = spatially_filter_image(feat, out_img, filter);

        // Collect every response that reaches the threshold.
        for (int r = area.t; r < area.b && !buffer_full; r++)
        {
            for (int c = area.l; c < area.r; c++)
            {
                const float score = out_img->data_f32[r * out_img->nc + c];
                if (!(score >= thresh))
                    continue;
                if (count >= max_candidates)
                {
                    // Never write past the candidate buffer.
                    printf("detection: candidate buffer full, remaining hits dropped\n");
                    buffer_full = true;
                    break;
                }

                point_t p = point_init(c, r);
                point_t p_tl, p_br;
                p_tl.x = (p.x - win / 2);
                p_tl.y = (p.y - win / 2);
                p_br.x = (p_tl.x + win - 1);
                p_br.y = (p_tl.y + win - 1);
                // Feature cells -> pixels of this level -> pixels of level 0.
                p_tl = fhog_to_image(p_tl, cell_per_win, cell_per_win);
                p_br = fhog_to_image(p_br, cell_per_win, cell_per_win);
                p_tl = point_up(p_tl, i);
                p_br = point_up(p_br, i);

                rects[count].box.left = p_tl.x;
                rects[count].box.top = p_tl.y;
                rects[count].box.right = p_br.x;
                rects[count].box.bottom = p_br.y;
                rects[count].detection_confidence = score;
                count++;
            }
        }

        free(feat->data_f32);
        free(feat);
        free(out_img->data_f32);
        free(out_img);
    }

    // Highest score first, then greedy non-maximum suppression.
    sort_box(rects, count);
    int find_index = 0;
    for (int i = 0; i < count; i++)
    {
        if (overlaps_any_box(final_rects, rects[i], find_index))
            continue;
        final_rects[find_index++] = rects[i];
    }

    free(rects);
    free(scale);
    return find_index;
}
最后就是主函数:
#include "common.h"
#include "image.h"
#include "detect.h"
#include "detect_hog.h"
#include "io.h"
#include
#include
using namespace std;
//获取所有的文件名
void GetAllFiles(string path, std::vector& files, string format)
{
long long hFile = 0;
//文件信息
struct _finddata_t fileinfo;//用来存储文件信息的结构体
int len = format.length();
string p, temp;
if ((hFile = _findfirst(p.assign(path).append("\\*").c_str(), &fileinfo)) != -1) //第一次查找
{
do
{
if ((fileinfo.attrib & _A_SUBDIR)) //如果查找到的是文件夹
{
if (strcmp(fileinfo.name, ".") != 0 && strcmp(fileinfo.name, "..") != 0) //进入文件夹查找
{
//files.push_back(p.assign(path).append("\\").append(fileinfo.name) );
GetAllFiles(p.assign(path).append("\\").append(fileinfo.name), files, format);
}
}
else //如果查找到的不是是文件夹
{
//files.push_back(p.assign(fileinfo.name) ); //将文件路径保存,也可以只保存文件名: p.assign(path).append("\\").append(fileinfo.name)
temp = fileinfo.name;
//判断字符串是否以format格式结尾
if (temp.length()>len && temp.compare(temp.length() - len, len, format) == 0)
files.push_back(p.assign(path).append("\\").append(fileinfo.name));
}
} while (_findnext(hFile, &fileinfo) == 0);
_findclose(hFile); //结束查找
}
}
// Copies the first byte plane of `src` into a freshly malloc'ed image_t tagged
// IM_8UC1 with one channel. One byte per pixel is read from each row of src,
// honouring src.step; the copy itself is tightly packed (row stride == width).
// NOTE(review): this presumably expects an 8-bit single-channel Mat - confirm
// against callers. The caller frees data_u8 and then the struct.
image_t* load_image(Mat src)
{
    const int h = src.rows;
    const int w = src.cols;

    image_t* out = (image_t*)malloc(sizeof(image_t));
    out->type = IM_8UC1;
    out->ch = 1;
    out->nr = h;
    out->nc = w;
    out->size = w * h * sizeof(u8);
    out->data_u8 = (u8*)malloc(w * h * sizeof(u8));

    u8 *dst = out->data_u8;
    for (int row = 0; row < h; ++row)
    {
        for (int col = 0; col < w; ++col)
        {
            *dst++ = src.data[row * src.step + col];
        }
    }
    return out;
}
#define PIC 1
#define VIDEO 0
void main()
{
#if PIC
image_t* img_gray ;
image_t* img_filter = image_alloc(cell_per_win, cell_per_win, dims, IM_32FC1);
rect_detection_t* result = (rect_detection_t*)malloc(2000 * sizeof(rect_detection_t));
FILE *fp = fopen("weights3.txt", "rb");
if (fread(img_filter->data_f32, cell_per_win * cell_per_win * dims * sizeof(float), 1, fp) != 1)
printf("read file error\n");
fclose(fp);
int box_num = 0;
std::vector file_list;
string file_path = "test1";
GetAllFiles(file_path, file_list, ".jpg");
int count = 0;
char info[200];
for (int i = 0; i < file_list.size(); i++)
{
Mat src = imread(file_list[i]);
Mat gray = imread(file_list[i], 0);
img_gray = load_image(gray);
box_num = detection(img_gray, img_filter,result);
for (int i = 0; i < box_num; i++)
{
Rect r;
r.x = result[i].box.left;
r.y = result[i].box.top;
r.width = result[i].box.right - result[i].box.left + 1;
r.height = result[i].box.bottom - result[i].box.top + 1;
rectangle(src, r, Scalar(0, 255, 0), 2, 8, 0);
}
imshow("src", src);
waitKey(2);
sprintf(info,"result/%d.jpg",count++);
imwrite(info,src);
free(img_gray -> data_u8);
free(img_gray);
}
#endif
#if VIDEO
Mat src_img;
Mat gray_img;
int box_num = 0;
VideoCapture cap("0.mp4");
image_t *img_gray;
image_t* img_filter = image_alloc(cell_per_win, cell_per_win, dims, IM_32FC1);
rect_detection_t* result = (rect_detection_t*)malloc(2000 * sizeof(rect_detection_t));
FILE *fp = fopen("weights1.txt", "rb");
if (fread(img_filter->data_f32, cell_per_win * cell_per_win * dims * sizeof(float), 1, fp) != 1)
printf("read file error\n");
fclose(fp);
//float thresh = 1.4150455669441055;// 1.1907374298637405;
int count = 0;
char info[200];
while (1)
{
cap >> src_img;
//resize(src_img, src_img, Size(2 * src_img.cols, 2 * src_img.rows * 2));
cvtColor(src_img, gray_img, CV_BGR2GRAY);
img_gray = load_image(gray_img);
box_num = detection(img_gray, img_filter, thresh, result);
for (int i = 0; i < box_num; i++)
{
Rect r;
r.x = result[i].box.left;
r.y = result[i].box.top;
r.width = result[i].box.right - result[i].box.left + 1;
r.height = result[i].box.bottom - result[i].box.top + 1;
rectangle(src_img, r, Scalar(0, 255, 0), 2, 8, 0);
}
imshow("src", src_img);
waitKey(2);
sprintf(info,"video3/%d.jpg",count++);
imwrite(info,src_img);
box_num = 0;
free(img_gray->data_u8);
free(img_gray);
}
#endif
int kk = 1;
}
附上一张效果图
文中的测试模型并不是最好的模型,可以自己通过训练得到更好的模型。如果想要和dlib原始fhog完全一样的特征,可以自己稍加修改;不过本文的特征我亲自实验了几次,效果基本差不多。关于训练的内容需要自己多探索,我也是刚刚开始摸索。由于dlib的测试程序太慢了,无奈之下只好自己动手复现了一遍。水平有限,若有不当之处,请指教,谢谢!
源码下载:https://download.csdn.net/download/yongjiankuang/10662941
github: