http://cs.nju.edu.cn/wujx/projects/C4/C4.htm
Jianxin Wu实现的快速行人检测方法。
Real-Time Human Detection Using Contour Cues:
http://c2inet.sce.ntu.edu.sg/Jianxin/paper/ICRA_final.pdf
C4能够达到比现有人体检测算法更高的速度:在640*480的视频上,仅使用单核2.8GHz处理器,C4即可达到20fps;而现有的最快系统(保证有较低的虚警率和较高的检测率)只能达到大约10fps,并且它还使用了GPU的并行处理。C4在小分辨率的图片上速度更快,在480*360分辨率的视频上速度达到36.3fps,在320*240分辨率大小的视频上能达到109fps。
不需要显式地构造Hlin的特征向量并不是使得C4如此快的唯一原因。在INRIA数据集上进行的测试表明,第一级线性分类器Hlin是一个强大的分类器,可以过滤大约99.43%的图片区域,只有不到0.6%的图片区域需要HIK核分类器Hhik的处理。C4处理INRIA测试集中的所有图片用了27.1秒,而HOG检测器[1]需要2167.5秒(所以C4相对于HOG大约有80倍的加速)。
下面是仅仅使用CPU运行的代码:
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
// REAL is the floating-point type used by the image code below:
// double when USE_DOUBLE is defined (as it is here), float otherwise.
#define USE_DOUBLE
#ifdef USE_DOUBLE
typedef double REAL;
#else
typedef float REAL;
#endif
// Forward declaration of the contiguous-storage variant defined further down.
template<class T> class Array2dC;

// Array2d: an owning 2-D array in which every row is a separate heap allocation.
//   nrow / ncol -- current dimensions (both 0 when the array is empty)
//   p           -- p[i] points to row i; NULL when the array is empty
// Copy construction and assignment perform deep copies.
template<class T>
class Array2d
{
public:
    int nrow;
    int ncol;
    T** p;
public:
    Array2d():nrow(0),ncol(0),p(NULL) { }
    Array2d(const int nrow,const int ncol):nrow(0),ncol(0),p(NULL)
    {
        Create(nrow,ncol);
    }
    Array2d(const Array2d<T>& source);
    virtual ~Array2d()
    {
        Clear();
    }
    Array2d<T>& operator=(const Array2d<T>& source);
    // Release any existing storage and allocate _nrow x _ncol (both must be > 0).
    void Create(const int _nrow,const int _ncol);
    // Exchange dimensions and storage with array2 without copying elements.
    void Swap(Array2d<T>& array2);
    // Release all storage and reset to the empty state.
    void Clear();
    // Set every element to t (does nothing on an empty array).
    void Zero(const T t = 0);
};

// Array2dC: an owning 2-D array backed by ONE contiguous buffer.
//   buf -- nrow*ncol elements in row-major order; NULL when empty
//   p   -- row pointer table into buf (p[i] == buf + i*ncol); NULL when empty
// Copy construction and assignment perform deep copies.
template<class T>
class Array2dC
{
public:
    int nrow;
    int ncol;
    T** p;
    T* buf;
public:
    Array2dC():nrow(0),ncol(0),p(NULL),buf(NULL) {}
    Array2dC(const int nrow,const int ncol):nrow(0),ncol(0),p(NULL),buf(NULL)
    {
        Create(nrow,ncol);
    }
    Array2dC(const Array2dC<T>& source);
    virtual ~Array2dC()
    {
        Clear();
    }
    Array2dC<T>& operator=(const Array2dC<T>& source);
    // Allocate _nrow x _ncol (both must be > 0); keeps the existing storage,
    // contents included, when the dimensions are already the requested ones.
    void Create(const int _nrow,const int _ncol);
    // Exchange dimensions and storage with array2 without copying elements.
    void Swap(Array2dC<T>& array2);
    // Set every element to t (does nothing on an empty array).
    void Zero(const T t = 0);
    // Release all storage and reset to the empty state.
    void Clear();
};

// Deep-copy constructor: an empty source yields an empty array.
template<class T>
Array2d<T>::Array2d(const Array2d<T>& source):nrow(0),ncol(0),p(NULL)
{
    if(source.p!=NULL)
    {
        Create(source.nrow,source.ncol);
        for(int i=0; i<nrow; i++)
            std::copy(source.p[i],source.p[i]+ncol,p[i]);
    }
}

// Deep-copy assignment: an empty source clears this array.
template<class T>
Array2d<T>& Array2d<T>::operator=(const Array2d<T>& source)
{
    // Create() always releases the current rows first, so without this guard
    // a self-assignment would copy from memory that has just been freed.
    if(this==&source) return *this;
    if(source.p!=NULL)
    {
        Create(source.nrow,source.ncol);
        for(int i=0; i<nrow; i++)
            std::copy(source.p[i],source.p[i]+ncol,p[i]);
    }
    else
        Clear();
    return *this;
}

template<class T>
void Array2d<T>::Create(const int _nrow,const int _ncol)
{
    assert(_nrow>0 && _ncol>0);
    Clear();
    nrow = _nrow;
    ncol = _ncol;
    p = new T*[nrow];
    assert(p!=NULL);
    for(int i=0; i<nrow; i++)
    {
        p[i] = new T[ncol];
        assert(p[i]!=NULL);
    }
}

template<class T>
void Array2d<T>::Swap(Array2d<T>& array2)
{
    std::swap(nrow,array2.nrow);
    std::swap(ncol,array2.ncol);
    std::swap(p,array2.p);
}

template<class T>
void Array2d<T>::Zero(const T t)
{
    if(nrow>0)
    {
        for(int i=0; i<nrow; i++)
            std::fill(p[i],p[i]+ncol,t);
    }
}

template<class T>
void Array2d<T>::Clear()
{
    // nrow is 0 whenever p is NULL, so the loop never dereferences a null table.
    for(int i=0; i<nrow; i++)
        delete[] p[i];
    delete[] p;
    p = NULL;
    nrow = ncol = 0;
}

// Deep-copy constructor: an empty source yields an empty array.
template<class T>
Array2dC<T>::Array2dC(const Array2dC<T>& source):nrow(0),ncol(0),p(NULL),buf(NULL)
{
    if(source.buf!=NULL)
    {
        Create(source.nrow,source.ncol);
        std::copy(source.buf,source.buf+nrow*ncol,buf);
    }
}

// Deep-copy assignment: an empty source clears this array.
template<class T>
Array2dC<T>& Array2dC<T>::operator=(const Array2dC<T>& source)
{
    // Self-assignment would hand std::copy identical source and destination ranges.
    if(this==&source) return *this;
    if(source.buf!=NULL)
    {
        Create(source.nrow,source.ncol);
        std::copy(source.buf,source.buf+nrow*ncol,buf);
    }
    else
        Clear();
    return *this;
}

template<class T>
void Array2dC<T>::Create(const int _nrow,const int _ncol)
{
    assert(_nrow>0 && _ncol>0);
    if(nrow==_nrow && ncol==_ncol) return; // already the right size: reuse storage
    Clear();
    nrow = _nrow;
    ncol = _ncol;
    buf = new T[nrow*ncol];
    assert(buf!=NULL);
    p = new T*[nrow];
    assert(p!=NULL);
    // Row pointers index into the single contiguous buffer.
    for(int i=0; i<nrow; i++)
        p[i] = buf + i*ncol;
}

template<class T>
void Array2dC<T>::Swap(Array2dC<T>& array2)
{
    std::swap(nrow,array2.nrow);
    std::swap(ncol,array2.ncol);
    std::swap(p,array2.p);
    std::swap(buf,array2.buf);
}

template<class T>
void Array2dC<T>::Zero(const T t)
{
    if(nrow>0) std::fill(buf,buf+nrow*ncol,t);
}

template<class T>
void Array2dC<T>::Clear()
{
    delete[] buf;
    buf = NULL;
    delete[] p;
    p = NULL;
    nrow = ncol = 0;
}
/*****************************************/
// IntImage.h
/*****************************************/
template
class IntImage:public Array2dC
{
private:
IntImage(const IntImage &source) { } // prohibit copy constructor
public:
IntImage():variance(0.0),label(-1) { }
virtual ~IntImage()
{
Clear();
}
virtual void Clear(void);
inline void SetSize(const int h, const int w);
bool Load(cv::Mat img, const char channel = 'I');
void Save(const std::string& filename) const;
void Swap(IntImage& image2);
void CalcIntegralImageInPlace(void);
void Resize(IntImage &result,const REAL ratio) const;
void Resize(IntImage& result,const int height,const int width) const;
IntImage& operator=(const IntImage& source);
void Sobel(IntImage& result,const bool useSqrt,const bool normalize);
public:
using Array2dC::nrow;
using Array2dC::ncol;
using Array2dC::buf;
using Array2dC::p;
REAL variance;
int label;
};
template
void IntImage::Clear(void)
{
Array2dC::Clear();
variance = 0.0;
label = -1;
}
// NOTE(review): throughout this region the text between a '<' and the following
// '>' appears to have been stripped (template parameter lists, loop conditions,
// casts), so several statements below are truncated and adjacent definitions run
// into each other. Restore from the upstream source before building.

// IntImage::Load -- fill this image from an OpenCV matrix.
//   img     -- source matrix (passed by value); an empty matrix makes Load return false
//   channel -- 'B', 'G' or 'R' selects plane 0, 1 or 2 of the split image;
//              any other value converts the image from BGR to gray scale
// After channel selection the image is resized to img.rows x img.cols.
template
bool IntImage::Load(cv::Mat img, const char channel)
{
if (img.empty()) return false;
if (channel == 'R' || channel == 'G' || channel == 'B')
{
int c;
if (channel == 'B') c = 0;
else if (channel == 'G') c = 1;
else c = 2; // OpenCV is 'BGR' ordering
cv::Mat planes[3];
split(img, planes);
img = planes[c];
}
else // use gray scale for all others
{
cv::cvtColor(img, img, cv::COLOR_BGR2GRAY);
}
SetSize(img.rows, img.cols);
// NOTE(review): truncated -- the row-loop condition, the per-pixel copy out of
// img.data (row stride img.step) and the end of Load are missing here.
for(int i=0,ih=img.rows,iw=img.cols; i(img.data+img.step*i);
for(int j=0; j
// IntImage::Save -- creates a single-channel 8-bit IplImage of ncol x nrow.
// NOTE(review): the pixel copy and whatever writes the image to `filename` are
// missing (truncated), as is the end of this function.
void IntImage::Save(const std::string& filename) const
{
IplImage* img;
img = cvCreateImage(cvSize(ncol,nrow),IPL_DEPTH_8U,1);
for(int i=0,ih=img->height,iw=img->width; i(img->imageData+img->widthStep*i);
for(int j=0; j
// IntImage::SetSize -- resize to h rows by w columns. Returns immediately when the
// image already has that size; otherwise clears it and allocates new storage.
void IntImage::SetSize(const int h,const int w)
{
if((h == nrow) && (w == ncol)) return;
Clear();
Array2dC::Create(h,w);
}
template
IntImage& IntImage::operator=(const IntImage& source)
{
if(&source==this) return *this;
SetSize(source.nrow,source.ncol);
std::copy(source.buf,source.buf+nrow*ncol,buf);
label = source.label;
variance = source.variance;
return *this;
}
template
void IntImage::Resize(IntImage &result,const REAL ratio) const
{
Resize(result,int(nrow*ratio),int(ncol*ratio));
}
// NOTE(review): text between '<' and '>' appears to have been stripped in this
// region; the loops below are truncated and three definitions run together.

// IntImage::Resize -- resample this image into `result` at height x width
// (both must be > 0). `result` is resized first; ixratio / iyratio are the
// source-to-destination scale factors for rows and columns.
template
void IntImage::Resize(IntImage& result,const int height,const int width) const
{
assert(height>0 && width>0);
result.SetSize(height,width);
REAL ixratio = nrow*1.0/height, iyratio = ncol*1.0/width;
// Per-destination-column scratch arrays.
// NOTE(review): the matching delete[] calls are not visible (truncated) -- confirm
// against the upstream source that p_y and p_y0 are released.
REAL* p_y = new REAL[result.ncol];
assert(p_y!=NULL);
int* p_y0 = new int[result.ncol];
assert(p_y0!=NULL);
// NOTE(review): truncated -- the interpolation loops and the end of Resize are missing.
for(int i=0; i
// IntImage::CalcIntegralImageInPlace
// NOTE(review): body truncated; only the start of a loop beginning at i=1 survives.
void IntImage::CalcIntegralImageInPlace(void)
// We pad a zero column and a zero row, so 24*24 image will be 25*25 in size
// if the input image is not padded, the results on 1st row will be problematic
{
for(int i=1; i
// IntImage::Swap -- exchange pixel storage, variance and label with image2.
void IntImage::Swap(IntImage& image2)
{
Array2dC::Swap(image2);
std::swap(variance,image2.variance);
std::swap(label,image2.label);
}
// NOTE(review): text between '<' and '>' appears to have been stripped in this
// region. The body of Sobel is truncated and runs straight into the middle of
// class CRect, whose opening (class header, data members, start of Empty()) is
// missing. Restore from the upstream source before building.

// IntImage::Sobel -- writes a gradient image of the same size into `result`.
template
void IntImage::Sobel(IntImage& result,const bool useSqrt,const bool normalize)
{
// compute the Sobel gradient. For now, we just use the very inefficient way. Optimization can be done later
// if useSqrt = true, we compute the real Sobel gradient; otherwise, the square of it
// if normalize = true, the numbers are normalized to be in 0..255
result.Create(nrow,ncol);
// NOTE(review): truncated -- the gradient computation is missing; what remains
// tracks the largest value written to `result` in `maxv`.
for(int i=0; imaxv)
maxv = result.p[i][j];
}
}
// NOTE(review): truncated -- the rest of Sobel and the beginning of class CRect are
// missing. The surviving tail belongs to CRect::Empty(), which reports a rectangle
// as empty when left >= right or top >= bottom.
for(int i=0; i= right) || (top >= bottom);
}
// Reset all four edges to 0 (which makes the rectangle empty).
void Clear()
{
left = right = top = bottom = 0;
}
// Area of the rectangle; 0 when it is empty.
double Size() const
{
if(Empty())
return 0;
else
return (bottom-top)*(right-left);
}
// Intersect and Union of two rectangles; both functions should be able to run when &result==this
bool Intersect(CRect& result,const CRect& rect2) const;
bool Union(CRect& result,const CRect& rect2) const;
};
// NodeDetector -- one stage of the detection cascade: a classifier table loaded
// from a model file, plus the threshold returned by the loader.
// NOTE(review): template arguments (e.g. on Array2dC / IntImage / std::vector)
// appear to have been stripped throughout this region.
class NodeDetector
{
public:
// CD_LIN / CD_HIK are the values callers pass in; Load() remaps them to
// LINEAR / HISTOGRAM once the model has been read.
enum NodeType { CD_LIN, CD_HIK, LINEAR, HISTOGRAM };
public:
int type; // linear or histogram?
Array2dC classifier;
double thresh;
int featurelength;
int upper_bound;
int index;
std::string filename;
public:
// Loads the model immediately, then initialises the value range so that the
// first SetValues() call updates it.
NodeDetector(const NodeType _type,const int _featurelength,const int _upper_bound,const int _index,const char* _filename)
{
Load(_type,_featurelength,_upper_bound,_index,_filename);
minvalue = DBL_MAX;
maxvalue = -minvalue;
}
~NodeDetector()
{
}
void Load(const NodeType _type,const int _featurelength,const int _upper_bound,const int _index,const char* _filename);
bool Classify(int* f);
private:
double minvalue;
double maxvalue;
public:
// Records v into the running maximum (and, presumably, minimum -- truncated).
void SetValues(const double v)
{
if(v>maxvalue) maxvalue = v;
// NOTE(review): truncated -- the rest of SetValues, the end of NodeDetector, the
// whole of class CascadeDetector and the beginning of class DetectionScanner
// (data members, constructor/destructor) are missing. What follows is the tail of
// a DetectionScanner::LoadDetector declaration.
if(v& types,std::vector& upper_bounds,std::vector& filenames);
private:
// Working images for one scan; filled in by InitImage / InitIntegralImages.
IntImage* integrals;
IntImage image,sobel;
IntImage ct;
Array2dC hist;
IntImage scores;
void InitImage(IntImage& original);
void InitIntegralImages(const int stepsize);
void ResizeImage();
public:
int Scan(IntImage& original,std::vector& results,const int stepsize,const int round,std::ofstream* out,const int upper_bound);
int FastScan(IntImage& original,std::vector& results,const int stepsize);
// Length of the feature vector: one histogram of baseflength bins per
// (xdiv-1) x (ydiv-1) cell.
int FeatureLength() const
{
return (xdiv-1)*(ydiv-1)*baseflength;
}
};
// NOTE(review): declared here; no definition is visible in this file.
void RunFiles();
/*****************************************/
// Pedestrian_ICRA.cpp
/*****************************************/
// Detection window geometry passed to the global scanner below.
const int HUMAN_height = 108;
const int HUMAN_width = 36;
const int HUMAN_xdiv = 9;
const int HUMAN_ydiv = 4;
// Subtracted from xdiv / ydiv when computing the feature length in LoadDetector.
static const int EXT = 1;
// The detector
// NOTE(review): the DetectionScanner constructor is not visible in this file;
// 256 is presumably baseflength and 0.8 the per-level resize ratio -- confirm.
DetectionScanner scanner(HUMAN_height,HUMAN_width,HUMAN_xdiv,HUMAN_ydiv,256,0.8);
// ---------------------------------------------------------------------
// Helper functions
// Fills "ct" from "original"; "ct" is allocated with the same dimensions.
// DetectionScanner::InitImage passes the Sobel image as "original".
// NOTE(review): text between '<' and '>' appears to have been stripped; the body
// of ComputeCT is truncated and runs into the linear-SVM loader, whose signature
// (the three-argument UseSVM_CD_FastEvaluationStructure called from
// NodeDetector::Load) is missing.
void ComputeCT(IntImage& original,IntImage& ct)
{
ct.Create(original.nrow,original.ncol);
for(int i=2; i& result)
{
// Linear model loader: parses a text model file. The surviving part expects the
// keywords "nr_feature", "bias" and "w", reads num_dim weights into
// result (1 x num_dim), and returns rho, which is read from the file only when
// bias >= 0 (otherwise 0).
std::ifstream in(modelfile);
if(in.good()==false)
{
// NOTE(review): truncated -- the rest of the error message, the failure handling
// and the parsing of the first header lines are missing.
std::cout<<"SVM model "<>buffer;
assert(buffer=="nr_feature");
int num_dim;
in>>num_dim;
assert(num_dim>0 && num_dim==m);
std::getline(in,buffer); // end of line 4
in>>buffer;
assert(buffer=="bias");
int bias;
in>>bias;
std::getline(in,buffer); //end of line 5;
in>>buffer;
assert(buffer=="w");
std::getline(in,buffer); //end of line 6
result.Create(1,num_dim);
// NOTE(review): truncated loop header -- reads one weight per feature into result.buf.
for(int i=0; i>result.buf[i];
double rho = 0;
if(bias>=0) in>>rho;
in.close();
return rho;
}
// Load SVM models -- Histogram Intersectin Kernel SVM trained by libHIK
double UseSVM_CD_FastEvaluationStructure(const char* modelfile, const int m, const int upper_bound, Array2dC& result)
{
std::ifstream fs(modelfile, std::fstream::binary);
if( !fs.is_open() )
{
std::cout << "SVM model " << modelfile << " can not be loaded." << std::endl;
exit(-1);
}
// Header
int rows, cols, type, channels;
fs.read((char*)&rows, sizeof(int)); // rows
fs.read((char*)&cols, sizeof(int)); // cols
fs.read((char*)&type, sizeof(int)); // type
fs.read((char*)&channels, sizeof(int)); // channels
// Data
cv::Mat mat(rows, cols, type);
fs.read((char*)mat.data, CV_ELEM_SIZE(type) * rows * cols);
int num_dim = m;
result.Create(num_dim, upper_bound);
for(int i=0; i(i, j);
}
return -0.00455891;
}
// Intersection of "this" and "rect2", written to "result".
// Returns true and stores the overlap when both rectangles are non-empty and
// overlap on both axes; otherwise clears "result" and returns false.
// Safe to call with &result==this: each edge is computed only from the
// same-named edges of the two inputs.
bool CRect::Intersect(CRect& result,const CRect& rect2) const
{
    const bool bothNonEmpty = !Empty() && !rect2.Empty();
    const bool overlapX = !(left >= rect2.right) && !(rect2.left >= right);
    const bool overlapY = !(top >= rect2.bottom) && !(rect2.top >= bottom);

    if(bothNonEmpty && overlapX && overlapY)
    {
        result.left = std::max( left, rect2.left );
        result.right = std::min( right, rect2.right );
        result.top = std::max( top, rect2.top );
        result.bottom = std::min( bottom, rect2.bottom );
        return true;
    }

    result.Clear();
    return false;
}
// Bounding union of "this" and "rect2", written to "result".
// An empty rectangle contributes nothing: if only one input is empty the other is
// copied; if both are empty "result" is cleared and false is returned.
// Returns true in every other case.
bool CRect::Union(CRect& result,const CRect& rect2) const
{
    const bool selfEmpty = Empty();
    const bool otherEmpty = rect2.Empty();

    if(selfEmpty && otherEmpty)
    {
        result.Clear();
        return false;
    }
    if(selfEmpty)
    {
        result = rect2;
        return true;
    }
    if(otherEmpty)
    {
        result = *this;
        return true;
    }

    // Both non-empty: take the outermost edge on every side.
    result.left = std::min( left, rect2.left );
    result.right = std::max( right, rect2.right );
    result.top = std::min( top, rect2.top );
    result.bottom = std::max( bottom, rect2.bottom );
    return true;
}
// A simple post-process (NMS, non-maximal suppression)
// "result" -- rectangles before merging
// -- after this function it contains rectangles after NMS
// "combine_min" -- threshold of how many detections are needed to survive
//
// Working state, kept in three parallel vectors (one entry per cluster):
//   res1   -- running SUM of the edges of every rectangle merged into the cluster
//   resmax -- union (bounding box) of every rectangle merged into the cluster
//   res2   -- number of rectangles merged into the cluster
// NOTE(review): text between '<' and '>' appears to have been stripped (vector
// element types, loop conditions); the two `for` statements below are truncated.
void PostProcess(std::vector& result,const int combine_min)
{
std::vector res1;
std::vector resmax;
std::vector res2;
bool yet;
CRect rectInter;
// NOTE(review): truncated -- the outer/inner loop headers and the intersection
// call are missing. The surviving condition merges result_i into cluster j when
// their intersection covers more than 60% of BOTH result_i and resmax_j.
for(unsigned int i=0,size_i=result.size(); i0.6*result_i.Size()
&& rectInter.Size()>0.6*resmax_j.Size()
)
{
CRect& res1_j = res1[j];
resmax_j.Union(resmax_j,result_i);
res1_j.bottom += result_i.bottom;
res1_j.top += result_i.top;
res1_j.left += result_i.left;
res1_j.right += result_i.right;
res2[j]++;
yet = true;
break;
}
}
}
// No cluster matched: start a new one seeded with this rectangle.
if(yet==false)
{
res1.push_back(result_i);
resmax.push_back(result_i);
res2.push_back(1);
}
}
// NOTE(review): truncated -- presumably clears "result", averages each cluster's
// summed edges by its count and keeps clusters with more than combine_min
// members; only the final push_back survives. Confirm against the upstream source.
for(unsigned int i=0,size=res1.size(); icombine_min)
result.push_back(res1[i]);
}
// If one detection (after NMS) is inside another, remove the inside one
// "covered[i]" marks rectangle i for removal; a rectangle is marked when its
// overlap with another one exceeds 65% of its own area.
// NOTE(review): text between '<' and '>' appears to have been stripped; the loops
// below are truncated, and the end of this function runs into the body of another
// function (apparently LoadCascade, which main calls with "scanner") whose
// signature is missing.
void RemoveCoveredRectangles(std::vector& result)
{
std::vector covered;
covered.resize(result.size());
std::fill(covered.begin(),covered.end(),false);
CRect inter;
for(unsigned int i=0; iresult[i].Size()*0.65)
covered[i] = true;
if(isize>result[j].Size()*0.65)
covered[j] = true;
}
}
std::vector newresult;
// NOTE(review): truncated -- the copy of the non-covered rectangles back into
// "result" and the start of the next function are missing. From here on the code
// configures a two-node cascade on a DetectionScanner named "ds".
for(unsigned int i=0; i types;
std::vector upper_bounds;
std::vector filenames;
types.push_back(NodeDetector::CD_LIN); // first node
upper_bounds.push_back(100);
filenames.push_back("combined.txt.model");
types.push_back(NodeDetector::CD_HIK); // second node
upper_bounds.push_back(353);
filenames.push_back("combined.txt.model_");
ds.LoadDetector(types,upper_bounds,filenames);
// You can adjust these parameters for different speed, accuracy etc
ds.cascade->nodes[0]->thresh += 0.8;
ds.cascade->nodes[1]->thresh -= 0.095;
}
void DetectionScanner::LoadDetector(std::vector& types,std::vector& upper_bounds,std::vector& filenames)
{
unsigned int depth = types.size();
assert(depth>0 && depth==upper_bounds.size() && depth==filenames.size());
if(cascade)
delete cascade;
cascade = new CascadeDetector;
assert(xdiv>0 && ydiv>0);
for(unsigned int i=0; iAddNode(types[i],(xdiv-EXT)*(ydiv-EXT)*baseflength,upper_bounds[i],filenames[i].c_str());
hist.Create(1,baseflength*(xdiv-EXT)*(ydiv-EXT));
}
// Records the node's settings and reads its model file.
//   CD_LIN -- linear model: loaded with the three-argument loader, then the node
//             type becomes LINEAR
//   CD_HIK -- histogram model: loaded with the four-argument loader (which also
//             takes the upper bound), then the node type becomes HISTOGRAM
// For any other type only the settings are stored; no model is read and
// "thresh" is left untouched.
void NodeDetector::Load(const NodeType _type,const int _featurelength,const int _upper_bound,const int _index,const char* _filename)
{
    index = _index;
    filename = _filename;
    featurelength = _featurelength;
    upper_bound = _upper_bound;

    switch(_type)
    {
    case CD_LIN:
        thresh = UseSVM_CD_FastEvaluationStructure(_filename,_featurelength,classifier);
        type = LINEAR;
        break;
    case CD_HIK:
        thresh = UseSVM_CD_FastEvaluationStructure(_filename,_featurelength,upper_bound,classifier);
        type = HISTOGRAM;
        break;
    default:
        type = _type;
        break;
    }
}
// Appends a new node to the cascade, loading its model from _filename.
// The node's index is its position in the cascade. When the pointer table is
// full, it is reallocated with twice the capacity before the node is added.
void CascadeDetector::AddNode(const NodeDetector::NodeType _type,const int _featurelength,const int _upper_bound,const char* _filename)
{
    if(length==size)
    {
        // Doubling a capacity of 0 would stay at 0 and the write below would run
        // past the table; the constructor is not visible here, so start from 1
        // in that case.
        const int newsize = (size>0) ? size * 2 : 1;
        NodeDetector** grown = new NodeDetector*[newsize];
        assert(grown!=NULL);
        std::copy(nodes,nodes+size,grown);
        delete[] nodes;
        nodes = grown;
        size = newsize;
    }
    nodes[length] = new NodeDetector(_type,_featurelength,_upper_bound,length,_filename);
    length++;
}
// End of functions that load the two classifiers
// ---------------------------------------------------------------------
// ---------------------------------------------------------------------
// Detection functions
// initialization -- compute the Census Transform image for CENTRIST
// Copies "original" into the working image, computes its Sobel image
// (useSqrt=false, i.e. the squared gradient; normalize=false) into "sobel",
// then derives "ct" from "sobel" via ComputeCT.
// NOTE(review): the template argument of IntImage in the signature appears to
// have been stripped.
void DetectionScanner::InitImage(IntImage& original)
{
image = original;
image.Sobel(sobel,false,false);
ComputeCT(sobel,ct);
}
// combine the (xdiv-1)*(ydiv-1) integral images into a single one
// Only runs when the first cascade node is LINEAR. "scores" is sized like "ct"
// and pre-filled with the first node's threshold divided by hd*wd.
// NOTE(review): text between '<' and '>' appears to have been stripped; the
// accumulation loop is truncated and runs into FastScan, whose return type and
// name are missing from its signature line.
void DetectionScanner::InitIntegralImages(const int stepsize)
{
if(cascade->nodes[0]->type!=NodeDetector::LINEAR)
return; // No need to prepare integral images
const int hd = height/xdiv*2-2;
const int wd = width/ydiv*2-2;
scores.Create(ct.nrow,ct.ncol);
scores.Zero(cascade->nodes[0]->thresh/hd/wd);
double* linearweights = cascade->nodes[0]->classifier.buf;
for(int i=0; i& original,std::vector& results,const int stepsize)
{
// FastScan: slides the detection window over successively resized copies of the
// image (ResizeImage is called after each level) for as long as the working image
// is at least height x width, and appends every accepted window to "results",
// mapped back to the coordinates of "original". Always returns 0 at the end.
// NOTE(review): truncated -- the early-out size check, the image initialisation
// and the lookup of the second node (nodes[1]) are partly missing.
if(original.nrownodes[1];
double** pc = node->classifier.p;
int oheight = original.nrow, owidth = original.ncol;
CRect rect;
while(image.nrow>=height && image.ncol>=width)
{
InitIntegralImages(stepsize);
// NOTE(review): truncated -- the window loops, the first-node test and the
// histogram construction are missing; "score" starts from the second node's
// threshold and is summed from the node's lookup table, indexed by the histogram
// bin values.
for(int i=2; i+heightthresh;
for(int k=0; kclassifier.nrow; k++) score += pc[k][hist.buf[k]];
if(score>0)
{
// Scale the window from the current pyramid level back to the original image.
rect.top = i*oheight/image.nrow;
rect.bottom = (i+height)*oheight/image.nrow;
rect.left = j*owidth/image.ncol;
rect.right = (j+width)*owidth/image.ncol;
results.push_back(rect);
}
}
}
ResizeImage();
}
return 0;
}
// The interface function that detects pedestrians in a single image file
// "filename" -- an input image
// "ds" -- the detector cascade -- pass "scanner" as this parameter
// NOTE(review): earlier versions of this comment also described "outname" and
// "out" outputs; neither is a parameter of the function as declared here.
// NOTE(review): text between '<' and '>' appears to have been stripped; the end
// of DetectHuman and the beginning of main (including the declarations of key,
// fx, wait_time and rect_organization) are missing.
int totaltime = 0;
int DetectHuman(const char* filename,DetectionScanner& ds)
{
std::vector results;
IntImage original;
// NOTE(review): with the Load call below commented out, "original" is passed to
// FastScan without any visible code having filled it.
// original.Load(filename);
ds.FastScan(original,results,2);
// Two NMS passes (combine_min 2, then 0), then drop rectangles covered by others.
PostProcess(results,2);
PostProcess(results,0);
RemoveCoveredRectangles(results);
cvNamedWindow("show");
{
IplImage* iplImage = NULL;
iplImage = cvLoadImage(filename);
// NOTE(review): truncated -- the drawing loop, the rest of DetectHuman and the
// start of main are missing; the text resumes inside main's usage message.
for(unsigned int i=0; i" << std::endl << std::endl;
std::cout << "keys:" << std::endl;
std::cout << "space : toggle using simple post-process (NMS, non-maximal suppression)" << std::endl;
std::cout << "0 : waits to process next frame until a key pressed" << std::endl;
std::cout << "1 : doesn't wait to process next frame" << std::endl;
std::cout << "2 : resize frames 1/2" << std::endl;
std::cout << "3 : don't resize frames" << std::endl;
std::cout << "4 : resize frames 1/4" << std::endl;
// Without a video argument only the help text above is shown.
if (argc < 2)
return 0;
cv::Mat src;
cv::VideoCapture capture( argv[1] );
LoadCascade(scanner);
// NOTE(review): truncated -- the end of this statement and the declaration of
// "original" are fused together.
std::cout<<"Detectors loaded."< original;
// Main loop: runs until ESC (key code 27) is pressed or the video ends.
while( key != 27 )
{
capture >> src;
if( src.empty() ) break;
// Optionally downscale the frame before detection.
if (fx < 1)
{
cv::resize(src, src, cv::Size(), fx, fx);
}
original.Load( src );
std::vector results;
scanner.FastScan(original, results, 2);
if(rect_organization)
{
PostProcess(results,2);
PostProcess(results,0);
RemoveCoveredRectangles(results);
}
// Draw every detection as a green rectangle on the (possibly resized) frame.
for(size_t i = 0; i < results.size(); i++)
{
cv::rectangle(src, cvPoint(results[i].left,results[i].top),cvPoint(results[i].right,results[i].bottom),cv::Scalar(0,255,0),2 );
}
cv::imshow("result",src);
key = cv::waitKey( wait_time );
// Key codes: 32 = space, 48..52 = '0'..'4' (see the help text above).
if (key == 32)
rect_organization = !rect_organization;
if (key == 48)
wait_time = 0;
if (key == 49)
wait_time = 1;
if (key == 50)
fx = 0.5;
if (key == 51)
fx = 1;
if (key == 52)
fx = 0.25;
}
// Keep the last frame on screen until a key is pressed.
cv::waitKey();
return 0;
}