C4: Real-time pedestrian detection——C4实时行人检测算法

http://cs.nju.edu.cn/wujx/projects/C4/C4.htm

Jianxin Wu实现的快速行人检测方法。

Real-Time Human Detection Using Contour Cues:

http://c2inet.sce.ntu.edu.sg/Jianxin/paper/ICRA_final.pdf

        C4能够达到比现有人体检测算法更高的速度。在640*480分辨率的视频上,仅使用单核2.8GHz处理器,C4即可达到20fps;而现有的最快系统(在保证较低虚警率和较高检测率的前提下)只能达到大约10fps,并且还使用了GPU并行处理。C4在小分辨率的图片上速度更快:在480*360分辨率的视频上速度达到36.3fps,在320*240分辨率的视频上能达到109fps。

        不需要显式地构造Hlin的特征向量并不是使得C4如此快的唯一原因。在INRIA数据集上进行的测试表明,第一级线性分类器Hlin是一个强大的分类器,可以过滤大约99.43%的图片区域,只有不到0.6%的图片区域需要HIK核分类器Hhik的处理。C4处理INRIA测试集中的所有图片用了27.1秒,而HOG检测器[1]需要2167.5秒(所以C4相对于HOG大约有80倍的加速)。

下面是仅仅使用CPU运行的代码:

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

#include 
#include 

// Floating-point precision switch: REAL is double while USE_DOUBLE is
// defined and float otherwise.
#define USE_DOUBLE

#if defined(USE_DOUBLE)
typedef double REAL;
#else
typedef float REAL;
#endif

// Forward declaration of the contiguous-storage 2-D array template.
// The template parameter list had been lost, which turned this line into an
// ill-formed explicit instantiation.
template<class T> class Array2dC;

// Two-dimensional array addressed as p[row][col] through a row-pointer table.
// The object owns its storage: Create() allocates it and the destructor calls
// Clear(). The template parameter list (lost from the text) is restored here;
// the element type is named T because the members are declared in terms of T.
template<class T>
class Array2d
{
public:
    int nrow;   // current number of rows; 0 when empty
    int ncol;   // current number of columns; 0 when empty
    T** p;      // row-pointer table allocated by Create(); NULL when empty
public:
    // Creates an empty array (no storage).
    Array2d():nrow(0),ncol(0),p(NULL) { }
    // Creates an nrow x ncol array. The parameters shadow the members, so the
    // members are first set to the empty state and Create() then sizes them.
    Array2d(const int nrow,const int ncol):nrow(0),ncol(0),p(NULL)
    {
        Create(nrow,ncol);
    }
    // Copy constructor (defined out of class).
    Array2d(const Array2d& source);
    // Releases the storage through Clear().
    virtual ~Array2d()
    {
        Clear();
    }

    // Copy assignment (defined out of class).
    Array2d& operator=(const Array2d& source);
    // Discards the current contents and allocates _nrow x _ncol storage;
    // both dimensions must be positive.
    void Create(const int _nrow,const int _ncol);
    // Exchanges dimensions and storage with array2.
    void Swap(Array2d& array2);
    // Releases the storage (defined out of class).
    void Clear();
    // Fill operation taking the fill value t (defaults to 0).
    void Zero(const T t = 0);
};

// Two-dimensional array backed by one contiguous buffer.
// `buf` holds nrow*ncol elements and `p` is a table of nrow row pointers, so
// elements can be reached either as buf[k] or as p[row][col]. The object owns
// both allocations; the destructor calls Clear(). The template parameter list
// (lost from the text) is restored here.
template<class T>
class Array2dC
{
public:
    int nrow;   // current number of rows; 0 when empty
    int ncol;   // current number of columns; 0 when empty
    T** p;      // row-pointer table; NULL when empty
    T* buf;     // contiguous element storage; NULL when empty
public:
    // Creates an empty array (no storage).
    Array2dC():nrow(0),ncol(0),p(NULL),buf(NULL) {}
    // Creates an nrow x ncol array. The parameters shadow the members, so the
    // members are first set to the empty state and Create() then sizes them.
    Array2dC(const int nrow,const int ncol):nrow(0),ncol(0),p(NULL),buf(NULL)
    {
        Create(nrow,ncol);
    }
    // Deep copy of source (defined out of class).
    Array2dC(const Array2dC& source);
    // Releases both allocations through Clear().
    virtual ~Array2dC()
    {
        Clear();
    }

    // Deep-copy assignment; an empty source empties this array.
    Array2dC& operator=(const Array2dC& source);
    // Ensures the array is _nrow x _ncol; both dimensions must be positive.
    // Storage is kept as-is when the dimensions already match.
    void Create(const int _nrow,const int _ncol);
    // Exchanges dimensions and storage with array2 without copying elements.
    void Swap(Array2dC& array2);
    // Sets every element to t (defaults to 0); no-op on an empty array.
    void Zero(const T t = 0);
    // Frees the storage and returns to the empty state.
    void Clear();
};

// Array2d copy constructor, copy assignment and Create().
// NOTE(review): this text is damaged — every run from a '<' up to the next
// '>' appears to have been stripped. The template headers/arguments and the
// remainder of each `for` loop below (including the closing braces and the
// following `template<...>` line) are missing and must be restored from the
// original C4 sources before this can compile.

// Copy constructor: starts empty and, when the source holds data, allocates
// storage of the same dimensions through Create(); the element-copy loop is
// truncated.
template
Array2d::Array2d(const Array2d& source):nrow(0),ncol(0),p(NULL)
{
    if(source.p!=NULL)
    {
        Create(source.nrow,source.ncol);
        for(int i=0; i
// Copy assignment: same pattern as the copy constructor; loop truncated.
Array2d& Array2d::operator=(const Array2d& source)
{
    if(source.p!=NULL)
    {
        Create(source.nrow,source.ncol);
        for(int i=0; i
// Create: requires positive dimensions, discards the current contents with
// Clear(), records the new size and allocates the row-pointer table. The loop
// that follows (presumably one allocation per row — confirm) is truncated.
void Array2d::Create(const int _nrow,const int _ncol)
{
    assert(_nrow>0 && _ncol>0);
    Clear();
    nrow = _nrow;
    ncol = _ncol;
    p = new T*[nrow];
    assert(p!=NULL);
    for(int i=0; i
void Array2d::Swap(Array2d& array2)
{
    std::swap(nrow,array2.nrow);
    std::swap(ncol,array2.ncol);
    std::swap(p,array2.p);
}

// Array2d::Zero and Array2d::Clear.
// NOTE(review): both loop bodies (and the template header/arguments) are
// missing from this text; restore them from the original C4 sources.

// Zero: does nothing on an empty array (nrow == 0); otherwise walks the rows.
template
void Array2d::Zero(const T t)
{
    if(nrow>0)
    {
        for(int i=0; i
// Clear: walks the rows; the remainder of the body is truncated.
void Array2d::Clear()
{
    for(int i=0; i
// Copy constructor: starts empty and, when the source holds data, allocates
// storage of the same dimensions and copies all nrow*ncol elements from the
// source's contiguous buffer. An empty source yields an empty array.
// The template header and arguments (lost from the text) are restored.
template<class T>
Array2dC<T>::Array2dC(const Array2dC<T>& source):nrow(0),ncol(0),p(NULL),buf(NULL)
{
    if(source.buf!=NULL)
    {
        Create(source.nrow,source.ncol);
        std::copy(source.buf,source.buf+nrow*ncol,buf);
    }
}

template
Array2dC& Array2dC::operator=(const Array2dC& source)
{
    if(source.buf!=NULL)
    {
        Create(source.nrow,source.ncol);
        std::copy(source.buf,source.buf+nrow*ncol,buf);
    }
    else
        Clear();
    return *this;
}

// Ensures the array is _nrow x _ncol. Both dimensions must be positive.
// When the dimensions already match, the existing storage (and its contents)
// is kept untouched; otherwise the old storage is released and a contiguous
// buffer of nrow*ncol elements plus a table of nrow row pointers is allocated.
// NOTE(review): the template header/arguments and the remainder of the final
// loop are missing from this text. The loop presumably points each p[i] at
// row i inside buf — confirm against the original C4 sources.
template
void Array2dC::Create(const int _nrow,const int _ncol)
{
    assert(_nrow>0 && _ncol>0);
    if(nrow==_nrow && ncol==_ncol) return;
    Clear();
    nrow = _nrow;
    ncol = _ncol;
    buf = new T[nrow*ncol];
    assert(buf!=NULL);
    p = new T*[nrow];
    assert(p!=NULL);
    for(int i=0; i
void Array2dC::Swap(Array2dC& array2)
{
    std::swap(nrow,array2.nrow);
    std::swap(ncol,array2.ncol);
    std::swap(p,array2.p);
    std::swap(buf,array2.buf);
}

template
void Array2dC::Zero(const T t)
{
    if(nrow>0) std::fill(buf,buf+nrow*ncol,t);
}

template
void Array2dC::Clear()
{
    delete[] buf;
    buf = NULL;
    delete[] p;
    p = NULL;
    nrow = ncol = 0;
}


/*****************************************/
// IntImage.h
/*****************************************/

template
class IntImage:public Array2dC
{
private:
    IntImage(const IntImage &source) { } // prohibit copy constructor

public:
    IntImage():variance(0.0),label(-1) { }
    virtual ~IntImage()
    {
        Clear();
    }

    virtual void Clear(void);
    inline void SetSize(const int h, const int w);
    bool Load(cv::Mat img, const char channel = 'I');
    void Save(const std::string& filename) const;
    void Swap(IntImage& image2);

    void CalcIntegralImageInPlace(void);
    void Resize(IntImage &result,const REAL ratio) const;
    void Resize(IntImage& result,const int height,const int width) const;

    IntImage& operator=(const IntImage& source);

    void Sobel(IntImage& result,const bool useSqrt,const bool normalize);
public:
    using Array2dC::nrow;
    using Array2dC::ncol;
    using Array2dC::buf;
    using Array2dC::p;
    REAL variance;
    int label;
};

template
void IntImage::Clear(void)
{
    Array2dC::Clear();
    variance = 0.0;
    label = -1;
}

// IntImage::Load and IntImage::Save.
// NOTE(review): the template headers/arguments and the pixel-copy loops of
// both functions are truncated in this text (the casts and loop bounds were
// stripped); restore them from the original C4 sources.

// Load: fills this image from an OpenCV matrix.
//   img     - source matrix, taken by value so it can be replaced locally
//   channel - 'R', 'G' or 'B' picks that plane; anything else means gray scale
// Returns false when img is empty.
template
bool IntImage::Load(cv::Mat img, const char channel)
{
    if (img.empty()) return false;

    if (channel == 'R' || channel == 'G' || channel == 'B')
    {
        int c;
        if (channel == 'B') c = 0;
        else if (channel == 'G') c = 1;
        else c = 2; // OpenCV is 'BGR' ordering
        cv::Mat planes[3];
        split(img, planes);
        img = planes[c];
    }
    else // use gray scale for all others
    {
        // NOTE(review): this conversion assumes a BGR input — confirm the
        // behaviour wanted for frames that are already single-channel.
        cv::cvtColor(img, img, cv::COLOR_BGR2GRAY);
    }

    // Size the image to match the matrix, then copy it row by row.
    SetSize(img.rows, img.cols);
    for(int i=0,ih=img.rows,iw=img.cols; i(img.data+img.step*i);
        for(int j=0; j
// Save: creates an 8-bit single-channel IplImage of ncol x nrow and fills it
// row by row; the rest of the body (including any write to `filename` and the
// release of `img`) is not visible here.
void IntImage::Save(const std::string& filename) const
{
    IplImage* img;

    img = cvCreateImage(cvSize(ncol,nrow),IPL_DEPTH_8U,1);
    for(int i=0,ih=img->height,iw=img->width; i(img->imageData+img->widthStep*i);
        for(int j=0; j
void IntImage::SetSize(const int h,const int w)
{
    if((h == nrow) && (w == ncol)) return;
    Clear();
    Array2dC::Create(h,w);
}

template
IntImage& IntImage::operator=(const IntImage& source)
{
    if(&source==this) return *this;
    SetSize(source.nrow,source.ncol);
    std::copy(source.buf,source.buf+nrow*ncol,buf);
    label = source.label;
    variance = source.variance;
    return *this;
}

template
void IntImage::Resize(IntImage &result,const REAL ratio) const
{
    Resize(result,int(nrow*ratio),int(ncol*ratio));
}

// IntImage::Resize(height, width) and IntImage::CalcIntegralImageInPlace.
// NOTE(review): the template headers/arguments and both loop bodies are
// truncated in this text; restore them from the original C4 sources.

// Resize: requires a positive target size, sizes `result` accordingly and
// computes the row/column scale factors. Two temporary arrays of result.ncol
// entries are allocated with new[]; their release is not visible in the
// remaining text — confirm they are freed.
template
void IntImage::Resize(IntImage& result,const int height,const int width) const
{
    assert(height>0 && width>0);
    result.SetSize(height,width);
    REAL ixratio = nrow*1.0/height, iyratio = ncol*1.0/width;

    REAL* p_y = new REAL[result.ncol];
    assert(p_y!=NULL);
    int* p_y0 = new int[result.ncol];
    assert(p_y0!=NULL);
    for(int i=0; i
void IntImage::CalcIntegralImageInPlace(void)
// We pad a zero column and a zero row, so 24*24 image will be 25*25 in size
// if the input image is not padded, the results on 1st row will be problematic
{
    // Iteration starts at row 1, consistent with the padding described above.
    for(int i=1; i
void IntImage::Swap(IntImage& image2)
{
    Array2dC::Swap(image2);
    std::swap(variance,image2.variance);
    std::swap(label,image2.label);
}

// IntImage::Sobel (fragment) followed by the tail of class CRect.
// NOTE(review): most of Sobel's body and the start of class CRect (its header,
// data members and the beginning of Empty()) are missing from this text — the
// two definitions were fused on the line starting "for(int i=0; i= right)".
// Restore both from the original C4 sources.
template
void IntImage::Sobel(IntImage& result,const bool useSqrt,const bool normalize)
{
    // compute the Sobel gradient. For now, we just use the very inefficient way. Optimization can be done later
// if useSqrt = true, we compute the real Sobel gradient; otherwise, the square of it
// if normalize = true, the numbers are normalized to be in 0..255
    result.Create(nrow,ncol);
    for(int i=0; imaxv)
                    maxv = result.p[i][j];
            }
        }
        // The text after this point belongs to CRect::Empty(): a rectangle
        // counts as empty when left >= right or top >= bottom.
        for(int i=0; i= right) || (top >= bottom);
    }
    // Resets all four edges to 0, which makes the rectangle empty.
    void Clear()
    {
        left = right = top = bottom = 0;
    }
    // Area of the rectangle; 0 for an empty rectangle.
    double Size() const
    {
        if(Empty())
            return 0;
        else
            return (bottom-top)*(right-left);
    }
    // Intersect and Union of two rectangles, both function should be able to run when &result==this
    bool Intersect(CRect& result,const CRect& rect2) const;
    bool Union(CRect& result,const CRect& rect2) const;
};

// One stage of the detection cascade: a classifier table plus its threshold.
// NOTE(review): template arguments (e.g. of Array2dC, IntImage, std::vector)
// are missing throughout this span, and the line starting "if(v& types" fuses
// the end of NodeDetector::SetValues with a declaration of
// DetectionScanner::LoadDetector — the rest of NodeDetector, all of
// CascadeDetector and the first part of DetectionScanner are lost. Restore
// them from the original C4 sources.
class NodeDetector
{
public:
    // CD_LIN / CD_HIK are the on-disk model kinds; Load() maps them to
    // LINEAR / HISTOGRAM once the model has been read.
    enum NodeType { CD_LIN, CD_HIK, LINEAR, HISTOGRAM };
public:
    int type; // linear or histogram?
    Array2dC classifier;    // model weights filled by Load()
    double thresh;          // value returned by the model loader
    int featurelength;
    int upper_bound;
    int index;
    std::string filename;   // model file this node was loaded from
public:
    // Loads the model immediately and resets the min/max trackers so that the
    // first SetValues() call overwrites them.
    NodeDetector(const NodeType _type,const int _featurelength,const int _upper_bound,const int _index,const char* _filename)
    {
        Load(_type,_featurelength,_upper_bound,_index,_filename);
        minvalue = DBL_MAX;
        maxvalue = -minvalue;
    }
    ~NodeDetector()
    {
    }

    void Load(const NodeType _type,const int _featurelength,const int _upper_bound,const int _index,const char* _filename);
    bool Classify(int* f);
private:
    double minvalue;
    double maxvalue;
public:
    // Tracks the largest value seen so far (the rest of the body is truncated).
    void SetValues(const double v)
    {
        if(v>maxvalue) maxvalue = v;
        if(v& types,std::vector& upper_bounds,std::vector& filenames);
private:
    // Working images of the scanner, filled by InitImage() and
    // InitIntegralImages().
    IntImage* integrals;
    IntImage image,sobel;
    IntImage ct;
    Array2dC hist;
    IntImage scores;

    void InitImage(IntImage& original);
    void InitIntegralImages(const int stepsize);
    void ResizeImage();
public:
    int Scan(IntImage& original,std::vector& results,const int stepsize,const int round,std::ofstream* out,const int upper_bound);
    int FastScan(IntImage& original,std::vector& results,const int stepsize);
    // Length of the feature vector of one detection window.
    int FeatureLength() const
    {
        return (xdiv-1)*(ydiv-1)*baseflength;
    }
};

// Declared here; no definition is visible in this file.
void RunFiles();

/*****************************************/
// Pedestrian_ICRA.cpp
/*****************************************/

// Size of the detection window, in image rows and columns.
const int HUMAN_height = 108, HUMAN_width = 36;
// Number of divisions of the window: xdiv splits the height and ydiv splits
// the width (see DetectionScanner::InitIntegralImages).
const int HUMAN_xdiv = 9, HUMAN_ydiv = 4;
// Subtracted from xdiv and ydiv when the feature length is computed in
// DetectionScanner::LoadDetector.
static const int EXT = 1;

// The detector
// Global scanner instance; main() loads the cascade into it and runs FastScan
// on every frame.
// NOTE(review): the constructor is not visible here. The trailing arguments
// 256 and 0.8 are presumably the per-cell feature length (baseflength) and the
// image resize ratio — confirm against the DetectionScanner declaration.
DetectionScanner scanner(HUMAN_height,HUMAN_width,HUMAN_xdiv,HUMAN_ydiv,256,0.8);

// ---------------------------------------------------------------------
// Helper functions


// Fills "ct" from "original". DetectionScanner::InitImage passes the Sobel
// image as "original", so "ct" is presumably the census-transform image built
// from it (the original one-line comment called "ct" the Sobel image).
// NOTE(review): the body of ComputeCT is truncated, and the line starting
// "for(int i=2; i& result)" fuses it with the signature of the linear SVM
// model loader that follows (the three-argument
// UseSVM_CD_FastEvaluationStructure overload called by NodeDetector::Load).
// The text between "SVM model " and ">>buffer" is also lost. Restore all of
// this from the original C4 sources.
void ComputeCT(IntImage& original,IntImage& ct)
{
    ct.Create(original.nrow,original.ncol);
    for(int i=2; i& result)
{
    // From here on: text-format linear model loader.
    std::ifstream in(modelfile);
    if(in.good()==false)
    {
        std::cout<<"SVM model "<>buffer;
    assert(buffer=="nr_feature");
    // The feature count stored in the file must equal the expected length m.
    int num_dim;
    in>>num_dim;
    assert(num_dim>0 && num_dim==m);
    std::getline(in,buffer); // end of line 4
    in>>buffer;
    assert(buffer=="bias");
    int bias;
    in>>bias;
    std::getline(in,buffer); //end of line 5;
    in>>buffer;
    assert(buffer=="w");
    std::getline(in,buffer); //end of line 6
    // One row of num_dim weights, read in file order.
    result.Create(1,num_dim);
    for(int i=0; i>result.buf[i];
    // A trailing value is read as rho only when the stored bias is non-negative;
    // otherwise rho stays 0. The caller stores the returned value as its threshold.
    double rho = 0;
    if(bias>=0) in>>rho;
    in.close();
    return rho;
}

// Load SVM models -- Histogram Intersection Kernel SVM trained by libHIK
//   modelfile   - path of a binary file: four ints (rows, cols, type,
//                 channels) followed by rows*cols matrix elements
//   m           - number of rows created in "result"
//   upper_bound - number of columns created in "result"
//   result      - receives the m x upper_bound table
// Returns the fixed constant -0.00455891 regardless of the file contents;
// NodeDetector::Load stores it as the node threshold.
// Terminates the process with exit(-1) when the file cannot be opened.
// NOTE(review): the loop that copies the matrix into "result" is truncated in
// this text (its bounds and the element type of the accessor were stripped).
// NOTE(review): none of the reads is checked, "channels" is read but unused,
// and rows/cols are not compared against m/upper_bound — confirm this is
// acceptable for the model files in use.
double UseSVM_CD_FastEvaluationStructure(const char* modelfile, const int m, const int upper_bound, Array2dC& result)
{

    std::ifstream fs(modelfile, std::fstream::binary);
	if( !fs.is_open() )
	{
		std::cout << "SVM model " << modelfile << " can not be loaded." << std::endl;
		exit(-1);
	}
    // Header
    int rows, cols, type, channels;
    fs.read((char*)&rows, sizeof(int));         // rows
    fs.read((char*)&cols, sizeof(int));         // cols
    fs.read((char*)&type, sizeof(int));         // type
    fs.read((char*)&channels, sizeof(int));     // channels

    // Data
    cv::Mat mat(rows, cols, type);
    fs.read((char*)mat.data, CV_ELEM_SIZE(type) * rows * cols);

    int num_dim = m;

    result.Create(num_dim, upper_bound);
    for(int i=0; i(i, j);
        }

    return -0.00455891;
}

// Computes the overlap of "this" and "rect2" and stores it in "result".
// Returns false and clears "result" when either rectangle is empty or the two
// do not overlap; otherwise returns true. Each edge of "result" is derived
// only from the same edge of the inputs, so "result" may alias "this".
bool CRect::Intersect(CRect& result,const CRect& rect2) const
{
    // Same tests, in the same order, as a single short-circuit condition.
    bool disjoint = Empty() || rect2.Empty();
    if(!disjoint)
        disjoint = (left >= rect2.right) || (rect2.left >= right);
    if(!disjoint)
        disjoint = (top >= rect2.bottom) || (rect2.top >= bottom);

    if(disjoint)
    {
        result.Clear();
        return false;
    }

    // Inner-most edge on every side.
    result.left   = std::max( left, rect2.left );
    result.right  = std::min( right, rect2.right );
    result.top    = std::max( top, rect2.top );
    result.bottom = std::min( bottom, rect2.bottom );
    return true;
}

// Computes the bounding box of "this" and "rect2" and stores it in "result".
// An empty rectangle contributes nothing: if exactly one input is empty the
// other is copied; if both are empty "result" is cleared and false is
// returned. Returns true in every other case.
bool CRect::Union(CRect& result,const CRect& rect2) const
{
    const bool selfEmpty = Empty();
    const bool otherEmpty = rect2.Empty();

    if(selfEmpty && otherEmpty)
    {
        result.Clear();
        return false;
    }
    if(selfEmpty)
    {
        result = rect2;
        return true;
    }
    if(otherEmpty)
    {
        result = *this;
        return true;
    }

    // Both non-empty: outer-most edge on every side.
    result.left   = std::min( left, rect2.left );
    result.right  = std::max( right, rect2.right );
    result.top    = std::min( top, rect2.top );
    result.bottom = std::max( bottom, rect2.bottom );
    return true;
}

// A simple post-process (NMS, non-maximal suppression)
// "result" -- rectangles before merging
//          -- after this function it contains rectangles after NMS
// "combine_min" -- threshold of how many detection are needed to survive
// Working arrays, indexed by cluster:
//   res1   - running sum of the edges of the detections merged into the cluster
//   resmax - union rectangle of those detections
//   res2   - number of detections merged into the cluster
// NOTE(review): the element types of the vectors, the start of the matching
// loop and the final loop (which presumably averages res1 and keeps clusters
// with more than combine_min members — confirm) are truncated in this text;
// restore them from the original C4 sources.
void PostProcess(std::vector& result,const int combine_min)
{
    std::vector res1;
    std::vector resmax;
    std::vector res2;
    bool yet;
    CRect rectInter;

    for(unsigned int i=0,size_i=result.size(); i0.6*result_i.Size()
                        && rectInter.Size()>0.6*resmax_j.Size()
                  )
                {
                    // The overlap exceeds 60% of both rectangles: merge this
                    // detection into cluster j and stop searching.
                    CRect& res1_j = res1[j];
                    resmax_j.Union(resmax_j,result_i);
                    res1_j.bottom += result_i.bottom;
                    res1_j.top += result_i.top;
                    res1_j.left += result_i.left;
                    res1_j.right += result_i.right;
                    res2[j]++;
                    yet = true;
                    break;
                }
            }
        }
        // No cluster matched: this detection starts a new one.
        if(yet==false)
        {
            res1.push_back(result_i);
            resmax.push_back(result_i);
            res2.push_back(1);
        }
    }

    for(unsigned int i=0,size=res1.size(); icombine_min)
            result.push_back(res1[i]);
}

// If one detection (after NMS) is inside another, remove the inside one
// A rectangle is marked as covered when its intersection with another
// rectangle exceeds 65% of its own area.
// NOTE(review): the loop headers, the computation of "isize" and the rebuild
// of "result" are truncated in this text, and the line starting
// "for(unsigned int i=0; i types;" fuses the end of this function with the
// start of LoadCascade (whose signature is lost; main() calls it as
// LoadCascade(scanner) and the body uses the parameter name "ds"). Restore
// both from the original C4 sources.
void RemoveCoveredRectangles(std::vector& result)
{
    std::vector covered;
    covered.resize(result.size());
    std::fill(covered.begin(),covered.end(),false);
    CRect inter;
    for(unsigned int i=0; iresult[i].Size()*0.65)
                covered[i] = true;
            if(isize>result[j].Size()*0.65)
                covered[j] = true;
        }
    }
    std::vector newresult;
    // From here on: body of LoadCascade. It registers a two-node cascade and
    // hands it to the scanner.
    for(unsigned int i=0; i types;
    std::vector upper_bounds;
    std::vector filenames;

    types.push_back(NodeDetector::CD_LIN); // first node
    upper_bounds.push_back(100);
    filenames.push_back("combined.txt.model");
    types.push_back(NodeDetector::CD_HIK); // second node
    upper_bounds.push_back(353);
    filenames.push_back("combined.txt.model_");

    ds.LoadDetector(types,upper_bounds,filenames);
    // You can adjust these parameters for different speed, accuracy etc
    ds.cascade->nodes[0]->thresh += 0.8;
    ds.cascade->nodes[1]->thresh -= 0.095;
}

// Replaces the scanner's cascade with one node per entry of the three
// parallel lists (node type, upper bound, model file name) and sizes the
// histogram buffer to the feature length (xdiv-EXT)*(ydiv-EXT)*baseflength.
// The lists must be non-empty and of equal length.
// NOTE(review): the element types of the vectors and the header of the
// node-adding loop are truncated in this text (the loop presumably runs over
// all depth entries and calls cascade->AddNode — confirm).
void DetectionScanner::LoadDetector(std::vector& types,std::vector& upper_bounds,std::vector& filenames)
{
    unsigned int depth = types.size();
    assert(depth>0 && depth==upper_bounds.size() && depth==filenames.size());
    // Drop any previously loaded cascade before building the new one.
    if(cascade)
        delete cascade;
    cascade = new CascadeDetector;
    assert(xdiv>0 && ydiv>0);
    for(unsigned int i=0; iAddNode(types[i],(xdiv-EXT)*(ydiv-EXT)*baseflength,upper_bounds[i],filenames[i].c_str());

    hist.Create(1,baseflength*(xdiv-EXT)*(ydiv-EXT));
}

// Initialises this node and loads its model from disk.
//   _type          - CD_LIN loads a linear model, CD_HIK a histogram model
//   _featurelength - expected feature-vector length, passed to the loader
//   _upper_bound   - passed to the histogram loader; stored for both kinds
//   _index         - position of the node in the cascade
//   _filename      - model file to read
// After loading, `type` holds LINEAR or HISTOGRAM instead of CD_LIN / CD_HIK
// and `thresh` holds the value returned by the loader.
void NodeDetector::Load(const NodeType _type,const int _featurelength,const int _upper_bound,const int _index,const char* _filename)
{
    type = _type;
    index = _index;
    filename = _filename;
    featurelength = _featurelength;
    upper_bound = _upper_bound;

    switch(_type)
    {
    case CD_LIN:
        thresh = UseSVM_CD_FastEvaluationStructure(_filename,_featurelength,classifier);
        type = LINEAR;
        break;
    case CD_HIK:
        thresh = UseSVM_CD_FastEvaluationStructure(_filename,_featurelength,upper_bound,classifier);
        type = HISTOGRAM;
        break;
    default:
        // No model is loaded for the other node types. Give thresh a defined
        // value so that later adjustments such as "thresh += ..." never read
        // an indeterminate double.
        thresh = 0.0;
        break;
    }
}

// Appends a new node to the cascade, growing the node array when it is full.
// The parameters are forwarded to the NodeDetector constructor; the new node's
// index is its position in the array.
void CascadeDetector::AddNode(const NodeDetector::NodeType _type,const int _featurelength,const int _upper_bound,const char* _filename)
{
    if(length==size)
    {
        // Doubling a capacity of 0 would never grow, and the write below would
        // then land outside the array, so start from 1 in that case.
        // (The initial capacity is set by the constructor, which is not
        // visible here.)
        const int newsize = (size>0) ? size*2 : 1;
        // operator new[] throws on failure, so no NULL check is needed.
        NodeDetector** grown = new NodeDetector*[newsize];
        if(nodes!=NULL)
            std::copy(nodes,nodes+length,grown);
        delete[] nodes;
        nodes = grown;
        size = newsize;
    }
    nodes[length] = new NodeDetector(_type,_featurelength,_upper_bound,length,_filename);
    length++;
}

// End of functions that load the two classifiers
// ---------------------------------------------------------------------

// ---------------------------------------------------------------------
// Detection functions

// initialization -- compute the Census Transform image for CENTRIST
// Copies "original" into the scanner's working image, computes its Sobel
// image with useSqrt=false and normalize=false, and derives "ct" from that
// Sobel image through ComputeCT.
// NOTE(review): the template argument of IntImage is missing from the
// signature in this text; restore it from the original C4 sources.
void DetectionScanner::InitImage(IntImage& original)
{
    image = original;
    image.Sobel(sobel,false,false);
    ComputeCT(sobel,ct);
}

// combine the (xdiv-1)*(ydiv-1) integral images into a single one
// Only runs when the first cascade node is LINEAR. "scores" is sized like
// "ct" and pre-filled with the first node's threshold divided by hd*wd.
// NOTE(review): the accumulation loop is truncated, and the line starting
// "for(int i=0; i& original" fuses it with the signature of
// DetectionScanner::FastScan; several loop headers inside FastScan are
// truncated as well. Restore them from the original C4 sources.
void DetectionScanner::InitIntegralImages(const int stepsize)
{
    if(cascade->nodes[0]->type!=NodeDetector::LINEAR)
        return; // No need to prepare integral images

    const int hd = height/xdiv*2-2;
    const int wd = width/ydiv*2-2;
    scores.Create(ct.nrow,ct.ncol);
    scores.Zero(cascade->nodes[0]->thresh/hd/wd);
    double* linearweights = cascade->nodes[0]->classifier.buf;
    // From the fused line below onwards: DetectionScanner::FastScan. It scans
    // every scale of the image and appends the accepted windows to "results",
    // expressed in the coordinates of the original image. Always returns 0.
    for(int i=0; i& original,std::vector& results,const int stepsize)
{
    if(original.nrownodes[1];
    double** pc = node->classifier.p;
    int oheight = original.nrow, owidth = original.ncol;
    CRect rect;
    // Keep scanning while the current scale still fits one detection window.
    while(image.nrow>=height && image.ncol>=width)
    {
        InitIntegralImages(stepsize);
        for(int i=2; i+heightthresh;
                // Second-stage score: sum of the table entries selected by
                // the histogram values.
                for(int k=0; kclassifier.nrow; k++) score += pc[k][hist.buf[k]];
                if(score>0)
                {
                    // Map the window back to the original resolution.
                    rect.top = i*oheight/image.nrow;
                    rect.bottom = (i+height)*oheight/image.nrow;
                    rect.left = j*owidth/image.ncol;
                    rect.right = (j+width)*owidth/image.ncol;
                    results.push_back(rect);
                }
            }
        }
        ResizeImage();
    }
    return 0;
}

// The interface function that detects pedestrians
// "filename" -- an input image
// detect pedestrians in image "filename" and save results to "outname"
// "ds" -- the detector cascade -- pass "scanner" as this parameter
// "out" -- file stream that saves the output rectangle information
// NOTE(review): DetectHuman takes only "filename" and "ds"; the "outname" and
// "out" entries above do not match its signature.
// NOTE(review): the drawing loop of DetectHuman is truncated, and the line
// starting "for(unsigned int i=0; i\" <<" fuses it with the usage message of
// main(), whose signature is lost. The declarations of key, fx, wait_time,
// rect_organization and original are also lost on the line that prints
// "Detectors loaded.". Restore all of this from the original C4 sources.
int totaltime = 0;

int DetectHuman(const char* filename,DetectionScanner& ds)
{
    std::vector results;
    IntImage original;
    // NOTE(review): with the Load call commented out, "original" is still
    // empty when FastScan runs — confirm this function is meant to be used.
//    original.Load(filename);

    ds.FastScan(original,results,2);
    PostProcess(results,2);
    PostProcess(results,0);
    RemoveCoveredRectangles(results);

    cvNamedWindow("show");
    {
        IplImage* iplImage = NULL;
        iplImage = cvLoadImage(filename);
        for(unsigned int i=0; i" << std::endl << std::endl;
	// From here on: body of main(). Prints the key bindings, then processes
	// the video named by argv[1] frame by frame until Esc is pressed or the
	// stream ends.
	std::cout << "keys:" << std::endl;
	std::cout << "space : toggle using simple post-process (NMS, non-maximal suppression)" << std::endl;
	std::cout << "0     : waits to process next frame until a key pressed" << std::endl;
	std::cout << "1     : doesn't wait to process next frame" << std::endl;
	std::cout << "2     : resize frames 1/2" << std::endl;
	std::cout << "3     : don't resize frames" << std::endl;
	std::cout << "4     : resize frames 1/4" << std::endl;
	// Without a video argument only the usage text is shown.
	if (argc < 2)
		return 0;

	cv::Mat src;
    cv::VideoCapture capture( argv[1] );

    LoadCascade(scanner);
    std::cout<<"Detectors loaded."< original;

    // 27 is the Esc key.
    while( key != 27 )
    {
        capture >> src;
        if( src.empty() ) break;  
		
		// Optional down-scaling of the frame before detection.
		if (fx < 1)
		{
			cv::resize(src, src, cv::Size(), fx, fx);
		}
		
        original.Load( src );
        std::vector results;
        scanner.FastScan(original, results, 2);

        // Merge overlapping detections unless post-processing is toggled off.
        if(rect_organization)
        {
            PostProcess(results,2);
            PostProcess(results,0);
            RemoveCoveredRectangles(results);
        }

        // Draw every surviving detection as a green box.
        for(size_t i = 0; i < results.size(); i++)
        {
            cv::rectangle(src, cvPoint(results[i].left,results[i].top),cvPoint(results[i].right,results[i].bottom),cv::Scalar(0,255,0),2 );
        }

        cv::imshow("result",src);
        key = cv::waitKey( wait_time );

		// Key codes: 32 = space, 48..52 = '0'..'4' (see the usage text).
		if (key == 32)
			rect_organization = !rect_organization;

		if (key == 48)
			wait_time = 0;

		if (key == 49)
			wait_time = 1;

		if (key == 50)
			fx = 0.5;

		if (key == 51)
			fx = 1;

		if (key == 52)
			fx = 0.25;
	}
    // Keep the last frame on screen until a key is pressed.
    cv::waitKey();
    return 0;
}

 

你可能感兴趣的:(行人检测)