discrete adaboost的C++实现

参考之前关于AdaBoost算法学习的博文,下面给出该算法的C++实现代码。

//adaboost.h
#ifndef ADABOOST_H
#define ADABOOST_H

#include<cmath>
#include<iostream>
#include<vector>
#include<assert.h>


// Makes every std name visible unqualified to all code that includes this header;
// the rest of this file relies on it (e.g. unqualified `vector` and `cout`).
using namespace std;

// Element type used for the feature values stored in FeaVec::fea.
#define FEATURETYPE double


// One sample: a feature vector together with its class label.
//
// The constructor only records the declared dimensionality; it does not
// populate `fea`. Callers append the feature values themselves.
struct FeaVec
{
	unsigned int dim;             // declared number of features
	std::vector<FEATURETYPE>fea;  // feature values, filled in by the caller
	int label;                    // class label; only -1 and +1 are used, 0 means "not labelled"

	// d: declared number of features of this sample.
	// `label` starts at 0 so that samples which never receive a label
	// (e.g. a query to be classified) do not carry an indeterminate value
	// when they are copied.
	FeaVec(unsigned int d) :dim(d), label(0)
	{

	}
};

class weakclassfier;

// Discrete AdaBoost strong classifier built from weak classifiers
// (class weakclassfier).
//
// Usage: load_trainset() -> train() -> classify().
class adaboost
{

public:
	friend class weakclassfier;  // weak learners read trainset, dim and W directly
	adaboost();
	virtual ~adaboost();

	// Trains the weak classifiers on the currently loaded training set.
	void train();

	// Returns the predicted label (+1 or -1) for `data`.
	int classify(FeaVec data);

	// Copies `*data` into the internal training set.
	void load_trainset(vector<FeaVec>*data);

protected:

private:
	// The object owns `W` and the weak classifiers through raw pointers that
	// the destructor releases, so a member-wise copy would free them twice.
	// Copying is therefore disabled (declared, intentionally not defined).
	adaboost(const adaboost&);
	adaboost& operator=(const adaboost&);

	double*W;                              // per-sample weights, allocated with new[] in train()
	int dim;                               // feature dimensionality
	std::vector<FeaVec>trainset;           // private copy of the training samples
	std::vector<weakclassfier*>classfier;  // trained weak classifiers, deleted in the destructor
	double aggri_error;                    // reset to 0 at the start of every boosting round

};

#endif // ADABOOST_H


//adaboost.cpp
#include "stdafx.h"
#include "adaboost.h"

// Weak learner: a decision stump that thresholds a single feature dimension.
class weakclassfier
{
public:
	friend class adaboost;  // the booster reads alpha, predicted, threshold, ... directly

	// ada: the owning booster, whose training set and sample weights build() reads.
	// Every member is given a defined value here; in particular `predicted`
	// must start as NULL because build() and the destructor delete it.
	weakclassfier(adaboost*ada)
		: greaterthan(true), dim(0), threshold(0), min_error_rate(1000000),
		predicted(NULL), alpha(0), ada(ada)
	{
	}

	// Searches for the (dimension, threshold, direction) with the lowest
	// weighted error on the booster's training set.
	void build();

	// Classifies every sample of `data` with a stump on feature `k` and
	// returns a newly allocated vector of +1/-1 predictions owned by the caller.
	std::vector<int>* stumpclassify(int const k, double const threshold,
		vector<FeaVec>& data, bool greatthan);
	~weakclassfier();
private:
	// `predicted` is an owning raw pointer released by the destructor, so a
	// member-wise copy would free it twice. Copying is disabled
	// (declared, intentionally not defined).
	weakclassfier(const weakclassfier&);
	weakclassfier& operator=(const weakclassfier&);

	bool greaterthan;            // direction of the inequality used by the stump
	int dim;                     // feature dimension this stump splits on
	double threshold;            // split threshold
	double min_error_rate;       // weighted training error of the selected stump
	std::vector<int>*predicted;  // predictions of the selected stump on the training set
	double alpha;                // weight of this stump in the strong classifier
	adaboost* ada;               // owning booster (not owned by this object)
};
// Releases the cached training-set predictions, if any.
weakclassfier::~weakclassfier()
{
	// Deleting a null pointer is a no-op, so no explicit check is required.
	delete predicted;
}
void weakclassfier::build()
{
	double minerror = 100000;
	for (int i = 0; i < ada->dim; i++)//外循环次数少
	{
		double min = 100000;
		double max = -100000;
		for (int j = 0; j<ada->trainset.size(); j++)
		{
			if (ada->trainset[j].fea[i]>max)
				max = ada->trainset[j].fea[i];
			if (ada->trainset[j].fea[i] < min)
				min = ada->trainset[j].fea[i];
		}

		double step = (max - min) / double(10);
		for (double j = min; j < max;)
		{
			j += step;
			double current_error = 0;
			bool flag = false;
			vector<int>*aa = stumpclassify(i, j, ada->trainset, true);
			for (int k = 0; k < ada->trainset.size(); k++)
				current_error += ((*aa)[k] != ada->trainset[k].label) ? ada->W[k] : 0;
			if (current_error < min_error_rate)
			{
				min_error_rate = current_error;
				threshold = j;
				greaterthan = true;
				dim = i;
				if (predicted != NULL)
					delete predicted;
				predicted = aa;
				flag = true;
			}
			current_error = 0;
			aa = stumpclassify(i, j, ada->trainset, false);
			for (int k = 0; k < ada->trainset.size(); k++)
				current_error += ((*aa)[k] != ada->trainset[k].label) ? ada->W[k] : 0;
			//current_error += abs((*aa)[k] -ada->trainset[k].label) *ada->W[k];
			if (current_error < min_error_rate)
			{
				min_error_rate = current_error;
				threshold = j;
				greaterthan = false;
				dim = i;
				if (predicted != NULL)
					delete predicted;
				predicted = aa;
				flag = true;
			}
			if (!flag)//new和delete必须配套使用
				delete aa;
		}
	}
	assert(min_error_rate < 0.5);
}

std::vector<int>* weakclassfier::stumpclassify(int const k, double const threshold,
	vector<FeaVec>&data, bool greatthan)
{
	std::vector<int>*pre = new vector < int > ;
	//开始假设都满足大于阈值
	//开始假设都满足小于阈值
	(*pre).insert((*pre).begin(), data.size(), 1);

	for (int j = 0; j < data.size(); j++)
	{
		if (greatthan&&data[j].fea[k] < threshold)//对于greater_than,ada->trainset[j]被预测为另一个类
		{
			(*pre)[j] = -1;
		}
		else if (!greatthan&&data[j].fea[k] > threshold)
		{
			(*pre)[j] = -1;
		}
	}
	return pre;
}



// Creates an empty, untrained booster.
//
// W must start as NULL: the destructor releases it with delete[], which would
// act on an indeterminate pointer if train() had never been called.
adaboost::adaboost()
	: W(NULL), dim(0), aggri_error(0)
{
}

// Destroys all trained weak classifiers and releases the sample-weight array.
adaboost::~adaboost()
{
	for (vector<weakclassfier*>::iterator it = classfier.begin(); it != classfier.end(); ++it)
		delete *it;

	// delete[] on a null pointer is a no-op, so no explicit check is required.
	delete[] W;
}

void adaboost::train()
{
	W = new double[trainset.size()];
	//全部初始化为0,用memset可以,但某一特定值,只能用循环了
	//memset(W, double(1) / double(trainset.size()), trainset.size()*sizeof(double));
	for (int i = 0; i < trainset.size(); i++)
		W[i] = double(1) / double(trainset.size());
	vector<double> aggrigate;
	aggrigate.resize(trainset.size());

	while (classfier.size() < 4)
	{
		aggri_error = 0;
		weakclassfier*weak = new weakclassfier(this);
		weak->build();
		if (weak->min_error_rate < 0.5)
		{
			//弱分类器的准确率越高,其权重也越大
			weak->alpha = (0.5*log((1.0 - weak->min_error_rate) / (weak->min_error_rate + 1e-16)));
			classfier.push_back(weak);
			double sumW = 0;
			for (int j = 0; j < trainset.size(); j++)
			{
				//根据当前弱分类器分类结果将错分样本的权重提升
				W[j] *= exp(weak->alpha*((*weak->predicted)[j] == trainset[j].label ? -1 : 1));
				sumW += W[j];
			}
			for (int j = 0; j < trainset.size(); j++)
			{
				W[j] /= (sumW + 0.00000001);
				//	aggrigate[j] += weak->alpha*(*weak->predicted)[j];
				//aggri_error += ((aggrigate[j] > 0) ? 1 : -1) == trainset[j].label ? 0 : 1;
			}
			//aggri_error /= double(trainset.size());
			//	if (aggri_error == 0)
			//	break;
		}
		delete weak->predicted;
	}
}

// Classifies a single sample with the trained ensemble.
//
// Every weak classifier votes +1 or -1 and the votes are summed, each weighted
// by the classifier's alpha. Returns 1 when the weighted sum is strictly
// positive and -1 otherwise (including when no classifier has been trained).
int adaboost::classify(FeaVec data)
{
	// stumpclassify works on a collection of samples, so wrap the single one.
	vector<FeaVec> single(1, data);
	double score = 0;

	for (vector<weakclassfier*>::iterator it = classfier.begin(); it != classfier.end(); ++it)
	{
		weakclassfier* stump = *it;
		vector<int>* vote = stump->stumpclassify(stump->dim, stump->threshold,
			single, stump->greaterthan);
		score += vote->front() * stump->alpha;
		delete vote;  // the result vector is owned by the caller
	}

	if (score > 0)
		return 1;
	return -1;
}


// Copies the samples in `*data` into the internal training set and takes the
// feature dimensionality from the last sample's `dim` field.
//
// data: samples to copy. A null pointer is ignored and leaves the current
//       training set unchanged. An empty vector yields an empty training set
//       with dimensionality 0.
void adaboost::load_trainset(vector<FeaVec>*data)
{
	if (data == NULL)
		return;

	trainset = *data;
	// back() on an empty vector is undefined behaviour, so guard against it.
	dim = trainset.empty() ? 0 : static_cast<int>(trainset.back().dim);
}








//main
#include "stdafx.h"
#include"adaboost.h"

int _tmain(int argc, _TCHAR* argv[])
{
	cout << double(1) / double(5) << endl;
	FeaVec aa(2), bb(2), cc(2), dd(2),ee(2);
	aa.fea.push_back(2);
	aa.fea.push_back(1.1);
	aa.label = 1;
	bb.fea.push_back(1.3);
	bb.fea.push_back(1.0);
	bb.label = -1;
	cc.fea.push_back(1.0);
	cc.fea.push_back(1.0);
	cc.label = -1;
	dd.fea.push_back(2);
	dd.fea.push_back(1.0);
	dd.label = 1;
	ee.fea.push_back(1);
	ee.fea.push_back(2.1);
	ee.label = 1;
	vector<FeaVec>pp;
	pp.push_back(aa);
	pp.push_back(bb);
	pp.push_back(cc);
	pp.push_back(dd);
	pp.push_back(ee);
	adaboost ada;
	ada.load_trainset(&pp);
	ada.train();
	FeaVec ff(2);
	ff.fea.push_back(0.9);
	ff.fea.push_back(1.1);
	int a = ada.classify(ff);

	return 0;
}


你可能感兴趣的:(adaboost)