// Linear SVM trained with Platt's SMO algorithm, using Eigen for the matrix
// arithmetic. The training file is expected to contain one sample per line:
// whitespace-separated feature values followed by a +1/-1 class label.
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <algorithm>
#include <cmath>
#include <random>
#include <ctime>
#include <Eigen/Dense>

using namespace std;
using namespace Eigen;

class svm
{
public:
    EIGEN_MAKE_ALIGNED_OPERATOR_NEW // keep Eigen members correctly aligned

    svm(const string &filename)
    {
        dataInit(filename);
    }

    // Train with SMO, derive w from the resulting alphas, and print it.
    void print()
    {
        smoP();
        calcWs();
        cout << w << endl;
    }

    // Evaluate the learned hyperplane on the training set.
    void test()
    {
        double errCount = 0.0;
        for (int i = 0; i < trainDataNum; i++)
        {
            double pred = trainDataMat.row(i).dot(w) + b;
            if ((pred < 0 && trainLabelMat[i] > 0) ||
                (pred > 0 && trainLabelMat[i] < 0))
                errCount += 1.0;
        }
        cout << errCount << endl;
        cout << "error rate is: " << errCount / trainDataNum << endl;
    }

private:
    // Read the training file: on each line the last value is the label,
    // everything before it is a feature.
    void dataInit(const string &filename)
    {
        ifstream file(filename);
        string line;
        while (getline(file, line))
        {
            istringstream record(line);
            vector<double> data;
            double temp;
            while (record >> temp)
                data.push_back(temp);
            if (data.empty()) // skip blank lines
                continue;
            trainLabelVec.push_back(temp); // last value read is the label
            data.pop_back();
            trainDataVec.push_back(data);
        }

        trainDataNum = trainDataVec.size();
        trainFeatureNum = trainDataVec[0].size();
        trainDataMat.resize(trainDataNum, trainFeatureNum);
        trainLabelMat.resize(trainDataNum);
        alphas.resize(trainDataNum);
        eCache.resize(trainDataNum, 2);
        w.resize(trainFeatureNum);

        for (int i = 0; i < trainDataNum; i++)
            for (int j = 0; j < trainFeatureNum; j++)
                trainDataMat(i, j) = trainDataVec[i][j];

        for (int i = 0; i < trainDataNum; i++)
        {
            trainLabelMat[i] = trainLabelVec[i];
            alphas[i] = 0;
            eCache(i, 0) = 0; // column 0: is the cached error valid?
            eCache(i, 1) = 0; // column 1: cached error value
        }
    }

    vector<vector<double>> trainDataVec;
    vector<double> trainLabelVec;
    int trainDataNum;
    int trainFeatureNum;
    Eigen::MatrixXd trainDataMat;
    Eigen::VectorXd trainLabelMat;
    double C = 0.6;       // regularization constant
    double toler = 0.001; // KKT tolerance
    double b = 0;         // bias
    Eigen::VectorXd alphas;
    Eigen::MatrixXd eCache; // error cache: (valid flag, error)
    Eigen::VectorXd w;

    // E_k = f(x_k) - y_k, with f(x_k) = sum_i alpha_i * y_i * <x_i, x_k> + b.
    double calcEk(int k)
    {
        Eigen::VectorXd alpMulLab(trainDataNum);
        for (int i = 0; i < trainDataNum; i++)
            alpMulLab(i) = alphas(i) * trainLabelMat(i);
        double fXk = alpMulLab.dot(trainDataMat * trainDataMat.row(k).transpose()) + b;
        return fXk - trainLabelMat(k);
    }

    // Pick a random index j in [0, m) different from i.
    int selectJrand(int i, int m)
    {
        static std::mt19937 generator(static_cast<unsigned>(time(nullptr)));
        std::uniform_int_distribution<int> dis(0, m - 1); // bounds are inclusive
        int j = i;
        while (j == i)
            j = dis(generator);
        return j;
    }

    // Second-choice heuristic: among the cached errors, pick the j that
    // maximizes |Ei - Ek|; fall back to a random j when the cache is empty.
    int selectJ(int i, double Ei, double &Ej)
    {
        int maxK = -1;
        double maxDeltaE = 0;
        Ej = 0;
        eCache(i, 0) = 1;
        eCache(i, 1) = Ei;
        int flag = 0;
        for (int k = 0; k < trainDataNum; k++)
        {
            if (eCache(k, 0) == 1)
            {
                if (k == i)
                    continue;
                flag = 1;
                double Ek = calcEk(k);
                double deltaE = abs(Ei - Ek);
                if (deltaE > maxDeltaE)
                {
                    maxK = k;
                    maxDeltaE = deltaE;
                    Ej = Ek;
                }
            }
        }
        if (flag && maxK >= 0)
            return maxK;
        int j = selectJrand(i, trainDataNum);
        Ej = calcEk(j);
        return j;
    }

    // Clip alpha into [L, H].
    double clipAlpha(double aj, double H, double L)
    {
        if (aj > H)
            aj = H;
        if (L > aj)
            aj = L;
        return aj;
    }

    // Try to optimize the pair (alpha_i, alpha_j); returns 1 if the pair changed.
    int innerL(int i)
    {
        double Ei = calcEk(i);
        if (((trainLabelMat[i] * Ei < -toler) && (alphas[i] < C)) ||
            ((trainLabelMat[i] * Ei > toler) && (alphas[i] > 0)))
        {
            double Ej;
            int j = selectJ(i, Ei, Ej);
            double alphaIold = alphas[i];
            double alphaJold = alphas[j];
            double L, H;
            if (trainLabelMat[i] != trainLabelMat[j])
            {
                L = max(0.0, alphas[j] - alphas[i]);
                H = min(C, C + alphas[j] - alphas[i]);
            }
            else
            {
                L = max(0.0, alphas[j] + alphas[i] - C);
                H = min(C, alphas[j] + alphas[i]);
            }
            if (L == H)
                return 0;
            double eta = 2.0 * trainDataMat.row(i).dot(trainDataMat.row(j)) -
                         trainDataMat.row(i).dot(trainDataMat.row(i)) -
                         trainDataMat.row(j).dot(trainDataMat.row(j));
            if (eta >= 0)
                return 0;
            alphas[j] -= trainLabelMat[j] * (Ei - Ej) / eta;
            alphas[j] = clipAlpha(alphas[j], H, L);
            updateEk(j);
            if (abs(alphas[j] - alphaJold) < 0.00001)
                return 0;
            alphas[i] += trainLabelMat[j] * trainLabelMat[i] * (alphaJold - alphas[j]);
            updateEk(i);
            double b1 = b - Ei - trainLabelMat[i] * (alphas[i] - alphaIold) *
                                     (trainDataMat.row(i).dot(trainDataMat.row(i))) -
                        trainLabelMat[j] * (alphas[j] - alphaJold) *
                            (trainDataMat.row(i).dot(trainDataMat.row(j)));
            double b2 = b - Ej - trainLabelMat[i] * (alphas[i] - alphaIold) *
                                     (trainDataMat.row(i).dot(trainDataMat.row(j))) -
                        trainLabelMat[j] * (alphas[j] - alphaJold) *
                            (trainDataMat.row(j).dot(trainDataMat.row(j)));
            if (0 < alphas[i] && C > alphas[i])
                b = b1;
            else if (0 < alphas[j] && alphas[j] < C)
                b = b2;
            else
                b = (b1 + b2) / 2.0;
            return 1;
        }
        return 0;
    }
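    // Full Platt SMO outer loop: alternate between a sweep over every sample
    // and sweeps over the non-bound alphas (0 < alpha < C), stopping when a
    // sweep changes no alpha pair or the iteration limit is reached.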
    void smoP()
    {
        int iter = 0;
        int alphaPairsChanged = 0;
        int entireSet = 1;
        while ((iter < 40) && ((alphaPairsChanged > 0) || (entireSet)))
        {
            alphaPairsChanged = 0;
            if (entireSet) // go over all samples
            {
                for (int i = 0; i < trainDataNum; i++)
                    alphaPairsChanged += innerL(i);
                iter += 1;
            }
            else // go over the non-bound alphas only
            {
                for (int i = 0; i < trainDataNum; i++)
                {
                    if (alphas[i] > 0 && alphas[i] < C)
                        alphaPairsChanged += innerL(i);
                }
                iter += 1;
            }
            if (entireSet)
                entireSet = 0;
            else if (alphaPairsChanged == 0)
                entireSet = 1;
        }
    }

    // Recompute E_k and mark it valid in the cache.
    void updateEk(int k)
    {
        double Ek = calcEk(k);
        eCache(k, 0) = 1;
        eCache(k, 1) = Ek;
    }

    // Simplified SMO (random second choice, no error cache); kept as an
    // alternative to smoP() but not used by print().
    void smoSimple()
    {
        int iter = 0;
        while (iter < 40)
        {
            int alphaPairsChanged = 0;
            for (int i = 0; i < trainDataNum; i++)
            {
                double Ei = calcEk(i);
                if (((trainLabelMat[i] * Ei < -toler) && (alphas[i] < C)) ||
                    ((trainLabelMat[i] * Ei > toler) && (alphas[i] > 0)))
                {
                    int j = selectJrand(i, trainDataNum);
                    double Ej = calcEk(j);
                    double alphaIold = alphas[i];
                    double alphaJold = alphas[j];
                    double L, H;
                    if (trainLabelMat[i] != trainLabelMat[j])
                    {
                        L = max(0.0, alphas[j] - alphas[i]);
                        H = min(C, C + alphas[j] - alphas[i]);
                    }
                    else
                    {
                        L = max(0.0, alphas[j] + alphas[i] - C);
                        H = min(C, alphas[j] + alphas[i]);
                    }
                    if (L == H)
                    {
                        cout << "L==H" << endl;
                        continue;
                    }
                    double eta = 2.0 * trainDataMat.row(i).dot(trainDataMat.row(j)) -
                                 trainDataMat.row(i).dot(trainDataMat.row(i)) -
                                 trainDataMat.row(j).dot(trainDataMat.row(j));
                    if (eta >= 0)
                    {
                        cout << "eta>=0" << endl;
                        continue;
                    }
                    alphas[j] -= trainLabelMat[j] * (Ei - Ej) / eta;
                    alphas[j] = clipAlpha(alphas[j], H, L);
                    if (abs(alphas[j] - alphaJold) < 0.00001)
                    {
                        cout << "j not moving enough" << endl;
                        continue;
                    }
                    alphas[i] += trainLabelMat[j] * trainLabelMat[i] * (alphaJold - alphas[j]);
                    double b1 = b - Ei - trainLabelMat[i] * (alphas[i] - alphaIold) *
                                             (trainDataMat.row(i).dot(trainDataMat.row(i))) -
                                trainLabelMat[j] * (alphas[j] - alphaJold) *
                                    (trainDataMat.row(i).dot(trainDataMat.row(j)));
                    double b2 = b - Ej - trainLabelMat[i] * (alphas[i] - alphaIold) *
                                             (trainDataMat.row(i).dot(trainDataMat.row(j))) -
                                trainLabelMat[j] * (alphas[j] - alphaJold) *
                                    (trainDataMat.row(j).dot(trainDataMat.row(j)));
                    if (0 < alphas[i] && C > alphas[i])
                        b = b1;
                    else if (0 < alphas[j] && alphas[j] < C)
                        b = b2;
                    else
                        b = (b1 + b2) / 2.0;
                    alphaPairsChanged += 1;
                    cout << "iter: " << iter << " pairs changed: " << alphaPairsChanged << endl;
                }
            }
            if (alphaPairsChanged == 0)
                iter += 1;
            else
                iter = 0;
        }
    }

    // w = sum_i alpha_i * y_i * x_i
    void calcWs()
    {
        Eigen::VectorXd alpMulLab(trainDataNum);
        for (int i = 0; i < trainDataNum; i++)
            alpMulLab(i) = alphas(i) * trainLabelMat(i);
        for (int i = 0; i < trainFeatureNum; i++)
            w[i] = alpMulLab.dot(trainDataMat.col(i));
    }
};

int main()
{
    svm model("testSet.txt");
    model.print();
    model.test();
}