在选定第一个拉格朗日乘子ai后,内层循环会通过最大化步长的方式来挑选第二个拉格朗日乘子,即最大化|Ei-Ej|:当Ei为正时选取最小的Ej,当Ei为负时选取最大的Ej。
下面给出matlab代码实现 :
1.线性可分简单smo
function [b,alphas] = smoSimple(data, class, C, toler, maxIter)
% smoSimple  Simplified SMO training for a linear SVM.
%   data    - m-by-n sample matrix (one sample per row)
%   class   - m-by-1 label vector (+1 / -1)
%   C       - box constraint on the Lagrange multipliers
%   toler   - KKT violation tolerance
%   maxIter - number of consecutive sweeps without change before stopping
% Returns the bias b and the m-by-1 multiplier vector alphas.
% Unlike full SMO, the second index j is chosen at random (selectJrand).
b = 0;
[m,n] = size(data);
alphas = zeros(m,1);
iter=0;
% iter counts consecutive full sweeps in which no alpha pair changed.
while (iter < maxIter)
alphasChanges = 0;
for k=1:1:m
fxk = (alphas .* class)' * data * data(k,:)' + b; % f = wx+b
ek = fxk - class(k);
% Optimize only if sample k violates its KKT condition beyond toler.
if (((ek*class(k) < -toler) && (alphas(k) < C)) || ((ek*class(k) > toler) && (alphas(k) > 0)))
j = selectJrand(k,m);
fxj = (alphas .* class)' * data * data(j,:)' + b; % f = wx+b
ej = fxj - class(j);
temp_k = alphas(k);
temp_j = alphas(j);
% Feasible interval [L,H] for alphas(j) under sum(alpha.*class) = const.
if(class(k) ~= class(j))
L = max(0, alphas(j) - alphas(k));
H = min(C, C + alphas(j) - alphas(k));
else
L = max(0, alphas(k) + alphas(j) - C);
H = min(C, alphas(k) + alphas(j));
end
if L == H
continue;
end
% eta = 2*K(k,j) - K(k,k) - K(j,j); must be negative for a proper step.
eta = 2.0 * data(k,:) * data(j,:)' - data(k,:) * data(k,:)' - data(j,:) * data(j,:)';
if eta >= 0
continue;
end
alphas(j) = alphas(j) - class(j) * (ek - ej) / eta;
alphas(j) = clipalpha(alphas(j), H, L);
% Skip if alphas(j) barely moved.
if(abs(alphas(j) - temp_j) < 0.00001)
continue;
end
% Move alphas(k) the opposite amount to keep the equality constraint.
alphas(k) = alphas(k) + class(k) * class(j) * (temp_j - alphas(j));
b1 = b - ek - class(k) * (alphas(k) - temp_k) * data(k,:) * data(k,:)' - class(j) * (alphas(j) - temp_j) * data(k,:) * data(j,:)';
b2 = b - ej - class(k) * (alphas(k) - temp_k) * data(k,:) * data(j,:)' - class(j) * (alphas(j) - temp_j) * data(j,:) * data(j,:)';
% Prefer the bias computed from a non-bound multiplier.
if (alphas(k) > 0 && alphas(k) < C)
b = b1;
elseif(alphas(j) > 0 && alphas(j) < C)
b = b2;
else
b = (b1 + b2)/2;
end
alphasChanges = alphasChanges + 1;
end
end
if alphasChanges == 0
iter = iter + 1;
else
iter = 0;
end
end
end
function index = selectJrand(k,m)
% Draw a uniform random index in 1..m that differs from k.
index = randi(m);
while index == k
    index = randi(m);
end
end
function res = clipalpha(a, H, L)
% Clamp a into [L, H]: apply the upper bound first, then the lower bound
% (same order as the original sequential checks).
res = max(min(a, H), L);
end
% Driver script: load the linearly separable data set, train the simple
% SMO, then plot the samples, the support vectors and the separating line.
clc;
clear;
load Data
[r,c] = size(Data);
Test = Data(:,1:2);
Label = Data(:,3);
% C = 0.6, tolerance = 0.001, stop after 40 stagnant sweeps.
[b, alphas] = smoSimple(Test, Label, 0.6, 0.001, 40);
%% Plot the data points (red + for class +1, blue * otherwise)
figure(1)
axis([-2 12 -8 6])
for k = 1:1:r
hold on
if Data(k,3) == 1
plot(Data(k,1),Data(k,2),'r+');
else
plot(Data(k,1),Data(k,2),'b*');
end
end
% Circle the support vectors (alpha ~= 0) and draw the separating line
%result=[];
for k=1:1:r
if alphas(k)~= 0
hold on
%result =[result;alphas(k)];
QX = plot(Data(k,1:1),Data(k,2:2),'Ok','MarkerSize',12);
set(QX,'LineWidth',2.0);
end
end
% w = sum(alpha_i * y_i * x_i); the boundary is w(1)*x + w(2)*y + b = 0.
W=(alphas.*Label)'*Data(:,1:2);
y=(-W(1).* Data(:,1:1)-b) ./W(2);
plot(Data(:,1:1),y);
结果:
上述代码运行时间较长,此处仅是100个点的小规模数据集,对于更大的数据集收敛时间会更长。
2.完整的smo算法
function [b, res_alphas] = smoP(data, class, C, toler, maxIter)
% smoP  Full Platt SMO outer loop (linear kernel).
%   data    - m-by-n sample matrix (one sample per row)
%   class   - m-by-1 label vector (+1 / -1)
%   C       - box constraint on the Lagrange multipliers
%   toler   - KKT violation tolerance
%   maxIter - maximum number of outer-loop sweeps
% Returns the bias b and the m-by-1 multiplier vector res_alphas.
% Alternates between sweeps over the whole set and sweeps over the
% non-bound samples (0 < alpha < C) until no pair changes.
[m, n] = size(data);
iter = 0;
entireSet = 1;
alphaPairsChanged = 0;
oS = init(data, class, C, toler, m);
% NOTE(review): this condition was corrupted in the published listing
% (HTML ate the "<...>" span); reconstructed as: keep looping while
% progress is being made within the sweep budget, or while a full sweep
% is still pending.
while (((iter < maxIter) && (alphaPairsChanged > 0)) || (entireSet == 1))
    alphaPairsChanged = 0;
    if entireSet == 1
        % Full pass over every sample.
        for k = 1:1:m
            [ret, oS] = innerL(k, oS);
            alphaPairsChanged = alphaPairsChanged + ret;
        end
        iter = iter + 1;
    else
        % Pass over non-bound alphas only (0 < alpha < C).
        nonBoundIs = [];
        for k = 1:1:m
            if ((oS.alphas(k) < C) && (oS.alphas(k) > 0))
                nonBoundIs = [nonBoundIs k];
            end
        end
        [r, c] = size(nonBoundIs);
        for k = 1:1:c
            index = nonBoundIs(k);
            [ret, oS] = innerL(index, oS);
            alphaPairsChanged = alphaPairsChanged + ret;
        end
        iter = iter + 1;
    end
    % After a full sweep switch to non-bound sweeps; fall back to a full
    % sweep when a non-bound sweep makes no progress.
    if entireSet == 1
        entireSet = 0;
    elseif alphaPairsChanged == 0
        entireSet = 1;
    end
end
b = oS.b;
res_alphas = oS.alphas;
end
function oS = init(data,class,C,toler,m)
% Bundle the SMO working state into a single struct.
oS = struct();
oS.data = data;            % m-by-n samples
oS.class = class;          % m-by-1 labels
oS.C = C;                  % box constraint
oS.toler = toler;          % KKT tolerance
oS.m = m;                  % sample count
oS.alphas = zeros(m, 1);   % Lagrange multipliers, all start at zero
oS.b = 0;                  % bias term
oS.eCache = zeros(m, 2);   % per-sample [validFlag, cachedError]
end
function [ret,oS] = innerL(k, oS)
% innerL  Try to jointly optimize the pair (alpha_k, alpha_j).
% Returns ret = 1 if the pair changed, 0 otherwise; oS carries the
% updated state (alphas, b, error cache).
Ei = calcEk(oS, k);
% Proceed only if alpha_k violates its KKT condition beyond the tolerance.
if(((oS.class(k)*Ei < -oS.toler) && (oS.alphas(k) < oS.C)) || ((oS.class(k)*Ei > oS.toler) && (oS.alphas(k) > 0)))
    [j, Ej] = selectJ(k, oS, Ei);
    temp_k = oS.alphas(k);
    temp_j = oS.alphas(j);
    % Feasible interval [L,H] for alpha_j under the equality constraint.
    if oS.class(k) ~= oS.class(j)
        L = max(0, oS.alphas(j) - oS.alphas(k));
        H = min(oS.C, oS.C + oS.alphas(j) - oS.alphas(k));
    else
        L = max(0, oS.alphas(j) + oS.alphas(k) - oS.C);
        H = min(oS.C, oS.alphas(j) + oS.alphas(k));
    end
    if L == H
        ret = 0;
        return;
    end
    % eta = 2*K(k,j) - K(k,k) - K(j,j); must be negative for a maximum.
    eta = 2.0 * oS.data(k,:) * oS.data(j,:)' - oS.data(k,:) * oS.data(k,:)' - oS.data(j,:) * oS.data(j,:)';
    if eta >= 0
        ret = 0;
        return;
    end
    oS.alphas(j) = oS.alphas(j) - oS.class(j) * (Ei - Ej) / eta;
    oS.alphas(j) = clipalpha(oS.alphas(j), H, L);
    % Refresh the cached error for j.
    Et = calcEk(oS, j);
    oS.eCache(j,:) = [1 Et];
    if(abs(oS.alphas(j) - temp_j) < 0.00001)
        ret = 0;
        return;
    end
    % Move alpha_k the opposite amount to keep sum(alpha.*class) fixed.
    oS.alphas(k) = oS.alphas(k) + oS.class(j)*oS.class(k)*(temp_j - oS.alphas(j));
    Et = calcEk(oS, k);
    oS.eCache(k,:) = [1 Et];
    b1 = oS.b - Ei - oS.class(k) * (oS.alphas(k) - temp_k) * oS.data(k,:) * oS.data(k,:)' - oS.class(j) * (oS.alphas(j) - temp_j) * oS.data(k,:) * oS.data(j,:)';
    b2 = oS.b - Ej - oS.class(k) * (oS.alphas(k) - temp_k) * oS.data(k,:) * oS.data(j,:)' - oS.class(j) * (oS.alphas(j) - temp_j) * oS.data(j,:) * oS.data(j,:)';
    % NOTE(review): the published listing was corrupted here (HTML ate
    % the "<" spans, merging this update with calcEk/selectJ below);
    % reconstructed per the standard SMO bias update: prefer the bias
    % from a non-bound multiplier, otherwise average.
    if (oS.alphas(k) > 0) && (oS.alphas(k) < oS.C)
        oS.b = b1;
    elseif (oS.alphas(j) > 0) && (oS.alphas(j) < oS.C)
        oS.b = b2;
    else
        oS.b = (b1 + b2) / 2;
    end
    ret = 1;
else
    ret = 0;
end
end
function Ek = calcEk(oS, k)
% calcEk  Prediction error E_k = f(x_k) - y_k for sample k (linear kernel).
% NOTE(review): this helper was lost in the corrupted listing;
% reconstructed from the f = w*x + b form used in smoSimple.
fxk = (oS.alphas .* oS.class)' * oS.data * oS.data(k,:)' + oS.b;
Ek = fxk - oS.class(k);
end
function [j, Ej] = selectJ(k, oS, Ei)
% selectJ  Second-choice heuristic: among samples with a valid cached
% error pick j maximizing |Ei - Ej|; fall back to a random index when the
% cache holds no other entries.
maxK = -1;
maxDeltaE = 0;
Ej = 0;
oS.eCache(k,:) = [1 Ei];
validEcacheList = find(oS.eCache(:,1) ~= 0);
c = length(validEcacheList);
if c > 1
    for l = 1:1:c
        index = validEcacheList(l);
        if index == k
            continue;
        end
        Ek = calcEk(oS,index);
        deltaE = abs(Ei - Ek);
        if(deltaE > maxDeltaE)
            maxK = index;
            maxDeltaE = deltaE;
            Ej = Ek;
        end
    end
    j = maxK;
else
    % No other cached errors yet: choose j at random.
    j = selectJrand(k, oS.m);
    Ej = calcEk(oS, j);
end
end
function index = selectJrand(k,m)
% Draw a uniform random index in 1..m that differs from k.
index = randi(m);
while index == k
    index = randi(m);
end
end
function res = clipalpha(a, H, L)
% Clamp a into [L, H]: apply the upper bound first, then the lower bound
% (same order as the original sequential checks).
res = max(min(a, H), L);
end
% Driver script: load the data set, train with the full Platt SMO, then
% plot the samples, the support vectors and the separating line.
clc;
clear;
load Data
[r,c] = size(Data);
Test = Data(:,1:2);
Label = Data(:,3);
% C = 0.6, tolerance = 0.001, at most 40 outer-loop sweeps.
[b, alphas] = smoP(Test, Label, 0.6, 0.001, 40);
%% Plot the data points (red + for class +1, blue * otherwise)
figure(1)
axis([-2 12 -8 6])
for k = 1:1:r
hold on
if Data(k,3) == 1
plot(Data(k,1),Data(k,2),'r+');
else
plot(Data(k,1),Data(k,2),'b*');
end
end
% Circle the support vectors (alpha ~= 0) and draw the separating line
%result=[];
for k=1:1:r
if alphas(k)~= 0
hold on
%result =[result;alphas(k)];
QX = plot(Data(k,1:1),Data(k,2:2),'Ok','MarkerSize',12);
set(QX,'LineWidth',2.0);
end
end
% w = sum(alpha_i * y_i * x_i); the boundary is w(1)*x + w(2)*y + b = 0.
W=(alphas.*Label)'*Data(:,1:2);
y=(-W(1).* Data(:,1:1)-b) ./W(2);
plot(Data(:,1:1),y);
运行结果:
与第一个代码唯一的不同就是选择alphas的方式:第一个代码在每次外层循环中都遍历整个数据集。常数C=0.6,一方面要保证所有样例的函数间隔不小于1.0,另一方面又要使得分类间隔尽可能大,并且要在这两方面之间取得平衡。如果C很大,那么分类器将力图通过分割超平面对所有的样例都正确分类。图中小圆点标注的是支持向量。如果数据集非线性可分,支持向量会在超平面附近聚集成团。
四、非线性可分问题
对于上图,在二维平面中很难用直线分割,但是这里明显存在着两类数据。接下来,我们就使用一种称为核函数的工具将数据转化成易于分类器理解的形式。
1. 利用核函数将数据映射到高维空间
对于上图而言,如果只在x和y构成的坐标系中插入直线进行分类的话,我们不会得到理想的结果。我们可以对数据进行转换从而得到某些新的变量来表示数据。在这种情况下,我们就更容易得到大于零或小于零的测试结果。数学家们将数据从一个特征空间转换到另一特征空间的过程称为特征空间映射,通常我们将低维特征空间映射到高维特征空间。下面举个例子来形象地理解核函数:
我们把横轴上端点a和b之间红色部分里的所有点定为正类,两边的黑色部分里的点定为负类。试问能找到一个线性函数把两类正确分开么?不能,因为二维空间里的线性函数就是指直线,显然找不到符合条件的直线。但我们可以找到一条曲线,例如下面这一条:
显然通过点在这条曲线的上方还是下方就可以判断点所属的类别(你在横轴上随便找一点,算算这一点的函数值,会发现负类的点函数值一定比0大,而正类的一定比0小)。这条曲线就是我们熟知的二次曲线。
上述过程即完成了一维空间向二维空间的映射。
对于SVM分类问题,所有的运算都可以写成内积形式(点积),我们把内积运算替换成核函数,即可完成特征映射。核函数主要有:
l 多项式核
l 傅立叶核
l B样条核
l Sigmod核
l 高斯径向基核
核函数并不仅仅应用于支持向量机,很多其他机器学习算法也要用到。下面就介绍高斯径向基核函数。
径向基函数是一种采用向量作为自变量的函数,能够基于向量距离输出一个标量,具体数学公式:
其中,σ是用户定义的用于确定到达率或者说是函数值跌落到0的速度参数。这个高斯核函数将数据从其特征空间映射到更高维的空间,具体说来这里是映射到一个无穷维的空间。我们不用确切地理解数据是如何表现的。
【这里扯一下我的同学,他的论文《基于矩阵运算的单隐层Madaline网络批量学习》,人家提出数据往低维空间映射,比较神奇哈】
最终的分类平面:(推导参考:http://blog.csdn.net/wangran51/article/details/7354915)
代码:
function [b, res_alphas] = rbf_smoP(data, class, C, toler, maxIter, k1)
% rbf_smoP  Full Platt SMO outer loop with a Gaussian RBF kernel.
%   data    - m-by-n sample matrix (one sample per row)
%   class   - m-by-1 label vector (+1 / -1)
%   C       - box constraint on the Lagrange multipliers
%   toler   - KKT violation tolerance
%   maxIter - maximum number of outer-loop sweeps
%   k1      - RBF width parameter passed through to kernelTrans
% Returns the bias b and the m-by-1 multiplier vector res_alphas.
% Identical structure to smoP; the kernel matrix is precomputed in init.
[m, n] = size(data);
iter = 0;
entireSet = 1;
alphaPairsChanged = 0;
oS = init(data, class, C, toler, m, k1);
% NOTE(review): this condition was corrupted in the published listing
% (HTML ate the "<...>" span); reconstructed as: keep looping while
% progress is being made within the sweep budget, or while a full sweep
% is still pending.
while (((iter < maxIter) && (alphaPairsChanged > 0)) || (entireSet == 1))
    alphaPairsChanged = 0;
    if entireSet == 1
        % Full pass over every sample.
        for k = 1:1:m
            [ret, oS] = innerL(k, oS);
            alphaPairsChanged = alphaPairsChanged + ret;
        end
        iter = iter + 1;
    else
        % Pass over non-bound alphas only (0 < alpha < C).
        nonBoundIs = [];
        for k = 1:1:m
            if ((oS.alphas(k) < C) && (oS.alphas(k) > 0))
                nonBoundIs = [nonBoundIs k];
            end
        end
        [r, c] = size(nonBoundIs);
        for k = 1:1:c
            index = nonBoundIs(k);
            [ret, oS] = innerL(index, oS);
            alphaPairsChanged = alphaPairsChanged + ret;
        end
        iter = iter + 1;
    end
    % After a full sweep switch to non-bound sweeps; fall back to a full
    % sweep when a non-bound sweep makes no progress.
    if entireSet == 1
        entireSet = 0;
    elseif alphaPairsChanged == 0
        entireSet = 1;
    end
end
b = oS.b;
res_alphas = oS.alphas;
end
function K = kernelTrans(X, A, k1)
% kernelTrans  Gaussian RBF kernel column against a single sample.
% Returns K(j) = exp(-||X(j,:) - A||^2 / (2*k1)) for every row of X.
% NOTE(review): the divisor is 2*k1, not k1^2 — k1 plays the role of
% 2*sigma^2 here; confirm against how callers choose k1.
diffs = X - repmat(A, size(X, 1), 1);   % row-wise X(j,:) - A
sqDist = sum(diffs .^ 2, 2);            % squared Euclidean distances
K = exp(sqDist ./ (-2 * k1));
end
function oS = init(data,class,C,toler,m,k1)
% Bundle the SMO working state and precompute the full RBF kernel matrix.
oS = struct();
oS.data = data;            % m-by-n samples
oS.class = class;          % m-by-1 labels
oS.C = C;                  % box constraint
oS.toler = toler;          % KKT tolerance
oS.m = m;                  % sample count
oS.alphas = zeros(m, 1);   % Lagrange multipliers, all start at zero
oS.b = 0;                  % bias term
oS.eCache = zeros(m, 2);   % per-sample [validFlag, cachedError]
oS.K = zeros(m, m);
for col = 1:1:m
    % Column col holds k(x_i, x_col) for all samples i.
    oS.K(:, col) = kernelTrans(oS.data, oS.data(col,:), k1);
end
end
function [ret,oS] = innerL(k, oS)
% innerL  Try to jointly optimize the pair (alpha_k, alpha_j) using the
% precomputed kernel matrix oS.K. Returns ret = 1 if the pair changed.
Ei = calcEk(oS, k);
% Proceed only if alpha_k violates its KKT condition beyond the tolerance.
if(((oS.class(k)*Ei < -oS.toler) && (oS.alphas(k) < oS.C)) || ((oS.class(k)*Ei > oS.toler) && (oS.alphas(k) > 0)))
    [j, Ej] = selectJ(k, oS, Ei);
    temp_k = oS.alphas(k);
    temp_j = oS.alphas(j);
    % Feasible interval [L,H] for alpha_j under the equality constraint.
    if oS.class(k) ~= oS.class(j)
        L = max(0, oS.alphas(j) - oS.alphas(k));
        H = min(oS.C, oS.C + oS.alphas(j) - oS.alphas(k));
    else
        L = max(0, oS.alphas(j) + oS.alphas(k) - oS.C);
        H = min(oS.C, oS.alphas(j) + oS.alphas(k));
    end
    if L == H
        ret = 0;
        return;
    end
    % eta = 2*K(k,j) - K(k,k) - K(j,j); must be negative for a maximum.
    eta = 2.0 * oS.K(k,j) - oS.K(k,k) - oS.K(j,j);
    if eta >= 0
        ret = 0;
        return;
    end
    oS.alphas(j) = oS.alphas(j) - oS.class(j) * (Ei - Ej) / eta;
    oS.alphas(j) = clipalpha(oS.alphas(j), H, L);
    % Refresh the cached error for j.
    Et = calcEk(oS, j);
    oS.eCache(j,:) = [1 Et];
    if(abs(oS.alphas(j) - temp_j) < 0.00001)
        ret = 0;
        return;
    end
    % Move alpha_k the opposite amount to keep sum(alpha.*class) fixed.
    oS.alphas(k) = oS.alphas(k) + oS.class(j)*oS.class(k)*(temp_j - oS.alphas(j));
    Et = calcEk(oS, k);
    oS.eCache(k,:) = [1 Et];
    b1 = oS.b - Ei - oS.class(k) * (oS.alphas(k) - temp_k) * oS.K(k,k) - oS.class(j) * (oS.alphas(j) - temp_j) * oS.K(k,j);
    b2 = oS.b - Ej - oS.class(k) * (oS.alphas(k) - temp_k) * oS.K(k,j) - oS.class(j) * (oS.alphas(j) - temp_j) * oS.K(j,j);
    % NOTE(review): the published listing was corrupted here (HTML ate
    % the "<" spans, merging this update with calcEk/selectJ below);
    % reconstructed per the standard SMO bias update: prefer the bias
    % from a non-bound multiplier, otherwise average.
    if (oS.alphas(k) > 0) && (oS.alphas(k) < oS.C)
        oS.b = b1;
    elseif (oS.alphas(j) > 0) && (oS.alphas(j) < oS.C)
        oS.b = b2;
    else
        oS.b = (b1 + b2) / 2;
    end
    ret = 1;
else
    ret = 0;
end
end
function Ek = calcEk(oS, k)
% calcEk  Prediction error E_k = f(x_k) - y_k using the kernel matrix:
% f(x_k) = sum_i(alpha_i * y_i * K(x_i, x_k)) + b.
% NOTE(review): this helper was lost in the corrupted listing;
% reconstructed from the kernelized decision function.
fxk = (oS.alphas .* oS.class)' * oS.K(:,k) + oS.b;
Ek = fxk - oS.class(k);
end
function [j, Ej] = selectJ(k, oS, Ei)
% selectJ  Second-choice heuristic: among samples with a valid cached
% error pick j maximizing |Ei - Ej|; fall back to a random index when the
% cache holds no other entries.
maxK = -1;
maxDeltaE = 0;
Ej = 0;
oS.eCache(k,:) = [1 Ei];
validEcacheList = find(oS.eCache(:,1) ~= 0);
c = length(validEcacheList);
if c > 1
    for l = 1:1:c
        index = validEcacheList(l);
        if index == k
            continue;
        end
        Ek = calcEk(oS,index);
        deltaE = abs(Ei - Ek);
        if(deltaE > maxDeltaE)
            maxK = index;
            maxDeltaE = deltaE;
            Ej = Ek;
        end
    end
    j = maxK;
else
    % No other cached errors yet: choose j at random.
    j = selectJrand(k, oS.m);
    Ej = calcEk(oS, j);
end
end
function index = selectJrand(k,m)
% Draw a uniform random index in 1..m that differs from k.
index = randi(m);
while index == k
    index = randi(m);
end
end
function res = clipalpha(a, H, L)
% Clamp a into [L, H]: apply the upper bound first, then the lower bound
% (same order as the original sequential checks).
res = max(min(a, H), L);
end
% Driver script: train an RBF-kernel SVM on the nonlinear data set, plot
% the samples and support vectors, then evaluate on a held-out test set.
clc;
clear;
load NData
load NTest
Data = ndata;
Data_Test = ntest;
[r,c] = size(Data);
Test = Data(:,1:2);
Label = Data(:,3);
% C = 200, tolerance = 0.0001, up to 1000 sweeps, RBF width k1 = 1.3.
[b, alphas] = rbf_smoP(Test, Label, 200, 0.0001, 1000,1.3);
%% Plot the data points (red + for class +1, blue * otherwise)
figure(1)
axis([-1.5 1.5 -1.5 1.5])
for k = 1:1:r
hold on
if Data(k,3) == 1
plot(Data(k,1),Data(k,2),'r+');
else
plot(Data(k,1),Data(k,2),'b*');
end
end
%% Circle the support vectors and collect them for prediction
support_vector = [];
lable_sv = [];
alphas_sv = [];
for k=1:1:r
if alphas(k)~= 0
hold on
support_vector = [support_vector; Test(k,1:2)];
lable_sv = [lable_sv Label(k)];
alphas_sv = [alphas_sv alphas(k)];
%result =[result;alphas(k)];
QX = plot(Data(k,1:1),Data(k,2:2),'Ok','MarkerSize',12);
set(QX,'LineWidth',2.0);
end
end
%% Prediction: f(x) = sum(alpha_i * y_i * k(sv_i, x)) + b, count errors
temp = lable_sv .* alphas_sv;
[m, n] = size(Data_Test);
errorCount = 0;
for k = 1:1:m
value = kernelTrans(support_vector, Data_Test(k,1:2),1.3);
predict = temp * value + b;
if predict > 0
predict = 1;
else
predict = -1;
end
if predict ~= Data_Test(k,3:3)
errorCount = errorCount + 1;
end
end
errorCount
运行结果:
支持向量围绕超平面成团了。。。
预测结果,错分类2,效果不错。
代码地址:
http://download.csdn.net/detail/jinshengtao/8134089
唉,这篇博文写了将近一个月,断断续续的,自己写到最后都不知道写的什么了,尤其smo推导那块,乱七八糟,大家可以参考网上其他的优秀文章。
华为比较辛苦,搞适配单板,bcm sdk啥的一点意思都没有,明年打算辞职咯