The Principle of L-BFGS and Its Application in Regression Analysis

[Figures 1–3 of the original post, illustrating the principle of L-BFGS; the images are not reproduced here.]
A follow-up post will cover the application of L-BFGS to pattern classification.
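Since the derivation lives in the figures above, here is a brief recap of the standard L-BFGS update that the code below implements. With correction pairs $s_i = \theta_{i+1} - \theta_i$ and $y_i = \nabla f_{i+1} - \nabla f_i$, the two-loop recursion computes $r = H_k \nabla f_k$ using

$$\rho_i = \frac{1}{y_i^\top s_i}, \qquad H_k^0 = \gamma_k I, \qquad \gamma_k = \frac{s_{k-1}^\top y_{k-1}}{y_{k-1}^\top y_{k-1}},$$

a backward loop $\alpha_i = \rho_i\, s_i^\top q$, $q \leftarrow q - \alpha_i y_i$ (with $q$ initialized to $\nabla f_k$), the scaling $r \leftarrow H_k^0 q$, and a forward loop $\beta = \rho_i\, y_i^\top r$, $r \leftarrow r + s_i(\alpha_i - \beta)$. The search direction is $d_k = -r$, and the step length $a$ is chosen by backtracking until the Armijo condition $f(\theta_k + a\, d_k) \le f(\theta_k) + c\, a\, \nabla f_k^\top d_k$ holds.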

L_BFGS.m
x = load('ex3x.dat');
y = load('ex3y.dat');
c=0.6; a=1; roi=0.5; m=10; % Armijo constant c, initial step length a, backtracking ratio roi, L-BFGS memory size m
x = [ones(size(x,1),1) x];
meanx = mean(x);   % column means
sigmax = std(x);   % column standard deviations
x(:,2) = (x(:,2)-meanx(2))./sigmax(2);
x(:,3) = (x(:,3)-meanx(3))./sigmax(3);
itera_num = 1000;               % maximum number of iterations
sample_num = size(x,1);         % number of training samples
theta_old = zeros(size(x,2),1); % initialize theta to zero
Jtheta_new = zeros(itera_num, 1);
Jtheta = zeros(itera_num, 1);
Jtheta1= zeros(itera_num, 1);
q=zeros(3,itera_num);    % gradient copies
M=zeros(3,itera_num);    % M(:,k) stores y_k = grad_{k+1} - grad_k
s=zeros(3,itera_num);    % s(:,k) stores s_k = theta_{k+1} - theta_k
r=zeros(3,itera_num);    % two-loop recursion results
A=zeros(1,itera_num);    % alpha values from the backward loop
roiJ=zeros(1,itera_num); % rho values, rho_k = 1/(y_k' * s_k)
Jtheta(1) = (1/(2*sample_num)).*(x*theta_old-y)'*(x*theta_old-y); % initial cost
grad_old = (1/sample_num).*x'*(x*theta_old-y);                    % initial gradient
q(:,1) = grad_old;
B = eye(3);         % initial Hessian approximation (identity)
d1 = -(B\grad_old); % first search direction: steepest descent
Jtheta_new = (1/(2*sample_num)).*(x*(theta_old+a*d1)-y)'*(x*(theta_old+a*d1)-y); % cost at the trial point
% Backtracking line search for the first step (Armijo condition:
% f(theta + a*d) <= f(theta) + c*a*grad'*d, where grad'*d < 0)
while 1
    if Jtheta_new <= Jtheta(1) + (c*a)*grad_old'*d1
        break;
    else
        a = roi*a; % shrink the step and re-evaluate the trial cost
    end
    Jtheta_new = (1/(2*sample_num)).*(x*(theta_old+a*d1)-y)'*(x*(theta_old+a*d1)-y);
end
theta_new = theta_old + a*d1; % take the first step
N = itera_num; % default iteration count in case the gradient test below never fires
for i = 2:itera_num % main L-BFGS loop
    a = 0.5; % reset the trial step length
    Jtheta(i) = (1/(2*sample_num)).*(x*theta_new-y)'*(x*theta_new-y); % current cost
    grad_old = (1/sample_num).*x'*(x*theta_old-y);
    grad_new = (1/sample_num).*x'*(x*theta_new-y);
    M(:,i-1) = grad_new - grad_old;     % y_k: gradient difference
    s(:,i-1) = theta_new - theta_old;   % s_k: parameter step
    roiJ(i-1) = 1/(M(:,i-1)'*s(:,i-1)); % rho_k = 1/(y_k' * s_k)
    gamma = (s(:,i-1)'*M(:,i-1))/(M(:,i-1)'*M(:,i-1)); % scaling for the initial Hessian approximation
    HK = gamma*eye(3);                  % H_k^0 = gamma*I
    if norm(grad_new) < 1e-4 % stop when the gradient is small enough
        N = i;
        break;
    end
    r = lbfgsloop(i, m, HK, s, M, roiJ, grad_new); % two-loop recursion: r = H_k * grad_new
    d = -r;                                        % L-BFGS search direction
    Jtheta1(i) = (1/(2*sample_num)).*(x*(theta_new+a*d)-y)'*(x*(theta_new+a*d)-y); % cost at the trial point
    % Backtracking line search along d (Armijo condition)
    while 1
        if Jtheta1(i,1) <= Jtheta(i,1) + (c*a)*grad_new'*d
            break;
        else
            a = roi*a;
        end
        Jtheta1(i) = (1/(2*sample_num)).*(x*(theta_new+a*d)-y)'*(x*(theta_new+a*d)-y);
    end
    theta_old = theta_new;
    theta_new = theta_new + a*d; % accept the step
end
plot(0:N-1, Jtheta(1:N), 'b--', 'LineWidth', 3); % cost vs. iteration count
hold on
legend('L-BFGS');
xlabel('Number of iterations')
ylabel('Cost function')
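As a quick sanity check (my addition, not part of the original script), the converged theta_new can be compared against the closed-form least-squares solution on the same normalized design matrix:

theta_exact = (x'*x)\(x'*y); % normal-equation solution
fprintf('max |theta_new - theta_exact| = %g\n', max(abs(theta_new - theta_exact)));

The two should agree up to the 1e-4 gradient tolerance used in the loop above.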

lbfgsloop.m

function result = lbfgsloop(iteras, memlen, H_k, STHETA, MGRAD, ROI, grad)
%% Two-loop recursion: computes result = H_k * grad from the stored pairs
% (the memory parameter is named memlen to avoid shadowing the builtin length)
if iteras <= memlen
    deta = 0;               % offset into the storage arrays
    L = iteras;
else
    deta = iteras - memlen; % keep only the memlen most recent columns
    L = memlen;
end
q = grad;
k = L - 1;
%% Backward loop: compute alpha(k) and update q
while k >= 1
    j = k + deta;                 % column index of the stored pair
    A(k) = ROI(j)*STHETA(:,j)'*q; % alpha_k = rho_k * s_k' * q
    q = q - A(k)*MGRAD(:,j);      % q <- q - alpha_k * y_k
    k = k - 1;
end
r = H_k*q; % scale by the initial inverse-Hessian approximation H_k^0
%% Forward loop: afterwards r equals H_k * grad
for k = 1:L-1
    j = k + deta;
    beta = ROI(j)*MGRAD(:,j)'*r;       % beta = rho_k * y_k' * r
    r = r + STHETA(:,j)*(A(k) - beta); % r <- r + s_k*(alpha_k - beta)
end

result = r;
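A minimal standalone check of the function (hypothetical inputs of my own, not from the original post): with iteras = 1 there are no stored correction pairs, so both loops are skipped and the recursion should reduce to r = H_k*grad:

g = [1; 2; 3];
r0 = lbfgsloop(1, 10, 2*eye(3), zeros(3,1), zeros(3,1), 0, g); % expect r0 == 2*g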

