This post gives the implementation directly, drawing mainly on Dr. Gao Xiang's 《SLAM十四讲》 (14 Lectures on Visual SLAM).
The following data are used throughout. The model function is y = a*e^(b*t), the residual is r = a*e^(b*t) - y, and the cost function is fx = 0.5*r^2.
double t[8] = {1, 2, 3, 4, 5, 6, 7, 8}; // independent variable
double y[8] = {8.3, 11.0, 14.7, 19.7, 26.7, 35.2, 44.4, 55.9}; // observations
Take the partial derivatives of the model function with respect to a and b:
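In the post's notation these are:

dy/da = e^(b*t)
dy/db = a*t*e^(b*t)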
// partial derivative of the model function w.r.t. a
double Jacobian_a(double ti, double a, double b)
{
    return exp(b*ti);
}
// partial derivative of the model function w.r.t. b
double Jacobian_b(double ti, double a, double b)
{
    return a*ti*exp(b*ti);
}
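Analytic derivatives like these are easy to get wrong, so a quick numerical sanity check can be worthwhile. A minimal sketch by central differences; the helpers numeric_diff_a/numeric_diff_b and the step h = 1e-6 are my own choices, not from the book:

// central-difference approximations of the same two partial derivatives (needs <cmath>)
double numeric_diff_a(double ti, double a, double b)
{
    const double h = 1e-6;  // assumed finite-difference step
    return ((a+h)*exp(b*ti) - (a-h)*exp(b*ti)) / (2*h);
}
double numeric_diff_b(double ti, double a, double b)
{
    const double h = 1e-6;
    return (a*exp((b+h)*ti) - a*exp((b-h)*ti)) / (2*h);
}
// e.g. Jacobian_b(2., 6., 0.3) and numeric_diff_b(2., 6., 0.3) should agree closely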
Gauss-Newton method
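At each iteration the code below linearizes the residual, r(x + Δx) ≈ r + J*Δx, and solves the normal equations

J^T*J * Δx = -J^T*r

(JTJ*x1 = B in the code), then applies a += Δx(0), b += Δx(1), stopping once the step norm drops below delta.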
#include <iostream>
#include <cmath>
#include <Eigen/Core>
#include <Eigen/Dense>
using namespace Eigen;
using namespace std;
// uses Jacobian_a / Jacobian_b defined above
int main() {
    std::cout << "Gauss-Newton iteration method" << std::endl;
    // initialize parameters
    int N = 8;               // number of data points
    double delta = 0.05;     // step-size threshold: stop iterating once the step is smaller
    double t[8] = {1, 2, 3, 4, 5, 6, 7, 8};                        // independent variable
    double y[8] = {8.3, 11.0, 14.7, 19.7, 26.7, 35.2, 44.4, 55.9}; // observations
    int iterMax = 5;         // maximum number of iterations
    double a = 6., b = 0.3;  // initial values of a and b
    double fx = 0;
    for (int k = 0; k < iterMax; ++k)
    {
        std::cout << " ============================" << std::endl;
        std::cout << k << " iter !" << std::endl;
        // compute fx and the residuals
        fx = 0;
        VectorXd r(8);
        for (int i = 0; i < N; ++i) {
            double ri = a*exp(b*t[i]) - y[i];
            fx += 0.5*ri*ri;
            r(i) = ri;
        }
        cout << "r = \n" << r << endl;
        cout << "fx = " << fx << endl;
        // compute the Jacobian matrix
        MatrixXd JacobMat(8, 2);
        for (int i = 0; i < N; ++i) {
            JacobMat(i,0) = Jacobian_a(t[i], a, b);
            JacobMat(i,1) = Jacobian_b(t[i], a, b);
        }
        cout << "JacobMat\n" << JacobMat << endl;
        MatrixXd JTJ = JacobMat.transpose()*JacobMat;
        MatrixXd B = -JacobMat.transpose()*r;
        cout << "\nJTJ\n" << JTJ << endl;
        cout << "\nB\n" << B << endl;
        // build and solve the linear system 'JTJ*x1 = B'
        Vector2d x1;
        x1 = JTJ.colPivHouseholderQr().solve(B);
        //x1 = JTJ.llt().solve(B);
        //x1 = JTJ.ldlt().solve(B);
        cout << "\nx1\n" << x1 << endl;
        double step_norm = x1.norm();  // step size
        cout << "\nstep_size is " << step_norm << endl;
        if (step_norm < delta) {
            std::cout << k+1 << " step size below threshold, converged, stop iterating !" << std::endl;
            break;
        } else {
            a += x1(0);
            b += x1(1);
            cout << "\nupdate 'a' and 'b'\n\ta is " << a << "; b is " << b << endl;
            fx = 0;
            for (int i = 0; i < N; ++i) {
                double ri = a*exp(b*t[i]) - y[i];
                fx += 0.5*ri*ri;
                r(i) = ri;
            }
            cout << "\nupdate fx = " << fx << endl;
            if (k == iterMax-1)
                std::cout << " reached the maximum iteration count iterMax !" << std::endl;
        }
    }
    fx = 0;
    for (int i = 0; i < N; ++i) {
        double ri = a*exp(b*t[i]) - y[i];
        fx += 0.5*ri*ri;
    }
    cout << "\nLastly 'a' and 'b'\n\ta is " << a << "; b is " << b << endl;
    cout << "fx is " << fx << endl;
    return 0;
}
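With the Jacobian_a/Jacobian_b definitions from above pasted in front of main, this should build with something like g++ main.cpp -I /usr/include/eigen3 -o gn (the Eigen include path is an assumption and varies by system).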
Running the code, it converges after three iterations; the before/after comparison:
// initial values
a is 6.; b is 0.3; fx is 63.6547
// after optimization
a is 7.0016; b is 0.262038; fx is 3.00657
Levenberg-Marquardt method
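Levenberg-Marquardt damps the same system with a multiplier lamda:

(J^T*J + lamda*I) * Δx = -J^T*r

A step is accepted only when the gain ratio rho = (actual decrease of fx) / (decrease predicted by the approximate model), computed below as (fx - fx_update)/L_bias, is large enough and the step norm stays inside the trust region miu.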
#include <iostream>
#include <cmath>
#include <cstdlib>
#include <Eigen/Core>
#include <Eigen/Dense>
using namespace Eigen;
using namespace std;
// uses Jacobian_a / Jacobian_b defined above
int main() {
    std::cout << "Levenberg-Marquardt iteration method" << std::endl;
    int N = 8;
    double delta_step = 0.05;  // step-size threshold for declaring convergence
    double rho_delta = 0.25;   // accept a step only when the actual decrease of the cost
                               // is similar enough to the predicted decrease (rho above this)
    double miu = 0.1;          // trust-region radius for the step
    double lamda = 1.;         // Lagrange multiplier (damping factor)
    double pk_norm = 0, pk_norm_last = 0.01;  // norm of the computed parameter update
    double t[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    double y[8] = {8.3, 11.0, 14.7, 19.7, 26.7, 35.2, 44.4, 55.9};
    // initialize a and b
    double a = 6., b = 0.3;
    double fx = 0, fx_update = 0;
    int iterMax = 5;
    double rho = 0.;
    for (int k = 0; k < iterMax; ++k)
    {
        std::cout << " ============================" << std::endl;
        std::cout << k << " iter !" << std::endl;
        int while_cnt = 0;
        Vector2d x2;
        while (1) {
            // compute fx and the residuals
            fx = 0;
            VectorXd r(8);
            for (int i = 0; i < N; ++i) {
                double ri = a*exp(b*t[i]) - y[i];
                fx += 0.5*ri*ri;
                r(i) = ri;
            }
            cout << "r = \n" << r << endl;
            cout << "fx = " << fx << endl;
            // compute the Jacobian matrix
            MatrixXd Jacobian(8, 2);
            for (int i = 0; i < N; ++i) {
                Jacobian(i,0) = Jacobian_a(t[i], a, b);
                Jacobian(i,1) = Jacobian_b(t[i], a, b);
            }
            cout << "Jacobian\n" << Jacobian << endl;
            MatrixXd A = Jacobian.transpose()*Jacobian + lamda*MatrixXd::Identity(2,2);
            MatrixXd B = -Jacobian.transpose()*r;
            // build and solve the linear system 'A*x2 = B'
            x2 = A.colPivHouseholderQr().solve(B);
            pk_norm = x2.norm();
            cout << "\nstep_size is " << pk_norm << endl;
            // compute rho
            fx_update = 0;
            for (int i = 0; i < N; ++i) {
                double r0 = (a + x2(0)) * exp((b + x2(1))*t[i]) - y[i];
                fx_update += 0.5*r0*r0;
            }
            // 1. denominator of rho, second-order
            double L_bias = 0.5*x2.transpose()*(lamda*x2 + B);
            // 2. denominator of rho, first-order
            //L_bias = -0.5*r.transpose()*Jacobian*x2;
            rho = (fx - fx_update)/L_bias;
            cout << "fx delta is " << (fx - fx_update) << endl;
            cout << "L_bias is " << L_bias << endl;
            cout << "rho is " << rho << endl;
            // update miu
            if (rho > 0.75) {
                miu *= 2.;
            } else if (rho < 0.25) {
                miu *= 0.5;
            }
            std::cout << "\nwhile_cnt is " << while_cnt << " rho " << rho << " miu " << miu << "\n";
            // rho = actual decrease of the cost / decrease predicted by the approximate model.
            // When rho exceeds the threshold rho_delta, the actual and predicted decreases are
            // considered similar enough, and the step must also lie inside the trust region.
            if ((rho > rho_delta) && (pk_norm < miu))
                break;
            while_cnt++;
            if (while_cnt > 10) {
                std::cout << "stuck below the step threshold; re-choose the threshold\n";
                exit(0);
            }
        }
        // update lamda, capped at 1
        if (pk_norm > pk_norm_last)
            lamda *= 0.1;
        else
            lamda = lamda*2 > 1 ? 1. : lamda*2;
        pk_norm_last = pk_norm;
        cout << "\nfx = " << fx << endl;
        cout << "\nupdate fx = " << fx_update << endl;
        a += x2(0);
        b += x2(1);
        // check convergence
        if (pk_norm < delta_step) {
            cout << "step size is very small; converged, exiting" << endl;
            cout << "lamda is " << lamda << endl;
            break;
        }
        if (k == iterMax-1)
            std::cout << " reached the maximum iteration count iterMax !" << std::endl;
    }
    fx = 0;
    for (int i = 0; i < N; ++i) {
        double ri = a*exp(b*t[i]) - y[i];
        fx += 0.5*ri*ri;
    }
    cout << "\nLastly 'a' and 'b'\n\ta is " << a << "; b is " << b << endl;
    cout << "fx is " << fx << endl;
    return 0;
}
It likewise converges after three iterations:
// initial values
a is 6.; b is 0.3; fx is 63.6547
// after optimization
a is 7.00008; b is 0.262078; fx is 3.00654
The code draws on many references combined with my own understanding; if you see it differently, feel free to write: [email protected]~
The parameter miu should be tied to lamda, so that adjusting one changes the step and keeps it inside the trust region; the 14 Lectures does not cover this, but reference 3 explains it. A common coupling is sketched below.
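A minimal sketch of one widely used scheme (Nielsen's rule, as in Madsen, Nielsen and Tingleff's "Methods for Non-Linear Least Squares Problems"; I am not claiming this is what reference 3 describes). Here the damping factor itself plays the trust-region role, shrinking when the model is trusted and growing when it is not:

#include <algorithm>
#include <cmath>

// Sketch: update the damping factor from the gain ratio rho.
// 'nu' is the growth factor, initialized to 2 before the first call.
void update_lamda(double rho, double &lamda, double &nu)
{
    if (rho > 0) {  // step accepted: relax the damping
        lamda *= std::max(1./3., 1. - std::pow(2.*rho - 1., 3));
        nu = 2.;
    } else {        // step rejected: damp harder, and harder still next time
        lamda *= nu;
        nu *= 2.;
    }
}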
Gauss-Newton actually approximates the Hessian with Jacobian^T*Jacobian, which saves computation; but Jacobian^T*Jacobian is only positive semi-definite, so the solve may not produce a valid result, leading to a step that is too large or a local approximation that is not accurate enough.
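One way to detect this in the code above (my own sketch, not from the book): attempt a Cholesky factorization of JTJ and fall back to LM-style damping when Eigen reports a numerical issue.

#include <Eigen/Dense>

// Sketch: returns false when J^T*J is not numerically positive definite,
// in which case the plain Gauss-Newton step should not be trusted.
bool jtj_is_positive_definite(const Eigen::Matrix2d &JTJ)
{
    Eigen::LLT<Eigen::Matrix2d> llt(JTJ);
    return llt.info() == Eigen::Success;
}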
In Levenberg-Marquardt, when lamda is small, the quadratic model fits well in the region and the method behaves close to Gauss-Newton; when lamda is large, it behaves more like first-order gradient descent. To some extent this avoids singular or ill-conditioned coefficient matrices in the linear system and gives a better solution.
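In equations: as lamda → 0, (J^T*J + lamda*I)*Δx = -J^T*r reduces to the Gauss-Newton step, while for large lamda it tends to Δx ≈ -(1/lamda)*J^T*r, i.e. a short step along the negative gradient of fx.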