svo 的更新过程第一步:Sparse Image Align
使用 GaussNewton 优化 cur 与 ref 图像帧间的位姿 T_cur_from_ref


  new_frame_->T_f_w_ = last_frame_->T_f_w_;
  SparseImgAlign img_align(Config::kltMaxLevel(), Config::kltMinLevel(),
                           30, SparseImgAlign::GaussNewton, false, false);
  size_t img_align_n_tracked =, new_frame_);


  1. T_cur_from_ref 初始化为单位阵,计算方法兼容后期 prior;
  2. 在图像金字塔 第4层 到 第2层,使用 GaussNewton 优化 T_cur_from_ref

- “svo/klt_min_level”, 2;
- “svo/klt_max_level”, 4

// Aligns cur_frame to ref_frame: estimates the relative pose T_cur_from_ref over
// the pyramid levels max_level_ down to min_level_ and writes the result back
// into cur_frame_->T_f_w_.
//
// Returns n_meas_/patch_area_, or 0 on the "no features to track" path.
//
// NOTE(review): this excerpt has lost its braces and some statements (the
// condition guarding the early return, and presumably the per-level optimizer
// call inside the loop). The code is reproduced as found.
size_t SparseImgAlign::run(FramePtr ref_frame, FramePtr cur_frame)

    // Early exit; the guarding condition is not shown in this excerpt.
    SVO_WARN_STREAM("SparseImgAlign: no features to track!");
    return 0;

  ref_frame_ = ref_frame;
  cur_frame_ = cur_frame;
  // One row per reference feature, patch_area_ float intensities per row.
  ref_patch_cache_ = cv::Mat(ref_frame_->fts_.size(), patch_area_, CV_32F);
  // One jacobian column per cached patch pixel.
  jacobian_cache_.resize(Eigen::NoChange, ref_patch_cache_.rows*patch_area_);
  visible_fts_.resize(ref_patch_cache_.rows, false); // TODO: should it be reset at each level?

  // Initial relative pose from the two frames' current world poses.
  SE3 T_cur_from_ref(cur_frame_->T_f_w_ * ref_frame_->T_f_w_.inverse());

  // Coarse-to-fine: start at the highest pyramid level and walk down.
  for(level_=max_level_; level_>=min_level_; --level_)
    mu_ = 0.1;
    // Force the reference patches/jacobians to be recomputed for this level.
    have_ref_patch_cache_ = false;
      printf("\nPYRAMID LEVEL %i\n---------------\n", level_);
  // Compose the relative pose with the reference pose to get the current frame's pose.
  cur_frame_->T_f_w_ = T_cur_from_ref * ref_frame_->T_f_w_;

  return n_meas_/patch_area_;


  1. computeResiduals(model, false, true); 计算鲁棒权重函数所用的尺度 scale_(weight scale,并非阻尼因子),仅在 iter_ == 0 时更新;
  2. double new_chi2 = computeResiduals(model, true, false); 计算残差(光度误差);
  3. applyPrior(model); 未实现,用于 imu 融合;
  4. solve() 调用 eigen 库中的 ldlt 方法,求解方程;
  5. update(model, new_model); 根据上一步计算得到的优化增量,更新位姿;
// Gauss-Newton optimization of `model` (updated in place).
//
// Visible flow: estimate the weight scale once, then for up to n_iter_
// iterations evaluate the residuals, and either roll back to the previous model
// when the error increased (or stop_ was set), or apply the update step x_ and
// store the new chi2.
//
// NOTE(review): this excerpt has lost its braces and several statements (the
// prior, the solve call and its failure test, the verbose guards, the
// convergence test). The code is reproduced as found.
template <int D, typename T>
void vk::NLLSSolver<D, T>::optimizeGaussNewton(ModelType& model)
  // Compute weight scale
    computeResiduals(model, false, true); // linearize_system=false, compute_weight_scale=true: only the weight scale is estimated

  // Save the old model to rollback in case of unsuccessful update
  ModelType old_model(model);

  // perform iterative estimation
  for (iter_ = 0; iter_<n_iter_; ++iter_)
    rho_ = 0;
    startIteration(); // do nothing.


    // compute initial error
    n_meas_ = 0;
    double new_chi2 = computeResiduals(model, true, false);

    // add prior

    // solve the linear system
    // H*x = Jres: solve for the update step x_
      // matrix was singular and could not be computed
      std::cout << "Matrix is close to singular! Stop Optimizing." << std::endl;
      std::cout << "H = " << H_ << std::endl;
      std::cout << "Jres = " << Jres_ << std::endl;
      stop_ = true;

    // check if error increased since last optimization
    if((iter_ > 0 && new_chi2 > chi2_) || stop_)
        std::cout << "It. " << iter_
                  << "\t Failure"
                  << "\t new_chi2 = " << new_chi2
                  << "\t Error increased. Stop optimizing."
                  << std::endl;
      model = old_model; // rollback

    // update the model
    ModelType new_model;
    // apply the step x_ to the pose
    update(model, new_model);
    old_model = model;
    model = new_model;

    chi2_ = new_chi2;

      std::cout << "It. " << iter_
                << "\t Success"
                << "\t new_chi2 = " << new_chi2
                << "\t n_meas = " << n_meas_
                << "\t x_norm = " << vk::norm_max(x_)
                << std::endl;

    // do nothing but printf;

    // stop when converged, i.e. update step too small


该函数主要对 特征 对应 patch 计算图像对李代数的 jacobian 矩阵;

  • jacobian_xyz2uv() 计算像素坐标uv 对 李代数 的 jacobian 矩阵;
  • dxdy 是图像对像素坐标uv的导数,采用中心差分求导;
    • 对于金字塔中图像,坐标值可能出现小数,故采用 双线性插值 计算其灰度值;
    • 先用双线性插值求出特征附近四个坐标点,再使用中心差分求导(代码中放在一起计算);


  • $\mathbf{u} = (u, v)^T$ - ref 像素坐标;

$$u = \frac{f_x X}{Z} + c_x,\quad v = \frac{f_y Y}{Z} + c_y$$

  • $\mathbf{q}$ - 空间点在 ref 帧相机坐标系下的坐标;

$$\mathbf{q} = \mathbf{R}\mathbf{P}$$

  • 像素坐标对相机坐标系的三维空间点求导:

$$\frac{\partial \mathbf{u}}{\partial \mathbf{q}} = \begin{bmatrix} \frac{\partial u}{\partial X} & \frac{\partial u}{\partial Y} & \frac{\partial u}{\partial Z} \\ \frac{\partial v}{\partial X} & \frac{\partial v}{\partial Y} & \frac{\partial v}{\partial Z} \end{bmatrix} = \begin{bmatrix} \frac{f_x}{Z} & 0 & -\frac{f_x X}{Z^2} \\ 0 & \frac{f_y}{Z} & -\frac{f_y Y}{Z^2} \end{bmatrix}$$

  • 三维点对李代数求导:

$$\begin{aligned} \frac{d\mathbf{q}}{d\mathbf{R}} = \frac{d(\mathbf{R}\mathbf{P})}{d\mathbf{R}} &= \lim_{\delta\phi \to 0} \frac{\exp(\delta\phi^{\wedge})\,\mathbf{R}\mathbf{P} - \mathbf{R}\mathbf{P}}{\delta\phi} \\ &= \lim_{\delta\phi \to 0} \frac{\left(\exp(\delta\phi^{\wedge}) - \mathbf{I}\right)\mathbf{R}\mathbf{P}}{\delta\phi} \\ &= -(\mathbf{R}\mathbf{P})^{\wedge} \end{aligned}$$
$$\frac{\partial \mathbf{q}}{\partial \delta \boldsymbol{\xi}} = \left[ \mathbf{I},\; -\mathbf{q}^{\wedge} \right]$$

  • 综上:

$$\frac{\partial \mathbf{u}}{\partial \delta \boldsymbol{\xi}} = \begin{bmatrix} \frac{f_x}{Z} & 0 & -\frac{f_x X}{Z^2} & -\frac{f_x XY}{Z^2} & f_x + \frac{f_x X^2}{Z^2} & -\frac{f_x Y}{Z} \\ 0 & \frac{f_y}{Z} & -\frac{f_y Y}{Z^2} & -f_y - \frac{f_y Y^2}{Z^2} & \frac{f_y XY}{Z^2} & \frac{f_y X}{Z} \end{bmatrix}$$

// Fills, for every reference feature, the cached interpolated patch intensities
// (ref_patch_cache_) and one 6-vector jacobian per patch pixel (jacobian_cache_)
// at the current pyramid level_, marks the feature in visible_fts_, and finally
// sets have_ref_patch_cache_.
//
// NOTE(review): this excerpt has lost its braces and some tokens (the member
// after `ref_frame_->`, the operands inside the two pointer casts, and the
// statement guarded by the border check). The code is reproduced as found.
void SparseImgAlign::precomputeReferencePatches()
  // One extra pixel of margin: the interpolation and gradient read one pixel beyond the patch.
  const int border = patch_halfsize_+1;
  const cv::Mat& ref_img = ref_frame_->;
  const int stride = ref_img.cols;
  // Factor converting level-0 pixel coordinates to coordinates at level_.
  const float scale = 1.0f/(1<<level_);
  const Vector3d ref_pos = ref_frame_->pos();
  const double focal_length = ref_frame_->cam_->errorMultiplier2();
  size_t feature_counter = 0;
  std::vector<bool>::iterator visiblity_it = visible_fts_.begin();
  for(auto it=ref_frame_->fts_.begin(), ite=ref_frame_->fts_.end();
      it!=ite; ++it, ++feature_counter, ++visiblity_it)
    // check if reference with patch size is within image
    const float u_ref = (*it)->px[0]*scale;
    const float v_ref = (*it)->px[1]*scale;
    const int u_ref_i = floorf(u_ref);
    const int v_ref_i = floorf(v_ref);
    // NOTE(review): the statement guarded by this condition (presumably `continue;`)
    // is missing from this excerpt.
    if((*it)->point == NULL || u_ref_i-border < 0 || v_ref_i-border < 0 || u_ref_i+border >= ref_img.cols || v_ref_i+border >= ref_img.rows)
    *visiblity_it = true; // keep features that have a 3D point and whose patch is not at the image border

    // cannot just take the 3d points coordinate because of the reprojection errors in the reference image!!!
    // Open question (see the later pose-optimization part): is the ref-frame pose
    // error large here, which would make the reprojection error large?
    const double depth(((*it)->point->pos_ - ref_pos).norm());
    const Vector3d xyz_ref((*it)->f*depth);

    // evaluate projection jacobian
    // Formula:
    // for the derivative of a 3D point w.r.t. the Lie algebra see: equation 16:
    // jacobian of uv w.r.t. the Lie algebra [the 3D point is the intermediate variable]
    Matrix<double,2,6> frame_jac;
    Frame::jacobian_xyz2uv(xyz_ref, frame_jac);

    // compute bilinear interpolation weights for reference image
    const float subpix_u_ref = u_ref-u_ref_i;
    const float subpix_v_ref = v_ref-v_ref_i;
    const float w_ref_tl = (1.0-subpix_u_ref) * (1.0-subpix_v_ref);
    const float w_ref_tr = subpix_u_ref * (1.0-subpix_v_ref);
    const float w_ref_bl = (1.0-subpix_u_ref) * subpix_v_ref;
    const float w_ref_br = subpix_u_ref * subpix_v_ref;
    size_t pixel_counter = 0;
    // Start of this feature's row in the patch cache (patch_area_ floats per feature).
    float* cache_ptr = reinterpret_cast<float*>( + patch_area_*feature_counter;
    for(int y=0; y<patch_size_; ++y)
      // First pixel of patch row y in the reference image.
      uint8_t* ref_img_ptr = (uint8_t*) + (v_ref_i+y-patch_halfsize_)*stride + (u_ref_i-patch_halfsize_);
      for(int x=0; x<patch_size_; ++x, ++ref_img_ptr, ++cache_ptr, ++pixel_counter)
        // precompute interpolated reference patch color
        *cache_ptr = w_ref_tl*ref_img_ptr[0] + w_ref_tr*ref_img_ptr[1] + w_ref_bl*ref_img_ptr[stride] + w_ref_br*ref_img_ptr[stride+1];

        // we use the inverse compositional: thereby we can take the gradient always at the same position
        // get gradient of warped image (~gradient at warped position)
        // In this bilinear interpolation all denominators are 1.
        // (a figure could be attached here)
        // dx, dy: central differences of the interpolated intensity, one pixel to each side.
        float dx = 0.5f * ((w_ref_tl*ref_img_ptr[1] + w_ref_tr*ref_img_ptr[2] + w_ref_bl*ref_img_ptr[stride+1] + w_ref_br*ref_img_ptr[stride+2])
                          -(w_ref_tl*ref_img_ptr[-1] + w_ref_tr*ref_img_ptr[0] + w_ref_bl*ref_img_ptr[stride-1] + w_ref_br*ref_img_ptr[stride]));
        float dy = 0.5f * ((w_ref_tl*ref_img_ptr[stride] + w_ref_tr*ref_img_ptr[1+stride] + w_ref_bl*ref_img_ptr[stride*2] + w_ref_br*ref_img_ptr[stride*2+1])
                          -(w_ref_tl*ref_img_ptr[-stride] + w_ref_tr*ref_img_ptr[1-stride] + w_ref_bl*ref_img_ptr[0] + w_ref_br*ref_img_ptr[1]));

        // cache the jacobian
        // The per-pixel image gradient times the jacobian_xyz2uv() result gives the
        // derivative of the pixel intensity w.r.t. the Lie algebra;
        // the factor focal_length / (1<<level_) rescales it to this pyramid level.
        jacobian_cache_.col(feature_counter*patch_area_ + pixel_counter) =
            (dx*frame_jac.row(0) + dy*frame_jac.row(1))*(focal_length / (1<<level_));
  have_ref_patch_cache_ = true;


  • jacobian_xyz2uv() 计算的是三维点到归一化平面的 jacobian;
  • 通过 `* (focal_length / (1<<level_))` 得到近似结果(假设 $f_x \approx f_y \approx f$)
  /// Frame jacobian for projection of 3D point in (f)rame coordinate to
  /// unit plane coordinates uv (focal length = 1).
  inline static void jacobian_xyz2uv(
      const Vector3d& xyz_in_f,
      Matrix<double,2,6>& J)
    const double x = xyz_in_f[0];
    const double y = xyz_in_f[1];
    const double z_inv = 1./xyz_in_f[2];
    const double z_inv_2 = z_inv*z_inv;

    J(0,0) = -z_inv;              // -1/z
    J(0,1) = 0.0;                 // 0
    J(0,2) = x*z_inv_2;           // x/z^2
    J(0,3) = y*J(0,2);            // x*y/z^2
    J(0,4) = -(1.0 + x*J(0,2));   // -(1.0 + x^2/z^2)
    J(0,5) = y*z_inv;             // y/z

    J(1,0) = 0.0;                 // 0
    J(1,1) = -z_inv;              // -1/z
    J(1,2) = y*z_inv_2;           // y/z^2
    J(1,3) = 1.0 + y*J(1,2);      // 1.0 + y^2/z^2
    J(1,4) = -J(0,3);             // -x*y/z^2
    J(1,5) = -x*z_inv;            // x/z


  • 在每次迭代时,使用更新后的 T_cur_from_ref,调用 computeResiduals() 计算当前残差;
  • 双线性插值计算当前图像的灰度值:intensity_cur,与 ref 帧对应灰度值相减,得到残差;
  • GaussNewton 非线性优化,参考:非线性最小二乘问题的混合算法 - 孟繁雪;


$$x_{k+1} - x_k = -\left(J(x_k)^T J(x_k)\right)^{-1} J(x_k)^T \, res(x_k) = -H^{-1} J_{res}$$

// Computes the photometric residuals between the cached reference patches and
// the current image under the pose T_cur_from_ref.
//
// T_cur_from_ref       relative pose used to map reference points into the current frame.
// linearize_system     when true, H_ and Jres_ are accumulated from the cached jacobians.
// compute_weight_scale when true and iter_ == 0, scale_ is re-estimated from `errors`.
//
// Returns chi2/n_meas_ (weighted squared residual sum over the measurement count).
// n_meas_ is incremented here, once per patch pixel.
//
// NOTE(review): this excerpt has lost its braces, several statements (the bodies
// of some `if`s, the visibility check, the code that fills `errors`) and some
// tokens (after `cur_frame_->`, inside the pointer casts, before `<float>(`).
// The code is reproduced as found.
double SparseImgAlign::computeResiduals(
    const SE3& T_cur_from_ref,
    bool linearize_system,
    bool compute_weight_scale)
  // Warp the (cur)rent image such that it aligns with the (ref)erence image
  const cv::Mat& cur_img = cur_frame_->;

  // Residual image, only allocated when displaying.
  if(linearize_system && display_)
    resimg_ = cv::Mat(cur_img.size(), CV_32F, cv::Scalar(0));

  // NOTE(review): the guarded call (presumably precomputeReferencePatches()) is
  // missing from this excerpt.
  if(have_ref_patch_cache_ == false)
// compute jacobian_cache_, the derivative of the image w.r.t. the Lie algebra

  // compute the weights on the first iteration
  std::vector<float> errors;
  const int stride = cur_img.cols;
  const int border = patch_halfsize_+1;
  const float scale = 1.0f/(1<<level_);
  const Vector3d ref_pos(ref_frame_->pos());
  float chi2 = 0.0;
  size_t feature_counter = 0; // is used to compute the index of the cached jacobian
  std::vector<bool>::iterator visiblity_it = visible_fts_.begin();
  for(auto it=ref_frame_->fts_.begin(); it!=ref_frame_->fts_.end();
      ++it, ++feature_counter, ++visiblity_it)
    // check if feature is within image

    // compute pixel location in cur img
    const double depth = ((*it)->point->pos_ - ref_pos).norm();
    const Vector3d xyz_ref((*it)->f*depth);
    const Vector3d xyz_cur(T_cur_from_ref * xyz_ref);
    const Vector2f uv_cur_pyr(cur_frame_->cam_->world2cam(xyz_cur).cast<float>() * scale);
    const float u_cur = uv_cur_pyr[0];
    const float v_cur = uv_cur_pyr[1];
    const int u_cur_i = floorf(u_cur);
    const int v_cur_i = floorf(v_cur);

    // check if projection is within the image
    // NOTE(review): the statement guarded by this condition is missing from this excerpt.
    if(u_cur_i < 0 || v_cur_i < 0 || u_cur_i-border < 0 || v_cur_i-border < 0 || u_cur_i+border >= cur_img.cols || v_cur_i+border >= cur_img.rows)

    // compute bilinear interpolation weights for the current image
    const float subpix_u_cur = u_cur-u_cur_i;
    const float subpix_v_cur = v_cur-v_cur_i;
    const float w_cur_tl = (1.0-subpix_u_cur) * (1.0-subpix_v_cur);
    const float w_cur_tr = subpix_u_cur * (1.0-subpix_v_cur);
    const float w_cur_bl = (1.0-subpix_u_cur) * subpix_v_cur;
    const float w_cur_br = subpix_u_cur * subpix_v_cur;
    float* ref_patch_cache_ptr = reinterpret_cast<float*>( + patch_area_*feature_counter;
    size_t pixel_counter = 0; // is used to compute the index of the cached jacobian
    for(int y=0; y<patch_size_; ++y)
      uint8_t* cur_img_ptr = (uint8_t*) + (v_cur_i+y-patch_halfsize_)*stride + (u_cur_i-patch_halfsize_);

      for(int x=0; x<patch_size_; ++x, ++pixel_counter, ++cur_img_ptr, ++ref_patch_cache_ptr)
        // compute residual
        // bilinear interpolation of the current image intensity at this patch pixel
        const float intensity_cur = w_cur_tl*cur_img_ptr[0] + w_cur_tr*cur_img_ptr[1] + w_cur_bl*cur_img_ptr[stride] + w_cur_br*cur_img_ptr[stride+1];
        // residual = current intensity minus cached reference intensity
        const float res = intensity_cur - (*ref_patch_cache_ptr);

        // used to compute scale for robust cost

        // robustification
        float weight = 1.0;
        if(use_weights_) {
            // weight_function_ is a robust weight function (the article names Huber,
            // Tukey, etc.), evaluated on the residual normalized by scale_.
            // See: (the reference is missing from this excerpt)
          weight = weight_function_->value(res/scale_);

        chi2 += res*res*weight;
        n_meas_++; // Number of measurements

        if(linearize_system) // false in the weight-scale pass, true in the iteration pass
          // compute Jacobian, weighted Hessian and weighted "steepest descend images" (times error)
          const Vector6d J(jacobian_cache_.col(feature_counter*patch_area_ + pixel_counter));
          H_.noalias() += J*J.transpose()*weight; // Hessian approximation
          Jres_.noalias() -= J*res*weight; // Jacobian x Residual
  <float>((int) v_cur+y-patch_halfsize_, (int) u_cur+x-patch_halfsize_) = res/255.0;

  // compute the weights on the first iteration
  if(compute_weight_scale && iter_ == 0)
    scale_ = scale_estimator_->compute(errors);

  return chi2/n_meas_;


int SparseImgAlign::solve()
    // using LDLt Cholesky
  x_ = H_.ldlt().solve(Jres_);
  if((bool) std::isnan((double) x_[0]))
    return 0;
  return 1;


void SparseImgAlign::update(
    const ModelType& T_curold_from_ref,
    ModelType& T_curnew_from_ref)
  T_curnew_from_ref =  T_curold_from_ref * SE3::exp(-x_);
