Algorithm flow functions:
1. (*finder)(img, features[i])
Mathematical explanation:
Refer to BRIEF: Binary Robust Independent Elementary Features [6] and ORB: An efficient alternative to SIFT or SURF [7]
...\sources\modules\stitching\src\matchers.cpp
void OrbFeaturesFinder::find(InputArray image, ImageFeatures &features)
{
//A UMat is created here so that, when OpenCL is enabled, a cl buffer is allocated for it; when this UMat is later passed as a parameter of a cl kernel, the OpenCL path (instead of the CPU path) is taken. CPU and OpenCL are the two execution modes.
UMat gray_image;
CV_Assert((image.type() == CV_8UC3) || (image.type() == CV_8UC4) || (image.type() == CV_8UC1));
if (image.type() == CV_8UC3) {
cvtColor(image, gray_image, COLOR_BGR2GRAY);
} else if (image.type() == CV_8UC4) {
cvtColor(image, gray_image, COLOR_BGRA2GRAY);
} else if (image.type() == CV_8UC1) {
gray_image = image.getUMat();
} else {
CV_Error(Error::StsUnsupportedFormat, "");
}
if (grid_size.area() == 1)
//Detect keypoints and compute descriptors with the ORB algorithm.
orb->detectAndCompute(gray_image, Mat(), features.keypoints, features.descriptors);
else
{
features.keypoints.clear();
features.descriptors.release();
std::vector<KeyPoint> points;
Mat _descriptors;
UMat descriptors;
for (int r = 0; r < grid_size.height; ++r)
for (int c = 0; c < grid_size.width; ++c)
{
int xl = c * gray_image.cols / grid_size.width;
int yl = r * gray_image.rows / grid_size.height;
int xr = (c+1) * gray_image.cols / grid_size.width;
int yr = (r+1) * gray_image.rows / grid_size.height;
// LOGLN("OrbFeaturesFinder::find: gray_image.empty=" << (gray_image.empty()?"true":"false") << ", "
// << " gray_image.size()=(" << gray_image.size().width << "x" << gray_image.size().height << "), "
// << " yl=" << yl << ", yr=" << yr << ", "
// << " xl=" << xl << ", xr=" << xr << ", gray_image.data=" << ((size_t)gray_image.data) << ", "
// << "gray_image.dims=" << gray_image.dims << "\n");
UMat gray_image_part=gray_image(Range(yl, yr), Range(xl, xr));
// LOGLN("OrbFeaturesFinder::find: gray_image_part.empty=" << (gray_image_part.empty()?"true":"false") << ", "
// << " gray_image_part.size()=(" << gray_image_part.size().width << "x" << gray_image_part.size().height << "), "
// << " gray_image_part.dims=" << gray_image_part.dims << ", "
// << " gray_image_part.data=" << ((size_t)gray_image_part.data) << "\n");
//Functions at this level are linked in from the precompiled opencv_features2d300d.lib, so there is no need to step through
//...\sources\modules\features2d\src\orb.cpp line by line.
orb->detectAndCompute(gray_image_part, UMat(), points, descriptors);
features.keypoints.reserve(features.keypoints.size() + points.size());
for (std::vector<KeyPoint>::iterator kp = points.begin(); kp != points.end(); ++kp)
{
kp->pt.x += xl;
kp->pt.y += yl;
features.keypoints.push_back(*kp);
}
_descriptors.push_back(descriptors.getMat(ACCESS_READ));
}
// TODO optimize copyTo()
//features.descriptors = _descriptors.getUMat(ACCESS_READ);
_descriptors.copyTo(features.descriptors);
}
}
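A hedged sketch of how this finder is typically driven (the image file names are hypothetical; only the OpenCV 3.0 detail API shown above is assumed):
#include <opencv2/imgcodecs.hpp>
#include <opencv2/stitching/detail/matchers.hpp>
using namespace cv;
using namespace cv::detail;
int main()
{
std::vector<Mat> imgs;
imgs.push_back(imread("img0.jpg")); // hypothetical input files
imgs.push_back(imread("img1.jpg"));
std::vector<ImageFeatures> features(imgs.size());
Ptr<FeaturesFinder> finder = makePtr<OrbFeaturesFinder>(Size(3, 1)); // 3x1 grid, the class default
for (size_t i = 0; i < imgs.size(); ++i)
{
(*finder)(imgs[i], features[i]); // ends up in OrbFeaturesFinder::find shown above
features[i].img_idx = static_cast<int>(i);
}
finder->collectGarbage(); // release temporary buffers held by the finder
return 0;
}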
2. matcher(features, pairwise_matches);
Call without using ocl
...\sources\modules\stitching\src\matchers.cpp
void operator ()(const Range &r) const
{
const int num_images = static_cast<int>(features.size());
for (int i = r.start; i < r.end; ++i)
{
int from = near_pairs[i].first;
int to = near_pairs[i].second;
int pair_idx = from*num_images + to;
matcher(features[from], features[to], pairwise_matches[pair_idx]);
pairwise_matches[pair_idx].src_img_idx = from;
pairwise_matches[pair_idx].dst_img_idx = to;
size_t dual_pair_idx = to*num_images + from;
pairwise_matches[dual_pair_idx] = pairwise_matches[pair_idx];
pairwise_matches[dual_pair_idx].src_img_idx = to;
pairwise_matches[dual_pair_idx].dst_img_idx = from;
if (!pairwise_matches[pair_idx].H.empty())
pairwise_matches[dual_pair_idx].H = pairwise_matches[pair_idx].H.inv();
for (size_t j = 0; j < pairwise_matches[dual_pair_idx].matches.size(); ++j)
std::swap(pairwise_matches[dual_pair_idx].matches[j].queryIdx,
pairwise_matches[dual_pair_idx].matches[j].trainIdx);
LOG(".");
}
}
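Before following the call chain further, a hedged sketch of how the top-level matcher functor is created and run (the parameter values are the library defaults as far as I know; treat them as assumptions):
BestOf2NearestMatcher matcher(false /*try_use_gpu*/, 0.3f /*match_conf_*/);
std::vector<MatchesInfo> pairwise_matches;
matcher(features, pairwise_matches); // dispatches the per-pair operator() shown above, possibly in parallel
matcher.collectGarbage();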
The call matcher(features[from], features[to], pairwise_matches[pair_idx]) in the loop above invokes
void BestOf2NearestMatcher::match(const ImageFeatures &features1, const ImageFeatures &features2,
MatchesInfo &matches_info)
{
(*impl_)(features1, features2, matches_info);
// Check if it makes sense to find homography
// num_matches_thresh1_ defaults to 6 matches; mathematically at least 4 point correspondences are needed for a homography, but more are required for a reliable RANSAC estimate
if (matches_info.matches.size() < static_cast<size_t>(num_matches_thresh1_))
return;
// Construct point-point correspondences for homography estimation
Mat src_points(1, static_cast<int>(matches_info.matches.size()), CV_32FC2);
Mat dst_points(1, static_cast<int>(matches_info.matches.size()), CV_32FC2);
//Change the origin of the coordinate system to be the center of the image
for (size_t i = 0; i < matches_info.matches.size(); ++i)
{
const DMatch& m = matches_info.matches[i];
Point2f p = features1.keypoints[m.queryIdx].pt;
p.x -= features1.img_size.width * 0.5f;
p.y -= features1.img_size.height * 0.5f;
src_points.at<Point2f>(0, static_cast<int>(i)) = p;
p = features2.keypoints[m.trainIdx].pt;
p.x -= features2.img_size.width * 0.5f;
p.y -= features2.img_size.height * 0.5f;
dst_points.at<Point2f>(0, static_cast<int>(i)) = p;
}
// Find pair-wise motion
matches_info.H = findHomography(src_points, dst_points, matches_info.inliers_mask, RANSAC);
if (matches_info.H.empty() || std::abs(determinant(matches_info.H)) < std::numeric_limits<double>::epsilon())
return;
// Find number of inliers
matches_info.num_inliers = 0;
for (size_t i = 0; i < matches_info.inliers_mask.size(); ++i)
if (matches_info.inliers_mask[i])
matches_info.num_inliers++;
// These coeffs are from paper M. Brown and D. Lowe. "Automatic Panoramic Image Stitching
// using Invariant Features"
// Mathematical explanation: refer to Section 3.2 of the paper. Equations (7)-(9) model the probability that a given proportion of the tentative matches are inliers, given a correct image match (two images viewing the same scene);
// (11) gives the posterior probability that the image match is correct. Requiring this posterior to be high yields criterion (13), matches_info.num_inliers > 8 + 0.3 * matches_info.matches.size(), i.e. confidence > 1 with the definition below.
matches_info.confidence = matches_info.num_inliers / (8 + 0.3 * matches_info.matches.size());
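// Worked example (illustrative numbers, not from the source): with 200 tentative matches and 80 inliers,
// confidence = 80 / (8 + 0.3 * 200) = 80 / 68 ≈ 1.18 > 1, so criterion (13) num_inliers > 8 + 0.3 * num_matches holds;
// with only 60 inliers the confidence would be ≈ 0.88 and the pair would typically be rejected by the confidence threshold later.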
// Set zero confidence to remove matches between too close images, as they don't provide
// additional information anyway. The threshold was set experimentally.
matches_info.confidence = matches_info.confidence > 3. ? 0. : matches_info.confidence;
// Check if we should try to refine motion
// num_matches_thresh2_ also defaults to 6; at least that many inliers are required before refining the motion
if (matches_info.num_inliers < num_matches_thresh2_)
return;
// Construct point-point correspondences for inliers only
src_points.create(1, matches_info.num_inliers, CV_32FC2);
dst_points.create(1, matches_info.num_inliers, CV_32FC2);
int inlier_idx = 0;
for (size_t i = 0; i < matches_info.matches.size(); ++i)
{
if (!matches_info.inliers_mask[i])
continue;
const DMatch& m = matches_info.matches[i];
Point2f p = features1.keypoints[m.queryIdx].pt;
p.x -= features1.img_size.width * 0.5f;
p.y -= features1.img_size.height * 0.5f;
src_points.at<Point2f>(0, inlier_idx) = p;
p = features2.keypoints[m.trainIdx].pt;
p.x -= features2.img_size.width * 0.5f;
p.y -= features2.img_size.height * 0.5f;
dst_points.at<Point2f>(0, inlier_idx) = p;
inlier_idx++;
}
// Rerun motion estimation on inliers only
matches_info.H = findHomography(src_points, dst_points, RANSAC);
}
The call (*impl_)(features1, features2, matches_info) above invokes (in the CPU case)
void CpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &features2, MatchesInfo& matches_info)
{
CV_Assert(features1.descriptors.type() == features2.descriptors.type());
CV_Assert(features2.descriptors.depth() == CV_8U || features2.descriptors.depth() == CV_32F);
#ifdef HAVE_TEGRA_OPTIMIZATION
if (tegra::useTegra() && tegra::match2nearest(features1, features2, matches_info, match_conf_))
return;
#endif
matches_info.matches.clear();
Ptr<cv::DescriptorMatcher> matcher;
#if 0 // TODO check this
if (ocl::useOpenCL())
{
matcher = makePtr<BFMatcher>((int)NORM_L2);
}
else
#endif
{
//Index method: the corresponding index structure is a set of randomized kd-trees that can be searched in parallel.
Ptr<flann::IndexParams> indexParams = makePtr<flann::KDTreeIndexParams>();
//Searching method.
Ptr<flann::SearchParams> searchParams = makePtr<flann::SearchParams>();
if (features2.descriptors.depth() == CV_8U)
{
//FLANN_INDEX_LSH uses Hamming distance, which suits binary descriptors such as ORB's.
//The LSH index comes from the paper "Multi-probe LSH: Efficient Indexing for High-Dimensional Similarity Search" by Qin Lv et al.
indexParams->setAlgorithm(cvflann::FLANN_INDEX_LSH);
searchParams->setAlgorithm(cvflann::FLANN_INDEX_LSH);
}
//Brute-force matching and FLANN matching are the two common matching methods in OpenCV: matcher = makePtr<BFMatcher>((int)NORM_L2);
//or matcher = makePtr<FlannBasedMatcher>(indexParams, searchParams);
//BFMatcher::BFMatcher(int normType=NORM_L2, bool crossCheck=false)
//Parameters:
//normType – One of NORM_L1, NORM_L2, NORM_HAMMING, NORM_HAMMING2. L1 and L2 norms are preferable choices for SIFT and SURF
//descriptors; NORM_HAMMING should be used with ORB, BRISK and BRIEF; NORM_HAMMING2 should be used with ORB when WTA_K == 3
//or 4 (see the ORB::ORB constructor description).
//crossCheck – If it is false, this will be the default BFMatcher behaviour: it finds the k nearest neighbors for each
//query descriptor. If crossCheck == true, then the knnMatch() method with k = 1 will only return pairs (i, j) such that for
//the i-th query descriptor the j-th descriptor in the matcher's collection is the nearest and vice versa, i.e. the BFMatcher
//will only return consistent pairs. Such a technique usually produces the best results with a minimal number of outliers when
//there are enough matches. This is an alternative to the ratio test used by D. Lowe in the SIFT paper.
//class FlannBasedMatcher : public DescriptorMatcher
//The difference between brute force and FLANN is that BFMatcher always tries all possible matches so that it finds the best
//one, whereas FlannBasedMatcher ("Fast Library for Approximate Nearest Neighbors") is faster but may not return the exact best
//match; its parameters can be adjusted to trade accuracy for speed.
matcher = makePtr<FlannBasedMatcher>(indexParams, searchParams);
}
std::vector< std::vector<DMatch> > pair_matches;
MatchesSet matches;
// Find 1->2 matches
// For each feature in the first image, find its 2 best-matching features in the second image
// Matching is based on descriptor distance
// knnMatch is linked in from ..\..\lib\Debug\opencv_features2d300d.lib, which is built from the sources in ..\opencv30\sources\modules\features2d\src (including its own matchers.cpp)
// The descriptors passed to FlannBasedMatcher may be CV_32F or CV_8U
matcher->knnMatch(features1.descriptors, features2.descriptors, pair_matches, 2);
for (size_t i = 0; i < pair_matches.size(); ++i)
{
if (pair_matches[i].size() < 2)//Skip when fewer than 2 candidate matches were found.
continue;
const DMatch& m0 = pair_matches[i][0];//The best match
const DMatch& m1 = pair_matches[i][1];//Second best match
if (m0.distance < (1.f - match_conf_) * m1.distance)
{
matches_info.matches.push_back(m0);//DMatch fields: queryIdx, trainIdx, imgIdx, distance
matches.insert(std::make_pair(m0.queryIdx, m0.trainIdx));
}
}
LOG("\n1->2 matches: " << matches_info.matches.size() << endl);
// Find 2->1 matches
// For each feature in the second image, find its 2 best-matching features in the first image
pair_matches.clear();
matcher->knnMatch(features2.descriptors, features1.descriptors, pair_matches, 2);
for (size_t i = 0; i < pair_matches.size(); ++i)
{
if (pair_matches[i].size() < 2)
continue;
const DMatch& m0 = pair_matches[i][0];
const DMatch& m1 = pair_matches[i][1];
if (m0.distance < (1.f - match_conf_) * m1.distance)
//A reciprocal 2->1 match is added only if the corresponding (queryIdx, trainIdx) pair was not already found in the 1->2 pass;
//this way, a feature that failed the ratio test in one direction can still contribute a match found in the other direction.
//The DMatch is stored as (trainIdx, queryIdx, distance) so that the indices stay relative to (features1, features2).
if (matches.find(std::make_pair(m0.trainIdx, m0.queryIdx)) == matches.end())
matches_info.matches.push_back(DMatch(m0.trainIdx, m0.queryIdx, m0.distance));
}
LOG("1->2 & 2->1 matches: " << matches_info.matches.size() << endl);
}
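For comparison, a hedged standalone sketch of the brute-force alternative mentioned in the comments above, using the same (1 - match_conf) ratio test; NORM_HAMMING is the appropriate norm for ORB's binary descriptors:
#include <opencv2/features2d.hpp>
static void ratioTestMatch(const cv::Mat& desc1, const cv::Mat& desc2,
std::vector<cv::DMatch>& good, float match_conf = 0.3f)
{
cv::BFMatcher matcher(cv::NORM_HAMMING); // binary descriptors -> Hamming distance
std::vector<std::vector<cv::DMatch> > knn;
matcher.knnMatch(desc1, desc2, knn, 2); // two nearest neighbours per query descriptor
for (size_t i = 0; i < knn.size(); ++i)
{
if (knn[i].size() < 2) continue;
if (knn[i][0].distance < (1.f - match_conf) * knn[i][1].distance) // same test as CpuMatcher above
good.push_back(knn[i][0]);
}
}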
3. vector<int> indices = leaveBiggestComponent(features, pairwise_matches, conf_thresh);
Mathematical explanation:
Refer to Section 3.2 in paper “Automatic Panoramic Image Stitching using Invariant Features”
Like step 4 below, this calls
...\sources\modules\stitching\src\motion_estimators.cpp
//In image stitching each pairwise match has a confidence value. For an image pair, if the number of inliers is greater than a linear function of the number of matches, the pair is regarded as belonging to the same panorama.
std::vector<int> leaveBiggestComponent(std::vector<ImageFeatures> &features, std::vector<MatchesInfo> &pairwise_matches, float conf_threshold)
{
const int num_images = static_cast<int>(features.size());
DisjointSets comps(num_images);
//comps is a disjoint-set (union-find) structure over the image indices
for (int i = 0; i < num_images; ++i)
{
for (int j = 0; j < num_images; ++j)
{
if (pairwise_matches[i*num_images + j].confidence < conf_threshold)
continue;
int comp1 = comps.findSetByElem(i);
int comp2 = comps.findSetByElem(j);
if (comp1 != comp2)
comps.mergeSets(comp1, comp2);
}
}
//max_comp is the id of the largest connected component, i.e. the set containing the most images
int max_comp = static_cast<int>(std::max_element(comps.size.begin(), comps.size.end()) - comps.size.begin());
std::vector<int> indices;
std::vector<int> indices_removed;
for (int i = 0; i < num_images; ++i)
//If the image belongs to the biggest component, push its index into indices
if (comps.findSetByElem(i) == max_comp)
indices.push_back(i);
else
indices_removed.push_back(i);
std::vector<ImageFeatures> features_subset;
std::vector<MatchesInfo> pairwise_matches_subset;
for (size_t i = 0; i < indices.size(); ++i)
{
//features_subset keeps the features of the retained images; the corresponding pairwise matches go into pairwise_matches_subset below
features_subset.push_back(features[indices[i]]);
for (size_t j = 0; j < indices.size(); ++j)
{
pairwise_matches_subset.push_back(pairwise_matches[indices[i]*num_images + indices[j]]);
pairwise_matches_subset.back().src_img_idx = static_cast<int>(i);
pairwise_matches_subset.back().dst_img_idx = static_cast<int>(j);
}
}
if (static_cast<int>(features_subset.size()) == num_images)
return indices;
LOG("Removed some images, because can't match them or there are too similar images: (");
LOG(indices_removed[0] + 1);
for (size_t i = 1; i < indices_removed.size(); ++i)
LOG(", " << indices_removed[i]+1);
LOGLN(").");
LOGLN("Try to decrease the match confidence threshold and/or check if you're stitching duplicates.");
features = features_subset;
pairwise_matches = pairwise_matches_subset;
return indices;
}
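To make the union-find logic above concrete, a hedged self-contained sketch of the same idea without cv::detail::DisjointSets (plain arrays; conf is assumed to be an n*n row-major confidence matrix):
#include <vector>
#include <numeric>
static int findRoot(std::vector<int>& parent, int x)
{
while (parent[x] != x) { parent[x] = parent[parent[x]]; x = parent[x]; } // path halving
return x;
}
static std::vector<int> biggestComponent(const std::vector<float>& conf, int n, float thresh)
{
std::vector<int> parent(n), compSize(n, 1);
std::iota(parent.begin(), parent.end(), 0);
for (int i = 0; i < n; ++i)
for (int j = 0; j < n; ++j)
if (conf[i * n + j] >= thresh) // same test as the confidence check above
{
int a = findRoot(parent, i), b = findRoot(parent, j);
if (a != b) { parent[b] = a; compSize[a] += compSize[b]; }
}
int best = findRoot(parent, 0); // root of the largest component
for (int i = 1; i < n; ++i)
if (compSize[findRoot(parent, i)] > compSize[best]) best = findRoot(parent, i);
std::vector<int> indices;
for (int i = 0; i < n; ++i)
if (findRoot(parent, i) == best) indices.push_back(i);
return indices;
}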
4. estimator(features, pairwise_matches, cameras)
Mathematical explanation: Estimate the focals and rotation matrices of cameras based on pairwise matches.
//The statement below determines which implementations of calcJacobian and calcError are used.
if (ba_cost_func == "reproj") adjuster = makePtr<detail::BundleAdjusterReproj>();//reprojection-error cost function for bundle adjustment
else if (ba_cost_func == "ray") adjuster = makePtr<detail::BundleAdjusterRay>();
bool HomographyBasedEstimator::estimate(
const std::vector<ImageFeatures> &features,
const std::vector<MatchesInfo> &pairwise_matches,
std::vector<CameraParams> &cameras)
{
LOGLN("Estimating rotations...");
#if ENABLE_LOG
int64 t = getTickCount();
#endif
const int num_images = static_cast<int>(features.size());
#if 0
// Robustly estimate focal length from rotating cameras
std::vector<Mat> Hs;
for (int iter = 0; iter < 100; ++iter)
{
int len = 2 + rand()%(pairwise_matches.size() - 1);
std::vector<int> subset;
selectRandomSubset(len, pairwise_matches.size(), subset);
Hs.clear();
for (size_t i = 0; i < subset.size(); ++i)
if (!pairwise_matches[subset[i]].H.empty())
Hs.push_back(pairwise_matches[subset[i]].H);
Mat_<double> K;
if (Hs.size() >= 2)
{
if (calibrateRotatingCamera(Hs, K))
cin.get();
}
}
#endif
if (!is_focals_estimated_)
{
// Estimate focal length and set it for all cameras
std::vector<double> focals;
estimateFocal(features, pairwise_matches, focals);
cameras.assign(num_images, CameraParams());
for (int i = 0; i < num_images; ++i)
cameras[i].focal = focals[i];
}
else
{
for (int i = 0; i < num_images; ++i)
{
cameras[i].ppx -= 0.5 * features[i].img_size.width;
cameras[i].ppy -= 0.5 * features[i].img_size.height;
}
}
// Restore global motion
Graph span_tree;
std::vector<int> span_tree_centers;
findMaxSpanningTree(num_images, pairwise_matches, span_tree, span_tree_centers);
span_tree.walkBreadthFirst(span_tree_centers[0], CalcRotation(num_images, pairwise_matches, cameras));
// As calculations were performed under assumption that p.p. is in image center
for (int i = 0; i < num_images; ++i)
{
cameras[i].ppx += 0.5 * features[i].img_size.width;
cameras[i].ppy += 0.5 * features[i].img_size.height;
}
LOGLN("Estimating rotations, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
return true;
}
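A hedged sketch of how this estimator is invoked from the pipeline (the CV_32F conversion mirrors what the stitching sample does before bundle adjustment; treat the details as assumptions):
detail::HomographyBasedEstimator estimator;
std::vector<detail::CameraParams> cameras;
if (!estimator(features, pairwise_matches, cameras))
return; // estimation failed
for (size_t i = 0; i < cameras.size(); ++i)
{
Mat R;
cameras[i].R.convertTo(R, CV_32F); // rotations as CV_32F for the bundle adjuster
cameras[i].R = R;
}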
5. (*adjuster)(features, pairwise_matches, cameras)
Bundle adjustment. The aim is to reduce the error in the camera parameter estimates: the parameters of all cameras are refined jointly. The cost function is chosen by the adjuster to be either reprojection error ("reproj") or ray divergence ("ray"); it measures the mapping error obtained when points of one image are mapped into the other images. Simply chaining camera pairs through their individual homographies ignores global constraints, so the accumulated error would grow. Also refer to page 338 of the Chinese edition of Computer Vision: Algorithms and Applications [5].
Mathematical explanation:
First, it uses the Levenberg-Marquardt algorithm to refine the camera parameter vector (number of cameras times num_params_per_cam_ entries; 4 parameters per camera for the "ray" cost, 7 for the "reproj" cost).
The basic Levenberg-Marquardt algorithm is as follows:
https://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm
Here the residual vector computed from pairwise_matches is regarded as a function of the camera parameter vector. An object of the CvLevMarq class initialises the iteration and performs the update step; the iterations estimate the camera parameters from the correspondences in the pairwise matches.
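For reference, each iteration solves the damped normal equations, written here in LaTeX notation (this is what the (1 + lambda) scaling of the JtJN diagonal in CvLevMarq::step below implements):
(J^T J + \lambda\,\mathrm{diag}(J^T J))\,\delta = J^T \mathrm{err}, \qquad \beta \leftarrow \beta - \delta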
After cam_params_ has been obtained, obtainRefinedCameraParams extracts the focal length and rotation matrix of each camera.
The rotation matrix of each camera is recovered from its rotation vector with the function Rodrigues, based on the Rodrigues rotation formula.
We can also refer to Section 4 in “Automatic Panoramic Image Stitching using Invariant Features”
Finally, a maximum spanning tree is constructed over the inlier matches and its root node is found by breadth-first search; then the rotation matrices are normalized: the camera nearest to the centre is chosen as the reference, and the rotation matrices of all other cameras are left-multiplied by the inverse of the reference rotation.
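A hedged usage sketch (the confidence threshold value is an assumption taken from the stitching sample's default):
Ptr<detail::BundleAdjusterBase> adjuster = makePtr<detail::BundleAdjusterRay>();
adjuster->setConfThresh(1.0); // conf_thresh_ used below to keep only consistent pairs
if (!(*adjuster)(features, pairwise_matches, cameras))
return; // bundle adjustment failed (NaN parameters)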
\\...\sources\modules\stitching\src\motion_estimators.cpp
bool BundleAdjusterBase::estimate(const std::vector<ImageFeatures> &features,//used
const std::vector<MatchesInfo> &pairwise_matches,
std::vector<CameraParams> &cameras)
{
LOG_CHAT("Bundle adjustment");
#if ENABLE_LOG
int64 t = getTickCount();
#endif
num_images_ = static_cast<int>(features.size());
features_ = &features[0];
pairwise_matches_ = &pairwise_matches[0];
setUpInitialCameraParams(cameras);
// Leave only consistent image pairs
edges_.clear();
for (int i = 0; i < num_images_ - 1; ++i)
{
for (int j = i + 1; j < num_images_; ++j)
{
const MatchesInfo& matches_info = pairwise_matches_[i * num_images_ + j];
if (matches_info.confidence > conf_thresh_)
edges_.push_back(std::make_pair(i, j));
}
}
// Compute number of correspondences
total_num_matches_ = 0;
for (size_t i = 0; i < edges_.size(); ++i)
total_num_matches_ += static_cast<int>(pairwise_matches[edges_[i].first * num_images_ + edges_[i].second].num_inliers);
//Levenberg Marquardt
//Implementation of the Levenberg-Marquardt nonlinear least-squares algorithm, used to iteratively find a local minimum of a function expressed as a sum of squares of non-linear functions.
//It can be used to non-linearly estimate the essential matrix. OpenCV internally uses the algorithm to find the camera intrinsic and extrinsic parameters from several views of a calibration pattern.
//refine extrinsic parameters using iterative algorithms
//The first argument is the total number of parameters: number of cameras times num_params_per_cam_
//The second argument is the total number of error terms: number of inlier matches times num_errs_per_measurement_
//The third parameter is the termination criterion
CvLevMarq solver(num_images_ * num_params_per_cam_,
total_num_matches_ * num_errs_per_measurement_,
term_criteria_);
Mat err, jac;
//cam_params_ is the camera parameters vector
CvMat matParams = cam_params_;
cvCopy(&matParams, solver.param);
int iter = 0;
for(;;)
{
const CvMat* _param = 0;
CvMat* _jac = 0;
CvMat* _err = 0;
//Update the camera parameters according to the Levenberg-Marquardt rule (see CvLevMarq::update below).
//It uses SVD internally; the termination criterion is the maximum iteration count or a sufficiently small change in the camera parameters between consecutive iterations. The matrices jac and err are computed by calcJacobian and calcError; err measures how well the current parameters explain pairwise_matches.
bool proceed = solver.update(_param, _jac, _err);
cvCopy(_param, &matParams);
if (!proceed || !_err)
break;
if (_jac)
{
calcJacobian(jac);
CvMat tmp = jac;
cvCopy(&tmp, _jac);
}
if (_err)
{
calcError(err);
LOG_CHAT(".");
iter++;
CvMat tmp = err;
cvCopy(&tmp, _err);
}
}
LOGLN_CHAT("");
LOGLN_CHAT("Bundle adjustment, final RMS error: " << std::sqrt(err.dot(err) / total_num_matches_));
LOGLN_CHAT("Bundle adjustment, iterations done: " << iter);
// Check if all camera parameters are valid
bool ok = true;
for (int i = 0; i < cam_params_.rows; ++i)
{
if (cvIsNaN(cam_params_.at<double>(i,0)))
{
ok = false;
break;
}
}
if (!ok)
return false;
obtainRefinedCameraParams(cameras);
// Normalize motion to center image
Graph span_tree;
std::vector<int> span_tree_centers;
findMaxSpanningTree(num_images_, pairwise_matches, span_tree, span_tree_centers);
Mat R_inv = cameras[span_tree_centers[0]].R.inv();
for (int i = 0; i < num_images_; ++i)
cameras[i].R = R_inv * cameras[i].R;
LOGLN_CHAT("Bundle adjustment, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
return true;
}
solver.update calls
cvcalibration.cpp
bool CvLevMarq::update( const CvMat*& _param, CvMat*& matJ, CvMat*& _err )
{
double change;
matJ = _err = 0;
assert( !err.empty() );
if( state == DONE )
{
_param = param;
return false;
}
if( state == STARTED )
{
_param = param;
cvZero( J );
cvZero( err );
matJ = J;
_err = err;
if( state == CALC_J )
{
//cvMulTransposed(const CvArr* src, CvArr* dst, int order, const CvArr* delta=NULL)
//src: input matrix
//dst: output matrix
//order = 0: dst = (src - delta) * (src - delta)^T
//order = 1: dst = (src - delta)^T * (src - delta)
cvMulTransposed( J, JtJ, 1 );
//cvGEMM(const CvArr* src1, const CvArr* src2, double alpha, const CvArr* src3, double beta, CvArr* dst, int tABC=0)
//dst = alpha * op(src1) * op(src2) + beta * op(src3), where the tABC flags select which operands are transposed
//With CV_GEMM_A_T here: JtErr = J^T * err
cvGEMM( J, err, 1, 0, 0, JtErr, CV_GEMM_A_T );
cvCopy( param, prevParam );
step();
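//(The body of CvLevMarq::step() is pasted inline below for reference; execution returns to update() at the "if( iters == 0 )" line.)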
void CvLevMarq::step()
{
const double LOG10 = log(10.);
double lambda = exp(lambdaLg10*LOG10);
//param = cvCreateMat( nparams, 1, CV_64F )
int i, j, nparams = param->rows;
//For parameters that are masked out (held fixed), zero the corresponding row and column of JtJ and the corresponding entry of JtErr
for( i = 0; i < nparams; i++ )
if( mask->data.ptr[i] == 0 )
{
//(Each row of the Jacobian J is the partial derivative of one residual f(x_i, beta) with respect to the parameter vector beta.)
double *row = JtJ->data.db + i*nparams, *col = JtJ->data.db + i;
for( j = 0; j < nparams; j++ )
row[j] = col[j*nparams] = 0;
JtErr->data.db[i] = 0;
}
cv::Mat cvJtJ(JtJ);
cv::Mat cvparam(param);
if( !err )
cvCompleteSymm( JtJ, completeSymmFlag );
#if 1
cvCopy( JtJ, JtJN );
for( i = 0; i < nparams; i++ )
JtJN->data.db[(nparams+1)*i] *= 1. + lambda;
#else
cvSetIdentity(JtJN, cvRealScalar(lambda));
cvAdd( JtJ, JtJN, JtJN );
#endif
cvSVD( JtJN, JtJW, 0, JtJV, CV_SVD_MODIFY_A + CV_SVD_U_T + CV_SVD_V_T );
//calculate delta with singular value decomposition and store delta in param
cvSVBkSb( JtJW, JtJV, JtJV, JtErr, param, CV_SVD_U_T + CV_SVD_V_T );
//param currently holds delta from the back-substitution above; update: param = prevParam - delta (masked-out parameters stay fixed)
for( i = 0; i < nparams; i++ )
param->data.db[i] = prevParam->data.db[i] - (mask->data.ptr[i] ? param->data.db[i] : 0);
}
if( iters == 0 )
prevErrNorm = cvNorm(err, 0, CV_L2);
_param = param;
cvZero( err );
_err = err;
state = CHECK_ERR;
return true;
}
assert( state == CHECK_ERR );
errNorm = cvNorm( err, 0, CV_L2 );
if( errNorm > prevErrNorm )
{
lambdaLg10++;
step();
_param = param;
cvZero( err );
_err = err;
state = CHECK_ERR;
return true;
}
lambdaLg10 = MAX(lambdaLg10-1, -16);
if( ++iters >= criteria.max_iter ||
(((change = cvNorm(param, prevParam, CV_RELATIVE_L2)) < criteria.epsilon )&&(change !=0)) )
{
_param = param;
state = DONE;
return true;
}
prevErrNorm = errNorm;
_param = param;
cvZero(J);
matJ = J;
_err = err;
state = CALC_J;
return true;
}
state = CALC_J;
return true;
}
6. waveCorrect(rmats, wave_correct);
Deals with the problem that neighbouring images do not lie on the same horizontal line. It computes the 'up' vector of each camera relative to the horizon and corrects the rotations accordingly. The function computes the eigenvalues and eigenvectors of a symmetric matrix built from the camera axes; for horizontal correction it takes the 3rd row of the eigenvector matrix, for vertical correction the 1st row. Finally it corrects the rotation matrices of all cameras; rmats[i] is the corrected rotation matrix of the i-th camera.
Mathematical explanation:
Input is the rotation matrices of cameras, they are corrected in the function.
Refer to Section 5 in “Automatic Panoramic Image Stitching using Invariant Features”
Two constraints are mainly used to compute the global rotation matrix:
1. The X axis of each camera coordinate system is perpendicular to the Z axis of the world coordinate system.
2. The average of the Z-axis directions of the camera rotation matrices is close to the Z axis of the world coordinate system.
Call
...\sources\modules\stitching\src\motion_estimators.cpp
void waveCorrect(std::vector<Mat> &rmats, WaveCorrectKind kind)
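A hedged sketch of how waveCorrect is driven (mirrors the stitching sample; treat the names as assumptions):
std::vector<Mat> rmats;
for (size_t i = 0; i < cameras.size(); ++i)
rmats.push_back(cameras[i].R.clone());
detail::waveCorrect(rmats, detail::WAVE_CORRECT_HORIZ); // or WAVE_CORRECT_VERT
for (size_t i = 0; i < cameras.size(); ++i)
cameras[i].R = rmats[i];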
7. warper->warp(images[i], K, cameras[i].R, INTER_LINEAR, BORDER_REFLECT, images_warped[i]); OpenCL
Internally the warper builds a pair of maps (buildMaps) from the intrinsics K and the rotation R and then remaps the image into the panorama plane; as explained in the OpenCL notes, this call, together with images_warped[i].convertTo(images_warped_f[i], CV_32F), can run on the OpenCL path.
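A hedged sketch of the warping loop around this call (adapted from the stitching sample; the spherical warper choice and the helper signature are assumptions, and using namespace cv plus #include <opencv2/stitching.hpp> are assumed):
static void warpAll(const std::vector<Mat>& images, const std::vector<detail::CameraParams>& cameras,
float warped_image_scale, std::vector<Point>& corners,
std::vector<UMat>& images_warped, std::vector<UMat>& images_warped_f)
{
Ptr<WarperCreator> warper_creator = makePtr<cv::SphericalWarper>();
Ptr<detail::RotationWarper> warper = warper_creator->create(warped_image_scale);
corners.resize(images.size());
images_warped.resize(images.size());
images_warped_f.resize(images.size());
for (size_t i = 0; i < images.size(); ++i)
{
Mat K;
cameras[i].K().convertTo(K, CV_32F); // warp expects CV_32F intrinsics
corners[i] = warper->warp(images[i], K, cameras[i].R, INTER_LINEAR, BORDER_REFLECT, images_warped[i]);
images_warped[i].convertTo(images_warped_f[i], CV_32F); // float copies for the seam finder
}
}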
8. compensator->feed(corners, images_warped, masks_warped);
Exposure (gain) compensation. The compensator is created and initialised first; its feed method estimates a gain for each image (or image block) from the overlapping regions, whose positions in the panorama plane are given by corners.
Mathematical explanation:
The gains are found by minimising a quadratic objective function over the intensity errors in the pairwise overlaps.
Refer to Section 6 in “Automatic Panoramic Image Stitching using Invariant Features”
The estimated gain is later applied in compensator->apply(img_idx, corners[img_idx], img_warped, mask_warped).
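A hedged sketch of the surrounding calls (the GAIN_BLOCKS choice is the sample's default as far as I recall; treat it as an assumption):
Ptr<detail::ExposureCompensator> compensator =
detail::ExposureCompensator::createDefault(detail::ExposureCompensator::GAIN_BLOCKS);
compensator->feed(corners, images_warped, masks_warped); // estimate per-block gains from the overlaps
// ... later, inside the composition loop:
compensator->apply(img_idx, corners[img_idx], img_warped, mask_warped); // apply the gain to the image being composed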
9. seam_finder->find(images_warped_f, corners, masks_warped);
Mathematical explanation:
Suppose the left image is A and the right image is B, and let C be their overlap region. To find the seam line we minimise an energy function over a labelling that assigns every pixel of C to either A or B. N is the set of neighbouring pixel pairs in the overlap region; if (p, q) are two neighbouring points in A and (p', q') are their counterparts in B, the smoothness term penalises placing the seam where the two images disagree. The energy essentially measures the difference of colours and gradients between the two images along the seam.
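A generic sketch of this graph-cut seam energy in LaTeX notation (the standard data + smoothness form; the exact weights of OpenCV's COST_COLOR_GRAD cost may differ):
E(l) = \sum_{p \in C} D_p(l_p) + \sum_{(p,q) \in N} S_{p,q}(l_p, l_q), \qquad S_{p,q} \propto \frac{\lVert A(p)-B(p)\rVert + \lVert A(q)-B(q)\rVert}{\lVert \nabla A(p)\rVert + \lVert \nabla B(p)\rVert + \lVert \nabla A(q)\rVert + \lVert \nabla B(q)\rVert}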
...\sources\modules\stitching\src\seam_finders.cpp
void GraphCutSeamFinder::Impl::find(const std::vector<UMat> &src, const std::vector<Point> &corners, std::vector<UMat> &masks)
{
// Compute gradients
dx_.resize(src.size());
dy_.resize(src.size());
Mat dx, dy;
for (size_t i = 0; i < src.size(); ++i)
{
CV_Assert(src[i].channels() == 3);
Sobel(src[i], dx, CV_32F, 1, 0);
Sobel(src[i], dy, CV_32F, 0, 1);
dx_[i].create(src[i].size(), CV_32F);
dy_[i].create(src[i].size(), CV_32F);
for (int y = 0; y < src[i].rows; ++y)
{
const Point3f* dx_row = dx.ptr<Point3f>(y);
const Point3f* dy_row = dy.ptr<Point3f>(y);
float* dx_row_ = dx_[i].ptr<float>(y);
float* dy_row_ = dy_[i].ptr<float>(y);
for (int x = 0; x < src[i].cols; ++x)
{
dx_row_[x] = normL2(dx_row[x]);
dy_row_[x] = normL2(dy_row[x]);
}
}
}
PairwiseSeamFinder::find(src, corners, masks);
}
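A hedged sketch of how the seam finder is created and invoked (the cost type is the one commonly used in the samples; treat it as an assumption):
Ptr<detail::SeamFinder> seam_finder =
makePtr<detail::GraphCutSeamFinder>(detail::GraphCutSeamFinderBase::COST_COLOR_GRAD);
seam_finder->find(images_warped_f, corners, masks_warped); // masks_warped are trimmed in place to the seam regions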
10. blender->feed(img_warped_s, mask_warped, corners[img_idx]);
Mathematical explanation:
A Laplacian pyramid is constructed for each image. The number of bands comes from the input parameters (default 5). The image width and height are first padded so that they are integer multiples of 2^(number of bands), which allows the image to be downsampled that many times. The source image is downsampled repeatedly, the bottom level is upsampled back, and subtracting the corresponding downsampled and upsampled images gives the high-frequency component of each level; these components are stored in the image's pyramid. Each image's pyramid is then accumulated into the destination pyramid, and the final panorama is obtained by summing all pyramid layers.
Refer to Section 7 in “Automatic Panoramic Image Stitching using Invariant Features”
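A hedged sketch of the blending calls around feed (the band count of 5 is the class default mentioned above; other details are assumptions):
Ptr<detail::Blender> blender = makePtr<detail::MultiBandBlender>(false /*try_gpu*/, 5 /*num_bands*/);
blender->prepare(detail::resultRoi(corners, sizes)); // panorama ROI computed from corners and warped sizes
// for each image, inside the composition loop:
//     blender->feed(img_warped_s, mask_warped, corners[img_idx]); // img_warped_s is CV_16SC3
Mat pano, pano_mask;
blender->blend(pano, pano_mask); // collapse the accumulated pyramids into the panorama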
References:
[1] Automatic Panoramic Image Stitching using Invariant Features
[2] Image Alignment and Stitching
[3] "GrabCut" - Interactive Foreground Extraction using Iterated Graph Cuts
[4] Image Stitching Algorithm Research Based on OpenCV
[5] 计算机视觉算法与应用 (Computer Vision: Algorithms and Applications, Chinese edition)
[6] BRIEF: Binary Robust Independent Elementary Features.
[7] ORB: An efficient alternative to SIFT or SURF.