1 介绍
1.1 视频稳定的方法
1.2 使用点特征匹配的视频稳定
2 算法
2.1 帧间运动信息获取
2.1.1 合适的特征点获取
2.1.2 Lucas-Kanade光流法
2.1.3 运动估计
2.2 计算帧之间的总体运动
2.2.1 计算运动轨迹
2.2.2 计算平滑轨迹
2.2.3 平滑变化计算
2.3 将平滑后的变化矩阵应用于帧
3 结果和代码
3.2 代码
数字视频稳定:此方法不需要特殊的传感器来估算相机运动,本文使用的就是这种方法。它主要包含三个阶段:1)运动估计,2)运动平滑,3)图像合成。第一阶段导出两个连续帧之间的变换参数,第二阶段滤除不需要的运动,第三阶段重建稳定后的视频。
1 获取多帧视频图像,获取图像角点(特征点);
2 用光流法跟踪角点,根据前后两帧图像的角点变化得到表示运动的仿射变换矩阵;
3 根据仿射变换矩阵计算运动轨迹,并对运动轨迹进行平滑;
4 根据平滑后的运动轨迹,得到平滑后的仿射变换矩阵;
5 将平滑后的仿射变换矩阵应用于各帧,得到稳定后的图像。
一旦我们在前一帧中找到了好的特征(角点),我们就可以使用名为Lucas-Kanade光流(Lucas-Kanade Optical Flow)的算法在下一帧中跟踪它们,该算法以其发明者的名字命名。算法详情可参见Lucas-Kanade光流法的相关资料。
// video_stabilization.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
#include "pch.h"
using namespace std;
using namespace cv;
// Radius of the moving-average smoothing window, in frames.
// The larger the more stable the video, but less reactive to sudden panning.
const int SMOOTHING_RADIUS = 50;
* @brief 运动信息结构体
struct TransformParam
TransformParam() {}
TransformParam(double _dx, double _dy, double _da)
dx = _dx;
dy = _dy;
da = _da;
double dx;
double dy;
// angle
double da;
void getTransform(Mat &T)
// Reconstruct transformation matrix accordingly to new values 重建变换矩阵
T.at(0, 0) = cos(da);
T.at(0, 1) = -sin(da);
T.at(1, 0) = sin(da);
T.at(1, 1) = cos(da);
T.at(0, 2) = dx;
T.at(1, 2) = dy;
/**
 * @brief One point of the accumulated motion trajectory: position (x, y) and angle (a).
 */
struct Trajectory
{
    /// Creates a trajectory point at the origin with zero angle.
    Trajectory() : x(0.0), y(0.0), a(0.0) {}

    /// Creates a trajectory point from position (_x, _y) and angle _a.
    Trajectory(double _x, double _y, double _a) : x(_x), y(_y), a(_a) {}

    double x; ///< accumulated translation along x
    double y; ///< accumulated translation along y
    double a; ///< accumulated rotation angle
};
* @brief 轨迹累积
* @param transforms 运动信息结构体
* @return vector 轨迹结构体
vector cumsum(vector &transforms)
// trajectory at all frames 所有帧的运动轨迹
vector trajectory;
// Accumulated frame to frame transform 累加计算x,y以及a(角度)
double a = 0;
double x = 0;
double y = 0;
for (size_t i = 0; i < transforms.size(); i++)
x += transforms[i].dx;
y += transforms[i].dy;
a += transforms[i].da;
trajectory.push_back(Trajectory(x, y, a));
return trajectory;
* @brief 平滑运动轨迹
* @param trajectory 运动轨迹
* @param radius 窗格大小
* @return vector
vector smooth(vector &trajectory, int radius)
vector smoothed_trajectory;
for (size_t i = 0; i < trajectory.size(); i++)
double sum_x = 0;
double sum_y = 0;
double sum_a = 0;
int count = 0;
for (int j = -radius; j <= radius; j++)
if (i + j >= 0 && i + j < trajectory.size())
sum_x += trajectory[i + j].x;
sum_y += trajectory[i + j].y;
sum_a += trajectory[i + j].a;
double avg_a = sum_a / count;
double avg_x = sum_x / count;
double avg_y = sum_y / count;
smoothed_trajectory.push_back(Trajectory(avg_x, avg_y, avg_a));
return smoothed_trajectory;
* @brief
* @param frame_stabilized
void fixBorder(Mat &frame_stabilized)
Mat T = getRotationMatrix2D(Point2f(frame_stabilized.cols / 2, frame_stabilized.rows / 2), 0, 1.04);
warpAffine(frame_stabilized, frame_stabilized, T, frame_stabilized.size());
int main(int argc, char **argv)
// Read input video 读取视频
VideoCapture cap("./video/detect.mp4");
// Get frame count 读取视频总帧数
int n_frames = int(cap.get(CAP_PROP_FRAME_COUNT));
// Our test video may be wrong to read the frame after frame 1300
n_frames = 1300;
// Get width and height of video stream 获取视频图像宽高
int w = int(cap.get(CAP_PROP_FRAME_WIDTH));
int h = int(cap.get(CAP_PROP_FRAME_HEIGHT));
// Get frames per second (fps) 获取视频每秒帧数
double fps = cap.get(CV_CAP_PROP_FPS);
// Set up output video 设置输出视频
VideoWriter out("video_out.avi", CV_FOURCC('M', 'J', 'P', 'G'), fps, Size(2 * w, h));
// Define variable for storing frames 定义存储帧的相关变量
Mat curr, curr_gray;
Mat prev, prev_gray;
// Read first frame 获得视频一张图象
cap >> prev;
// Convert frame to grayscale 转换为灰度图
cvtColor(prev, prev_gray, COLOR_BGR2GRAY);
// Pre-define transformation-store array 仿射变化参数结构体
vector transforms;
//previous transformation matrix 上一张图像的仿射矩阵
Mat last_T;
for (int i = 1; i < n_frames; i++)
// Vector from previous and current feature points 前一帧角点vector,当前帧角点vector
vector prev_pts, curr_pts;
// Detect features in previous frame 获取前一帧的角点
//前一帧灰度图,前一帧角点vector, 最大角点数,检测到的角点的质量等级,两个角点之间的最小距离
goodFeaturesToTrack(prev_gray, prev_pts, 200, 0.01, 30);
// Read next frame 读取当前帧图像
bool success = cap.read(curr);
if (!success)
// Convert to grayscale 将当前帧图像转换为灰度图
cvtColor(curr, curr_gray, COLOR_BGR2GRAY);
// Calculate optical flow (i.e. track feature points) 光流法追寻特征点
vector status;
vector err;
calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_pts, curr_pts, status, err);
// Filter only valid points 获取光流跟踪下有效的角点
auto prev_it = prev_pts.begin();
auto curr_it = curr_pts.begin();
for (size_t k = 0; k < status.size(); k++)
if (status[k])
prev_it = prev_pts.erase(prev_it);
curr_it = curr_pts.erase(curr_it);
// Find transformation matrix 获得变换矩阵
Mat T = estimateRigidTransform(prev_pts, curr_pts, false);
// In rare cases no transform is found.
// We'll just use the last known good transform.
if (T.data == NULL)
// Extract traslation 提取仿射变化结果
double dx = T.at(0, 2);
double dy = T.at(1, 2);
// Extract rotation angle 提取角度
double da = atan2(T.at(1, 0), T.at(0, 0));
// Store transformation 存储仿射变化矩阵
transforms.push_back(TransformParam(dx, dy, da));
// Move to next frame 进行下一次检测准测
cout << "Frame: " << i << "/" << n_frames << " - Tracked points : " << prev_pts.size() << endl;
// Compute trajectory using cumulative sum of transformations 获取累加轨迹
vector trajectory = cumsum(transforms);
// Smooth trajectory using moving average filter 获取平滑后的轨迹
vector smoothed_trajectory = smooth(trajectory, SMOOTHING_RADIUS);
vector transforms_smooth;
for (size_t i = 0; i < transforms.size(); i++)
// Calculate difference in smoothed_trajectory and trajectory 计算平滑后的轨迹和原始轨迹差异
double diff_x = smoothed_trajectory[i].x - trajectory[i].x;
double diff_y = smoothed_trajectory[i].y - trajectory[i].y;
double diff_a = smoothed_trajectory[i].a - trajectory[i].a;
// Calculate newer transformation array 计算平滑后的运动信息结构体数据
double dx = transforms[i].dx + diff_x;
double dy = transforms[i].dy + diff_y;
double da = transforms[i].da + diff_a;
transforms_smooth.push_back(TransformParam(dx, dy, da));
Mat T(2, 3, CV_64F);
Mat frame, frame_stabilized, frame_out;
for (int i = 0; i < n_frames - 1; i++)
bool success = cap.read(frame);
if (!success)
// Extract transform from translation and rotation angle. 提取平滑后的仿射变化矩阵
// Apply affine wrapping to the given frame 应用仿射变化
warpAffine(frame, frame_stabilized, T, frame.size());
// Scale image to remove black border artifact 去除黑边
// Now draw the original and stablised side by side for coolness 将原图和变化后的图横向排列输出到视频
hconcat(frame, frame_stabilized, frame_out);
// If the image is too big, resize it.
if (frame_out.cols > 1920)
resize(frame_out, frame_out, Size(frame_out.cols / 2, frame_out.rows / 2));
//imshow("Before and After", frame_out);
cout << "out frame:" << i << endl;
// Release video
// Close windows
return 0;
# Import numpy and OpenCV
import numpy as np
import cv2
def movingAverage(curve, radius):
    """Smooth a 1-D curve with a centered box (moving-average) filter.

    The curve is padded on both sides by repeating its edge values, so the
    output has the same length as the input and the ends are averaged
    against the nearest edge value rather than zeros.

    Args:
        curve: 1-D array-like of samples (must be non-empty).
        radius: half-width of the window; the window spans 2 * radius + 1
            samples. radius == 0 returns the curve unchanged.

    Returns:
        1-D numpy array with the same length as `curve`.
    """
    window_size = 2 * radius + 1
    # Define the filter
    f = np.ones(window_size) / window_size
    # Add padding to the boundaries
    curve_pad = np.pad(curve, (radius, radius), 'edge')
    # 'valid' keeps exactly the len(curve) positions where the window lies
    # fully inside the padded curve. That equals mode='same' followed by
    # stripping `radius` samples per side, but also works for radius == 0
    # (where the slice [0:-0] would be empty).
    return np.convolve(curve_pad, f, mode='valid')
def smooth(trajectory, radius=None):
    """Smooth the x, y and angle columns of a trajectory independently.

    Args:
        trajectory: 2-D numpy array whose first three columns are filtered
            (indexed as trajectory[:, 0..2]).
        radius: moving-average radius; defaults to the module-level
            SMOOTHING_RADIUS when omitted.

    Returns:
        A copy of `trajectory` with columns 0..2 replaced by their
        moving averages.
    """
    if radius is None:
        radius = SMOOTHING_RADIUS
    smoothed_trajectory = np.copy(trajectory)
    # Filter the x, y and angle curves
    for i in range(3):
        smoothed_trajectory[:, i] = movingAverage(
            trajectory[:, i], radius=radius)

    return smoothed_trajectory
def fixBorder(frame):
    """Scale the frame by 4% about its center to hide black border artifacts.

    Args:
        frame: image array; frame.shape[0] is its height and frame.shape[1]
            its width.

    Returns:
        The scaled image, same size as the input.
    """
    s = frame.shape
    # Scale the image 4% without moving the center
    T = cv2.getRotationMatrix2D((s[1] / 2, s[0] / 2), 0, 1.04)
    frame = cv2.warpAffine(frame, T, (s[1], s[0]))
    return frame
# Radius of the moving-average window, in frames.
# The larger the more stable the video, but less reactive to sudden panning
SMOOTHING_RADIUS = 50

# Read input video
cap = cv2.VideoCapture('video/detect.mp4')

# Get frame count
n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# Our test video may be wrong to read the frame after frame 1300
n_frames = 1300

# Get width and height of video stream
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Get frames per second (fps)
fps = cap.get(cv2.CAP_PROP_FPS)

# Define the codec for output video
fourcc = cv2.VideoWriter_fourcc(*'MJPG')

# Set up output video. Side-by-side frames wider than 1920 are halved below,
# so the writer must be created with the final size: frames whose size
# differs from the writer's are not encoded correctly.
out_size = (2 * w, h)
if out_size[0] > 1920:
    out_size = (out_size[0] // 2, out_size[1] // 2)
out = cv2.VideoWriter('video_out.avi', fourcc, fps, out_size)

# Read first frame
_, prev = cap.read()

# Convert frame to grayscale
prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)

# Pre-define transformation-store array
transforms = np.zeros((n_frames-1, 3), np.float32)

# Last successfully estimated transform; identity until one is found
last_m = np.array([[1.0, 0.0, 0.0],
                   [0.0, 1.0, 0.0]])

for i in range(n_frames-2):
    # Detect feature points in previous frame
    prev_pts = cv2.goodFeaturesToTrack(prev_gray,
                                       maxCorners=200,
                                       qualityLevel=0.01,
                                       minDistance=30)

    # Read next frame
    success, curr = cap.read()
    if not success:
        break

    # Convert to grayscale
    curr_gray = cv2.cvtColor(curr, cv2.COLOR_BGR2GRAY)

    m = None
    n_tracked = 0
    # goodFeaturesToTrack returns None when no corner is found
    if prev_pts is not None:
        # Calculate optical flow (i.e. track feature points)
        curr_pts, status, err = cv2.calcOpticalFlowPyrLK(
            prev_gray, curr_gray, prev_pts, None)

        # Sanity check
        assert prev_pts.shape == curr_pts.shape

        # Filter only valid points
        idx = np.where(status == 1)[0]
        prev_pts = prev_pts[idx]
        curr_pts = curr_pts[idx]
        n_tracked = len(prev_pts)

        if n_tracked > 0:
            # Find transformation matrix
            # will only work with OpenCV-3 or less
            m = cv2.estimateRigidTransform(
                prev_pts, curr_pts, fullAffine=False)

    # In rare cases no transform is found: reuse the last known good one
    if m is None:
        m = last_m
    else:
        last_m = m

    # Extract traslation
    dx = m[0, 2]
    dy = m[1, 2]

    # Extract rotation angle
    da = np.arctan2(m[1, 0], m[0, 0])

    # Store transformation
    transforms[i] = [dx, dy, da]

    # Move to next frame
    prev_gray = curr_gray

    print("Frame: " + str(i) + "/" + str(n_frames) +
          " - Tracked points : " + str(n_tracked))

# Compute trajectory using cumulative sum of transformations
trajectory = np.cumsum(transforms, axis=0)

# Create variable to store smoothed trajectory
smoothed_trajectory = smooth(trajectory)

# Calculate difference in smoothed_trajectory and trajectory
difference = smoothed_trajectory - trajectory

# Calculate newer transformation array
transforms_smooth = transforms + difference

# Reset stream to first frame
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# Write the transformed frames (stops early at the end of the stream)
for i in range(n_frames-2):
    # Read next frame
    success, frame = cap.read()
    if not success:
        break

    # Extract transformations from the new transformation array
    dx = transforms_smooth[i, 0]
    dy = transforms_smooth[i, 1]
    da = transforms_smooth[i, 2]

    # Reconstruct transformation matrix accordingly to new values
    m = np.zeros((2, 3), np.float32)
    m[0, 0] = np.cos(da)
    m[0, 1] = -np.sin(da)
    m[1, 0] = np.sin(da)
    m[1, 1] = np.cos(da)
    m[0, 2] = dx
    m[1, 2] = dy

    # Apply affine wrapping to the given frame
    frame_stabilized = cv2.warpAffine(frame, m, (w, h))

    # Fix border artifacts
    frame_stabilized = fixBorder(frame_stabilized)

    # Write the frame to the file
    frame_out = cv2.hconcat([frame, frame_stabilized])

    # If the image is too big, resize it (cv2.resize needs integer sizes,
    # hence the integer division used when computing out_size).
    if (frame_out.shape[1], frame_out.shape[0]) != out_size:
        frame_out = cv2.resize(frame_out, out_size)

    #cv2.imshow("Before and After", frame_out)
    # cv2.waitKey(10)
    out.write(frame_out)

# Release video
cap.release()
out.release()
# No windows are opened (imshow is disabled), so there is nothing to close.