Preface
Lately I have been reading a lot of material on SLAM, visual odometry, feature detection and matching, and optical flow. I wanted some hands-on practice, so I am starting with the simplest case, 2D-2D pose estimation, and recording the pitfalls I hit along the way.
Note: there is too much code to fit both versions here, so the C++ implementation lives in a separate post: C++.
I. Overall Pipeline
First, a caveat: 2D-2D pose estimation loses the scale of the translation vector t. To recover scale you have to go beyond 2D-2D, triangulating the matched points and then solving the 3D-2D problem. The 2D-2D estimation itself is fairly simple (the code from *14 Lectures on Visual SLAM* is a good reference) and consists of three steps, sketched in code after the list:
1. Find and match feature points between two frames
2. Solve for the essential matrix from the matched points
3. Recover R and t from the essential matrix
We truly stand on the shoulders of giants here: OpenCV provides nearly all of this, and our job is just to wire the pieces together.
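To make the three steps concrete, here is a minimal self-contained sketch. The image paths and the intrinsics (fx, cx, cy) are placeholder assumptions you must replace with your own calibration; it uses ORB plus brute-force matching purely for brevity, not the SIFT/FLANN pipeline used later in this post.

import cv2
import numpy as np

# placeholder intrinsics and image paths -- substitute your own
fx, cx, cy = 718.0, 320.0, 240.0
img1 = cv2.imread('frame0.png', cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread('frame1.png', cv2.IMREAD_GRAYSCALE)

# step 1: detect and match features (ORB + Hamming-distance brute force)
orb = cv2.ORB_create(1000)
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)
matches = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True).match(des1, des2)
pts1 = np.float32([kp1[m.queryIdx].pt for m in matches])
pts2 = np.float32([kp2[m.trainIdx].pt for m in matches])

# step 2: essential matrix with RANSAC, using the real intrinsics
E, mask = cv2.findEssentialMat(pts1, pts2, focal=fx, pp=(cx, cy),
                               method=cv2.RANSAC, prob=0.999, threshold=1.0)

# step 3: recover R and t from E (t is only determined up to scale)
_, R, t, _ = cv2.recoverPose(E, pts1, pts2, focal=fx, pp=(cx, cy), mask=mask)
print(R, t.T)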
II. Implementation Code
Without further ado, here is the code. Two approaches are implemented in total:
"""
2D-2D camera pose estimation via optical flow and via feature-point matching.
The pose is estimated with epipolar geometry; normalized coordinates are used, so the scale of t is lost.
"""
import cv2
import numpy as np
from FeaturePointDetection import *
from LK import *
from R2Angle import *
import glob
import time
"""
2D-2D pose estimation based on optical flow
"""
# i = 0
# # count of dropped frames
# mm = 0
# image_pre = cv2.imread('../asset/image/6.bmp')
# image_new = cv2.imread('../asset/image/7.bmp')
# t = 0
# num = 0
# for imagePath in glob.glob("E:/Data/test" + "/*.bmp"):
#     num += 1
#     t_start = time.time()
#     # load the images; the first iteration only caches the previous frame
#     if i < 1:
#         image_pre = cv2.imread(imagePath)
#         i += 1
#         continue
#     image_new = cv2.imread(imagePath)
#
#     # optical flow: track points from the previous frame into the current one
#     p00, p11 = calcOpticalFlowPyrLKByImage(image_pre, image_new)
#     if len(p00) < 8:
#         mm = mm + 1
#         image_pre = image_new.copy()
#         num -= 1
#         continue
#
#     # focal/pp must match the camera intrinsics (focal=0 is invalid);
#     # focal=1.0 is only correct for already-normalized coordinates
#     E, mask = cv2.findEssentialMat(p00, p11, focal=1.0, pp=(0, 0), method=cv2.RANSAC)
#
#     points, R_est, t_est, mask_pose = cv2.recoverPose(E, p00, p11)
#     # print(f'r is {R_est}')
#     print(R2Angle(R_est))
#     # print(f't is {t_est.T}')
#
#     # image_p = image_pre.copy()
#     # image_n = image_new.copy()
#     # for j, (new, old) in enumerate(zip(p11, p00)):
#     #     cv2.circle(image_p, (int(old[0]), int(old[1])), 5, (0, 0, 255), -1)
#     #     cv2.circle(image_n, (int(new[0]), int(new[1])), 5, (0, 0, 255), -1)
#     # cv2.imshow("pre", image_p)
#     # cv2.imshow("new", image_n)
#     # cv2.waitKey(10)
#     image_pre = image_new.copy()
#     t_end = time.time()
#     print(f'use : {t_end - t_start}')
#     t += (t_end - t_start)
# print(f'dropped {mm} frames')
# print(f'average time per frame: {t / num}')
"""
2D-2D pose estimation based on feature-point matching
"""
i = 0
p00 = []
p11 = []
# count of dropped frames
mm = 0
image_pre = cv2.imread('../asset/image/6.bmp')
image_new = cv2.imread('../asset/image/7.bmp')
t = 0
num = 0
for imagePath in glob.glob("E:/Data/test" + "/*.bmp"):
    num += 1
    t_start = time.time()
    # load the images; the first iteration only caches the previous frame
    if i < 1:
        image_pre = cv2.imread(imagePath)
        # print(imagePath)
        # image_pre[200:-1, :] = 0
        i += 1
        continue
    # print(imagePath)
    image_new = cv2.imread(imagePath)
    # image_new[200:-1, :] = 0
    # detect SIFT features, match with FLANN, then reject outliers with RANSAC
    kp1, des1 = SIFT(image_pre)
    kp2, des2 = SIFT(image_new)
    matches11 = ByFlann(image_pre, image_new, kp1, kp2, des1, des2, "SIFT")
    kp1, kp2, matches, matchesMask = RANSAC(image_pre, image_new, kp1, kp2, matches11)
    if matchesMask is None:
        mm = mm + 1
        image_pre = image_new.copy()
        num -= 1
        continue
    # collect inlier matches (use j here: i is the frame counter above)
    for j in range(len(matches)):
        if matchesMask[j] == 1:
            p00.append(kp1[matches[j].queryIdx].pt)
            p11.append(kp2[matches[j].trainIdx].pt)
    if len(p11) <= 10:
        mm = mm + 1
        image_pre = image_new.copy()
        num -= 1
        continue
    p000 = np.array(p00)
    p111 = np.array(p11)
    # focal/pp must match the camera intrinsics (focal=0 is invalid);
    # focal=1.0 is only correct for already-normalized coordinates
    E, mask = cv2.findEssentialMat(p000, p111, focal=1.0, pp=(0, 0), method=cv2.RANSAC)
    points, R_est, t_est, mask_pose = cv2.recoverPose(E, p000, p111)
    # print(f'r is {R_est}')
    # Euler angles in degrees (x, y, z)
    print(f'{R2Angle(R_est)}')
    # print(f't is {t_est.T}')
    # print(f'number of feature points: {len(p00)}')
    # # img3 visualizes the feature matches
    # draw_params = dict(matchColor=(0, 0, 255),  # draw matches in red
    #                    singlePointColor=None,
    #                    matchesMask=matchesMask,  # draw only inliers
    #                    flags=4)
    #
    # img3 = cv2.drawMatches(image_pre, kp1, image_new, kp2, matches, None, **draw_params)
    image_p = image_pre.copy()
    image_n = image_new.copy()
    for j, (new, old) in enumerate(zip(p11, p00)):
        cv2.circle(image_p, (int(old[0]), int(old[1])), 5, (0, 0, 255), -1)
        cv2.circle(image_n, (int(new[0]), int(new[1])), 5, (0, 0, 255), -1)
    cv2.imshow("pre", image_p)
    cv2.imshow("new", image_n)
    # cv2.imshow("new", img3)
    image_pre = image_new.copy()
    cv2.waitKey(10)
    cv2.imwrite("1.jpg", image_p)
    cv2.imwrite("2.jpg", image_n)
    p00.clear()
    p11.clear()
    t_end = time.time()
    print(f'use : {t_end - t_start}')
    t += (t_end - t_start)
print(f'dropped {mm} frames')
print(f'average time per frame: {t / num}')
The angle-computation code, R2Angle.py:
"""
Convert a rotation matrix to Euler angles (ZYX convention)
"""
import numpy as np

def R2Angle(rotM):
    # ZYX Euler angles: z = atan2(R21, R11), y = atan2(-R31, sqrt(R32^2 + R33^2)),
    # x = atan2(R32, R33)
    thetaz = np.arctan2(rotM[1, 0], rotM[0, 0])
    thetay = np.arctan2(-rotM[2, 0], np.sqrt(rotM[2, 1] ** 2 + rotM[2, 2] ** 2))
    thetax = np.arctan2(rotM[2, 1], rotM[2, 2])
    # return the three angles in degrees as (x, y, z)
    return thetax * 180 / np.pi, thetay * 180 / np.pi, thetaz * 180 / np.pi
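As a quick sanity check (a hypothetical test snippet, not part of the original code), you can compose a rotation from known ZYX Euler angles with cv2.Rodrigues and confirm that R2Angle recovers them:

import cv2
import numpy as np

# build R = Rz @ Ry @ Rx from known angles in degrees
ax, ay, az = 10.0, -5.0, 30.0
Rx, _ = cv2.Rodrigues(np.array([np.radians(ax), 0.0, 0.0]))
Ry, _ = cv2.Rodrigues(np.array([0.0, np.radians(ay), 0.0]))
Rz, _ = cv2.Rodrigues(np.array([0.0, 0.0, np.radians(az)]))
R = Rz @ Ry @ Rx
print(R2Angle(R))  # expect approximately (10.0, -5.0, 30.0)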
The optical-flow implementation, LK.py:
"""
Lucas-Kanade optical flow
sparse optical flow
"""
import numpy as np
import cv2
'''
LK for video
'''
def calcOpticalFlowPyrLKByVideo(cap):
    """
    :param cap: video capture
    :return: None
    """
    # Shi-Tomasi corner parameters: max number of corners; minimum quality
    # (corners scoring below 0.3 of the best are rejected); minimum Euclidean distance
    feature_params = dict(maxCorners=100, qualityLevel=0.3, minDistance=7)
    # Lucas-Kanade parameters:
    # winSize is the window used to solve for (u, v); maxLevel is the number of pyramid levels
    lk_params = dict(winSize=(30, 30), maxLevel=6)
    # random colors for the tracks
    color = np.random.randint(0, 255, (100, 3))
    # grab the first frame
    ret, oldframe = cap.read()
    old_gray = cv2.cvtColor(oldframe, cv2.COLOR_BGR2GRAY)
    # detect initial corners (Shi-Tomasi): within minDistance, a stronger
    # corner suppresses weaker ones
    p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, **feature_params)
    # mask image for drawing the tracks
    mask = np.zeros_like(oldframe)
    i = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        framegray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # track the corners from the previous frame into the current one
        p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, framegray, p0, None, **lk_params)
        # st == 1 marks points that were tracked successfully
        good_new = p1[st == 1]
        good_old = p0[st == 1]
        # draw the tracks (OpenCV drawing calls need integer coordinates)
        for j, (new, old) in enumerate(zip(good_new, good_old)):
            a, b = new.ravel()
            c, d = old.ravel()
            mask = cv2.line(mask, (int(a), int(b)), (int(c), int(d)), color[j].tolist(), 2)
            frame = cv2.circle(frame, (int(a), int(b)), 5, color[j].tolist(), -1)
        img = cv2.add(frame, mask)
        cv2.imshow('frame', img)
        # cv2.imwrite(str(i) + "aa.jpg", img)
        i += 1
        k = cv2.waitKey(150) & 0xff
        if k == 27:
            break
        # update the previous frame and points
        old_gray = framegray.copy()
        p0 = good_new.reshape(-1, 1, 2)
    cv2.destroyAllWindows()
    cap.release()
"""
LK for image
"""
def calcOpticalFlowPyrLKByImage(img_old, img_new):
    """
    :param img_old: previous frame
    :param img_new: current frame
    :return: matched point coordinates in both frames
    """
    old_frame = cv2.cvtColor(img_old, cv2.COLOR_BGR2GRAY)
    new_frame = cv2.cvtColor(img_new, cv2.COLOR_BGR2GRAY)
    feature_params = dict(maxCorners=100, qualityLevel=0.3, minDistance=7)
    lk_params = dict(winSize=(30, 30), maxLevel=6)
    # detect corners in the previous frame, then track them into the current frame
    p0 = cv2.goodFeaturesToTrack(old_frame, mask=None, **feature_params)
    p1, st, err = cv2.calcOpticalFlowPyrLK(old_frame, new_frame, p0, None, **lk_params)
    # keep only successfully tracked points
    p11 = p1[st == 1]
    p00 = p0[st == 1]
    # for i, (new, old) in enumerate(zip(p11, p00)):
    #     image_old = cv2.circle(img_old, (int(old[0]), int(old[1])), 5, (0, 0, 255), -1)
    #     image_new = cv2.circle(img_new, (int(new[0]), int(new[1])), 5, (0, 0, 255), -1)
    # cv2.imshow('new', image_new)
    # cv2.imshow('old', image_old)
    return p00, p11
# cap = cv2.VideoCapture('../asset/video/2.mp4')
# calcOpticalFlowPyrLKByVideo(cap)
# img_old = cv2.imread('../asset/image/1.bmp')
# img_new = cv2.imread('../asset/image/2.bmp')
# calcOpticalFlowPyrLKByImage(img_old, img_new)
# cv2.waitKey(0)
That leaves FeaturePointDetection.py, which implements the various feature-point detection and matching algorithms. To keep this post from running too long, see the dedicated article: FeaturePointDetection.py details. A rough sketch of those helpers follows below.
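For readers without that post, here is a minimal, hypothetical sketch of what the three helpers used above could look like: SIFT detection, FLANN matching with Lowe's ratio test, and a homography-based RANSAC inlier mask. The real implementations in FeaturePointDetection.py may differ; only the call signatures are taken from the script above.

import cv2
import numpy as np

def SIFT(image):
    # detect SIFT keypoints and compute descriptors on the grayscale image
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()
    return sift.detectAndCompute(gray, None)

def ByFlann(img1, img2, kp1, kp2, des1, des2, kind):
    # FLANN kd-tree matching followed by Lowe's ratio test
    flann = cv2.FlannBasedMatcher(dict(algorithm=1, trees=5), dict(checks=50))
    knn = flann.knnMatch(des1, des2, k=2)
    return [m for m, n in knn if m.distance < 0.7 * n.distance]

def RANSAC(img1, img2, kp1, kp2, matches):
    # fit a homography with RANSAC and return the inlier mask;
    # returning None tells the caller to skip this frame pair
    if len(matches) < 4:
        return kp1, kp2, matches, None
    src = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    dst = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
    H, mask = cv2.findHomography(src, dst, cv2.RANSAC, 5.0)
    return kp1, kp2, matches, (None if mask is None else mask.ravel().tolist())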
III. Summary
1. Implementing these algorithms in Python is quite convenient, which made it easy to time each one. The results:
SURF + kNN + RANSAC: 0.162 s per frame on average
Optical flow: 0.14 s per frame on average
SIFT + kNN + RANSAC: 0.16025 s per frame on average
2. These results are actually quite suspect. First, they are too slow: around 0.15 s per frame makes real-time operation difficult. Second, the three methods take almost exactly the same time, which should not happen, since SURF is considerably faster than SIFT and optical flow is faster still. My guess is that the bottleneck lies in the essential-matrix estimation: findEssentialMat runs RANSAC by default, which drags the speed down. A quick way to test that guess is sketched below.
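To check the hypothesis (a hypothetical profiling snippet; image_pre, image_new, p000 and p111 are taken from the feature-point script above), time the feature/matching stage and the essential-matrix stage separately:

import time
import cv2

t0 = time.time()
# feature detection and matching stage
kp1, des1 = SIFT(image_pre)
kp2, des2 = SIFT(image_new)
matches11 = ByFlann(image_pre, image_new, kp1, kp2, des1, des2, "SIFT")
t1 = time.time()
# essential-matrix stage (RANSAC runs inside findEssentialMat)
E, mask = cv2.findEssentialMat(p000, p111, focal=1.0, pp=(0, 0), method=cv2.RANSAC)
t2 = time.time()
print(f'features+matching: {t1 - t0:.4f}s, findEssentialMat: {t2 - t1:.4f}s')

If the second number dominates across all three pipelines, that would explain why their total per-frame times are nearly identical.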