使用 Python 对 ViBe 算法进行仿真,通过对一段视频的测试,发现以下 2 个问题:
仿真图像:
下方仿真代码从视频片段中读取图像帧,经 ViBe 计算得到区分前景与背景的二值图像,再将原始图像帧与 ViBe 输出图像左右拼接,组成输出视频:
import cv2
import numpy as np
import random
import sys
import math
# ---- ViBe algorithm parameters ----
N = 20  # number of samples kept in each pixel's model
R = 20  # radius of the match region in pixel-value space
T = 2   # number of samples that must match for a pixel to count as background
F = 2   # subsampling factor: the model is refreshed with probability 1/F

# ---- Expected video geometry and output frame rate ----
HEIGHT, WIDTH = 480, 640
FPS = 30

# ---- Inclusive range of frame indices to process ----
FRAME_BEGIN, FRAME_END = 5580, 6179

# Per-pixel ViBe model: N samples per pixel, 3 values per sample.
vibe_model = np.zeros(shape=(HEIGHT, WIDTH, N, 3), dtype=np.float32)

# Output image written by the ViBe step.
vibe_img = np.zeros(shape=(HEIGHT, WIDTH, 3), dtype=np.uint8)
def frames_capture(path):
    """Open the video at ``path`` and report its basic properties.

    Args:
        path: Location of the video, passed straight to ``cv2.VideoCapture``.

    Returns:
        ``(cap, count, height, width)``: the opened ``cv2.VideoCapture``
        followed by the frame count, frame height and frame width reported by
        the capture, each converted to ``int``.  Returns ``None`` (after
        printing a message) when the video cannot be opened.
    """
    capture = cv2.VideoCapture(path)
    if capture.isOpened():
        # Properties are queried in the order width, height, frame count.
        frame_width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        return capture, total_frames, frame_height, frame_width

    # Opening failed: report it and hand back nothing.
    print("fail to open video")
    return None
def vibe_init(frame, height, width, channels=3):
    """Initialise the ViBe model from a single frame.

    Each of the ``N`` samples of every pixel ``(i, j)`` with ``i < height`` and
    ``j < width`` is filled with the value of a pixel picked uniformly at
    random among the in-bounds positions of the 3x3 window centred on
    ``(i, j)`` (the centre itself included).  The module-level ``vibe_model``
    array is written in place.

    Args:
        frame: Image indexed as ``frame[row, col, channel]``.
        height: Number of rows of ``frame`` to initialise.
        width: Number of columns of ``frame`` to initialise.
        channels: Accepted for interface compatibility; not used.

    Returns:
        None.
    """
    # Row/column index grids that broadcast to (height, width, N).
    row_idx = np.arange(height).reshape(height, 1, 1)
    col_idx = np.arange(width).reshape(1, width, 1)

    # Draw one (di, dj) offset in {-1, 0, 1}^2 for every sample of every pixel.
    src_i = row_idx + np.random.randint(-1, 2, size=(height, width, N))
    src_j = col_idx + np.random.randint(-1, 2, size=(height, width, N))

    # Rejection sampling: redraw both offsets for every pick that fell outside
    # the image, so border pixels stay uniform over their valid neighbours.
    while True:
        outside = (src_i < 0) | (src_i >= height) | (src_j < 0) | (src_j >= width)
        bad_i, bad_j, bad_n = np.nonzero(outside)
        if bad_i.size == 0:
            break
        src_i[bad_i, bad_j, bad_n] = bad_i + np.random.randint(-1, 2, size=bad_i.size)
        src_j[bad_i, bad_j, bad_n] = bad_j + np.random.randint(-1, 2, size=bad_i.size)

    # Gather all chosen neighbour values at once and store them as float32.
    vibe_model[:height, :width, :N] = frame[src_i, src_j].astype(np.float32)
def vibe(frame):
    """Run one ViBe step on ``frame``: classify each pixel, then update the model.

    A pixel is background when at least ``T`` of its model samples lie closer
    than ``R`` (Euclidean distance over the channel values) to the pixel's
    current value.  Background pixels are written to the module-level
    ``vibe_img`` as 0 and foreground pixels as 255 in every channel.

    For each background pixel, independently with probability ``1/F`` each:
      * one random sample of the pixel's own model is replaced by the current
        value, and
      * one random sample of a random in-bounds position of the 3x3 window
        around the pixel (the pixel itself included) is replaced by the
        current value.

    Pixels are processed in row-major order and the model is updated as the
    scan proceeds, so an update can influence pixels classified later in the
    same frame.

    Args:
        frame: Image indexed as ``frame[row, col, channel]`` whose channel
            count matches the last axis of ``vibe_model``.

    Returns:
        None.  Results are written in place to ``vibe_img`` and ``vibe_model``.
    """
    rows, cols, _ = frame.shape
    # Convert once; every comparison and model update below reuses this copy.
    frame_float = frame.astype(np.float32)
    # Compare squared distances so no square root is needed per sample.
    radius_sq = float(R) * float(R)

    for i in range(rows):
        for j in range(cols):
            pixel = frame_float[i, j]

            # Squared distance from the current value to every model sample.
            diff = vibe_model[i, j] - pixel
            matches = np.count_nonzero(np.sum(diff * diff, axis=1) < radius_sq)

            if matches < T:
                # Foreground: mark white; the model is not updated.
                vibe_img[i, j] = 255
                continue

            # Background: mark black.
            vibe_img[i, j] = 0

            # With probability 1/F refresh one sample of this pixel's model.
            if random.randint(0, F - 1) == 0:
                vibe_model[i, j, random.randint(0, N - 1)] = pixel

            # With probability 1/F refresh one sample of a neighbour's model.
            if random.randint(0, F - 1) == 0:
                # Redraw until the neighbour lies inside this frame.  Bounds
                # come from the frame itself rather than from module globals.
                while True:
                    ni = i + np.random.randint(-1, 2)
                    nj = j + np.random.randint(-1, 2)
                    if 0 <= ni < rows and 0 <= nj < cols:
                        break
                vibe_model[ni, nj, random.randint(0, N - 1)] = pixel
if __name__ == "__main__":
    # frames_capture() prints a message and returns None when the video cannot
    # be opened, so check the result before unpacking it.
    capture_info = frames_capture("d:/vot/lala.mp4")
    if capture_info is None:
        sys.exit(1)
    # NOTE: `height` and `width` are intentionally kept as module-level names
    # with this spelling in case other code in the file reads them as globals.
    cap, count, height, width = capture_info
    writer = None
    try:
        # The model buffers are allocated for exactly HEIGHT x WIDTH frames.
        if height != HEIGHT or width != WIDTH:
            print('invalid video resolution')
            sys.exit(1)
        # The requested frame range must be non-empty and inside the video.
        if FRAME_BEGIN < 0 or FRAME_BEGIN >= FRAME_END or FRAME_END > (count-1):
            print('invalid video property')
            sys.exit(1)

        # Create the output video: original and ViBe image side by side.
        # NOTE(review): 'XVID' is usually paired with .avi; confirm that the
        # installed OpenCV backend writes a playable file for this .mp4 path.
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        writer = cv2.VideoWriter('d:/vot/vibe.mp4', fourcc, FPS, (WIDTH*2, HEIGHT), True)
        if not writer.isOpened():
            # Keep going so the on-screen preview still works.
            print('fail to open video writer, output video will not be saved')

        # Reusable double-width frame handed to the writer.
        w_frame = np.zeros((HEIGHT, WIDTH*2, 3), dtype=np.uint8)

        # Seek to the first frame and use it to initialise the ViBe model.
        cap.set(cv2.CAP_PROP_POS_FRAMES, FRAME_BEGIN)
        ok, f = cap.read()
        if not ok:
            print('fail to read frame {}'.format(FRAME_BEGIN))
            sys.exit(1)
        vibe_init(f, height, width)

        # Rewind so the first frame is also processed by the main loop.
        cap.set(cv2.CAP_PROP_POS_FRAMES, FRAME_BEGIN)
        for i in range(FRAME_BEGIN, FRAME_END+1):
            print('frame {}'.format(i))
            # Read the current frame; stop cleanly if decoding fails.
            ok, f = cap.read()
            if not ok:
                print('fail to read frame {}'.format(i))
                break
            # Run ViBe; the result is left in the module-level vibe_img.
            vibe(f)
            # Left half: original frame.  Right half: ViBe output.
            w_frame[:, 0:WIDTH, :] = f
            w_frame[:, WIDTH:(WIDTH*2), :] = vibe_img
            writer.write(w_frame)
            # Show both images.
            cv2.imshow('original', f)
            cv2.imshow('vibe', vibe_img)
            # Any key press ends the run.
            if cv2.waitKey(10) != -1:
                break
    finally:
        # Always release the capture/writer and close windows, including on
        # the early sys.exit() paths above.
        cap.release()
        if writer is not None:
            writer.release()
        cv2.destroyAllWindows()