https://gitee.com/seuvcl/CVPR2020-OOH
Very minimal to set up.
Notes:
data_folder has to be set to your own path.
In the estimate_translation_np function in utils\imutils.py I added a pseudo-inverse fallback. It is not the best way to handle the problem, but it will do for now:
import sys
import numpy as np
def estimate_translation_np(S, joints_2d, joints_conf, focal_length=5000, cx=128., cy=128.):
    num_joints = S.shape[0]
    ...
    # square matrix
    A = np.dot(Q.T, Q)
    b = np.dot(Q.T, c)
    # check whether A is (close to) singular
    if np.linalg.cond(A) < 1 / sys.float_info.epsilon:
        # well conditioned: solve the normal equations directly
        trans = np.linalg.solve(A, b)
    else:
        # singular: fall back to the pseudo-inverse
        pinv = np.linalg.pinv(A)
        trans = np.dot(pinv, b)
    return trans
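As a quick illustration of why the fallback is there (a standalone NumPy sketch, not code from the repo): for a singular normal-equation matrix, np.linalg.solve raises LinAlgError, while the pseudo-inverse still returns a minimum-norm least-squares solution.
import numpy as np

A = np.array([[1., 2., 3.],
              [2., 4., 6.],   # second row is a multiple of the first, so A is singular
              [1., 0., 1.]])
b = np.array([1., 2., 3.])
try:
    np.linalg.solve(A, b)
except np.linalg.LinAlgError as e:
    print("solve failed:", e)          # "Singular matrix"
trans = np.dot(np.linalg.pinv(A), b)   # least-squares solution via the pseudo-inverse
print(trans)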
Recording the frame-extraction code here for future reference:
import os
import time
from datetime import datetime

import cv2

# video_path and logger are assumed to be defined/configured elsewhere
cap = cv2.VideoCapture(video_path)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_id = 1
gap = 4  # keep every 4th frame
save_path = f"demo_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}"
os.makedirs(save_path, exist_ok=True)
while True:
    start_time = time.time()
    ret, frame = cap.read()
    if not ret:  # check for end of video before the sampling test
        break
    if frame_id % gap != 0:
        frame_id += 1
        continue
    elapsed_time = time.time() - start_time
    logger.info('Frame {}/{} ({:.2f} ms)'.format(frame_id, frame_count, elapsed_time * 1000))
    id_save_path = f"{save_path}/{frame_id:05d}.png"
    cv2.imwrite(id_save_path, frame)
    frame_id += 1
cap.release()
And the code for stitching the outputs into a result video:
import os
from datetime import datetime

import cv2
import numpy as np

def create_video(image_folder, video_name, fps):
    images = [img for img in os.listdir(image_folder) if img.endswith(".jpg")]
    images.sort()  # make sure the frames are in order
    # read the first image to determine the video resolution
    frame = cv2.imread(os.path.join(image_folder, images[0]))
    height, width, layers = frame.shape
    video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    for image in images:
        video.write(cv2.imread(os.path.join(image_folder, image)))
    video.release()

path = "output/demo/01.02-15h40m11s/images"  # replace with your own output directory
# use a temporary folder for the stitched frames, in case there are too many to hold in memory
temp_folder = f"temp_images_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}"
os.makedirs(temp_folder, exist_ok=True)
num = len(os.listdir(path)) // 6
for i in range(num):
    img_path = f"{path}/{i:05d}_img.jpg"
    mask_path = f"{path}/{i:05d}_mask.jpg"
    render_path = f"{path}/{i:05d}_render.jpg"
    heat_path = f"{path}/{i:05d}_heatmap.jpg"
    img = cv2.imread(img_path)
    mask = cv2.imread(mask_path)
    render = cv2.imread(render_path)
    heat = cv2.imread(heat_path)
    # stitch the four outputs into a 2x2 grid
    combined_image_1 = np.hstack((img, heat))
    combined_image_2 = np.hstack((mask, render))
    combined_image = np.vstack((combined_image_1, combined_image_2))
    # save the stitched image
    cv2.imwrite(f"{temp_folder}/{i:05d}.jpg", combined_image)

# create the video
create_video(temp_folder, f"output_video_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.mp4", 10)  # 10 FPS
Remaining issues
None of the four kinds of output images are mapped back to the original resolution, so they look quite blurry, and the black padding border is not removed either; something to optimize later (see the paste-back sketch after extract_square_roi below).
The details of the algorithm can wait. Roughly, the skinned mesh is derived from the segmentation silhouette plus a preset UV map, with the joints apparently only playing an auxiliary role. So on frames where the segmentation is poor, the generated mesh also turns out mysterious (although the mask has several channels, so presumably it does separate foreground from background, somewhat like SAM does), which makes these failures very obvious (it is also quite funny that some frames containing only a head still get a hallucinated squatting body). But sometimes the arm is clearly raised and the mesh still fails to line up with it, which is strange.
def extract_square_roi(image, bbox):
    x, y, width, height = bbox
    cx, cy = x + width // 2, y + height // 2  # center of the bbox
    # Determine the side length of the square (max of width and height of the bbox)
    side_length = max(width, height)
    # Calculate the square's top left corner
    x1 = int(cx - side_length // 2)
    y1 = int(cy - side_length // 2)
    # Calculate the square's bottom right corner
    x2 = x1 + side_length
    y2 = y1 + side_length
    # Pad the image if necessary
    top, bottom, left, right = 0, 0, 0, 0
    if x1 < 0:
        left = abs(x1)
    if y1 < 0:
        top = abs(y1)
    if x2 > image.shape[1]:
        right = int(x2 - image.shape[1])
    if y2 > image.shape[0]:
        bottom = int(y2 - image.shape[0])
    padded_image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    # Adjust coordinates for the padded image
    x1_adj = int(x1 + left)
    y1_adj = int(y1 + top)
    x2_adj = int(x2 + left)
    y2_adj = int(y2 + top)
    # Crop the square ROI
    square_roi = padded_image[y1_adj:y2_adj, x1_adj:x2_adj, :]
    return square_roi
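For the blur / black-border issue noted above, a minimal sketch of the obvious fix, under my own assumptions (paste_back is a hypothetical helper, not part of the repo): resize the square model output back to the original crop size and copy only the part that overlaps the original frame, which discards the padded border.
import cv2

def paste_back(image, bbox, square_out):
    # Inverse of extract_square_roi: resize a square output (e.g. the render)
    # back to the original crop resolution and paste the overlapping part onto
    # a copy of the original frame, dropping the padded black border.
    x, y, width, height = bbox
    cx, cy = x + width // 2, y + height // 2
    side_length = max(width, height)
    x1 = int(cx - side_length // 2)
    y1 = int(cy - side_length // 2)
    out = cv2.resize(square_out, (side_length, side_length))
    # intersection of the square crop with the original image (the rest was padding)
    xs, ys = max(x1, 0), max(y1, 0)
    xe = min(x1 + side_length, image.shape[1])
    ye = min(y1 + side_length, image.shape[0])
    result = image.copy()
    result[ys:ye, xs:xe] = out[ys - y1:ye - y1, xs - x1:xe - x1]
    return result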
In the end, all of these errors come from the silhouette; it comes down to how well the mesh "understands" the silhouetted body. So the skeleton joints really do need to be brought back in, and temporal continuity will probably have to be considered later as well, at which point this starts to look a lot like gait recognition.
That said, the alignment of the mesh onto the image is also sometimes off: the oversized-giant mesh mentioned earlier can show up even on close-up single-person frames, or the mesh can come out far too small, and I still do not understand how the alignment scale is computed. The person who handed me this project also said that the earlier SMPL methods cannot draw the mesh directly onto the original image, and that you need this kind of UV-map-based approach to do that.
By the way, the README says you can change the fitting option in the yaml so that it also works with SMPL, but trying that produces an error:
Traceback (most recent call last):
File "D:\Doctor\CVPR2020-OOH-master\demo.py", line 40, in <module>
main(**args)
File "D:\Doctor\CVPR2020-OOH-master\demo.py", line 34, in main
demo(model, test_loader, viz=viz, device=device)
File "D:\Doctor\CVPR2020-OOH-master\process.py", line 34, in demo
model.save_results_render(results, i)
File "D:\Doctor\CVPR2020-OOH-master\modules.py", line 226, in save_results_render
params, mesh = self.fitting(mesh)
File "D:\Doctor\CVPR2020-OOH-master\utils\fitting\SMPLfitting.py", line 20, in __call__
init_guess(self.setting, mesh)
File "D:\Doctor\CVPR2020-OOH-master\utils\fitting\utils.py", line 110, in init_guess
model_output = model(return_verts=True, return_full_pose=True, body_pose=init_pose)
File "E:\Anaconda3\envs\gait\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "D:\Doctor\CVPR2020-OOH-master\utils\fitting\smplx\body_models_scale.py", line 388, in forward
vertices, joints = lbs(betas, full_pose, self.v_template,
File "D:\Doctor\CVPR2020-OOH-master\utils\fitting\smplx\lbs.py", line 179, in lbs
v_shaped = v_template + blend_shapes(betas, shapedirs)
File "D:\Doctor\CVPR2020-OOH-master\utils\fitting\smplx\lbs.py", line 268, in blend_shapes
blend_shape = torch.einsum('bl,mkl->bmk', [betas, shape_disps])
File "E:\Anaconda3\envs\gait\lib\site-packages\torch\functional.py", line 297, in einsum
return einsum(equation, *_operands)
File "E:\Anaconda3\envs\gait\lib\site-packages\torch\functional.py", line 299, in einsum
return _VF.einsum(equation, operands) # type: ignore[attr-defined]
RuntimeError: einsum(): operands do not broadcast with remapped shapes [original->remapped]: [1, 10]->[1, 1, 1, 10] [6890, 300, 3]->[1, 6890, 300, 3]
In short, the dimensions of the two preset tensors, betas and shape_disps, do not match, so I tried betas = betas.repeat(1, 30); but the final result looks about the same as without the fitting step, even though it becomes painfully slow.
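A minimal reproduction of the mismatch with dummy tensors of the shapes printed in the traceback (my own sketch, not a verified fix): the einsum 'bl,mkl->bmk' contracts over the shared index l, so the last dimension of betas has to equal the last dimension of shape_disps, and with this model they simply differ. The repeat(1, 30) hack only tiles the 10 predicted betas so that the shapes can be forced to line up; it does not give the extra components meaningful values, which would explain why the fitted result barely differs from no fitting.
import torch

betas = torch.zeros(1, 10)               # shape coefficients predicted by the network
shape_disps = torch.zeros(6890, 300, 3)  # shapedirs of the model loaded via the yaml
try:
    torch.einsum('bl,mkl->bmk', [betas, shape_disps])
except RuntimeError as e:
    print(e)                             # the same broadcast error as in the traceback
betas_tiled = betas.repeat(1, 30)        # (1, 300): the 10 values tiled 30 times
print(betas_tiled.shape)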