上一话3D视觉——3.人体姿态估计(Pose Estimation) 算法对比 即 效果展示——MediaPipe与OpenPosehttps://blog.csdn.net/XiaoyYidiaodiao/article/details/125571632?spm=1001.2014.3001.5502这一章 讲述 使用MediaPipe的手势识别
1.solutions.hands
import mediapipe as mp
mp_hands = mp.solutions.hands
mediapipe手势模块(.solutions.hands)将手分成21个点(0-20)如下图1. ,可通过判断手势的角度,来识别是什么手势。8号关键点很重要,因为做HCI(人机交互)都是以8号关键点为要素。
2.mp_hand.Hands()
static_image_mode 表示 静态图像还是连续帧视频;
max_num_hands 表示 最多识别多少只手,手越多识别越慢;
min_detection_confidence 表示 检测置信度阈值;
min_tracking_confidence 表示 各帧之间跟踪置信度阈值;
hands = mp_hands.Hands(static_image_mode=True,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
3.mp.solutions.drawing_utils
绘图
draw = mp.solutions.drawing_utils
draw.draw_landmarks(img, hand, mp_hands.HAND_CONNECTIONS)
import cv2
import mediapipe as mp
import matplotlib.pyplot as plt
if __name__ == '__main__':
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
draw = mp.solutions.drawing_utils
img = cv2.imread("3.jpg")
# Flip Horizontal
img = cv2.flip(img, 1)
# BGR to RGB
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = hands.process(img)
# detected hands
if results.multi_hand_landmarks:
for hand_idx, _ in enumerate(results.multi_hand_landmarks):
hand = results.multi_hand_landmarks[hand_idx]
draw.draw_landmarks(img, hand, mp_hands.HAND_CONNECTIONS)
plt.imshow(img)
plt.show()
import cv2
import mediapipe as mp
import matplotlib.pyplot as plt
if __name__ == "__main__":
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
draw = mp.soultions.drawing_utils
img = cv2.imread("3.jpg")
img = cv2.filp(img, 1)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = hands.process(img)
print(results.multi_handedness)
显示置信度与左右手
print(results.multi_handedness)
# results.multi_handedness
结果如下,其中:label 表示 左右手;socre 表示 置信度。
[classification {
index: 0
score: 0.9686367511749268
label: "Left"
}
, classification {
index: 1
score: 0.9293265342712402
label: "Right"
}
]
Process finished with exit code 0
调用索引为1的置信度或者标签(左右手)
print(results.multi_handedness[1].classification[0].label)
# results.multi_handedness[1].classification[0].label
print(results.multi_handedness[1].classification[0].score)
# results.multi_handedness[1].classification[0].scor
结果如下
Right
0.9293265342712402
Process finished with exit code 0
关键点坐标
所有手指的关键点坐标都显示出来
print(results.multi_hand_landmarks)
# results.multi_hand_landmarks
结果如下,其中这个z轴不是真正的归一化或者真实距离,是与第0号关键点(手腕根部)相对的单位,所以这个在另外一个方面这也算是2.5D
[landmark {
x: 0.2627016007900238
y: 0.6694213151931763
z: 5.427047540251806e-07
}
landmark {
x: 0.33990585803985596
y: 0.6192424297332764
z: -0.03650109842419624
}
landmark {
x: 0.393616259098053
y: 0.5356684923171997
z: -0.052688632160425186
}
landmark {
x: 0.43515193462371826
y: 0.46728551387786865
z: -0.06730890274047852
}
landmark {
x: 0.47741779685020447
y: 0.4358704090118408
z: -0.08190854638814926
}
landmark {
x: 0.3136638104915619
y: 0.3786224126815796
z: -0.031268805265426636
}
landmark {
x: 0.32385849952697754
y: 0.2627469599246979
z: -0.05158748850226402
}
landmark {
x: 0.32598018646240234
y: 0.19216321408748627
z: -0.06765355914831161
}
landmark {
x: 0.32443925738334656
y: 0.13277988135814667
z: -0.08096525818109512
}
landmark {
x: 0.2579415738582611
y: 0.37382692098617554
z: -0.032878294587135315
}
landmark {
x: 0.2418910712003708
y: 0.24533957242965698
z: -0.052028004080057144
}
landmark {
x: 0.2304030954837799
y: 0.16545917093753815
z: -0.06916359812021255
}
landmark {
x: 0.2175263613462448
y: 0.10054746270179749
z: -0.08164216578006744
}
landmark {
x: 0.2122785747051239
y: 0.4006640613079071
z: -0.03896263241767883
}
landmark {
x: 0.16991624236106873
y: 0.29440340399742126
z: -0.06328895688056946
}
landmark {
x: 0.14282439649105072
y: 0.22940337657928467
z: -0.0850684642791748
}
landmark {
x: 0.12077423930168152
y: 0.17327186465263367
z: -0.09951525926589966
}
landmark {
x: 0.17542198300361633
y: 0.45321160554885864
z: -0.04842161759734154
}
landmark {
x: 0.11945687234401703
y: 0.3971719741821289
z: -0.07581596076488495
}
landmark {
x: 0.08262123167514801
y: 0.364059180021286
z: -0.0922597199678421
}
landmark {
x: 0.05292154848575592
y: 0.3294071555137634
z: -0.10204877704381943
}
, landmark {
x: 0.7470765113830566
y: 0.6501559019088745
z: 5.611524898085918e-07
}
landmark {
x: 0.6678823232650757
y: 0.5958800911903381
z: -0.02732565440237522
}
landmark {
x: 0.6151978969573975
y: 0.5073886513710022
z: -0.04038363695144653
}
landmark {
x: 0.5764827728271484
y: 0.4352443814277649
z: -0.05264268442988396
}
landmark {
x: 0.5317870378494263
y: 0.39630216360092163
z: -0.0660330131649971
}
landmark {
x: 0.6896227598190308
y: 0.36289966106414795
z: -0.0206490196287632
}
landmark {
x: 0.6775739192962646
y: 0.24553297460079193
z: -0.040212471038103104
}
landmark {
x: 0.6752403974533081
y: 0.1725052148103714
z: -0.0579310804605484
}
landmark {
x: 0.6765506267547607
y: 0.10879574716091156
z: -0.07280831784009933
}
landmark {
x: 0.7441056370735168
y: 0.35811278223991394
z: -0.027878733351826668
}
landmark {
x: 0.7618186473846436
y: 0.22912392020225525
z: -0.04549985006451607
}
landmark {
x: 0.7751038670539856
y: 0.14770638942718506
z: -0.06489630043506622
}
landmark {
x: 0.788221538066864
y: 0.07831943035125732
z: -0.08037586510181427
}
landmark {
x: 0.7893437743186951
y: 0.38241785764694214
z: -0.03979477286338806
}
landmark {
x: 0.8274303674697876
y: 0.27563461661338806
z: -0.06700978428125381
}
landmark {
x: 0.8543692827224731
y: 0.20750769972801208
z: -0.09000862389802933
}
landmark {
x: 0.877074122428894
y: 0.14460964500904083
z: -0.10605450719594955
}
landmark {
x: 0.8275361061096191
y: 0.4312361180782318
z: -0.05377437546849251
}
landmark {
x: 0.879731297492981
y: 0.3712681233882904
z: -0.08328921347856522
}
landmark {
x: 0.9155224561691284
y: 0.33470967411994934
z: -0.09889543056488037
}
landmark {
x: 0.9464077949523926
y: 0.297884464263916
z: -0.109173983335495
}
]
Process finished with exit code 0
统计手的数量
print(len(results.multi_hand_landmarks))
# len(results.multi_hand_landmarks)
结果如下
2
Process finished with exit code 0
获取索引为1的第3号关键点的坐标
print(results.multi_hand_landmarks[1].landmark[3])
# results.multi_hand_landmarks[1].landmark[3]
结果如下
x: 0.5764827728271484
y: 0.4352443814277649
z: -0.05264268442988396
Process finished with exit code 0
以上的x,y坐标是根据高和宽归一化之后的结果,将其转化为图像中坐标结果
height, width, _ = img.shape
print("height:{},width:{}".format(height, width))
cx = results.multi_hand_landmarks[1].landmark[3].x * width
cy = results.multi_hand_landmarks[1].landmark[3].y * height
print("cx:{}, cy:{}".format(cx, cy))
结果如下
height:512,width:720
cx:415.0675964355469, cy:222.84512329101562
Process finished with exit code 0
关键点之间的连接
print(mp_hands.HAND_CONNECTIONS)
# mp_hands.HAND_CONNECTIONS
结果如下
frozenset({(3, 4),
(0, 5),
(17, 18),
(0, 17),
(13, 14),
(13, 17),
(18, 19),
(5, 6),
(5, 9),
(14, 15),
(0, 1),
(9, 10),
(1, 2),
(9, 13),
(10, 11),
(19, 20),
(6, 7),
(15, 16),
(2, 3),
(11, 12),
(7, 8)})
Process finished with exit code 0
显示左右手信息
if results.multi_hand_landmarks:
hand_info = ""
for h_idx, hand in enumerate(results.multi_hand_landmarks):
hand_info += str(h_idx) + ":" + str(results.multi_handedness[h_idx].classification[0].label) + ", "
print(hand_info)
结果如下
0:Left, 1:Right,
Process finished with exit code 0
完整代码
import cv2
import mediapipe as mp
import matplotlib.pyplot as plt
if __name__ == '__main__':
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True,
max_num_hands=2,
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
draw = mp.solutions.drawing_utils
img = cv2.imread("3.jpg")
img = cv2.flip(img, 1)
height, width, _ = img.shape
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = hands.process(img)
if results.multi_hand_landmarks:
handness_str = ''
index_finger_tip_str = ''
for h_idx, hand in enumerate(results.multi_hand_landmarks):
# get 21 key points of hand
hand_info = results.multi_hand_landmarks[h_idx]
# log left and right hands info
draw.draw_landmarks(img, hand_info, mp_hands.HAND_CONNECTIONS)
temp_handness = results.multi_handedness[h_idx].classification[0].label
handness_str += "{}:{} ".format(h_idx, temp_handness)
cz0 = hand_info.landmark[0].z
for idx, keypoint in enumerate(hand_info.landmark):
cx = int(keypoint.x * width)
cy = int(keypoint.y * height)
cz = keypoint.z
depth_z = cz0 - cz
# radius about depth_z
radius = int(6 * (1 + depth_z))
if idx == 0:
img = cv2.circle(img, (cx, cy), radius * 2, (0, 0, 255), -1)
elif idx == 8:
img = cv2.circle(img, (cx, cy), radius * 2, (193, 182, 255), -1)
index_finger_tip_str += '{}:{:.2f} '.format(h_idx, depth_z)
elif idx in [1, 5, 9, 13, 17]:
img = cv2.circle(img, (cx, cy), radius, (16, 144, 247), -1)
elif idx in [2, 6, 10, 14, 18]:
img = cv2.circle(img, (cx, cy), radius, (1, 240, 255), -1)
elif idx in [3, 7, 11, 15, 19]:
img = cv2.circle(img, (cx, cy), radius, (140, 47, 240), -1)
elif idx in [4, 12, 16, 20]:
img = cv2.circle(img, (cx, cy), radius, (223, 155, 60), -1)
img = cv2.putText(img, handness_str, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
img = cv2.putText(img, index_finger_tip_str, (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
plt.imshow(img)
plt.show()
这次就不用那个莫兰迪色了,朋友说我用了那个莫兰迪色系还不如本来的颜色明显。
运行程序
简化版代码
import cv2
import mediapipe as mp
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False,
max_num_hands=2,
min_tracking_confidence=0.5,
min_detection_confidence=0.5)
draw = mp.solutions.drawing_utils
def process_frame(img):
img = cv2.flip(img, 1)
height, width, _ = img.shape
results = hands.process(img)
if results.multi_hand_landmarks:
for h_idx, hand in enumerate(results.multi_hand_landmarks):
draw.draw_landmarks(img, hand, mp_hands.HAND_CONNECTIONS)
return img
if __name__ == '__main__':
cap = cv2.VideoCapture()
cap.open(0)
while cap.isOpened():
success, frame = cap.read()
if not success:
raise ValueError("Error")
frame = process_frame(frame)
cv2.imshow("hand", frame)
if cv2.waitKey(1) in [ord('q'), 27]:
break
cap.release()
cv2.destroyAllWindows()
优化版代码
import cv2
import mediapipe as mp
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False,
max_num_hands=2,
min_tracking_confidence=0.5,
min_detection_confidence=0.5)
draw = mp.solutions.drawing_utils
def process_frame(img):
img = cv2.flip(img, 1)
height, width, _ = img.shape
results = hands.process(img)
handness_str = ''
index_finger_tip_str = ''
if results.multi_hand_landmarks:
for h_idx, hand in enumerate(results.multi_hand_landmarks):
draw.draw_landmarks(img, hand, mp_hands.HAND_CONNECTIONS)
# get 21 key points of hand
hand_info = results.multi_hand_landmarks[h_idx]
# log left and right hands info
temp_handness = results.multi_handedness[h_idx].classification[0].label
handness_str += "{}:{} ".format(h_idx, temp_handness)
cz0 = hand_info.landmark[0].z
for idx, keypoint in enumerate(hand_info.landmark):
cx = int(keypoint.x * width)
cy = int(keypoint.y * height)
cz = keypoint.z
depth_z = cz0 - cz
# radius about depth_z
radius = int(6 * (1 + depth_z))
if idx == 0:
img = cv2.circle(img, (cx, cy), radius * 2, (0, 0, 255), -1)
elif idx == 8:
img = cv2.circle(img, (cx, cy), radius * 2, (193, 182, 255), -1)
index_finger_tip_str += '{}:{:.2f} '.format(h_idx, depth_z)
elif idx in [1, 5, 9, 13, 17]:
img = cv2.circle(img, (cx, cy), radius, (16, 144, 247), -1)
elif idx in [2, 6, 10, 14, 18]:
img = cv2.circle(img, (cx, cy), radius, (1, 240, 255), -1)
elif idx in [3, 7, 11, 15, 19]:
img = cv2.circle(img, (cx, cy), radius, (140, 47, 240), -1)
elif idx in [4, 12, 16, 20]:
img = cv2.circle(img, (cx, cy), radius, (223, 155, 60), -1)
img = cv2.putText(img, handness_str, (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
img = cv2.putText(img, index_finger_tip_str, (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
return img
if __name__ == '__main__':
cap = cv2.VideoCapture()
cap.open(0)
while cap.isOpened():
success, frame = cap.read()
if not success:
raise ValueError("Error")
frame = process_frame(frame)
cv2.imshow("hand", frame)
if cv2.waitKey(1) in [ord('q'), 27]:
break
cap.release()
cv2.destroyAllWindows()
此代码能正常运行,不展示运行结果!
简化版完整代码
import cv2
from tqdm import tqdm
import mediapipe as mp
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False,
max_num_hands=2,
min_tracking_confidence=0.5,
min_detection_confidence=0.5)
draw = mp.solutions.drawing_utils
def process_frame(img):
img = cv2.flip(img, 1)
height, width, _ = img.shape
results = hands.process(img)
if results.multi_hand_landmarks:
for h_idx, hand in enumerate(results.multi_hand_landmarks):
draw.draw_landmarks(img, hand, mp_hands.HAND_CONNECTIONS)
return img
def out_video(input):
file = input.split("/")[-1]
output = "out-" + file
print("It will start processing video: {}".format(input))
cap = cv2.VideoCapture(input)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_size = (cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# create VideoWriter, VideoWriter_fourcc is video decode
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = cap.get(cv2.CAP_PROP_FPS)
out = cv2.VideoWriter(output, fourcc, fps, (int(frame_size[0]), int(frame_size[1])))
with tqdm(range(frame_count)) as pbar:
while cap.isOpened:
success, frame = cap.read()
if not success:
break
frame = process_frame(frame)
out.write(frame)
pbar.update(1)
pbar.close()
cv2.destroyAllWindows()
out.release()
cap.release()
print("{} finished!".format(output))
if __name__ == '__main__':
video_dirs = "7.mp4"
out_video(video_dirs)
运行结果
此视频是我在网上随便找来做实验的
手势识别
优化版完整代码
import cv2
import time
from tqdm import tqdm
import mediapipe as mp
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False,
max_num_hands=2,
min_tracking_confidence=0.5,
min_detection_confidence=0.5)
draw = mp.solutions.drawing_utils
def process_frame(img):
t0 = time.time()
img = cv2.flip(img, 1)
height, width, _ = img.shape
results = hands.process(img)
handness_str = ''
index_finger_tip_str = ''
if results.multi_hand_landmarks:
for h_idx, hand in enumerate(results.multi_hand_landmarks):
draw.draw_landmarks(img, hand, mp_hands.HAND_CONNECTIONS)
# get 21 key points of hand
hand_info = results.multi_hand_landmarks[h_idx]
# log left and right hands info
temp_handness = results.multi_handedness[h_idx].classification[0].label
handness_str += "{}:{} ".format(h_idx, temp_handness)
cz0 = hand_info.landmark[0].z
for idx, keypoint in enumerate(hand_info.landmark):
cx = int(keypoint.x * width)
cy = int(keypoint.y * height)
cz = keypoint.z
depth_z = cz0 - cz
# radius about depth_z
radius = int(6 * (1 + depth_z))
if idx == 0:
img = cv2.circle(img, (cx, cy), radius * 2, (0, 0, 255), -1)
elif idx == 8:
img = cv2.circle(img, (cx, cy), radius * 2, (193, 182, 255), -1)
index_finger_tip_str += '{}:{:.2f} '.format(h_idx, depth_z)
elif idx in [1, 5, 9, 13, 17]:
img = cv2.circle(img, (cx, cy), radius, (16, 144, 247), -1)
elif idx in [2, 6, 10, 14, 18]:
img = cv2.circle(img, (cx, cy), radius, (1, 240, 255), -1)
elif idx in [3, 7, 11, 15, 19]:
img = cv2.circle(img, (cx, cy), radius, (140, 47, 240), -1)
elif idx in [4, 12, 16, 20]:
img = cv2.circle(img, (cx, cy), radius, (223, 155, 60), -1)
img = cv2.putText(img, handness_str, (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
img = cv2.putText(img, index_finger_tip_str, (25, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
fps = 1 / (time.time() - t0)
img = cv2.putText(img, "FPS:" + str(int(fps)), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
return img
def out_video(input):
file = input.split("/")[-1]
output = "out-optim-" + file
print("It will start processing video: {}".format(input))
cap = cv2.VideoCapture(input)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_size = (cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# create VideoWriter, VideoWriter_fourcc is video decode
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = cap.get(cv2.CAP_PROP_FPS)
out = cv2.VideoWriter(output, fourcc, fps, (int(frame_size[0]), int(frame_size[1])))
with tqdm(range(frame_count)) as pbar:
while cap.isOpened:
success, frame = cap.read()
if not success:
break
frame = process_frame(frame)
out.write(frame)
pbar.update(1)
pbar.close()
cv2.destroyAllWindows()
out.release()
cap.release()
print("{} finished!".format(output))
if __name__ == '__main__':
video_dirs = "7.mp4"
out_video(video_dirs)
运行结果
手势识别优化
未完...