If you run into problems, feel free to leave a comment or send me a private message.

This post implements human keypoint detection with a lightweight pose-estimation model (PyTorch, with OpenCV for video handling) and uses it to count sit-ups. Download links for both the no-GUI and GUI versions of the code are provided below.

Human pose estimation: sit-up counting

Code download (no-GUI version):
Link: https://pan.baidu.com/s/1rvhcNLRLSPFV84snIWYHyQ
Extraction code: k5ab

Below is part of the training code:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import argparse
import cv2
import torch
from torch.nn import DataParallel
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from datasets.coco import CocoTrainDataset
from datasets.transformations import ConvertKeypoints, Scale, Rotate, CropPad, Flip
from modules.get_parameters import get_parameters_conv, get_parameters_bn, get_parameters_conv_depthwise
from models.with_mobilenet import PoseEstimationWithMobileNet
from modules.loss import l2_loss
from modules.load_state import load_state
from val import evaluate
cv2.setNumThreads(0)
cv2.ocl.setUseOpenCL(False)  # to prevent the DataLoader from freezing


def train(prepared_train_labels, train_images_folder, num_refinement_stages, base_lr, batch_size, batches_per_iter,
          num_workers, checkpoint_path, weights_only, checkpoints_folder, log_after,
          val_labels, val_images_folder, val_output_name, checkpoint_after, val_after):
    # weights_only is accepted for compatibility but not used below
    net = PoseEstimationWithMobileNet(num_refinement_stages)

    stride = 8
    sigma = 7
    path_thickness = 1
    dataset = CocoTrainDataset(prepared_train_labels, train_images_folder,
                               stride, sigma, path_thickness,
                               transform=transforms.Compose([
                                   ConvertKeypoints(),
                                   Scale(),
                                   Rotate(pad=(128, 128, 128)),
                                   CropPad(pad=(128, 128, 128)),
                                   Flip()]))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    # Per-group learning rates: biases get 2x the base lr, refinement stages get
    # higher lr, and BN/depthwise-conv parameters get no weight decay.
    optimizer = optim.Adam([
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.refinement_stages, 'weight'), 'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'), 'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
    ], lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    drop_after_epoch = [100, 200, 260]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.333)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        load_state(net, checkpoint)
        print("load : {}".format(checkpoint_path))

    net = net.cuda()
    net.train()
    flag_start = False  # skip the scheduler step on the very first epoch
    for epochId in range(current_epoch, 280):
        if flag_start:
            scheduler.step()
        flag_start = True
        total_losses = [0, 0] * (num_refinement_stages + 1)  # heatmaps loss, paf loss per stage
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()

            images = batch_data['image'].cuda()
            keypoint_masks = batch_data['keypoint_mask'].cuda()
            paf_masks = batch_data['paf_mask'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()
            paf_maps = batch_data['paf_maps'].cuda()

            stages_output = net(images)

            # Each stage outputs a (heatmaps, pafs) pair; compute an L2 loss for both.
            losses = []
            for loss_idx in range(len(total_losses) // 2):
                losses.append(l2_loss(stages_output[loss_idx * 2], keypoint_maps, keypoint_masks, images.shape[0]))
                losses.append(l2_loss(stages_output[loss_idx * 2 + 1], paf_maps, paf_masks, images.shape[0]))
                total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter
                total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()
            # Gradient accumulation: only step the optimizer every batches_per_iter batches.
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
            else:
                continue

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for loss_idx in range(len(total_losses) // 2):
                    print('\n'.join(['stage{}_pafs_loss: {}', 'stage{}_heatmaps_loss: {}']).format(
                        loss_idx + 1, total_losses[loss_idx * 2 + 1] / log_after,
                        loss_idx + 1, total_losses[loss_idx * 2] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0
            if num_iter % checkpoint_after == 0:
                snapshot_name = '{}/light_pose-iter_{}.pth'.format(checkpoints_folder, num_iter)
                torch.save(net.state_dict(), snapshot_name)
            if num_iter % val_after == 0:
                print('Validation...')
                evaluate(val_labels, val_output_name, val_images_folder, net)
                net.train()
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--prepared-train-labels', type=str, default='prepared_train_annotation.pkl',
                        help='path to the file with prepared annotations')
    parser.add_argument('--train-images-folder', type=str, default='./coco2017/train2017/',
                        help='path to COCO train images folder')
    parser.add_argument('--num-refinement-stages', type=int, default=3, help='number of refinement stages')
    parser.add_argument('--base-lr', type=float, default=4e-5, help='initial learning rate')
    parser.add_argument('--batch-size', type=int, default=8, help='batch size')
    parser.add_argument('--batches-per-iter', type=int, default=1, help='number of batches to accumulate gradient from')
    parser.add_argument('--num-workers', type=int, default=8, help='number of workers')
    parser.add_argument('--checkpoint-path', type=str, default='./finetune_model/light_pose.pth',
                        help='path to the checkpoint to continue training from')
    # NOTE: argparse's type=bool is a common pitfall (any non-empty string,
    # including "False", parses as True); only the default of True matters here.
    parser.add_argument('--weights-only', type=bool, default=True,
                        help='just initialize layers with pre-trained weights and start training from the beginning')
    parser.add_argument('--experiment-name', type=str, default='light_pose',
                        help='experiment name to create folder for checkpoints')
    parser.add_argument('--log-after', type=int, default=100, help='number of iterations to print train loss')
    parser.add_argument('--val-labels', type=str, default='val_subset.json', help='path to json with keypoints val labels')
    parser.add_argument('--val-images-folder', type=str, default='./coco2017/val2017/',
                        help='path to COCO val images folder')
    parser.add_argument('--val-output-name', type=str, default='detections.json',
                        help='name of output json file with detected keypoints')
    parser.add_argument('--checkpoint-after', type=int, default=1000,
                        help='number of iterations to save checkpoint')
    parser.add_argument('--val-after', type=int, default=10000,
                        help='number of iterations to run validation')
    args = parser.parse_args()

    checkpoints_folder = '{}_checkpoints'.format(args.experiment_name)
    if not os.path.exists(checkpoints_folder):
        os.makedirs(checkpoints_folder)

    train(args.prepared_train_labels, args.train_images_folder, args.num_refinement_stages, args.base_lr,
          args.batch_size, args.batches_per_iter, args.num_workers, args.checkpoint_path, args.weights_only,
          checkpoints_folder, args.log_after, args.val_labels, args.val_images_folder, args.val_output_name,
          args.checkpoint_after, args.val_after)
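Before moving on to the GUI, here is a minimal sketch of how a checkpoint saved by the training script can be loaded and run on a single image. This is an illustrative example, not the packaged inference_video module used later: it assumes the same repo modules as the training script, lightweight-OpenPose-style normalization (subtract 128, divide by 256), and placeholder file paths such as test.jpg.

# Minimal single-image inference sketch (assumptions: same repo modules as the
# training script above; checkpoint path and 'test.jpg' are placeholders).
import cv2
import numpy as np
import torch

from models.with_mobilenet import PoseEstimationWithMobileNet
from modules.load_state import load_state

net = PoseEstimationWithMobileNet(num_refinement_stages=3)
checkpoint = torch.load('light_pose_checkpoints/light_pose-iter_1000.pth', map_location='cpu')
load_state(net, checkpoint)
net.eval()

img = cv2.imread('test.jpg')
# Width and height must be multiples of the network stride (8).
img = cv2.resize(img, (456, 256))
# Zero-center and scale, then HWC -> NCHW float tensor.
inp = (img.astype(np.float32) - 128) / 256
inp = torch.from_numpy(inp.transpose(2, 0, 1)).unsqueeze(0)

with torch.no_grad():
    stages_output = net(inp)

# Stage outputs alternate [heatmaps, pafs, heatmaps, pafs, ...];
# the last pair is the most refined prediction.
heatmaps, pafs = stages_output[-2], stages_output[-1]
print(heatmaps.shape, pafs.shape)

The last heatmap/PAF pair is what a grouping step would consume to assemble per-person keypoints; the inference_video module in the download wraps that up.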
Below is the GUI code (PyQt5):
import sys
import time
from threading import Thread

import cv2
import cv2 as cv
import numpy as np
from PyQt5 import QtGui, QtWidgets
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
from win32com.client import Dispatch

from untitled import Ui_Form
from inference_video import light_pose_model, draw_one_pose


class Mywindow(QtWidgets.QWidget, Ui_Form):  # inherits the form drawn in Qt Designer
    def __init__(self):
        super(Mywindow, self).__init__()
        self.setupUi(self)
        # Load the model
        self.model_path = "light_pose-20210519.pth"
        self.model_pose = light_pose_model(model_path=self.model_path, heatmaps_thr=0.08)  # inference wrapper
        self.radioButton.setChecked(True)
        self.progressBar.setValue(0)  # reset the progress bar
        self.start = False
        self.stop = True
        # Set the background image
        self.use_palette()
        # Counting state: tang/tang2 = "lying down" flags, qi = "sitting up" flag
        self.tang = False
        self.tang2 = False
        self.qi = False
        self.count = 0
        self.static = ""
        # Timer that drives frame-by-frame playback
        self._timer = QTimer(self)
        self._timer.timeout.connect(self.play)
        self.speaker = Dispatch('SAPI.SpVoice')  # Windows SAPI text-to-speech
        # Radio button toggles between GPU and CPU inference
        self.radioButton.toggled.connect(self.gpu_cpu)
        self.pushButton.clicked.connect(self.video)
        self.pushButton_2.clicked.connect(self.star)
        self.pushButton_3.clicked.connect(self.stops)

    def star(self):
        self.start = True
        self.stop = False

    def stops(self):
        self.start = False
        self.stop = True

    def gpu_cpu(self):
        if self.radioButton.isChecked():
            self.model_pose = light_pose_model(model_path=self.model_path, heatmaps_thr=0.08)
        else:
            self.model_pose = light_pose_model(model_path=self.model_path, heatmaps_thr=0.08, use_cpu=True)

    # Set the window title and background image
    def use_palette(self):
        self.setWindowTitle("Sit-up counter")
        window_pale = QtGui.QPalette()
        window_pale.setBrush(self.backgroundRole(),
                             QtGui.QBrush(QtGui.QPixmap(r"./back.jpg")))
        self.setPalette(window_pale)

    # Convert an OpenCV image (BGR ndarray) into a PyQt5 QImage
    def cv_qt(self, src):
        if len(src.shape) == 2:  # grayscale: expand to 3 channels
            src = np.expand_dims(src, 2)
            src = src.repeat(3, axis=2)
        h, w, d = src.shape
        bytes_per_line = d * w
        qt_image = QImage(src.data, w, h, bytes_per_line, QImage.Format_RGB888).rgbSwapped()
        return qt_image

    # Let the user pick a video file
    def video(self):
        v, videoType = QFileDialog.getOpenFileName(self,
                                                   "Select a video",
                                                   "",
                                                   " *.mp4;;*.avi;;All Files (*)")
        self.ved = cv2.VideoCapture(v)
        qq, bb = self.ved.read()
        if not qq:
            msg_box = QMessageBox(QMessageBox.Warning, 'Warning', 'Please select a video!')
            msg_box.exec_()
            return
        self.start = False
        self.stop = True
        self.num_pic = self.ved.get(cv.CAP_PROP_FRAME_COUNT)
        self.now_pic = 0
        self.count = 0
        self.label_5.setText(str(self.count))
        self._timer.start(1)

    def play(self):
        # Read the next frame
        r, self.frame = self.ved.read()
        if not r:
            return
        # Rotate the frame 90 degrees (the test videos are recorded in portrait)
        self.frame = np.rot90(self.frame)[::-1, :, :]
        # Time the detection step to compute FPS
        start = time.time()
        self.frame = self.detection(self.frame)
        self.now_pic += 1
        self.progressBar.setValue(int(self.now_pic / self.num_pic * 100 + 1))  # setValue expects an int
        end = time.time()
        fps = round(1 / (end - start), 3)
        # Show FPS, the current state and the sit-up count in the UI
        self.label_7.setText(str(fps))
        self.label_3.setText(self.static)
        self.label_5.setText(str(self.count))
        # Rotate back for display
        self.frame = np.rot90(self.frame)[::-1, :, :]
        # Scale the image to fit the window, convert it and show it
        self.frame = self.img_resize(self.frame)
        self.frame = self.cv_qt(self.frame)
        self.label.setPixmap(QPixmap.fromImage(self.frame))

    # Resize the image to fit the display label, keeping the aspect ratio
    def img_resize(self, img):
        h = self.label.height()
        w = self.label.width()
        if img.shape[0] >= img.shape[1]:
            rate = h / img.shape[0]
        else:
            rate = w / img.shape[1]
        src = cv.resize(img, (int(img.shape[1] * rate), int(img.shape[0] * rate)))
        return src

    # Detection and counting
    def detection(self, img):
        img2 = img.copy()
        # Only run the pose model while counting is started
        if self.start:
            pose_dict = self.model_pose.predict(img.copy())
        else:
            time.sleep(0.03)
            return img
        if pose_dict is not None:
            for pose in pose_dict['data']:
                bbox = pose['bbox']
                # Number of undetected keypoints (marked as (-1, -1))
                pp = pose['keypoints'].count((-1, -1))
                if pp <= 8:  # skip detections with too many missing keypoints
                    # Draw the bounding box
                    cv2.rectangle(img2, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])), (25, 155, 255), 2)
                    # Draw the keypoints and skeleton
                    draw_one_pose(img2, np.array(pose['keypoints']),
                                  (int(pose['color'][0]), int(pose['color'][1]), int(pose['color'][2])))
                    # Classify the current state from the box aspect ratio
                    # (height/width of the person box in the rotated frame)
                    rate = bbox[3] / bbox[2]
                    if rate >= 2:  # treated as lying down
                        self.static = "DOWN"
                        self.tang = True
                        self.tang2 = True
                        self.qi = False
                    else:
                        self.static = "UP"
                        self.qi = True
                        self.tang = False
                    # A DOWN -> UP transition counts as one sit-up
                    if self.qi and self.tang2:
                        self.count += 1
                        # Announce the count on a background thread
                        # ("个" is the Chinese measure word, i.e. "N reps")
                        t1 = Thread(target=self.read, args=(str(self.count) + "个",))
                        t1.start()
                        self.tang2 = False
                        print(self.count)
        # cv2.putText(img2, str(self.count), (200, 200),
        #             cv2.FONT_HERSHEY_COMPLEX, 2, (0, 0, 255), 1)
        return img2

    # Speak the given text
    def read(self, num):
        self.speaker.Speak(num)


if __name__ == '__main__':
    app = QApplication(sys.argv)
    win = Mywindow()
    win.show()
    sys.exit(app.exec_())
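The counting logic itself does not depend on Qt. As a rough headless sketch (assuming the same light_pose_model API and predict() output format used in the GUI above; the video path, model file and portrait rotation are placeholders to adapt):

# Headless sit-up counter sketch (assumes the same light_pose_model API and
# predict() output format as the GUI code above; paths are placeholders).
import cv2
import numpy as np
from inference_video import light_pose_model

model = light_pose_model(model_path='light_pose-20210519.pth', heatmaps_thr=0.08)
cap = cv2.VideoCapture('situp.mp4')
count, was_down = 0, False
while True:
    ok, frame = cap.read()
    if not ok:
        break
    frame = np.rot90(frame)[::-1, :, :].copy()  # same portrait rotation as the GUI
    pose_dict = model.predict(frame)
    if pose_dict is None:
        continue
    for pose in pose_dict['data']:
        if pose['keypoints'].count((-1, -1)) > 8:  # skip weak detections
            continue
        x, y, w, h = pose['bbox']
        ratio = h / max(w, 1)  # guard against zero-width boxes
        if ratio >= 2:         # treated as DOWN, as in the GUI heuristic
            was_down = True
        elif was_down:         # DOWN -> UP transition completes one rep
            count += 1
            was_down = False
            print('sit-ups:', count)
cap.release()

The height/width threshold of 2 mirrors the GUI heuristic; it will likely need tuning for other camera angles and video orientations.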
Download link for the complete project (with GUI):