Notes: Keypoint Detection Data Conversion and Visualization — Converting LSP and FLIC to YOLOv8-pose txt

I recently wanted to try out keypoint detection, starting with yolov8-pose. I didn't want to train on a dataset as large as COCO, so I picked two smaller ones.

The yolov8-pose txt data format is as follows:

        class index, box (x_center, y_center, width, height), then the keypoints (x, y, visibility per joint); all values are normalized to the image size
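For example, a made-up label line for an instance with two keypoints (class 0, a box covering the whole image, then x y visibility per keypoint) would look like:

        0 0.5 0.5 1.0 1.0 0.31 0.26 2.0 0.62 0.44 1.0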

The visualization only draws the keypoints; no skeleton lines are connected.

Parameters:

        mat_path is the path of the dataset's mat file, including the mat file name
        image_path is the directory of the dataset images, not including the image names
        save_path is the directory where the converted txt files are saved
        save_path1 is the directory where the visualization results are saved

1. LSP

LSP: sports scenes, single-person dataset; the images are cropped single-person regions and quite small; 2000 images, 14 keypoints.
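As a quick sanity check (the path is illustrative), joints.mat in the original LSP release holds a 3×14×2000 array of (x, y, visibility flag) × joint × image, which is why the function below transposes it so that images come first:

from scipy.io import loadmat

joints = loadmat("lsp_dataset/joints.mat")["joints"]
print(joints.shape)                      # (3, 14, 2000): (x/y/flag, joint, image)
print(joints.transpose(2, 0, 1).shape)   # (2000, 3, 14), one slice per image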

import glob
import os
import random

import cv2
import numpy as np
from PIL import Image
from scipy.io import loadmat

# `colors` was not defined in the original post; a random BGR palette
# (one color per keypoint index) is assumed here for visualization.
colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
          for _ in range(29)]


def save_joints_lsp(mat_path, image_path, save_path, save_path1):
    """
    mat_path   is the path of the LSP mat file, including the mat file name
    image_path is the directory of the LSP images, not including the image names
    save_path  is the directory where the converted txt files are saved
    save_path1 is the directory where the visualization results are saved
    The LSP dataset has 2000 images in total.
    """
    # joints.mat is (3, 14, 2000); transpose to (2000, 3, 14) so the
    # first axis indexes images
    joints = loadmat(mat_path)
    joints = joints["joints"].transpose(2, 0, 1)

    for img_path in glob.glob("%s/*.jpg" % image_path):
        img_name = os.path.basename(img_path).split(".")[0]
        img = Image.open(img_path)
        img = np.array(img, dtype=np.uint8)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        imgh, imgw = img.shape[:2]
        # LSP images are named im0001.jpg ... im2000.jpg
        num = int(img_name[2:])
        cen_points = joints[num - 1, ...]

        points_num = cen_points.shape[-1]
        point_dict = {}
        ps = []
        for points_ in range(points_num):
            point_x = cen_points[0, points_]
            point_y = cen_points[1, points_]
            vi = cen_points[2, points_]
            # map the LSP visibility flag to the YOLO convention
            # (2.0 = labeled and visible)
            if vi == 0:
                vi = 2.0
            elif vi == 2:
                print(img_name)  # debug: a flag of 2 should not occur in LSP
            point_dict[str(points_)] = [point_x / imgw, point_y / imgh, vi]
            ps.append([int(point_x), int(point_y)])

        # LSP images are already cropped to a single person, so the box
        # simply covers the whole image; a tight box from the keypoints
        # could be computed instead:
        # x, y, w, h = cv2.boundingRect(np.array([ps]))
        # x = (x + w / 2) / imgw
        # y = (y + h / 2) / imgh
        # w = (w + 6) / imgw
        # h = (h + 6) / imgh
        x = 0.5
        y = 0.5
        w = 1
        h = 1

        with open(os.path.join(save_path, img_name + ".txt"), "w") as f:
            f.write(str(0) + " " + str(x) + " " + str(y) + " " + str(w) + " " + str(h))
            cv2.rectangle(img, (int(x * imgw - w * imgw / 2), int(y * imgh - h * imgh / 2)),
                          (int(x * imgw + w * imgw / 2), int(y * imgh + h * imgh / 2)),
                          (0, 0, 255), 5)
            for i in point_dict:
                p = point_dict[i]
                f.write(" " + str(p[0]) + " " + str(p[1]) + " " + str(p[2]))
                cv2.circle(img, (int(p[0] * imgw), int(p[1] * imgh)), 5, colors[int(i)],
                           thickness=-1)
            f.write("\n")

        # uncomment the next two lines to preview the keypoint positions
        # cv2.imshow("img", img)
        # cv2.waitKey()
        cv2.imwrite(save_path1 + "/" + img_name + ".jpg", img)
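An example call, with illustrative paths (point them at wherever you unpacked LSP; the output directories must already exist):

save_joints_lsp("lsp_dataset/joints.mat", "lsp_dataset/images",
                "lsp_dataset/labels", "lsp_dataset/vis")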

2. FLIC

FLIC: movie scenes; even when multiple people appear in a frame, the label covers only a single person. The labels contain 29 joints, but most are NaN, so 9 joints were selected.

def save_flic(mat_path, image_path, save_path, save_path1):
    """
    mat_path   is the path of the FLIC examples.mat file, including the file name
    image_path is the directory of the FLIC images, not including the image names
    save_path  is the directory where the converted txt files are saved
    save_path1 is the directory where the visualization results are saved
    """
    examples = loadmat(mat_path)
    examples = examples["examples"][0]
    # all 29 joint names, in FLIC annotation order
    joint_ids = ['lsho', 'lelb', 'lwri', 'rsho', 'relb', 'rwri', 'lhip',
                 'lkne', 'lank', 'rhip', 'rkne', 'rank', 'leye', 'reye',
                 'lear', 'rear', 'nose', 'msho', 'mhip', 'mear', 'mtorso',
                 'mluarm', 'mruarm', 'mllarm', 'mrlarm', 'mluleg', 'mruleg',
                 'mllleg', 'mrlleg']
    # the 9 joints actually used; most of the others are NaN, and
    # 'head' is synthesized from the eyes and nose below
    available = ['lsho', 'lelb', 'lwri', 'rsho', 'relb', 'rwri', 'lhip', 'rhip', 'head']

    for example in examples:
        joint = example[2].T        # (29, 2) array of joint coordinates
        img_name = example[3][0]    # image file name
        joints = dict(zip(joint_ids, joint))

        img = cv2.imread(image_path + "/" + img_name)
        img_name = img_name.split(".")[0]
        imgh, imgw = img.shape[:2]
        point_dict = {}
        ps = []
        # synthesize a 'head' point as the mean of both eyes and the nose
        head = np.asarray(joints['reye']) + \
               np.asarray(joints['leye']) + \
               np.asarray(joints['nose'])
        head /= 3
        joints['head'] = head.tolist()
        for name in available:
            point = joints[name]
            point_dict[name] = [point[0] / imgw, point[1] / imgh, 2.0]
            ps.append([int(point[0]), int(point[1])])
        # box from the keypoints, padded by 20 px and normalized
        x, y, w, h = cv2.boundingRect(np.array([ps]))
        x = (x + w / 2) / imgw
        y = (y + h / 2) / imgh
        w = (w + 20) / imgw
        h = (h + 20) / imgh
        with open(os.path.join(save_path, img_name + ".txt"), "w") as f:
            f.write(str(0) + " " + str(x) + " " + str(y) + " " + str(w) + " " + str(h))
            cv2.rectangle(img, (int(x * imgw - w * imgw / 2), int(y * imgh - h * imgh / 2)),
                          (int(x * imgw + w * imgw / 2), int(y * imgh + h * imgh / 2)),
                          (0, 0, 255), 5)
            for c, name in enumerate(point_dict):
                p = point_dict[name]
                f.write(" " + str(p[0]) + " " + str(p[1]) + " " + str(p[2]))
                cv2.circle(img, (int(p[0] * imgw), int(p[1] * imgh)), 5, colors[c],
                           thickness=-1)
            f.write("\n")

        # uncomment the next two lines to preview the keypoint positions
        # cv2.imshow("img", img)
        # cv2.waitKey()
        cv2.imwrite(save_path1 + "/" + img_name + ".jpg", img)
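Likewise for FLIC, with illustrative paths (the mat file ships as examples.mat alongside the images folder):

save_flic("FLIC/examples.mat", "FLIC/images", "FLIC/labels", "FLIC/vis")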

Full code: https://github.com/ziyaoma/detect-pose

References: "LSP数据集与MPII数据集标签转txt文件(字典形式储存)" (LSP and MPII dataset labels to txt, stored as dicts), CSDN blog

https://github.com/Fangyh09/PoseDatasets
