Google Gesture Detection

Model:

https://github.com/google/mediapipe/tree/294687295d502f1bec3fe140c25538adefa11a14/mediapipe/models
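
The .tflite files can be fetched straight from that commit. Below is a minimal download sketch; the raw.githubusercontent.com URL pattern is an assumption derived from the repository path above, so verify the files exist there before relying on it:

import urllib.request

# Hypothetical raw-file base URL derived from the repository path above.
BASE = ('https://raw.githubusercontent.com/google/mediapipe/'
        '294687295d502f1bec3fe140c25538adefa11a14/mediapipe/models/')
for name in ['hand_landmark.tflite', 'palm_detection.tflite']:
    urllib.request.urlretrieve(BASE + name, name)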

Original source: https://github.com/Yuki03759/utils_python/blob/5c817f8b9dd44748f315eb63e9f791d0df11732e/tflite/main.py

The script below loads one of the MediaPipe TFLite models and overlays the predicted hand landmarks on live webcam frames.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt


def visualize(output_data, output_picture):
    # Scatter-plot the predicted landmarks. The model appears to emit
    # coordinates in a 0-1000 range, so the axes are fixed accordingly;
    # y is negated so the plot matches the image orientation.
    x, y = [], []
    for idx in range(0, output_data.shape[1], 2):
        x.append(int(output_data[0][idx]))
        y.append(-int(output_data[0][idx + 1]))

    plt.xlim(0, 1000)
    plt.ylim(-1000, 0)
    for i in range(len(x)):
        print('plotting:', x[i], y[i])
        plt.plot(x[i], y[i], "o")
    plt.show()
    # plt.savefig(output_picture)


def img_resize(picture):
    # Resize to the model's 256x256 input, cast to float32,
    # and add a batch dimension -> (1, 256, 256, 3).
    img = cv2.resize(picture, dsize=(256, 256))
    img = np.array(img, np.float32)
    img = img[None, ...]
    return img
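
# Note (assumption, not from the original post): some revisions of
# hand_landmark.tflite expect pixel values normalized to [0, 1] rather than
# raw 0-255 floats. If the landmarks come out wrong, try this variant:
def img_resize_normalized(picture):
    img = cv2.resize(picture, dsize=(256, 256)).astype(np.float32) / 255.0
    return img[None, ...]  # add batch dimension -> (1, 256, 256, 3)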


class tflite:
    def __init__(self, model_path):
        # Load the TFLite model.
        self.interpreter = tf.lite.Interpreter(model_path=model_path)
        # Allocate tensor memory.
        self.interpreter.allocate_tensors()

    def inference(self, img):
        # Get the properties of the model's input and output layers.
        input_details = self.interpreter.get_input_details()
        output_details = self.interpreter.get_output_details()

        # Feed the preprocessed image into the input tensor.
        self.interpreter.set_tensor(input_details[0]['index'], img)

        # Run inference.
        self.interpreter.invoke()

        # Read back the output tensor (the predicted landmarks).
        output_data = self.interpreter.get_tensor(output_details[0]['index'])

        return output_data


if __name__ == "__main__":
    MODEL_NAME = ['hand_landmark.tflite', 'hand_landmark_3d.tflite', 'face_detection_front.tflite',
                  'palm_detection.tflite']
    MODEL = MODEL_NAME[0]

    tflite_class = tflite(MODEL)

    cam = cv2.VideoCapture(0)

    while True:
        ret, picture = cam.read()
        if not ret:
            break

        picture = cv2.resize(picture, dsize=(256, 256))
        # Note (assumption): MediaPipe models are trained on RGB input, while
        # OpenCV delivers BGR; if results look off, convert first with
        # cv2.cvtColor(picture, cv2.COLOR_BGR2RGB).
        img = img_resize(picture)
        output_data = tflite_class.inference(img)

        # The output is a flat [x0, y0, x1, y1, ...] array; the coordinates
        # appear to span a 0-1000 range, so *0.256 maps them onto the
        # 256x256 preview frame.
        for idx in range(0, output_data.shape[1], 2):
            cv2.circle(picture, (int(output_data[0][idx] * 0.256), int(output_data[0][idx + 1] * 0.256)),
                       5, (0, 0, 255), 1)
        cv2.imshow("hand", picture)
        if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
            break

    cam.release()
    cv2.destroyAllWindows()
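
For the 2D hand_landmark.tflite model the output tensor holds 42 values, i.e. 21 landmarks × (x, y); the _3d variant adds a z coordinate per point. The sketch below is an illustration, not part of the original post: it reuses tflite_class and img_resize from above to run on a single image file and draw the hand skeleton. The connection list is MediaPipe's standard 21-point hand topology, and the 0.256 scale factor (mapping the apparent 0-1000 output range onto the 256x256 frame) is carried over from the loop above.

# Wrist (0), thumb (1-4), index (5-8), middle (9-12), ring (13-16), pinky (17-20).
CONNECTIONS = [(0, 1), (1, 2), (2, 3), (3, 4),
               (0, 5), (5, 6), (6, 7), (7, 8),
               (5, 9), (9, 10), (10, 11), (11, 12),
               (9, 13), (13, 14), (14, 15), (15, 16),
               (13, 17), (0, 17), (17, 18), (18, 19), (19, 20)]

def draw_skeleton(path):
    picture = cv2.resize(cv2.imread(path), dsize=(256, 256))
    output_data = tflite_class.inference(img_resize(picture))
    # Gather the 21 (x, y) pixel coordinates.
    points = [(int(output_data[0][i] * 0.256), int(output_data[0][i + 1] * 0.256))
              for i in range(0, 42, 2)]
    for a, b in CONNECTIONS:
        cv2.line(picture, points[a], points[b], (0, 255, 0), 1)
    cv2.imshow("skeleton", picture)
    cv2.waitKey(0)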

 
