Digital Image Processing: Gesture (Sign Language) Recognition with OpenCV and Keras

The code below was written for a course experiment; adapt the paths and parameters before using it elsewhere.
TensorFlow tutorial (recommended background reading)

Part 1: Crop the video and extract the gesture

The approach: detect skin in the YCrCb color space, then locate the moving hand by differencing three consecutive frames.

import cv2
import numpy as np
import os
import shutil


# skin detection
def pi(res):
    y_cr_cb = cv2.cvtColor(res, cv2.COLOR_BGR2YCR_CB)  # convert to the YCrCb color space
    (y, cr, cb) = cv2.split(y_cr_cb)  # split out the Y, Cr and Cb channels
    cr1 = cv2.GaussianBlur(cr, (5, 5), 0)
    _, skin = cv2.threshold(cr1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)  # Otsu thresholding on the Cr channel
    res = cv2.bitwise_and(res, res, mask=skin)
    kernel = np.ones((3, 3), np.uint8)  # structuring element
    erosion = cv2.erode(res, kernel)  # erosion
    res = cv2.dilate(erosion, kernel)  # dilation (together: morphological opening, removes small noise)
    return res


def find(root, file):
    v_path = os.path.join(root, file)
    # read the video frames into temporary folders
    if os.path.exists('handImg1'):
        shutil.rmtree('handImg1')
    os.mkdir('handImg1')
    if os.path.exists('handImg2'):
        shutil.rmtree('handImg2')
    os.mkdir('handImg2')
    num = 0
    total = 0
    cap = cv2.VideoCapture(v_path)
    while cap.isOpened():
        ret, frame = cap.read()
        total += 1
        if ret:
            if total % 5 == 0:
                cv2.imwrite('./handImg1/%d.png' % num, frame)
                frame = pi(frame)
                cv2.imwrite('./handImg2/%d.png' % num, frame)
                num += 1
                # debug display
            #     cv2.imshow('a', frame)
            # if cv2.waitKey(20) & 0xFF == ord('q'):
            #     break
        else:
            break
    # hand localization: output folder named after the video (extension stripped)
    out_dir = os.path.join(root, os.path.splitext(file)[0])
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)
    all_p = num
    for i in range(1, all_p - 1):
        im_out = cv2.imread('./handImg1/%d.png' % i)
        im_size = im_out.shape
        h = im_size[0]
        w = im_size[1]
        im1 = cv2.imread('./handImg2/%d.png' % (i - 1))
        im1 = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
        im2 = cv2.imread('./handImg2/%d.png' % i)
        im2 = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
        im3 = cv2.imread('./handImg2/%d.png' % (i + 1))
        im3 = cv2.cvtColor(im3, cv2.COLOR_BGR2GRAY)
        im12 = cv2.absdiff(im1, im2)
        _, thresh1 = cv2.threshold(im12, 40, 255, cv2.THRESH_BINARY)
        im23 = cv2.absdiff(im2, im3)
        _, thresh2 = cv2.threshold(im23, 40, 255, cv2.THRESH_BINARY)
        thresh1 = cv2.medianBlur(thresh1, 9)
        thresh2 = cv2.medianBlur(thresh2, 9)
        k = np.ones((33, 33), np.uint8)
        thresh1 = cv2.morphologyEx(thresh1, cv2.MORPH_CLOSE, k)
        thresh2 = cv2.morphologyEx(thresh2, cv2.MORPH_CLOSE, k)
        binary = cv2.bitwise_and(thresh1, thresh2)
        # fit a rotated bounding box around the motion region
        contours, hierarchy = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        if len(contours) != 0:
            rect = cv2.minAreaRect(contours[0])  # note: contours are unordered, so this is an arbitrary pick
            points = cv2.boxPoints(rect)
            im2 = cv2.drawContours(im2, [points.astype(int)], 0, (255, 255, 255), 2)
            get_point = points.astype(int)
            x_list = [get_point[0][0], get_point[1][0], get_point[2][0], get_point[3][0]]
            y_list = [get_point[0][1], get_point[1][1], get_point[2][1], get_point[3][1]]
            # minimum box area for a crop to be kept (checked below)
            test3 = (max(y_list) - min(y_list)) * (max(x_list) - min(x_list))
            test_len = 20  # padding in pixels around the box
            if min(y_list) < test_len:
                min_y = 0
            else:
                min_y = min(y_list) - test_len
            if min(x_list) < test_len:
                min_x = 0
            else:
                min_x = min(x_list) - test_len
            if max(y_list) > h - test_len:
                max_y = h
            else:
                max_y = max(y_list) + test_len
            if max(x_list) > w - test_len:
                max_x = w
            else:
                max_x = max(x_list) + test_len
            im_save = im_out[min_y:max_y, min_x:max_x]
            if test3 > 300:
                im_save = cv2.resize(im_save, (300, 300))
                s_path = os.path.join(out_dir, '%d.png' % i)
                # save via imencode/tofile so the Chinese characters in the path survive on Windows
                cv2.imencode('.png', im_save)[1].tofile(s_path)
                # cv2.imwrite(s_path, im_save)
        # debug display
        # cv2.imshow('a', im2)
        # if cv2.waitKey(20) & 0xFF == ord('q'):
        #     break
    cap.release()
    cv2.destroyAllWindows()


def b_li(path):
    # os.walk already recurses: root is the current folder, dirs its sub-folders, files its files
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith('.avi'):
                print("Processing " + file + " under " + root)
                # locate the gesture
                find(root, file)


if __name__ == '__main__':
    all_path = r'C:\Users\dell\桌面\演示\021'
    b_li(all_path)
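
One caveat about the box-fitting step in find(): cv2.findContours returns contours in no particular order, so contours[0] is an arbitrary blob, not necessarily the hand. A more robust variant (a sketch, not what the code above does) keeps the largest contour by area:

# pick the largest moving region instead of an arbitrary one
if len(contours) != 0:
    largest = max(contours, key=cv2.contourArea)
    rect = cv2.minAreaRect(largest)
    points = cv2.boxPoints(rect)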

Part 2: Build the training and test sets from the cropped images
Note: images where the crop failed should be deleted by hand first.
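
Reviewing the crops one by one is tedious; below is a small helper sketch (the folder path is a placeholder) that shows each image and deletes it when you press d, keeping it on any other key:

import os
import cv2
import numpy as np

folder = r'C:\Users\dell\桌面\train\001'  # hypothetical class folder
for name in os.listdir(folder):
    p = os.path.join(folder, name)
    img = cv2.imdecode(np.fromfile(p, dtype=np.uint8), 1)  # imdecode handles Chinese paths
    cv2.imshow('review: d = delete, other key = keep', img)
    if cv2.waitKey(0) & 0xFF == ord('d'):
        os.remove(p)
cv2.destroyAllWindows()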

import os
import numpy as np
import cv2
import h5py
import random


# build the dataset from the class folders and save it to HDF5
def save_image_to_h5py(path):
    # class names: '你', '的', '是', '很', '高兴', encoded as integer labels 0-4
    names = [0, 1, 2, 3, 4]
    img_list = []
    label_list = []
    test_img_list = []
    test_label_list = []
    dir_counter = 0
    # fraction of samples assigned to the training set
    train_ratio = 0.8
    for child_dir in os.listdir(path):
        # per-class buffers, shuffled before being appended
        x = []
        y = []
        child_path = os.path.join(path, child_dir)
        # print('sub-folder:\n', child_path)
        if os.path.isdir(child_path):
            for dir_image in os.listdir(child_path):
                # print('image name:\n', dir_image)
                img = cv2.imdecode(np.fromfile(os.path.join(child_path, dir_image), dtype=np.uint8), 1)
                # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # single-channel variant (not used)
                img = cv2.resize(img, (64, 64))

                # augment: rotate ±45° about the image center
                j_r = cv2.getRotationMatrix2D((32, 32), -45, 1)
                img_r = cv2.warpAffine(img, j_r, (64, 64))
                # cv2.imshow('img_r', img_r)
                # cv2.waitKey()
                j_l = cv2.getRotationMatrix2D((32, 32), 45, 1)
                img_l = cv2.warpAffine(img, j_l, (64, 64))
                # cv2.imshow('img_l', img_l)
                # cv2.waitKey()

                for im in [img, img_l, img_r]:
                    a = random.random()
                    if a < train_ratio:
                        x.append(im)
                        label_list.append(names[dir_counter])
                    else:
                        y.append(im)
                        test_label_list.append(names[dir_counter])
            # shuffle within the class, then append to the global lists
            random.shuffle(x)
            random.shuffle(y)
            img_list.extend(x)
            test_img_list.extend(y)
            dir_counter += 1

    img_np = np.array(img_list)
    label_np = np.array(label_list)
    test_img_np = np.array(test_img_list)
    test_label_np = np.array(test_label_list)
    print('Training labels, in order:\n', label_np, len(label_np))
    # print('Test labels, in order:\n', test_label_np, len(test_label_np))

    print('Training-set fraction:', len(label_np) / (len(label_np) + len(test_label_np)))
    # write the two h5 files
    f_train = h5py.File(r'C:\Users\dell\桌面\train\train.h5', 'w')
    f_train['train_x'] = img_np
    f_train['train_y'] = label_np
    f_train.close()
    f_test = h5py.File(r'C:\Users\dell\桌面\train\test.h5', 'w')
    f_test['test_x'] = test_img_np
    f_test['test_y'] = test_label_np
    f_test.close()


save_image_to_h5py(r'C:\Users\dell\桌面\train')


# load the h5 files back as np arrays
def load_h5py_to_np(path_train, path_test):
    h5_file1 = h5py.File(path_train, 'r')
    print('Keys in the train h5 file:', h5_file1.keys())
    h5_file2 = h5py.File(path_test, 'r')
    print('Keys in the test h5 file:', h5_file2.keys())
    shuffled_image = h5_file1['train_x'][:]
    shuffled_label = h5_file1['train_y'][:]
    return shuffled_image, shuffled_label


images, labels = load_h5py_to_np(r'C:\Users\dell\桌面\train\train.h5', r'C:\Users\dell\桌面\train\test.h5')

f1 = h5py.File(r'C:\Users\dell\桌面\train\train.h5', 'r')
for key in f1.keys():
    print(f1[key].name)
    print(f1[key].shape)
f2 = h5py.File(r'C:\Users\dell\桌面\train\test.h5', 'r')
for key in f2.keys():
    print(f2[key].name)
    print(f2[key].shape)
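
One subtlety in the split above: the random train/test draw happens per augmented image, so a rotated copy of a training image can land in the test set and inflate the measured accuracy. A leak-free sketch (splitting on the original image first; it reuses the loop variables from save_image_to_h5py):

# decide train/test ONCE per source image, then augment only the training side
if random.random() < 0.8:
    for angle in (0, 45, -45):
        m = cv2.getRotationMatrix2D((32, 32), angle, 1)
        x.append(cv2.warpAffine(img, m, (64, 64)))
        label_list.append(names[dir_counter])
else:
    y.append(img)  # keep the test set un-augmented
    test_label_list.append(names[dir_counter])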

Part 3: Train and test online with Google Colab
Mount Google Drive first (see any Drive-mounting tutorial):

from google.colab import drive
drive.mount('/content/drive')

The training and evaluation script:

import h5py
import numpy as np
import keras
import tensorflow as tf
from keras.layers import Conv2D  # , MaxPool2D, Dense, Flatten
# from keras import regularizers
# from keras.models import Sequential, load_model
from tensorflow.keras import datasets, layers, models


# Google Drive path
path = r'/content/drive/MyDrive/myhand/'
# local path (note the trailing slash, since file names are appended directly)
# path = r'C:/Users/dell/桌面/train/'


def get_train():
    f = h5py.File(path + 'train.h5', 'r')
    train_x = np.array(f['train_x'])  # training images as np.array
    train_y = np.array(f['train_y'])  # training labels
    return train_x, train_y


def get_test():
    f = h5py.File(path + 'test.h5', 'r')
    test_x = np.array(f['test_x'])  # test images as np.array
    test_y = np.array(f['test_y'])  # test labels
    return test_x, test_y


def preprocess(x):
    # scale pixel values from 0-255 to 0-1
    x = x / 255
    return x


if __name__ == '__main__':
    x1, y1 = get_train()
    x2, y2 = get_test()
    print(x1.shape, y1.shape)
    print(x2.shape, y2.shape)
    print(np.max(x1))
    print(np.max(y2))

    from matplotlib import pyplot as plt

    plt.figure()
    for i in range(1, 16):
        plt.subplot(3, 5, i)
        plt.imshow(x1[i])  # note: OpenCV arrays are BGR, so matplotlib shows swapped colors
        print(y1[i])
    plt.show()

    # model = keras.Sequential([
    #     keras.layers.Flatten(input_shape=(64, 64, 3)),
    #     keras.layers.Dense(512, activation='relu'),
    #     keras.layers.Dense(128, activation='relu'),
    #     keras.layers.Dense(32, activation='relu'),
    #     keras.layers.Dense(5)
    # ])
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))

    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(5))

    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    x_train, y_train = get_train()
    x_test, y_test = get_test()
    x_train = preprocess(x_train)
    x_test = preprocess(x_test)
    # train
    model.fit(x_train, y_train, epochs=10)

    # save the model
    model.save(path + 'model.h5')
    # evaluate on the held-out test set
    result = model.evaluate(x_test, y_test)
    print('Test result (loss, accuracy):', result)
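
model.fit returns a History object, which is handy for checking whether ten epochs are enough. A small sketch (swap it in for the plain model.fit call above; it reuses x_test/y_test as the validation set):

history = model.fit(x_train, y_train, epochs=10,
                    validation_data=(x_test, y_test))
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='validation')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()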

Part 4: Building the recognition system (GUI)

import tkinter as tk
import cv2
from PIL import ImageTk, Image
import numpy as np
import tensorflow as tf
import os
import shutil

num = 1
win = tk.Tk()
win.title('Gesture Recognition')
win.geometry('1000x650+300+60')

f1 = tk.Frame(win, width=500, height=400)
f1.pack(side='left')
f2 = tk.Frame(win, width=500, height=400)
f2.pack(side='right')

t1 = tk.Label(f1, width=20, height=2, text='Camera', font=('宋体', 12))
t1.pack()
t2 = tk.Label(f2, width=20, height=2, text='Screenshot / avatar', font=('宋体', 12))
t2.pack()
# left view
cvs_l = tk.Canvas(f1, width=400, height=400, bg='white')
cvs_l.pack()
# right view
cvs_r = tk.Canvas(f2, width=400, height=400, bg='white')
cvs_r.pack()
img = None
photo_l = None
frame = None
load_num = 0


def load():
    # flag that recording should start
    global load_num
    load_num = 1
    print('Recording started...')


# skin detection
def pi(im):
    y_cr_cb = cv2.cvtColor(im, cv2.COLOR_BGR2YCR_CB)  # convert to the YCrCb color space
    (y, cr, cb) = cv2.split(y_cr_cb)  # split out the Y, Cr and Cb channels
    cr1 = cv2.GaussianBlur(cr, (5, 5), 0)
    _, skin = cv2.threshold(cr1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)  # Otsu thresholding
    res = cv2.bitwise_and(im, im, mask=skin)
    return res


# convert an OpenCV image for display on a tkinter canvas
def out(im):
    # reorder channels from BGR to RGB
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    # convert the np array to a PIL image
    if isinstance(im, np.ndarray):
        im = Image.fromarray(im.astype(np.uint8))
    photo = ImageTk.PhotoImage(image=im)
    return photo


# camera loop
def open_cinema():
    global num, photo_l, frame, load_num
    num = 1
    s_num = 0
    total = 0
    test = 0
    cap = cv2.VideoCapture(0)  # open the default camera
    if cap.isOpened():
        while num == 1:  # runs until close_cinema() sets num = 0
            ret, frame = cap.read()  # ret is the read status, frame the image
            frame = cv2.flip(frame, 1)
            # left view: live camera with the capture box drawn in
            cv2.rectangle(frame, (200, 150), (400, 350), (0, 255, 0))
            photo_l = out(frame)
            cvs_l.create_image(200, 200, anchor='center', image=photo_l)
            # recording: clear the output folder once per session, then save every 5th frame
            if os.path.exists('handout') and test == 0 and load_num == 1:
                shutil.rmtree('handout')
                test = 1
            if not os.path.exists('handout'):
                os.mkdir('handout')
            if total % 5 == 0 and load_num == 1:
                cv2.imwrite('./handout/%d.png' % s_num, frame[150:350, 200:400])
                s_num += 1
            total += 1
            # skin detection (right-view preview, currently disabled)
            photo_r = pi(frame)
            # binarization
            # photo_r = cv2.cvtColor(photo_r, cv2.COLOR_BGR2GRAY)
            # ret, photo_r = cv2.threshold(photo_r, 75, 255, cv2.THRESH_BINARY)
            # photo_r = out(photo_r)
            # cvs_r.create_image(200, 200, anchor='center', image=photo_r)
            # refresh the window
            win.update_idletasks()
            win.update()
    cap.release()  # release the camera
    cv2.destroyAllWindows()


def close_cinema():
    global num, load_num
    num = 0
    load_num = 0
    print("Camera closed; recording stopped.")


def screenshot():
    global photo_l, img, frame
    frame = frame[150:350, 200:400]  # crop as [y0:y1, x0:x1]
    img = out(frame)
    cvs_r.create_image(200, 200, anchor='center', image=img)
    # img = 255 * np.array(img).astype('uint8')
    # img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)  # PIL -> cv2
    # frame = pi(frame)

    cv2.imwrite('test.png', frame)


# play the avatar video that matches the recognized class
def person(i):
    v_path = r'C:\Users\dell\桌面\train\testavi' + '\\' + i + '.avi'
    cap = cv2.VideoCapture(v_path)
    while cap.isOpened():
        ret, test_avi = cap.read()  # ret is the read status, test_avi the frame
        if ret:
            test_avi = cv2.resize(test_avi, (400, 400))
            photo_r = out(test_avi)
            cvs_r.create_image(200, 200, anchor='center', image=photo_r)
            win.update_idletasks()
            win.update()
            if cv2.waitKey(20) & 0xFF == ord('q'):
                break
        else:
            cap.release()  # release the capture, which also ends the loop
            cv2.destroyAllWindows()


def tf_model(path):
    # reloading the model on every call is slow, but fine for a demo
    model = tf.keras.models.load_model(r'C:\Users\dell\桌面\train\model_2.h5')

    # imdecode + fromfile so Chinese paths work on Windows
    img_test = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1)
    img_test = cv2.resize(img_test, (64, 64))

    # inspect the architecture if needed
    # model.summary()

    # add a batch dimension, otherwise predict() complains about the input shape
    img_test = (np.expand_dims(img_test, 0))
    img_test = img_test / 255.0

    # the model outputs logits, so append a Softmax layer to get probabilities
    probability_model = tf.keras.Sequential([model,
                                             tf.keras.layers.Softmax()])
    predictions = probability_model.predict(img_test)

    return predictions


def hand():
    label_list = ['你', '的', '是', '很', '高兴']  # classes: 你=you, 的=of, 是=is, 很=very, 高兴=happy
    path = r'test.png'
    predictions = tf_model(path)
    print(predictions[0])
    print('Prediction:', label_list[int(np.argmax(predictions[0]))])
    out_label = np.argsort(-predictions[0])
    print('Ranked by confidence: ', end='')
    for i in out_label:
        print(label_list[i], end=' ')
    print()
    # play the matching avatar video
    person(str(np.argmax(predictions[0])))


def video():
    label_list = ['你', '的', '是', '很', '高兴']  # classes: 你=you, 的=of, 是=is, 很=very, 高兴=happy
    test_list = [0] * 5  # per-class vote counter
    for child_dir in os.listdir('.\\handout'):
        child_path = os.path.join('.\\handout', child_dir)
        predictions = tf_model(child_path)
        print(child_dir, label_list[int(np.argmax(predictions[0]))], predictions[0])
        test_list[int(np.argmax(predictions[0]))] += 1
    for i, lab in enumerate(label_list):
        print(lab + ' votes: ' + str(test_list[i]))
    print('\nFinal prediction (majority vote):', label_list[int(np.argmax(test_list))])
    person(str(np.argmax(test_list)))


# left-hand buttons
b1 = tk.Button(f1, width=12, height=2, text='Open camera', command=open_cinema)
b1.pack()
b2 = tk.Button(f1, width=12, height=2, text='Start recording', command=load)
b2.pack()
b5 = tk.Button(f1, width=12, height=2, text='Close camera', command=close_cinema)
b5.pack()
# right-hand buttons
b3 = tk.Button(f2, width=12, height=2, text='Screenshot', command=screenshot)
b3.pack()
b4 = tk.Button(f2, width=12, height=2, text='Recognize (screenshot)', command=hand)
b4.pack()
b6 = tk.Button(f2, width=12, height=2, text='Recognize (video)', command=video)
b6.pack()
# main event loop
win.mainloop()
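
Driving tkinter from a while loop with win.update() works for a demo, but the more idiomatic pattern schedules frame grabs with after(), which keeps the main loop responsive. A minimal sketch, assuming the same global cap, cvs_l and out() helper as above:

def show_frame():
    global photo_l
    ret, frame = cap.read()
    if ret:
        frame = cv2.flip(frame, 1)
        photo_l = out(frame)  # keep a reference so tkinter does not garbage-collect the image
        cvs_l.create_image(200, 200, anchor='center', image=photo_l)
    win.after(30, show_frame)  # schedule the next grab in ~30 ms

cap = cv2.VideoCapture(0)
show_frame()
win.mainloop()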

Remaining batch-processing and test scripts
Batch-copy all images under a folder tree:

import shutil
import os


def b_li(path):
    global num
    # os.walk already descends into sub-folders, so no manual recursion is needed
    for root, dirs, files in os.walk(path):
        for file in files:
            # check the file extension
            if file.endswith('.' + f_type):
                dst = out_path + '\\%d.%s' % (num, f_type)
                shutil.copyfile(os.path.join(root, file), dst)
                print(os.path.join(root, file) + ' copied -> ' + dst)
                num += 1


if __name__ == '__main__':
    # source folder
    f_path = r'C:\Users\dell\桌面\演示\001'
    # output folder
    out_path = r'C:\Users\dell\桌面\演示\train\001'
    # file extension to match
    f_type = 'png'
    num = 1
    b_li(f_path)

Test the recognition result on a single image:

import tensorflow as tf
import cv2
import numpy as np

# 001 007 009 020 021
label_list = ['你', '的', '是', '很', '高兴']  # classes: 你=you, 的=of, 是=is, 很=very, 高兴=happy
model = tf.keras.models.load_model(r'C:\Users\dell\桌面\train\model_2.h5')
# path = r'test.png'
path = r'C:\Users\dell\桌面\train\001\216.png'
img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1)
img = cv2.resize(img, (64, 64))

# inspect the architecture
model.summary()

# add a batch dimension, otherwise predict() complains about the input shape
img = (np.expand_dims(img, 0))
img = img / 255.0

# the model outputs logits, so append a Softmax layer to get probabilities
probability_model = tf.keras.Sequential([model,
                                         tf.keras.layers.Softmax()])
predictions = probability_model.predict(img)

print(predictions[0])
print('Prediction:', label_list[int(np.argmax(predictions[0]))])
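
One caveat: imdecode with flag -1 (IMREAD_UNCHANGED) keeps the alpha channel if the PNG has one, producing a (64, 64, 4) array that the model's (64, 64, 3) input layer rejects. A defensive sketch:

img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1)
if img.ndim == 3 and img.shape[2] == 4:
    # drop the alpha channel so the shape matches the model input
    img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
img = cv2.resize(img, (64, 64))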
