The code here was written for an experiment; adapt it before using it for anything specific.
TensorFlow tutorial (recommended reading)
Part 1: crop the videos and extract the gestures
import cv2
import numpy as np
import os
import shutil


# Skin detection
def pi(res):
    y_cr_cb = cv2.cvtColor(res, cv2.COLOR_BGR2YCR_CB)  # convert to the YCrCb color space
    (y, cr, cb) = cv2.split(y_cr_cb)  # split out the Y, Cr and Cb channels
    cr1 = cv2.GaussianBlur(cr, (5, 5), 0)
    _, skin = cv2.threshold(cr1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)  # Otsu thresholding
    res = cv2.bitwise_and(res, res, mask=skin)
    kernel = np.ones((3, 3), np.uint8)  # structuring element
    erosion = cv2.erode(res, kernel)  # erosion
    res = cv2.dilate(erosion, kernel)  # dilation
    return res
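# Optional sanity check of the skin detector on its own; 'frame.png' is a
# placeholder for any test image on disk, not a file this project produces:
# test = cv2.imread('frame.png')
# if test is not None:
#     cv2.imshow('skin', pi(test))  # the result should keep mostly skin pixels
#     cv2.waitKey(0)
#     cv2.destroyAllWindows()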
def find(root, file):
    v_path = root + "/" + file
    # Read the video frames
    if os.path.exists('handImg1'):
        shutil.rmtree('handImg1')
    os.mkdir('handImg1')
    if os.path.exists('handImg2'):
        shutil.rmtree('handImg2')
    os.mkdir('handImg2')
    num = 0
    total = 0
    cap = cv2.VideoCapture(v_path)
    while cap.isOpened():
        ret, frame = cap.read()
        total += 1
        if ret:
            # Keep every fifth frame: the raw frame and its skin-masked version
            if total % 5 == 0:
                cv2.imwrite('./handImg1/%d.png' % num, frame)
                frame = pi(frame)
                cv2.imwrite('./handImg2/%d.png' % num, frame)
                num += 1
            # Preview
            # cv2.imshow('a', frame)
            # if cv2.waitKey(20) & 0xFF == ord('q'):
            #     break
        else:
            break
    # Locate the gesture in each frame
    if os.path.exists(root + "/" + file[:10]):
        shutil.rmtree(root + "/" + file[:10])
    os.mkdir(root + "/" + file[:10])
    all_p = num
    for i in range(1, all_p - 1):
        im_out = cv2.imread('./handImg1/%d.png' % i)
        im_size = im_out.shape
        h = im_size[0]
        w = im_size[1]
        im1 = cv2.imread('./handImg2/%d.png' % (i - 1))
        im1 = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
        im2 = cv2.imread('./handImg2/%d.png' % i)
        im2 = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
        im3 = cv2.imread('./handImg2/%d.png' % (i + 1))
        im3 = cv2.cvtColor(im3, cv2.COLOR_BGR2GRAY)
        # Three-frame differencing: keep pixels that moved in both frame pairs
        im12 = cv2.absdiff(im1, im2)
        _, thresh1 = cv2.threshold(im12, 40, 255, cv2.THRESH_BINARY)
        im23 = cv2.absdiff(im2, im3)
        _, thresh2 = cv2.threshold(im23, 40, 255, cv2.THRESH_BINARY)
        thresh1 = cv2.medianBlur(thresh1, 9)
        thresh2 = cv2.medianBlur(thresh2, 9)
        k = np.ones((33, 33), np.uint8)
        thresh1 = cv2.morphologyEx(thresh1, cv2.MORPH_CLOSE, k)
        thresh2 = cv2.morphologyEx(thresh2, cv2.MORPH_CLOSE, k)
        binary = cv2.bitwise_and(thresh1, thresh2)
        # Draw the bounding box
        contours, hierarchy = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        if len(contours) != 0:
            # Uses the first contour found, not necessarily the largest
            rect = cv2.minAreaRect(contours[0])
            points = cv2.boxPoints(rect)
            im2 = cv2.drawContours(im2, [points.astype(int)], 0, (255, 255, 255), 2)
            get_point = points.astype(int)
            x_list = [get_point[0][0], get_point[1][0], get_point[2][0], get_point[3][0]]
            y_list = [get_point[0][1], get_point[1][1], get_point[2][1], get_point[3][1]]
            # Box area, checked against a threshold before saving
            test3 = (max(y_list) - min(y_list)) * (max(x_list) - min(x_list))
            test_len = 20  # padding (in pixels) added around the box
            if min(y_list) < test_len:
                min_y = 0
            else:
                min_y = min(y_list) - test_len
            if min(x_list) < test_len:
                min_x = 0
            else:
                min_x = min(x_list) - test_len
            if max(y_list) > h - test_len:
                max_y = h
            else:
                max_y = max(y_list) + test_len
            if max(x_list) > w - test_len:
                max_x = w
            else:
                max_x = max(x_list) + test_len
            im_save = im_out[min_y:max_y, min_x:max_x]
            if test3 > 300:
                im_save = cv2.resize(im_save, (300, 300))
                s_path = root + "\\" + file[:10] + '\\%d.png' % i
                # Save via imencode/tofile so paths with non-ASCII characters work
                cv2.imencode('.png', im_save)[1].tofile(s_path)
                # cv2.imwrite(s_path, im_save)
            # Preview
            # cv2.imshow('a', im2)
            # if cv2.waitKey(20) & 0xFF == ord('q'):
            #     break
    cap.release()
    cv2.destroyAllWindows()
def b_li(path):
    # os.walk yields (root, dirs, files) for every directory under path, so it
    # already recurses into subdirectories on its own
    for root, dirs, files in os.walk(path):
        for file in files:
            if file[11:] == 'avi':  # assumes names like 'XXXXXXXXXX.avi' (10-character stem)
                print("Processing " + file + " under " + root)
                # Locate the gestures
                find(root, file)


if __name__ == '__main__':
    all_path = r'C:\Users\dell\桌面\演示\021'
    b_li(all_path)
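The localization inside find() is three-frame differencing: motion is thresholded between frames i-1/i and i/i+1, and only pixels that changed in both pairs are kept, which suppresses noise that shows up in just one difference. A minimal sketch of that core step, assuming the script above has already written the skin-masked frames to handImg2:

import cv2

# Three consecutive skin-masked frames written by the script above
f1 = cv2.imread('./handImg2/0.png', cv2.IMREAD_GRAYSCALE)
f2 = cv2.imread('./handImg2/1.png', cv2.IMREAD_GRAYSCALE)
f3 = cv2.imread('./handImg2/2.png', cv2.IMREAD_GRAYSCALE)
d12 = cv2.absdiff(f1, f2)  # motion between frames 0 and 1
d23 = cv2.absdiff(f2, f3)  # motion between frames 1 and 2
_, m12 = cv2.threshold(d12, 40, 255, cv2.THRESH_BINARY)
_, m23 = cv2.threshold(d23, 40, 255, cv2.THRESH_BINARY)
motion = cv2.bitwise_and(m12, m23)  # nonzero only where both pairs show motion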
Part 2: build the training and test sets from the images
Note: images where the crop came out badly need to be deleted by hand first.
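The script below expects one subdirectory per gesture class under the given path; with the class folders used elsewhere in this write-up, the layout would be (a sketch):

C:\Users\dell\桌面\train
├── 001
├── 007
├── 009
├── 020
└── 021

The names list maps these folders, in os.listdir order, to the numeric labels 0-4.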
import os
import numpy as np
import cv2
import h5py
import random


# Load the images and save them to HDF5
def save_image_to_h5py(path):
    # names = ['你', '的', '是', '很', '高兴']
    names = [0, 1, 2, 3, 4]
    img_list = []
    label_list = []
    test_img_list = []
    test_label_list = []
    dir_counter = 0
    # Fraction of samples assigned to the training set
    test_num = 0.8
    for child_dir in os.listdir(path):
        # Per-class buffers, so images can be shuffled before concatenation
        x = []
        y = []
        child_path = os.path.join(path, child_dir)
        # print('Subdirectory:\n', child_path)
        if os.path.isdir(child_path):
            for dir_image in os.listdir(child_path):
                # print('Image name:\n', dir_image)
                img = cv2.imdecode(np.fromfile(os.path.join(child_path, dir_image), dtype=np.uint8), 1)
                # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # single channel would lose detail
                img = cv2.resize(img, (64, 64))
                # Augment the data set with +/-45 degree rotations
                j_r = cv2.getRotationMatrix2D((32, 32), -45, 1)
                img_r = cv2.warpAffine(img, j_r, (64, 64))
                # cv2.imshow('img_r', img_r)
                # cv2.waitKey()
                j_l = cv2.getRotationMatrix2D((32, 32), 45, 1)
                img_l = cv2.warpAffine(img, j_l, (64, 64))
                # cv2.imshow('img_l', img_l)
                # cv2.waitKey()
                for im in [img, img_l, img_r]:
                    a = random.random()
                    if a < test_num:
                        x.append(im)
                        label_list.append(names[dir_counter])
                    else:
                        y.append(im)
                        test_label_list.append(names[dir_counter])
            # Shuffle within the class, then append; this is safe because every
            # image in x (and in y) shares the same label
            random.shuffle(x)
            random.shuffle(y)
            img_list.extend(x)
            test_img_list.extend(y)
            dir_counter += 1
    # Convert the lists to np.array
    img_np = np.array(img_list)
    label_np = np.array(label_list)
    test_img_np = np.array(test_img_list)
    test_label_np = np.array(test_label_list)
    print('Training-set label order:\n', label_np, len(label_np))
    # print('Test-set label order:\n', test_label_np, len(test_label_np))
    print('Training-set fraction:', len(label_np) / (len(label_np) + len(test_label_np)))
    # Write the h5 files
    f_train = h5py.File(r'C:\Users\dell\桌面\train\train.h5', 'w')
    f_train['train_x'] = img_np
    f_train['train_y'] = label_np
    f_train.close()
    f_test = h5py.File(r'C:\Users\dell\桌面\train\test.h5', 'w')
    f_test['test_x'] = test_img_np
    f_test['test_y'] = test_label_np
    f_test.close()


save_image_to_h5py(r'C:\Users\dell\桌面\train')
# Load the h5 files back as np arrays
def load_h5py_to_np(path_train, path_test):
    h5_file1 = h5py.File(path_train, 'r')
    print('Keys in the train h5 file:', h5_file1.keys())
    h5_file2 = h5py.File(path_test, 'r')
    print('Keys in the test h5 file:', h5_file2.keys())
    shuffled_image = h5_file1['train_x'][:]
    shuffled_label = h5_file1['train_y'][:]
    # print('Label order in the data set:\n', shuffled_label, len(h5_file['labels']))
    return shuffled_image, shuffled_label


images, labels = load_h5py_to_np(r'C:\Users\dell\桌面\train\train.h5', r'C:\Users\dell\桌面\train\test.h5')
f1 = h5py.File(r'C:\Users\dell\桌面\train\train.h5', 'r')
for key in f1.keys():
    print(f1[key].name)
    print(f1[key].shape)
f2 = h5py.File(r'C:\Users\dell\桌面\train\test.h5', 'r')
for key in f2.keys():
    print(f2[key].name)
    print(f2[key].shape)
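If two rotations are not enough augmentation, the same loop can produce more variants. A sketch of one more option, a horizontal flip; this assumes a mirrored gesture is still a valid example of the same class, which is worth checking for your vocabulary:

import cv2

img = cv2.imread('0.png')  # any training crop
img_f = cv2.flip(img, 1)  # flip around the vertical axis (mirror image)
# img_f would then join [img, img_l, img_r] in the train/test split loop above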
Part 3: train and test online with Google Colab
Reference tutorial for mounting Google Drive
from google.colab import drive
drive.mount('/content/drive')
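Once mounted, the drive appears under /content/drive; a quick check in a Colab cell that the data folder used below is actually visible:

!ls /content/drive/MyDrive/myhand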
Machine learning
import h5py
import numpy as np
import keras
import tensorflow as tf
from keras.layers import Conv2D  # , MaxPool2D, Dense, Flatten
# from keras import regularizers
# from keras.models import Sequential, load_model
from tensorflow.keras import datasets, layers, models

# google_path
path = r'/content/drive/MyDrive/myhand/'
# local_path
# path = r'C:/Users/dell/桌面/train'


def get_train():
    f = h5py.File(path + 'train.h5', 'r')
    train_x = np.array(f['train_x'])  # training images as np.array
    train_y = np.array(f['train_y'])  # training labels
    return train_x, train_y


def get_test():
    f = h5py.File(path + 'test.h5', 'r')
    test_x = np.array(f['test_x'])  # test images as np.array
    test_y = np.array(f['test_y'])  # test labels
    return test_x, test_y


def preprocess(x):
    # Scale x from 0-255 down to 0-1
    x = x / 255
    return x
if __name__ == '__main__':
    x1, y1 = get_train()
    x2, y2 = get_test()
    print(x1.shape, y1.shape)
    print(x2.shape, y2.shape)
    print(np.max(x1))
    print(np.max(y2))
    from matplotlib import pyplot as plt
    plt.figure()
    for i in range(1, 16):
        plt.subplot(3, 5, i)
        plt.imshow(x1[i])
        print(y1[i])
    plt.show()
    # model = keras.Sequential([
    #     keras.layers.Flatten(input_shape=(64, 64, 3)),
    #     keras.layers.Dense(512, activation='relu'),
    #     keras.layers.Dense(128, activation='relu'),
    #     keras.layers.Dense(32, activation='relu'),
    #     keras.layers.Dense(5)
    # ])
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(5))
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    x_train, y_train = get_train()
    x_test, y_test = get_test()
    x_train = preprocess(x_train)
    x_test = preprocess(x_test)
    # Train
    model.fit(x_train, y_train, epochs=10)
    # Save the model
    model.save(path + 'model.h5')
    # Evaluate
    result = model.evaluate(x_test, y_test)
    print('Test result:', result)
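To confirm the saved file restores correctly, a minimal reload check (a sketch; run it in the same session so path, x_test and y_test are still defined):

reloaded = tf.keras.models.load_model(path + 'model.h5')
reloaded.summary()  # should show the CNN defined above
print(reloaded.evaluate(x_test, y_test))  # should match the test result above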
Part 4: building the recognition system
import tkinter as tk
import cv2
from PIL import ImageTk, Image
import numpy as np
import tensorflow as tf
import os
import shutil

num = 1
win = tk.Tk()
win.title('Gesture recognition')
win.geometry('1000x650+300+60')
f1 = tk.Frame(win, width=500, height=400)
f1.pack(side='left')
f2 = tk.Frame(win, width=500, height=400)
f2.pack(side='right')
t1 = tk.Label(f1, width=20, height=2, text='Camera', font=('宋体', 12))
t1.pack()
t2 = tk.Label(f2, width=20, height=2, text='Screenshot or avatar', font=('宋体', 12))
t2.pack()
# Left canvas
cvs_l = tk.Canvas(f1, width=400, height=400, bg='white')
cvs_l.pack()
# Right canvas
cvs_r = tk.Canvas(f2, width=400, height=400, bg='white')
cvs_r.pack()
# Keep PhotoImage references in globals: tkinter does not hold its own
# reference, and the canvas goes blank if they get garbage-collected
img = None
photo_l = None
frame = None
load_num = 0


def load():
    global load_num
    load_num = 1
    print('Recording started...')


# Skin detection
def pi(im):
    y_cr_cb = cv2.cvtColor(im, cv2.COLOR_BGR2YCR_CB)  # convert to the YCrCb color space
    (y, cr, cb) = cv2.split(y_cr_cb)  # split out the Y, Cr and Cb channels
    cr1 = cv2.GaussianBlur(cr, (5, 5), 0)
    _, skin = cv2.threshold(cr1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)  # Otsu thresholding
    res = cv2.bitwise_and(im, im, mask=skin)
    return res


# Convert a frame for display on a canvas
def out(im):
    # Convert the channel order from BGR to RGB
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    # Convert the np array to a PIL image
    if isinstance(im, np.ndarray):
        im = Image.fromarray(im.astype(np.uint8))
    photo = ImageTk.PhotoImage(image=im)
    return photo


# Camera loop
def open_cinema():
    global num, photo_l, frame, load_num
    num = 1
    s_num = 0
    total = 0
    test = 0
    cap = cv2.VideoCapture(0)  # open the camera
    if cap.isOpened():
        while num == 1:
            ret, frame = cap.read()  # ret is the read status, frame the image
            frame = cv2.flip(frame, 1)
            # Left canvas: live feed
            cv2.rectangle(frame, (200, 150), (400, 350), (0, 255, 0))  # capture box for the gesture
            photo_l = out(frame)
            cvs_l.create_image(200, 200, anchor='center', image=photo_l)
            # Recording: clear the output folder once when recording starts,
            # then make sure it exists before writing into it
            if os.path.exists('handout') and test == 0 and load_num == 1:
                shutil.rmtree('handout')
                test = 1
            if not os.path.exists('handout'):
                os.mkdir('handout')
            if total % 5 == 0 and load_num == 1:
                cv2.imwrite('./handout/%d.png' % s_num, frame[150:350, 200:400])
                s_num += 1
            total += 1
            # Skin detection (the right-canvas preview is currently disabled)
            photo_r = pi(frame)
            # Binarization
            # photo_r = cv2.cvtColor(photo_r, cv2.COLOR_BGR2GRAY)
            # ret, photo_r = cv2.threshold(photo_r, 75, 255, cv2.THRESH_BINARY)
            # Right canvas
            # photo_r = out(photo_r)
            # cvs_r.create_image(200, 200, anchor='center', image=photo_r)
            # Refresh the window
            win.update_idletasks()
            win.update()
    cap.release()  # release the device
    cv2.destroyAllWindows()  # close all OpenCV windows


def close_cinema():
    global num, load_num
    num = 0
    load_num = 0
    print("Camera closed; recording stopped.")


def screenshot():
    global photo_l, img, frame
    img = photo_l
    frame = frame[150:350, 200:400]  # crop coordinates are [y0:y1, x0:x1]
    img = out(frame)
    cvs_r.create_image(200, 200, anchor='center', image=img)
    # img = 255 * np.array(img).astype('uint8')
    # img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)  # PIL to cv2
    # frame = pi(frame)
    cv2.imwrite('test.png', frame)


# Play the avatar video for class i
def person(i):
    v_path = r'C:\Users\dell\桌面\train\testavi' + '\\' + i + '.avi'
    cap = cv2.VideoCapture(v_path)
    while cap.isOpened():
        ret, test_avi = cap.read()
        if ret:
            # test_avi = cv2.flip(test_avi, 1)
            test_avi = cv2.resize(test_avi, (400, 400))
            photo_r = out(test_avi)
            cvs_r.create_image(200, 200, anchor='center', image=photo_r)
            win.update_idletasks()
            win.update()
            # cv2.imshow('1', test_avi)
            if cv2.waitKey(20) & 0xFF == ord('q'):
                break
        else:
            cap.release()  # releasing the capture also ends the while-loop
            cv2.destroyAllWindows()


def tf_model(path):
    model = tf.keras.models.load_model(r'C:\Users\dell\桌面\train\model_2.h5')
    # path = r'C:\Users\dell\桌面\train\021\159.png'
    img_test = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1)
    # img_test = cv2.cvtColor(img_test, cv2.COLOR_BGR2RGB)
    img_test = cv2.resize(img_test, (64, 64))
    # Inspect the architecture
    # model.summary()
    # Add a batch dimension, otherwise predict() raises an error
    img_test = (np.expand_dims(img_test, 0))
    img_test = img_test / 255.0
    # Append a softmax so the logits become probabilities
    probability_model = tf.keras.Sequential([model,
                                             tf.keras.layers.Softmax()])
    predictions = probability_model.predict(img_test)
    return predictions


def hand():
    # The five gesture classes (roughly: 'you', possessive 'de', 'is', 'very', 'happy')
    label_list = ['你', '的', '是', '很', '高兴']
    path = r'test.png'
    predictions = tf_model(path)
    print(predictions[0])
    print('Prediction:', label_list[int(np.argmax(predictions[0]))])
    out_label = np.argsort(-predictions[0])
    print('Classes by confidence: ', end='')
    for i in out_label:
        print(label_list[i], end=' ')
    print()
    # Play the avatar video
    person(str(np.argmax(predictions[0])))


def video():
    label_list = ['你', '的', '是', '很', '高兴']
    test_list = [0] * 5  # per-class vote counter
    for child_dir in os.listdir('.\\handout'):
        child_path = os.path.join('.\\handout', child_dir)
        predictions = tf_model(child_path)
        print(child_dir, label_list[int(np.argmax(predictions[0]))], predictions[0])
        test_list[int(np.argmax(predictions[0]))] += 1
    i = 0
    for lab in label_list:
        print('Count for ' + lab + ': ' + str(test_list[i]))
        i += 1
    print('\nFinal prediction:', label_list[int(np.argmax(test_list))])
    person(str(np.argmax(test_list)))


# Left-side buttons
b1 = tk.Button(f1, width=12, height=2, text='Open camera', command=open_cinema)
b1.pack()
b2 = tk.Button(f1, width=12, height=2, text='Start recording', command=load)
b2.pack()
b5 = tk.Button(f1, width=12, height=2, text='Close camera', command=close_cinema)
b5.pack()
# Right-side buttons
b3 = tk.Button(f2, width=12, height=2, text='Screenshot', command=screenshot)
b3.pack()
b4 = tk.Button(f2, width=12, height=2, text='Recognize (screenshot)', command=hand)
b4.pack()
b6 = tk.Button(f2, width=12, height=2, text='Recognize (video)', command=video)
b6.pack()
# Main window loop
win.mainloop()
Remaining batch-processing and test code
Batch-copy every image under a folder
import shutil
import os


def b_li(path):
    global num
    # os.walk yields (root, dirs, files) for every directory under path, so it
    # already recurses into subdirectories on its own
    for root, dirs, files in os.walk(path):
        for file in files:
            # Check the file extension
            if file[-len(f_type):] == f_type:
                shutil.copyfile(root + '\\' + file, out_path + '\\%d.%s' % (num, f_type))
                print(root + '\\' + file + ' copied to ' + out_path + '\\%d.%s' % (num, f_type))
                num += 1


if __name__ == '__main__':
    # Source folder
    f_path = r'C:\Users\dell\桌面\演示\001'
    # Output folder
    out_path = r'C:\Users\dell\桌面\演示\train\001'
    # File extension to copy
    f_type = 'png'
    num = 1
    b_li(f_path)
Test recognition on a single image
import tensorflow as tf
import cv2
import numpy as np

# Class folders: 001 007 009 020 021
label_list = ['你', '的', '是', '很', '高兴']
model = tf.keras.models.load_model(r'C:\Users\dell\桌面\train\model_2.h5')
# path = r'test.png'
path = r'C:\Users\dell\桌面\train\001\216.png'
img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1)
img = cv2.resize(img, (64, 64))
# Inspect the architecture
model.summary()
# Add a batch dimension, otherwise predict() raises an error
img = (np.expand_dims(img, 0))
img = img / 255.0
# Append a softmax so the logits become probabilities
probability_model = tf.keras.Sequential([model,
                                         tf.keras.layers.Softmax()])
predictions = probability_model.predict(img)
print(predictions[0])
print('Prediction:', label_list[int(np.argmax(predictions[0]))])