百度AI Studio平台手势识别数据集:https://aistudio.baidu.com/aistudio/datasetdetail/2182
解压数据集中Dataset.zip文件,删除数据集文件夹中名为.DS_Store的文件,得到如图目录结构:
import os

# Build ./train_data.txt and ./test_data.txt from the class folders under
# ./Dataset. Each output line is "<image path>\t<class folder name>\n".
data_path = './Dataset'

# os.listdir() returns entries in arbitrary, filesystem-dependent order;
# sorting makes the train/test split reproducible from run to run.
character_folders = sorted(os.listdir(data_path))

# Mode 'w' truncates lists left over from a previous run, so both files are
# opened once here instead of being deleted and then re-opened in append
# mode for every class folder.
with open('./train_data.txt', 'w') as f_train, \
        open('./test_data.txt', 'w') as f_test:
    for character_folder in character_folders:
        folder_path = os.path.join(data_path, character_folder)
        # Skip macOS metadata and any other stray non-directory entry, which
        # would otherwise make os.listdir() below raise.
        if character_folder == '.DS_Store' or not os.path.isdir(folder_path):
            continue
        character_imgs = sorted(os.listdir(folder_path))
        count = 0
        for img in character_imgs:
            if img == '.DS_Store':
                continue
            line = os.path.join(folder_path, img) + '\t' + character_folder + '\n'
            # Every 10th image of a class (indices 0, 10, 20, ...) goes to the
            # test list; the rest go to the training list.
            if count % 10 == 0:
                f_test.write(line)
            else:
                f_train.write(line)
            count += 1
print('数据列表生成完成')
import os
import paddle
import numpy as np
from PIL import Image
from multiprocessing import cpu_count
# 定义训练集和测试集的reader
def data_mapper(sample):
    """Load one image and convert it to a normalized channel-first array.

    Args:
        sample: An ``(image_path, label)`` pair.

    Returns:
        An ``(image, label)`` tuple. ``image`` is a float32 array of shape
        (3, 100, 100) with values scaled to [0, 1]; ``label`` is passed
        through unchanged.
    """
    path, label = sample
    # Open in a context manager so the file handle is released as soon as
    # the pixel data has been read.
    with Image.open(path) as src:
        # Force three channels: grayscale, palette or RGBA inputs would
        # otherwise break the (2, 0, 1) transpose or yield a wrong channel
        # count. Image.LANCZOS is the same filter as the old ANTIALIAS
        # alias, which was removed in Pillow 10.
        resized = src.convert('RGB').resize((100, 100), Image.LANCZOS)
    pixels = np.array(resized).astype('float32')
    # HWC -> CHW: the three color planes become the leading axis.
    pixels = pixels.transpose((2, 0, 1))
    return pixels / 255.0, label
def data_reader(data_list_path):
    """Create a reader that yields preprocessed samples from a list file.

    Args:
        data_list_path: Path to a text file whose lines have the form
            ``<image path>\\t<integer label>``.

    Returns:
        The reader produced by ``paddle.reader.xmap_readers``, which applies
        ``data_mapper`` to every ``(image_path, label)`` pair read from the
        list file (called here with ``cpu_count()`` and ``512`` as its
        process-count and buffer-size arguments).
    """
    def reader():
        with open(data_list_path, 'r') as f:
            lines = f.readlines()
        for line in lines:
            line = line.rstrip('\r\n')
            # Tolerate blank lines (e.g. a trailing empty line) instead of
            # failing on tuple unpacking.
            if not line.strip():
                continue
            # Split on the last tab only, so the label is always the final
            # field even if the path itself contains a tab.
            img, label = line.rsplit('\t', 1)
            yield img, int(label)
    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 512)
# Training batches of 32 samples. shuffle() randomizes the sample order
# (buffer of 256), which helps the trained model generalize.
train_reader = paddle.batch(
    reader=paddle.reader.shuffle(
        reader=data_reader('./train_data.txt'),
        buf_size=256,
    ),
    batch_size=32,
)

# Test batches of 32 samples, read in list order without shuffling.
test_reader = paddle.batch(
    reader=data_reader('./test_data.txt'),
    batch_size=32,
)
说明:当前目录下生成的 train_data.txt 和 test_data.txt 文件主要用于存储数据路径和分类标签,每行的格式为“图片路径<Tab>分类标签”(分类标签即图片所在的文件夹名)。内容如下,test_data.txt ——>
train_data.txt ——>
说明:网络模型为简单的DNN模型,代码如下
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
# 网络模型定义
class MyDNN(fluid.dygraph.Layer):
    """Simple fully connected classifier with 10 output classes.

    Three 100 -> 100 ReLU ``Linear`` layers are applied in sequence, the
    result is reshaped to ``[-1, 3 * 100 * 100]``, and a final softmax
    ``Linear`` layer maps those 30000 features to 10 class scores.
    """

    def __init__(self):
        super(MyDNN, self).__init__()
        # Attribute names and creation order are kept as-is so that saved
        # state dicts keep loading.
        self.hidden1 = Linear(100, 100, act='relu')
        self.hidden2 = Linear(100, 100, act='relu')
        self.hidden3 = Linear(100, 100, act='relu')
        self.hidden4 = Linear(3 * 100 * 100, 10, act='softmax')

    def forward(self, input):
        """Run the network on ``input`` and return the softmax output.

        The scripts in this file feed batches shaped (N, 3, 100, 100).
        """
        hidden = input
        for layer in (self.hidden1, self.hidden2, self.hidden3):
            hidden = layer(hidden)
        # Flatten each sample to a single 30000-element vector.
        flat = fluid.layers.reshape(hidden, shape=[-1, 3 * 100 * 100])
        return self.hidden4(flat)
import paddle
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from PIL import Image
from multiprocessing import cpu_count
from paddle.fluid.dygraph import Linear
# 训练
with fluid.dygraph.guard():
    l_rate = 0.001
    epochs_num = 10  # number of passes over the training data

    model = MyDNN()
    model.train()
    # Plain stochastic gradient descent over all model parameters.
    opt = fluid.optimizer.SGDOptimizer(
        learning_rate=l_rate,
        parameter_list=model.parameters(),
    )

    for pass_num in range(epochs_num):
        for batch_id, data in enumerate(train_reader()):
            # Stack the batch into an (N, 3, 100, 100) float32 array and an
            # (N, 1) int64 label column.
            images = np.array(
                [sample[0].reshape(3, 100, 100) for sample in data],
                np.float32,
            )
            labels = np.array([sample[1] for sample in data]).astype('int64')
            labels = labels.reshape(-1, 1)

            # Wrap the numpy arrays as dygraph variables.
            image = fluid.dygraph.to_variable(images)
            label = fluid.dygraph.to_variable(labels)

            predict = model(image)
            loss = layers.cross_entropy(predict, label)  # cross-entropy loss
            avg_loss = layers.mean(loss)
            acc = layers.accuracy(predict, label)  # batch accuracy

            # Log every 15th batch, except the very first one.
            if batch_id and batch_id % 15 == 0:
                print('train_pass:{}, batch_id:{}, train_loss:{}, acc:{}'.format(
                    pass_num, batch_id, avg_loss.numpy(), acc.numpy()[0]))

            avg_loss.backward()      # back-propagate
            opt.minimize(avg_loss)   # apply the parameter update
            model.clear_gradients()  # reset gradients before the next batch

    # Save the trained parameters under the 'MyDNN' prefix.
    fluid.save_dygraph(model.state_dict(), 'MyDNN')
说明:模型训练时只简单迭代了10次,训练出的模型效果并不好,这里主要提供一种代码思路,并未做任何优化。保存的模型文件名为“MyDNN.pdparams”,保存在当前目录下。
训练过程:
import matplotlib.pyplot as plt
import paddle.fluid.layers as layers
import Gesture_Recognition as GR
import paddle.fluid as fluid
import numpy as np
from PIL import Image
from Gesture_Recognition import test_reader
# Evaluate the saved model on the test reader and print its accuracy.
with fluid.dygraph.guard():
    accs = []
    batch_sizes = []
    model_dict, _ = fluid.load_dygraph('MyDNN.pdparams')
    model = GR.MyDNN()
    model.load_dict(model_dict)  # load the trained parameters
    model.eval()  # switch to evaluation mode
    for batch_id, data in enumerate(test_reader()):
        images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]
        image = fluid.dygraph.to_variable(images)
        label = fluid.dygraph.to_variable(labels)
        predict = model(image)
        acc = layers.accuracy(predict, label)  # batch accuracy
        accs.append(acc.numpy()[0])
        batch_sizes.append(len(data))
    # Weight each batch by its size: the final batch may hold fewer than 32
    # samples, and an unweighted mean of per-batch accuracies would give
    # those samples too much influence.
    if accs:
        avg_acc = np.average(accs, weights=batch_sizes)
    else:
        avg_acc = float('nan')  # empty test set: nothing to average
    print('平均acc:', avg_acc)
# 读取预测图象进行预测
def load_image(path):
    """Load an image file as a normalized channel-first float32 array.

    Args:
        path: Path of the image file to load.

    Returns:
        A float32 array of shape (3, 100, 100) with values scaled to [0, 1].
    """
    # Open in a context manager so the file handle is released as soon as
    # the pixel data has been read.
    with Image.open(path) as src:
        # Force three channels so grayscale, palette or RGBA inputs do not
        # break the transpose below. Image.LANCZOS is the same filter as the
        # old ANTIALIAS alias, which was removed in Pillow 10.
        resized = src.convert('RGB').resize((100, 100), Image.LANCZOS)
    pixels = np.array(resized).astype('float32')
    # HWC -> CHW: the three color planes become the leading axis.
    pixels = pixels.transpose((2, 0, 1))
    return pixels / 255.0
# 构建预测动态图
# Predict the class of a single image with the saved model, then show it.
with fluid.dygraph.guard():
    infer_path = '手势.JPG'

    model = GR.MyDNN()
    model_dict, _ = fluid.load_dygraph('MyDNN.pdparams')
    model.load_dict(model_dict)  # load the trained parameters
    model.eval()  # switch to evaluation mode

    # Preprocess the image and add a leading batch axis: (1, 3, 100, 100).
    infer_img = np.array(load_image(infer_path)).astype('float32')
    infer_img = infer_img[np.newaxis, :, :, :]

    result = model(fluid.dygraph.to_variable(infer_img))
    # The index of the highest score is the predicted class.
    print('预测值:', np.argmax(result.numpy()))

    plt.imshow(Image.open('手势.JPG'))
    plt.show()