一、图像预处理
1.1 数据集来源
百度AI Studio平台手势识别数据集:https://aistudio.baidu.com/aistudio/datasetdetail/2182
1.2 目录结构
解压数据集中Dataset.zip文件,删除数据集文件夹中名为.DS_Store的文件,得到如图目录结构:
1.3 图像预处理代码
- 生成图像列表(10%的数据用于测试,90%的数据用于训练)
import os
from multiprocessing import cpu_count

import numpy as np
import paddle
from PIL import Image

data_path = './Dataset'
# os.listdir returns entries in arbitrary order; sorting keeps the 90/10
# train/test split reproducible across runs and platforms.
character_folders = sorted(os.listdir(data_path))

# Mode 'w' truncates lists left over from a previous run, and both files are
# opened once instead of being reopened for every class folder.
with open('./train_data.txt', 'w', encoding='utf-8') as f_train, \
        open('./test_data.txt', 'w', encoding='utf-8') as f_test:
    for character_folder in character_folders:
        folder_path = os.path.join(data_path, character_folder)
        # Only class folders contain images; this also skips stray files
        # such as .DS_Store that would make os.listdir() fail.
        if not os.path.isdir(folder_path):
            continue
        count = 0
        for img in sorted(os.listdir(folder_path)):
            if img == '.DS_Store':
                continue
            # One sample per line: "<image path>\t<label (folder name)>".
            line = os.path.join(folder_path, img) + '\t' + character_folder + '\n'
            # Every 10th image of a class goes to the test list, the rest to
            # the training list.
            if count % 10 == 0:
                f_test.write(line)
            else:
                f_train.write(line)
            count += 1
print('数据列表生成完成')


# Define the training-set and test-set readers.
def data_mapper(sample):
    """Load one image and return it with its label.

    Args:
        sample: ``(image_path, label)`` pair as yielded by the list reader.

    Returns:
        ``(img, label)`` where ``img`` is a float32 array of shape
        (3, 100, 100) scaled to [0, 1] and ``label`` is passed through
        unchanged.
    """
    img_path, label = sample
    # The context manager releases the file handle once pixels are copied.
    with Image.open(img_path) as image:
        # Force three channels: a grayscale or RGBA file would otherwise
        # break the transpose below or the 3x100x100 model input.
        # Image.LANCZOS is the same filter as the removed Image.ANTIALIAS.
        image = image.convert('RGB').resize((100, 100), Image.LANCZOS)
        img = np.array(image).astype('float32')
    # (H, W, C) -> (C, H, W): channel-first layout.
    img = img.transpose((2, 0, 1))
    img = img / 255.0
    return img, label


def data_reader(data_list_path):
    """Build a reader that decodes the images listed in ``data_list_path``.

    Args:
        data_list_path: Path of a list file with one
            ``<image path>\\t<integer label>`` entry per line.

    Returns:
        The reader created by ``paddle.reader.xmap_readers``, which applies
        ``data_mapper`` to every ``(path, label)`` pair using
        ``cpu_count()`` workers and a buffer of 512 samples.
    """
    def reader():
        with open(data_list_path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.rstrip('\n')
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not line:
                    continue
                # Split on the last tab so a path containing a tab cannot
                # break the two-field unpacking.
                img, label = line.rsplit('\t', 1)
                yield img, int(label)

    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 512)


# The training list is written class by class, so training samples are
# shuffled (256-sample buffer) before being grouped into batches of 32.
train_reader = paddle.batch(
    reader=paddle.reader.shuffle(reader=data_reader('./train_data.txt'),
                                 buf_size=256),
    batch_size=32)
# Test samples are batched in list order.
test_reader = paddle.batch(reader=data_reader('./test_data.txt'),
                           batch_size=32)
说明:当前目录下生成的 train_data.txt 和 test_data.txt 文件主要用于存储数据路径和分类标签,内容如下,test_data.txt ——>
train_data.txt ——>
二、网络模型定义与训练
2.1 网络模型定义
说明:网络模型为简单的DNN模型,代码如下
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear


# Network model definition
class MyDNN(fluid.dygraph.Layer):
    """Simple fully connected classifier.

    Three 100 -> 100 linear layers with ReLU activation are followed by a
    reshape to 3 * 100 * 100 features per sample and a final linear layer
    with 10 softmax outputs.
    """

    def __init__(self):
        super(MyDNN, self).__init__()
        # Attribute names and creation order are kept as-is: saved
        # checkpoints refer to the layers by these names.
        self.hidden1 = Linear(100, 100, act='relu')
        self.hidden2 = Linear(100, 100, act='relu')
        self.hidden3 = Linear(100, 100, act='relu')
        self.hidden4 = Linear(3 * 100 * 100, 10, act='softmax')

    def forward(self, input):
        """Run the network on ``input`` and return the softmax output.

        NOTE(review): the training code feeds batches shaped
        (N, 3, 100, 100); the hidden layers presumably act on the last
        axis -- confirm against the Linear documentation.
        """
        features = input
        for hidden_layer in (self.hidden1, self.hidden2, self.hidden3):
            features = hidden_layer(features)
        # Flatten each sample to one row before the classification layer.
        flattened = fluid.layers.reshape(features, shape=[-1, 3 * 100 * 100])
        return self.hidden4(flattened)
2.2 模型训练代码
import paddle
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from PIL import Image
from multiprocessing import cpu_count
from paddle.fluid.dygraph import Linear


# Training.
# The test script imports MyDNN and test_reader from the Gesture_Recognition
# module; without this guard every such import would retrain the model and
# overwrite the saved checkpoint. Run this file directly to train before
# running the test script.
if __name__ == '__main__':
    with fluid.dygraph.guard():
        l_rate = 0.001
        model = MyDNN()
        model.train()
        # Plain stochastic gradient descent over all model parameters.
        opt = fluid.optimizer.SGDOptimizer(learning_rate=l_rate,
                                           parameter_list=model.parameters())
        epochs_num = 10  # number of passes over the training set

        for pass_num in range(epochs_num):
            for batch_id, data in enumerate(train_reader()):
                # Each sample is an (image, label) pair; stack them into
                # float32 images of shape (N, 3, 100, 100) and int64 labels
                # of shape (N, 1).
                images = np.array([x[0].reshape(3, 100, 100) for x in data],
                                  np.float32)
                labels = np.array([x[1] for x in data]).astype('int64')
                labels = labels[:, np.newaxis]

                # Wrap the numpy arrays as dygraph variables.
                image = fluid.dygraph.to_variable(images)
                label = fluid.dygraph.to_variable(labels)

                predict = model(image)
                loss = layers.cross_entropy(predict, label)  # cross entropy
                avg_loss = layers.mean(loss)
                acc = layers.accuracy(predict, label)  # batch accuracy

                # Log every 15th batch, skipping batch 0.
                if batch_id != 0 and batch_id % 15 == 0:
                    print('train_pass:{}, batch_id:{}, train_loss:{}, acc:{}'.format(
                        pass_num, batch_id, avg_loss.numpy(), acc.numpy()[0]))

                avg_loss.backward()       # back-propagate the loss
                opt.minimize(avg_loss)    # update the parameters
                # Reset gradients so they do not carry over into the next
                # batch.
                model.clear_gradients()

        # Save the parameters under the prefix 'MyDNN'.
        fluid.save_dygraph(model.state_dict(), 'MyDNN')
说明:模型训练时仅简单迭代10轮(epoch),训练出的模型效果并不好,这里主要提供一种代码思路,并未做任何优化。保存的模型文件名为“MyDNN.pdparams”,保存在当前目录下。
三、模型测试
import matplotlib.pyplot as plt
import paddle.fluid.layers as layers
import Gesture_Recognition as GR
import paddle.fluid as fluid
import numpy as np
from PIL import Image
from Gesture_Recognition import test_reader

# Checkpoint prefix, identical to the name given to fluid.save_dygraph by the
# training code. load_dygraph expects this prefix, not the '.pdparams' file
# name.
MODEL_PATH = 'MyDNN'

# Evaluate the saved model on the test set.
with fluid.dygraph.guard():
    model_dict, _ = fluid.load_dygraph(MODEL_PATH)
    model = GR.MyDNN()
    model.load_dict(model_dict)  # load the trained parameters
    model.eval()                 # switch to evaluation mode

    correct = 0.0
    total = 0
    for data in test_reader():
        images = np.array([x[0].reshape(3, 100, 100) for x in data],
                          np.float32)
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]

        image = fluid.dygraph.to_variable(images)
        label = fluid.dygraph.to_variable(labels)

        predict = model(image)
        acc = layers.accuracy(predict, label)  # accuracy of this batch
        # Weight each batch by its size: the last batch may be smaller, so
        # a plain mean of per-batch accuracies would be biased.
        correct += float(acc.numpy()[0]) * len(data)
        total += len(data)

    # An empty test set yields NaN instead of a division error.
    avg_acc = correct / total if total else float('nan')
    print('平均acc:', avg_acc)


# Read an image to run a prediction on.
def load_image(path):
    """Load the image at ``path`` as model input.

    Args:
        path: Path of the image file.

    Returns:
        A float32 array of shape (3, 100, 100) scaled to [0, 1].
    """
    # The context manager releases the file handle once pixels are copied.
    with Image.open(path) as image:
        # Force three channels so grayscale/RGBA/CMYK files also produce a
        # 3x100x100 input. Image.LANCZOS is the same filter as the removed
        # Image.ANTIALIAS.
        image = image.convert('RGB').resize((100, 100), Image.LANCZOS)
        img = np.array(image).astype('float32')
    # (H, W, C) -> (C, H, W): channel-first layout.
    img = img.transpose((2, 0, 1))
    img = img / 255.0
    return img


# Predict the class of a single image.
infer_path = '手势.JPG'
with fluid.dygraph.guard():
    model = GR.MyDNN()
    model_dict, _ = fluid.load_dygraph(MODEL_PATH)
    model.load_dict(model_dict)  # load the trained parameters
    model.eval()                 # switch to evaluation mode

    # Add a leading batch axis: (3, 100, 100) -> (1, 3, 100, 100).
    infer_img = load_image(infer_path)[np.newaxis, :, :, :]
    infer_img = fluid.dygraph.to_variable(infer_img)
    result = model(infer_img)
    # The index of the largest output is the predicted class.
    print('预测值:', np.argmax(result.numpy()))

# Show the image that was classified.
plt.imshow(Image.open(infer_path))
plt.show()