The gesture-recognition project is built on PaddlePaddle 1.7.0 (because we will be using the dynamic graph, i.e. dygraph, API).
A neural network does not necessarily get better the deeper it is: deeper networks are more complex and run into problems such as overfitting and gradients that fail to propagate to the deeper layers, which is exactly what architectures like ResNet and DenseNet were designed to mitigate.
For this task we can pick a simple network, such as a classic, basic LeNet-style model.
class LeNet(fluid.dygraph.Layer):
    def __init__(self, training=True):
        super(LeNet, self).__init__()
        self.conv1 = Conv2D(num_channels=3, num_filters=32, filter_size=3, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2)
        self.conv2 = Conv2D(num_channels=32, num_filters=32, filter_size=3, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2)
        self.conv3 = Conv2D(num_channels=32, num_filters=64, filter_size=3, act='relu')
        self.pool3 = Pool2D(pool_size=2, pool_stride=2)
        self.fc1 = Linear(input_dim=6400, output_dim=4096, act='relu')  # 64 * 10 * 10 = 6400
        self.drop_ratio = 0.5 if training else 0.0  # disable dropout outside of training
        self.fc2 = Linear(input_dim=4096, output_dim=10)

    def forward(self, inputs):
        conv1 = self.conv1(inputs)  # [N, C, H, W] = [32, 32, 98, 98]
        pool1 = self.pool1(conv1)   # [32, 32, 49, 49]
        conv2 = self.conv2(pool1)   # [32, 32, 47, 47]
        pool2 = self.pool2(conv2)   # [32, 32, 23, 23]
        conv3 = self.conv3(pool2)   # [32, 64, 21, 21]
        pool3 = self.pool3(conv3)   # [32, 64, 10, 10]
        rs_1 = fluid.layers.reshape(pool3, [pool3.shape[0], -1])
        fc1 = self.fc1(rs_1)
        drop1 = fluid.layers.dropout(fc1, self.drop_ratio)
        y = self.fc2(drop1)
        return y
This is the convolutional neural network implemented with PaddlePaddle.
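As a quick sanity check (this snippet is my own addition, not part of the original script, and assumes the imports from the full listing further down), you can push a random batch through the network in dygraph mode and confirm the shapes annotated in forward():

with fluid.dygraph.guard():
    net = LeNet(training=True)
    x = fluid.dygraph.to_variable(np.random.rand(32, 3, 100, 100).astype('float32'))
    y = net(x)
    print(y.shape)  # expected: [32, 10]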
Common loss functions include squared error and cross entropy. Here we use the cross-entropy loss implemented in PaddlePaddle:
fluid.layers.softmax_with_cross_entropy
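A minimal sketch of how this loss is called (my own illustration, assuming PaddlePaddle 1.7 in dygraph mode): softmax_with_cross_entropy takes raw logits plus int64 labels of shape [N, 1] and returns a per-sample loss, so no separate softmax is needed before the loss.

with fluid.dygraph.guard():
    logits = fluid.dygraph.to_variable(np.random.randn(4, 10).astype('float32'))       # [N, num_classes]
    labels = fluid.dygraph.to_variable(np.array([[1], [0], [5], [9]], dtype='int64'))  # [N, 1]
    loss = fluid.layers.softmax_with_cross_entropy(logits, labels)  # per-sample loss, shape [N, 1]
    avg_loss = fluid.layers.mean(loss)  # scalar that backward() can be called on
    print(avg_loss.numpy())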
The official documentation for PaddlePaddle's dynamic graph (dygraph) API is here:
https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/dygraph_cn.html
The main optimization algorithms are GD, SGD, Momentum, RMSProp, and Adam.
We choose fluid.optimizer.Momentum, which keeps an exponentially weighted moving average of the gradients; like a body with real momentum, it cannot stop on a dime (tongue firmly in cheek; see the toy sketch below).
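To make the joke concrete, here is a toy NumPy sketch of the standard momentum update rule (my own illustration, not PaddlePaddle internals): the velocity accumulates an exponentially decaying history of gradients, so one small gradient cannot stop the update immediately.

import numpy as np

def momentum_step(param, grad, velocity, lr=0.001, mu=0.9):
    velocity = mu * velocity + grad   # exponentially decaying sum of past gradients
    param = param - lr * velocity     # step along the accumulated direction
    return param, velocity

# toy usage: minimize f(w) = w^2, whose gradient is 2*w
w, v = np.array([1.0]), np.array([0.0])
for _ in range(3):
    w, v = momentum_step(w, 2.0 * w, v)
    print(w, v)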
import os
import time
import random
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from multiprocessing import cpu_count
from paddle.fluid.dygraph import Pool2D,Conv2D
from paddle.fluid.dygraph import Linear
# Generate the image list files
data_path = '/home/aistudio/data/data23668/Dataset'
character_folders = os.listdir(data_path)
# print(character_folders)
if os.path.exists('./train_data.list'):
    os.remove('./train_data.list')
if os.path.exists('./test_data.list'):
    os.remove('./test_data.list')
for character_folder in character_folders:
    with open('./train_data.list', 'a') as f_train:
        with open('./test_data.list', 'a') as f_test:
            if character_folder == '.DS_Store':
                continue
            character_imgs = os.listdir(os.path.join(data_path, character_folder))
            count = 0
            for img in character_imgs:
                if img == '.DS_Store':
                    continue
                if count % 10 == 0:  # every 10th image goes to the test set
                    f_test.write(os.path.join(data_path, character_folder, img) + '\t' + character_folder + '\n')
                else:
                    f_train.write(os.path.join(data_path, character_folder, img) + '\t' + character_folder + '\n')
                count += 1
print('Image lists generated')
# Define the readers for the training and test sets
def data_mapper(sample):
    img, label = sample
    img = Image.open(img)
    img = img.resize((100, 100), Image.ANTIALIAS)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1))  # HWC -> CHW
    img = img / 255.0               # scale pixels to [0, 1]
    return img, label

def data_reader(data_list_path):
    def reader():
        with open(data_list_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                img, label = line.split('\t')
                yield img, int(label)
    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 512)

# Data provider for training
train_reader = paddle.batch(reader=paddle.reader.shuffle(reader=data_reader('./train_data.list'), buf_size=256), batch_size=32)
# Data provider for testing
test_reader = paddle.batch(reader=data_reader('./test_data.list'), batch_size=32)
class LeNet(fluid.dygraph.Layer):
    def __init__(self, training=True):
        super(LeNet, self).__init__()
        self.conv1 = Conv2D(num_channels=3, num_filters=32, filter_size=3, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2)
        self.conv2 = Conv2D(num_channels=32, num_filters=32, filter_size=3, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2)
        self.conv3 = Conv2D(num_channels=32, num_filters=64, filter_size=3, act='relu')
        self.pool3 = Pool2D(pool_size=2, pool_stride=2)
        self.fc1 = Linear(input_dim=6400, output_dim=4096, act='relu')  # 64 * 10 * 10 = 6400
        self.drop_ratio = 0.5 if training else 0.0  # disable dropout outside of training
        self.fc2 = Linear(input_dim=4096, output_dim=10)

    def forward(self, inputs):
        conv1 = self.conv1(inputs)  # [N, C, H, W] = [32, 32, 98, 98]
        pool1 = self.pool1(conv1)   # [32, 32, 49, 49]
        conv2 = self.conv2(pool1)   # [32, 32, 47, 47]
        pool2 = self.pool2(conv2)   # [32, 32, 23, 23]
        conv3 = self.conv3(pool2)   # [32, 64, 21, 21]
        pool3 = self.pool3(conv3)   # [32, 64, 10, 10]
        rs_1 = fluid.layers.reshape(pool3, [pool3.shape[0], -1])
        fc1 = self.fc1(rs_1)
        drop1 = fluid.layers.dropout(fc1, self.drop_ratio)
        y = self.fc2(drop1)
        return y
with fluid.dygraph.guard():
    model = LeNet(True)   # instantiate the model with dropout enabled
    model.train()         # training mode
    # opt = fluid.optimizer.SGDOptimizer(learning_rate=0.01, parameter_list=model.parameters())  # alternative: plain SGD
    opt = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9, parameter_list=model.parameters())
    epochs_num = 150  # number of epochs (raised from 20 to 150)
    # Around 60 epochs already reaches 90%+ accuracy; roughly 100 is recommended.
    for pass_num in range(epochs_num):
        for batch_id, data in enumerate(train_reader()):
            images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
            labels = np.array([x[1] for x in data]).astype('int64')
            labels = labels[:, np.newaxis]
            # print(images.shape)
            image = fluid.dygraph.to_variable(images)
            label = fluid.dygraph.to_variable(labels)
            logits = model(image)                # forward pass: raw logits
            pred = fluid.layers.softmax(logits)  # probabilities, used only for the accuracy metric
            # loss = fluid.layers.cross_entropy(pred, label)  # alternative: cross_entropy on the softmax output
            loss = fluid.layers.softmax_with_cross_entropy(logits, label)
            avg_loss = fluid.layers.mean(loss)        # scalar loss
            acc = fluid.layers.accuracy(pred, label)  # batch accuracy
            if batch_id != 0 and batch_id % 50 == 0:
                print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(pass_num, batch_id, avg_loss.numpy(), acc.numpy()))
            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()
    fluid.save_dygraph(model.state_dict(), 'LeNet')  # save the trained parameters
with fluid.dygraph.guard():
    accs = []
    model_dict, _ = fluid.load_dygraph('LeNet')
    model = LeNet(False)         # training=False so dropout is disabled during evaluation
    model.load_dict(model_dict)  # load the trained parameters
    model.eval()                 # evaluation mode
    for batch_id, data in enumerate(test_reader()):  # iterate over the test set
        images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]
        image = fluid.dygraph.to_variable(images)
        label = fluid.dygraph.to_variable(labels)
        predict = model(image)
        acc = fluid.layers.accuracy(predict, label)
        accs.append(acc.numpy()[0])
    avg_acc = np.mean(accs)
    print(avg_acc)
I trained this network and reached about 88% accuracy; adding one more convolutional layer to the model pushed my training to about 95%. Feel free to modify it yourself (a sketch of that variant follows).
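A hedged sketch of that deeper variant (the layer names and filter counts here are my own choices, not a fixed recipe): adding a fourth conv/pool stage shrinks the feature map to [N, 128, 4, 4], so fc1's input_dim has to change from 6400 to 128 * 4 * 4 = 2048.

class LeNetDeeper(fluid.dygraph.Layer):
    def __init__(self, training=True):
        super(LeNetDeeper, self).__init__()
        self.conv1 = Conv2D(num_channels=3, num_filters=32, filter_size=3, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2)
        self.conv2 = Conv2D(num_channels=32, num_filters=32, filter_size=3, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2)
        self.conv3 = Conv2D(num_channels=32, num_filters=64, filter_size=3, act='relu')
        self.pool3 = Pool2D(pool_size=2, pool_stride=2)
        self.conv4 = Conv2D(num_channels=64, num_filters=128, filter_size=3, act='relu')  # extra conv layer
        self.pool4 = Pool2D(pool_size=2, pool_stride=2)
        self.fc1 = Linear(input_dim=128 * 4 * 4, output_dim=4096, act='relu')  # 2048 inputs now
        self.drop_ratio = 0.5 if training else 0.0
        self.fc2 = Linear(input_dim=4096, output_dim=10)

    def forward(self, inputs):
        x = self.pool1(self.conv1(inputs))  # [N, 32, 49, 49]
        x = self.pool2(self.conv2(x))       # [N, 32, 23, 23]
        x = self.pool3(self.conv3(x))       # [N, 64, 10, 10]
        x = self.pool4(self.conv4(x))       # [N, 128, 4, 4]
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = fluid.layers.dropout(self.fc1(x), self.drop_ratio)
        return self.fc2(x)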
def load_image(path):
    img = Image.open(path)
    img = img.resize((100, 100), Image.ANTIALIAS)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1))  # HWC -> CHW
    img = img / 255.0
    print(img.shape)
    return img

# Build the inference pass with the dynamic graph
with fluid.dygraph.guard():
    infer_path = '手势.JPG'
    model = LeNet(False)                         # training=False: dropout disabled for inference
    model_dict, _ = fluid.load_dygraph('LeNet')
    model.load_dict(model_dict)                  # load the trained parameters
    model.eval()                                 # evaluation mode
    infer_img = load_image(infer_path)
    infer_img = np.array(infer_img).astype('float32')
    infer_img = infer_img[np.newaxis, :, :, :]   # add the batch dimension
    infer_img = fluid.dygraph.to_variable(infer_img)
    result = model(infer_img)
    display(Image.open('手势.JPG'))              # show the input image (IPython/AI Studio display)
    print(np.argmax(result.numpy()))             # predicted label
We can see that the gesture labeled 5 is predicted correctly. Although the accuracy is only around 88%, as a case study it is clearly a success, and you are welcome to tweak the hyperparameters and the network yourself.
As for the dataset: I am new here and have not figured out how to upload it yet (I could not find the button).
Here is the course link for everyone: