Hands-On Gesture Recognition — Baidu PaddlePaddle Deep Learning 7-Day Course, Day 02

  • The three steps of deep learning
    • Build the model
    • Loss function
    • Parameter learning
  • Training with the PaddlePaddle 1.7.0 dynamic graph
    • Data loading and preprocessing
    • Model definition (see "Build the Model" above)
    • Training with the dynamic graph
    • Model validation
    • Loading a test image and making a prediction

The Three Steps of Deep Learning

The gesture recognition project is built on paddlepaddle 1.7.0 (because we need its dynamic graph, i.e. dygraph, API).

Build the Model

  1. What network architecture should we choose?
  2. How many layers, and how many neurons per layer?

A neural network does not have to be as deep as possible: the deeper the network, the more complex it becomes, and architectures such as ResNet and DenseNet were designed precisely to fight the overfitting and gradient-propagation problems that come with depth.
For this task we can pick a simple network, such as the classic, basic LeNet.

class LeNet(fluid.dygraph.Layer):
    def __init__(self, training=True):
        super(LeNet, self).__init__()
        self.conv1 = Conv2D(num_channels=3, num_filters=32, filter_size=3, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2)

        self.conv2 = Conv2D(num_channels=32, num_filters=32, filter_size=3, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2)

        self.conv3 = Conv2D(num_channels=32, num_filters=64, filter_size=3, act='relu')
        self.pool3 = Pool2D(pool_size=2, pool_stride=2)

        self.fc1 = Linear(input_dim=6400, output_dim=4096, act='relu')
        self.drop_ratio = 0.5 if training else 0.0  # dropout only when training
        self.fc2 = Linear(input_dim=4096, output_dim=10)

    def forward(self, inputs):
        conv1 = self.conv1(inputs)  # [N, 32, 98, 98]
        pool1 = self.pool1(conv1)   # [N, 32, 49, 49]

        conv2 = self.conv2(pool1)   # [N, 32, 47, 47]
        pool2 = self.pool2(conv2)   # [N, 32, 23, 23]

        conv3 = self.conv3(pool2)   # [N, 64, 21, 21]
        pool3 = self.pool3(conv3)   # [N, 64, 10, 10]

        rs_1 = fluid.layers.reshape(pool3, [pool3.shape[0], -1])  # flatten to [N, 6400]
        fc1 = self.fc1(rs_1)
        drop1 = fluid.layers.dropout(fc1, self.drop_ratio)
        y = self.fc2(drop1)

        return y

Here we implement the convolutional neural network with paddlepaddle.
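
As a quick sanity check (my own addition, not part of the course material), the sketch below feeds a random batch through the network inside a dygraph guard and prints the output shape; it assumes the LeNet class above plus the imports listed in the data-preprocessing section.

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    net = LeNet(training=False)  # dropout disabled for this shape check
    dummy = np.random.rand(4, 3, 100, 100).astype('float32')  # [N, C, H, W], matching the 100x100 inputs
    out = net(fluid.dygraph.to_variable(dummy))
    print(out.shape)  # expected: [4, 10]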

Loss Function

Pick one of the commonly used loss functions, such as squared error or cross entropy.
Here we use the cross-entropy loss implemented in paddlepaddle:
fluid.layers.softmax_with_cross_entropy

For reference, the official documentation of the paddlepaddle dygraph API:
https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/dygraph_cn.html
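
A minimal sketch of how the loss is used, following the same pattern as the training code later in this post (logits are the raw network outputs, label is an int64 tensor of shape [N, 1]; the names are illustrative):

loss = fluid.layers.softmax_with_cross_entropy(logits, label)  # softmax + cross entropy in one op
avg_loss = fluid.layers.mean(loss)  # reduce to a scalar before calling backward()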

Parameter Learning

  1. Gradient descent
  2. Backpropagation

The main optimization algorithms are GD, SGD, Momentum, RMSProp, and Adam.

We choose fluid.optimizer.Momentum, which keeps an exponentially weighted moving average of the gradients; like an object with momentum, it cannot stop the instant you tell it to. A minimal version of the update rule is sketched below.
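
Here is a small NumPy sketch of the classic momentum update rule (my own illustration of the idea, not PaddlePaddle's internal code; all names are made up):

import numpy as np

def momentum_step(param, grad, velocity, lr=0.001, mu=0.9):
    # The velocity accumulates an exponentially weighted average of past gradients.
    velocity = mu * velocity + grad
    # The parameter moves against the accumulated direction, not just the current gradient.
    return param - lr * velocity, velocity

w, v = np.array([1.0]), np.zeros(1)
for _ in range(3):
    w, v = momentum_step(w, grad=2.0 * w, velocity=v)  # toy gradient of f(w) = w**2
print(w, v)  # w keeps drifting toward 0 even as the raw gradient shrinks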

Training with the PaddlePaddle 1.7.0 Dynamic Graph

Data Loading and Preprocessing

import os
import time
import random
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from multiprocessing import cpu_count
from paddle.fluid.dygraph import Pool2D,Conv2D
from paddle.fluid.dygraph import Linear

# Build the image lists for the training and test splits
data_path = '/home/aistudio/data/data23668/Dataset'
character_folders = os.listdir(data_path)
# print(character_folders)
if(os.path.exists('./train_data.list')):
    os.remove('./train_data.list')
if(os.path.exists('./test_data.list')):
    os.remove('./test_data.list')
    
for character_folder in character_folders:
    
    with open('./train_data.list', 'a') as f_train:
        with open('./test_data.list', 'a') as f_test:
            if character_folder == '.DS_Store':
                continue
            character_imgs = os.listdir(os.path.join(data_path,character_folder))
            count = 0 
            for img in character_imgs:
                if img =='.DS_Store':
                    continue
                if count%10 == 0:
                    f_test.write(os.path.join(data_path,character_folder,img) + '\t' + character_folder + '\n')
                else:
                    f_train.write(os.path.join(data_path,character_folder,img) + '\t' + character_folder + '\n')
                count +=1
print('Image lists generated')

# Define the readers for the training and test sets
def data_mapper(sample):
    img, label = sample
    img = Image.open(img)
    img = img.resize((100, 100), Image.ANTIALIAS)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1))
    img = img/255.0
    return img, label

def data_reader(data_list_path):
    def reader():
        with open(data_list_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                img, label = line.split('\t')
                yield img, int(label)
    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 512)
 
# Data provider for training
train_reader = paddle.batch(reader=paddle.reader.shuffle(reader=data_reader('./train_data.list'), buf_size=256), batch_size=32)
# Data provider for testing
test_reader = paddle.batch(reader=data_reader('./test_data.list'), batch_size=32)
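
If you want to confirm that the readers produce what the training loop expects, a quick peek like the one below (my own addition, assuming the readers defined above) prints the shape of the first batch:

# Each batch element is an (image, label) pair produced by data_mapper
first_batch = next(train_reader())
imgs = np.array([x[0] for x in first_batch], np.float32)
print(imgs.shape, first_batch[0][1])  # expected: (32, 3, 100, 100) and an integer label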

Model Definition

The model is the LeNet class defined in "Build the Model" above, so the code is not repeated here.

Training with the Dynamic Graph

with fluid.dygraph.guard():

    model = LeNet(True)  # instantiate the model (modified)
    model.train()  # training mode
    # opt = fluid.optimizer.SGDOptimizer(learning_rate=0.01, parameter_list=model.parameters())  # alternative: plain SGD, kept for reference
    opt = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9, parameter_list=model.parameters())  # modified: Momentum optimizer

    epochs_num = 150  # number of epochs (changed from 20 to 150)
    # Around 60 epochs already reaches 90%+ accuracy; about 100 is a reasonable choice.

    for pass_num in range(epochs_num):

        for batch_id, data in enumerate(train_reader()):

            images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)

            labels = np.array([x[1] for x in data]).astype('int64')
            labels = labels[:, np.newaxis]
            # print(images.shape)
            image = fluid.dygraph.to_variable(images)
            label = fluid.dygraph.to_variable(labels)
            logits = model(image)  # forward pass (modified)
            pred = fluid.layers.softmax(logits)
            # loss = fluid.layers.cross_entropy(pred, label)  # course baseline, kept for reference
            loss = fluid.layers.softmax_with_cross_entropy(logits, label)  # modified: loss on raw logits
            avg_loss = fluid.layers.mean(loss)  # scalar loss

            acc = fluid.layers.accuracy(pred, label)  # batch accuracy (modified)

            if batch_id != 0 and batch_id % 50 == 0:
                print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(pass_num, batch_id, avg_loss.numpy(), acc.numpy()))

            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()

    fluid.save_dygraph(model.state_dict(), 'LeNet')  # save the model (modified)

Model Validation

with fluid.dygraph.guard():
    accs = []
    model_dict, _ = fluid.load_dygraph('LeNet')  # modified
    model = LeNet(False)  # training=False so dropout is disabled during evaluation
    model.load_dict(model_dict)  # load the model parameters
    model.eval()  # evaluation mode
    for batch_id, data in enumerate(test_reader()):  # iterate over the test set
        images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]

        image = fluid.dygraph.to_variable(images)
        label = fluid.dygraph.to_variable(labels)

        predict = model(image)
        acc = fluid.layers.accuracy(predict, label)
        accs.append(acc.numpy()[0])
    avg_acc = np.mean(accs)  # average accuracy over all test batches
    print(avg_acc)

I trained the model as described and reached about 88% accuracy. Adding one more convolutional layer to the model pushed my runs to about 95%, so feel free to modify it yourself; one possible version of that change is sketched below.
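
A minimal sketch of that extra layer (my own version, not the author's exact code): append a fourth conv/pool block after conv3/pool3 and shrink fc1 to match the new feature-map size.

# In __init__, after pool3 (the feature map is [N, 64, 10, 10] at that point):
self.conv4 = Conv2D(num_channels=64, num_filters=128, filter_size=3, act='relu')  # -> [N, 128, 8, 8]
self.pool4 = Pool2D(pool_size=2, pool_stride=2)                                   # -> [N, 128, 4, 4]
self.fc1 = Linear(input_dim=128 * 4 * 4, output_dim=4096, act='relu')             # 2048 flattened features

# In forward, after pool3:
conv4 = self.conv4(pool3)
pool4 = self.pool4(conv4)
rs_1 = fluid.layers.reshape(pool4, [pool4.shape[0], -1])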

Loading a Test Image and Making a Prediction

def load_image(path):
    img = Image.open(path)
    img = img.resize((100, 100), Image.ANTIALIAS)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1))
    img = img/255.0
    print(img.shape)
    return img

# Build the inference pass in dygraph mode
with fluid.dygraph.guard():
    infer_path = '手势.JPG'
    model = LeNet(False)  # instantiate the model with dropout disabled
    model_dict, _ = fluid.load_dygraph('LeNet')  # modified
    model.load_dict(model_dict)  # load the model parameters
    model.eval()  # evaluation mode
    infer_img = load_image(infer_path)
    infer_img = np.array(infer_img).astype('float32')
    infer_img = infer_img[np.newaxis, :, :, :]  # add the batch dimension: [1, 3, 100, 100]
    infer_img = fluid.dygraph.to_variable(infer_img)
    result = model(infer_img)
    display(Image.open('手势.JPG'))  # display() is available in notebook environments such as AI Studio
    print(np.argmax(result.numpy()))  # predicted class id

We can see that the model correctly predicts the gesture with label 5. Although the accuracy is only around 88%, the example clearly works as a demonstration, and you can tune the hyperparameters and the network yourself.
As for the dataset: I am new to the platform and have not yet figured out how to upload it; I could not find a button for that.
Here is the course link:
