百度AI Studio首页https://aistudio.baidu.com/aistudio/index
一、课程评价
1、每日课程分为理论部分和实践部分,课程内容由浅至深,循序渐进。每次课后都有作业,用以巩固课上所学的知识,亲自上手搭建网络,调整参数,不断提高准确率,满满的成就感。
2、课程微信群学习氛围浓郁,班班和助教们都很专业,几乎所有的问题发到群里后,很快就能得到班班,助教和同学们的帮助。
3、免费,完全免费,paddlepaddle为每位同学承担1699元/人学费,每天运行AI Studio项目就能获得12小时免费GPU资源(Tesla V100),连续5天还能额外获得48小时,完全就是白嫖啊。
4、奖励多多,完成作业+打比赛就可以获得结业证书,还有小度音响、小度在家、深度学习书籍等各种奖励。
二、课程内容
Day 1:新冠疫情可视化
1、本地安装PaddlePaddle:https://www.paddlepaddle.org.cn/documentation/docs/zh/install/index_cn.html
2、使用Pyecharts进行全国疫情实时数据可视化
#安装pyecharts
#pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pyecharts
# Install pyecharts:
#   pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pyecharts
import json
import datetime

from pyecharts.charts import Map
from pyecharts import options as opts
from pyecharts.charts import Pie

# Read the raw daily data file; files are named by date, e.g. work/20200315.json.
today = datetime.date.today().strftime('%Y%m%d')
datafile = 'work/' + today + '.json'
with open(datafile, 'r', encoding='UTF-8') as file:
    json_array = json.loads(file.read())

# Collect nationwide real-time confirmed counts: the 'confirmedCount' field
# of each province record, keyed by the province short name.
china_data = []
for province in json_array:
    china_data.append((province['provinceShortName'], province['confirmedCount']))
china_data = sorted(china_data, key=lambda x: x[1], reverse=True)  # reverse=True: descending order
print(china_data)

# Pie chart of nationwide confirmed cases.
labels = [data[0] for data in china_data]
counts = [data[1] for data in china_data]
data_pair = [list(z) for z in zip(labels, counts)]

pie = Pie(init_opts=opts.InitOpts(width='900px', height='500px'))
pie.add(
    series_name="新冠病例统计",
    data_pair=data_pair,
    radius="55%",
    center=["30%", "70%"],
    label_opts=opts.LabelOpts(is_show=False, position="center"),
).set_global_opts(
    title_opts=opts.TitleOpts(title='全国实时确诊数据', subtitle='数据来源:丁香园 '),
    legend_opts=opts.LegendOpts(is_show=False),
).set_series_opts(
    tooltip_opts=opts.TooltipOpts(
        trigger="item",
        # The original literal contained a raw newline (a syntax error);
        # an ECharts tooltip template uses an HTML line break instead.
        formatter="{a} <br/>{b}: {c} ({d}%)",
    ),
    label_opts=opts.LabelOpts(formatter="{b}: {c}"),
)
pie.render(path='/home/aistudio/work/pie.html')
Day 2:手势识别
DNN网络结构(经过N次训练后,测试集准确率为0.982)
数据集地址:
https://aistudio.baidu.com/aistudio/datasetdetail/2182
# Network definition for the gesture-recognition task (Day 2).
class MyDNN(fluid.dygraph.Layer):
    """Small conv + fully-connected classifier with 10 output classes."""

    def __init__(self):
        super(MyDNN, self).__init__()
        # Feature extractor: two conv/max-pool stages.
        self.conv1 = Conv2D(num_channels=3, num_filters=6, filter_size=2, stride=2, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_type='max', pool_stride=2)
        self.conv2 = Conv2D(num_channels=6, num_filters=16, filter_size=3, stride=2, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_type='max', pool_stride=2)
        # Classifier head: one hidden layer, then softmax over 10 classes.
        self.linear1 = Linear(16 * 6 * 6, 256, act='relu')
        self.linear2 = Linear(256, 10, act='softmax')

    def forward(self, input):
        feat = self.pool1(self.conv1(input))
        feat = self.pool2(self.conv2(feat))
        # Flatten to (batch, 16*6*6) before the dense layers.
        feat = fluid.layers.reshape(feat, shape=[-1, 16 * 6 * 6])
        feat = self.linear1(feat)
        # Dropout regularization between the two dense layers.
        feat = fluid.layers.dropout(feat, dropout_prob=0.7)
        return self.linear2(feat)
Day 3:车牌识别
CNN网络结构(经过N次训练后,测试集准确率为0.978)
数据集地址:
https://aistudio.baidu.com/aistudio/datasetdetail/23617
# Network definition for the license-plate recognition task (Day 3).
class MyLeNet(fluid.dygraph.Layer):
    """LeNet-style CNN with a 65-class softmax output."""

    def __init__(self):
        super(MyLeNet, self).__init__()
        # Stage 1: conv + max-pool.
        self.hidden1_1 = Conv2D(num_channels=1, num_filters=28, filter_size=5, stride=1, act='relu')
        self.hidden1_2 = Pool2D(2, pool_type='max', pool_stride=1)
        # Stage 2: conv + max-pool.
        self.hidden2_1 = Conv2D(num_channels=28, num_filters=32, filter_size=3, stride=1, act='relu')
        self.hidden2_2 = Pool2D(2, pool_type='max', pool_stride=1)
        # Stage 3: conv, then a softmax classifier over the 65 classes.
        self.hidden3 = Conv2D(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu')
        self.hidden4 = Linear(32 * 10 * 10, 65, act='softmax')

    def forward(self, input):
        feat = self.hidden1_2(self.hidden1_1(input))
        feat = self.hidden2_2(self.hidden2_1(feat))
        feat = self.hidden3(feat)
        # Flatten to (batch, 32*10*10) for the dense classifier.
        feat = fluid.layers.reshape(feat, shape=[-1, 32 * 10 * 10])
        return self.hidden4(feat)
Day 4:口罩分类
VGG16网络结构(准确率为1.0)
数据集地址:
https://aistudio.baidu.com/aistudio/datasetdetail/22392
https://aistudio.baidu.com/aistudio/datasetdetail/23615
class ConvPool(fluid.dygraph.Layer):
    """A stack of `groups` Conv2D layers followed by a single Pool2D layer."""

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 pool_size,
                 pool_stride,
                 groups,
                 pool_padding=0,
                 pool_type='max',
                 conv_stride=1,
                 conv_padding=0,
                 act=None):
        super(ConvPool, self).__init__()
        self._conv2d_list = []
        for idx in range(groups):
            # add_sublayer registers the conv as a child so its parameters
            # are tracked and trained with the rest of the model.
            conv = self.add_sublayer(
                'bb_%d' % idx,
                fluid.dygraph.Conv2D(
                    num_channels=num_channels[idx],  # input channels for this conv
                    num_filters=num_filters,         # number of kernels
                    filter_size=filter_size,         # kernel size
                    stride=conv_stride,              # conv stride
                    padding=conv_padding,            # conv padding (default 0)
                    act=act))
            self._conv2d_list.append(conv)
        self._pool2d = fluid.dygraph.Pool2D(
            pool_size=pool_size,        # pooling window size
            pool_type=pool_type,        # pooling type (default: max)
            pool_stride=pool_stride,    # pooling stride
            pool_padding=pool_padding)  # pooling padding

    def forward(self, inputs):
        out = inputs
        for conv in self._conv2d_list:
            out = conv(out)
        return self._pool2d(out)
class VGGNet(fluid.dygraph.Layer):
    """VGG16-style network with a 2-class softmax output (Day 4)."""

    def __init__(self):
        super(VGGNet, self).__init__()
        # Five conv/pool stages; the per-stage channel lists feed ConvPool.
        self.c2p1_64 = ConvPool(num_channels=[3,64],num_filters=64,filter_size=3,pool_size=2,pool_stride=2,groups=2,conv_stride=1,conv_padding=1,act='relu')
        self.c2p2_128 = ConvPool(num_channels=[64,128],num_filters=128,filter_size=3,pool_size=2,pool_stride=2,groups=2,conv_stride=1,conv_padding=1,act='relu')
        self.c3p3_256 = ConvPool(num_channels=[128,256,256],num_filters=256,filter_size=3,pool_size=2,pool_stride=2,groups=3,conv_stride=1,conv_padding=1,act='relu')
        self.c3p4_512 = ConvPool(num_channels=[256,512,512],num_filters=512,filter_size=3,pool_size=2,pool_stride=2,groups=3,conv_stride=1,conv_padding=1,act='relu')
        self.c3p5_512 = ConvPool(num_channels=[512,512,512],num_filters=512,filter_size=3,pool_size=2,pool_stride=2,groups=3,conv_stride=1,conv_padding=1,act='relu')
        # Classic VGG head: 4096-4096-2 fully connected layers.
        self.linear1 = fluid.dygraph.Linear(512 * 7 * 7, 4096, act='relu')
        self.linear2 = fluid.dygraph.Linear(4096, 4096, act='relu')
        self.linear3 = fluid.dygraph.Linear(4096, 2, act='softmax')

    def forward(self, inputs):
        """Forward pass: five conv stages, flatten, three dense layers."""
        feat = self.c2p1_64(inputs)
        feat = self.c2p2_128(feat)
        feat = self.c3p3_256(feat)
        feat = self.c3p4_512(feat)
        feat = self.c3p5_512(feat)
        # Flatten to (batch, 512*7*7) for the classifier head.
        feat = fluid.layers.reshape(feat, shape=[-1, 512 * 7 * 7])
        # Dropout after each of the first two dense layers.
        feat = fluid.layers.dropout(self.linear1(feat), dropout_prob=0.5)
        feat = fluid.layers.dropout(self.linear2(feat), dropout_prob=0.5)
        return self.linear3(feat)
Day 5:PaddleHub体验
【比赛】人流密度检测
数据集地址:https://aistudio.baidu.com/aistudio/datasetdetail/1917
Day 6:PaddleSlim模型压缩
PaddleSlim代码地址: https://github.com/PaddlePaddle/PaddleSlim
文档地址:https://paddlepaddle.github.io/PaddleSlim/
# Install paddleslim:
#   pip install paddleslim
# 1. Import dependencies.
import paddle
import paddle.fluid as fluid
import paddleslim as slim
import numpy as np
# 2. Build the model.
# Build a MobileNetV1 classifier for MNIST: input shape [1, 28, 28], 10
# output classes. paddleslim.models predefines helpers for classification
# models; this call returns the executor, the train/eval programs, and the
# input/output variables used below.
use_gpu = fluid.is_compiled_with_cuda()
exe, train_program, val_program, inputs, outputs = slim.models.image_classification("MobileNet", [1, 28, 28], 10, use_gpu=use_gpu)
place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
# 3. Define the input data.
# MNIST keeps the example fast; paddle.dataset.mnist handles downloading
# and reading the data.
import paddle.dataset.mnist as reader
train_reader = paddle.batch(
reader.train(), batch_size=128, drop_last=True)
test_reader = paddle.batch(
reader.test(), batch_size=128, drop_last=True)
data_feeder = fluid.DataFeeder(inputs, place)
#4. 训练和测试
#先定义训练和测试函数,正常训练和量化训练时只需要调用函数即可。在训练函数中执行了一个epoch的训练,因为MNIST数据集数据较少,一个epoch就可将top1精度训练到95%以上。
def train(prog):
    """Run one epoch of training on `prog`, printing metrics every 100 steps.

    Args:
        prog: the fluid Program to execute (train_program or quant_program).

    Relies on module-level `exe`, `train_reader`, `data_feeder`, `outputs`.
    """
    # enumerate replaces the manual counter, which also shadowed the
    # built-in `iter`.
    for step, data in enumerate(train_reader()):
        acc1, acc5, loss = exe.run(prog, feed=data_feeder.feed(data), fetch_list=outputs)
        if step % 100 == 0:
            print('train iter={}, top1={}, top5={}, loss={}'.format(step, acc1.mean(), acc5.mean(), loss.mean()))
def test(prog):
    """Evaluate `prog` over the whole test set; print the mean top1/top5.

    Args:
        prog: the fluid Program to execute (val_program or val_quant_program).

    Relies on module-level `exe`, `test_reader`, `data_feeder`, `outputs`, `np`.
    """
    res = [[], []]
    # enumerate replaces the manual counter, which also shadowed the
    # built-in `iter`.
    for step, data in enumerate(test_reader()):
        acc1, acc5, loss = exe.run(prog, feed=data_feeder.feed(data), fetch_list=outputs)
        if step % 100 == 0:
            print('test iter={}, top1={}, top5={}, loss={}'.format(step, acc1.mean(), acc5.mean(), loss.mean()))
        # Accumulate per-batch means; final result is their average.
        res[0].append(acc1.mean())
        res[1].append(acc5.mean())
    print('final test result top1={}, top5={}'.format(np.array(res[0]).mean(), np.array(res[1]).mean()))
#调用train函数训练分类网络,train_program是在第2步:构建网络中定义的
train(train_program)
train iter=0, top1=0.0390625, top5=0.46875, loss=2.89760327339
train iter=100, top1=0.9296875, top5=0.9921875, loss=0.196367427707
train iter=200, top1=0.96875, top5=1.0, loss=0.125141501427
train iter=300, top1=0.9609375, top5=0.9921875, loss=0.158306568861
train iter=400, top1=0.9375, top5=1.0, loss=0.178206339478
#调用test函数测试分类网络,val_program是在第2步:构建网络中定义的。
test(val_program)
test iter=0, top1=0.9765625, top5=1.0, loss=0.0672305747867
final test result top1=0.962439894676, top5=0.998597741127
# 5. Quantize the model.
# Insert quantize/dequantize ops into train_program and val_program
# according to the default quant_aware configuration.
place = exe.place
# Use the `place` bound above instead of re-reading exe.place each call
# (the original assigned it and then never used it).
quant_program = slim.quant.quant_aware(train_program, place, for_test=False)
val_quant_program = slim.quant.quant_aware(val_program, place, for_test=True)
2020-04-05 21:52:28,981-INFO: quant_aware config {'moving_rate': 0.9, 'weight_quantize_type': 'channel_wise_abs_max', 'is_full_quantize': False, 'dtype': 'int8', 'weight_bits': 8, 'window_size': 10000, 'activation_bits': 8, 'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'], 'not_quant_pattern': ['skip_quant'], 'activation_quantize_type': 'moving_average_abs_max', 'for_tensorrt': False}
2020-04-05 21:52:30,032-INFO: quant_aware config {'moving_rate': 0.9, 'weight_quantize_type': 'channel_wise_abs_max', 'is_full_quantize': False, 'dtype': 'int8', 'weight_bits': 8, 'window_size': 10000, 'activation_bits': 8, 'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'], 'not_quant_pattern': ['skip_quant'], 'activation_quantize_type': 'moving_average_abs_max', 'for_tensorrt': False}
#6 训练和测试量化后的模型
#微调量化后的模型,训练一个epoch后测试。
train(quant_program)
train iter=0, top1=0.96875, top5=1.0, loss=0.0995958149433
train iter=100, top1=0.96875, top5=1.0, loss=0.0631555095315
train iter=200, top1=0.96875, top5=1.0, loss=0.100415810943
train iter=300, top1=0.9921875, top5=0.9921875, loss=0.0579719617963
train iter=400, top1=0.953125, top5=1.0, loss=0.122109659016
#测试量化后的模型,和3.2 训练和测试中得到的测试结果相比,精度相近,达到了无损量化。
test(val_quant_program)
test iter=0, top1=0.984375, top5=1.0, loss=0.0304987411946
final test result top1=0.973056912422, top5=0.99919873476
三、心得体会
课程难度适中,作业提供baseline,降低了学习的难度。在作业中手写了DNN,LeNet,VGG16等网络,并且通过修改网络结构和参数得到了较高的准确率,是一次很好的实践经历。在课程的学习中,对深度学习的一些概念有了更深的理解,实践的经验对毕业设计也有很大帮助。例如可以通过dropout和正则化等方法解决过拟合问题;模型的准确率不仅仅与网络结构有关,还和batch_size,learning_rate等参数密不可分
四、PaddlePaddle打卡营预告
1、下一期打卡营
课程时间:4月22日
课程内容:python和AI打卡营,适合无人工智能背景,无编程基础、无学习氛围、无计算资源又想好好学习、天天向上的纯小白
课程简介:基础语法、进阶运用、深度学习工具和paddlehub 创意赛
课程报名:关注飞桨PaddlePaddle公众号
2、筹备中打卡营
1、论文复现营:以CV领域检测任务、GAN、视频分类等经典方向的最新顶会论文(如,CVPR)为案例,介绍论文研读、框架搭建、模型优化等方面内容,带你进入科研大军
2、竞赛辅导营:以CV领域研究最应用最广的目标检测为题,讲解目标检测两阶段(R-CNN、SPP-Net、Fast R-CNN、Faster R-CNN、FPN、Mask-RCNN)和一阶段方法(YOLO、SSD),介绍大赛的赛题解读、数据分析、模型选取、模型优化、后处理调优的方面经验知识