# coding: utf-8
# # Food Image Classification
#
# ## Friendly Reminder
# Feel free to head over to the course assignment area and try this yourself first!
#
# ## Project Description
# Train a simple convolutional neural network to classify food images.
#
# ## Dataset Description
# This project uses the food-11 dataset, which contains 11 classes:
#
# Bread, Dairy product, Dessert, Egg, Fried food, Meat, Noodles/Pasta, Rice, Seafood, Soup, and Vegetable/Fruit.
# Training set: 9866 images
# Validation set: 3430 images
# Testing set: 3347 images
#
# **Data format**
# Unzipping the downloaded archive produces three folders: training, validation, and testing.
# Images in training and validation are named [class]_[id].jpg; for example, 3_100.jpg is an image of class 3 (the id part does not matter).
#
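# As a quick illustration, the class label can be read straight off the filename (the filename below is illustrative):
# In[ ]:
fname = '3_100.jpg'               # [class]_[id].jpg
label = int(fname.split('_')[0])  # -> 3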
# ## Project Requirements
# * Build the model with a CNN
# * Do not use any extra datasets
# * Pre-trained models are not allowed (the CNN must be written from scratch)
# * Do not look for the labels online
# ## Data Preparation
# In[ ]:
# ## Environment Setup / Installation
#
# None
# In[ ]:
# # Defining the Dataset
# In paddle, we can use Dataset and DataLoader from paddle.io to "wrap" the data, which makes subsequent training and prediction more convenient.
# A Dataset subclass must override two functions: `__len__` and `__getitem__`.
# `__len__` must return the size of the dataset, while `__getitem__` defines what the dataset returns when it is indexed with [idx].
# We never call these two functions directly ourselves, but DataLoader relies on them when enumerating the Dataset; leaving them out causes an error at runtime. A minimal sketch follows.
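# A minimal sketch of this protocol with a toy dataset of random vectors (the names here are illustrative, not part of the project code):
# In[ ]:
import numpy as np
from paddle.io import Dataset, DataLoader

class ToyDataset(Dataset):
    def __init__(self, n=8):
        self.data = np.random.rand(n, 3).astype('float32')

    def __len__(self):
        return len(self.data)           # the dataset size

    def __getitem__(self, idx):
        return self.data[idx], idx % 2  # (sample, label)

toy_loader = DataLoader(ToyDataset(), batch_size=4)
for x, y in toy_loader():
    print(x.shape, y.shape)             # [4, 3] [4]
    break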
# In[ ]:
# Import the required packages
import os
import cv2
import time
import numpy as np
import paddle
from paddle.io import Dataset, DataLoader
from paddle.nn import Sequential, Conv2D, BatchNorm2D, ReLU, MaxPool2D, Linear, Flatten
from paddle.vision.transforms import Compose, Transpose, RandomRotation, RandomHorizontalFlip, Normalize, Resize
# Use the GPU (dynamic graph mode is the default in Paddle 2.x)
paddle.set_device('gpu:0')
paddle.__version__
# In[ ]:
class FoodDataset(Dataset):
    def __init__(self, image_path, image_size=(128, 128), mode='train'):
        self.image_path = image_path
        self.image_file_list = sorted(os.listdir(image_path))
        self.mode = mode
        # Apply data augmentation during training
        self.train_transforms = Compose([
            Resize(size=image_size),
            RandomHorizontalFlip(),
            RandomRotation(15),
            Transpose(),                      # HWC -> CHW
            Normalize(mean=127.5, std=127.5)  # map pixel values from [0, 255] to [-1, 1]
        ])
        # No data augmentation during validation/testing
        self.test_transforms = Compose([
            Resize(size=image_size),
            Transpose(),
            Normalize(mean=127.5, std=127.5)
        ])

    def __len__(self):
        return len(self.image_file_list)

    def __getitem__(self, idx):
        # Note: cv2.imread returns BGR channel order; it is consistent between
        # training and testing, so no conversion is done here.
        img = cv2.imread(os.path.join(self.image_path, self.image_file_list[idx]))
        if self.mode == 'test':
            # Test images are unlabeled
            return self.test_transforms(img)
        # Training and validation images are labeled, but only training data is augmented
        img = self.train_transforms(img) if self.mode == 'train' else self.test_transforms(img)
        # Filenames look like [class]_[id].jpg, so the label is the part before '_'
        label = int(self.image_file_list[idx].split("_")[0])
        return img, label
# In[ ]:
batch_size = 128
traindataset = FoodDataset('work/food-11/training')
valdataset = FoodDataset('work/food-11/validation', mode='val')  # labeled, but without augmentation
train_loader = DataLoader(traindataset, places=paddle.CUDAPlace(0), batch_size=batch_size, shuffle=True, drop_last=True)
val_loader = DataLoader(valdataset, places=paddle.CUDAPlace(0), batch_size=batch_size, shuffle=False, drop_last=False)  # keep every validation sample
# # Model Architecture
#
# A common pattern in convolutional networks is to use "Conv + BN + activation + pooling" as a basic block. We can stack several such blocks to extract features, and finish with Linear layers to perform the classification; a quick one-block shape check follows.
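# A minimal sketch of one such block: each 3x3/stride-1/padding-1 convolution keeps the spatial size, while each 2x2/stride-2 pooling halves it.
# In[ ]:
block = Sequential(
    Conv2D(3, 64, 3, 1, 1),  # [64, 128, 128]
    BatchNorm2D(64),
    ReLU(),
    MaxPool2D(2, 2, 0),      # [64, 64, 64]
)
x = paddle.randn([1, 3, 128, 128])
print(block(x).shape)        # [1, 64, 64, 64]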
# In[ ]:
class Classifier(paddle.nn.Layer):
def __init__(self):
super(Classifier, self).__init__()
        # input shape [3, 128, 128]
self.cnn = Sequential(
Conv2D(3, 64, 3, 1, 1), # [64, 128, 128]
BatchNorm2D(64),
ReLU(),
MaxPool2D(2, 2, 0), # [64, 64, 64]
Conv2D(64, 128, 3, 1, 1), # [128, 64, 64]
BatchNorm2D(128),
ReLU(),
MaxPool2D(2, 2, 0), # [128, 32, 32]
Conv2D(128, 256, 3, 1, 1), # [256, 32, 32]
BatchNorm2D(256),
ReLU(),
MaxPool2D(2, 2, 0), # [256, 16, 16]
Conv2D(256, 512, 3, 1, 1), # [512, 16, 16]
BatchNorm2D(512),
ReLU(),
MaxPool2D(2, 2, 0), # [512, 8, 8]
Conv2D(512, 512, 3, 1, 1), # [512, 8, 8]
BatchNorm2D(512),
ReLU(),
MaxPool2D(2, 2, 0), # [512, 4, 4]
)
self.fc = Sequential(
Linear(512 * 4 * 4, 1024),
ReLU(),
Linear(1024, 512),
ReLU(),
Linear(512, 11)
)
def forward(self, x):
x = self.cnn(x)
x = x.flatten(start_axis=1)
x = self.fc(x)
return x
# In[ ]:
# Inspect the model structure
my_model = paddle.Model(Classifier())
my_model.summary((-1, 3, 128, 128))
# # Model Training
# Train on the training set, and use the validation set to find good parameters.
# In[ ]:
epoch_num = 30
learning_rate = 0.001
model = Classifier()
loss = paddle.nn.CrossEntropyLoss()  # classification task, so use CrossEntropyLoss
optimizer = paddle.optimizer.Adam(learning_rate=learning_rate, parameters=model.parameters())  # use the Adam optimizer
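# CrossEntropyLoss applies softmax internally, so the model should output raw
# logits of shape [N, 11] and the labels should be integer class ids. A quick
# sanity check with dummy tensors (shapes only, values are random):
# In[ ]:
logits = paddle.randn([4, 11])            # raw, un-normalized scores for 4 samples
labels = paddle.to_tensor([0, 3, 10, 5])  # integer class ids in [0, 11)
print(loss(logits, labels))               # scalar mean loss over the batch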
# In[ ]:
print('start training...')
for epoch in range(epoch_num):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    # Training
    model.train()
    for img, label in train_loader():
        optimizer.clear_grad()
        pred = model(img)
        step_loss = loss(pred, label)
        step_loss.backward()
        optimizer.step()

        train_acc += np.sum(np.argmax(pred.numpy(), axis=1) == label.numpy())
        train_loss += float(step_loss)

    # Validation (no gradients needed)
    model.eval()
    with paddle.no_grad():
        for img, label in val_loader():
            pred = model(img)
            step_loss = loss(pred, label)

            val_acc += np.sum(np.argmax(pred.numpy(), axis=1) == label.numpy())
            val_loss += float(step_loss)

    # Print the results
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % (
        epoch + 1, epoch_num, time.time() - epoch_start_time, train_acc / len(traindataset),
        train_loss / len(traindataset), val_acc / len(valdataset), val_loss / len(valdataset)))

    # Save a checkpoint every epoch so that a good one can be picked afterwards
    paddle.save(model.state_dict(), 'model_%d.pdparams' % epoch)
    paddle.save(optimizer.state_dict(), 'optimizer_%d.pdopt' % epoch)
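# Since a checkpoint is saved after every epoch, one simple way to "find good
# parameters" with the validation set is to keep only the checkpoint with the
# best validation accuracy. A minimal sketch (maybe_save_best and
# best_model.pdparams are illustrative names, not part of the original code):
# In[ ]:
def maybe_save_best(model, epoch_val_acc, best_acc, path='best_model.pdparams'):
    """Save the model only when validation accuracy improves; return the new best."""
    if epoch_val_acc > best_acc:
        paddle.save(model.state_dict(), path)
        return epoch_val_acc
    return best_acc

# Inside the epoch loop above, after validation, one would call:
#     best_acc = maybe_save_best(model, val_acc / len(valdataset), best_acc)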
# Once good hyperparameters have been found, train on the training and validation data combined (more data usually yields a better model).
traindataset = FoodDataset('work/food-11/train_val')
train_loader = DataLoader(traindataset, places=paddle.CUDAPlace(0), batch_size=batch_size, shuffle=True, drop_last=True)
# In[ ]:
epoch_num = 30
learning_rate = 0.001
model_best = Classifier()
loss = paddle.nn.CrossEntropyLoss()  # classification task, so use CrossEntropyLoss
optimizer = paddle.optimizer.Adam(learning_rate=learning_rate, parameters=model_best.parameters())  # use the Adam optimizer
# # Loading the Model
# Resume from a checkpoint that looked good on the validation set (epoch 4 here).
# In[ ]:
layer_state_dict = paddle.load('model_4.pdparams')
opt_state_dict = paddle.load('optimizer_4.pdopt')
model_best.set_state_dict(layer_state_dict)
optimizer.set_state_dict(opt_state_dict)
# In[ ]:
print('start training...')
for epoch in range(epoch_num):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    # Training
    model_best.train()
    for img, label in train_loader():
        optimizer.clear_grad()  # gradients must be cleared every step; otherwise they accumulate
        pred = model_best(img)
        step_loss = loss(pred, label)
        step_loss.backward()
        optimizer.step()

        train_acc += np.sum(np.argmax(pred.numpy(), axis=1) == label.numpy())
        train_loss += float(step_loss)

    # Print the results
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % (
        epoch + 1, epoch_num, time.time() - epoch_start_time, train_acc / len(traindataset),
        train_loss / len(traindataset)))
# # Testing
# Use the model we just trained to make predictions.
# In[ ]:
batch_size = 128
testdataset = FoodDataset('work/food-11/testing', mode='test')
test_loader = DataLoader(testdataset, places=paddle.CUDAPlace(0), batch_size=batch_size, shuffle=False, drop_last=False)  # keep the last partial batch so every test image gets a prediction
# In[ ]:
prediction = list()
model_best.eval()
with paddle.no_grad():
    for data in test_loader():
        pred = model_best(data[0])  # unlabeled data comes back as a single-element list
        test_label = np.argmax(pred.numpy(), axis=1)
        for y in test_label:
            prediction.append(y)
# In[ ]:
# Write the results to a CSV file
with open('work/predict.csv', 'w') as f:
f.write('Id,Category\n')
for i, y in enumerate(prediction):
f.write('{},{}\n'.format(i, y))