此次资料为网络上收集到的食物照片,共11类。
(1) Bread, (2) Dairy product, (3) Dessert, (4) Egg, (5) Fried food,
(6) Meat, (7) Noodles/Pasta, (8) Rice, (9) Seafood, (10) Soup, (11) Vegetable/Fruit.
数据集:
目标:
使用 CNN 对图片进行分类
(以下代码是参照官方代码手敲的,与官方代码只存在小部分不同)
import os
import torch
import cv2
import numpy as np
import torchvision.transforms as transforms
from torch.utils.data import Dataset,DataLoader
import torch.nn as nn
import time
读取数据的函数
def readfile(path, label):
    '''
    Read every image under *path* as a 128x128 BGR uint8 array.

    path: directory that holds the image files.
    label: bool — when True, also parse and return the class labels.
        File naming scheme: "<class>_<index>.jpg", e.g. 0_1.jpg is the
        1st image of class 0.

    Returns (x, y) when label is True, otherwise x alone, where x has
    shape (m, 128, 128, 3) and y has shape (m,).
    '''
    # Sorted file names so samples pair deterministically with labels.
    img_dir = sorted(os.listdir(path))
    # Pre-allocate (m, N_W, N_H, N_C) buffers: m colour images of 128*128.
    x = np.zeros((len(img_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(img_dir)), dtype=np.uint8)
    for i, file in enumerate(img_dir):
        # Join the folder and file name to read each image.
        img = cv2.imread(os.path.join(path, file))
        # Bug fix: cv2.imread returns None for unreadable/non-image files;
        # fail early with a clear message instead of a cryptic resize error.
        if img is None:
            raise ValueError("unreadable image file: " + os.path.join(path, file))
        x[i, :, :] = cv2.resize(img, (128, 128))
        if label:
            # Class id is the part of the file name before the underscore.
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    else:
        return x
读取数据
# Load the three splits from disk; training/validation are labeled,
# the test split is not.
img_dir = 'Dataset/food-11'  # dataset root folder
print('Reading data')
train_x, train_y = readfile(os.path.join(img_dir, "training"), True)
print("Size of training data={}".format(len(train_x)))
val_x, val_y = readfile(os.path.join(img_dir, "validation"), True)
print("Size of validation data={}".format(len(val_x)))
test_x = readfile(os.path.join(img_dir, "testing"), False)
print("Size of testing data={}".format(len(test_x)))
输出结果:
Reading data
Size of training data=9866
Size of validation data=3431
Size of testing data=3347
# Define the transform pipelines.
# Training-set transform: augmentation followed by tensor conversion.
# NOTE(review): "trasform" is a typo, but the name is referenced below,
# so it is left unchanged.
train_trasform = transforms.Compose([
    transforms.ToPILImage(),
    # Data augmentation
    transforms.RandomHorizontalFlip(),  # random horizontal flip
    transforms.RandomRotation(15),      # random rotation within +/-15 degrees
    transforms.ToTensor(),
])
# Test/validation data needs no augmentation — just tensor conversion.
test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])
#继承Dataset来获取数据
class ImgDataset(Dataset):
    """torch Dataset over pre-loaded image arrays with optional labels."""

    def __init__(self, x, y=None, transform=None):
        self.x = x
        # Labels are stored as LongTensor (the type CrossEntropyLoss expects);
        # y stays None for unlabeled (test) data.
        self.y = torch.LongTensor(y) if y is not None else None
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, item):
        # Fetch one sample, applying the transform when one is configured.
        sample = self.x[item]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.y is None:
            return sample
        return sample, self.y[item]
使用Dataloder进行抽样
batch_size = 128
# Wrap the arrays in Datasets; only the training split is augmented.
train_set = ImgDataset(train_x, train_y, train_trasform)
val_set = ImgDataset(val_x, val_y, test_transform)
# Shuffle training batches every epoch; keep validation order fixed.
train_loder = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loder = DataLoader(val_set, batch_size=batch_size, shuffle=False)
通道数 $N_C$ 不断变多,$N_h$、$N_w$ 不断减少。
class Classifier(nn.Module):
    """CNN classifier for the 11 food categories.

    Five conv blocks (conv -> batchnorm -> relu -> maxpool) each halve the
    spatial size while widening the channels, followed by a three-layer
    fully-connected head. Input: [3, 128, 128] (channels-first tensors).
    Feature-map shapes: [64,64,64] -> [128,32,32] -> [256,16,16]
    -> [512,8,8] -> [512,4,4].
    """

    def __init__(self) -> None:
        super().__init__()
        layers = []
        # (in_channels, out_channels) per conv block; layer order matches the
        # original hand-written Sequential, so state_dict keys are unchanged.
        for c_in, c_out in [(3, 64), (64, 128), (128, 256), (256, 512), (512, 512)]:
            layers += [
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2, 0),  # halves H and W
            ]
        self.cnn = nn.Sequential(*layers)
        # Final feature map [512, 4, 4] flattened -> scores for 11 classes.
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        features = self.cnn(x)
        flat = features.view(features.size(0), -1)  # flatten per sample
        return self.fc(flat)
model = Classifier()
# Move the model to the GPU when one is available.
if torch.cuda.is_available():
    model = model.cuda()
loss = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss = loss.cuda()
# Adam optimizer over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epoch = 30
for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0
    model.train()  # enable training mode — matters when Dropout/BatchNorm are used
    for data in train_loder:
        img, targets = data
        if torch.cuda.is_available():
            img = img.cuda()
            targets = targets.cuda()
        optimizer.zero_grad()  # clear accumulated gradients
        train_pred = model(img)  # forward pass
        batch_loss = loss(train_pred, targets)  # compute the loss
        batch_loss.backward()  # backpropagate
        optimizer.step()  # one gradient-descent step
        train_acc += (train_pred.argmax(1) == targets).sum()  # count correct predictions
        train_loss += batch_loss.item()
    model.eval()  # switch to eval mode for the validation pass (model selection)
    with torch.no_grad():  # no gradients needed during validation
        for data in val_loder:
            img, targets = data
            if torch.cuda.is_available():
                img = img.cuda()
                targets = targets.cuda()
            val_pred = model(img)
            batch_loss = loss(val_pred, targets)
            val_acc += (val_pred.argmax(1) == targets).sum()
            val_loss += batch_loss.item()
    # NOTE: losses are normalized by dataset size (not batch count), so the
    # printed loss values are small; accuracy is fraction correct.
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % (epoch + 1, num_epoch, time.time() - epoch_start_time, train_acc / train_set.__len__(), train_loss / train_set.__len__(), val_acc / val_set.__len__(), val_loss / val_set.__len__()))
运行结果:
[001/030] 23.66 sec(s) Train Acc: 0.236063 Loss: 0.017809 | Val Acc: 0.213932 loss: 0.017797
[002/030] 23.69 sec(s) Train Acc: 0.347963 Loss: 0.014709 | Val Acc: 0.323521 loss: 0.015096
[003/030] 23.48 sec(s) Train Acc: 0.397730 Loss: 0.013572 | Val Acc: 0.293209 loss: 0.019180
[004/030] 23.88 sec(s) Train Acc: 0.445064 Loss: 0.012603 | Val Acc: 0.445351 loss: 0.012793
[005/030] 23.58 sec(s) Train Acc: 0.488547 Loss: 0.011668 | Val Acc: 0.476246 loss: 0.011889
[006/030] 23.53 sec(s) Train Acc: 0.533448 Loss: 0.010752 | Val Acc: 0.495482 loss: 0.011747
[007/030] 23.54 sec(s) Train Acc: 0.557369 Loss: 0.010105 | Val Acc: 0.476246 loss: 0.012213
[008/030] 23.52 sec(s) Train Acc: 0.572066 Loss: 0.009716 | Val Acc: 0.519382 loss: 0.011405
[009/030] 23.55 sec(s) Train Acc: 0.595682 Loss: 0.009187 | Val Acc: 0.556398 loss: 0.010150
[010/030] 23.83 sec(s) Train Acc: 0.634502 Loss: 0.008349 | Val Acc: 0.521714 loss: 0.011477
[011/030] 23.56 sec(s) Train Acc: 0.641496 Loss: 0.008131 | Val Acc: 0.587292 loss: 0.010066
[012/030] 23.58 sec(s) Train Acc: 0.675451 Loss: 0.007482 | Val Acc: 0.489070 loss: 0.012889
[013/030] 23.54 sec(s) Train Acc: 0.681431 Loss: 0.007245 | Val Acc: 0.552026 loss: 0.011332
[014/030] 23.50 sec(s) Train Acc: 0.693493 Loss: 0.007033 | Val Acc: 0.575634 loss: 0.010282
[015/030] 23.59 sec(s) Train Acc: 0.701906 Loss: 0.006724 | Val Acc: 0.610318 loss: 0.009404
[016/030] 23.86 sec(s) Train Acc: 0.720454 Loss: 0.006281 | Val Acc: 0.572428 loss: 0.011218
[017/030] 23.58 sec(s) Train Acc: 0.744172 Loss: 0.005858 | Val Acc: 0.606529 loss: 0.010286
[018/030] 23.60 sec(s) Train Acc: 0.749443 Loss: 0.005562 | Val Acc: 0.602157 loss: 0.010116
[019/030] 23.53 sec(s) Train Acc: 0.771032 Loss: 0.005292 | Val Acc: 0.574177 loss: 0.011388
[020/030] 23.52 sec(s) Train Acc: 0.751976 Loss: 0.005733 | Val Acc: 0.615855 loss: 0.010243
[021/030] 23.87 sec(s) Train Acc: 0.751368 Loss: 0.005585 | Val Acc: 0.638298 loss: 0.009475
[022/030] 23.67 sec(s) Train Acc: 0.788364 Loss: 0.004805 | Val Acc: 0.637424 loss: 0.009800
[023/030] 23.61 sec(s) Train Acc: 0.802858 Loss: 0.004554 | Val Acc: 0.605946 loss: 0.011229
[024/030] 23.54 sec(s) Train Acc: 0.803061 Loss: 0.004387 | Val Acc: 0.663072 loss: 0.009186
[025/030] 23.52 sec(s) Train Acc: 0.828299 Loss: 0.003794 | Val Acc: 0.618770 loss: 0.011051
[026/030] 23.61 sec(s) Train Acc: 0.840969 Loss: 0.003588 | Val Acc: 0.657826 loss: 0.010324
[027/030] 23.83 sec(s) Train Acc: 0.843098 Loss: 0.003669 | Val Acc: 0.665112 loss: 0.009451
[028/030] 23.64 sec(s) Train Acc: 0.838334 Loss: 0.003664 | Val Acc: 0.659866 loss: 0.010017
[029/030] 23.57 sec(s) Train Acc: 0.867119 Loss: 0.003018 | Val Acc: 0.642087 loss: 0.010967
[030/030] 23.55 sec(s) Train Acc: 0.869653 Loss: 0.002892 | Val Acc: 0.659575 loss: 0.010865
合并数据集
# Merge training and validation splits so the final model trains on all
# labeled data (hyperparameters were already chosen above).
train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, train_trasform)
train_val_lodaer = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)
训练模型
# Retrain a fresh model from scratch on the merged train+validation data.
model_best = Classifier()
if torch.cuda.is_available():
    model_best = model_best.cuda()
loss = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss = loss.cuda()
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001)
num_epoch = 30
for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    for data in train_val_lodaer:
        img, targets = data
        if torch.cuda.is_available():
            img = img.cuda()
            targets = targets.cuda()
        optimizer.zero_grad()  # clear accumulated gradients
        train_pred = model_best(img)  # forward pass
        batch_loss = loss(train_pred, targets)
        batch_loss.backward()  # backpropagate
        optimizer.step()  # one gradient-descent step
        train_acc += (train_pred.argmax(1) == targets).sum()
        train_loss += batch_loss.item()
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % (epoch + 1, num_epoch, time.time()-epoch_start_time, train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))
输出结果:
[001/030] 28.60 sec(s) Train Acc: 0.264345 Loss: 0.016747
[002/030] 28.16 sec(s) Train Acc: 0.385952 Loss: 0.013655
[003/030] 28.05 sec(s) Train Acc: 0.442055 Loss: 0.012396
[004/030] 28.14 sec(s) Train Acc: 0.500338 Loss: 0.011158
[005/030] 28.14 sec(s) Train Acc: 0.542228 Loss: 0.010296
[006/030] 28.39 sec(s) Train Acc: 0.587877 Loss: 0.009280
[007/030] 28.09 sec(s) Train Acc: 0.610062 Loss: 0.008759
[008/030] 28.00 sec(s) Train Acc: 0.633602 Loss: 0.008285
[009/030] 28.09 sec(s) Train Acc: 0.666316 Loss: 0.007510
[010/030] 28.10 sec(s) Train Acc: 0.689554 Loss: 0.006984
[011/030] 28.34 sec(s) Train Acc: 0.708957 Loss: 0.006621
[012/030] 28.10 sec(s) Train Acc: 0.717906 Loss: 0.006296
[013/030] 28.11 sec(s) Train Acc: 0.743927 Loss: 0.005802
[014/030] 28.04 sec(s) Train Acc: 0.747913 Loss: 0.005656
[015/030] 28.07 sec(s) Train Acc: 0.770775 Loss: 0.005209
[016/030] 28.39 sec(s) Train Acc: 0.784538 Loss: 0.004807
[017/030] 28.03 sec(s) Train Acc: 0.790629 Loss: 0.004639
[018/030] 28.09 sec(s) Train Acc: 0.815673 Loss: 0.004161
[019/030] 28.04 sec(s) Train Acc: 0.821915 Loss: 0.004001
[020/030] 28.05 sec(s) Train Acc: 0.833496 Loss: 0.003724
[021/030] 28.40 sec(s) Train Acc: 0.838535 Loss: 0.003553
[022/030] 28.06 sec(s) Train Acc: 0.850718 Loss: 0.003346
[023/030] 28.03 sec(s) Train Acc: 0.867639 Loss: 0.002950
[024/030] 28.06 sec(s) Train Acc: 0.875611 Loss: 0.002728
[025/030] 28.02 sec(s) Train Acc: 0.884260 Loss: 0.002583
[026/030] 28.34 sec(s) Train Acc: 0.892758 Loss: 0.002402
[027/030] 28.06 sec(s) Train Acc: 0.897646 Loss: 0.002194
[028/030] 28.01 sec(s) Train Acc: 0.912762 Loss: 0.001959
[029/030] 27.98 sec(s) Train Acc: 0.921486 Loss: 0.001751
[030/030] 28.33 sec(s) Train Acc: 0.917951 Loss: 0.001820
# Persist the final model's parameters.
torch.save(model_best.state_dict(), 'model_best.pth')
# Map class index -> human-readable food category name.
pred_dict = {
    0:'Bread',1:'Dairy product', 2:'Dessert',3:'Egg',4:'Fried food',5:'Meat',6:'Noodles/Pasta',7:'Rice',8:'Seafood',9:'Soup',10:'Vegetable/Fruit'
}
# Predict a single test image as a sanity check.
img = test_x[10]
# NOTE: cv2_imshow is Colab-only and is not imported anywhere in this file;
# it requires `from google.colab.patches import cv2_imshow` to be in scope.
cv2_imshow(img)
img = test_transform(img).unsqueeze(0)  # apply the transform and add a batch dimension
# Bug fix: only move the input to the GPU when CUDA is available — every
# other .cuda() call in this script is guarded the same way.
if torch.cuda.is_available():
    img = img.cuda()
# Bug fix: run inference in eval mode and without gradients, so BatchNorm
# uses its running statistics (and they are not mutated) and no autograd
# graph is built for a forward-only pass.
model_best.eval()
with torch.no_grad():
    test_pred = model_best(img)
print(pred_dict[test_pred.argmax(axis=1).item()])
import os
import torch
import cv2
import numpy as np
import torchvision.transforms as transforms
from torch.utils.data import Dataset,DataLoader
import torch.nn as nn
import time
def readfile(path, label):
    '''
    Load every image under *path* as a 128x128 BGR uint8 array.

    path: folder holding the image files.
    label: bool — when True, also parse and return the class labels
        (file naming scheme "<class>_<index>.jpg").
    '''
    # Sorted listing keeps samples and labels aligned deterministically.
    file_names = sorted(os.listdir(path))
    # Pre-allocated outputs: (m, N_W, N_H, N_C) images and (m,) labels.
    x = np.zeros((len(file_names), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(file_names)), dtype=np.uint8)
    for idx, name in enumerate(file_names):
        # Read each file via the joined directory/file path.
        img = cv2.imread(os.path.join(path, name))
        x[idx, :, :] = cv2.resize(img, (128, 128))
        if label:
            y[idx] = int(name.split("_")[0])  # class id precedes the underscore
    return (x, y) if label else x
# Load the three splits; training/validation are labeled, test is not.
img_dir = '/content/gdrive/MyDrive/ColabNotebooks/Dataset/food-11'  # dataset root folder
print('Reading data')
train_x, train_y = readfile(os.path.join(img_dir, "training"), True)
print("Size of training data={}".format(len(train_x)))
val_x, val_y = readfile(os.path.join(img_dir, "validation"), True)
print("Size of validation data={}".format(len(val_x)))
test_x = readfile(os.path.join(img_dir, "testing"), False)
print("Size of testing data={}".format(len(test_x)))
# Training-set transform: augmentation then tensor conversion.
# NOTE(review): "trasform" is a typo but the name is referenced below.
train_trasform = transforms.Compose([
    transforms.ToPILImage(),
    # Data augmentation
    transforms.RandomHorizontalFlip(),  # random horizontal flip
    transforms.RandomRotation(15),      # random rotation within +/-15 degrees
    transforms.ToTensor(),
])
# Test/validation data needs no augmentation.
test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])
#继承Dataset来获取数据
class ImgDataset(Dataset):
    """Dataset over in-memory image arrays, with optional labels/transform."""

    def __init__(self, x, y=None, transform=None):
        self.x = x
        # Convert labels to LongTensor (required by CrossEntropyLoss);
        # keep None for the unlabeled test split.
        self.y = None if y is None else torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        # Number of samples held.
        return len(self.x)

    def __getitem__(self, item):
        # Return one (transformed) sample, paired with its label if present.
        X = self.x[item]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            return X, self.y[item]
        return X
batch_size = 128
# Wrap arrays in Datasets; only the training split gets augmentation.
train_set = ImgDataset(train_x, train_y, train_trasform)
val_set = ImgDataset(val_x, val_y, test_transform)
# Shuffle training batches each epoch; keep validation order fixed.
train_loder = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loder = DataLoader(val_set, batch_size=batch_size, shuffle=False)
class Classifier(nn.Module):
    """Five-block CNN plus a 3-layer MLP head for 11 food classes.

    Expects channels-first input [3, 128, 128]; each block halves the
    spatial resolution (128 -> 64 -> 32 -> 16 -> 8 -> 4) while growing
    the channel count (3 -> 64 -> 128 -> 256 -> 512 -> 512).
    """

    def __init__(self) -> None:
        super().__init__()
        channel_plan = [(3, 64), (64, 128), (128, 256), (256, 512), (512, 512)]
        blocks = []
        # Build conv->bn->relu->pool blocks in the same order as the original
        # hand-written Sequential, so state_dict indices are unchanged.
        for cin, cout in channel_plan:
            blocks.extend([
                nn.Conv2d(in_channels=cin, out_channels=cout, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
                nn.MaxPool2d(2, 2, 0),  # halves H and W
            ])
        self.cnn = nn.Sequential(*blocks)
        # 512 channels x 4 x 4 spatial -> class scores for 11 categories.
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size(0), -1)  # flatten each sample's feature map
        return self.fc(out)
model = Classifier()
# Move model and loss to the GPU when one is available.
if torch.cuda.is_available():
    model = model.cuda()
loss = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss = loss.cuda()
# Adam optimizer over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epoch = 30
for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0
    model.train()  # enable training mode — matters when Dropout/BatchNorm are used
    for data in train_loder:
        img, targets = data
        if torch.cuda.is_available():
            img = img.cuda()
            targets = targets.cuda()
        optimizer.zero_grad()  # clear accumulated gradients
        train_pred = model(img)  # forward pass
        batch_loss = loss(train_pred, targets)
        batch_loss.backward()  # backpropagate
        optimizer.step()  # one gradient-descent step
        train_acc += (train_pred.argmax(1) == targets).sum()  # count correct predictions
        train_loss += batch_loss.item()
    model.eval()  # eval mode for the validation pass (used for model selection)
    with torch.no_grad():  # no gradients needed during validation
        for data in val_loder:
            img, targets = data
            if torch.cuda.is_available():
                img = img.cuda()
                targets = targets.cuda()
            val_pred = model(img)
            batch_loss = loss(val_pred, targets)
            val_acc += (val_pred.argmax(1) == targets).sum()
            val_loss += batch_loss.item()
    # Losses are normalized by dataset size (not batch count).
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % (epoch + 1, num_epoch, time.time() - epoch_start_time, train_acc / train_set.__len__(), train_loss / train_set.__len__(), val_acc / val_set.__len__(), val_loss / val_set.__len__()))
# Merge train+validation splits and set up a fresh model to retrain on
# all labeled data (hyperparameters already chosen above).
train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, train_trasform)
train_val_lodaer = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)
model_best = Classifier()
if torch.cuda.is_available():
    model_best = model_best.cuda()
loss = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss = loss.cuda()
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001)
num_epoch = 30
# Final training pass over the merged data, then persist the weights.
for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    for data in train_val_lodaer:
        img, targets = data
        if torch.cuda.is_available():
            img = img.cuda()
            targets = targets.cuda()
        optimizer.zero_grad()  # clear accumulated gradients
        train_pred = model_best(img)  # forward pass
        batch_loss = loss(train_pred, targets)
        batch_loss.backward()  # backpropagate
        optimizer.step()  # one gradient-descent step
        train_acc += (train_pred.argmax(1) == targets).sum()
        train_loss += batch_loss.item()
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % (epoch + 1, num_epoch, time.time()-epoch_start_time, train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))
# Save only the parameters (state_dict), not the whole module.
torch.save(model_best.state_dict(), 'model_best.pth')