除了简单的使用CNN,我们还可以进行数据增强来提高模型的性能
给出的训练集,有一部分是未标记的,需要我们自己标记并使用,在3.1会讲解
输入一张图片,输出食物类别
这个分类的过程大致可以描述为下面的图:
我们有一张图,这张图输入我们设定的CNN的网络,最后一层的输出维度一定是11(因为我们有十一个类别),输出经过一个softmax得到分类结果
训练集有280*11张标记的图片(每个类别280张,一共11个类别),另外还有6786张未标记的图片(需要我们自己用已有的标记数据先训练model,再用model去给它们打标记)
验证集有30*11张标记的图片,同理,每个类别30张
测试集 3347张未标记的图片需要我们去分类
这里用到半监督学习的方法,我们使用已经被标记的图片来训练网络,然后用训练得到的网络来预测训练集未标记的图像,给他们添加标记,但是这个标记不一定正确,然后我们把原本被标记的和我们自己标记的图片一起再用作输入去训练我们的模型,再对未标记的图像做预测,循环下来,我们或许就能训练出很好的模型。
**并不是所有我们标记的图片都可以用来训练!**因为如果标记的是错的,那么用来训练可能会误导我们的模型,所以,当我们的预测非常确定说这个图片就是哪一类的时候,我们再将它作为训练集。比如下图,softmax之后第七类的可能性明显高于其他类,那么我们就可以给图片一个标记并加入训练集。但如果某个图像预测的分类结果比较平均(每一类的概率都在0.1左右),那我们就不能确定它是哪一类,也就不能加入训练集。
# 数据增强 transforms.Compose
# 只需要在训练集使用数据增强 测试集调整到统一的size即可
# Augmentation pipeline for the training set only; the test/validation
# pipeline just converts to a tensor. Images are already resized to
# 128x128 by the Dataset (cv2.resize), hence Resize stays commented out.
train_tfm = transforms.Compose([
    transforms.ToPILImage(),
    #transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),  # random horizontal flip
    transforms.RandomRotation(15),  # random rotation within +/-15 degrees
    transforms.ToTensor(),  # to tensor; also scales uint8 pixels to [0, 1]
])
test_tfm = transforms.Compose([
    transforms.ToPILImage(),
    #transforms.Resize((128, 128)),
    transforms.ToTensor()
])
# Root directory of the Food-11 dataset.
root = './2021/food-11/'
# Dataset below: reads every image under a directory into memory
# and, for labeled splits, parses the label from the file name prefix.
class FoodDataset(Dataset):
    """In-memory dataset for Food-11 images.

    Loads every file under `path` into a (N, 128, 128, 3) uint8 array.
    In 'train'/'dev' mode the class label is parsed from the file name
    prefix ("<label>_<index>.jpg") and returned alongside the image;
    in 'test' mode only the (transformed) image is returned.
    Note: cv2.imread yields BGR channel order.
    """
    def __init__(self, path, trans, mode='train'):
        # Fixed: super(FoodDataset).__init__() created an unbound super
        # object and was a no-op; use the standard zero-argument form.
        super().__init__()
        self.mode = mode
        self.trans = trans
        self.image_dir = sorted(os.listdir(path))
        self.x = np.zeros((len(self.image_dir), 128, 128, 3), dtype=np.uint8)
        # Fixed: labels must be int64 — CrossEntropyLoss requires
        # LongTensor targets; uint8 labels fail after DataLoader collation.
        self.y = np.zeros((len(self.image_dir)), dtype=np.int64)
        for i, file in enumerate(self.image_dir):
            # cv2 returns an np.ndarray; os.path.join is robust to a
            # missing trailing slash in `path` (was `path+file`).
            img = cv2.imread(os.path.join(path, file))
            self.x[i, :, :] = cv2.resize(img, (128, 128))
            if mode in ['train', 'dev']:
                # file names look like "<label>_<index>.jpg"
                self.y[i] = int(file.split("_")[0])
        print('Finishing read {} set from FoodDataset, {} samples load'.format(self.mode, len(self.image_dir)))

    def __len__(self):
        return len(self.image_dir)

    def __getitem__(self, idx):
        tran_x = self.x[idx]
        if self.trans is not None:
            tran_x = self.trans(tran_x)
        if self.mode in ['train', 'dev']:
            return tran_x, self.y[idx]
        else:
            return tran_x
# Default batch size shared by every loader.
batch_size = 128

def dataloader(path, trans, mode='train', batch_size=batch_size):
    """Build a DataLoader over a FoodDataset; shuffle only when training."""
    ds = FoodDataset(path, trans, mode)
    shuffle = mode == 'train'
    return DataLoader(ds, batch_size=batch_size, shuffle=shuffle)
# Build the training / test / validation loaders
# (augmentation only on the training split).
tr_set = dataloader(os.path.join(root, 'training/labeled/'), train_tfm, 'train')
tt_set = dataloader(os.path.join(root, 'testing/00/'), test_tfm, mode='test')
dev_set = dataloader(os.path.join(root, 'validation/'), test_tfm, mode='dev')
# model
class Classifier(nn.Module):
    """CNN classifier for 128x128 RGB food images (11 classes).

    Three Conv-BN-ReLU-MaxPool blocks shrink the spatial size
    128 -> 64 -> 32 -> 8 while widening channels 3 -> 64 -> 128 -> 256;
    three fully connected layers then map the flattened 256*8*8
    features to 11 class logits.
    """

    def __init__(self):
        super(Classifier, self).__init__()
        # input dim [3, 128, 128]; build the conv stack block by block
        layers = []
        for c_in, c_out, pool in ((3, 64, 2), (64, 128, 2), (128, 256, 4)):
            layers.append(nn.Conv2d(c_in, c_out, 3, 1, 1))
            layers.append(nn.BatchNorm2d(c_out))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(pool, pool, 0))
        self.cnn_layers = nn.Sequential(*layers)
        self.fc_layers = nn.Sequential(
            nn.Linear(256 * 8 * 8, 256), nn.ReLU(),
            nn.Linear(256, 256), nn.ReLU(),
            nn.Linear(256, 11),
        )
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return raw class logits of shape [batch, 11]."""
        features = self.cnn_layers(x)
        flat = features.flatten(1)
        return self.fc_layers(flat)

    def cal_loss(self, target, y):
        """Cross-entropy between logits `target` and integer labels `y`."""
        return self.criterion(target, y)
def get_device():
    """Return 'cuda' when a GPU is visible to torch, otherwise 'cpu'."""
    return "cuda" if torch.cuda.is_available() else "cpu"
device = get_device()
os.makedirs('models', exist_ok=True)  # directory for saved model checkpoints
model = Classifier().to(device)
# Training hyper-parameters.
config = {
    'n_epochs': 3000,          # maximum number of epochs
    'batch_size': 120,         # NOTE(review): loaders actually use the module-level batch_size (128) — confirm which is intended
    'optimizer': 'Adam',       # resolved via getattr(torch.optim, ...)
    'opt_params': {
        'lr': 0.0001,          # learning rate
    },
    'save_path': 'model.pth',  # checkpoint path for the best model
    'early_stop': 200,         # epochs without dev-loss improvement before stopping
    'val_ratio': 0.2,          # fixed typo: key was 'val_traio'
}
# Validation pass
def dev(d_set, model, device):
    """Evaluate `model` on `d_set`.

    Returns (total_loss, correct_count): the loss summed over samples
    (batch loss * batch size) and the number of correct predictions.
    The caller is responsible for normalizing both values.
    """
    model.eval()
    total_loss = 0
    val_acc = 0
    for x, y in d_set:
        x, y = x.to(device), y.to(device)
        with torch.no_grad():
            pred = model(x)
            loss = model.cal_loss(pred, y)
        total_loss += loss.detach().cpu().item() * len(x)
        # Fixed: compare argmax against the whole label batch; the original
        # compared against y[1] (a single label broadcast over the batch).
        val_acc += np.sum(np.argmax(pred.detach().cpu().numpy(), axis=1) == y.detach().cpu().numpy())
    return total_loss, val_acc
这部分我还没完全做好,后面会继续更新
def train(tr_set, d_set, model, config, device):
    """Train `model` with early stopping on the dev loss.

    Saves the best state_dict to config['save_path'] and returns
    (min_dev_loss, record_loss), where record_loss holds per-batch
    training losses and per-epoch dev losses.
    """
    record_loss = {'train': [], 'dev': []}
    n_epochs = config['n_epochs']
    # Resolve the optimizer class by name, e.g. torch.optim.Adam.
    optimizer = getattr(torch.optim, config['optimizer'])(model.parameters(), **config['opt_params'])
    early_cnt = 0
    min_loss = float('inf')  # fixed: the arbitrary 20000 could miss saving on large initial losses
    epoch = 0
    # Number of dev samples, since dev() returns un-normalized sums.
    dev_n = sum(len(y) for _, y in d_set)
    while epoch < n_epochs:
        model.train()
        # Fixed: reset running statistics every epoch — they previously
        # accumulated across epochs, making the printed averages wrong.
        train_loss = 0.0
        train_acc = 0
        train_n = 0
        for x, y in tr_set:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            pred = model(x)
            t_loss = model.cal_loss(pred, y)
            t_loss.backward()
            optimizer.step()
            batch_loss = t_loss.detach().cpu().item()
            record_loss['train'].append(batch_loss)
            train_loss += batch_loss * len(x)
            # Fixed: compare against the whole label batch, not y[1].
            train_acc += np.sum(np.argmax(pred.detach().cpu().numpy(), axis=1) == y.detach().cpu().numpy())
            train_n += len(x)
        dev_loss, val_acc = dev(d_set, model, device)
        # Fixed: normalize by sample counts and use the d_set argument
        # (was dividing by loader batch counts and reading global dev_set).
        print('Train loss:{} acc:{}, Val loss:{} acc:{}'.format(
            train_loss / train_n, train_acc / train_n,
            dev_loss / dev_n, val_acc / dev_n))
        if dev_loss < min_loss:
            min_loss = dev_loss
            # Fixed: np.save had swapped arguments and is the wrong
            # serializer for a state_dict; use torch.save with the
            # configured path for consistency with config['save_path'].
            torch.save(model.state_dict(), config['save_path'])
            print('Saving model (epoch = {:4d}, loss = {:.4f})'
                  .format(epoch + 1, min_loss))
            early_cnt = 0
        else:
            early_cnt += 1
        epoch += 1
        record_loss['dev'].append(dev_loss)
        if early_cnt > config['early_stop']:
            break
    return min_loss, record_loss
# Run the full training loop (saves the best checkpoint as a side effect).
min_loss, record_loss = train(tr_set, dev_set, model, config, device)
# 测试
def test(tt_set, model, device):
model.eval()
preds = []
for x, y in tt_set:
x = x.to(device)
with torch.no_grad():
pred = model(x)
preds.append(pred.detach().cpu())
preds = torch.cat(preds, dim=0).numpy()
return preds
# Write the predictions to a CSV submission file
def save_preds(preds, file):
    """Write predictions to `file` as CSV rows of (id, class)."""
    print('Saving results to {}'.format(file))
    # NOTE(review): csv docs recommend open(..., newline='') — confirm
    # before changing, as that would alter output bytes on Windows.
    with open(file, 'w') as fp:
        writer = csv.writer(fp)
        writer.writerow(['id', 'class'])
        writer.writerows((i, p) for i, p in enumerate(preds))
# Predict on the test set and write the submission file.
preds = test(tt_set, model, device)
save_preds(preds, 'preds.csv')