Table of Contents
Data Augmentation Methods
Building the Dataset for CNN Image Classification
Loading the Dataset
Defining the Trainer
Hyperparameter Settings
Data Augmentation
Building the CNN Network
Starting Training
Model Testing
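This section assumes the usual imports from earlier parts of the notebook; for completeness, they are roughly:

import math
import os

import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from tqdm import tqdm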
# In general, we do not apply data augmentation to the validation and test sets;
# we only need to resize the images to a common size and convert them to Tensors.
test_tfm = transforms.Compose([
transforms.Resize((128, 128)),
transforms.ToTensor(),
])
# That said, we can also augment the test set (different transforms of the same sample):
# - apply the training transform (train_tfm) to the test data to produce augmented samples
# - run predictions separately on the different versions of the same image
# - finally, combine the predictions with an ensemble method such as soft vote / hard vote
train_tfm = transforms.Compose([
# Resize the image to height = width = 128
transforms.Resize((128, 128)),
transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET),
# ToTensor() goes last, after all other transforms
transforms.ToTensor(),
])
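AutoAugment applies a learned augmentation policy (here, the one tuned on ImageNet). If you want explicit control over the augmentations instead, a hand-rolled pipeline works as well; the following is a minimal sketch, where the specific transforms and their parameters are illustrative choices rather than part of the original code:

# A hand-picked alternative to AutoAugment (illustrative)
train_tfm_manual = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(p=0.5),  # mirror half of the images
    transforms.RandomRotation(15),           # rotate within +/- 15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),                   # ToTensor() still goes last
])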
class FoodDataset(Dataset):
    # Constructor
    def __init__(self, path, tfm=test_tfm, files=None):
        # Call the parent class constructor
        super().__init__()
        # Store the image folder path
        self.path = path
        # Collect all files ending in .jpg under the path, sorted lexicographically
        self.files = sorted([os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")])
        # If a file list is provided, use it instead of the auto-discovered one
        if files is not None:
            self.files = files
        # Print one sample file path from this folder
        print(f"One {path} sample", self.files[0])
        # Store the transform applied to each image
        self.transform = tfm

    # Return the number of samples in the dataset
    def __len__(self):
        return len(self.files)

    # Fetch a single sample by index
    def __getitem__(self, idx):
        # Get the file name
        fname = self.files[idx]
        # Open the image file
        im = Image.open(fname)
        # Apply the transform
        im = self.transform(im)
        # Try to parse the label from the file name; fall back to -1
        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1  # the test set has no labels
        # Return the image and its label
        return im, label
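The label parsing relies on the dataset's file-naming convention: a training image's class is the number before the first underscore in its file name. For example (the path below is hypothetical):

# Illustrative: recovering the label from a name of the form "<class>_<id>.jpg"
fname = "../input/data/training/3_0042.jpg"  # hypothetical file
label = int(fname.split("/")[-1].split("_")[0])
print(label)  # 3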
Note the "private-style" naming here (a leading underscore marks a module-internal name):
_dataset_dir = config['dataset_dir']  # the "_" prefix avoids clashing with other dataset-related names
train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=config['batch_size'],
shuffle=True, num_workers=0, pin_memory=True)
valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=config['batch_size'],
shuffle=True, num_workers=0, pin_memory=True)
# Keep the test set unshuffled so the output order stays consistent
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=config['batch_size'],
shuffle=False, num_workers=0, pin_memory=True)
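As a quick sanity check (not part of the original notebook), you can pull one batch and confirm that the shapes match the transforms and batch size:

# One batch should be [batch_size, 3, 128, 128]
x, y = next(iter(train_loader))
print(x.shape, y.shape)  # e.g. torch.Size([64, 3, 128, 128]) torch.Size([64])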
def trainer(train_loader, valid_loader, model, config, device, rest_net_flag=False):
    # Cross-entropy loss for evaluating the classification model
    criterion = nn.CrossEntropyLoss()
    # Initialize the optimizer; Adam is used here
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'], weight_decay=config['weight_decay'])
    # Choose the checkpoint path based on the rest_net_flag flag
    save_path = config['resnet_save_path'] if rest_net_flag else config['save_path']
    # TensorBoard SummaryWriter for logging the training process
    writer = SummaryWriter()
    # Create the 'models' directory if it does not exist
    if not os.path.isdir('./models'):
        os.mkdir('./models')
    # Initialize training state: epoch count, best loss, step counter, early-stop counter
    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0
    # Loop over the training epochs
    for epoch in range(n_epochs):
        # Put the model in training mode
        model.train()
        # Track per-step loss and accuracy
        loss_record = []
        train_accs = []
        # tqdm progress bar over the training data
        train_pbar = tqdm(train_loader, position=0, leave=True)
        # Iterate over the training data
        for x, y in train_pbar:
            # Reset the optimizer's gradients
            optimizer.zero_grad()
            # Move data and labels to the target device (e.g. GPU)
            x, y = x.to(device), y.to(device)
            # Forward pass
            pred = model(x)
            # Compute the loss
            loss = criterion(pred, y)
            # Backward pass
            loss.backward()
            # Apply gradient clipping if enabled
            if config['clip_flag']:
                grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
            # One optimization step (gradient descent)
            optimizer.step()
            # Count the step
            step += 1
            # Compute accuracy and record loss and accuracy (y is already on device)
            acc = (pred.argmax(dim=-1) == y).float().mean()
            l_ = loss.detach().item()
            loss_record.append(l_)
            train_accs.append(acc.detach().item())
            train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
            train_pbar.set_postfix({'loss': f'{l_:.5f}', 'acc': f'{acc:.5f}'})
        # Compute and log the mean training loss and accuracy
        mean_train_acc = sum(train_accs) / len(train_accs)
        mean_train_loss = sum(loss_record) / len(loss_record)
        writer.add_scalar('Loss/train', mean_train_loss, step)
        writer.add_scalar('ACC/train', mean_train_acc, step)
        # Put the model in evaluation mode
        model.eval()
        # Track validation loss and accuracy
        loss_record = []
        test_accs = []
        # Iterate over the validation data
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            with torch.no_grad():
                pred = model(x)
                loss = criterion(pred, y)
            acc = (pred.argmax(dim=-1) == y).float().mean()
            loss_record.append(loss.item())
            test_accs.append(acc.detach().item())
        # Compute, print, and log the mean validation loss and accuracy
        mean_valid_acc = sum(test_accs) / len(test_accs)
        mean_valid_loss = sum(loss_record) / len(loss_record)
        print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, acc: {mean_train_acc:.4f}, Valid loss: {mean_valid_loss:.4f}, acc: {mean_valid_acc:.4f}')
        writer.add_scalar('Loss/valid', mean_valid_loss, step)
        writer.add_scalar('ACC/valid', mean_valid_acc, step)
        # Save the best model so far and apply early stopping
        if mean_valid_loss < best_loss:
            best_loss = mean_valid_loss
            torch.save(model.state_dict(), save_path)
            print(f'Saving model with loss {best_loss:.3f}...')
            early_stop_count = 0
        else:
            early_stop_count += 1
        if early_stop_count >= config['early_stop']:
            print('\nModel is not improving, so we halt the training session.')
            return
device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
'seed': 6666,
'dataset_dir': "../input/data",
'n_epochs': 10,
'batch_size': 64,
'learning_rate': 0.0003,
'weight_decay':1e-5,
'early_stop': 300,
'clip_flag': True,
'save_path': './models/model.ckpt',
'resnet_save_path': './models/resnet_model.ckpt'
}
print(device)
all_seed(config['seed'])
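all_seed is a seed-everything helper defined in an earlier section of the notebook. A minimal sketch of what such a helper typically looks like, assuming that role (this is a reconstruction, not the author's exact code):

import random

# Hypothetical reconstruction: fix every RNG for reproducibility
def all_seed(seed):
    random.seed(seed)                     # Python RNG
    np.random.seed(seed)                  # NumPy RNG
    torch.manual_seed(seed)               # CPU RNG
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # all GPU RNGs
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False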
# Three extra test loaders that apply train_tfm to the test set, producing
# differently augmented views of each test image (for test-time augmentation;
# see the soft-vote sketch at the end of this section)
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=train_tfm)
test_loader_extra1 = DataLoader(test_set, batch_size=config['batch_size'],
shuffle=False, num_workers=0, pin_memory=True)
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=train_tfm)
test_loader_extra2 = DataLoader(test_set, batch_size=config['batch_size'],
shuffle=False, num_workers=0, pin_memory=True)
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=train_tfm)
test_loader_extra3 = DataLoader(test_set, batch_size=config['batch_size'],
shuffle=False, num_workers=0, pin_memory=True)
class Classifier(nn.Module):
def __init__(self):
super(Classifier, self).__init__()
# input dimensions [3, 128, 128]
self.cnn = nn.Sequential(
nn.Conv2d(3, 64, 3, 1, 1), # [64, 128, 128]
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2, 0), # [64, 64, 64]
nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2, 0), # [128, 32, 32]
nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2, 0), # [256, 16, 16]
nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2, 0), # [512, 8, 8]
nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2, 0), # [512, 4, 4]
)
self.fc = nn.Sequential(
nn.Linear(512*4*4, 1024),
nn.ReLU(),
nn.Linear(1024, 512),
nn.ReLU(),
nn.Linear(512, 11)
)
def forward(self, x):
out = self.cnn(x)
out = out.view(out.size()[0], -1)
return self.fc(out)
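Each MaxPool2d(2, 2, 0) halves the spatial resolution, so five poolings take 128 → 64 → 32 → 16 → 8 → 4, which is where the 512*4*4 input size of the first Linear layer comes from. A quick shape check (illustrative, not in the original):

# Verify the flatten size with a dummy batch of two images
dummy = torch.randn(2, 3, 128, 128)
print(Classifier()(dummy).shape)  # torch.Size([2, 11]), one logit per food class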
model = Classifier().to(device)
trainer(train_loader, valid_loader, model, config, device)
Alternatively, you can train one of PyTorch's official off-the-shelf models:
from torchvision.models import resnet50
resNet = resnet50(pretrained=False)  # newer torchvision versions use weights=None instead
# Residual network
resNet = resNet.to(device)
trainer(train_loader, valid_loader, resNet, config, device, rest_net_flag=True)
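Note that resnet50 ships with a 1000-way classification head. Training still runs, since CrossEntropyLoss simply never targets the extra logits, but for this 11-class problem you would typically swap the head first. A hedged sketch (an assumption, not in the original code):

# Replace the 1000-way head with an 11-way one before training
resNet.fc = nn.Linear(resNet.fc.in_features, 11)
resNet = resNet.to(device)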
# Load the best checkpoint and run inference on the test set
model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(config['save_path']))
model_best.eval()
prediction = []
with torch.no_grad():
    for data, _ in test_loader:
        test_pred = model_best(data.to(device))
        # Take the class with the highest logit as the prediction
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()
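To actually use the three augmented test loaders built earlier, ensemble the per-view probabilities as the augmentation comments described. A minimal soft-vote sketch follows; the weighting between the clean and augmented views is an illustrative choice, not from the original:

import torch.nn.functional as F

# Soft vote over the clean view and three augmented views
all_probs = []
for loader in [test_loader, test_loader_extra1, test_loader_extra2, test_loader_extra3]:
    probs = []
    with torch.no_grad():
        for data, _ in loader:
            logits = model_best(data.to(device))
            probs.append(F.softmax(logits, dim=-1).cpu())
    all_probs.append(torch.cat(probs))
# Weight the un-augmented view at 0.5 and split the rest across the augmented views
avg_probs = 0.5 * all_probs[0] + (0.5 / 3) * (all_probs[1] + all_probs[2] + all_probs[3])
prediction_tta = avg_probs.argmax(dim=-1).tolist()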