准备数据集。我的数据集是这样的:总共有300多张图片,但其中200多张是重复的,真正能提供特征的只有100多张。不过需要识别的只有前三位字符,其分布依次为:1-9的数字、3种运算符(加、减、乘,没有除法)、1-9的数字。即使把所有排列组合都算上(9 × 3 × 9 = 243种),样本种类也不多。当然,我没有采集到全部的组合;需要数据集的话,私聊博主就可以。
由于图片本身没有太多的形变、平移、干扰等,所以不太需要数据增强;做了数据增强反而可能降低准确率。当然我没试过数据增强,感兴趣的小伙伴可以试一下。我这里将10个数字(0-9)和3个运算符全部转化为独热编码的形式,这样一张图像对应的标签就是大小为3 × 13的one-hot矩阵。由于文件名不支持*号,乘号在文件名中用$号代替。
import os

import torch
import torch.nn as nn
import torch.utils.data as data
import torchvision.transforms as T
from PIL import Image
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader
class ImgData(data.Dataset):
    """Captcha images whose file name starts with the 3-character label.

    The label is the first three characters of the file name (before the
    first "."): each character is a digit, "+", "-" or "$" ("$" stands in
    for "*", which is not allowed in file names).

    Each item is a ``(one_hot, img)`` pair: ``one_hot`` is a float32 tensor
    of shape (3, 13) and ``img`` is the image converted by ``T.ToTensor()``.
    """

    def __init__(self, path):
        """Index the image files found directly inside ``path``.

        Args:
            path: Directory that contains the labelled image files.
        """
        super().__init__()
        self.path = path
        # os.listdir returns entries in arbitrary order and also lists
        # sub-directories; sort for a reproducible index -> file mapping
        # and keep regular files only.
        self.imgs = sorted(
            name
            for name in os.listdir(path)
            if os.path.isfile(os.path.join(path, name))
        )
        # Character -> class index (0-9 digits, then the three operators).
        labels = {str(i): i for i in range(10)}
        labels.update({"+": 10, "-": 11, "$": 12})
        self.labels = labels

    def _encode_label(self, file_name):
        """Return the (3, 13) float32 one-hot label encoded in ``file_name``.

        Raises:
            ValueError: If the name holds fewer than three label characters
                or a character that is not in ``self.labels``.
        """
        text = file_name.split(".")[0][:3]
        if len(text) != 3:
            raise ValueError(
                "file name %r does not start with a 3-character label" % file_name
            )
        try:
            indices = [self.labels[ch] for ch in text]
        except KeyError as err:
            raise ValueError(
                "file name %r contains an unknown label character %r"
                % (file_name, err.args[0])
            ) from err
        # One column index per row; scatter_ writes a single 1.0 in each row.
        index = torch.tensor(indices, dtype=torch.long).unsqueeze(1)
        one_hot = torch.zeros(len(indices), 13, dtype=torch.float32)
        one_hot.scatter_(dim=1, index=index, value=1.0)
        return one_hot

    def __getitem__(self, index):
        """Return ``(one_hot, img)`` for the file at position ``index``."""
        file_name = self.imgs[index]
        # Convert the label to one-hot encoding.
        one_hot = self._encode_label(file_name)
        # Read the image; the context manager closes the file handle, and
        # convert("RGB") guarantees the 3 channels the model's first conv expects.
        with Image.open(os.path.join(self.path, file_name)) as raw:
            img = T.ToTensor()(raw.convert("RGB"))
        return (one_hot, img)

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.imgs)
既然数据集非常少,搭建模型时可以多加几层Conv2d来提取特征,我这里就加了2层;最后一层将输出转换为3*13个神经元。先说明一下,最终的准确率在99%以上;并不是层数搭得越多越好,最后的模型大小只有十几二十M。
class ImgModel(nn.Module):
    """Two conv stages, a max-pool and one linear layer giving (batch, 3, 13) scores.

    Args:
        input_size: Optional ``(height, width)`` of the input images. When
            given, the input width of ``Fc1`` is derived from it. When
            omitted, the legacy hard-coded width 421504 is kept so existing
            call sites and checkpoints keep the same parameter shapes.

    NOTE(review): the conv stack outputs 72 channels, and 421504 is not a
    multiple of 72, so the legacy default cannot match the flattened
    feature size for any image size. Pass ``input_size`` for a working model.
    """

    def __init__(self, input_size=None):
        super().__init__()
        self.Stage1 = self.Sequential(3, 36, padding=0)
        self.Stage2 = self.Sequential(36, 72, padding=0)
        self.Pool1 = nn.MaxPool2d(2, 1)
        # Registered but not applied in forward(); kept so the attribute
        # stays available and training behaviour is unchanged.
        self.Drop1 = nn.Dropout(0.2)
        self.Flatten = nn.Flatten()
        if input_size is None:
            in_features = 421504
        else:
            height, width = input_size
            # Each unpadded 3x3 conv trims 2 pixels per dimension and the
            # 2x2 stride-1 max-pool trims 1 more: 2 + 2 + 1 = 5.
            out_h, out_w = height - 5, width - 5
            if out_h < 1 or out_w < 1:
                raise ValueError(
                    "input_size %r is too small; both sides must be at least 6"
                    % (input_size,)
                )
            in_features = 72 * out_h * out_w
        self.Fc1 = nn.Linear(in_features, 39)

    def Sequential(self, input_size, output_size, kernel_size=3, stride=1, padding=1):
        """Build one Conv2d -> BatchNorm2d -> ReLU stage.

        Args:
            input_size: Number of input channels.
            output_size: Number of output channels.
            kernel_size: Convolution kernel size.
            stride: Convolution stride.
            padding: Convolution padding.

        Returns:
            An ``nn.Sequential`` holding the three layers.
        """
        stage = nn.Sequential(
            nn.Conv2d(input_size, output_size, kernel_size, stride, padding),
            nn.BatchNorm2d(output_size),
            nn.ReLU(),
        )
        return stage

    def forward(self, x):
        """Map a batch of images to scores of shape (batch, 3, 13).

        Raises:
            RuntimeError: If the flattened feature size does not match the
                input width of ``Fc1``.
        """
        x = self.Stage1(x)
        x = self.Stage2(x)
        x = self.Pool1(x)
        x = self.Flatten(x)
        if x.size(1) != self.Fc1.in_features:
            # Same exception type nn.Linear would raise, with an actionable message.
            raise RuntimeError(
                "flattened feature size %d does not match Fc1.in_features=%d; "
                "construct ImgModel(input_size=(height, width)) with the image size"
                % (x.size(1), self.Fc1.in_features)
            )
        x = self.Fc1(x)
        # 39 outputs = 3 label positions x 13 classes.
        x = x.reshape(x.size()[0], 3, 13)
        return x
batch_size设为1:由于数据集比较小,需要更细的更新粒度,用更多的模型参数更新次数(以及随之而来的loss震荡)换取更高的准确率。
# Training script: fits ImgModel on the labelled captcha images and writes a
# checkpoint to ./models/ckpt_best.pth.

# Prefer the second GPU as in the original setup, but fall back to the first
# GPU or the CPU so the script also runs on machines without "cuda:1".
if torch.cuda.device_count() > 1:
    DEVICE = torch.device("cuda:1")
elif torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")
BATCH_SIZE = 1

# DataLoader worker processes re-import this module under the "spawn" start
# method, so everything that starts training must sit behind this guard.
if __name__ == "__main__":
    train_set = ImgData("./datasets/anhui/images")
    ImgLoader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

    # Load the model and hyper-parameters.
    model = ImgModel()
    model = model.to(DEVICE)
    losses = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-6)
    # NOTE(review): the scheduler is created but never stepped, exactly as in
    # the original script, so the learning rate stays at 1e-6 throughout.
    scheduler = StepLR(
        optimizer=optimizer,
        step_size=20,  # number of epochs between adjustments
        gamma=0.95,  # multiplicative decay factor
        last_epoch=-1
    )

    # Create the output directory up front so torch.save cannot fail on it.
    os.makedirs("./models", exist_ok=True)

    model.train()
    for epoch in range(300):
        running_loss = 0.
        # The batch is unpacked directly; the original loop variable `data`
        # rebound the `torch.utils.data` module alias.
        for label, img in ImgLoader:
            label, img = label.to(DEVICE), img.to(DEVICE)
            optimizer.zero_grad()
            pred = model(img)
            loss = losses(pred, label)
            loss.backward()
            running_loss += loss.item()
            optimizer.step()
        # running_loss is the sum of the per-batch losses of this epoch.
        print("epoch: %s loss: %s" % (epoch, running_loss))
        # Save the model after every epoch; the file always holds the latest state.
        checkpoint = {
            "net": model.state_dict(),
            'optimizer': optimizer.state_dict(),
            "epoch": epoch
        }
        torch.save(checkpoint, './models/ckpt_best.pth')
最终的loss大概在0.0001左右
epoch: 0 loss: 0.0002137473008190227
epoch: 1 loss: 0.00017065600528454183
epoch: 2 loss: 0.00015853689117051317
epoch: 3 loss: 0.0001568195396330907
epoch: 4 loss: 0.00015941860091572835
epoch: 5 loss: 0.00015824139602926834
epoch: 6 loss: 0.00015571212547982327
epoch: 7 loss: 0.00015360863142888093
epoch: 8 loss: 0.00015596995165623184
epoch: 9 loss: 0.00015566498088759317
epoch: 10 loss: 0.00015361718873663222
epoch: 11 loss: 0.0001493654517012999
epoch: 12 loss: 0.00015319680674963365
epoch: 13 loss: 0.0001504135071854762
epoch: 14 loss: 0.00015071407296218808
epoch: 15 loss: 0.00014917661473035082
epoch: 16 loss: 0.00014558360170013884
epoch: 17 loss: 0.00014495838560080188
epoch: 18 loss: 0.00014095358103105582
epoch: 19 loss: 0.0001448862797381878
# Validation script: loads the checkpoint and reports how many validation
# images have their 3-character label predicted exactly.

# Prefer the second GPU as in the original setup, but fall back to the first
# GPU or the CPU so the script also runs on machines without "cuda:1".
if torch.cuda.device_count() > 1:
    DEVICE = torch.device("cuda:1")
elif torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

# Class index -> label character ("$" stands in for "*" in file names).
tru_labels = {i: str(i) for i in range(10)}
tru_labels.update({10: "+", 11: "-", 12: "$"})


def decode_prediction(scores):
    """Turn per-position class scores into the predicted label string.

    Args:
        scores: 2-D tensor with one row of class scores per label position.

    Returns:
        The string formed by the highest-scoring character of each row.
    """
    return "".join(tru_labels[int(i)] for i in torch.argmax(scores, dim=1).tolist())


if __name__ == "__main__":
    path_checkpoint = "./models/ckpt_best.pth"  # checkpoint path
    # map_location lets a checkpoint saved on one device load on another.
    checkpoint = torch.load(path_checkpoint, map_location=DEVICE)  # load checkpoint
    model = ImgModel()
    model = model.to(DEVICE)
    model.load_state_dict(checkpoint['net'])  # load the learnable parameters

    trues = 0
    sums = 0
    valid_path = "./datasets/valid"
    model.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for p in sorted(os.listdir(valid_path)):
            # The context manager closes the file; convert("RGB") guarantees
            # the 3 channels the model's first conv expects.
            with Image.open(os.path.join(valid_path, p)) as raw:
                img = T.ToTensor()(raw.convert("RGB"))
            img = img.to(DEVICE)
            img = torch.unsqueeze(img, 0)
            preds = decode_prediction(model(img)[0])
            if preds == p.split(".")[0][:3]:
                trues += 1
            else:
                # Show the wrong prediction next to the file name.
                print(preds, p)
            sums += 1
    if sums:
        print("准确个数: %s 总数 %s 准确率 %s " % (trues, sums, trues / sums))
    else:
        # Avoid ZeroDivisionError when the validation folder is empty.
        print("no validation images found in %s" % valid_path)
8-2 8-7 (3).png
准确个数: 155 总数 156 准确率 0.9935897435897436
可以看到,大概拿了100多个进行预测,只有一个不准。嘿嘿,有什么问题欢迎讨论