# --- Environment & reproducibility setup ---
import os, sys, glob, shutil, json
# Pin the process to GPU 0; must be set before torch initializes CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
import cv2
from PIL import Image
import numpy as np
from tqdm import tqdm, tqdm_notebook
import torch
# Fixed seed for repeatable weight init / shuffling on the CPU side.
torch.manual_seed(0)
# benchmark=True lets cuDNN pick the fastest conv algorithms
# (trades exact run-to-run determinism for speed).
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset
# Global switch: move model/tensors to the GPU when True.
use_cuda=True
# Additive Gaussian noise augmentation.
def gasuss_noise(image, mean=0, var=0.001):
    '''
    Add Gaussian noise to an image.

    Parameters
    ----------
    image : PIL.Image or ndarray
        Input image with uint8-style pixel values in [0, 255].
    mean : float
        Mean of the noise, in normalized [0, 1] pixel units.
    var : float
        Variance of the noise.

    Returns
    -------
    ndarray of uint8, same shape as the input.
    '''
    image = np.array(image, dtype=np.float32)
    image = image / 255
    noise = np.random.normal(mean, var ** 0.5, image.shape)
    out = image + noise
    # Clip to [0, 1] before rescaling. The original low_clip = -1 branch let
    # negative values reach astype(np.uint8), which wraps them around to
    # spurious bright pixels.
    out = np.clip(out, 0.0, 1.0)
    out = out * 255
    out = out.astype(np.uint8)
    return out
import random
class SVHN_Dataset(Dataset):
    """Dataset of SVHN crop images with fixed-length (5) digit labels.

    Digit sequences shorter than 5 are padded with the sentinel class 10
    ("no digit") so every sample yields exactly 5 character targets.
    """

    def __init__(self, img_paths, img_labels, transform=None):
        super(SVHN_Dataset, self).__init__()
        self.img_paths = img_paths    # list of image file paths
        self.img_labels = img_labels  # list of digit sequences, e.g. [1, 9]
        self.transform = transform    # optional torchvision transform

    # Required by Dataset.
    def __getitem__(self, index):
        img = Image.open(self.img_paths[index]).convert('RGB')
        # Randomly add Gaussian noise to ~half the samples to curb overfitting.
        if random.random() < 0.5:
            img = gasuss_noise(img)
            # gasuss_noise returns an ndarray, so convert back to PIL only in
            # this branch (the original always called Image.fromarray, even
            # when img was still a PIL image).
            img = Image.fromarray(img).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        # np.int was removed in NumPy 1.24; use the builtin int dtype instead.
        label = np.array(self.img_labels[index], dtype=int)
        # Digit count varies per image: pad with class 10 up to length 5.
        label = list(label) + (5 - len(label)) * [10]
        return img, torch.from_numpy(np.array(label[:5]))

    # Required by Dataset.
    def __len__(self):
        return len(self.img_paths)
# --- Locate images and load the ground-truth JSON annotations ---
train_img_paths = glob.glob('/content/drive/My Drive/drive/SVHN/mchar_train/mchar_train/*.png')
train_img_paths.sort()
val_img_paths = glob.glob('/content/drive/My Drive/drive/SVHN/mchar_val/mchar_val/*.png')
val_img_paths.sort()
test_img_paths = glob.glob('/content/drive/My Drive/drive/SVHN/mchar_test_a/mchar_test_a/*.png')
test_img_paths.sort()
# Use context managers so the JSON file handles are closed deterministically
# (the original json.load(open(...)) leaked the descriptors).
with open('/content/drive/My Drive/drive/SVHN/mchar_train.json') as f:
    train_json = json.load(f)
train_labels = [train_json[x]['label'] for x in train_json]
with open('/content/drive/My Drive/drive/SVHN/mchar_val.json') as f:
    val_json = json.load(f)
val_labels = [val_json[x]['label'] for x in val_json]
# NOTE(review): label order relies on the JSON insertion order matching the
# sorted glob order of the image paths — verify against the dataset files.
# Dummy labels for the test split, only so SVHN_Dataset has something to index.
test_labels = [[1]] * len(test_img_paths)
# Training loader: heavier augmentation (color jitter + small rotation + crop).
train_transform = transforms.Compose([
    transforms.Resize((64, 128)),
    transforms.CenterCrop((60, 120)),
    transforms.ColorJitter(0.3, 0.3, 0.2),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
train_dataloader = torch.utils.data.DataLoader(
    SVHN_Dataset(train_img_paths, train_labels, train_transform),
    batch_size=40,
    shuffle=True,
    num_workers=10)
# Validation loader. shuffle must be False: the evaluation code compares
# predict() output row-by-row against val_dataloader.dataset.img_labels,
# which only lines up when samples are served in dataset order (the
# original shuffle=True made the reported validation accuracy meaningless).
val_dataloader = torch.utils.data.DataLoader(
    SVHN_Dataset(val_img_paths, val_labels, transforms.Compose([
        transforms.Resize((60, 120)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])),
    batch_size=40,
    shuffle=False,
    num_workers=10)
# Test loader: deterministic order (shuffle=False) so predictions line up
# with the sorted test image paths when writing the submission file.
test_transform = transforms.Compose([
    transforms.Resize((60, 120)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
test_dataloader = torch.utils.data.DataLoader(
    SVHN_Dataset(test_img_paths, test_labels, test_transform),
    batch_size=40,
    shuffle=False,
    num_workers=10)
import collections
class SVHN_model(nn.Module):
    """ResNet-50 backbone with five parallel 11-way classifier heads.

    Each head predicts one character position of the (padded) 5-character
    label; class 10 is the "no digit" padding class.
    """

    # Required by nn.Module.
    def __init__(self):
        super(SVHN_model, self).__init__()
        backbone = models.resnet50(pretrained=True)
        backbone.avgpool = nn.AdaptiveAvgPool2d(1)
        # Drop the final fc layer; keep everything up to the pooled features.
        self.resnet = nn.Sequential(*list(backbone.children())[:-1])
        # 1x1 convolutions shrink the channel count 2048 -> 128.
        # (Named entries keep the state_dict keys identical: conv1, relu1, ...)
        self.cnn = nn.Sequential(collections.OrderedDict([
            ('conv1', nn.Conv2d(2048, 1024, kernel_size=1, stride=1, padding=0)),
            ('relu1', nn.ReLU()),
            ('dropout1', nn.Dropout(0.7)),
            ('conv2', nn.Conv2d(1024, 512, kernel_size=1, stride=1, padding=0)),
            ('relu2', nn.ReLU()),
            ('dropout2', nn.Dropout(0.7)),
            ('conv3', nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0)),
            ('relu3', nn.ReLU()),
        ]))
        self.linear = nn.Linear(128, 11)  # NOTE(review): defined but never used in forward
        # Five fully connected heads, one per character position.
        self.fc1 = nn.Linear(128, 11)
        self.fc2 = nn.Linear(128, 11)
        self.fc3 = nn.Linear(128, 11)
        self.fc4 = nn.Linear(128, 11)
        self.fc5 = nn.Linear(128, 11)

    def forward(self, X):
        feats = self.resnet(X)             # (batch, 2048, 1, 1)
        feats = self.cnn(feats)            # (batch, 128, 1, 1)
        feats = feats.view(X.size(0), -1)  # (batch, 128)
        # Independent logits for each of the 5 character positions.
        return (self.fc1(feats), self.fc2(feats), self.fc3(feats),
                self.fc4(feats), self.fc5(feats))
def train(model, train_dataloader, criterion, optimizer):
    """Run one training epoch and return the mean batch loss.

    Parameters
    ----------
    model : nn.Module returning 5 per-position logit tensors.
    train_dataloader : yields (images, labels); labels have shape (batch, 5).
    criterion : per-position classification loss (e.g. CrossEntropyLoss).
    optimizer : optimizer stepping the model's parameters.

    Returns
    -------
    float : mean loss over all batches of the epoch.
    """
    model.train()
    train_loss = []
    for i, (data, label) in enumerate(train_dataloader):
        if use_cuda:
            data = data.cuda()
            label = label.cuda()
        c1, c2, c3, c4, c5 = model(data)
        # Total loss = sum of the 5 per-position classification losses.
        # (The original also built an unused `lamda = np.ones(...)` here.)
        loss = criterion(c1, label[:, 0]) + \
               criterion(c2, label[:, 1]) + \
               criterion(c3, label[:, 2]) + \
               criterion(c4, label[:, 3]) + \
               criterion(c5, label[:, 4])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Record every batch: the original appended only every 100th batch,
        # so the returned "mean" was a sparse sample. Still print every 100.
        train_loss.append(loss.item())
        if i % 100 == 0:
            print(loss.item())
    return np.mean(train_loss)
def val(model, val_dataloader, criterion):
    """Return the mean summed 5-position loss over the validation set."""
    model.eval()
    losses = []
    with torch.no_grad():
        for data, label in val_dataloader:
            if use_cuda:
                data = data.cuda()
                label = label.cuda()
            outputs = model(data)
            # Sum the classification loss of each of the 5 character heads.
            batch_loss = sum(criterion(logits, label[:, pos])
                             for pos, logits in enumerate(outputs))
            losses.append(batch_loss.item())
    return np.mean(losses)
def predict(model, test_dataloader, TTA=10):
    """Predict labels with test-time augmentation (TTA).

    Runs the dataloader TTA times and sums the logits of all passes before
    taking the per-position argmax, averaging out augmentation randomness.
    (The original headed this with a bare, uncommented text line — a
    SyntaxError — now folded into this docstring.)

    Returns
    -------
    ndarray of shape (num_samples, 5): predicted class per character
    position; class 10 means "no digit".
    """
    model.eval()
    final_result = None
    for t in range(TTA):  # renamed from i: the inner loop shadowed it
        test_pred = []
        with torch.no_grad():
            for data, label in test_dataloader:
                if use_cuda:
                    data = data.cuda()
                    label = label.cuda()
                c1, c2, c3, c4, c5 = model(data)
                # Concatenate the 5 heads' logits -> (batch_size, 55).
                result = torch.cat((c1, c2, c3, c4, c5), dim=1)
                test_pred.append(result.cpu().numpy())
        result_pred = np.vstack(test_pred)  # (num_samples, 55)
        # Accumulate logits across TTA passes.
        if final_result is None:
            final_result = result_pred
        else:
            final_result += result_pred
    # Argmax each 11-wide slice -> one predicted class per character position.
    pred_label = np.vstack([final_result[:, :11].argmax(axis=1),
                            final_result[:, 11:22].argmax(axis=1),
                            final_result[:, 22:33].argmax(axis=1),
                            final_result[:, 33:44].argmax(axis=1),
                            final_result[:, 44:55].argmax(axis=1)]).T
    return pred_label
# --- Training driver ---
model = SVHN_model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), 0.001)
epoch = 20
best_loss = float('inf')
if use_cuda:
    model = model.cuda()
for i in range(epoch):
    train_loss = train(model, train_dataloader, criterion, optimizer)
    val_loss = val(model, val_dataloader, criterion)
    # Character-level accuracy on the validation split (requires the val
    # loader to serve samples in dataset order).
    label_pre = predict(model, val_dataloader, TTA=1)
    true_label = [''.join(map(str, x)) for x in val_dataloader.dataset.img_labels]
    pre_label = []
    for x in label_pre:
        # Drop the padding class (10) before joining the predicted digits.
        pre_label.append(''.join(map(str, x[x != 10])))
    val_char_acc = np.mean(np.array(pre_label) == np.array(true_label))
    # Bug fix: the original formatted the constant `epoch` (always 20)
    # instead of the current epoch index.
    print('Epoch: {0}, Train loss: {1} \t Val loss: {2}'.format(i, train_loss, val_loss))
    print('acc:', val_char_acc)
    # Checkpoint whenever validation loss improves.
    if val_loss < best_loss:
        best_loss = val_loss
        torch.save(model.state_dict(), './model.pt')
# NOTE(review): prediction below uses the last-epoch weights; consider
# reloading './model.pt' (the best checkpoint) before predicting.
test_label_pred = predict(model, test_dataloader, TTA=10)
test_pred_label = []
for x in test_label_pred:
    test_pred_label.append(''.join(map(str, x[x != 10])))
import pandas as pd
df_submit = pd.read_csv('testA.csv')
df_submit['file_code'] = test_pred_label
df_submit.to_csv('resnet18.csv', index=None)