Repo link: https://github.com/bfortuner/pytorch_tiramisu
The network is multi-class out of the box, and the original data-input functions are tailored to public datasets like CamVid, so using your own data takes some work. Here I rewrote the data input for my own dataset and switched to binary classification.
Environment: CUDA 10.0, cuDNN 7.6.0, PyTorch 1.2.0 (no need to change your environment up front; try it as-is first, since switching environments is a pain), 12 GB of GPU memory.
Tip: the project ships three model capacities: FCDenseNet57, FCDenseNet67, and FCDenseNet103; see /models/tiramisu.py for details. Due to hardware limits I tried FCDenseNet67 here. The network has quite a few parameters, so with 256×256 images the batch size is 2.
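To compare the three capacities before committing to one, a quick parameter count works (my own snippet; instantiating on CPU so it runs anywhere):

from models import tiramisu

for build in (tiramisu.FCDenseNet57, tiramisu.FCDenseNet67, tiramisu.FCDenseNet103):
    model = build(n_classes=2)
    n_params = sum(p.numel() for p in model.parameters())
    print('{}: {:.1f}M parameters'.format(build.__name__, n_params / 1e6))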
If you find this helpful, please give it a like, don't just freeload!
Accuracy evaluation:
acc: 0.9275658925374349
acc_cls: 0.8512592612858325
iou: [0.91613816 0.65293152]
miou: 0.7845348416766565
fwavacc: 0.8669676138438737
class_accuracy: 0.7215300562956269
class_recall: 0.6494413368605654
accuracy: 0.850057297858639
f1_score: 0.6835904041249753
Note: these are preliminary results from the adapted code, trained for only 20 epochs.
Data link: https://pan.baidu.com/s/17I831_hpnfEOEMFzthKChg
Extraction code: kgep
Results:
[figures omitted: original image, label, predicted result]
Data layout:
Train, val, and test are organized identically. For each crop, the image and its label share the same tile number: the image is named <number>_sat and the label <number>_mask (a pairing check is sketched below).
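A quick way to verify the pairing before training (my own snippet; the root path matches the one used in train.py below):

import os

root = 'D:/pytorch_tiramisu-master/data/Build256/train/'
sats = [f for f in os.listdir(root) if f.endswith('_sat.png')]
for f in sats:
    mask = f.replace('_sat.png', '_mask.png')
    assert os.path.exists(os.path.join(root, mask)), 'missing mask for ' + f
print(len(sats), 'image/mask pairs found')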
Based on the trainUnet.ipynb file and the data-input code from the previous post, my train.py looks like this:
import os
import time
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from models import tiramisu
from datasets import camvid
from datasets import joint_transforms
import utils.imgs
import utils.training as train_utils
from utils.data import ImageFolder

batch_size = 2

train_root = 'D:/pytorch_tiramisu-master/data/Build256/train/'
imagelist = filter(lambda x: x.find('sat') != -1, os.listdir(train_root))
trainlist = map(lambda x: x[:-8], imagelist)   # strip '_sat.png'
trainlist = list(trainlist)

val_root = 'D:/pytorch_tiramisu-master/data/Build256/val/'
imagelist = filter(lambda x: x.find('sat') != -1, os.listdir(val_root))
vallist = map(lambda x: x[:-8], imagelist)
vallist = list(vallist)

train_dataset = ImageFolder(trainlist, train_root)
val_dataset = ImageFolder(vallist, val_root)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False)
# inputs, targets = next(iter(train_loader))

LR = 1e-4
LR_DECAY = 0.995
DECAY_EVERY_N_EPOCHS = 1
N_EPOCHS = 20
torch.cuda.manual_seed(0)

model = tiramisu.FCDenseNet67(n_classes=2).cuda()
model.apply(train_utils.weights_init)
optimizer = torch.optim.RMSprop(model.parameters(), lr=LR, weight_decay=1e-4)
# criterion = nn.NLLLoss2d(weight=camvid.class_weight.cuda()).cuda()  # original loss
criterion = nn.CrossEntropyLoss().cuda()  # switched to cross-entropy loss

for epoch in range(1, N_EPOCHS + 1):
    since = time.time()

    ### Train ###
    trn_loss, trn_err = train_utils.train(
        model, train_loader, optimizer, criterion, epoch)
    print('Epoch {:d}: Train - Loss: {:.4f}, Acc: {:.4f}'.format(
        epoch, trn_loss, 1 - trn_err))
    time_elapsed = time.time() - since
    print('Train Time {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    ### Test ###
    val_loss, val_err = train_utils.test(model, val_loader, criterion, epoch)
    print('Val - Loss: {:.4f} | Acc: {:.4f}'.format(val_loss, 1 - val_err))
    time_elapsed = time.time() - since
    print('Total Time {:.0f}m {:.0f}s\n'.format(
        time_elapsed // 60, time_elapsed % 60))

    ### Checkpoint ###
    train_utils.save_weights(model, epoch, val_loss, val_err)

    ### Adjust Lr ###
    train_utils.adjust_learning_rate(LR, LR_DECAY, optimizer,
                                     epoch, DECAY_EVERY_N_EPOCHS)
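If a run gets interrupted, the checkpoint that save_weights() copies to latest.th each epoch can be restored with load_weights() from utils/training.py (shown further below). A minimal sketch of resuming:

# resume from the latest checkpoint instead of calling model.apply(weights_init)
start_epoch = train_utils.load_weights(model, './weights/latest.th')
for epoch in range(start_epoch, N_EPOCHS + 1):
    ...  # same training loop as above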
data.py feeds data to the network. No augmentation is done here; if you want augmentation, see the data-input file from the previous post, which has augmentation functions. Put this file in the utils folder.
import os
import cv2
import numpy as np
import torch
import torch.utils.data as data

def data_loader(id, root):
    img = cv2.imread(os.path.join(root, '{}_sat.png'.format(id)))
    mask = cv2.imread(os.path.join(root, '{}_mask.png'.format(id)), 0)
    mask = np.expand_dims(mask, axis=2)
    # binarize: any non-zero label pixel becomes class 1
    mask[mask >= 0.5] = 1
    mask[mask <= 0.5] = 0
    img = np.array(img, np.float32).transpose(2, 0, 1)
    mask = np.array(mask, np.float32).transpose(2, 0, 1)
    return img, mask

class ImageFolder(data.Dataset):
    def __init__(self, trainlist, root):
        self.ids = list(trainlist)
        self.loader = data_loader
        self.root = root

    def __getitem__(self, index):
        id = self.ids[index]
        img, mask = self.loader(id, self.root)
        img = torch.Tensor(img)
        mask = torch.Tensor(mask)
        return img, mask

    def __len__(self):
        return len(self.ids)
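A quick sanity check (my own snippet, reusing trainlist and train_root from train.py) that the dataset yields what the network expects:

dataset = ImageFolder(trainlist, train_root)
img, mask = dataset[0]
print(img.shape)   # torch.Size([3, 256, 256]) - CHW float image
print(mask.shape)  # torch.Size([1, 256, 256]) - single-channel 0/1 mask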
The file ./utils/training.py needs to be modified as follows:
import os
import sys
import math
import string
import random
import shutil
import time

from tqdm import tqdm
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.utils import save_image
import torch.nn.functional as F

from . import imgs as img_utils

# Paths are relative to the directory the scripts are launched from;
# kept consistent with predict.py, which loads './weights/latest.th'.
RESULTS_PATH = './results/'
WEIGHTS_PATH = './weights/'

def save_weights(model, epoch, loss, err):
    weights_fname = 'weights-%d-%.3f-%.3f.pth' % (epoch, loss, err)
    weights_fpath = os.path.join(WEIGHTS_PATH, weights_fname)
    torch.save({
        'startEpoch': epoch,
        'loss': loss,
        'error': err,
        'state_dict': model.state_dict()
    }, weights_fpath)
    shutil.copyfile(weights_fpath, WEIGHTS_PATH + 'latest.th')

def load_weights(model, fpath):
    print("loading weights '{}'".format(fpath))
    weights = torch.load(fpath)
    startEpoch = weights['startEpoch']
    model.load_state_dict(weights['state_dict'])
    print("loaded weights (lastEpoch {}, loss {}, error {})"
          .format(startEpoch - 1, weights['loss'], weights['error']))
    return startEpoch

def get_predictions(output_batch):
    bs, c, h, w = output_batch.size()
    tensor = output_batch.data
    # argmax over the class channel
    values, indices = tensor.cpu().max(1)
    indices = indices.view(bs, h, w)
    return indices

def error(preds, targets):
    assert preds.size() == targets.size()
    bs, h, w = preds.size()
    n_pixels = bs * h * w
    incorrect = preds.ne(targets).cpu().sum()
    # .item() first: dividing a LongTensor by an int floors to 0 in PyTorch 1.2,
    # which is what made the printed Accuracy stick at 1
    err = incorrect.item() / n_pixels
    return round(err, 5)

def train(model, trn_loader, optimizer, criterion, epoch):
    # heavily modified: adapted to the binary-classification setup, plus a tqdm progress bar
    model.train()
    trn_loss = 0
    trn_error = 0
    try:
        with tqdm(trn_loader, ncols=10) as t:
            for idx, data in enumerate(t):
                inputs = data[0].cuda()
                targets = data[1].cuda()
                optimizer.zero_grad()
                output = model(inputs)
                targets = targets.type(torch.cuda.LongTensor)
                targets1 = targets.squeeze(1)   # (B,1,H,W) -> (B,H,W) for CrossEntropyLoss
                loss = criterion(output, targets1)
                loss.backward()
                optimizer.step()
                trn_loss += loss.item()
                pred = get_predictions(output)
                targets = torch.squeeze(targets, 1)
                trn_error += error(pred, targets.cpu())
    except KeyboardInterrupt:
        t.close()
        raise
    t.close()
    trn_loss /= len(trn_loader)
    trn_error /= len(trn_loader)
    return trn_loss, trn_error

def test(model, test_loader, criterion, epoch=1):
    # heavily modified for the binary-classification case;
    # torch.no_grad() replaces the deprecated Variable(..., volatile=True)
    model.eval()
    test_loss = 0
    test_error = 0
    try:
        with tqdm(test_loader, ncols=10) as t, torch.no_grad():
            for data, target in t:
                data = data.cuda()
                target = target.cuda()
                output = model(data)
                target = target.type(torch.cuda.LongTensor)
                target1 = target.squeeze(1)
                test_loss += criterion(output, target1).item()
                pred = get_predictions(output)
                target = torch.squeeze(target, 1)
                test_error += error(pred, target.cpu())
    except KeyboardInterrupt:
        t.close()
        raise
    t.close()
    test_loss /= len(test_loader)
    test_error /= len(test_loader)
    return test_loss, test_error

def adjust_learning_rate(lr, decay, optimizer, cur_epoch, n_epochs):
    """Sets the learning rate to the initially
    configured `lr` decayed by `decay` every `n_epochs`"""
    new_lr = lr * (decay ** (cur_epoch // n_epochs))
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr

def weights_init(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_uniform_(m.weight)   # trailing-underscore form; the old alias is deprecated
        m.bias.data.zero_()

def predict(model, input_loader, n_batches=1):
    # input_loader.batch_size = 1
    predictions = []
    pre = []
    model.eval()
    with torch.no_grad():
        for input, target in input_loader:
            data = input.cuda()
            output = model(data)
            pred = get_predictions(output)
            pre.append(pred)
            # predictions.append([input, target, pred])  # commented out
    # return predictions  # commented out
    return pre

def view_sample_predictions(model, loader, n):
    inputs, targets = next(iter(loader))
    with torch.no_grad():
        output = model(inputs.cuda())
    pred = get_predictions(output)
    batch_size = inputs.size(0)
    for i in range(min(n, batch_size)):
        img_utils.view_image(inputs[i])
        img_utils.view_annotated(targets[i])
        img_utils.view_annotated(pred[i])
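To make the prediction handling concrete: get_predictions() takes the argmax over the class channel, so with n_classes=2 it returns a 0/1 index map that error() compares pixel-wise against the squeezed targets. A tiny standalone check (my own snippet):

import torch
import utils.training as train_utils

logits = torch.randn(2, 2, 256, 256)          # (batch, n_classes, H, W)
preds = train_utils.get_predictions(logits)   # LongTensor of shape (2, 256, 256)
print(preds.shape, preds.unique())            # values are class indices 0 and 1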
The model file ./models/tiramisu.py has small changes, mainly the binary-classification bits. The edits are minor, but here it is anyway:
import torch
import torch.nn as nn
# import torch.nn.functional as F
from .layers import *

class FCDenseNet(nn.Module):
    def __init__(self, in_channels=3, down_blocks=(5, 5, 5, 5, 5),
                 up_blocks=(5, 5, 5, 5, 5), bottleneck_layers=5,
                 growth_rate=16, out_chans_first_conv=48, n_classes=12):
        super().__init__()
        self.down_blocks = down_blocks
        self.up_blocks = up_blocks
        cur_channels_count = 0
        skip_connection_channel_counts = []

        ## First Convolution ##
        self.add_module('firstconv', nn.Conv2d(in_channels=in_channels,
                        out_channels=out_chans_first_conv, kernel_size=3,
                        stride=1, padding=1, bias=True))
        cur_channels_count = out_chans_first_conv

        #####################
        # Downsampling path #
        #####################
        self.denseBlocksDown = nn.ModuleList([])
        self.transDownBlocks = nn.ModuleList([])
        for i in range(len(down_blocks)):
            self.denseBlocksDown.append(
                DenseBlock(cur_channels_count, growth_rate, down_blocks[i]))
            cur_channels_count += (growth_rate*down_blocks[i])
            skip_connection_channel_counts.insert(0, cur_channels_count)
            self.transDownBlocks.append(TransitionDown(cur_channels_count))

        #####################
        #    Bottleneck     #
        #####################
        self.add_module('bottleneck', Bottleneck(cur_channels_count,
                                                 growth_rate, bottleneck_layers))
        prev_block_channels = growth_rate*bottleneck_layers
        cur_channels_count += prev_block_channels

        #######################
        #   Upsampling path   #
        #######################
        self.transUpBlocks = nn.ModuleList([])
        self.denseBlocksUp = nn.ModuleList([])
        for i in range(len(up_blocks)-1):
            self.transUpBlocks.append(TransitionUp(prev_block_channels, prev_block_channels))
            cur_channels_count = prev_block_channels + skip_connection_channel_counts[i]
            self.denseBlocksUp.append(DenseBlock(
                cur_channels_count, growth_rate, up_blocks[i],
                upsample=True))
            prev_block_channels = growth_rate*up_blocks[i]
            cur_channels_count += prev_block_channels

        ## Final DenseBlock ##
        self.transUpBlocks.append(TransitionUp(
            prev_block_channels, prev_block_channels))
        cur_channels_count = prev_block_channels + skip_connection_channel_counts[-1]
        self.denseBlocksUp.append(DenseBlock(
            cur_channels_count, growth_rate, up_blocks[-1],
            upsample=False))
        cur_channels_count += growth_rate*up_blocks[-1]

        ## Softmax ##
        self.finalConv = nn.Conv2d(in_channels=cur_channels_count,
                                   out_channels=n_classes, kernel_size=1, stride=1,
                                   padding=0, bias=True)
        # self.softmax = nn.LogSoftmax(dim=1)  # commented out

    def forward(self, x):
        out = self.firstconv(x)
        skip_connections = []
        for i in range(len(self.down_blocks)):
            out = self.denseBlocksDown[i](out)
            skip_connections.append(out)
            out = self.transDownBlocks[i](out)
        out = self.bottleneck(out)
        for i in range(len(self.up_blocks)):
            skip = skip_connections.pop()
            out = self.transUpBlocks[i](out, skip)
            out = self.denseBlocksUp[i](out)
        out = self.finalConv(out)
        # out = self.softmax(out)  # commented out
        out = torch.sigmoid(out)  # added (note: nn.CrossEntropyLoss would also accept raw logits)
        return out

def FCDenseNet57(n_classes):
    return FCDenseNet(
        in_channels=3, down_blocks=(4, 4, 4, 4, 4),
        up_blocks=(4, 4, 4, 4, 4), bottleneck_layers=4,
        growth_rate=12, out_chans_first_conv=48, n_classes=n_classes)

def FCDenseNet67(n_classes):
    return FCDenseNet(
        in_channels=3, down_blocks=(5, 5, 5, 5, 5),
        up_blocks=(5, 5, 5, 5, 5), bottleneck_layers=5,
        growth_rate=16, out_chans_first_conv=48, n_classes=n_classes)

def FCDenseNet103(n_classes):
    return FCDenseNet(
        in_channels=3, down_blocks=(4, 5, 7, 10, 12),
        up_blocks=(12, 10, 7, 5, 4), bottleneck_layers=15,
        growth_rate=16, out_chans_first_conv=48, n_classes=n_classes)
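One thing worth knowing when you swap in your own image size: with five TransitionDown/TransitionUp stages, the input height and width must be divisible by 2^5 = 32 (256×256 qualifies), and the output keeps the input resolution with one channel per class. A quick shape check (my own snippet):

import torch
from models import tiramisu

model = tiramisu.FCDenseNet67(n_classes=2)
with torch.no_grad():
    out = model(torch.randn(1, 3, 256, 256))
print(out.shape)  # torch.Size([1, 2, 256, 256])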
The prediction script predict.py:
import os
import cv2
import torch
import torch.nn as nn
from models import tiramisu
from utils.data import ImageFolder
import utils.training as train_utils

batch_size = 1
test_root = 'D:/pytorch_tiramisu-master/data/Build256/test/'
imagelist = filter(lambda x: x.find('sat') != -1, os.listdir(test_root))
testlist = map(lambda x: x[:-8], imagelist)
testlist = list(testlist)
test_dataset = ImageFolder(testlist, test_root)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

model = tiramisu.FCDenseNet67(n_classes=2).cuda()
model.load_state_dict(torch.load('./weights/latest.th')['state_dict'])

pre = train_utils.predict(model, test_loader, n_batches=1)

save_path = './results1'
for i in range(len(pre)):
    pre_path = os.path.join(save_path, testlist[i] + '_mask.png')
    pre_temp = pre[i].numpy().astype('uint8')  # cast from int64 so cv2.imwrite can save the PNG
    pre_temp[pre_temp > 0] = 255
    # print(pre_temp.shape)
    cv2.imwrite(pre_path, pre_temp[0])
The accuracy-evaluation script eval.py (I have posted this code several times before):
# -*- coding: utf-8 -*-
import os
import cv2
import numpy as np
from sklearn.metrics import confusion_matrix

class IOUMetric:
    """
    Class to calculate mean-iou using fast_hist method
    """
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.hist = np.zeros((num_classes, num_classes))

    def _fast_hist(self, label_pred, label_true):
        mask = (label_true >= 0) & (label_true < self.num_classes)
        hist = np.bincount(
            self.num_classes * label_true[mask].astype(int) +
            label_pred[mask], minlength=self.num_classes ** 2).reshape(self.num_classes, self.num_classes)
        return hist

    def evaluate(self, predictions, gts):
        for lp, lt in zip(predictions, gts):
            assert len(lp.flatten()) == len(lt.flatten())
            self.hist += self._fast_hist(lp.flatten(), lt.flatten())
        # miou
        iou = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - np.diag(self.hist))
        miou = np.nanmean(iou)
        # mean acc
        acc = np.diag(self.hist).sum() / self.hist.sum()
        acc_cls = np.nanmean(np.diag(self.hist) / self.hist.sum(axis=1))
        freq = self.hist.sum(axis=1) / self.hist.sum()
        fwavacc = (freq[freq > 0] * iou[freq > 0]).sum()
        return acc, acc_cls, iou, miou, fwavacc

if __name__ == '__main__':
    label_path = 'D:/pytorch_tiramisu-master/results/label/'
    predict_path = 'D:/pytorch_tiramisu-master/results/pre/'
    pres = os.listdir(predict_path)
    labels = []
    predicts = []
    for im in pres:
        if im[-4:] == '.png':
            label_name = im.split('.')[0] + '.png'
            lab_path = os.path.join(label_path, label_name)
            pre_path = os.path.join(predict_path, im)
            label = cv2.imread(lab_path, 0)
            pre = cv2.imread(pre_path, 0)
            label[label > 0] = 1
            pre[pre > 0] = 1
            labels.append(label)
            predicts.append(pre)

    el = IOUMetric(2)
    acc, acc_cls, iou, miou, fwavacc = el.evaluate(predicts, labels)
    print('acc: ', acc)
    print('acc_cls: ', acc_cls)
    print('iou: ', iou)
    print('miou: ', miou)
    print('fwavacc: ', fwavacc)

    pres = os.listdir(predict_path)
    init = np.zeros((2, 2))
    for im in pres:
        lb_path = os.path.join(label_path, im)
        pre_path = os.path.join(predict_path, im)
        lb = cv2.imread(lb_path, 0)
        pre = cv2.imread(pre_path, 0)
        lb[lb > 0] = 1
        pre[pre > 0] = 1
        lb = lb.flatten()
        pre = pre.flatten()
        confuse = confusion_matrix(lb, pre)
        init += confuse
    precision = init[1][1] / (init[0][1] + init[1][1])
    recall = init[1][1] / (init[1][0] + init[1][1])
    accuracy = (init[0][0] + init[1][1]) / init.sum()
    f1_score = 2 * precision * recall / (precision + recall)
    print('class_accuracy: ', precision)
    print('class_recall: ', recall)
    print('accuracy: ', accuracy)
    print('f1_score: ', f1_score)
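As a sanity check on the precision/recall/F1 formulas above, here is a tiny worked example with made-up numbers, the 2×2 matrix laid out as [[TN, FP], [FN, TP]]:

import numpy as np

init = np.array([[90., 5.],
                 [10., 45.]])
precision = init[1][1] / (init[0][1] + init[1][1])  # 45 / 50  = 0.900
recall = init[1][1] / (init[1][0] + init[1][1])     # 45 / 55  ≈ 0.818
f1 = 2 * precision * recall / (precision + recall)  # ≈ 0.857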
That wraps up training and testing. One heads-up about the Accuracy that prints as a constant 1 during training: it almost certainly comes from the integer division in error() (in PyTorch 1.2, dividing a LongTensor by an int floors to 0, so the error is always 0 and the accuracy always 1). The training.py above calls .item() before dividing, which fixes the printout; either way it never affected the saved weights, it is only a reference output.
Below is some post-processing exploration. Many people want to use a CRF as a post-process, so crf.py follows. It runs, but it brought no improvement at all for me; I am posting it anyway because many people cannot even get it to run. The code has tunable parameters (a sweep sketch follows the script), so try them and see whether you can raise the evaluation metrics.
pydensecrf needs to be installed; installer package link: https://pan.baidu.com/s/19HjFxyqDd1PffukFGyPj9g
Extraction code: ke6g. My Python is 3.6; for other versions, get the wheel from https://www.lfd.uci.edu/~gohlke/pythonlibs/#pydensecrf
Reference: https://www.aiuai.cn/aifarm418.html
The PyTorch sample code at that link already spells it out: CRF does not really improve results. Note this sentence: “You’re using CRF. Are you sure? In our previous experiments, it has never improved the performance.”
# -*- coding: utf-8 -*-
import os
import cv2
import numpy as np
import pydensecrf.densecrf as dcrf

def unary_from_labels(labels, n_labels, gt_prob, zero_unsure=True):
    assert 0 < gt_prob < 1, "`gt_prob` must be in (0,1)."
    labels = labels.flatten()
    n_energy = -np.log((1.0 - gt_prob) / (n_labels - 1))
    p_energy = -np.log(gt_prob)
    U = np.full((n_labels, len(labels)), n_energy, dtype='float32')
    U[labels - 1 if zero_unsure else labels, np.arange(U.shape[1])] = p_energy
    if zero_unsure:
        U[:, labels == 0] = -np.log(1.0 / n_labels)
    return U

def dense_crf(img, output_probs):
    h = output_probs.shape[0]
    w = output_probs.shape[1]
    output_probs = np.expand_dims(output_probs, 0)
    output_probs = np.append(1 - output_probs, output_probs, axis=0)
    # clip away exact 0/1 so -log() below stays finite (the input here is a hard 0/1 mask)
    output_probs = np.clip(output_probs.astype(np.float32), 1e-8, 1.0)
    d = dcrf.DenseCRF2D(w, h, 2)
    U = -np.log(output_probs)
    U = U.reshape((2, -1))
    U = np.ascontiguousarray(U)
    img = np.ascontiguousarray(img)
    U = U.astype(np.float32)
    d.setUnaryEnergy(U)
    d.addPairwiseGaussian(sxy=20, compat=3)
    d.addPairwiseBilateral(sxy=30, srgb=20, rgbim=img, compat=10)
    Q = d.inference(5)
    Q = np.argmax(np.array(Q), axis=0).reshape((h, w))
    return Q

if __name__ == "__main__":
    img_path = "./results/imgs/"   # original images
    pre_path = "./results/pre/"    # predicted masks
    out_path = "./results/crf/"    # output path
    imgs = os.listdir(img_path)
    for im in imgs:
        name = im[:-7]             # strip 'sat.png'
        pre_name = name + 'mask.png'
        im_full_path = os.path.join(img_path, im)
        pre_full_path = os.path.join(pre_path, pre_name)
        out_full_path = os.path.join(out_path, pre_name)
        img = cv2.imread(im_full_path)
        pre = cv2.imread(pre_full_path, 0)
        pre[pre > 0] = 1
        # print(pre.shape)
        crf_re = dense_crf(img, pre)
        # scale to 0/255 like predict.py so the saved mask is viewable
        cv2.imwrite(out_full_path, (crf_re * 255).astype(np.uint8))
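If you want to sweep those tunable parameters, one way (my own sketch; the function name dense_crf_params and the candidate values are hypothetical starting points, not from the original post) is to expose the pairwise terms as arguments, write each combination's masks to its own folder, and run eval.py on each:

import itertools
import numpy as np
import pydensecrf.densecrf as dcrf

def dense_crf_params(img, mask, sxy_b=30, compat_b=10, iters=5):
    # same logic as dense_crf above, with the bilateral term and
    # iteration count exposed so they can be swept
    h, w = mask.shape
    probs = np.clip(np.stack([1 - mask, mask]).astype(np.float32), 1e-8, 1.0)
    d = dcrf.DenseCRF2D(w, h, 2)
    d.setUnaryEnergy(np.ascontiguousarray(-np.log(probs).reshape((2, -1))))
    d.addPairwiseGaussian(sxy=20, compat=3)
    d.addPairwiseBilateral(sxy=sxy_b, srgb=20,
                           rgbim=np.ascontiguousarray(img), compat=compat_b)
    Q = d.inference(iters)
    return np.argmax(np.array(Q), axis=0).reshape((h, w))

# img/pre as loaded in the loop above; compare miou/f1_score per combination
for sxy_b, compat_b in itertools.product((20, 30, 50), (5, 10, 15)):
    crf_re = dense_crf_params(img, pre, sxy_b=sxy_b, compat_b=compat_b)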