Files:
common.py          # used together with model_alexnet.py; implements the basic network building blocks as PyTorch modules
model_alexnet.py   # MatchNet is adapted from AlexNet, so this is a lightly modified copy of the PyTorch AlexNet
main_matchnet.py   # main script, including the evaluation metric -- adapted from the official PyTorch MNIST example
phototour.py       # PhotoTour dataset file, extracted from torchvision
utils.py           # dataset helper file, extracted from torchvision
Uses PyTorch 1.1.
main_matchnet.py
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
import model_alexnet
import phototour
import shutil
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
def train(args, model, device, train_loader, optimizer, epoch):
model.train()
for batch_idx, (data_1, data_2, match_t) in enumerate(train_loader):
data_1, data_2, match_t = data_1.to(device), data_2.to(device), match_t.to(device)
optimizer.zero_grad()
output, score = model(data_1, data_2)
        # https://blog.csdn.net/m0_38133212/article/details/88087206
        # Cross-entropy loss: CrossEntropyLoss() = log_softmax() + NLLLoss(),
        # so nll_loss() below expects the model to output log-probabilities.
loss = F.nll_loss(output, match_t)
loss.backward()
optimizer.step()
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data_1), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
#https://blog.csdn.net/GAN_player/article/details/85113431
import operator
import numpy as np
def ErrorRateAt95Recall(labels, scores):
recall_point = 0.95
# Sort label-score tuples by the score in descending order.
temp = zip(labels, scores)
    # operator.itemgetter(1) keys the sort on the second tuple element (the
    # score); reverse=True sorts from largest to smallest.
    sorted_scores = sorted(temp, key=operator.itemgetter(1), reverse=True)
# Compute error rate
    # n_match is the number of positive (matching) pairs in the test set
n_match = sum(1 for x in sorted_scores if x[0] == 1)
n_thresh = recall_point * n_match
tp = 0
count = 0
for label, score in sorted_scores:
count += 1
if label == 1:
tp += 1
if tp >= n_thresh:
break
return float(count - tp) / count
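# Illustrative trace (hypothetical numbers): for labels [1, 1, 0, 1, 0] already
# sorted by descending score, n_match = 3 and n_thresh = 2.85, so the loop stops
# at the 4th element (tp = 3, count = 4) and the error rate is (4 - 3) / 4 = 0.25.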
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
labels_list = []
scores_list = []
with torch.no_grad():
for data_t1, data_t2, match_tt in test_loader:
data_t1, data_t2, match_tt = data_t1.to(device), data_t2.to(device), match_tt.to(device)
output, score = model(data_t1, data_t2)
test_loss += F.nll_loss(output, match_tt, reduction='sum').item() # sum up batch loss
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(match_tt.view_as(pred)).sum().item()
            score_array = score.cpu().data.numpy()
            label_array = match_tt.cpu().data.numpy()
            # ErrorRateAt95Recall ranks by a continuous score, so keep the
            # match probability (column 1) rather than the hard argmax label.
            match_score = score_array[:, 1]
            labels_list.extend(label_array.tolist())
            scores_list.extend(match_score.tolist())
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
print("ErrorRateAt95Recall:", ErrorRateAt95Recall(labels_list, scores_list))
return (100. * correct / len(test_loader.dataset))
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
torch.save(state, filename)
if is_best:
shutil.copyfile(filename, 'model_best.pth.tar')
def main():
# Training settings
    parser = argparse.ArgumentParser(description='PyTorch MatchNet example (adapted from the MNIST example)')
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test-batch-size', type=int, default=128, metavar='N',
                        help='input batch size for testing (default: 128)')
    parser.add_argument('--epochs', type=int, default=150000, metavar='N',
                        help='number of epochs to train (default: 150000)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
help='Learning rate step gamma (default: 0.7)')
parser.add_argument('--no-cuda', action='store_true', default=False,
help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=10086, metavar='S',
                        help='random seed (default: 10086)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
parser.add_argument('--save-model', action='store_true', default=True,
help='For Saving the current Model')
args = parser.parse_args()
use_cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
device = torch.device("cuda" if use_cuda else "cpu")
train_kwargs = {'batch_size': args.batch_size}
test_kwargs = {'batch_size': args.test_batch_size}
if use_cuda:
cuda_kwargs = {'num_workers': 1,
'pin_memory': True,
'shuffle': False}
train_kwargs.update(cuda_kwargs)
test_kwargs.update(cuda_kwargs)
transform=transforms.Compose([
transforms.ToPILImage(),
transforms.ToTensor(),
transforms.Normalize(mean=(0.4844,), std=(0.1818,))
])
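    # The mean/std above are the 'yosemite' statistics hard-coded in phototour.py.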
dataset_path = '/home/boyun/deepglint/ImageMatch_dataset/data/PhotoTour'
dataset1 = phototour.PhotoTour(dataset_path,
name='yosemite',
train=False,
download=False,
transform=transform)
dataset2 = phototour.PhotoTour(dataset_path,
name='yosemite',
train=False,
download=False,
transform=transform)
train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
    # create model: input patches are 1 x 64 x 64 grayscale,
    # two output classes (match / non-match)
model = model_alexnet.get_alexnet(version="matchnet",
in_channels=1,
in_size=(64, 64),
num_classes=2)
model = model.to(device)
optimizer = optim.SGD(model.parameters(), lr=args.lr)
scheduler = StepLR(optimizer, step_size=10000, gamma=args.gamma)
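    # scheduler.step() is called once per epoch below, so with step_size=10000
    # the learning rate is multiplied by gamma every 10000 epochs.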
best_acc1 = 0
for epoch in range(1, args.epochs + 1):
train(args, model, device, train_loader, optimizer, epoch)
accuracy = test(model, device, test_loader)
scheduler.step()
# remember best acc@1 and save checkpoint
is_best = accuracy > best_acc1
best_acc1 = max(accuracy, best_acc1)
save_checkpoint({
'epoch': epoch + 1,
'arch': "matchnet",
'state_dict': model.state_dict(),
'best_acc1': best_acc1,
'optimizer' : optimizer.state_dict(),
}, is_best)
if args.save_model:
torch.save(model.state_dict(), "matchnet.pt")
if __name__ == '__main__':
main()
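As a quick sanity check of the metric, ErrorRateAt95Recall can be exercised on synthetic data (a standalone sketch; the labels and scores below are made up rather than produced by the model):

import numpy as np
from main_matchnet import ErrorRateAt95Recall

rng = np.random.RandomState(0)
labels = rng.randint(0, 2, size=1000).tolist()      # synthetic 0/1 match labels
# noisy scores that are higher on average for positive pairs
scores = [l + rng.normal(scale=0.5) for l in labels]
print(ErrorRateAt95Recall(labels, scores))          # a value in [0, 1]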
phototour.py
import os
import numpy as np
from PIL import Image
import torch
import torch.utils.data as data
from utils import download_url
class PhotoTour(data.Dataset):
"""`Learning Local Image Descriptors Data `_ Dataset.
Args:
root (string): Root directory where images are.
name (string): Name of the dataset to load.
transform (callable, optional): A function/transform that takes in an PIL image
and returns a transformed version.
download (bool, optional): If true, downloads the dataset from the internet and
puts it in root directory. If dataset is already downloaded, it is not
downloaded again.
"""
urls = {
'notredame_harris': [
'http://matthewalunbrown.com/patchdata/notredame_harris.zip',
'notredame_harris.zip',
'69f8c90f78e171349abdf0307afefe4d'
],
'yosemite_harris': [
'http://matthewalunbrown.com/patchdata/yosemite_harris.zip',
'yosemite_harris.zip',
'a73253d1c6fbd3ba2613c45065c00d46'
],
'liberty_harris': [
'http://matthewalunbrown.com/patchdata/liberty_harris.zip',
'liberty_harris.zip',
'c731fcfb3abb4091110d0ae8c7ba182c'
],
'notredame': [
'http://icvl.ee.ic.ac.uk/vbalnt/notredame.zip',
'notredame.zip',
'509eda8535847b8c0a90bbb210c83484'
],
'yosemite': [
'http://icvl.ee.ic.ac.uk/vbalnt/yosemite.zip',
'yosemite.zip',
'533b2e8eb7ede31be40abc317b2fd4f0'
],
'liberty': [
'http://icvl.ee.ic.ac.uk/vbalnt/liberty.zip',
'liberty.zip',
'fdd9152f138ea5ef2091746689176414'
],
}
mean = {'notredame': 0.4854, 'yosemite': 0.4844, 'liberty': 0.4437,
'notredame_harris': 0.4854, 'yosemite_harris': 0.4844, 'liberty_harris': 0.4437}
std = {'notredame': 0.1864, 'yosemite': 0.1818, 'liberty': 0.2019,
'notredame_harris': 0.1864, 'yosemite_harris': 0.1818, 'liberty_harris': 0.2019}
lens = {'notredame': 468159, 'yosemite': 633587, 'liberty': 450092,
'liberty_harris': 379587, 'yosemite_harris': 450912, 'notredame_harris': 325295}
image_ext = 'bmp'
info_file = 'info.txt'
matches_files = 'm50_100000_100000_0.txt'
def __init__(self, root, name, train=True, transform=None, download=False):
self.root = os.path.expanduser(root)
self.name = name
self.data_dir = os.path.join(self.root, name)
self.data_down = os.path.join(self.root, '{}.zip'.format(name))
self.data_file = os.path.join(self.root, '{}.pt'.format(name))
self.train = train
self.transform = transform
self.mean = self.mean[name]
self.std = self.std[name]
if download:
self.download()
if not self._check_datafile_exists():
raise RuntimeError('Dataset not found.' +
' You can use download=True to download it')
# load the serialized data
self.data, self.labels, self.matches = torch.load(self.data_file)
def __getitem__(self, index):
"""
Args:
index (int): Index
Returns:
tuple: (data1, data2, matches)
"""
if self.train:
data = self.data[index]
if self.transform is not None:
data = self.transform(data)
return data
m = self.matches[index]
data1, data2 = self.data[m[0]], self.data[m[1]]
if self.transform is not None:
data1 = self.transform(data1)
data2 = self.transform(data2)
return data1, data2, m[2]
def __len__(self):
if self.train:
return self.lens[self.name]
return len(self.matches)
def _check_datafile_exists(self):
return os.path.exists(self.data_file)
def _check_downloaded(self):
return os.path.exists(self.data_dir)
def download(self):
if self._check_datafile_exists():
print('# Found cached data {}'.format(self.data_file))
return
if not self._check_downloaded():
# download files
url = self.urls[self.name][0]
filename = self.urls[self.name][1]
md5 = self.urls[self.name][2]
fpath = os.path.join(self.root, filename)
download_url(url, self.root, filename, md5)
print('# Extracting data {}\n'.format(self.data_down))
import zipfile
with zipfile.ZipFile(fpath, 'r') as z:
z.extractall(self.data_dir)
os.unlink(fpath)
# process and save as torch files
print('# Caching data {}'.format(self.data_file))
dataset = (
read_image_file(self.data_dir, self.image_ext, self.lens[self.name]),
read_info_file(self.data_dir, self.info_file),
read_matches_files(self.data_dir, self.matches_files)
)
with open(self.data_file, 'wb') as f:
torch.save(dataset, f)
def __repr__(self):
fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
fmt_str += ' Number of datapoints: {}\n'.format(self.__len__())
tmp = 'train' if self.train is True else 'test'
fmt_str += ' Split: {}\n'.format(tmp)
fmt_str += ' Root Location: {}\n'.format(self.root)
tmp = ' Transforms (if any): '
fmt_str += '{0}{1}'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
return fmt_str
def read_image_file(data_dir, image_ext, n):
"""Return a Tensor containing the patches
"""
def PIL2array(_img):
"""Convert PIL image type to numpy 2D array
"""
return np.array(_img.getdata(), dtype=np.uint8).reshape(64, 64)
def find_files(_data_dir, _image_ext):
"""Return a list with the file names of the images containing the patches
"""
files = []
# find those files with the specified extension
for file_dir in os.listdir(_data_dir):
if file_dir.endswith(_image_ext):
files.append(os.path.join(_data_dir, file_dir))
return sorted(files) # sort files in ascend order to keep relations
patches = []
list_files = find_files(data_dir, image_ext)
for fpath in list_files:
img = Image.open(fpath)
for y in range(0, 1024, 64):
for x in range(0, 1024, 64):
patch = img.crop((x, y, x + 64, y + 64))
patches.append(PIL2array(patch))
return torch.ByteTensor(np.array(patches[:n]))
def read_info_file(data_dir, info_file):
"""Return a Tensor containing the list of labels
Read the file and keep only the ID of the 3D point.
"""
labels = []
with open(os.path.join(data_dir, info_file), 'r') as f:
labels = [int(line.split()[0]) for line in f]
return torch.LongTensor(labels)
def read_matches_files(data_dir, matches_file):
"""Return a Tensor containing the ground truth matches
Read the file and keep only 3D point ID.
Matches are represented with a 1, non matches with a 0.
"""
matches = []
with open(os.path.join(data_dir, matches_file), 'r') as f:
for line in f:
line_split = line.split()
matches.append([int(line_split[0]), int(line_split[3]), int(line_split[1] == line_split[4])])
return torch.LongTensor(matches)
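Usage sketch for the dataset class (assuming the 'yosemite' set has already been downloaded and cached as yosemite.pt; DATA_ROOT is a placeholder path):

from torchvision import transforms
import phototour

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4844,), std=(0.1818,)),  # yosemite stats above
])
DATA_ROOT = '/path/to/PhotoTour'  # placeholder
ds = phototour.PhotoTour(DATA_ROOT, name='yosemite', train=False,
                         download=False, transform=transform)
data1, data2, match = ds[0]
print(data1.shape, data2.shape, int(match))  # two 1x64x64 tensors and a 0/1 label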
utils.py
import os
import os.path
import hashlib
import errno
from tqdm import tqdm
def gen_bar_updater(pbar):
def bar_update(count, block_size, total_size):
if pbar.total is None and total_size:
pbar.total = total_size
progress_bytes = count * block_size
pbar.update(progress_bytes - pbar.n)
return bar_update
def check_integrity(fpath, md5=None):
if md5 is None:
return True
if not os.path.isfile(fpath):
return False
md5o = hashlib.md5()
with open(fpath, 'rb') as f:
# read in 1MB chunks
for chunk in iter(lambda: f.read(1024 * 1024), b''):
md5o.update(chunk)
md5c = md5o.hexdigest()
if md5c != md5:
return False
return True
def makedir_exist_ok(dirpath):
"""
Python2 support for os.makedirs(.., exist_ok=True)
"""
try:
os.makedirs(dirpath)
except OSError as e:
if e.errno == errno.EEXIST:
pass
else:
raise
def download_url(url, root, filename=None, md5=None):
"""Download a file from a url and place it in root.
Args:
url (str): URL to download file from
root (str): Directory to place downloaded file in
filename (str): Name to save the file under. If None, use the basename of the URL
md5 (str): MD5 checksum of the download. If None, do not check
"""
from six.moves import urllib
root = os.path.expanduser(root)
if not filename:
filename = os.path.basename(url)
fpath = os.path.join(root, filename)
makedir_exist_ok(root)
# downloads file
if os.path.isfile(fpath) and check_integrity(fpath, md5):
print('Using downloaded and verified file: ' + fpath)
else:
try:
print('Downloading ' + url + ' to ' + fpath)
urllib.request.urlretrieve(
url, fpath,
reporthook=gen_bar_updater(tqdm(unit='B', unit_scale=True))
)
except OSError:
if url[:5] == 'https':
url = url.replace('https:', 'http:')
print('Failed download. Trying https -> http instead.'
' Downloading ' + url + ' to ' + fpath)
urllib.request.urlretrieve(
url, fpath,
reporthook=gen_bar_updater(tqdm(unit='B', unit_scale=True))
)
def list_dir(root, prefix=False):
"""List all directories at a given root
Args:
root (str): Path to directory whose folders need to be listed
prefix (bool, optional): If true, prepends the path to each result, otherwise
only returns the name of the directories found
"""
root = os.path.expanduser(root)
directories = list(
filter(
lambda p: os.path.isdir(os.path.join(root, p)),
os.listdir(root)
)
)
if prefix is True:
directories = [os.path.join(root, d) for d in directories]
return directories
def list_files(root, suffix, prefix=False):
"""List all files ending with a suffix at a given root
Args:
root (str): Path to directory whose folders need to be listed
suffix (str or tuple): Suffix of the files to match, e.g. '.png' or ('.jpg', '.png').
It uses the Python "str.endswith" method and is passed directly
prefix (bool, optional): If true, prepends the path to each result, otherwise
only returns the name of the files found
"""
root = os.path.expanduser(root)
files = list(
filter(
lambda p: os.path.isfile(os.path.join(root, p)) and p.endswith(suffix),
os.listdir(root)
)
)
if prefix is True:
files = [os.path.join(root, d) for d in files]
return files
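Usage sketch for the download helpers (the URL and checksum are the 'yosemite' entries from phototour.py; the target directory is a placeholder):

import os
from utils import download_url, check_integrity

root = './data'  # placeholder
url = 'http://icvl.ee.ic.ac.uk/vbalnt/yosemite.zip'
md5 = '533b2e8eb7ede31be40abc317b2fd4f0'
download_url(url, root, filename='yosemite.zip', md5=md5)
print(check_integrity(os.path.join(root, 'yosemite.zip'), md5=md5))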
common.py
"""
Common routines for models in PyTorch.
"""
__all__ = ['round_channels', 'Identity', 'Swish', 'HSigmoid', 'HSwish', 'get_activation_layer', 'SelectableDense',
'DenseBlock', 'ConvBlock1d', 'conv1x1', 'conv3x3', 'depthwise_conv3x3', 'ConvBlock', 'conv1x1_block',
'conv3x3_block', 'conv7x7_block', 'dwconv_block', 'dwconv3x3_block', 'dwconv5x5_block', 'dwsconv3x3_block',
'PreConvBlock', 'pre_conv1x1_block', 'pre_conv3x3_block', 'DeconvBlock', 'NormActivation',
'InterpolationBlock', 'ChannelShuffle', 'ChannelShuffle2', 'SEBlock', 'SABlock', 'SAConvBlock',
'saconv3x3_block', 'DucBlock', 'IBN', 'DualPathSequential', 'Concurrent', 'SequentialConcurrent',
'ParametricSequential', 'ParametricConcurrent', 'Hourglass', 'SesquialteralHourglass',
'MultiOutputSequential', 'ParallelConcurent', 'Flatten', 'HeatmapMaxDetBlock']
import math
from inspect import isfunction
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
def round_channels(channels,
divisor=8):
"""
Round weighted channel number (make divisible operation).
Parameters:
----------
channels : int or float
Original number of channels.
divisor : int, default 8
Alignment value.
Returns
-------
int
Weighted number of channels.
"""
rounded_channels = max(int(channels + divisor / 2.0) // divisor * divisor, divisor)
if float(rounded_channels) < 0.9 * channels:
rounded_channels += divisor
return rounded_channels
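# Illustrative example: round_channels(37) computes (37 + 4) // 8 * 8 = 40;
# since 40 >= 0.9 * 37, no extra divisor is added and 40 is returned.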
class Identity(nn.Module):
"""
Identity block.
"""
def __init__(self):
super(Identity, self).__init__()
def forward(self, x):
return x
class Swish(nn.Module):
"""
Swish activation function from 'Searching for Activation Functions,' https://arxiv.org/abs/1710.05941.
"""
def forward(self, x):
return x * torch.sigmoid(x)
class HSigmoid(nn.Module):
"""
Approximated sigmoid function, so-called hard-version of sigmoid from 'Searching for MobileNetV3,'
https://arxiv.org/abs/1905.02244.
"""
def forward(self, x):
return F.relu6(x + 3.0, inplace=True) / 6.0
class HSwish(nn.Module):
"""
H-Swish activation function from 'Searching for MobileNetV3,' https://arxiv.org/abs/1905.02244.
Parameters:
----------
inplace : bool
Whether to use inplace version of the module.
"""
def __init__(self, inplace=False):
super(HSwish, self).__init__()
self.inplace = inplace
def forward(self, x):
return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0
def get_activation_layer(activation):
"""
Create activation layer from string/function.
Parameters:
----------
activation : function, or str, or nn.Module
Activation function or name of activation function.
Returns
-------
nn.Module
Activation layer.
"""
assert (activation is not None)
if isfunction(activation):
return activation()
elif isinstance(activation, str):
if activation == "relu":
return nn.ReLU(inplace=True)
elif activation == "relu6":
return nn.ReLU6(inplace=True)
elif activation == "swish":
return Swish()
elif activation == "hswish":
return HSwish(inplace=True)
elif activation == "sigmoid":
return nn.Sigmoid()
elif activation == "hsigmoid":
return HSigmoid()
elif activation == "identity":
return Identity()
else:
raise NotImplementedError()
else:
assert (isinstance(activation, nn.Module))
return activation
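# Illustrative examples: get_activation_layer("hswish") returns HSwish(inplace=True);
# a factory such as (lambda: nn.ReLU(inplace=True)) is also accepted, because
# isfunction() is true for lambdas and the factory is simply called.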
class SelectableDense(nn.Module):
"""
Selectable dense layer.
Parameters:
----------
in_features : int
Number of input features.
out_features : int
Number of output features.
bias : bool, default False
Whether the layer uses a bias vector.
num_options : int, default 1
Number of selectable options.
"""
def __init__(self,
in_features,
out_features,
bias=False,
num_options=1):
super(SelectableDense, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.use_bias = bias
self.num_options = num_options
self.weight = Parameter(torch.Tensor(num_options, out_features, in_features))
if bias:
self.bias = Parameter(torch.Tensor(num_options, out_features))
else:
self.register_parameter("bias", None)
def forward(self, x, indices):
weight = torch.index_select(self.weight, dim=0, index=indices)
x = x.unsqueeze(-1)
x = weight.bmm(x)
x = x.squeeze(dim=-1)
if self.use_bias:
bias = torch.index_select(self.bias, dim=0, index=indices)
x += bias
return x
def extra_repr(self):
return "in_features={}, out_features={}, bias={}, num_options={}".format(
self.in_features, self.out_features, self.use_bias, self.num_options)
class DenseBlock(nn.Module):
"""
Standard dense block with Batch normalization and activation.
Parameters:
----------
in_features : int
Number of input features.
out_features : int
Number of output features.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU(inplace=True)
Activation function or name of activation function.
"""
def __init__(self,
in_features,
out_features,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=(lambda: nn.ReLU(inplace=True))):
super(DenseBlock, self).__init__()
self.activate = (activation is not None)
self.use_bn = use_bn
self.fc = nn.Linear(
in_features=in_features,
out_features=out_features,
bias=bias)
if self.use_bn:
self.bn = nn.BatchNorm1d(
num_features=out_features,
eps=bn_eps)
if self.activate:
self.activ = get_activation_layer(activation)
def forward(self, x):
x = self.fc(x)
if self.use_bn:
x = self.bn(x)
if self.activate:
x = self.activ(x)
return x
class ConvBlock1d(nn.Module):
"""
Standard 1D convolution block with Batch normalization and activation.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
kernel_size : int
Convolution window size.
stride : int
Strides of the convolution.
padding : int
Padding value for convolution layer.
dilation : int
Dilation value for convolution layer.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU(inplace=True)
Activation function or name of activation function.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation=1,
groups=1,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=(lambda: nn.ReLU(inplace=True))):
super(ConvBlock1d, self).__init__()
self.activate = (activation is not None)
self.use_bn = use_bn
self.conv = nn.Conv1d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=bias)
if self.use_bn:
self.bn = nn.BatchNorm1d(
num_features=out_channels,
eps=bn_eps)
if self.activate:
self.activ = get_activation_layer(activation)
def forward(self, x):
x = self.conv(x)
if self.use_bn:
x = self.bn(x)
if self.activate:
x = self.activ(x)
return x
def conv1x1(in_channels,
out_channels,
stride=1,
groups=1,
bias=False):
"""
Convolution 1x1 layer.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
"""
return nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=stride,
groups=groups,
bias=bias)
def conv3x3(in_channels,
out_channels,
stride=1,
padding=1,
dilation=1,
groups=1,
bias=False):
"""
Convolution 3x3 layer.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
padding : int or tuple/list of 2 int, default 1
Padding value for convolution layer.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
"""
return nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=bias)
def depthwise_conv3x3(channels,
stride):
"""
Depthwise convolution 3x3 layer.
Parameters:
----------
channels : int
Number of input/output channels.
    stride : int or tuple/list of 2 int
        Strides of the convolution.
"""
return nn.Conv2d(
in_channels=channels,
out_channels=channels,
kernel_size=3,
stride=stride,
padding=1,
groups=channels,
bias=False)
class ConvBlock(nn.Module):
"""
Standard convolution block with Batch normalization and activation.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
kernel_size : int or tuple/list of 2 int
Convolution window size.
stride : int or tuple/list of 2 int
Strides of the convolution.
padding : int, or tuple/list of 2 int, or tuple/list of 4 int
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU(inplace=True)
Activation function or name of activation function.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation=1,
groups=1,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=(lambda: nn.ReLU(inplace=True))):
super(ConvBlock, self).__init__()
self.activate = (activation is not None)
self.use_bn = use_bn
self.use_pad = (isinstance(padding, (list, tuple)) and (len(padding) == 4))
if self.use_pad:
self.pad = nn.ZeroPad2d(padding=padding)
padding = 0
self.conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=bias)
if self.use_bn:
self.bn = nn.BatchNorm2d(
num_features=out_channels,
eps=bn_eps)
if self.activate:
self.activ = get_activation_layer(activation)
def forward(self, x):
if self.use_pad:
x = self.pad(x)
x = self.conv(x)
if self.use_bn:
x = self.bn(x)
if self.activate:
x = self.activ(x)
return x
def conv1x1_block(in_channels,
out_channels,
stride=1,
padding=0,
groups=1,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=(lambda: nn.ReLU(inplace=True))):
"""
1x1 version of the standard convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
padding : int, or tuple/list of 2 int, or tuple/list of 4 int, default 0
Padding value for convolution layer.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU(inplace=True)
Activation function or name of activation function.
"""
return ConvBlock(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=stride,
padding=padding,
groups=groups,
bias=bias,
use_bn=use_bn,
bn_eps=bn_eps,
activation=activation)
def conv3x3_block(in_channels,
out_channels,
stride=1,
padding=1,
dilation=1,
groups=1,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=(lambda: nn.ReLU(inplace=True))):
"""
3x3 version of the standard convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
padding : int, or tuple/list of 2 int, or tuple/list of 4 int, default 1
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU(inplace=True)
Activation function or name of activation function.
"""
return ConvBlock(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=bias,
use_bn=use_bn,
bn_eps=bn_eps,
activation=activation)
def conv5x5_block(in_channels,
out_channels,
stride=1,
padding=2,
dilation=1,
groups=1,
bias=False,
bn_eps=1e-5,
activation=(lambda: nn.ReLU(inplace=True))):
"""
5x5 version of the standard convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
padding : int, or tuple/list of 2 int, or tuple/list of 4 int, default 2
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU(inplace=True)
Activation function or name of activation function.
"""
return ConvBlock(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=5,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=bias,
bn_eps=bn_eps,
activation=activation)
def conv7x7_block(in_channels,
out_channels,
stride=1,
padding=3,
bias=False,
use_bn=True,
activation=(lambda: nn.ReLU(inplace=True))):
"""
7x7 version of the standard convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
padding : int or tuple/list of 2 int, default 3
Padding value for convolution layer.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
activation : function or str or None, default nn.ReLU(inplace=True)
Activation function or name of activation function.
"""
return ConvBlock(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=7,
stride=stride,
padding=padding,
bias=bias,
use_bn=use_bn,
activation=activation)
def dwconv_block(in_channels,
out_channels,
kernel_size,
stride=1,
padding=1,
dilation=1,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=(lambda: nn.ReLU(inplace=True))):
"""
Depthwise version of the standard convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
kernel_size : int or tuple/list of 2 int
Convolution window size.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
padding : int, or tuple/list of 2 int, or tuple/list of 4 int, default 1
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU(inplace=True)
Activation function or name of activation function.
"""
return ConvBlock(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=out_channels,
bias=bias,
use_bn=use_bn,
bn_eps=bn_eps,
activation=activation)
def dwconv3x3_block(in_channels,
out_channels,
stride=1,
padding=1,
dilation=1,
bias=False,
bn_eps=1e-5,
activation=(lambda: nn.ReLU(inplace=True))):
"""
3x3 depthwise version of the standard convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
padding : int, or tuple/list of 2 int, or tuple/list of 4 int, default 1
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
bias : bool, default False
Whether the layer uses a bias vector.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU(inplace=True)
Activation function or name of activation function.
"""
return dwconv_block(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
padding=padding,
dilation=dilation,
bias=bias,
bn_eps=bn_eps,
activation=activation)
def dwconv5x5_block(in_channels,
out_channels,
stride=1,
padding=2,
dilation=1,
bias=False,
bn_eps=1e-5,
activation=(lambda: nn.ReLU(inplace=True))):
"""
5x5 depthwise version of the standard convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
padding : int, or tuple/list of 2 int, or tuple/list of 4 int, default 2
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
bias : bool, default False
Whether the layer uses a bias vector.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU(inplace=True)
Activation function or name of activation function.
"""
return dwconv_block(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=5,
stride=stride,
padding=padding,
dilation=dilation,
bias=bias,
bn_eps=bn_eps,
activation=activation)
class DwsConvBlock(nn.Module):
"""
    Depthwise separable convolution block with BatchNorm and activation at each convolution layer.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
kernel_size : int or tuple/list of 2 int
Convolution window size.
stride : int or tuple/list of 2 int
Strides of the convolution.
padding : int, or tuple/list of 2 int, or tuple/list of 4 int
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
bias : bool, default False
Whether the layer uses a bias vector.
dw_use_bn : bool, default True
Whether to use BatchNorm layer (depthwise convolution block).
pw_use_bn : bool, default True
Whether to use BatchNorm layer (pointwise convolution block).
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
dw_activation : function or str or None, default nn.ReLU(inplace=True)
Activation function after the depthwise convolution block.
pw_activation : function or str or None, default nn.ReLU(inplace=True)
Activation function after the pointwise convolution block.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation=1,
bias=False,
dw_use_bn=True,
pw_use_bn=True,
bn_eps=1e-5,
dw_activation=(lambda: nn.ReLU(inplace=True)),
pw_activation=(lambda: nn.ReLU(inplace=True))):
super(DwsConvBlock, self).__init__()
self.dw_conv = dwconv_block(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
bias=bias,
use_bn=dw_use_bn,
bn_eps=bn_eps,
activation=dw_activation)
self.pw_conv = conv1x1_block(
in_channels=in_channels,
out_channels=out_channels,
bias=bias,
use_bn=pw_use_bn,
bn_eps=bn_eps,
activation=pw_activation)
def forward(self, x):
x = self.dw_conv(x)
x = self.pw_conv(x)
return x
def dwsconv3x3_block(in_channels,
out_channels,
stride=1,
padding=1,
dilation=1,
bias=False,
bn_eps=1e-5,
dw_activation=(lambda: nn.ReLU(inplace=True)),
pw_activation=(lambda: nn.ReLU(inplace=True)),
**kwargs):
"""
3x3 depthwise separable version of the standard convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
padding : int, or tuple/list of 2 int, or tuple/list of 4 int, default 1
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
bias : bool, default False
Whether the layer uses a bias vector.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
dw_activation : function or str or None, default nn.ReLU(inplace=True)
Activation function after the depthwise convolution block.
pw_activation : function or str or None, default nn.ReLU(inplace=True)
Activation function after the pointwise convolution block.
"""
return DwsConvBlock(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
padding=padding,
dilation=dilation,
bias=bias,
bn_eps=bn_eps,
dw_activation=dw_activation,
pw_activation=pw_activation,
**kwargs)
class PreConvBlock(nn.Module):
"""
Convolution block with Batch normalization and ReLU pre-activation.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
kernel_size : int or tuple/list of 2 int
Convolution window size.
stride : int or tuple/list of 2 int
Strides of the convolution.
padding : int or tuple/list of 2 int
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
    return_preact : bool, default False
        Whether to return the pre-activation. It's used by PreResNet.
    activate : bool, default True
        Whether to activate the convolution block.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation=1,
bias=False,
use_bn=True,
return_preact=False,
activate=True):
super(PreConvBlock, self).__init__()
self.return_preact = return_preact
self.activate = activate
self.use_bn = use_bn
if self.use_bn:
self.bn = nn.BatchNorm2d(num_features=in_channels)
if self.activate:
self.activ = nn.ReLU(inplace=True)
self.conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
bias=bias)
def forward(self, x):
if self.use_bn:
x = self.bn(x)
if self.activate:
x = self.activ(x)
if self.return_preact:
x_pre_activ = x
x = self.conv(x)
if self.return_preact:
return x, x_pre_activ
else:
return x
def pre_conv1x1_block(in_channels,
out_channels,
stride=1,
bias=False,
use_bn=True,
return_preact=False,
activate=True):
"""
1x1 version of the pre-activated convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
    return_preact : bool, default False
        Whether to return the pre-activation.
    activate : bool, default True
        Whether to activate the convolution block.
"""
return PreConvBlock(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=stride,
padding=0,
bias=bias,
use_bn=use_bn,
return_preact=return_preact,
activate=activate)
def pre_conv3x3_block(in_channels,
out_channels,
stride=1,
padding=1,
dilation=1,
bias=False,
use_bn=True,
return_preact=False,
activate=True):
"""
3x3 version of the pre-activated convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
padding : int or tuple/list of 2 int, default 1
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
    return_preact : bool, default False
        Whether to return the pre-activation.
    activate : bool, default True
        Whether to activate the convolution block.
"""
return PreConvBlock(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
padding=padding,
dilation=dilation,
bias=bias,
use_bn=use_bn,
return_preact=return_preact,
activate=activate)
class DeconvBlock(nn.Module):
"""
Deconvolution block with batch normalization and activation.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
kernel_size : int or tuple/list of 2 int
Convolution window size.
stride : int or tuple/list of 2 int
Strides of the deconvolution.
padding : int or tuple/list of 2 int
Padding value for deconvolution layer.
ext_padding : tuple/list of 4 int, default None
Extra padding value for deconvolution layer.
    out_padding : int or tuple/list of 2 int, default 0
        Output padding value for deconvolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for deconvolution layer.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU(inplace=True)
Activation function or name of activation function.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
ext_padding=None,
out_padding=0,
dilation=1,
groups=1,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=(lambda: nn.ReLU(inplace=True))):
super(DeconvBlock, self).__init__()
self.activate = (activation is not None)
self.use_bn = use_bn
self.use_pad = (ext_padding is not None)
if self.use_pad:
self.pad = nn.ZeroPad2d(padding=ext_padding)
self.conv = nn.ConvTranspose2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
output_padding=out_padding,
dilation=dilation,
groups=groups,
bias=bias)
if self.use_bn:
self.bn = nn.BatchNorm2d(
num_features=out_channels,
eps=bn_eps)
if self.activate:
self.activ = get_activation_layer(activation)
def forward(self, x):
if self.use_pad:
x = self.pad(x)
x = self.conv(x)
if self.use_bn:
x = self.bn(x)
if self.activate:
x = self.activ(x)
return x
class NormActivation(nn.Module):
"""
Activation block with preliminary batch normalization. It's used by itself as the final block in PreResNet.
Parameters:
----------
in_channels : int
Number of input channels.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
"""
def __init__(self,
in_channels,
bn_eps=1e-5):
super(NormActivation, self).__init__()
self.bn = nn.BatchNorm2d(
num_features=in_channels,
eps=bn_eps)
self.activ = nn.ReLU(inplace=True)
def forward(self, x):
x = self.bn(x)
x = self.activ(x)
return x
class InterpolationBlock(nn.Module):
"""
Interpolation upsampling block.
Parameters:
----------
scale_factor : float
Multiplier for spatial size.
out_size : tuple of 2 int, default None
Spatial size of the output tensor for the bilinear interpolation operation.
mode : str, default 'bilinear'
Algorithm used for upsampling.
align_corners : bool, default True
Whether to align the corner pixels of the input and output tensors.
up : bool, default True
Whether to upsample or downsample.
"""
def __init__(self,
scale_factor,
out_size=None,
mode="bilinear",
align_corners=True,
up=True):
super(InterpolationBlock, self).__init__()
self.scale_factor = scale_factor
self.out_size = out_size
self.mode = mode
self.align_corners = align_corners
self.up = up
def forward(self, x, size=None):
if (self.mode == "bilinear") or (size is not None):
out_size = self.calc_out_size(x) if size is None else size
return F.interpolate(
input=x,
size=out_size,
mode=self.mode,
align_corners=self.align_corners)
else:
return F.interpolate(
input=x,
scale_factor=self.scale_factor,
mode=self.mode,
align_corners=self.align_corners)
def calc_out_size(self, x):
if self.out_size is not None:
return self.out_size
if self.up:
return tuple(s * self.scale_factor for s in x.shape[2:])
else:
return tuple(s // self.scale_factor for s in x.shape[2:])
def __repr__(self):
s = '{name}(scale_factor={scale_factor}, out_size={out_size}, mode={mode}, align_corners={align_corners}, up={up})' # noqa
return s.format(
name=self.__class__.__name__,
scale_factor=self.scale_factor,
out_size=self.out_size,
mode=self.mode,
align_corners=self.align_corners,
up=self.up)
def calc_flops(self, x):
assert (x.shape[0] == 1)
if self.mode == "bilinear":
num_flops = 9 * x.numel()
else:
num_flops = 4 * x.numel()
num_macs = 0
return num_flops, num_macs
def channel_shuffle(x,
groups):
"""
Channel shuffle operation from 'ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices,'
https://arxiv.org/abs/1707.01083.
Parameters:
----------
x : Tensor
Input tensor.
groups : int
Number of groups.
Returns
-------
Tensor
Resulted tensor.
"""
batch, channels, height, width = x.size()
# assert (channels % groups == 0)
channels_per_group = channels // groups
x = x.view(batch, groups, channels_per_group, height, width)
x = torch.transpose(x, 1, 2).contiguous()
x = x.view(batch, channels, height, width)
return x
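# Illustrative example: with channels=4 and groups=2, the channel order
# [0, 1, 2, 3] becomes [0, 2, 1, 3] after the view/transpose/view sequence.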
class ChannelShuffle(nn.Module):
"""
    Channel shuffle layer. This is a wrapper over the same operation. It is designed to store the number of groups.
Parameters:
----------
channels : int
Number of channels.
groups : int
Number of groups.
"""
def __init__(self,
channels,
groups):
super(ChannelShuffle, self).__init__()
# assert (channels % groups == 0)
if channels % groups != 0:
raise ValueError('channels must be divisible by groups')
self.groups = groups
def forward(self, x):
return channel_shuffle(x, self.groups)
def channel_shuffle2(x,
groups):
"""
Channel shuffle operation from 'ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices,'
https://arxiv.org/abs/1707.01083. The alternative version.
Parameters:
----------
x : Tensor
Input tensor.
groups : int
Number of groups.
Returns
-------
Tensor
Resulted tensor.
"""
batch, channels, height, width = x.size()
# assert (channels % groups == 0)
channels_per_group = channels // groups
x = x.view(batch, channels_per_group, groups, height, width)
x = torch.transpose(x, 1, 2).contiguous()
x = x.view(batch, channels, height, width)
return x
class ChannelShuffle2(nn.Module):
"""
    Channel shuffle layer. This is a wrapper over the same operation. It is designed to store the number of groups.
    The alternative version.
Parameters:
----------
channels : int
Number of channels.
groups : int
Number of groups.
"""
def __init__(self,
channels,
groups):
super(ChannelShuffle2, self).__init__()
# assert (channels % groups == 0)
if channels % groups != 0:
raise ValueError('channels must be divisible by groups')
self.groups = groups
def forward(self, x):
return channel_shuffle2(x, self.groups)
class SEBlock(nn.Module):
"""
Squeeze-and-Excitation block from 'Squeeze-and-Excitation Networks,' https://arxiv.org/abs/1709.01507.
Parameters:
----------
channels : int
Number of channels.
reduction : int, default 16
Squeeze reduction value.
round_mid : bool, default False
Whether to round middle channel number (make divisible by 8).
    use_conv : bool, default True
        Whether to use convolutional layers instead of fully-connected ones.
    mid_activation : function, or str, or nn.Module, default nn.ReLU(inplace=True)
        Activation function after the first convolution.
    out_activation : function, or str, or nn.Module, default nn.Sigmoid()
        Activation function after the last convolution.
"""
def __init__(self,
channels,
reduction=16,
round_mid=False,
use_conv=True,
mid_activation=(lambda: nn.ReLU(inplace=True)),
out_activation=(lambda: nn.Sigmoid())):
super(SEBlock, self).__init__()
self.use_conv = use_conv
mid_channels = channels // reduction if not round_mid else round_channels(float(channels) / reduction)
self.pool = nn.AdaptiveAvgPool2d(output_size=1)
if use_conv:
self.conv1 = conv1x1(
in_channels=channels,
out_channels=mid_channels,
bias=True)
else:
self.fc1 = nn.Linear(
in_features=channels,
out_features=mid_channels)
self.activ = get_activation_layer(mid_activation)
if use_conv:
self.conv2 = conv1x1(
in_channels=mid_channels,
out_channels=channels,
bias=True)
else:
self.fc2 = nn.Linear(
in_features=mid_channels,
out_features=channels)
self.sigmoid = get_activation_layer(out_activation)
def forward(self, x):
w = self.pool(x)
if not self.use_conv:
w = w.view(x.size(0), -1)
w = self.conv1(w) if self.use_conv else self.fc1(w)
w = self.activ(w)
w = self.conv2(w) if self.use_conv else self.fc2(w)
w = self.sigmoid(w)
if not self.use_conv:
w = w.unsqueeze(2).unsqueeze(3)
x = x * w
return x
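# Illustrative example: SEBlock(channels=64) squeezes the pooled descriptor from
# 64 to 4 channels (64 // 16), expands back to 64, and rescales x per channel.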
class SABlock(nn.Module):
"""
Split-Attention block from 'ResNeSt: Split-Attention Networks,' https://arxiv.org/abs/2004.08955.
Parameters:
----------
out_channels : int
Number of output channels.
groups : int
Number of channel groups (cardinality, without radix).
radix : int
Number of splits within a cardinal group.
reduction : int, default 4
Squeeze reduction value.
min_channels : int, default 32
Minimal number of squeezed channels.
    use_conv : bool, default True
        Whether to use convolutional layers instead of fully-connected ones.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
"""
def __init__(self,
out_channels,
groups,
radix,
reduction=4,
min_channels=32,
use_conv=True,
bn_eps=1e-5):
super(SABlock, self).__init__()
self.groups = groups
self.radix = radix
self.use_conv = use_conv
in_channels = out_channels * radix
mid_channels = max(in_channels // reduction, min_channels)
self.pool = nn.AdaptiveAvgPool2d(output_size=1)
if use_conv:
self.conv1 = conv1x1(
in_channels=out_channels,
out_channels=mid_channels,
bias=True)
else:
self.fc1 = nn.Linear(
in_features=out_channels,
out_features=mid_channels)
self.bn = nn.BatchNorm2d(
num_features=mid_channels,
eps=bn_eps)
self.activ = nn.ReLU(inplace=True)
if use_conv:
self.conv2 = conv1x1(
in_channels=mid_channels,
out_channels=in_channels,
bias=True)
else:
self.fc2 = nn.Linear(
in_features=mid_channels,
out_features=in_channels)
self.softmax = nn.Softmax(dim=1)
def forward(self, x):
batch, channels, height, width = x.size()
x = x.view(batch, self.radix, channels // self.radix, height, width)
w = x.sum(dim=1)
w = self.pool(w)
if not self.use_conv:
w = w.view(x.size(0), -1)
w = self.conv1(w) if self.use_conv else self.fc1(w)
w = self.bn(w)
w = self.activ(w)
w = self.conv2(w) if self.use_conv else self.fc2(w)
w = w.view(batch, self.groups, self.radix, -1)
w = torch.transpose(w, 1, 2).contiguous()
w = self.softmax(w)
w = w.view(batch, self.radix, -1, 1, 1)
x = x * w
x = x.sum(dim=1)
return x
class SAConvBlock(nn.Module):
"""
Split-Attention convolution block from 'ResNeSt: Split-Attention Networks,' https://arxiv.org/abs/2004.08955.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
kernel_size : int or tuple/list of 2 int
Convolution window size.
stride : int or tuple/list of 2 int
Strides of the convolution.
padding : int, or tuple/list of 2 int, or tuple/list of 4 int
Padding value for convolution layer.
dilation : int or tuple/list of 2 int, default 1
Dilation value for convolution layer.
groups : int, default 1
Number of groups.
bias : bool, default False
Whether the layer uses a bias vector.
use_bn : bool, default True
Whether to use BatchNorm layer.
bn_eps : float, default 1e-5
Small float added to variance in Batch norm.
activation : function or str or None, default nn.ReLU(inplace=True)
Activation function or name of activation function.
radix : int, default 2
Number of splits within a cardinal group.
reduction : int, default 4
Squeeze reduction value.
min_channels : int, default 32
Minimal number of squeezed channels.
    use_conv : bool, default True
        Whether to use convolutional layers instead of fully-connected ones.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation=1,
groups=1,
bias=False,
use_bn=True,
bn_eps=1e-5,
activation=(lambda: nn.ReLU(inplace=True)),
radix=2,
reduction=4,
min_channels=32,
use_conv=True):
super(SAConvBlock, self).__init__()
self.conv = ConvBlock(
in_channels=in_channels,
out_channels=(out_channels * radix),
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=(groups * radix),
bias=bias,
use_bn=use_bn,
bn_eps=bn_eps,
activation=activation)
self.att = SABlock(
out_channels=out_channels,
groups=groups,
radix=radix,
reduction=reduction,
min_channels=min_channels,
use_conv=use_conv,
bn_eps=bn_eps)
def forward(self, x):
x = self.conv(x)
x = self.att(x)
return x
def saconv3x3_block(in_channels,
out_channels,
stride=1,
padding=1,
**kwargs):
"""
3x3 version of the Split-Attention convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
stride : int or tuple/list of 2 int, default 1
Strides of the convolution.
padding : int or tuple/list of 2 int, default 1
Padding value for convolution layer.
"""
return SAConvBlock(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
padding=padding,
**kwargs)
class DucBlock(nn.Module):
"""
Dense Upsampling Convolution (DUC) block from 'Understanding Convolution for Semantic Segmentation,'
https://arxiv.org/abs/1702.08502.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
scale_factor : int
Multiplier for spatial size.
"""
def __init__(self,
in_channels,
out_channels,
scale_factor):
super(DucBlock, self).__init__()
mid_channels = (scale_factor * scale_factor) * out_channels
self.conv = conv3x3_block(
in_channels=in_channels,
out_channels=mid_channels)
self.pix_shuffle = nn.PixelShuffle(upscale_factor=scale_factor)
def forward(self, x):
x = self.conv(x)
x = self.pix_shuffle(x)
return x
class IBN(nn.Module):
"""
Instance-Batch Normalization block from 'Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net,'
https://arxiv.org/abs/1807.09441.
Parameters:
----------
channels : int
Number of channels.
    first_fraction : float, default 0.5
        The fraction of channels (from the start) assigned to the first normalization.
    inst_first : bool, default True
        Whether instance normalization is applied to the first part of the channels.
"""
def __init__(self,
channels,
first_fraction=0.5,
inst_first=True):
super(IBN, self).__init__()
self.inst_first = inst_first
h1_channels = int(math.floor(channels * first_fraction))
h2_channels = channels - h1_channels
self.split_sections = [h1_channels, h2_channels]
if self.inst_first:
self.inst_norm = nn.InstanceNorm2d(
num_features=h1_channels,
affine=True)
self.batch_norm = nn.BatchNorm2d(num_features=h2_channels)
else:
self.batch_norm = nn.BatchNorm2d(num_features=h1_channels)
self.inst_norm = nn.InstanceNorm2d(
num_features=h2_channels,
affine=True)
def forward(self, x):
x1, x2 = torch.split(x, split_size_or_sections=self.split_sections, dim=1)
if self.inst_first:
x1 = self.inst_norm(x1.contiguous())
x2 = self.batch_norm(x2.contiguous())
else:
x1 = self.batch_norm(x1.contiguous())
x2 = self.inst_norm(x2.contiguous())
x = torch.cat((x1, x2), dim=1)
return x
class DualPathSequential(nn.Sequential):
"""
A sequential container for modules with dual inputs/outputs.
Modules will be executed in the order they are added.
Parameters:
----------
    return_two : bool, default True
        Whether to return two outputs after execution.
first_ordinals : int, default 0
Number of the first modules with single input/output.
last_ordinals : int, default 0
Number of the final modules with single input/output.
dual_path_scheme : function
Scheme of dual path response for a module.
dual_path_scheme_ordinal : function
Scheme of dual path response for an ordinal module.
"""
def __init__(self,
return_two=True,
first_ordinals=0,
last_ordinals=0,
dual_path_scheme=(lambda module, x1, x2: module(x1, x2)),
dual_path_scheme_ordinal=(lambda module, x1, x2: (module(x1), x2))):
super(DualPathSequential, self).__init__()
self.return_two = return_two
self.first_ordinals = first_ordinals
self.last_ordinals = last_ordinals
self.dual_path_scheme = dual_path_scheme
self.dual_path_scheme_ordinal = dual_path_scheme_ordinal
def forward(self, x1, x2=None):
length = len(self._modules.values())
for i, module in enumerate(self._modules.values()):
if (i < self.first_ordinals) or (i >= length - self.last_ordinals):
x1, x2 = self.dual_path_scheme_ordinal(module, x1, x2)
else:
x1, x2 = self.dual_path_scheme(module, x1, x2)
if self.return_two:
return x1, x2
else:
return x1
class Concurrent(nn.Sequential):
"""
    A container for concatenation of modules, based on the sequential container.
Parameters:
----------
axis : int, default 1
The axis on which to concatenate the outputs.
stack : bool, default False
Whether to concatenate tensors along a new dimension.
"""
def __init__(self,
axis=1,
stack=False):
super(Concurrent, self).__init__()
self.axis = axis
self.stack = stack
def forward(self, x):
out = []
for module in self._modules.values():
out.append(module(x))
if self.stack:
out = torch.stack(tuple(out), dim=self.axis)
else:
out = torch.cat(tuple(out), dim=self.axis)
return out
class SequentialConcurrent(nn.Sequential):
"""
A sequential container with concatenated outputs.
Modules will be executed in the order they are added.
Parameters:
----------
axis : int, default 1
The axis on which to concatenate the outputs.
stack : bool, default False
Whether to concatenate tensors along a new dimension.
cat_input : bool, default True
Whether to also concatenate the input tensor.
"""
def __init__(self,
axis=1,
stack=False,
cat_input=True):
super(SequentialConcurrent, self).__init__()
self.axis = axis
self.stack = stack
self.cat_input = cat_input
def forward(self, x):
out = [x] if self.cat_input else []
for module in self._modules.values():
x = module(x)
out.append(x)
if self.stack:
out = torch.stack(tuple(out), dim=self.axis)
else:
out = torch.cat(tuple(out), dim=self.axis)
return out
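# Example (a minimal sketch with assumed shapes), DenseNet-style: the input and
# every intermediate output are concatenated along the channel axis:
#   sc = SequentialConcurrent()
#   sc.add_module("conv1", nn.Conv2d(3, 8, kernel_size=3, padding=1))
#   sc.add_module("conv2", nn.Conv2d(8, 8, kernel_size=3, padding=1))
#   y = sc(torch.randn(1, 3, 32, 32))  # y.shape == (1, 3 + 8 + 8, 32, 32)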
class ParametricSequential(nn.Sequential):
"""
A sequential container for modules whose forward method accepts extra keyword arguments.
Modules will be executed in the order they are added, with the keyword arguments forwarded to each one.
"""
def __init__(self, *args):
super(ParametricSequential, self).__init__(*args)
def forward(self, x, **kwargs):
for module in self._modules.values():
x = module(x, **kwargs)
return x
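# Example (a minimal sketch; ScaledBlock is a hypothetical module defined here
# only to show how extra keyword arguments are forwarded to every module):
#   class ScaledBlock(nn.Module):
#       def __init__(self, channels):
#           super(ScaledBlock, self).__init__()
#           self.conv = nn.Conv2d(channels, channels, kernel_size=1)
#       def forward(self, x, scale=1.0):
#           return self.conv(x) * scale
#   seq = ParametricSequential(ScaledBlock(8), ScaledBlock(8))
#   y = seq(torch.randn(1, 8, 16, 16), scale=0.5)  # 'scale' reaches both blocks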
class ParametricConcurrent(nn.Sequential):
"""
A container that concatenates the outputs of modules whose forward method accepts extra keyword arguments.
Parameters:
----------
axis : int, default 1
The axis on which to concatenate the outputs.
"""
def __init__(self, axis=1):
super(ParametricConcurrent, self).__init__()
self.axis = axis
def forward(self, x, **kwargs):
out = []
for module in self._modules.values():
out.append(module(x, **kwargs))
out = torch.cat(tuple(out), dim=self.axis)
return out
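# Example (a minimal sketch, reusing the hypothetical ScaledBlock pattern from above):
#   pc = ParametricConcurrent()
#   pc.add_module("b1", ScaledBlock(8))
#   pc.add_module("b2", ScaledBlock(8))
#   y = pc(torch.randn(1, 8, 16, 16), scale=0.5)  # y.shape == (1, 16, 16, 16)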
class Hourglass(nn.Module):
"""
An hourglass block.
Parameters:
----------
down_seq : nn.Sequential
Down modules as sequential.
up_seq : nn.Sequential
Up modules as sequential.
skip_seq : nn.Sequential
Skip connection modules as sequential.
merge_type : str, default 'add'
Type of merging of the up and skip outputs.
return_first_skip : bool, default False
Whether to return the first skip connection output. Used in ResAttNet.
"""
def __init__(self,
down_seq,
up_seq,
skip_seq,
merge_type="add",
return_first_skip=False):
super(Hourglass, self).__init__()
self.depth = len(down_seq)
assert (merge_type in ["add"])
assert (len(up_seq) == self.depth)
assert (len(skip_seq) in (self.depth, self.depth + 1))
self.merge_type = merge_type
self.return_first_skip = return_first_skip
self.extra_skip = (len(skip_seq) == self.depth + 1)
self.down_seq = down_seq
self.up_seq = up_seq
self.skip_seq = skip_seq
def forward(self, x, **kwargs):
y = None
down_outs = [x]
for down_module in self.down_seq._modules.values():
x = down_module(x)
down_outs.append(x)
for i in range(len(down_outs)):
if i != 0:
y = down_outs[self.depth - i]
skip_module = self.skip_seq[self.depth - i]
y = skip_module(y)
if (y is not None) and (self.merge_type == "add"):
x = x + y
if i != len(down_outs) - 1:
if (i == 0) and self.extra_skip:
skip_module = self.skip_seq[self.depth]
x = skip_module(x)
up_module = self.up_seq[self.depth - 1 - i]
x = up_module(x)
if self.return_first_skip:
return x, y
else:
return x
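# Example (a minimal sketch with assumed shapes): a depth-2 hourglass built from
# pooling, nearest-neighbour upsampling and identity skips; input height/width
# must be divisible by 4 so the 'add' merge shapes line up:
#   hg = Hourglass(
#       down_seq=nn.Sequential(nn.MaxPool2d(kernel_size=2), nn.MaxPool2d(kernel_size=2)),
#       up_seq=nn.Sequential(nn.Upsample(scale_factor=2), nn.Upsample(scale_factor=2)),
#       skip_seq=nn.Sequential(nn.Identity(), nn.Identity()))
#   y = hg(torch.randn(1, 8, 32, 32))  # y.shape == (1, 8, 32, 32)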
class SesquialteralHourglass(nn.Module):
"""
A sesquialteral hourglass block.
Parameters:
----------
down1_seq : nn.Sequential
The first down modules as sequential.
skip1_seq : nn.Sequential
The first skip connection modules as sequential.
up_seq : nn.Sequential
Up modules as sequential.
skip2_seq : nn.Sequential
The second skip connection modules as sequential.
down2_seq : nn.Sequential
The second down modules as sequential.
merge_type : str, default 'cat'
Type of merging of the up and skip outputs.
"""
def __init__(self,
down1_seq,
skip1_seq,
up_seq,
skip2_seq,
down2_seq,
merge_type="cat"):
super(SesquialteralHourglass, self).__init__()
assert (len(down1_seq) == len(up_seq))
assert (len(down1_seq) == len(down2_seq))
assert (len(skip1_seq) == len(skip2_seq))
assert (len(down1_seq) == len(skip1_seq) - 1)
assert (merge_type in ["cat", "add"])
self.merge_type = merge_type
self.depth = len(down1_seq)
self.down1_seq = down1_seq
self.skip1_seq = skip1_seq
self.up_seq = up_seq
self.skip2_seq = skip2_seq
self.down2_seq = down2_seq
def _merge(self, x, y):
if y is not None:
if self.merge_type == "cat":
x = torch.cat((x, y), dim=1)
elif self.merge_type == "add":
x = x + y
return x
def forward(self, x, **kwargs):
y = self.skip1_seq[0](x)
skip1_outs = [y]
for i in range(self.depth):
x = self.down1_seq[i](x)
y = self.skip1_seq[i + 1](x)
skip1_outs.append(y)
x = skip1_outs[self.depth]
y = self.skip2_seq[0](x)
skip2_outs = [y]
for i in range(self.depth):
x = self.up_seq[i](x)
y = skip1_outs[self.depth - 1 - i]
x = self._merge(x, y)
y = self.skip2_seq[i + 1](x)
skip2_outs.append(y)
x = self.skip2_seq[self.depth](x)
for i in range(self.depth):
x = self.down2_seq[i](x)
y = skip2_outs[self.depth - 1 - i]
x = self._merge(x, y)
return x
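# Example (a minimal sketch with assumed shapes): depth 1, identity skips and
# merge_type='add'; the trailing down pass leaves the output at half resolution:
#   sh = SesquialteralHourglass(
#       down1_seq=nn.Sequential(nn.MaxPool2d(kernel_size=2)),
#       skip1_seq=nn.Sequential(nn.Identity(), nn.Identity()),
#       up_seq=nn.Sequential(nn.Upsample(scale_factor=2)),
#       skip2_seq=nn.Sequential(nn.Identity(), nn.Identity()),
#       down2_seq=nn.Sequential(nn.MaxPool2d(kernel_size=2)),
#       merge_type="add")
#   y = sh(torch.randn(1, 8, 32, 32))  # y.shape == (1, 8, 16, 16)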
class MultiOutputSequential(nn.Sequential):
"""
A sequential container with multiple outputs.
Modules will be executed in the order they are added.
Parameters:
----------
multi_output : bool, default True
Whether to return multiple outputs.
dual_output : bool, default False
Whether to return a dual output (the last value plus the collected list).
return_last : bool, default True
Whether to forcibly return the last value.
"""
def __init__(self,
multi_output=True,
dual_output=False,
return_last=True):
super(MultiOutputSequential, self).__init__()
self.multi_output = multi_output
self.dual_output = dual_output
self.return_last = return_last
def forward(self, x):
outs = []
for module in self._modules.values():
x = module(x)
if hasattr(module, "do_output") and module.do_output:
outs.append(x)
elif hasattr(module, "do_output2") and module.do_output2:
assert (type(x) == tuple)
outs.extend(x[1])
x = x[0]
if self.multi_output:
return [x] + outs if self.return_last else outs
elif self.dual_output:
return x, outs
else:
return x
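# Example (a minimal sketch with assumed shapes): a module is marked for output
# collection by giving it a 'do_output' attribute:
#   seq = MultiOutputSequential()
#   block1 = nn.Conv2d(3, 8, kernel_size=3, padding=1)
#   block1.do_output = True  # collect this intermediate feature map
#   seq.add_module("block1", block1)
#   seq.add_module("block2", nn.Conv2d(8, 16, kernel_size=3, padding=1))
#   outs = seq(torch.randn(1, 3, 32, 32))  # [final (1,16,32,32), block1 (1,8,32,32)]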
class ParallelConcurent(nn.Sequential):
"""
A container that applies each module to the corresponding element of a list of inputs and returns the list of outputs.
Modules are paired with inputs in the order they are added.
"""
def __init__(self):
super(ParallelConcurent, self).__init__()
def forward(self, x):
out = []
for module, xi in zip(self._modules.values(), x):
out.append(module(xi))
return out
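# Example (a minimal sketch with assumed shapes): the i-th module consumes the
# i-th tensor of the input list:
#   pc = ParallelConcurent()
#   pc.add_module("b1", nn.Conv2d(3, 8, kernel_size=1))
#   pc.add_module("b2", nn.Conv2d(4, 8, kernel_size=1))
#   ys = pc([torch.randn(1, 3, 16, 16), torch.randn(1, 4, 16, 16)])  # list of 2 tensors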
class Flatten(nn.Module):
"""
Simple flatten module.
"""
def forward(self, x):
return x.view(x.size(0), -1)
class HeatmapMaxDetBlock(nn.Module):
"""
Heatmap maximum detector block (for human pose estimation task).
"""
def __init__(self):
super(HeatmapMaxDetBlock, self).__init__()
def forward(self, x):
heatmap = x
vector_dim = 2
batch = heatmap.shape[0]
channels = heatmap.shape[1]
in_size = x.shape[2:]
heatmap_vector = heatmap.view(batch, channels, -1)
scores, indices = heatmap_vector.max(dim=vector_dim, keepdim=True)  # the argument is 'keepdim', not 'keepdims'
scores_mask = (scores > 0.0).float()
pts_x = (indices % in_size[1]) * scores_mask
pts_y = (indices // in_size[1]) * scores_mask
pts = torch.cat((pts_x, pts_y, scores), dim=vector_dim)
for b in range(batch):
for k in range(channels):
hm = heatmap[b, k, :, :]
px = int(pts[b, k, 0])
py = int(pts[b, k, 1])
if (0 < px < in_size[1] - 1) and (0 < py < in_size[0] - 1):
pts[b, k, 0] += (hm[py, px + 1] - hm[py, px - 1]).sign() * 0.25
pts[b, k, 1] += (hm[py + 1, px] - hm[py - 1, px]).sign() * 0.25
return pts
@staticmethod
def calc_flops(x):
assert (x.shape[0] == 1)
num_flops = x.numel() + 26 * x.shape[1]
num_macs = 0
return num_flops, num_macs
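# Example (a minimal sketch; 17 keypoints as in COCO are assumed):
#   det = HeatmapMaxDetBlock()
#   pts = det(torch.rand(1, 17, 64, 48))  # pts.shape == (1, 17, 3): x, y, max score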
model_alexnet.py
"""
AlexNet for ImageNet-1K, implemented in PyTorch.
Original paper: 'One weird trick for parallelizing convolutional neural networks,'
https://arxiv.org/abs/1404.5997.
"""
__all__ = ['AlexNet', 'get_alexnet']
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from common import ConvBlock
class AlexConv(ConvBlock):
"""
AlexNet specific convolution block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
kernel_size : int or tuple/list of 2 int
Convolution window size.
stride : int or tuple/list of 2 int
Strides of the convolution.
padding : int or tuple/list of 2 int
Padding value for convolution layer.
use_lrn : bool
Whether to use LRN layer.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
use_lrn):
super(AlexConv, self).__init__(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
bias=True,
use_bn=False)
self.use_lrn = use_lrn
def forward(self, x):
x = super(AlexConv, self).forward(x)
if self.use_lrn:
x = F.local_response_norm(x, size=5, k=2.0)
return x
class AlexDense(nn.Module):
"""
AlexNet specific dense block.
Parameters:
----------
in_channels : int
Number of input channels.
out_channels : int
Number of output channels.
"""
def __init__(self,
in_channels,
out_channels):
super(AlexDense, self).__init__()
self.fc = nn.Linear(
in_features=in_channels,
out_features=out_channels)
self.activ = nn.ReLU(inplace=True)
def forward(self, x):
x = self.fc(x)
x = self.activ(x)
return x
class AlexOutputBlock(nn.Module):
"""
AlexNet specific output block.
Parameters:
----------
in_channels : int
Number of input channels.
classes : int
Number of classification classes.
"""
def __init__(self,
in_channels,
classes):
super(AlexOutputBlock, self).__init__()
#mid_channels = 4096  # original AlexNet fully-connected width
mid_channels = 1024  # MatchNet uses narrower fully-connected layers
self.fc1 = AlexDense(
in_channels=in_channels,
out_channels=mid_channels)
self.fc2 = AlexDense(
in_channels=mid_channels,
out_channels=mid_channels)
self.fc3 = nn.Linear(
in_features=mid_channels,
out_features=classes)
def forward(self, x):
x = self.fc1(x)
x = self.fc2(x)
x = self.fc3(x)
return x
class AlexNet(nn.Module):
"""
AlexNet model from 'One weird trick for parallelizing convolutional neural networks,'
https://arxiv.org/abs/1404.5997.
Parameters:
----------
channels : list of list of int
Number of output channels for each unit.
kernel_sizes : list of list of int
Convolution window sizes for each unit.
strides : list of list of int or tuple/list of 2 int
Strides of the convolution for each unit.
paddings : list of list of int or tuple/list of 2 int
Padding value for convolution layer for each unit.
use_lrn : bool
Whether to use LRN layer.
in_channels : int, default 3
Number of input channels.
in_size : tuple of two ints, default (224, 224)
Spatial size of the expected input image.
num_classes : int, default 1000
Number of classification classes.
"""
def __init__(self,
channels,
kernel_sizes,
strides,
paddings,
use_lrn,
in_channels=3,
in_size=(224, 224),
num_classes=1000):
super(AlexNet, self).__init__()
self.in_size = in_size
self.num_classes = num_classes
self.features = nn.Sequential()
for i, channels_per_stage in enumerate(channels):
use_lrn_i = use_lrn and (i in [0, 1])
stage = nn.Sequential()
for j, out_channels in enumerate(channels_per_stage):
stage.add_module("unit{}".format(j + 1), AlexConv(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_sizes[i][j],
stride=strides[i][j],
padding=paddings[i][j],
use_lrn=use_lrn_i))
in_channels = out_channels
#https://blog.csdn.net/html5baby/article/details/100609026
#https://blog.csdn.net/qq_35975447/article/details/107810046
stage.add_module("pool{}".format(i + 1), nn.MaxPool2d(
kernel_size=3,
stride=2,
padding=0,
ceil_mode=True))
self.features.add_module("stage{}".format(i + 1), stage)
self.output = AlexOutputBlock(
#in_channels=(in_channels * 6 * 6),  # original AlexNet value
#two 64-channel 8x8 feature towers concatenated: 2 * 64 * 8 * 8 = 8192
in_channels=8192,
classes=num_classes)
self.score = nn.Softmax(dim=1)
self.final = nn.LogSoftmax(dim=1)
self._init_params()
def _init_params(self):
for name, module in self.named_modules():
if isinstance(module, nn.Conv2d):
init.kaiming_uniform_(module.weight)
if module.bias is not None:
init.constant_(module.bias, 0)
def forward(self, input1, input2):
input1_x = self.features(input1)
input2_x = self.features(input2)
x = torch.cat((input1_x, input2_x), dim=1)
x = x.view(x.size(0), -1)
x = self.output(x)
#log-probabilities feed F.nll_loss; score is the softmax probability over the classes,
#computed from the same logits (not from the log-softmax output)
return self.final(x), self.score(x)
def get_alexnet(version="matchnet",
**kwargs):
if version == "matchnet":
channels = [[24], [64], [96, 96, 64]]
kernel_sizes = [[7], [5], [3, 3, 3]]
strides = [[1], [1], [1, 1, 1]]
paddings = [[3], [2], [1, 1, 1]]
use_lrn = False
else:
raise ValueError("Unsupported AlexNet version {}".format(version))
net = AlexNet(
channels=channels,
kernel_sizes=kernel_sizes,
strides=strides,
paddings=paddings,
use_lrn=use_lrn,
**kwargs)
return net
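# A minimal smoke test (a sketch: single-channel 64x64 patches and two classes are
# assumed here; the actual settings come from main_matchnet.py and phototour.py).
if __name__ == "__main__":
    net = get_alexnet(version="matchnet", in_channels=1, in_size=(64, 64), num_classes=2)
    patch_a = torch.randn(4, 1, 64, 64)
    patch_b = torch.randn(4, 1, 64, 64)
    log_probs, probs = net(patch_a, patch_b)
    # each tower yields (4, 64, 8, 8); concatenated and flattened this is 8192 features
    print(log_probs.shape, probs.shape)  # torch.Size([4, 2]) twice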