阅读 ML-GCN 的相关代码,在提升编程能力的同时更加深入地理解这篇文章。
论文地址
代码地址
```python
import os
import shutil
import time
import torch.backends.cudnn as cudnn
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torchnet as tnt
import torchvision.transforms as transforms
import torch.nn as nn
from util import *
# Disable tqdm's background monitor thread (avoids spurious warnings on some platforms).
tqdm.monitor_interval = 0
class Engine(object):
    """Generic training/evaluation loop driver.

    All configuration, meters and per-batch tensors live in the ``self.state``
    dict; subclasses customize behavior by overriding the ``on_*`` hooks.
    """

    def __init__(self, state=None):
        # Bug fix: the original signature was ``def __init__(self, state={})``;
        # a mutable default dict is shared by every instance created without an
        # explicit state argument.
        self.state = {} if state is None else state
        if self._state('use_gpu') is None:
            self.state['use_gpu'] = torch.cuda.is_available()
        if self._state('image_size') is None:
            self.state['image_size'] = 224
        if self._state('batch_size') is None:
            self.state['batch_size'] = 64
        if self._state('workers') is None:
            self.state['workers'] = 25
        if self._state('device_ids') is None:
            self.state['device_ids'] = None
        if self._state('evaluate') is None:
            self.state['evaluate'] = False
        if self._state('start_epoch') is None:
            self.state['start_epoch'] = 0
        if self._state('max_epochs') is None:
            self.state['max_epochs'] = 90
        if self._state('epoch_step') is None:
            self.state['epoch_step'] = []
        # loss meter
        self.state['meter_loss'] = tnt.meter.AverageValueMeter()
        # time meters
        self.state['batch_time'] = tnt.meter.AverageValueMeter()
        self.state['data_time'] = tnt.meter.AverageValueMeter()
        # display parameters
        if self._state('use_pb') is None:
            self.state['use_pb'] = True
        if self._state('print_freq') is None:
            self.state['print_freq'] = 0

    def _state(self, name):
        """Return ``self.state[name]``, or None when the key is absent."""
        if name in self.state:
            return self.state[name]

    def on_start_epoch(self, training, model, criterion, data_loader, optimizer=None, display=True):
        """Reset the loss/time meters at the beginning of every epoch."""
        self.state['meter_loss'].reset()
        self.state['batch_time'].reset()
        self.state['data_time'].reset()

    def on_end_epoch(self, training, model, criterion, data_loader, optimizer=None, display=True):
        """Print and return the epoch's average loss."""
        loss = self.state['meter_loss'].value()[0]
        if display:
            if training:
                print('Epoch: [{0}]\t'
                      'Loss {loss:.4f}'.format(self.state['epoch'], loss=loss))
            else:
                print('Test: \t Loss {loss:.4f}'.format(loss=loss))
        return loss

    def on_start_batch(self, training, model, criterion, data_loader, optimizer=None, display=True):
        """Hook called before the forward pass of every batch (no-op by default)."""
        pass

    def on_end_batch(self, training, model, criterion, data_loader, optimizer=None, display=True):
        """Record the batch loss and print progress every ``print_freq`` batches."""
        self.state['loss_batch'] = self.state['loss'].item()
        self.state['meter_loss'].add(self.state['loss_batch'])
        if display and self.state['print_freq'] != 0 and self.state['iteration'] % self.state['print_freq'] == 0:
            loss = self.state['meter_loss'].value()[0]
            batch_time = self.state['batch_time'].value()[0]
            data_time = self.state['data_time'].value()[0]
            if training:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time_current:.3f} ({batch_time:.3f})\t'
                      'Data {data_time_current:.3f} ({data_time:.3f})\t'
                      'Loss {loss_current:.4f} ({loss:.4f})'.format(
                          self.state['epoch'], self.state['iteration'], len(data_loader),
                          batch_time_current=self.state['batch_time_current'],
                          batch_time=batch_time, data_time_current=self.state['data_time_batch'],
                          data_time=data_time, loss_current=self.state['loss_batch'], loss=loss))
            else:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time_current:.3f} ({batch_time:.3f})\t'
                      'Data {data_time_current:.3f} ({data_time:.3f})\t'
                      'Loss {loss_current:.4f} ({loss:.4f})'.format(
                          self.state['iteration'], len(data_loader), batch_time_current=self.state['batch_time_current'],
                          batch_time=batch_time, data_time_current=self.state['data_time_batch'],
                          data_time=data_time, loss_current=self.state['loss_batch'], loss=loss))

    def on_forward(self, training, model, criterion, data_loader, optimizer=None, display=True):
        """Forward pass; backward + optimizer step when training.

        Bug fix: the original set ``Variable.volatile = True`` for evaluation,
        which is a silent no-op on PyTorch >= 0.4 — evaluation now runs under
        ``torch.no_grad()`` instead (the file already uses ``torch.no_grad``
        elsewhere, so this version is available).
        """
        input_var = self.state['input']
        target_var = self.state['target']
        if training:
            self.state['output'] = model(input_var)
            self.state['loss'] = criterion(self.state['output'], target_var)
            optimizer.zero_grad()
            self.state['loss'].backward()
            optimizer.step()
        else:
            with torch.no_grad():
                self.state['output'] = model(input_var)
                self.state['loss'] = criterion(self.state['output'], target_var)

    def init_learning(self, model, criterion):
        """Build default train/val transforms (if absent) and reset the best score.

        Uses the model's own normalization constants; ``MultiScaleCrop`` and
        ``Warp`` come from ``util``.
        """
        if self._state('train_transform') is None:
            normalize = transforms.Normalize(mean=model.image_normalization_mean,
                                             std=model.image_normalization_std)
            self.state['train_transform'] = transforms.Compose([
                MultiScaleCrop(self.state['image_size'], scales=(1.0, 0.875, 0.75, 0.66, 0.5), max_distort=2),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
        if self._state('val_transform') is None:
            normalize = transforms.Normalize(mean=model.image_normalization_mean,
                                             std=model.image_normalization_std)
            self.state['val_transform'] = transforms.Compose([
                Warp(self.state['image_size']),
                transforms.ToTensor(),
                normalize,
            ])
        self.state['best_score'] = 0

    def learning(self, model, criterion, train_dataset, val_dataset, optimizer=None):
        """Full run: optionally resume from a checkpoint, then evaluate or train.

        Returns the best validation score when training, None in evaluate mode.
        """
        self.init_learning(model, criterion)

        # attach train and val transforms
        train_dataset.transform = self.state['train_transform']
        train_dataset.target_transform = self._state('train_target_transform')
        val_dataset.transform = self.state['val_transform']
        val_dataset.target_transform = self._state('val_target_transform')

        # data loading
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=self.state['batch_size'], shuffle=True,
                                                   num_workers=self.state['workers'])
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=self.state['batch_size'], shuffle=False,
                                                 num_workers=self.state['workers'])

        # optionally resume from a checkpoint
        if self._state('resume') is not None:
            if os.path.isfile(self.state['resume']):
                print("=> loading checkpoint '{}'".format(self.state['resume']))
                checkpoint = torch.load(self.state['resume'])
                # checkpoint keys: 'arch', 'best_score', 'state_dict', 'epoch'
                self.state['start_epoch'] = checkpoint['epoch']
                self.state['best_score'] = checkpoint['best_score']
                model.load_state_dict(checkpoint['state_dict'])
                # Bug fix: the original printed state['evaluate'] here instead
                # of the checkpoint path.
                print("=> loaded checkpoint '{}' (epoch {})"
                      .format(self.state['resume'], checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(self.state['resume']))

        if self.state['use_gpu']:
            train_loader.pin_memory = True
            val_loader.pin_memory = True
            cudnn.benchmark = True
            model = torch.nn.DataParallel(model, device_ids=self.state['device_ids']).cuda()
            criterion = criterion.cuda()

        # evaluate-only mode: run one validation pass and return
        if self.state['evaluate']:
            self.validate(val_loader, model, criterion)
            return

        for epoch in range(self.state['start_epoch'], self.state['max_epochs']):
            self.state['epoch'] = epoch
            lr = self.adjust_learning_rate(optimizer)
            print('lr:', lr)
            # train for one epoch
            self.train(train_loader, model, criterion, optimizer, epoch)
            # evaluate on validation set
            prec1 = self.validate(val_loader, model, criterion)
            # remember best score and save checkpoint
            is_best = prec1 > self.state['best_score']
            self.state['best_score'] = max(prec1, self.state['best_score'])
            self.save_checkpoint({
                'epoch': epoch + 1,
                'arch': self._state('arch'),
                'state_dict': model.module.state_dict() if self.state['use_gpu'] else model.state_dict(),
                'best_score': self.state['best_score'],
            }, is_best)
            print(' *** best={best:.3f}'.format(best=self.state['best_score']))
        return self.state['best_score']

    def train(self, data_loader, model, criterion, optimizer, epoch):
        """Run one training epoch over ``data_loader``."""
        model.train()
        self.on_start_epoch(True, model, criterion, data_loader, optimizer)
        if self.state['use_pb']:
            data_loader = tqdm(data_loader, desc='Training')
        end = time.time()
        for i, (input, target) in enumerate(data_loader):
            # measure data loading time
            self.state['iteration'] = i
            self.state['data_time_batch'] = time.time() - end
            self.state['data_time'].add(self.state['data_time_batch'])
            self.state['input'] = input
            self.state['target'] = target
            self.on_start_batch(True, model, criterion, data_loader, optimizer)
            if self.state['use_gpu']:
                self.state['target'] = self.state['target'].cuda()
            self.on_forward(True, model, criterion, data_loader, optimizer)
            # measure elapsed time
            self.state['batch_time_current'] = time.time() - end
            self.state['batch_time'].add(self.state['batch_time_current'])
            end = time.time()
            self.on_end_batch(True, model, criterion, data_loader, optimizer)
        self.on_end_epoch(True, model, criterion, data_loader, optimizer)

    def validate(self, data_loader, model, criterion):
        """Run one validation epoch and return the epoch score."""
        model.eval()
        self.on_start_epoch(False, model, criterion, data_loader)
        if self.state['use_pb']:
            data_loader = tqdm(data_loader, desc='Test')
        end = time.time()
        for i, (input, target) in enumerate(data_loader):
            # measure data loading time
            self.state['iteration'] = i
            self.state['data_time_batch'] = time.time() - end
            self.state['data_time'].add(self.state['data_time_batch'])
            self.state['input'] = input
            self.state['target'] = target
            self.on_start_batch(False, model, criterion, data_loader)
            if self.state['use_gpu']:
                self.state['target'] = self.state['target'].cuda()
            self.on_forward(False, model, criterion, data_loader)
            # measure elapsed time
            self.state['batch_time_current'] = time.time() - end
            self.state['batch_time'].add(self.state['batch_time_current'])
            end = time.time()
            self.on_end_batch(False, model, criterion, data_loader)
        score = self.on_end_epoch(False, model, criterion, data_loader)
        return score

    def save_checkpoint(self, state, is_best, filename='checkpoint.pth.tar'):
        """Save ``state``; when ``is_best``, also copy it to the best-model files."""
        if self._state('save_model_path') is not None:
            filename_ = filename
            filename = os.path.join(self.state['save_model_path'], filename_)
            if not os.path.exists(self.state['save_model_path']):
                os.makedirs(self.state['save_model_path'])
        # Bug fix: the original format string was 'save model (unknown)' and
        # never interpolated the filename.
        print('save model {filename}'.format(filename=filename))
        torch.save(state, filename)
        if is_best:
            filename_best = 'model_best.pth.tar'
            if self._state('save_model_path') is not None:
                filename_best = os.path.join(self.state['save_model_path'], filename_best)
            shutil.copyfile(filename, filename_best)
            if self._state('save_model_path') is not None:
                # keep a score-tagged copy of the best model, removing the previous one
                if self._state('filename_previous_best') is not None:
                    os.remove(self._state('filename_previous_best'))
                filename_best = os.path.join(self.state['save_model_path'],
                                             'model_best_{score:.4f}.pth.tar'.format(score=state['best_score']))
                shutil.copyfile(filename, filename_best)
                self.state['filename_previous_best'] = filename_best

    def adjust_learning_rate(self, optimizer):
        """Decay every param group's lr by 10x when the current epoch is in epoch_step.

        Returns the distinct learning rates now in use.
        """
        lr_list = []
        decay = 0.1 if self.state['epoch'] in self.state['epoch_step'] else 1.0
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group['lr'] * decay
            lr_list.append(param_group['lr'])
        return np.unique(lr_list)
class MultiLabelMAPEngine(Engine):
    """Engine that additionally tracks multi-label mAP and P/R/F1 metrics."""

    def __init__(self, state):
        Engine.__init__(self, state)
        if self._state('difficult_examples') is None:
            self.state['difficult_examples'] = False
        # AveragePrecisionMeter (from util) accumulates outputs/targets per epoch.
        self.state['ap_meter'] = AveragePrecisionMeter(self.state['difficult_examples'])

    def on_start_epoch(self, training, model, criterion, data_loader, optimizer=None, display=True):
        Engine.on_start_epoch(self, training, model, criterion, data_loader, optimizer)
        self.state['ap_meter'].reset()

    def on_end_epoch(self, training, model, criterion, data_loader, optimizer=None, display=True):
        """Print loss/mAP plus overall (O*) and per-class (C*) P, R, F1; return mAP."""
        # Renamed from ``map`` to avoid shadowing the builtin.
        mean_ap = 100 * self.state['ap_meter'].value().mean()
        loss = self.state['meter_loss'].value()[0]
        OP, OR, OF1, CP, CR, CF1 = self.state['ap_meter'].overall()
        OP_k, OR_k, OF1_k, CP_k, CR_k, CF1_k = self.state['ap_meter'].overall_topk(3)
        if display:
            if training:
                print('Epoch: [{0}]\t'
                      'Loss {loss:.4f}\t'
                      'mAP {map:.3f}'.format(self.state['epoch'], loss=loss, map=mean_ap))
                print('OP: {OP:.4f}\t'
                      'OR: {OR:.4f}\t'
                      'OF1: {OF1:.4f}\t'
                      'CP: {CP:.4f}\t'
                      'CR: {CR:.4f}\t'
                      'CF1: {CF1:.4f}'.format(OP=OP, OR=OR, OF1=OF1, CP=CP, CR=CR, CF1=CF1))
            else:
                print('Test: \t Loss {loss:.4f}\t mAP {map:.3f}'.format(loss=loss, map=mean_ap))
                print('OP: {OP:.4f}\t'
                      'OR: {OR:.4f}\t'
                      'OF1: {OF1:.4f}\t'
                      'CP: {CP:.4f}\t'
                      'CR: {CR:.4f}\t'
                      'CF1: {CF1:.4f}'.format(OP=OP, OR=OR, OF1=OF1, CP=CP, CR=CR, CF1=CF1))
                print('OP_3: {OP:.4f}\t'
                      'OR_3: {OR:.4f}\t'
                      'OF1_3: {OF1:.4f}\t'
                      'CP_3: {CP:.4f}\t'
                      'CR_3: {CR:.4f}\t'
                      'CF1_3: {CF1:.4f}'.format(OP=OP_k, OR=OR_k, OF1=OF1_k, CP=CP_k, CR=CR_k, CF1=CF1_k))
        return mean_ap

    def on_start_batch(self, training, model, criterion, data_loader, optimizer=None, display=True):
        """Split the dataset item and remap labels for the loss."""
        # Keep the raw {-1, 0, 1} labels for AP computation.
        self.state['target_gt'] = self.state['target'].clone()
        # Remap for the loss: 0 -> 1, then -1 -> 0 (order matters; presumably
        # 0 marks VOC "difficult" examples — confirm against the csv writer).
        self.state['target'][self.state['target'] == 0] = 1
        self.state['target'][self.state['target'] == -1] = 0
        input = self.state['input']
        self.state['input'] = input[0]   # image tensor
        self.state['name'] = input[1]    # image identifier(s)

    def on_end_batch(self, training, model, criterion, data_loader, optimizer=None, display=True):
        """Record loss (via parent, silently), feed the AP meter, and print progress."""
        Engine.on_end_batch(self, training, model, criterion, data_loader, optimizer, display=False)
        # accumulate predictions against the original ground-truth labels
        self.state['ap_meter'].add(self.state['output'].data, self.state['target_gt'])
        if display and self.state['print_freq'] != 0 and self.state['iteration'] % self.state['print_freq'] == 0:
            loss = self.state['meter_loss'].value()[0]
            batch_time = self.state['batch_time'].value()[0]
            data_time = self.state['data_time'].value()[0]
            if training:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time_current:.3f} ({batch_time:.3f})\t'
                      'Data {data_time_current:.3f} ({data_time:.3f})\t'
                      'Loss {loss_current:.4f} ({loss:.4f})'.format(
                          self.state['epoch'], self.state['iteration'], len(data_loader),
                          batch_time_current=self.state['batch_time_current'],
                          batch_time=batch_time, data_time_current=self.state['data_time_batch'],
                          data_time=data_time, loss_current=self.state['loss_batch'], loss=loss))
            else:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time_current:.3f} ({batch_time:.3f})\t'
                      'Data {data_time_current:.3f} ({data_time:.3f})\t'
                      'Loss {loss_current:.4f} ({loss:.4f})'.format(
                          self.state['iteration'], len(data_loader), batch_time_current=self.state['batch_time_current'],
                          batch_time=batch_time, data_time_current=self.state['data_time_batch'],
                          data_time=data_time, loss_current=self.state['loss_batch'], loss=loss))
class GCNMultiLabelMAPEngine(MultiLabelMAPEngine):
    """Engine variant for ML-GCN: the model takes (image, label-embedding) pairs."""

    def on_forward(self, training, model, criterion, data_loader, optimizer=None, display=True):
        """Forward pass through the GCN model; backward + clipped step when training.

        Bug fixes vs. original:
        - the ``torch.no_grad()`` block only wrapped Variable re-creation while
          the forward pass still ran with autograd enabled; the forward pass
          itself now runs under ``no_grad`` in eval mode;
        - ``nn.utils.clip_grad_norm`` is deprecated; use the in-place
          ``clip_grad_norm_`` instead.
        """
        feature_var = self.state['feature'].float()       # image batch, e.g. (B, 3, 448, 448)
        target_var = self.state['target'].float()         # label batch, e.g. (B, 20)
        inp_var = self.state['input'].float().detach()    # word embeddings, e.g. (B, 20, 300)
        if training:
            self.state['output'] = model(feature_var, inp_var)
            self.state['loss'] = criterion(self.state['output'], target_var)
            optimizer.zero_grad()
            self.state['loss'].backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)
            optimizer.step()
        else:
            with torch.no_grad():
                self.state['output'] = model(feature_var, inp_var)
                self.state['loss'] = criterion(self.state['output'], target_var)

    def on_start_batch(self, training, model, criterion, data_loader, optimizer=None, display=True):
        """Split the GCN dataset item and remap labels exactly as the parent does."""
        self.state['target_gt'] = self.state['target'].clone()
        self.state['target'][self.state['target'] == 0] = 1
        self.state['target'][self.state['target'] == -1] = 0
        input = self.state['input']
        self.state['feature'] = input[0]  # image tensor
        self.state['out'] = input[1]      # image name/path (key name kept from original)
        self.state['input'] = input[2]    # label word-embedding matrix
```

```python
import csv
import os
import os.path
import tarfile
from urllib.parse import urlparse
import numpy as np
import torch
import torch.utils.data as data
from PIL import Image
import pickle
import util
from util import *
# The 20 PASCAL VOC object categories in canonical order; index i corresponds
# to column i+1 of the generated classification csv files.
object_categories = ['aeroplane', 'bicycle', 'bird', 'boat',
                     'bottle', 'bus', 'car', 'cat', 'chair',
                     'cow', 'diningtable', 'dog', 'horse',
                     'motorbike', 'person', 'pottedplant',
                     'sheep', 'sofa', 'train', 'tvmonitor']
# Download locations for the devkit and the VOC2007 archives.
# NOTE(review): 'devkit' points at the VOC2012 devkit tarball — confirm intentional.
urls = {
    'devkit': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar',
    'trainval_2007': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
    'test_images_2007': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
    'test_anno_2007': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtestnoimgs_06-Nov-2007.tar',
}
def read_image_label(file):
    """Parse one VOC ImageSets annotation file into {image_name: label}.

    Each line looks like ``<image_id> <label>`` with label in {-1, 0, 1};
    the last whitespace-separated field is taken as the label.
    """
    print('[dataset] read ' + file)
    labels = dict()
    with open(file, 'r') as handle:
        for line in handle:
            fields = line.split(' ')
            labels[fields[0]] = int(fields[-1])
    return labels
def read_object_labels(root, dataset, set):
    """Assemble a per-image label vector from the 20 per-class VOC files.

    Args:
        root: dataset root, e.g. 'data/voc'.
        dataset: e.g. 'VOC2007'.
        set: split name, e.g. 'trainval'.

    Returns:
        dict mapping image name -> numpy vector of length 20 with {-1, 0, 1}.
    """
    # e.g. data/voc/VOCdevkit/VOC2007/ImageSets/Main
    path_labels = os.path.join(root, 'VOCdevkit', dataset, 'ImageSets', 'Main')
    num_classes = len(object_categories)
    labeled_data = dict()
    for idx, category in enumerate(object_categories):
        # e.g. .../Main/aeroplane_trainval.txt
        annot_file = os.path.join(path_labels, category + '_' + set + '.txt')
        per_image = read_image_label(annot_file)
        if idx == 0:
            # first category: create one zero vector per image
            for name, label in per_image.items():
                vec = np.zeros(num_classes)
                vec[idx] = label
                labeled_data[name] = vec
        else:
            for name, label in per_image.items():
                labeled_data[name][idx] = label
    return labeled_data
def write_object_labels_csv(file, labeled_data):
    """Write per-image multi-label annotations to a csv file.

    Args:
        file: destination csv path.
        labeled_data: dict mapping image name -> per-class label vector
            (values in {-1, 0, 1}, one entry per VOC category).
    """
    print('[dataset] write file %s' % file)
    # Bug fix: open with newline='' — without it the csv module emits blank
    # rows on Windows (the reader had to skip empty rows to work around it).
    with open(file, 'w', newline='') as csvfile:
        fieldnames = ['name']
        fieldnames.extend(object_categories)
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for (name, labels) in labeled_data.items():
            example = {'name': name}
            # was hard-coded range(20); derive from the category list instead
            for i in range(len(object_categories)):
                example[fieldnames[i + 1]] = int(labels[i])
            writer.writerow(example)
        # no explicit close(): the with-block closes the file
def read_object_labels_csv(file, header=True):
    """Load (image_name, label_tensor) pairs from a classification csv.

    Args:
        file: csv path produced by ``write_object_labels_csv``.
        header: when True, the first non-empty row is treated as the header.

    Returns:
        list of ``(name, torch.FloatTensor)`` items.
    """
    images = []
    num_categories = 0
    print('[dataset] read', file)
    with open(file, 'r') as f:
        reader = csv.reader(f)
        seen = 0  # counts non-empty rows only; blank lines are skipped entirely
        for row in reader:
            if not row:
                continue
            if header and seen == 0:
                header = row  # consume the header row
            else:
                if num_categories == 0:
                    # infer the number of label columns from the first data row
                    num_categories = len(row) - 1
                name = row[0]
                vec = np.asarray(row[1:num_categories + 1]).astype(np.float32)
                images.append((name, torch.from_numpy(vec)))
            seen += 1
    return images
def find_images_classification(root, dataset, set):
    """Return the raw lines (image ids, trailing newline kept) of a VOC set file."""
    list_file = os.path.join(root, 'VOCdevkit', dataset, 'ImageSets', 'Main', set + '.txt')
    with open(list_file, 'r') as f:
        return [line for line in f]
def _download_and_extract(url, tmpdir, root):
    """Download *url* into *tmpdir* (skipped if cached) and untar it into *root*."""
    if not os.path.exists(tmpdir):
        # Bug fix: the original only created tmpdir in the devkit branch, so
        # later downloads failed when the devkit was already present.
        os.makedirs(tmpdir)
    parts = urlparse(url)
    filename = os.path.basename(parts.path)
    cached_file = os.path.join(tmpdir, filename)
    if not os.path.exists(cached_file):
        print('Downloading: "{}" to {}\n'.format(url, cached_file))
        util.download_url(url, cached_file)
    # extract file
    print('[dataset] Extracting tar file {file} to {path}'.format(file=cached_file, path=root))
    cwd = os.getcwd()
    tar = tarfile.open(cached_file, "r")
    os.chdir(root)
    tar.extractall()
    tar.close()
    os.chdir(cwd)
    print('[dataset] Done!')


def download_voc2007(root):
    """Ensure the VOC devkit, VOC2007 images and annotations exist under *root*.

    Each archive is fetched and extracted only when its marker path is missing;
    downloads are cached in ``root/tmp``.
    """
    path_devkit = os.path.join(root, 'VOCdevkit')
    path_images = os.path.join(root, 'VOCdevkit', 'VOC2007', 'JPEGImages')
    tmpdir = os.path.join(root, 'tmp')
    if not os.path.exists(root):
        os.makedirs(root)
    # devkit
    if not os.path.exists(path_devkit):
        _download_and_extract(urls['devkit'], tmpdir, root)
    # train/val images and annotations
    if not os.path.exists(path_images):
        _download_and_extract(urls['trainval_2007'], tmpdir, root)
    # test annotations
    test_anno = os.path.join(path_devkit, 'VOC2007/ImageSets/Main/aeroplane_test.txt')
    if not os.path.exists(test_anno):
        # NOTE(review): original used urls['test_images_2007'] here and
        # urls['test_anno_2007'] below — kept as-is, but the two keys look
        # swapped relative to the comments; verify the mapping.
        _download_and_extract(urls['test_images_2007'], tmpdir, root)
    # test images
    test_image = os.path.join(path_devkit, 'VOC2007/JPEGImages/000001.jpg')
    if not os.path.exists(test_image):
        _download_and_extract(urls['test_anno_2007'], tmpdir, root)
class Voc2007Classification(data.Dataset):
    """PASCAL VOC 2007 multi-label classification dataset for ML-GCN.

    Each item is ``((img, path, inp), target)`` where ``inp`` is the label
    word-embedding matrix loaded from *inp_name* (a pickle file, e.g.
    voc_glove_word2vec.pkl) and ``target`` is a 20-dim vector in {-1, 0, 1}
    (present / difficult / absent — see the csv generation above).
    """

    def __init__(self, root, set, transform=None, target_transform=None, inp_name=None, adj=None):
        self.root = root                                                        # e.g. 'data/voc'
        self.path_devkit = os.path.join(root, 'VOCdevkit')
        self.path_images = os.path.join(root, 'VOCdevkit', 'VOC2007', 'JPEGImages')
        self.set = set                                                          # e.g. 'trainval'
        self.transform = transform
        self.target_transform = target_transform

        # download the dataset if not already on disk
        download_voc2007(self.root)

        # cached csv, e.g. data/voc/files/VOC2007/classification_trainval.csv
        path_csv = os.path.join(self.root, 'files', 'VOC2007')
        file_csv = os.path.join(path_csv, 'classification_' + set + '.csv')
        print(file_csv)
        # create the csv file if necessary
        if not os.path.exists(file_csv):
            if not os.path.exists(path_csv):
                os.makedirs(path_csv)
            labeled_data = read_object_labels(self.root, 'VOC2007', self.set)
            write_object_labels_csv(file_csv, labeled_data)

        self.classes = object_categories
        self.images = read_object_labels_csv(file_csv)

        # Bug fix: the original unconditionally opened inp_name, crashing with
        # a TypeError when the default inp_name=None was used.
        if inp_name is not None:
            with open(inp_name, 'rb') as f:
                self.inp = pickle.load(f)
        else:
            self.inp = None
        self.inp_name = inp_name

        print('[dataset] VOC 2007 classification set=%s number of classes=%d  number of images=%d' % (
            set, len(self.classes), len(self.images)))

    def __getitem__(self, index):
        """Return ``((img, path, inp), target)`` for the given index."""
        path, target = self.images[index]
        img = Image.open(os.path.join(self.path_images, path + '.jpg')).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return (img, path, self.inp), target

    def __len__(self):
        return len(self.images)

    def get_number_classes(self):
        """Number of label categories (20 for VOC)."""
        return len(self.classes)
import argparse
from engine import *
from models import *
from voc import *
# Command-line arguments for the VOC2007 ML-GCN run.
parser = argparse.ArgumentParser(description='WILDCAT Training')
parser.add_argument('--data', metavar='DIR', default='data/voc', type=str,
                    help='path to dataset (e.g. data/')
parser.add_argument('--image-size', '-i', default=448, type=int,
                    metavar='N', help='image size (default: 224)')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=20, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--epoch_step', default=[30], type=int, nargs='+',
                    help='number of epochs to change learning rate')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=1, type=int,
                    metavar='N', help='mini-batch size (default: 256)')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate')
parser.add_argument('--lrp', '--learning-rate-pretrained', default=0.1, type=float,
                    metavar='LR', help='learning rate for pre-trained layers')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)')
parser.add_argument('--print-freq', '-p', default=0, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='checkpoint/voc/voc_checkpoint.pth.tar', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
# NOTE(review): a store_true flag with a non-empty string default means
# args.evaluate is truthy even without -e, so the script always runs in
# evaluate-only mode unless this default is changed — confirm intentional.
parser.add_argument('-e', '--evaluate', default='checkpoint/voc/voc_checkpoint.pth.tar', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
def main_voc2007():
    """Entry point: build VOC2007 datasets, the GCN-ResNet model, the optimizer
    and hand everything to the engine (evaluate-only when --evaluate is set)."""
    global args, best_prec1, use_gpu
    args = parser.parse_args()
    use_gpu = torch.cuda.is_available()
    # define datasets; inp_name points at the pickled label word embeddings
    train_dataset = Voc2007Classification(args.data, 'trainval', inp_name='data/voc/voc_glove_word2vec.pkl')
    val_dataset = Voc2007Classification(args.data, 'test', inp_name='data/voc/voc_glove_word2vec.pkl')
    # Voc2007Classification exposes: root, path_devkit, path_images, set,
    # transform, target_transform, classes (20 VOC category names),
    # images (per-image -1/0/1 label vectors) and inp (word embeddings).
    num_classes = 20
    # load model (t and adj_file feed the label-correlation matrix)
    model = gcn_resnet101(num_classes=num_classes, t=0.4, adj_file='data/voc/voc_adj.pkl')
    # define loss function (criterion)
    criterion = nn.MultiLabelSoftMarginLoss()
    # define optimizer: backbone at lr*lrp, GCN layers at lr
    optimizer = torch.optim.SGD(model.get_config_optim(args.lr, args.lrp),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    state = {'batch_size': args.batch_size, 'image_size': args.image_size, 'max_epochs': args.epochs,
             'evaluate': args.evaluate, 'resume': args.resume, 'num_classes': num_classes}
    state['difficult_examples'] = True
    state['save_model_path'] = 'checkpoint/voc2007/'
    state['workers'] = args.workers
    state['epoch_step'] = args.epoch_step
    state['lr'] = args.lr
    if args.evaluate:
        state['evaluate'] = True
    engine = GCNMultiLabelMAPEngine(state)
    engine.learning(model, criterion, train_dataset, val_dataset, optimizer)


if __name__ == '__main__':
    main_voc2007()
import torchvision.models as models
from torch.nn import Parameter
from util import *
import torch
import torch.nn as nn
from torchsummary import summary
# Select GPU when available, else CPU (used when moving the ResNet backbone).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class GraphConvolution(nn.Module):
"""
Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
"""
def __init__(self, in_features, out_features, bias=False):
super(GraphConvolution, self).__init__()
# 两层分别:
# 300,1024
# 1024,2408
# print('in_features:\n',in_features)
# print('out_features:\n',out_features)
self.in_features = in_features
self.out_features = out_features
self.weight = Parameter(torch.Tensor(in_features, out_features))
if bias:
self.bias = Parameter(torch.Tensor(1, 1, out_features))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
stdv = 1. / math.sqrt(self.weight.size(1))
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.uniform_(-stdv, stdv)
def forward(self, input, adj):
# print('input的维度:',input.shape) # torch.Size([20, 300]) torch.Size([20, 1024])
# print('self.weight的维度',self.weight.shape) # torch.Size([300, 1024]) torch.Size([1024, 2048])
# print('adj的维度:',adj.shape) # torch.Size([20, 20]) torch.Size([20, 20])
support = torch.matmul(input, self.weight)
output = torch.matmul(adj, support)
# print('support的维度:',support.shape) # torch.Size([20, 1024]) torch.Size([20, 2048])
# print('output的维度:',output.shape) # torch.Size([20, 1024]) torch.Size([20, 2048])
if self.bias is not None:
return output + self.bias
else:
return output
def __repr__(self):
return self.__class__.__name__ + ' (' \
+ str(self.in_features) + ' -> ' \
+ str(self.out_features) + ')'
class GCNResnet(nn.Module):
    """ML-GCN model: ResNet image features combined with GCN-built label classifiers.

    Args:
        model: a torchvision ResNet (e.g. resnet101) used as the backbone.
        num_classes: number of labels (20 for VOC).
        in_channel: word-embedding dimensionality (default 300).
        t: threshold forwarded to util.gen_A.
        adj_file: pickled label co-occurrence statistics, e.g. 'data/voc/voc_adj.pkl'.
    """

    def __init__(self, model, num_classes, in_channel=300, t=0, adj_file=None):
        super(GCNResnet, self).__init__()
        # backbone: every ResNet stage up to and including layer4
        self.features = nn.Sequential(
            model.conv1,
            model.bn1,
            model.relu,
            model.maxpool,
            model.layer1,
            model.layer2,
            model.layer3,
            model.layer4,
        )
        self.num_classes = num_classes
        self.pooling = nn.MaxPool2d(14, 14)
        # two GCN layers: in_channel -> 1024 -> 2048 (matches ResNet feature dim)
        self.gc1 = GraphConvolution(in_channel, 1024)
        self.gc2 = GraphConvolution(1024, 2048)
        self.relu = nn.LeakyReLU(0.2)
        # correlation matrix from label co-occurrence statistics (util.gen_A),
        # a (num_classes, num_classes) numpy array
        correlation = gen_A(num_classes, t, adj_file)
        self.A = Parameter(torch.from_numpy(correlation).float())
        # normalization constants consumed by the engine's data transforms
        self.image_normalization_mean = [0.485, 0.456, 0.406]
        self.image_normalization_std = [0.229, 0.224, 0.225]

    def forward(self, feature, inp):
        """Score images: dot product of pooled CNN features with GCN classifiers."""
        img_feat = self.features(feature)
        img_feat = self.pooling(img_feat)
        img_feat = img_feat.view(img_feat.size(0), -1)   # (B, 2048)
        word_emb = inp[0]                                # drop the batch dimension
        adj = gen_adj(self.A).detach()                   # normalized adjacency
        classifiers = self.relu(self.gc1(word_emb, adj))
        classifiers = self.gc2(classifiers, adj)         # (num_classes, 2048)
        classifiers = classifiers.transpose(0, 1)        # (2048, num_classes)
        return torch.matmul(img_feat, classifiers)       # (B, num_classes)

    def get_config_optim(self, lr, lrp):
        """Parameter groups: backbone at lr*lrp, both GCN layers at lr."""
        return [
            {'params': self.features.parameters(), 'lr': lr * lrp},
            {'params': self.gc1.parameters(), 'lr': lr},
            {'params': self.gc2.parameters(), 'lr': lr},
        ]
def gcn_resnet101(num_classes, t, pretrained=True, adj_file=None, in_channel=300):
    """Build a GCNResnet on top of an (optionally pretrained) ResNet-101.

    For VOC: num_classes=20, t=0.4, adj_file='data/voc/voc_adj.pkl'.
    """
    backbone = models.resnet101(pretrained=pretrained).to(device)
    return GCNResnet(backbone, num_classes, t=t, adj_file=adj_file, in_channel=in_channel)