开头统一设置 device，把模型和数据都用 model.to(device) / tensor.to(device) 移到设备上，不要用 .cuda()（这样 CPU/GPU 代码通用）
层的顺序：Linear -> BatchNorm -> ReLU -> Dropout
将所有超参数集中写在文件最前面，便于修改和复现
# Seed PyTorch's global RNG so weight init and shuffling are reproducible.
# NOTE(review): assumes `random_seed` is defined with the hyperparameters above.
torch.manual_seed(random_seed)
###############################################
# Reinitialize weights using He initialization
###############################################
# NOTE(review): fragment of a module method (uses `self.modules()`) —
# indentation reconstructed here; confirm against the enclosing class.
for m in self.modules():
    if isinstance(m, torch.nn.Conv2d):
        # Kaiming/He normal init is the standard choice for ReLU networks;
        # .detach() keeps the in-place init out of autograd's graph.
        nn.init.kaiming_normal_(m.weight.detach())
        m.bias.detach().zero_()
    elif isinstance(m, torch.nn.Linear):
        nn.init.kaiming_normal_(m.weight.detach())
        m.bias.detach().zero_()
def compute_accuracy(model, data_loader):
    """Return the classification accuracy (percent) of `model` on `data_loader`.

    The model is expected to return a `(logits, probas)` pair; predictions are
    the argmax over `probas`.

    FIX: the original ran in training mode with gradient tracking enabled,
    unlike the sibling compute_accuracy below — added `model.eval()` and
    `torch.no_grad()` so dropout/batchnorm behave correctly and no graph is built.
    """
    model.eval()
    correct_pred, num_examples = 0, 0
    with torch.no_grad():
        for features, targets in data_loader:
            # NOTE(review): relies on a module-level `device` being defined.
            features = features.to(device)
            targets = targets.to(device)
            logits, probas = model(features)
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float() / num_examples * 100
def compute_accuracy(net, data_loader):
    """Return the accuracy of `net` on `data_loader` as a percentage.

    Each batch is flattened to 784-dim vectors (28x28 MNIST images) before the
    forward pass; `net` is expected to return a `(logits, probas)` pair.
    Evaluation runs in eval mode with gradients disabled.
    """
    net.eval()
    num_correct = 0
    num_total = 0
    with torch.no_grad():
        for batch_features, batch_targets in data_loader:
            # NOTE(review): relies on a module-level `device` being defined.
            batch_features = batch_features.view(-1, 28 * 28).to(device)
            batch_targets = batch_targets.to(device)
            _, probas = net(batch_features)
            predictions = probas.argmax(dim=1)
            num_total += batch_targets.size(0)
            num_correct += (predictions == batch_targets).sum()
    return num_correct.float() / num_total * 100
def compute_epoch_loss(model, data_loader):
    """Return the mean cross-entropy loss of `model` over `data_loader`.

    `model` is expected to return a `(logits, probas)` pair; only the logits
    are used. Runs in eval mode without gradient tracking.
    """
    model.eval()
    total_loss = 0.
    seen_examples = 0
    with torch.no_grad():
        for features, targets in data_loader:
            # NOTE(review): relies on a module-level `DEVICE` being defined.
            features = features.to(DEVICE)
            targets = targets.to(DEVICE)
            logits, probas = model(features)
            # 'sum' reduction so dividing by the example count afterwards
            # gives an exact per-example mean even with a ragged last batch.
            total_loss += F.cross_entropy(logits, targets, reduction='sum')
            seen_examples += targets.size(0)
    return total_loss / seen_examples
class CelebaDataset(Dataset):
    """Custom Dataset for loading CelebA face images.

    The CSV at `csv_path` must have image file names in its first column
    (used as the index) and a 'Male' column holding the binary target label.
    """

    def __init__(self, csv_path, img_dir, transform=None):
        annotations = pd.read_csv(csv_path, index_col=0)
        self.csv_path = csv_path
        self.img_dir = img_dir
        self.transform = transform
        self.img_names = annotations.index.values
        self.y = annotations['Male'].values

    def __getitem__(self, index):
        image_path = os.path.join(self.img_dir, self.img_names[index])
        image = Image.open(image_path)
        if self.transform is not None:
            image = self.transform(image)
        return image, self.y[index]

    def __len__(self):
        return self.y.shape[0]
def cyclical_learning_rate(batch_step,
                           step_size,
                           base_lr=0.001,
                           max_lr=0.006,
                           mode='triangular',
                           gamma=0.999995):
    """Cyclical learning-rate schedule (Smith, 2017, "triangular" policy).

    The rate rises linearly from `base_lr` to `max_lr` over `step_size`
    batches, then falls back over the next `step_size` batches, repeating.
    'triangular2' halves the amplitude each cycle; 'exp_range' decays it by
    `gamma ** batch_step`. Raises ValueError for an unknown `mode`.
    """
    if mode not in ('triangular', 'triangular2', 'exp_range'):
        raise ValueError('mode must be "triangular", "triangular2", or "exp_range"')
    # 1-based index of the current cycle and relative position within it.
    cycle = np.floor(1 + batch_step / (2. * step_size))
    position = np.abs(batch_step / float(step_size) - 2 * cycle + 1)
    amplitude = (max_lr - base_lr) * np.maximum(0, 1 - position)
    if mode == 'triangular2':
        amplitude = amplitude / (2. ** (cycle - 1))
    elif mode == 'exp_range':
        amplitude = amplitude * gamma ** batch_step
    return base_lr + amplitude
# ----- hyperparameters -----
num_epochs = 150
# Minibatches per epoch: the sampler restricts the loader to a subset of indices.
iter_per_ep = len(train_loader.sampler.indices) // train_loader.batch_size
base_lr = 0.09
max_lr = 0.175

cost_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=base_lr)

for epoch in range(num_epochs):
    # FIX: original wrote `for step,(x,y) in dataloader:` — missing
    # enumerate() and referencing an undefined `dataloader`.
    for step, (x, y) in enumerate(train_loader):
        ...
        # Global batch counter across epochs drives the cyclical schedule
        # (was undefined in the original).
        batch_step = epoch * iter_per_ep + step
        # FIX: assign to a fresh variable — the original overwrote `base_lr`
        # each step, corrupting the schedule's lower bound thereafter.
        lr = cyclical_learning_rate(batch_step=batch_step,
                                    step_size=num_epochs * iter_per_ep,
                                    base_lr=base_lr,
                                    max_lr=max_lr)
        for g in optimizer.param_groups:
            g['lr'] = lr
from torch.utils.data import SubsetRandomSampler

# Hold out the first 1000 training images for validation; the remaining
# 59000 are used for training. The two index ranges are disjoint.
valid_indices = torch.arange(0, 1000)
train_indices = torch.arange(1000, 60000)
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(valid_indices)

# Training uses random crops as augmentation; validation/test use a
# deterministic center crop of the same output size.
training_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.RandomCrop((28, 28)),
    transforms.ToTensor(),
])
valid_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.CenterCrop((28, 28)),
    transforms.ToTensor(),
])

train_dataset = datasets.MNIST(root='data', train=True,
                               transform=training_transform, download=True)
# Same underlying data as train_dataset, but with the deterministic
# transform — the samplers above keep the splits from overlapping.
valid_dataset = datasets.MNIST(root='data', train=True,
                               transform=valid_transform, download=False)
test_dataset = datasets.MNIST(root='data', train=False,
                              transform=valid_transform, download=False)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                          num_workers=4, sampler=train_sampler)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE,
                          num_workers=4, sampler=valid_sampler)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE,
                         num_workers=4, shuffle=False)
1. 多 GPU 时用 nn.DataParallel(model) 包装模型（不是 model.DataParallel）
2. loss_func 要用 nn.functional 里的函数本身，例如 loss_func = F.cross_entropy（不要加括号调用）
model = VGG16(num_features=num_features, num_classes=num_classes)

#### DATA PARALLEL START ####
# FIX: START/END markers were swapped around model.to(device) in the original.
if torch.cuda.device_count() > 1:
    print("Using", torch.cuda.device_count(), "GPUs")
    model = nn.DataParallel(model)  # replicate the model across all visible GPUs
#### DATA PARALLEL END ####

model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# FIX: `F.cross_entropy()` *called* the function with no arguments (TypeError).
# Bind the function object itself so it can be invoked later as loss_func(logits, targets).
loss_func = F.cross_entropy
# Capture intermediate activations of one layer via a forward hook.
outputs = []

def hook(module, input, output):
    # Called by PyTorch after the hooked module's forward pass; stores the
    # raw output tensor (still attached to the autograd graph).
    outputs.append(output)

# NOTE(review): hooks the third submodule of model.net — confirm this index
# matches the layer whose activations are wanted.
model.net[2].register_forward_hook(hook)