Summary of PyTorch model-building tricks

1. device = torch.device()

Set the device once at the top of the script and move models and tensors with model.to(device) instead of calling .cuda(), so the same code runs on both CPU and GPU.
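A minimal sketch of this pattern (assuming model, features, and targets already exist):

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
features = features.to(device)
targets = targets.to(device)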

2. Dropout & Batch Normalization & activation functions

Recommended layer ordering: Linear -> BN -> ReLU -> Dropout
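A small nn.Sequential block illustrating this ordering (layer sizes and dropout rate are placeholders):

import torch.nn as nn

block = nn.Sequential(
    nn.Linear(256, 128),    # Linear
    nn.BatchNorm1d(128),    # BN
    nn.ReLU(),              # ReLU
    nn.Dropout(p=0.5),      # Dropout
)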

3. Initialization

Put all hyperparameter settings at the very top of the script, for example:
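(The names and values below are illustrative placeholders, not from the original.)

RANDOM_SEED = 1
LEARNING_RATE = 0.001
BATCH_SIZE = 128
NUM_EPOCHS = 10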

Then seed the RNG and reinitialize the weights, here with He (Kaiming) initialization (the loop is meant to run inside the model's __init__, hence self):

import torch
import torch.nn as nn

torch.manual_seed(RANDOM_SEED)

###############################################
# Reinitialize weights using He initialization
###############################################
for m in self.modules():
    if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
        nn.init.kaiming_normal_(m.weight)
        if m.bias is not None:       # bias can be None (e.g. bias=False)
            nn.init.zeros_(m.bias)

4. Write reusable evaluation helpers instead of rewriting them every time


A naive first version (note that it forgets model.eval() and torch.no_grad(), so dropout and batch norm stay in training mode and activations are needlessly tracked for autograd):

def compute_accuracy(model, data_loader):
    correct_pred, num_examples = 0, 0
    for features, targets in data_loader:
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)   # model returns (logits, probas)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float() / num_examples * 100



The corrected version switches to eval mode and disables gradient tracking (the view(-1, 28*28) flattening is specific to MNIST-style inputs):

def compute_accuracy(net, data_loader):
    net.eval()
    correct_pred, num_examples = 0, 0
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.view(-1, 28*28).to(device)
            targets = targets.to(device)
            logits, probas = net(features)
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float() / num_examples * 100

def compute_epoch_loss(model, data_loader):
    # requires: import torch.nn.functional as F
    model.eval()
    curr_loss, num_examples = 0., 0
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)
            logits, probas = model(features)
            # sum per-sample losses; divide by the dataset size at the end
            loss = F.cross_entropy(logits, targets, reduction='sum')
            num_examples += targets.size(0)
            curr_loss += loss
    return curr_loss / num_examples
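Typical per-epoch usage (assuming a valid_loader is defined):

valid_acc = compute_accuracy(model, valid_loader)
valid_loss = compute_epoch_loss(model, valid_loader)
print(f'Valid accuracy: {valid_acc:.2f}% | Valid loss: {valid_loss:.4f}')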


5. MyDataset: writing a custom Dataset

import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset

class CelebaDataset(Dataset):
    """Custom Dataset for loading CelebA face images"""

    def __init__(self, csv_path, img_dir, transform=None):
        df = pd.read_csv(csv_path, index_col=0)
        self.img_dir = img_dir
        self.csv_path = csv_path
        self.img_names = df.index.values
        self.y = df['Male'].values   # binary label column from the CelebA csv
        self.transform = transform

    def __getitem__(self, index):
        img = Image.open(os.path.join(self.img_dir,
                                      self.img_names[index]))
        
        if self.transform is not None:
            img = self.transform(img)
        
        label = self.y[index]
        return img, label

    def __len__(self):
        return self.y.shape[0]
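A hedged usage sketch (file paths, crop sizes, and batch size are illustrative; assumes transforms and DataLoader are imported as in section 8):

custom_transform = transforms.Compose([transforms.CenterCrop((178, 178)),
                                       transforms.Resize((128, 128)),
                                       transforms.ToTensor()])

train_dataset = CelebaDataset(csv_path='celeba_train.csv',
                              img_dir='img_align_celeba/',
                              transform=custom_transform)

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=128,
                          shuffle=True,
                          num_workers=4)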

6. ResNet

[Figure 1 from the original post (a ResNet illustration) did not survive extraction.]
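In place of the lost figure, here is a minimal sketch of a basic residual block (my own illustration, not the original figure; the channel count is a placeholder):

import torch
import torch.nn as nn
import torch.nn.functional as F

class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        shortcut = x                          # identity shortcut
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = F.relu(out + shortcut)          # add skip connection, then activate
        return out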

7. Cyclical Learning Rate

import numpy as np

def cyclical_learning_rate(batch_step,
                           step_size,
                           base_lr=0.001,
                           max_lr=0.006,
                           mode='triangular',
                           gamma=0.999995):

    cycle = np.floor(1 + batch_step / (2. * step_size))
    x = np.abs(batch_step / float(step_size) - 2 * cycle + 1)

    lr_delta = (max_lr - base_lr) * np.maximum(0, (1 - x))
    
    if mode == 'triangular':
        pass
    elif mode == 'triangular2':
        lr_delta = lr_delta * 1 / (2. ** (cycle - 1))
    elif mode == 'exp_range':
        lr_delta = lr_delta * (gamma**(batch_step))
    else:
        raise ValueError('mode must be "triangular", "triangular2", or "exp_range"')
        
    lr = base_lr + lr_delta
    
    return lr

num_epochs = 150
iter_per_ep = len(train_loader.sampler.indices) // train_loader.batch_size
base_lr = 0.09
max_lr = 0.175
batch_step = -1

cost_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=base_lr)

for epoch in range(num_epochs):
    for batch_idx, (features, targets) in enumerate(train_loader):
        batch_step += 1
        # update the learning rate once per batch; base_lr stays fixed
        lr = cyclical_learning_rate(batch_step=batch_step,
                                    step_size=4 * iter_per_ep,  # half-cycle of 4 epochs (a common choice)
                                    base_lr=base_lr,
                                    max_lr=max_lr)
        for g in optimizer.param_groups:
            g['lr'] = lr
        ...
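Recent PyTorch versions also ship this as a built-in scheduler, which is usually preferable to a hand-rolled version:

scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer,
                                              base_lr=base_lr,
                                              max_lr=max_lr,
                                              step_size_up=4 * iter_per_ep,
                                              mode='triangular')
# call scheduler.step() after each batch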

8. Splitting a validation set from the training set

from torch.utils.data import DataLoader, SubsetRandomSampler
from torchvision import datasets, transforms

# fixed split: first 1,000 images for validation, the rest for training
# (shuffle with torch.randperm(60000) first if you want a random split)
train_indices = torch.arange(1000, 60000)
valid_indices = torch.arange(0, 1000)

train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(valid_indices)


training_transform = transforms.Compose([transforms.Resize((32, 32)),
                                         transforms.RandomCrop((28, 28)),
                                         transforms.ToTensor()])

valid_transform = transforms.Compose([transforms.Resize((32, 32)),
                                      transforms.CenterCrop((28, 28)),
                                      transforms.ToTensor()])



train_dataset = datasets.MNIST(root='data', 
                               train=True, 
                               transform=training_transform,
                               download=True)

# note that this is the same dataset as "train_dataset" above
# however, we can now choose a different transform method
valid_dataset = datasets.MNIST(root='data', 
                               train=True, 
                               transform=valid_transform,
                               download=False)

test_dataset = datasets.MNIST(root='data', 
                              train=False, 
                              transform=valid_transform,
                              download=False)

train_loader = DataLoader(train_dataset,
                          batch_size=BATCH_SIZE,
                          num_workers=4,
                          sampler=train_sampler)

valid_loader = DataLoader(valid_dataset,
                          batch_size=BATCH_SIZE,
                          num_workers=4,
                          sampler=valid_sampler)

test_loader = DataLoader(dataset=test_dataset, 
                         batch_size=BATCH_SIZE,
                         num_workers=4,
                         shuffle=False)

9. DataParallel

1. Wrap the model with model = nn.DataParallel(model)

2. Use the functional loss from torch.nn.functional (e.g. F.cross_entropy) rather than instantiating a loss module inside the wrapped model

#### DATA PARALLEL START ####

model = VGG16(num_features=num_features, num_classes=num_classes)
if torch.cuda.device_count() > 1:
    print("Using", torch.cuda.device_count(), "GPUs")
    model = nn.DataParallel(model)

#### DATA PARALLEL END ####

model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# keep a reference to the functional loss; do NOT call it without arguments
loss_func = F.cross_entropy
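A hedged sketch of one training step with this setup (features/targets come from a DataLoader as above; assumes the model returns logits):

logits = model(features)
cost = loss_func(logits, targets)
optimizer.zero_grad()
cost.backward()
optimizer.step()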

10. Hooks

Forward hooks let you capture intermediate activations without modifying the model:

outputs = []
def hook(module, input, output):
    outputs.append(output)

# model.net is assumed to be an nn.Sequential; keep the handle so the hook can be removed later
handle = model.net[2].register_forward_hook(hook)
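After a forward pass, outputs[0] holds the captured activation (the input shape below is a placeholder):

_ = model(torch.randn(1, 3, 224, 224).to(device))
print(outputs[0].shape)
handle.remove()   # detach the hook when done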

Still to explore

Pinned Memory: setting pin_memory=True in a DataLoader places batches in page-locked host memory, which allows asynchronous host-to-GPU copies via .to(device, non_blocking=True).
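A minimal sketch of that combination (reusing the dataset, sampler, and BATCH_SIZE from section 8):

train_loader = DataLoader(train_dataset,
                          batch_size=BATCH_SIZE,
                          num_workers=4,
                          pin_memory=True,
                          sampler=train_sampler)

for features, targets in train_loader:
    features = features.to(device, non_blocking=True)
    targets = targets.to(device, non_blocking=True)
    ...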
