label smoothing理论及PyTorch实现

Szegedy 等人在 Inception v3 论文中指出,one-hot 这种脉冲式的硬标签会使模型对预测过于自信,从而容易导致过拟合;label smoothing 通过把一小部分概率质量均匀分给所有类别来软化标签,以缓解这一问题。

new_labels = (1.0 - label_smoothing) * one_hot_labels + label_smoothing / num_classes

网络实现时,令 label_smoothing = 0.1,num_classes = 1000。Label smoothing 使网络精度提高了 0.2%。


import torch
import torch.nn as nn

class LabelSmoothing(nn.Module):
    """NLL loss with label smoothing.

    The loss is a weighted sum of two terms computed from the log-softmax
    of the input: the usual negative log-likelihood of the target class
    (weight ``1 - smoothing``) and the negative log-probability averaged
    uniformly over all classes (weight ``smoothing``).
    """

    def __init__(self, smoothing=0.0):
        """Constructor for the LabelSmoothing module.

        :param smoothing: label smoothing factor; ``0.0`` reduces the loss
            to the plain NLL of the log-softmax output.
        """
        super().__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing

    def forward(self, x, target):
        """Compute the label-smoothed loss, averaged over all elements.

        :param x: unnormalized scores (logits); classes lie on the last
            dimension.
        :param target: integer class indices with the shape of ``x`` minus
            its last dimension.
        :return: scalar tensor holding the mean loss.
        """
        logprobs = torch.nn.functional.log_softmax(x, dim=-1)

        # Pick the log-probability of the target class. Indexing on the last
        # dimension (rather than dim 1) keeps this correct for inputs with
        # extra leading dimensions, e.g. (batch, seq, classes).
        nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(-1))
        nll_loss = nll_loss.squeeze(-1)

        # Loss against a uniform distribution over all classes.
        smooth_loss = -logprobs.mean(dim=-1)

        loss = self.confidence * nll_loss + self.smoothing * smooth_loss
        return loss.mean()


