torch.nn.L1Loss computes the element-wise absolute error:
loss(X_i, y_i) = |X_i - y_i|
Here we run a quick experiment to see the concrete behavior:
# torch.nn.L1Loss
import torch

l1_loss_fn = torch.nn.L1Loss(reduction='none')  # keep the element-wise losses
input = torch.randn(3, 4)
target = torch.randn(3, 4)
loss = l1_loss_fn(input, target)
print(input)
print(target)
print(loss)
# verify against a manual computation
res = torch.abs(input - target)
print("loss computed by ourselves")
print(res)
The output:
tensor([[ 0.5152, -1.3686, 0.3119, -0.3094],
[-0.3865, -0.2515, -1.4992, -0.2219],
[ 0.3324, -0.3495, 0.8597, -0.0018]])
tensor([[ 1.3572, -0.9364, 1.0528, 0.4357],
[-0.2460, 0.2986, -0.5723, -0.1117],
[-1.1078, 1.1902, 1.4491, -0.2142]])
tensor([[0.8420, 0.4322, 0.7408, 0.7452],
[0.1405, 0.5502, 0.9268, 0.1102],
[1.4402, 1.5397, 0.5894, 0.2124]])
tensor([[0.8420, 0.4322, 0.7408, 0.7452],
[0.1405, 0.5502, 0.9268, 0.1102],
[1.4402, 1.5397, 0.5894, 0.2124]])
With reduction='sum', the same inputs collapse to a single scalar, the sum of all the element-wise losses above:
tensor(8.2697)
The reduce and size_average parameters of older PyTorch versions allowed different combinations of these behaviors; they have since been merged into the single reduction parameter, where 'none' keeps the element-wise losses, 'sum' adds them up, and 'mean' (the default) is equivalent to
torch.mean(torch.abs(input - target))
The other loss functions take the same reduction parameter, and it behaves the same way for them.
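A minimal sketch of the two reduced variants (the tensors here are freshly sampled, so the numbers will differ from the run above):

import torch

input = torch.randn(3, 4)
target = torch.randn(3, 4)
# 'sum' adds up all element-wise losses; 'mean' divides that sum by the number of elements
sum_loss = torch.nn.L1Loss(reduction='sum')(input, target)
mean_loss = torch.nn.L1Loss(reduction='mean')(input, target)
print(sum_loss, torch.sum(torch.abs(input - target)))    # identical scalars
print(mean_loss, torch.mean(torch.abs(input - target)))  # identical scalars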
torch.nn.MSELoss computes the element-wise squared error:
loss(X_i, y_i) = (X_i - y_i)^2
# torch.nn.MSELoss
import torch

MSE_loss_fn = torch.nn.MSELoss(reduction='none')  # keep the element-wise losses
input = torch.randn(3, 4)
target = torch.randn(3, 4)
loss = MSE_loss_fn(input, target)
print(input)
print(target)
print(loss)
# verify against a manual computation
res = input - target
print(res * res)
The output:
tensor([[ 0.3487, 0.4603, -0.3404, -0.2632],
[ 0.5376, -1.0239, -1.5926, -1.2581],
[ 0.8796, 0.4397, -0.2821, 0.0028]])
tensor([[ 0.9949, 2.3588, 0.1053, -1.2758],
[-0.5526, -1.0309, 0.9014, -0.0308],
[ 0.9400, 1.1123, 0.3666, -0.5454]])
tensor([[4.1764e-01, 3.6046e+00, 1.9869e-01, 1.0253e+00],
[1.1884e+00, 4.8598e-05, 6.2203e+00, 1.5061e+00],
[3.6437e-03, 4.5234e-01, 4.2089e-01, 3.0058e-01]])
tensor([[4.1764e-01, 3.6046e+00, 1.9869e-01, 1.0253e+00],
[1.1884e+00, 4.8598e-05, 6.2203e+00, 1.5061e+00],
[3.6437e-03, 4.5234e-01, 4.2089e-01, 3.0058e-01]])
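Since Variable has been folded into Tensor in modern PyTorch, autograd works on plain tensors. As a minimal sketch (the variable names are illustrative), here is MSELoss in a training-style backward pass, using the scalar 'mean' reduction that a bare backward() call requires:

import torch

pred = torch.randn(3, 4, requires_grad=True)  # stand-in for a model's output
target = torch.randn(3, 4)
mse = torch.nn.MSELoss(reduction='mean')      # scalar loss, suitable for backward()
loss = mse(pred, target)
loss.backward()
print(pred.grad)  # equals 2 * (pred - target) / pred.numel()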
BCELoss is the cross-entropy loss for binary classification; its input must first be passed through a sigmoid (i.e., put a Sigmoid layer in front of it).
loss(X_i, y_i) = -w_i [y_i log(x_i) + (1 - y_i) log(1 - x_i)]
# torch.nn.BCELoss / torch.nn.BCEWithLogitsLoss
import torch

BCE_loss_fn = torch.nn.BCELoss(reduction='none')
BCE_logit_loss_fn = torch.nn.BCEWithLogitsLoss(reduction='none')
input = torch.randn(3, 4)
target = torch.FloatTensor(3, 4).random_(2)  # random 0/1 labels
loss = BCE_loss_fn(torch.sigmoid(input), target)  # BCELoss expects probabilities
print(input)
print(target)
print(loss)
print(BCE_logit_loss_fn(input, target))  # applies the sigmoid internally
The output:
tensor([[-0.2960, -0.6593, 0.7279, -1.1125],
[ 0.9475, 0.5286, 1.6567, -0.2942],
[-0.0741, 2.1198, 0.9491, 0.7699]])
tensor([[1., 1., 1., 1.],
[1., 0., 0., 0.],
[1., 0., 1., 1.]])
tensor([[0.8521, 1.0762, 0.3940, 1.3967],
[0.3277, 0.9920, 1.8313, 0.5568],
[0.7309, 2.2332, 0.3272, 0.3805]])
tensor([[0.8521, 1.0762, 0.3940, 1.3967],
[0.3277, 0.9920, 1.8313, 0.5568],
[0.7309, 2.2332, 0.3272, 0.3805]])
As you can see, loss, x, y, and w all have the same shape.
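To make the w_i in the formula above concrete, here is a sketch that passes an explicit weight tensor (the 0.5 values are arbitrary); BCELoss multiplies each element's loss by the corresponding weight:

import torch

input = torch.randn(3, 4)
target = torch.FloatTensor(3, 4).random_(2)
weight = torch.full((3, 4), 0.5)  # arbitrary per-element weights, same shape as the input
prob = torch.sigmoid(input)
weighted = torch.nn.BCELoss(weight=weight, reduction='none')(prob, target)
unweighted = torch.nn.BCELoss(reduction='none')(prob, target)
print(torch.allclose(weighted, weight * unweighted))  # True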
In addition, nn.BCEWithLogitsLoss does not need a Sigmoid layer in front of it: it applies the sigmoid internally, in a numerically more stable form, which is why the two loss tensors printed above are identical.
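A quick sanity check of that equivalence (up to floating-point error):

import torch

logits = torch.randn(3, 4)
target = torch.FloatTensor(3, 4).random_(2)
with_logits = torch.nn.BCEWithLogitsLoss(reduction='none')(logits, target)
manual = torch.nn.BCELoss(reduction='none')(torch.sigmoid(logits), target)
print(torch.allclose(with_logits, manual, atol=1e-6))  # True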
CrossEntropyLoss is used for multi-class classification; no softmax layer is needed in front of it, since the softmax is built into the loss:
loss(x, label) = -w_label log( e^{x_label} / Σ_{j=1}^{N} e^{x_j} )
# torch.nn.CrossEntropyLoss
import torch

loss_fn = torch.nn.CrossEntropyLoss(reduction='none')
input = torch.randn(3, 4)                # 3 samples, 4 classes (raw logits)
target = torch.LongTensor(3).random_(4)  # one class index per sample
loss = loss_fn(input, target)
print(input)
print(target)
print(loss)
The output:
tensor([[-0.2541, 0.5136, 1.2984, -0.1278],
[ 1.4406, 2.6949, 1.9780, 1.8310],
[-0.1522, 1.7501, -1.0701, -0.3558]])
tensor([1, 3, 3])
tensor([1.4309, 1.6501, 2.3915])
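As with the earlier losses, we can verify the result by hand. A sketch that recomputes the per-sample loss from the log-softmax (assuming the unweighted case, w_label = 1):

import torch
import torch.nn.functional as F

input = torch.randn(3, 4)
target = torch.LongTensor(3).random_(4)
loss = torch.nn.CrossEntropyLoss(reduction='none')(input, target)
# pick each sample's log-probability at its target class and negate it
log_probs = F.log_softmax(input, dim=1)
manual = -log_probs.gather(1, target.unsqueeze(1)).squeeze(1)
print(torch.allclose(loss, manual))  # True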