Reference:
https://pytorch.org/tutorials/beginner/basics/intro.html
This section gives a quick-start example; see the corresponding chapters for details on each class.
torch.utils.data.Dataset
torch.utils.data.DataLoader
DataLoader wraps an iterable around the Dataset.
Converting a Dataset into a DataLoader:
train_dataloader = DataLoader(training_data, batch_size=batch_size)
A DataLoader behaves like an iterator; you can traverse its elements with a for loop:
for X, y in test_dataloader:
print("Shape of X [N, C, H, W]: ", X.shape)
print("Shape of y: ", y.shape, y.dtype)
break
Note:
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))
# Define model
class NeuralNetwork(nn.Module):
def __init__(self):
super(NeuralNetwork, self).__init__()
self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(28*28, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 10),
nn.ReLU()
)
def forward(self, x):
x = self.flatten(x)
logits = self.linear_relu_stack(x)
return logits
model = NeuralNetwork().to(device)
print(model)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
Now we can train:
def train(dataloader, model, loss_fn, optimizer):
size = len(dataloader.dataset)
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
# Compute prediction error
pred = model(X)
loss = loss_fn(pred, y)
# Backpropagation
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
if __name__ == "__main__":
epochs = 5
for t in range(epochs):
print(f"Epoch {t + 1}\n-------------------------------")
train(train_dataloader, model, loss_fn, optimizer)
test(test_dataloader, model)
print("Done!")
Save
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")
Load
import torch
from torch import nn
from torch.utils.data import DataLoader
# An example using the torchvision library
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt
# 1.1 Load the data
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
root="data",
train=True,
download=True,
transform=ToTensor(),
)
# Download test data from open datasets.
test_data = datasets.FashionMNIST(
root="data",
train=False,
download=True,
transform=ToTensor(),
)
class NeuralNetwork(nn.Module):
def __init__(self):
super(NeuralNetwork, self).__init__()
self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(28*28, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 10),
nn.ReLU()
)
def forward(self, x):
x = self.flatten(x)
logits = self.linear_relu_stack(x)
return logits
model = NeuralNetwork()
model.load_state_dict(torch.load("model.pth"))
classes = [
"T-shirt/top",
"Trouser",
"Pullover",
"Dress",
"Coat",
"Sandal",
"Shirt",
"Sneaker",
"Bag",
"Ankle boot",
]
model.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
pred = model(x)
predicted, actual = classes[pred[0].argmax(0)], classes[y]
print(f'Predicted: "{predicted}", Actual: "{actual}"')
Directly from data (list)
The data type is automatically inferred.
data = [[1, 2],[3, 4]]
x_data = torch.tensor(data)
From a NumPy array
import numpy as np
np_array = np.array(data)
x_np = torch.from_numpy(np_array)
From another tensor
# Create a ones tensor with the same shape as x_data
x_ones = torch.ones_like(x_data) # retains the properties of x_data
print(f"Ones Tensor: \n {x_ones} \n")
# Create a random float tensor with the same shape as x_data.
x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data
print(f"Random Tensor: \n {x_rand} \n")
With random or constant values:
shape = (2,3,)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)
print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")
tensor = torch.rand(3,4)
print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")
All Tensor operations:
Standard numpy-like indexing and slicing:
tensor = torch.ones(4, 4)
print('First row: ', tensor[0])
print('First column: ', tensor[:, 0])
print('Last column:', tensor[..., -1])
# Set the second column to zero.
tensor[:,1] = 0
print(tensor)
Joining tensors (concatenation)
t1 = torch.cat([tensor, tensor, tensor], dim=1)
Before:
torch.Size([4, 4])
After:
torch.Size([4, 12]) # concatenated along dim=1
torch.cat copies the data: changing tensor afterwards does not change the values in t1.
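By contrast, torch.stack joins tensors along a new dimension; a quick sketch:
t2 = torch.stack([tensor, tensor, tensor], dim=0)
print(t2.shape) # torch.Size([3, 4, 4]): a new leading dimension is created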
Arithmetic operations
Matrix multiplication:
y1 = tensor @ tensor.T
y2 = tensor.matmul(tensor.T)
# or pre-allocate an output tensor
y3 = torch.rand_like(tensor)
torch.matmul(tensor, tensor.T, out=y3)
Element-wise multiplication, with NumPy-style broadcasting,
e.g. [[1,1],[1,1]] * [1,2] = [[1,2],[1,2]]:
z1 = tensor * tensor
z2 = tensor.mul(tensor)
z3 = torch.rand_like(tensor)
torch.mul(tensor, tensor, out=z3)
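A quick check of the broadcasting claim above:
a = torch.tensor([[1, 1], [1, 1]])
b = torch.tensor([1, 2])
print(a * b) # tensor([[1, 2], [1, 2]]): b is broadcast across the rows of a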
Single-element tensors: use .item() to extract the value as a plain Python number.
.shape: torch.Size([])
.item(): 16.0
For example, tensor.sum() returns the sum of all elements in tensor.
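A concrete check of the values above:
agg = torch.ones(4, 4).sum()
print(agg.shape) # torch.Size([]), a zero-dimensional tensor
print(agg.item()) # 16.0, a Python float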
In-place operations: the method name ends with an underscore (_), e.g.
tensor.add_(5)
adds 5 to every element of tensor in place.
Tensor to NumPy array: t.numpy()
The two share the same underlying memory.
t = torch.ones(5)
n = t.numpy()
n[0] = 100
t.add_(1)
print(f"t: {t}")
print(f"n: {n}")
NumPy array to Tensor: torch.from_numpy()
The two share the same underlying memory.
n = np.ones(5)
t = torch.from_numpy(n)
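As in the other direction, a change to the array shows up in the tensor:
np.add(n, 1, out=n) # modify the NumPy array in place
print(f"t: {t}") # t: tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
print(f"n: {n}") # n: [2. 2. 2. 2. 2.]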
Some built-in Datasets:
Image Datasets
Text Datasets
Audio Datasets
As an example, load the FashionMNIST dataset from torchvision:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
training_data = datasets.FashionMNIST(
root="data", # 保存在当前目录下的data文件夹内
train=True, # 训练集还是测试集
download=True, # 是否当场下载
transform=ToTensor() # specify the feature and label transformations
)
test_data = datasets.FashionMNIST(
root="data",
train=False,
download=True,
transform=ToTensor()
)
X, y = training_data[index]
training_data[index] gives the index-th sample.
labels_map = {
0: "T-Shirt",
1: "Trouser",
2: "Pullover",
3: "Dress",
4: "Coat",
5: "Sandal",
6: "Shirt",
7: "Sneaker",
8: "Bag",
9: "Ankle Boot",
}
figure = plt.figure(figsize=(8, 8))
cols, rows = 3, 3
for i in range(1, cols * rows + 1):
sample_idx = torch.randint(len(training_data), size=(1,)).item()
img, label = training_data[sample_idx]
figure.add_subplot(rows, cols, i)
plt.title(labels_map[label])
plt.axis("off")
plt.imshow(img.squeeze(), cmap="gray")
plt.show()
A custom Dataset class must implement three functions:
__init__: initializes the labels, directory, transform (type conversion applied to X) and target_transform (type conversion applied to y).
__len__: returns the number of samples in our dataset.
__getitem__: loads and returns the sample at the given index.
import os
import pandas as pd
from torchvision.io import read_image
class CustomImageDataset(Dataset):
def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
self.img_labels = pd.read_csv(annotations_file)
self.img_dir = img_dir
self.transform = transform
self.target_transform = target_transform
def __len__(self):
return len(self.img_labels)
def __getitem__(self, idx):
img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
image = read_image(img_path)
label = self.img_labels.iloc[idx, 1]
if self.transform:
image = self.transform(image)
if self.target_transform:
label = self.target_transform(label)
sample = {"image": image, "label": label}
return sample
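A usage sketch, assuming a hypothetical annotations.csv (first column: image filename, second column: label) and that all images share the same size:
from torch.utils.data import DataLoader
dataset = CustomImageDataset("annotations.csv", "images/") # hypothetical paths
loader = DataLoader(dataset, batch_size=4, shuffle=True)
batch = next(iter(loader)) # the default collate function stacks the dict fields
print(batch["image"].shape, batch["label"])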
The key part:
from torch.utils.data import DataLoader
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
for X, y in train_dataloader:
...
from torch.utils.data import DataLoader
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)
# Display image and label.
train_features, train_labels = next(iter(train_dataloader)) # fetch the next element from the iterator
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
img = train_features[0].squeeze()
label = train_labels[0]
plt.imshow(img, cmap="gray")
plt.show()
print(f"Label: {label}")
transform: to modify the features.
target_transform: to modify the labels.
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
ds = datasets.FashionMNIST(
root="data",
train=True,
download=True,
transform=ToTensor(),
target_transform=Lambda(lambda y: torch.zeros(10, dtype=torch.float).scatter_(0, torch.tensor(y), value=1)) # convert the integer label into a one-hot vector
)
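A quick check of the label transform:
img, label = ds[0]
print(label) # e.g. tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]) for class 9
print(label.shape) # torch.Size([10])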
Transforms can be chained with Compose (requires from torchvision import transforms):
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))])
The torch.nn namespace provides all the building blocks you need to build your own neural network; every module subclasses nn.Module.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))
nn.Flatten()
Keeps the batch dimension: [a, b, c, d] -> [a, b*c*d].
nn.Linear(in_features=28*28, out_features=20)
Applies a linear transformation from in_features to out_features, keeping the batch dimension: [a, 784] -> [a, 20].
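A shape check for these two layers:
x = torch.rand(32, 1, 28, 28) # a batch of 32 grayscale images
flat = nn.Flatten()(x) # the batch dimension is kept
print(flat.shape) # torch.Size([32, 784])
out = nn.Linear(in_features=28*28, out_features=20)(flat)
print(out.shape) # torch.Size([32, 20])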
torch.nn.Conv2d(in_channels, out_channels, kernel_size): in_channels is the number of input channels (1 for a grayscale image).
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros')
torch.nn.MaxPool2d(kernel_size, stride=None)
torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
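A shape sketch for these two layers on a grayscale batch:
x = torch.rand(1, 1, 28, 28) # [N, C, H, W]
conv = torch.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
print(conv(x).shape) # torch.Size([1, 6, 24, 24]), since 28 - 5 + 1 = 24
pool = torch.nn.MaxPool2d(kernel_size=2) # stride defaults to kernel_size
print(pool(conv(x)).shape) # torch.Size([1, 6, 12, 12])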
nn.ReLU(), nn.Sigmoid()
nn.Softmax(dim=1): dim=1 means the softmax is computed along dimension 1 of the output.
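For example, each row sums to 1 after softmax over dim=1:
logits = torch.rand(2, 10)
probs = nn.Softmax(dim=1)(logits)
print(probs.sum(dim=1)) # tensor([1.0000, 1.0000])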
nn.Sequential
flatten = nn.Flatten()
layer1 = nn.Linear(28*28, 20)
seq_modules = nn.Sequential(
flatten,
layer1,
nn.ReLU(),
nn.Linear(20, 10)
)
input_image = torch.rand(3,28,28)
logits = seq_modules(input_image)
# Print the parameters of each layer of the model
print("Model structure: ", model, "\n\n")
# model.named_parameters() yields each layer's name and its parameter tensor.
for name, param in model.named_parameters():
print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")
As an example, we build the following computational graph:
import torch
x = torch.ones(5) # [1, 1, 1, 1, 1]; think of it as a (1, 5) row vector
y = torch.zeros(3) # [0, 0, 0]
w = torch.randn(5,3, requires_grad=True)
# The line above is equivalent to:
# w = torch.randn(5,3)
# w.requires_grad_(True)
b = torch.randn(3, requires_grad=True)
z = torch.matmul(x, w) + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y) # cross entropy between sigmoid(z) and y
# Compute the gradients
loss.backward()
print(w.grad)
print(b.grad)
Note: by default the computational graph is freed after .backward(); to backpropagate through the same graph again, pass retain_graph=True:
loss.backward(retain_graph=True)
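A minimal sketch reusing the tensors above:
z = torch.matmul(x, w) + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)
loss.backward(retain_graph=True) # the graph is kept alive
loss.backward() # second call succeeds; gradients accumulate into w.grad and b.grad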
Method 1: with torch.no_grad():
z = torch.matmul(x, w)+b
print(z.requires_grad)
with torch.no_grad():
z = torch.matmul(x, w)+b
print(z.requires_grad)
Method 2: z_det = z.detach()
z = torch.matmul(x, w)+b
z_det = z.detach()
print(z_det.requires_grad)
Forward pass: autograd does two things at once: it runs the requested operation to compute the resulting tensor, and it records the operation's gradient function in the DAG.
backward pass
DAGs are dynamic in PyTorch (dynamic computational graphs)
An important thing to note is that the graph is recreated from scratch; after each .backward() call, autograd starts populating a new graph. This is exactly what allows you to use control flow statements in your model; you can change the shape, size and operations at every iteration if needed.
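A small sketch of such control flow (a hypothetical module, purely to illustrate):
class DynamicNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 4)
    def forward(self, x):
        # The loop count depends on the data itself; autograd records
        # a fresh graph with that many steps on every forward pass.
        for _ in range(int(x.sum().abs().item()) % 3 + 1):
            x = torch.relu(self.linear(x))
        return x
out = DynamicNet()(torch.rand(2, 4))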
Regression
nn.MSELoss()
Classification
nn.NLLLoss (Negative Log Likelihood)
nn.CrossEntropyLoss: combines nn.LogSoftmax and nn.NLLLoss.
torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')
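A quick numerical check that nn.CrossEntropyLoss matches nn.LogSoftmax followed by nn.NLLLoss:
logits = torch.randn(4, 10) # batch of 4 samples, 10 classes
target = torch.tensor([1, 0, 3, 9])
ce = nn.CrossEntropyLoss()(logits, target)
nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), target)
print(torch.allclose(ce, nll)) # True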
Various Optimizers
An example with SGD; we pass in the model parameters registered earlier as the parameters to train.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Zero the gradients of all parameter tensors
optimizer.zero_grad()
# Backpropagate to compute the gradient on each tensor
loss.backward()
# Update the parameters using the optimizer and the computed gradients
optimizer.step()
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
training_data = datasets.FashionMNIST(
root="data",
train=True,
download=True,
transform=ToTensor()
)
test_data = datasets.FashionMNIST(
root="data",
train=False,
download=True,
transform=ToTensor()
)
train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)
class NeuralNetwork(nn.Module):
def __init__(self):
super(NeuralNetwork, self).__init__()
self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(28*28, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 10),
nn.ReLU()
)
def forward(self, x):
x = self.flatten(x)
logits = self.linear_relu_stack(x)
return logits
model = NeuralNetwork()
# Hyperparameters
learning_rate = 1e-3
batch_size = 64
epochs = 5
def train_loop(dataloader, model, loss_fn, optimizer):
size = len(dataloader.dataset)
for batch, (X, y) in enumerate(dataloader):
# Compute prediction and loss
pred = model(X)
loss = loss_fn(pred, y)
# Backpropagation
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
pred = model(X)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
epochs = 10
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train_loop(train_dataloader, model, loss_fn, optimizer)
test_loop(test_dataloader, model, loss_fn)
print("Done!")
# Save
from torchvision import models
model = models.vgg16(pretrained=True)
torch.save(model.state_dict(), 'model_weights.pth')
# Load
# First recreate the corresponding network structure.
model = models.vgg16() # we do not specify pretrained=True, i.e. do not load default weights
model.load_state_dict(torch.load('model_weights.pth'))
model.eval()
⚠️: always call model.eval() before inference, otherwise the results will be inconsistent with those obtained before saving! (It puts the dropout and batch normalization layers into evaluation mode.)
With the previous approach we had to re-instantiate the model (model = models.vgg16()); with the approach below we no longer need to.
torch.save(model, 'model.pth')
model = torch.load('model.pth')
When loading with this approach, the class definition of the model's network must be importable.
This approach uses Python pickle module when serializing the model, thus it relies on the actual class definition to be available when loading the model.
PyTorch also has native ONNX export support. However, given the dynamic nature of the PyTorch execution graph, the export process must traverse the execution graph to produce a persisted ONNX model. For this reason, a test variable of the appropriate size should be passed to the exporter (in our case, we create a dummy zero tensor of the correct size).
input_image = torch.zeros((1,3,224,224))
torch.onnx.export(model, input_image, 'model.onnx')
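The exported file can then be run with, for example, the onnxruntime package (an assumption on my part; it is not used in this tutorial):
import onnxruntime as ort
session = ort.InferenceSession('model.onnx')
input_name = session.get_inputs()[0].name
outputs = session.run(None, {input_name: input_image.numpy()}) # feed the dummy tensor as a NumPy array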
1. TensorBoard setup
If a missing-module error is reported, install it with pip.
from torch.utils.tensorboard import SummaryWriter
# default `log_dir` is "runs" - we'll be more specific here
# Set the folder where logs are stored
writer = SummaryWriter('runs/fashion_mnist_experiment_1')
2. Writing to TensorBoard
# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter) # dataiter.next() was removed in newer PyTorch versions
# create grid of images
img_grid = torchvision.utils.make_grid(images)
# show images
matplotlib_imshow(img_grid, one_channel=True)
# write to tensorboard
writer.add_image('four_fashion_mnist_images', img_grid)
3. Inspect the model using TensorBoard
writer.add_graph(net, images) # pass in a batch of data and the network instance
writer.close()
Then, from the command line, go to the folder containing runs and run:
tensorboard --logdir=runs
Then open the following address in a browser:
http://localhost:6006/
4. Adding a “Projector” to TensorBoard
# helper function
def select_n_random(data, labels, n=100):
'''
Selects n random datapoints and their corresponding labels from a dataset
'''
assert len(data) == len(labels)
perm = torch.randperm(len(data))
return data[perm][:n], labels[perm][:n]
# select random images and their target indices
images, labels = select_n_random(trainset.data, trainset.targets)
# get the class labels for each image
class_labels = [classes[lab] for lab in labels]
# log embeddings
features = images.view(-1, 28 * 28) # [n_samples, feature-vector dimension]
# Maps the high-dimensional space to a low-dimensional one; just feed in the data
writer.add_embedding(features,
metadata=class_labels,
label_img=images.unsqueeze(1))
writer.close()
5. Tracking model training with TensorBoard
# helper functions
def images_to_probs(net, images):
'''
Generates predictions and corresponding probabilities from a trained
network and a list of images
'''
output = net(images) # [SampleSize,10]
# convert output probabilities to predicted class
_, preds_tensor = torch.max(output, 1)
preds = np.squeeze(preds_tensor.numpy())
return preds, [F.softmax(el, dim=0)[i].item() for i, el in zip(preds, output)]
def plot_classes_preds(net, images, labels):
'''
Generates matplotlib Figure using a trained network, along with images
and labels from a batch, that shows the network's top prediction along
with its probability, alongside the actual label, coloring this
information based on whether the prediction was correct or not.
Uses the "images_to_probs" function.
'''
preds, probs = images_to_probs(net, images)
# plot the images in the batch, along with predicted and true labels
fig = plt.figure(figsize=(12, 48))
for idx in np.arange(4):
ax = fig.add_subplot(1, 4, idx+1, xticks=[], yticks=[])
matplotlib_imshow(images[idx], one_channel=True)
ax.set_title("{0}, {1:.1f}%\n(label: {2})".format(
classes[preds[idx]],
probs[idx] * 100.0,
classes[labels[idx]]),
color=("green" if preds[idx]==labels[idx].item() else "red"))
return fig
running_loss = 0.0
for epoch in range(1): # loop over the dataset multiple times
for i, data in enumerate(trainloader, 0):
# The usual training procedure
# get the inputs; data is a list of [inputs, labels]
inputs, labels = data
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
if i % 1000 == 999: # every 1000 mini-batches...
# ...log the running loss, writing it to TensorBoard
writer.add_scalar('training loss',
running_loss / 1000,
epoch * len(trainloader) + i)
# ...log a Matplotlib Figure showing the model's predictions on a
# random mini-batch
writer.add_figure('predictions vs. actuals',
plot_classes_preds(net, inputs, labels),
global_step=epoch * len(trainloader) + i)
running_loss = 0.0
print('Finished Training')
6. Assessing trained models with TensorBoard: adding a Precision/Recall curve
writer.add_pr_curve(classes[class_index],
tensorboard_truth, # list of booleans: True if the sample's true class is this one
tensorboard_probs, # list of probabilities: the predicted probability of this class
global_step=global_step)
# 1. gets the probability predictions in a test_size x num_classes Tensor
# 2. gets the preds in a test_size Tensor
# takes ~10 seconds to run
class_probs = []
class_label = []
with torch.no_grad():
for data in testloader:
images, labels = data
output = net(images)
class_probs_batch = [F.softmax(el, dim=0) for el in output]
class_probs.append(class_probs_batch)
class_label.append(labels)
test_probs = torch.cat([torch.stack(batch) for batch in class_probs])
test_label = torch.cat(class_label)
# helper function
def add_pr_curve_tensorboard(class_index, test_probs, test_label, global_step=0):
'''
Takes in a "class_index" from 0 to 9 and plots the corresponding
precision-recall curve
'''
tensorboard_truth = test_label == class_index # boolean mask: True where the true label equals class_index
tensorboard_probs = test_probs[:, class_index] # each sample's predicted probability for this class
writer.add_pr_curve(classes[class_index],
tensorboard_truth,
tensorboard_probs,
global_step=global_step)
writer.close()
# plot all the pr curves
for i in range(len(classes)):
add_pr_curve_tensorboard(i, test_probs, test_label) # draw a PR curve for every class
My guess at the implementation: for each sample whose true class is the given one, look at the probability with which it is predicted as that class; TP, FP, TN and FN can then be counted at every probability threshold.
Tensor.squeeze()
Removes all dimensions of size 1, e.g. [1,2,3,1,4,1] -> [2,3,4]
Change the value at one position along a dimension:
y.scatter_(dim=0, index=torch.tensor(1), value=2), e.g. [0,0,0] -> [0,2,0]
Permute dimensions:
np.transpose(tmp, (3,2,0,1)), e.g. (3, 2, 1, 1) -> (1, 1, 3, 2)
From an integer label to a one-hot vector:
y = torch.tensor([1]) # torch.Size([1])
torch.zeros(10, dtype=torch.float).scatter_(0, y, value=1)
torch.max()
value, index = torch.max(output, 1) # the maximum of output along dim 1 and the corresponding index
preds = np.squeeze(index.numpy())
In short:
BCEWithLogitsLoss = Sigmoid + BCELoss
A test:
input = torch.randn(3,3)
target = torch.FloatTensor([[0,1,1],
[1,1,0],
[0,1,1]])
m = nn.Sigmoid()
loss = nn.BCELoss()
print(loss(m(input),target)) # tensor(0.8303)
loss = nn.BCEWithLogitsLoss()
print(loss(input,target)) # tensor(0.8303)
Manual verification:
In our example:
input = tensor([[-0.8477, 0.0327, -0.0345],
[ 0.0830, -0.6805, -0.6124],
[ 1.6842, -0.4261, -0.1475]])
m(input) = tensor([[0.2999, 0.5082, 0.4914],
[0.5207, 0.3362, 0.3515],
[0.8435, 0.3951, 0.4632]])
target = tensor([[0., 1., 1.],
[1., 1., 0.],
[0., 1., 1.]])
$BCE = -\frac{1}{n}\sum_n \big(y_n \ln(x_n) + (1-y_n)\ln(1-x_n)\big)$
# tensor(0.8303)
print(-torch.mean(torch.log(1-m(input).view(1,-1))*(1-target.view(1,-1))+torch.log(m(input).view(1,-1))*(target.view(1,-1))))