PyTorch1:tensor2、torch.nn、autograd、loss等神经网络学习手册(持续更新) 链接:画图、读写图片
简介:tensor是torch的一种专门的数据结构,类似于NumPy的ndarrays,可以在GPU或其他硬件加速器上运行。在PyTorch中,使用张量对模型的输入和输出以及模型的参数进行编码
加载数据torch.utils.data
(1)数据集来源
(2)变成可迭代数据集:torch.utils.data.DataLoader
'''利用官方提供数据集,并将其变成可迭代数据集'''
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
'''Download training data from open datasets.'''
training_data = datasets.FashionMNIST( root="data", train=True, download=True, transform=ToTensor(), )
'''Download test data from open datasets.'''
test_data = datasets.FashionMNIST( root="data", train=False, download=True, transform=ToTensor(), )
batch_size = 64
'''Create data loaders.'''
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)
1、plt.imshow()可以显示(H,W)对应标量值的图片,即FashionMNIST数据集的灰色图片
print(len(training_data)); print(training_data.classes); print(training_data.class_to_idx)
print(type(training_data[0])); img, label = training_data[0]; print(img.shape, label)
img = training_data[0][0]; label = training_data[0][1]; print(img.shape, label)
import matplotlib.pyplot as plt
import torch
figure = plt.figure(figsize=(8, 8))
cols, rows = 3, 3
for i in range(1, cols * rows + 1):
sample_idx = torch.randint(len(training_data), size=(1,)).item()
img, label = training_data[sample_idx]
figure.add_subplot(rows, cols, i)
plt.title(training_data.classes[label])
plt.axis("off")
plt.imshow(img.squeeze(), cmap="gray")
plt.show()
2、plt.imshow()可以显示(H,W,C=3)的numpy或torch类型的RGB图片。但datasets得到的(C,H,W),所以需要进行维度改变:torch.permute或np.transpose。或者不用plt.imshow而用PIL.Image.show。如下:
plt.imshow(train_data[0][0].permute(1, 2, 0))
plt.imshow((train_data[0][0].numpy().transpose(1,2,0)))
transforms.ToPILImage()(train_data[0][0]).show()
i = 0
for X, y in train_dataloader:
print(y); i += 1
if i == 2: break
from collections.abc import Iterable
print(isinstance(training_data, Iterable) == True)
train_features, train_labels = next(iter(train_dataloader))#一个批次64张图片
print(f"Feature batch shape: {train_features.size()}")#[N, C, H, W]: torch.Size([64, 1, 28, 28])
print(f"Labels batch shape: {train_labels.size()}")
img = train_features[0].squeeze(); label = train_labels[0]#查看第一个批次的第一张图片
plt.imshow(img, cmap="gray")
plt.show()
常用数据集的形式如下:标签是包含图片的文件夹的名字(左);图片在一个文件夹,需要创建一个标签文件夹,里面的txt文件与图片同名,且里面内容为标签(右)。
a、对于左图数据形式,继承Dataset类来自定义数据集,等价于torchvision.datasets.ImageFolder函数
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
# from torchvision.io import read_image
transform = transforms.Compose([transforms.Resize((300, 300)), transforms.ToTensor()])
class MyDataset(Dataset):
def __init__(self, root_dir, lable_dir, transform=None, target_transform=None):
self.root_dir = root_dir
self.lable_dir = lable_dir
self.path = os.path.join(self.root_dir, self.lable_dir)
self.img_path = os.listdir(self.path)
self.transform = transform
self.target_transform = target_transform
def __getitem__(self, index):
img = Image.open(os.path.join(self.path, self.img_path[index]))#得到PIL.JpegImagePlugin.JpegImageFile
# img = read_image(os.path.join(self.path, self.img_path[index]))#得到torch数据类型,(C, H, W),使用该读取方式则不需要self.transform
label = os.path.split(self.lable_dir)[1]
if self.transform:
image = self.transform(image)#将PIL图片转为torch数据类型:(C, H, W)in the range [0.0, 1.0],C为RGB,H=W=300
if self.target_transform:
label = self.target_transform(label)
return img, label # 返回图片和标签
def __len__(self):
return len(self.img_path)
root_path = 'hymenoptera_data';
train_ants_path = 'train/ants'; train_bees_path = 'train/bees'
val_ants_path = 'val/ants'; val_bees_path = 'val/bees'
train_ants = MyDataset(root_path, train_ants_path, transforms=transform)
train_bees = MyDataset(root_path, train_bees_path, transforms=transform)
val_ants = MyDataset(root_path, val_ants_path, transforms=transform)
val_bees = MyDataset(root_path, val_bees_path, transforms=transform)
train_img = train_ants + train_bees; val_img = val_ants + val_bees
train_ants_img, train_ants_lable = train_img[0] # 查看数据
import torchvision
import matplotlib.pyplot as plt
img = torchvision.transforms.ToPILImage()(train_img[0][0]) #将(C, H, W)torch数据类型图片转成PIL图片,且数值范围从[0,1]变成ToTensor之前的值(原始值)
img.show()#用PIL展示
# plt.imshow(img)
# plt.show()#用matplotlib展示
train_ants_dataloader = DataLoader(dataset=train_ants, batch_size=10)
for X,Y in train_ants_dataloader: # 一个批次10张图片
print(len(X), X[0].shape,)
img = torchvision.transforms.ToPILImage()(X[0]) #将(C, H, W)torch数据类型图片转成PIL图片,且数值范围从[0,1]变成ToTensor之前的值(原始值)
img.show()#用PIL展示
break
b、对于右图数据形式:
# 创建一个包含与图片同名的txt文件的标签文件夹
import os
root_dir = 'hymenoptera_data1/train' # 以处理train/ants为例,还需要train/bees, val/ants, val/bees
target_dir = 'ants_image'
img_path = os.listdir(os.path.join(root_dir, target_dir))
label = target_dir.split('_')[0]
out_dir = 'ants_label'
for i in img_path:
file_name = i.split('.jpg')[0]
with open(os.path.join(root_dir, out_dir,"{}.txt".format(file_name)),'w') as f:
f.write(label) #w:如果没有这个文件,就新建一个;如果有,就把原文件清空再写入新内容
# 利用右图数据继承Dataset类来自定义数据集
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms
class MyData(Dataset):
def __init__(self, root_dir, image_dir, label_dir, transform=None, target_transform=None):
self.root_dir = root_dir
self.image_dir = image_dir
self.label_dir = label_dir
self.label_path = os.path.join(self.root_dir, self.label_dir)
self.image_path = os.path.join(self.root_dir, self.image_dir)
self.image_list = os.listdir(self.image_path)
self.label_list = os.listdir(self.label_path)
self.transform = transform
self.target_transform = target_transform
# 因为label 和 Image文件名相同,进行一样的排序,可以保证取出的数据和label是一一对应的
self.image_list.sort()
self.label_list.sort()
def __getitem__(self, idx):
img_name = self.image_list[idx]
label_name = self.label_list[idx]
img_item_path = os.path.join(self.root_dir, self.image_dir, img_name)
label_item_path = os.path.join(self.root_dir, self.label_dir, label_name)
img = Image.open(img_item_path)
with open(label_item_path, 'r') as f:
label = f.readline()
if self.transform:
image = self.transform(image)
if self.target_transform:
label = self.target_transform(label)
return img, label
def __len__(self):
assert len(self.image_list) == len(self.label_list)
return len(self.image_list)
transform = transforms.Compose([transforms.Resize(400), transforms.ToTensor()])
root_dir = 'hymenoptera_data1/train' # 以处理train/(ants、bees)为例,还需要val/(ants、bees)
image_ants, label_ants = "ants_image", "ants_label"
ants_dataset = MyData(root_dir, image_ants, label_ants, transform=transform)
image_bees, label_bees = "bees_image", "bees_label"
bees_dataset = MyData(root_dir, image_bees, label_bees, transform=transform)
train_img = train_ants + train_bees
PS1:(1)torch.nn的Conv2d是一个类(内部定义好weight, bias变量,由于继承自nn.Module所以能与nn.Sequential结合使用),(2)torch.nn.functional的conv2d是一个函数,(3)类实例化self.conv1后self.conv1(x)时执行了forward()函数。(4)定义网络时如果层内有Variable则用nn(当定义有变量参数的层时比如conv2d, linear, batch_norm),反之用nn.functional(可以在其基础上自定义功能)
#导入必要的包
import torch
import torch.nn as nn
import torch.nn.functional as F
device = "cuda" if torch.cuda.is_available() else "cpu"
class Net(nn.Module):
def __init__(self):#定义和初始化网络
super(Net, self).__init__()#对继承自父类nn.Module的属性进行初始化,类似于Module.__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
self.conv2 = nn.Conv2d(32, 64, 3, 1)
self.dropout1 = nn.Dropout2d(0.25)
self.dropout2 = nn.Dropout2d(0.5)
self.fc1 = nn.Linear(9216, 128)
self.fc2 = nn.Linear(128, 10)
def forward(self, x):#定义数据传播过程,前向传播,x代表数据
x = self.conv1(x)
x = F.relu(x)
x = self.conv2(x)
x = F.relu(x)
x = F.max_pool2d(x, 2)
x = self.dropout1(x)
x = torch.flatten(x, 1)#Flatten x with start_dim=1,此时shape=(batch, c*w*h)
x = self.fc1(x)
x = F.relu(x)
x = self.dropout2(x)
x = self.fc2(x)
output = F.log_softmax(x, dim=1)
return output
random_data = torch.rand((1, 1, 28, 28)).to(device)#测试模型确保是想要的输出,one random 28x28 image
my_nn = Net().to(device); print(my_nn)
result = my_nn(random_data); print (result)
import torch.optim as optim
optimizer = optim.SGD(my_nn.parameters(), lr=0.001, momentum=0.9)
# 上述创建好了模型和随机梯度下降优化器,分别查看它们的参数
print("Model's state_dict:")
for param_tensor in my_nn.state_dict():
print(param_tensor, "\t", my_nn.state_dict()[param_tensor].size())
for name,param in my_nn.state_dict(keep_vars=True).items():#和上面相同,另外加入keep_vars=True后,requires_grad的打印结果才正确
print(name,"\t",param.shape,param.requires_grad)
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
print(var_name, "\t", optimizer.state_dict()[var_name])
在每次迭代中:训练过程用训练集3来学习参数以便做出更好的预测,验证过程用验证集4来打印模型的准确性和损失
loss_fn = nn.NLLLoss().to(device)#定义优化器(上面)和损失函数
def train(dataloader, model, loss_fn, optimizer):
size = len(dataloader.dataset)
my_nn.train()
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
#计算预测误差
pred = my_nn(X)
loss = loss_fn(pred, y)
#反向传播
optimizer.zero_grad()#将上次迭代计算的梯度值清0
loss.backward()#反向传播,计算梯度值
optimizer.step()#更新权值参数
# print输出格式说明:print(f"a={a:$>8.3f}")#右对齐,共8位,其中三位小数,不够的位数用$填充,默认空格填充
if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = my_nn(X)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f"Test Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
epochs = 1
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataloader, my_nn, loss_fn, optimizer)
test(test_dataloader, my_nn, loss_fn)
print("Done!")
PATH = "state_dict_model.pt" # 约定.ph或.pth拓展名
torch.save(my_nn.state_dict(), PATH) # 保存模型
model = Net() # 加载模型
model.load_state_dict(torch.load(PATH)) # load_state_dict接受字典数据而不是路径
# 利用加载的模型进行预测
model.eval() # set dropout and batch normalization layers to evaluation mode before running inference
x, y = next(iter(test_dataloader))
test_loss, correct = 0, 0
with torch.no_grad():
x, y = x.to(device), y.to(device)
pred = my_nn(x)
test_loss = loss_fn(pred, y).item()
correct = (pred.argmax(1) == y).type(torch.float).sum().item()
correct /= len(x)
print(f"Test Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
jpg_to_png、数据集划分:形成上面补充3中左图的数据集结构
注意: 这样处理数据后,后面构建dataloader时一定要设置shuffle=True,否则网络只会学成前面的预测为一类,后面的预测为另一类
import os
import shutil
import cv2
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split
import re
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision
import matplotlib.pyplot as plt
import torch
from torchvision.models import resnet18
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
plt.rcParams['font.sans-serif'] = ['SimHei']# 画图时手动选择字体,显示中文标签。SimHei 中文黑体 Kaiti 中文楷体 FangSong 中文仿宋
plt.rcParams['font.size'] = 7#设置字体大小
'''创建文件夹'''
def mkdir(path):
folder = os.path.exists(path)
if not folder:
os.makedirs(path)
'''将文件夹input_path下的jpg图片转为png图片,并保存到output_path文件夹中'''
def jpg_to_png(input_path, output_path):
for root, dirs, files in os.walk(input_path):#->Iterator[tuple( AnyStr(dirpath), list[AnyStr(dirnames)], list[AnyStr(filenames)] )]
for name in files:
file = os.path.join(root, name)
im = cv2.imread(file)
if output_path:cv2.imwrite(os.path.join(output_path, name.replace('jpg', 'png')), im)
else:cv2.imwrite(file.replace('jpg', 'png'), im)
'''将dic_path文件夹下的数据按8:2(函数中test_size更改比例)划分为训练集和测试集,分别保存到train_path和val_path文件夹下'''
def split_for_dic(dic_path, train_path, val_path):
file_pathes = os.listdir(dic_path)
# 获取文件夹下所有 png 格式的图像的名称(不包含后缀名)
img_names = []
for file_path in file_pathes:
if os.path.splitext(file_path)[1] == ".png":
file_name = os.path.splitext(file_path)[0]
img_names.append(file_name)
# 划分训练集和验证集
train_set, val_set = train_test_split(img_names, test_size=0.2, random_state=42)
print(f"train_set size: {len(train_set)}, val_set size: {len(val_set)}, {dic_path} size:{len(train_set)+len(val_set)}")
# 得到训练集
for file_name in train_set:
img_src_path = os.path.join(dic_path, file_name+".png")
img_dst_path = os.path.join(train_path, file_name+".png")
shutil.copyfile(img_src_path, img_dst_path)#->Copy data from src to dst
# 得到验证集
for file_name in val_set:
img_src_path = os.path.join(dic_path, file_name+".png")
img_dst_path = os.path.join(val_path, file_name+".png")
shutil.copyfile(img_src_path, img_dst_path)#->Copy data from src to dst
if __name__ == "__main__":
'''形成上面补充3中左图的数据集文件夹形式'''
jpg_to_png("./allFake", "./allFake_png"); jpg_to_png("./allReal", "./allReal_png")
#将第一类数据(fake)划分训练集和验证集 #将第二类数据(real)划分训练集和验证集
split_for_dic("./allFake_png", "./train/fake", "./val/fake"); split_for_dic("./allReal_png", "./train/real", "./val/real")
(1)用封装好的ImageFolder和DataLoader函数处理上面得到的结构的数据集,ImageFolder等价于上面补充3的代码将该数据集表示为data对应label的train_data/test_data的形式
(2)训练模型:用官方提供的各种网络,查看网络print_net,训练train,测试test,多少代main(epochs=10)
device = "cuda" if torch.cuda.is_available() else "cpu"
'''用封装好的ImageFolder和DataLoader函数处理该数据集'''
transform_train = transforms.Compose([
transforms.RandomCrop(112, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),])
transform_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),])#matplotlib显示[0,255]及[0,1]数据,所以transform后若马上显示要im/2+0.5
def print_net():
net = resnet18(weights=None, num_classes=2).to(device)
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
# 上述创建好了模型和随机梯度下降优化器,分别查看它们的参数
print("Model's state_dict:")
for param_tensor in net.state_dict():
print(param_tensor, "\t", net.state_dict()[param_tensor].size())
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
print(var_name, "\t", optimizer.state_dict()[var_name])
random_data = torch.rand((64, 3, 28, 28)).to(device)
result = net(random_data)
print(net)
print(result)
print (result.size())
# print_net()
def train(dataloader, net, loss_fn, optimizer, epoch):
size = len(dataloader.dataset)
num_batches = len(dataloader)
net.train()
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
#计算预测误差
pred = net(X)
loss = loss_fn(pred, y)
#反向传播
optimizer.zero_grad()#将上次迭代计算的梯度值清0
loss.backward()#反向传播,计算梯度值
optimizer.step()#更新权值参数
# print输出格式说明:print(f"a={a:$>8.3f}")#右对齐,共8位,其中三位小数,不够的位数用$填充,默认空格填充
if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
'''每训练100批次测试一次,和下面二选一,下面表示只测试20个测试集批次以节省时间'''
# test(test_dataloader, net, loss_fn)
with torch.no_grad():
i = 0
for x, y in test_dataloader:
i += 1
if i == 20: break
test_loss, correct = 0, 0
x, y = x.to(device), y.to(device)
pred = net(x)
test_loss = loss_fn(pred, y).item()
correct = (pred.argmax(1) == y).type(torch.float).sum().item()
correct /= len(x)
print(f"Test Accuracy: {(100*correct):>0.2f}%, Avg loss: {test_loss:>8f} \n")
PATH = "./model/state_dict_model"+str(epoch*num_batches+batch)+".pt" # 约定.ph或.pth拓展名
torch.save(net.state_dict(), PATH) # 保存模型
def test(dataloader, net, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
net.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = net(X)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f"Test Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
def main(epochs=10):
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataloader, net, loss_fn, optimizer, epoch=t)
test(test_dataloader, net, loss_fn)
# PATH = "./model/state_dict_model"+t+".pt" # 约定.ph或.pth拓展名
# torch.save(net.state_dict(), PATH) # 保存模型
print("Done!")
if __name__ == "__main__":
train_data = torchvision.datasets.ImageFolder(root='./train/', transform=transform_train)
test_data = torchvision.datasets.ImageFolder(root='./val/', transform=transform_test)
batch_size = 64
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
print("train_data:", len(train_data), train_data.classes, train_data.class_to_idx)
print("test_data:", len(test_data), test_data.classes, test_data.class_to_idx)
net = resnet18(weights=None, num_classes=2).to(device)
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
loss_fn = nn.CrossEntropyLoss().to(device)
main(epochs=10)
加载模型和测试集、测试函数中ToPILImage保存误识别图片
def test_mis_pic(dataloader, net, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
mis_feature, mis_label, mis_pred, mis_dataloder = [], [], [], []
net.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for i, (X, y) in enumerate(dataloader, start=1):
X, y = X.to(device), y.to(device)
pred = F.softmax(net(X), dim=1)
test_loss += loss_fn(net(X), y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
#取误识别样本和相应的模型输出
mis_X = X[(pred.argmax(1) != y)].cpu().data
mis_y = y[(pred.argmax(1) != y)].cpu().data
mis_feature.append(mis_X)
mis_label.append(mis_y)
mis_dataloder.append((mis_X, mis_y))
mis_pred.append(pred[(pred.argmax(1) != y)].cpu().data)
# if i == 10: print(len(mis_dataloder), mis_dataloder[i-1][0].size(), mis_dataloder[i-1][1].size()); break;
test_loss /= num_batches
correct /= size
mis_feature = torch.cat(mis_feature, dim=0)
mis_label = torch.cat(mis_label, dim=0)
mis_pred = torch.cat(mis_pred, dim=0)
mis_pre_lab = torch.cat((mis_pred, mis_label.unsqueeze(1)), dim=1)
print(f"mis_feature:{mis_feature.shape}, mis_label:{mis_label.shape}, mis_pred:{mis_pred.shape}, mis_pre_lab:{mis_pre_lab.shape}")
print(f"Test Accuracy: 1-{len(mis_label)}/{size}={(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
'''保存误识别的图片,关注保存了哪些东西,上面的mis_feature、mis_label 、mis_pred 、 mis_pre_lab 都可用用于返回'''
mis_feature = mis_feature/2+0.5
for i, pic in enumerate(mis_feature):
pic = torchvision.transforms.ToPILImage()(pic)
if mis_pre_lab[i][2] == 0.0:
os.makedirs(r"./mispic/fake/"); path = "./mispic/fake/"+str(mis_pre_lab[i])+".png"
else:
os.makedirs(r"./mispic/real/"); path = "./mispic/real/"+str(mis_pre_lab[i])+".png"
pic.save(path)
if __name__ == "__main__":
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.manual_seed(0)
PATH="./model/state_dict_model100.pt"
net = resnet18(weights=None, num_classes=2).to(device) # 加载模型
net.load_state_dict(torch.load(PATH)) # load_state_dict接受字典数据而不是路径
loss_fn = nn.CrossEntropyLoss().to(device)
transform_test = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
test_data = torchvision.datasets.ImageFolder(root='./val/', transform=transform_test)
batch_size = 64
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
print(f"test_data:{len(test_data)}, num_batches:{len(test_dataloader)}, class:{test_data.classes}, class_idx:{test_data.class_to_idx}")
test_mis_pic(test_dataloader, net, loss_fn)
误分图像或测试集图像经灰度图像增强再放入模型测试(可挪用到先增强后训练,或许能提高效果)
(1)图像增强(各种变换函数),下面transGray函数展示了如何使用变换函数;
'''灰度图像增强部分:各种变换可以组合使用。
#灰度变换之线性变换:t=a*x+b,s源灰度值,t目标灰度值
#a>1,原图的灰度值之间的差距被拉大了,即图片中明暗的差距更大(增加对比度),反之a<1减小对比度
#b,原图整体太亮或太暗,调整b的值使图像一开始处于合适的灰度值'''
def linearGray(img, a=1.2, b=10, showPlt=True, showPlt_i=None):
newImg = img
newImg = a * img + b
newImg[newImg>255] = 255; newImg[newImg<0] = 0
newImg = np.uint8(newImg)
if showPlt:
x = np.arange(0, 256, 0.01)
y = a * x + b
plt.subplot(5,4,4*showPlt_i+1)
plt.plot(x, y, 'r', linewidth=1)
plt.title(u'线性变换函数')
plt.xlim(0, 255), plt.ylim(0, 255)
return newImg
'''灰度变换之对数变换:提升低亮区域,压缩高亮区域,使低亮区域的特征更加突出明显,即非线性改变对比度。t=c*lg(1+s)'''
def logGray(img, c=40, showPlt=True, showPlt_i=None):
newImg = c * np.log(1.0 + img)
newImg[newImg>255] = 255; newImg[newImg<0] = 0
newImg = np.uint8(newImg)
if showPlt:
x = np.arange(0, 256, 0.01)
y = c * np.log(1 + x)
plt.subplot(5,4,4*showPlt_i+1)
plt.plot(x, y, 'r', linewidth=1)
plt.title(u'对数变换函数')
plt.xlim(0, 255), plt.ylim(0, 255)
return newImg
'''灰度变换之指数变换:图片低亮度区域将被压缩,高亮度区域将被扩展。t=b^[c*(x-a)]-1'''
def indexGray(img, c=0.41, b=1.06, a=20, showPlt=True, showPlt_i=None):
newImg = b ** (c * (img - a)) - 1
newImg[newImg>255] = 255; newImg[newImg<0] = 0
newImg = np.uint8(newImg)
if showPlt:
x = np.arange(0, 256, 0.01)
y = b ** (c * (x - a)) - 1
plt.subplot(5,4,4*showPlt_i+1)
plt.plot(x, y, 'r', linewidth=1)
plt.title(u'指数变换函数')
plt.xlim(0, 255), plt.ylim(0, 255)
return newImg
'''灰度变换之gamma变换(幂变换):对漂白(相机曝光)或过暗(曝光不足)的图片进行矫正。t=(s+esp)^γ'''
def gammaGray(img, esp=0, gama=2.5, showPlt=True, showPlt_i=None):
newImg = pow(img/255 + esp, gama)*255
newImg[newImg>255] = 255; newImg[newImg<0] = 0
newImg = np.uint8(newImg)
if showPlt:
x = np.arange(0, 256, 0.01)
y = pow(x/255 + esp, gama)*255
plt.subplot(5,4,4*showPlt_i+1)
plt.plot(x, y, 'r', linewidth=1)
plt.title(u'gamma变换函数')
plt.xlim(0, 255), plt.ylim(0, 255)
return newImg
'''灰度变换之直方图均衡化:适用于整体偏暗或者偏亮的情况,可以使得整幅图像的灰度值份均匀分布在整个动态范围[0,255]之内(直方图呈均匀分布),从而增加图像的对比度。'''
def equalize_hist(img, showPlt=True, nbr_bins=256, showPlt_i=True, other_trans=linearGray):
# 图像直方图统计
imhist, bins = np.histogram(img.flatten(), nbr_bins)
# 累积分布函数
cdf = imhist.cumsum()
cdf = 255.0 * cdf / cdf[-1]
# 使用累积分布函数的线性插值,计算新的像素值
newImg = (np.interp(img.flatten(), bins[:-1], cdf)).reshape(img.shape) # 分段线性插值函数
if other_trans != None:
newImg = other_trans(newImg, showPlt=False)#容易太亮,和其他变换组合使用
if showPlt:
plt.subplot(5,4,4*showPlt_i+1)
plt.hist(img.flatten(), 256, label='源图片直方图')
plt.hist(newImg.flatten(), 256, label='变换后直方图')
plt.plot(cdf, color='r', label='累计分布函数')# 显示累积分布函数
plt.legend()#显示图例
plt.title(u'直方图均衡化')
return np.uint8(newImg)
'''将一张图片分布经上述五种灰度变换,再经网络模型得到经变换后的图片的预测类别'''
def transGray(path, label):
data, label= [],label
im = np.asarray(Image.open(path))
plt.figure(figsize=(10, 12))
plt.subplots_adjust(left=None, bottom=0.1, right=None, top=0.9, wspace=None, hspace=0.5)
'''一张图片经单个变换函数'''
# plt.figure(figsize=(15, 5))
# im1 = equalize_hist(im, showPlt=False)#变换函数组合使用
# im1 = linearGray(im)
# plt.subplot(1,3,2); plt.imshow(im); plt.title(os.path.split(path)[1])
# plt.subplot(1,3,3); plt.imshow(im1); plt.title("灰度变换")
# plt.show()
''' 一张图片经各个变换函数'''
transfun = {'linearGray':'linearGray', 'logGray':'logGray', 'indexGray':'indexGray', 'gammaGray':'gammaGray', 'equalize_hist':'equalize_hist'}
for i, (key,value) in enumerate(transfun.items()):
im1 = eval(value+'(im, showPlt_i=i)')
data.append(im1)
plt.subplot(5,4,4*i+2); plt.imshow(im); plt.title(os.path.splitext(os.path.split(path)[1])[0]+"label", bbox=dict(edgecolor='blue', alpha=0.1))
plt.subplot(5,4,4*i+3); plt.imshow(im1); plt.title(key, rotation=0, bbox=dict(facecolor='y', edgecolor='blue', alpha=0.3))
# plt.show()
return data, label
(2)下面class MyDataset(Dataset)展示了如何将tensor类型数据和标签处理成想要的数据(ImageFolder也是继承Dataset,但它是针对补充3结构的数据)
'''自定义Dataset。因为需要将某张误识别的数据经“各种灰度变换”得到"一批对应的变换后的数据list",再放入Dataset,再传入DataLoader'''
class MyDataset(Dataset):
def __init__(self, data, label, transform=None, target_transform=None):
self.data = data
self.label = label
self.transform = transform
self.target_transform = target_transform
def __getitem__(self, index):
img = self.data[index]
label = self.label
if self.transform:
img = self.transform(img)#将图片转为torch数据类型:(C, H, W)in the range [0.0, 1.0],C为RGB
if self.target_transform:
label =self.target_transform(label)
# plt.subplot(5,4,4*(index+1)); plt.imshow(img.permute(1, 2, 0)); plt.title(label)
return img, label # 返回图片和标签
def __len__(self):
return len(self.data)
(3)for每张误分类图片——>5种变换后打包成dataloader(只包含一个批次)——>测试得预测结果;计算所有图片每种变换下的准确率
'''测试DataLoader的函数'''
def test(dataloader, net):
trans_pred, trans_pred_label = [], []
net.eval()
with torch.no_grad():
for i, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
pred = F.softmax(net(X), dim=1)
pred_label = pred.argmax(1)
trans_pred.append(pred)
trans_pred_label.append(pred_label)
trans_pred = torch.cat(trans_pred, dim=0)
trans_pred_label = torch.cat(trans_pred_label, dim=0)
cat_pred_label = torch.cat((trans_pred, trans_pred_label.unsqueeze(1)), dim=1)
return(cat_pred_label)
if __name__ == "__main__":
import time
start=time.time()
'''获取误分类的图片(或测试集图片)路径和标签'''
fake_paths = os.walk("./mispic/fake"); real_paths = os.walk("./mispic/real")
fakefile = next(fake_paths); realfile = next(real_paths)
a=[fakefile[0]+'/'+i for i in fakefile[2]]; b=[realfile[0]+'/'+i for i in realfile[2]]
all_path = a+b; all_label = list(map(float, re.findall("0.0000|1.0000", "".join(all_path))))
if fakefile[0]=="./val/fake":all_label = [0]*len(a)+[1]*len(b)#对于测试集,直接得到标签
print("待测试图片的路径及图片数:", fakefile[0], len(a), '\t', realfile[0], len(b), '\t', len(all_path), len(all_label))
'''最终需要输出的结果'''
mis_final_pred = []
linearGray_correct, logGray_correct, indexGray_correct, gammaGray_correct, equalize_hist_correct = 0, 0, 0, 0, 0
correct = [linearGray_correct, logGray_correct, indexGray_correct, gammaGray_correct, equalize_hist_correct]
'''加载模型'''
PATH="./model/state_dict_model7729.pt"
net = resnet18(weights=None, num_classes=2).to(device) # 加载模型
net.load_state_dict(torch.load(PATH)) # load_state_dict接受字典数据而不是路径
loss_fn = nn.CrossEntropyLoss().to(device)
transform_test = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
'''开始测试:对所有图片'''
for b, path_label in enumerate(zip(all_path, all_label)):
# path = r"mispic\fake\tensor([0.3263, 0.6737, 0.0000]).png"; label = torch.tensor(0)
path, label = path_label[0], path_label[1]
#都进行五种灰度变换
trans_data, trans_label = transGray(path, label)
#每张图片都经五种变换作为一个批次打包成dataloader
mis_trans_data = MyDataset(trans_data, trans_label, transform=transform_test, target_transform=None)
batch_size = 5 #5种变换共5张图片
mis_trans_dataloader = DataLoader(mis_trans_data, batch_size=batch_size, shuffle=False)
#变换好的图片(DataLoader(只有一个批次)里的数据)给网络测试
pred_label = test(mis_trans_dataloader, net); mis_final_pred.append(pred_label)
#查看测试结果
train_features, train_labels = next(iter(mis_trans_dataloader))# print(f"Feature batch shape: {train_features.size()}")#[N, C, H, W]
for i in range(len(trans_data)):
img = train_features[i]; label = train_labels[i]#查看经变换后的第一个批次的所有图片
# plt.subplot(5,4,4*(i+1)); plt.imshow((img).permute(1, 2, 0))
# plt.title("NormalTo[-1,1]_"+str(pred_label[i,:])+"pre", bbox=dict(edgecolor='blue', alpha=0.1))
plt.subplot(5,4,4*(i+1)); plt.imshow((img/2+0.5).permute(1, 2, 0))
plt.title(str(pred_label[i,:])+"pre", bbox=dict(edgecolor='blue', alpha=0.1))#plt.imshow((img/2+0.5)
plt.suptitle(os.path.splitext(os.path.split(path)[1])[0]+"————5trans————"+str(mis_final_pred[b][:,-1]), bbox=dict(edgecolor='red', alpha=0.8))
#plt.show()#是否显示图片
plt.close("all")
#是否保存图片
# filePath = os.path.splitext(os.path.split(path)[1])[0]+"——》"+str(mis_final_pred[b][:,-1])+".png"; plt.savefig(filePath)
# img_PIL = Image.open(filePath); img_PIL.show()
#以下用来对 误分类或测试集 经 灰度变换后 来批测试,计算 各种灰度变换后 的准确率
for i, trans_correct in enumerate(correct):
correct[i] += (all_label[b] == mis_final_pred[b][i][-1]).type(torch.float).item()
# if b == 1: break #b代表测试的图片数量
print("被测试的图片数量:", len(all_label[:b+1]), '\t', "预测数量:", len(mis_final_pred[:b+1]), '\t', "每个预测输出数量", len(mis_final_pred[0]));
print([i for i in correct])
correct = list(map(lambda x:x/len(all_label[:b+1]), correct))
print(f"linearGray Test Accuracy: {(100*correct[0]):>0.2f}%")
print(f"logGray Test Accuracy: {(100*correct[1]):>0.2f}%")
print(f"indexGray Test Accuracy: {(100*correct[2]):>0.2f}%")
print(f"gammaGray Test Accuracy: {(100*correct[3]):>0.2f}%")
print(f"equalize_hist Test Accuracy: {(100*correct[4]):>0.2f}%")
end=time.time()
print('Running time: %ss, 即: %dmin, %ds'%(end-start, (end-start)/60, (end-start)%60))
反向传播算法5的核心是代价函数C对网络中参数(各层的权重w和偏置b)的偏导表达式 ∂ C ∂ w \frac{\partial C}{\partial w} ∂w∂C 和 ∂ C ∂ b \frac{\partial C}{\partial b} ∂b∂C
(1) 什么是图像归一化与标准化:
(2) 批标准化Batch Normalization的作用:
通过规范化的手段,将越来越偏的分布拉回到均值0、方差1的分布,使得激活函数的输入值主要集中在[-1,1],即激活函数对输入比较敏感的区域,从而使梯度变大,加快学习收敛速度,避免梯度消失的问题。
通过继承Module类的MyNet类为例,实例化为model对象来展示如何使用Module类的以下方法:add_module,apply,cuda/cpu/to,type,state_dict/load_state_dict,parameters/named_parameters,children/named_children,modules/named_modules,train/eval,required_grad_/zero_grad
class MyNet(nn.Module):
def __init__(self):
super(MyNet, self).__init__()
···
def forward(self, x):
···
return out
model = MyNet()
add_module(name: str, module: Optional['Module'])
用法:Adds a child module to the current module
apply(fn: Callable[['Module'], None])
(1) 用法1/2/3:Applies fn
recursively to every submodule. Typical use includes initializing the parameters of a model
(2) 说明:模型参数初始化是为了防止出现梯度消失或爆炸,让模型能够更快收敛,提高训练速度(为了让神经网络在训练过程中学习到有用的信息,要保证参数梯度不等于0,那么参数初始化应该使得各层激活值不会出现饱和且激活值不为0)。PyTorch已有默认的模型参数初始化,非特殊情况不必自定义模型参数初始化
。
cuda/cpu/to
device = "cuda" if torch.cuda.is_available() else "cpu"
tensor/model/loss.to(device)
# 或tensor/model/loss.cuda/cpu,查看是否在cuda上:
next(model.parameters()).is_cuda #或next(model.parameters()).device
type
state_dict/load_state_dict
parameters/named_parameters
children/named_children
modules/named_modules
train/eval
required_grad_/zero_grad
在这里插入代码片
自定义模型实例化后,用<实例名>.requires_grad_()选择是否可导
model.train/eval
model.train()6: 在train模式下,dropout网络层会按照设定的参数p设置保留激活单元的概率。BN层会继续计算数据的mean和var等参数并更新。(放在for epoch:for batch后以保证每一个batch都能进入model.train()的模式)
model.eval(): 在eval模式下,dropout层会让所有的激活单元都通过。BN层会停止计算和更新mean和var,直接使用在训练阶段已经学出的mean和var值。
(1)需要重写__init__()、forward()函数。__init__里面的模块是模型的“固有属性”,打印时会出现在model里面,而forward里面的不会。(2)需要注意下面三种方法中7添加层、索引层的区别8
import torch.nn as nn
class MyNet(nn.Module):
def __init__(self):
super(MyNet, self).__init__()
self.conv1 = nn.Conv2d(3, 32, 3, 1, 1)
self.relu = nn.ReLU()
self.max_pooling1 = nn.MaxPool2d(2)
self.dense1 = nn.Linear(32 * 3 * 3, 128)
self.dense2 = nn.Linear(128, 10)
def forward(self, x):
x = self.conv1(x)
x = self.relu(x)
x = self.max_pooling1(x)
x = x.view(x.size(0), -1)
x = self.dense1(x)
x = self.relu(x)
x = self.dense2(x)
return x
model = MyNet()
model.add_module('add_module_block', nn.Linear(10, 1)) # 添加层
print('打印model:\n',model)
module_list = [module for module in model.children()] # model.children()返回的是迭代器,用iter()、next()或for循环来访问
print('打印第一个模块:', module_list[0])
'''运行结果为:
打印model:
MyNet(
(conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu): ReLU()
(max_pooling1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(dense1): Linear(in_features=288, out_features=128, bias=True)
(dense2): Linear(in_features=128, out_features=10, bias=True)
(add_module_block): Linear(in_features=10, out_features=1, bias=True)
)
打印第一个模块: Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
'''
import torch.nn as nn
from collections import OrderedDict
class MyNet(nn.Module):
def __init__(self):
super(MyNet, self).__init__()
self.conv_block = nn.Sequential(
nn.Conv2d(3, 32, 3, 1, 1),
nn.ReLU(),
nn.MaxPool2d(2))#Seq里的各个层是没有名称的,默认按照0、1、2排序
self.dense_block = nn.Sequential(
OrderedDict([
("dense1", nn.Linear(32 * 3 * 3, 128)),
("relu2", nn.ReLU()),
("dense2", nn.Linear(128, 10))])#Seq里采用OrderedDict,所以各个层有相应的名称
)
def forward(self, x):
conv_out = self.conv_block(x)
res = conv_out.view(conv_out.size(0), -1)
out = self.dense_block(res)
return out
model = MyNet()
model.add_module('add_module_block',nn.Sequential(nn.ReLU(), nn.Linear(10, 2))) #添加层
print('打印model:\n', model) #只会出现__init__里面的层
print('打印第一个Sequential里面的第一个模块:\n', model.conv_block[0])#可以通过整数索引
# 当OrderedDict时也可以model.dense_block[0]或model.dense_block.dense1,但不可以model.dense_block['dense1']
'''运行结果为:
打印model:
MyNet(
(conv_block): Sequential(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU()
(2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(dense_block): Sequential(
(dense1): Linear(in_features=288, out_features=128, bias=True)
(relu2): ReLU()
(dense2): Linear(in_features=128, out_features=10, bias=True)
)
(add_module_block): Sequential(
(0): ReLU()
(1): Linear(in_features=10, out_features=2, bias=True)
)
)
打印第一个Sequential里面的第一个模块:
Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
'''
import torch.nn as nn
class MyNet(nn.Module):
def __init__(self):
super(MyNet, self).__init__()
self.conv_block = nn.ModuleList([
nn.Conv2d(3, 32, 3, 1, 1),
nn.ReLU(),
nn.MaxPool2d(2)])
self.dense_block = nn.ModuleList([
nn.Linear(32 * 3 * 3, 128),
nn.ReLU(),
nn.Linear(128, 10)])
def forward(self, x):
conv_out = self.conv_block(x)
res = conv_out.view(conv_out.size(0), -1)
out = self.dense_block(res)
return out
model = MyNet()
model.dense_block.append(nn.ReLU()) #用ModuleList的append函数在第一个ModuleList(dense_block)的末尾添加层
model.add_module('add_module_block',nn.Linear(10, 5)) #用Module类的add_module函数添加层
model.dense_block.insert(index=1, module=nn.Sigmoid())#用ModuleList的insert函数在第一个ModuleList的索引为1的位置插入层
print('打印model:\n', model) #只会出现__init__里面的层
print('打印第一个ModuleList里面的第一个模块:\n', model.conv_block[0])#ModuleList可以像列表一样来索引层
'''运行结果为:
打印model:
MyNet(
(conv_block): ModuleList(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU()
(2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(dense_block): ModuleList(
(0): Linear(in_features=288, out_features=128, bias=True)
(1): Sigmoid()
(2): ReLU()
(3): Linear(in_features=128, out_features=10, bias=True)
(4): ReLU()
)
(add_module_block): Linear(in_features=10, out_features=5, bias=True)
)
打印第一个ModuleList里面的第一个模块:
Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
'''
vgg16_false = torchvision.models.vgg16(pretrained=False)
vgg16_true = torchvision.models.vgg16(pretrained=True)
vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 10))
vgg16_false.classifier[6] = nn.Linear(4096, 10)#通过model.Seq_name[index]索引层
默认只能是【标量】对【标量/向量/矩阵】求导
, 所以常用out.sum().backward()作为标量输出,因为sum()对各分量导数为1,没有影响。也用out.mean().backward()作为标量输出,最终导数的结果相当于out.sum().backward()的导数除以输出向量的长度相当于【输出向量各分量】与【gradient向量各分量】加权求和形成1个新的标量,再对标量求梯度
"""实例:
y.backward(gradient=torch.tensor([1., 1., 1.]), retain_graph=True)
#设置gradient可以对向量和矩阵求导,设置retain_graph保留计算图
z_x = torch.autograd.grad(outputs=z, inputs=x, create_graph=True),
#设置create_graph用来求高阶导数
"""
import torch
x = torch.tensor([1., 2., 3.], requires_grad=True) # 1, 2, 3
y = 2 * x + 1 # 3, 5, 7 y对x的导数:2
z = torch.pow(y, 3) # 27, 125, 343 z对x的(一阶)导数(z只能对x,不能对y,因为x是叶节点):2*{3*[(2x+1)^^2]}=[54, 150, 294]
y.backward(gradient=torch.tensor([1., 1., 1.]), retain_graph=True) # 设置retain_graph=True来保留计算图,所以后面还能用backward()
print('y对x的导数: ', x.grad)#y对x的导数: tensor([2., 2., 2.])
x.grad = None
z.backward(gradient=torch.tensor([1., 0.1, 0.01]), retain_graph=True)
print('z对x的导数: ', x.grad)#z对x的导数: tensor([54.0000, 15.0000, 2.9400])
z = z.sum() # 用sum()将其变成标量,故下面不用需要用gradient
z1_y = torch.autograd.grad(outputs=z, inputs=y, create_graph=True) # z对y的一阶导数:{3*(y^^2)}=[27, 75, 147],create_graph使得后面可以求高阶导数
print('z对y的一阶导数: ', z1_y)#z对y的一阶导数: (tensor([ 27., 75., 147.], grad_fn=),)
z1_y = z1_y[0].sum()
z2_y = torch.autograd.grad(outputs=z1_y, inputs=y, create_graph=True) # z对y的二阶导数:2*{3*(y)}=[18, 30, 42]
print('z对y的二阶导数: ', z2_y)#z对y的二阶导数: (tensor([18., 30., 42.], grad_fn=),)
# z对y的(三阶)导数:z对y的二阶导数:2*{3*(y)}再对y求导,即6
z3_y = torch.autograd.grad(outputs=z2_y[0].sum(), inputs=y, create_graph=True)
print('z对y的三阶导数: ', z3_y)#z对y的三阶导数: (tensor([6., 6., 6.], grad_fn=),)
# z对y的(四阶)导数:z对y的三阶导数6再对y求导,即0
print('z对y的四阶导数: ', torch.autograd.grad(outputs=z3_y[0].sum(), inputs=y, create_graph=True))#z对y的四阶导数: (tensor([0., 0., 0.]),)
x.grad, z.grad = None, None
z.backward(retain_graph=True)
print('z对x的(一阶)导数: ', x.grad)#z对x的(一阶)导数: tensor([ 54., 150., 294.])
z_x = torch.autograd.grad(outputs=z, inputs=x, create_graph=True) # z对y的一阶导数:{3*(y^^2)}=[27, 75, 147]
print('z对x的一阶导数: ', z_x)#z对x的一阶导数: (tensor([ 54., 150., 294.], grad_fn=),)
# z对x的(二阶)导数:z对x的一阶导数2*{3*[(2x+1)^^2]}再对x求导,即2*{3*[2*2*(2x+1)]}=[ 72., 120., 168.]
print('z对x的二阶导数: ', torch.autograd.grad(outputs=z_x[0].sum(), inputs=x, create_graph=True))#z对x的二阶导数: (tensor([ 72., 120., 168.], grad_fn=),)
1、交叉熵损失函数的用法:
给定样本及真实标签 y l a b e l {y_{label}} ylabel(CE会自动编码成对应的onehot形式: y → \overrightarrow y y),样本通过模型进行前向传播得到最后一层的输出 o → \overrightarrow o o,再通过 s o f t max {soft\max } softmax函数得到归一化为[0,1]之间的预测结果 y ^ → \overrightarrow {\hat y} y^。通过交叉熵损失函数CE计算 y → \overrightarrow y y与 y ^ → \overrightarrow {\hat y} y^之间的损失,再对损失求导进行反向传播来更新模型参数。
2、softmax公式和交叉熵损失函数的几种常见形式:
( 1 ) y ^ i = s o f t max ( o ) = e o i ∑ j ∈ c l a s s j e o j (1)\ {\hat y_i} = soft\max ( o ) = \frac{{{e^{{o_i}}}}}{{\sum\limits_{j \in class}^j {{e^{{o_j}}}} }} (1) y^i=softmax(o)=j∈class∑jeojeoi, s o f t max {soft\max } softmax函数,一个 o i {o_i} oi对应一个 y ^ i {{\hat y}_i} y^i,但由 s o f t max {soft\max } softmax函数表达式可知 y ^ i {{\hat y}_i} y^i不仅与对应的 o i {o_i} oi有关,还与其它的 o ^ j {{\hat o}_j} o^j有关,因此存在多个偏导。
( 2 ) C E = − ∑ i ∈ c l a s s i ( y i lg y ^ i ) (2)CE = - \sum\limits_{{\text{i}} \in class}^i {({y_i}} \lg {{\hat y}_i}) (2)CE=−i∈class∑i(yilgy^i),是交叉熵损失函数CE的标准形式。
( 3 ) C E = − lg y ^ ( 与 l a b e l 对应的 ) (3)CE = - \lg {{\hat y}_{(与label对应的)}} (3)CE=−lgy^(与label对应的),是多分类问题的交叉熵损失函数,真实标签输入一个标量值,当用onehot编码后其余地方为0,便得到该形式。
( 4 ) C E = − [ y + lg y ^ + + y − l g y ^ − ] = − [ y + lg y ^ + + ( 1 − y + ) l g ( 1 − y ^ + ) ] (4)CE = - [{y_ + }\lg {{\hat y}_ + } + {y_ - }{\text{l}}g{{\hat y}_ - }] = - [{y_ + }\lg {{\hat y}_ + } + (1 - {y_ + }){\text{l}}g(1 - {{\hat y}_ + })] (4)CE=−[y+lgy^++y−lgy^−]=−[y+lgy^++(1−y+)lg(1−y^+)],是二分类问题的交叉熵损失函,见下文BCELoss函数。
3、实例分析:多分类问题交叉熵损失函数及偏导的计算
交叉熵损失函数的计算:如[猫,狗,猪]三分类问题,已知给定某张图片样本的标签1(即是猫),对应onehot编码为[1,0,0],若经softmax后预测输出为[0.7,0.1,0.2],则CE=-(1*lg0.7+0*lg0.1+0*lg0.2)=-lg0.7= − lg y ^ l a b e l - \lg {{\hat y}_{label}} −lgy^label,即只有标签对应的 y ^ i {{\hat y}_i} y^i进行了负对数。
损失函数的偏导:CE对最后一层输出 o → \overrightarrow o o的偏导数公式如下(不同在于softmax函数求导),结果为[-0.3,0.1,0.2]: ∂ C E ∂ o 1 = ∂ C E ∂ y ^ i = 1 ∗ ∂ y ^ i = 1 ∂ o j = i = 1 = − 1 y ^ i = 1 ∗ [ y ^ i = 1 ( 1 − y ^ i = 1 ) ] = y ^ i = 1 − 1 ∂ C E ∂ o 2 = ∂ C E ∂ y ^ i = 1 ∗ ∂ y ^ i = 1 ∂ o j ∈ ( 2 , 3 ) ≠ i = − 1 y ^ i = 1 ∗ ( − y ^ j ∈ ( 2 , 3 ) ≠ i y ^ i = 1 ) = y ^ j ∈ ( 2 , 3 ) ≠ i \begin{array}{l}\frac{{\partial CE}}{{\partial {o_1}}} = \frac{{\partial CE}}{{\partial {{\hat y}_{i = 1}}}}*\frac{{\partial {{\hat y}_{i = 1}}}}{{\partial {o_{j = i = 1}}}} = - \frac{1}{{{{\hat y}_{i = 1}}}}*[{{\hat y}_{i = 1}}(1 - {{\hat y}_{i = 1}})] = {{\hat y}_{i = 1}} - 1\\\frac{{\partial CE}}{{\partial {o_2}}} = \frac{{\partial CE}}{{\partial {{\hat y}_{i = 1}}}}*\frac{{\partial {{\hat y}_{i = 1}}}}{{\partial {o_{j \in (2,3) \ne i}}}} = - \frac{1}{{{{\hat y}_{i = 1}}}}*( - {{\hat y}_{_{j \in (2,3) \ne i}}}{{\hat y}_{i = 1}}) = {{\hat y}_{_{j \in (2,3) \ne i}}}\end{array} ∂o1∂CE=∂y^i=1∂CE∗∂oj=i=1∂y^i=1=−y^i=11∗[y^i=1(1−y^i=1)]=y^i=1−1∂o2∂CE=∂y^i=1∂CE∗∂oj∈(2,3)=i∂y^i=1=−y^i=11∗(−y^j∈(2,3)=iy^i=1)=y^j∈(2,3)=i
BCEWithLogitsLoss/BCELoss, MSELoss/L1Loss, KLDivLoss
等损失函数CrossEntropyLoss/NLLLoss
等损失函数常见的损失函数:对于回归问题,常用均方损失函数;对于分类问题,常用交叉熵损失函数和二元交叉熵损失函数。(重要链接1:熵/KL散度/交叉熵的关系,链接2:pytorch官网-Loss Functions)
a、
交叉熵损失函数nn.CrossEntropyLoss: C E = − ∑ i ∈ c l a s s i ( y i lg y ^ i ) {CE = - \sum\limits_{{\rm{i}} \in class}^i {({y_i}} \lg {{\hat y}_i})} CE=−i∈class∑i(yilgy^i), y i y_i yi是真实标签, y ^ i {{\hat y}_i} y^i是神经网络的输出。等价于Softmax(最后一层神经元的输出归一化到0~1,且和为1)–Log(取对数)–NLLLoss(将前面的对数取负,再取出label对应的值,再求均值)。函数log_softmax相当于对softmax做了log操作。
b、
用于多分类问题,判断属于哪一类
c、注意
:调用F.cross_entropy函数时,input(即即神经网络的输出 y ^ i {{\hat y}_i} y^i)不需要自己接softmax层,target(即 y i y_i yi或label)不是one_hot编码格式。Input: (N, C),where C = number of classes, Target: (N)。
import torch
import torch.nn.functional as F
torch.manual_seed(1)
input = torch.randn(2,3)#对于[猫,狗,猪]三分类问题,2为两张图片样本,3为模型最后一层的输出神经元个数
target = torch.tensor([0,2])#target不是one_hot编码格式,分别是2为两张图片样本的标签
#CrossEntropyLoss等价于Softmax–Log()–NLLLoss等价于LogSoftmax–NLLLoss,以下三个loss是相同的
loss = F.cross_entropy(input,target)
loss = F.nll_loss(F.log_softmax(input, dim=1),target)
loss = F.nll_loss(torch.log(F.softmax(input, dim=1)),target)
'''过程:
input:tensor([[ 0.6614, 0.2669, 0.0617],
[ 0.6213, -0.4519, -0.1661]])
target:tensor([0, 2])
F.log_softmax(input, dim=1):
tensor([[-0.7989, -1.1933, -1.3986],
[-0.5861, -1.6593, -1.3735]])
cross_entropy: tensor(1.0862),即(0.7989+1.3735)/2
'''
a、
KL散度 D K L ( y i ∥ y ^ i ) = ∑ i ∈ c l a s s i ( y i ( lg y i − lg y ^ i ) ) {D_{K L}({y_i} \| {{\hat y}_i}) = \sum\limits_{{\rm{i}} \in class}^i {({y_i}}( \lg {{y}_i}- \lg {{\hat y}_i}))} DKL(yi∥y^i)=i∈class∑i(yi(lgyi−lgy^i)),用来衡量两个概率分布的差异性。
b、应用:
当KL散度越小时(KL散度恒大于等于0,即当KL散度越接近0时),学生神经网络的输出 y ^ i {{\hat y}_i} y^i就越接近于教师网络的输出(相当于真实标签) y ^ i {{\hat y}_i} y^i。
c、注意
:pytorch官方的F.cross_entropy函数公式为 y i ( lg y i − y ^ i ) {{y_i}}( \lg {{y}_i}- {{\hat y}_i}) yi(lgyi−y^i),因此使用该接口时,input(即神经网络的输出)需要对自身进行lg操作,target(即 y i y_i yi)为真实的概率分布。Input: (N,∗),Target:(N,∗)。
import torch
import torch.nn.functional as F
x = torch.tensor([0.4, 0.4, 0.2], dtype=torch.float32)
y = torch.tensor([0.5, 0.1, 0.4], dtype=torch.float32)
# 用标签y(即target)指导x(即input,神经网络的输出),kl散度:ylg(y/x)=y(lgy-lgx)
# F.kl_div给的kl散度公式是y(lgy-x),所以接口传入的是:x的对数概率,y的概率
logp_x = torch.log(x)
kl_mean = F.kl_div(logp_x, y, reduction='batchmean')
print(kl_mean, torch.matmul(y,torch.log(y/x))/len(x))#输出:tensor(0.0834) tensor(0.0834)
a、
BCEWithLogitsLoss等价于:Sigmoid+二元交叉熵损失函数nn.BCELoss − [ y + lg y ^ + + ( 1 − y + ) l g ( 1 − y ^ + ) ] - [{y_ + }\lg {{\hat y}_ + } + (1 - {y_ + }){\text{l}}g(1 - {{\hat y}_ + })] −[y+lgy^++(1−y+)lg(1−y^+)]。即神经元的输出经sigmoid归一化后的对数损失函数,或逻辑回归模型的损失函数。求导过程
b、用于二分类问题
Input:(N),Target:(N),例:判断N张图片每一张是猫还是不是猫(即狗)
b、用于多标签分类
Input:(N,M),Target:(N,M),例:判断N张图片每一张有还是没有花(某标签)、房子(某标签)、猫(某标签)等共M种标签的东西
c、注意:
调用nn.BCEWithLogitsLoss时,input(即神经网络的输出 y ^ i {{\hat y}_i} y^i)不需要自己接sigmoid层,target(即 y i y_i yi或label)里面元素的值只能取0或1。Input: (N,∗),Target:(N,∗)。
import torch
import torch.nn as nn
torch.manual_seed(1)
input = torch.randn(3)#是猫还是不是猫(即狗)]的二分类问题。3为三张图片样本
target = torch.tensor([1., 0., 0.])#分别是3为两张图片样本的标签,第一张1代表是猫,第二三张图片0代表是不是猫(即狗)
#BCEWithLogitsLoss等价于Sigmoid–BCELoss,以下三个loss是相同的
loss = nn.BCEWithLogitsLoss()(input, target)
loss = nn.BCELoss()(nn.Sigmoid()(input),target)
'''过程:
input:tensor([0.6614, 0.2669, 0.0617])
target:tensor([1., 0., 0.])
nn.Sigmoid()(input):
tensor([0.6596, 0.5663, 0.5154])
cross_entropy: tensor(0.6587),即-[lg(0.6596)+lg(1-0.5663)+lg(1-0.5154)]/3
'''
import torch
def onehot_encoding(labels, n_classes):
return torch.zeros(labels.size(0), n_classes).scatter_(dim=1, index=labels.view(-1, 1), value=1)
def label_smoothing(targets_onehot, epsilon, n_classes):
return targets_onehot*(1 - epsilon) + torch.ones_like(targets_onehot)*0.1/n_classes
targets_onehot = onehot_encoding(torch.tensor([0,2]), 4)
targets_LSR = label_smoothing(targets_onehot, 0.1, 4)
'''结果:
labels:tensor([0,2])
targets_onehot:tensor([[1., 0., 0., 0.],
[0., 0., 1., 0.]])
targets_LSR: tensor([[0.9250, 0.0250, 0.0250, 0.0250],
[0.0250, 0.0250, 0.9250, 0.0250]])
'''
优化算法寻找目标函数最小值的过程就像使用一个小球在一个超平面滚来滚去最终滚到最低点的过程,注意是寻找最小值而不是接近0的值
(寻找函数最小值实例)
#(1)可以指定特定的优化器,如SGD、Adam
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
#(2)对于一个优化器而言,可以给特定层的参数指定特定的学习速率、权重衰减等
#如下:除了classifier层的学习率是1e-3,其余层的学习率是1e-2,所有层的momentum是0.9
optimizer = optim.SGD([{'params': model.base.parameters()},
{'params': model.classifier.parameters(), 'lr': 1e-3}],
lr=1e-2, momentum=0.9)
for input, target in dataset:
optimizer.zero_grad()
output = model(input), loss = loss_fn(output, target)
loss.backward()
optimizer.step()
for input, target in dataset:
def closure():
optimizer.zero_grad()
output = model(input), loss = loss_fn(output, target)
loss.backward()
return loss
optimizer.step(closure)
# 调节学习率的模型训练模板,scheduler.step()应该在optimizer.step()后面
scheduler = ...
for epoch in range(100):
train(...)
validate(...)
scheduler.step()
#手动调整实例:
def adjust_learning_rate(optimizer, epoch):
if epoch < 80:lr = 0.1
elif epoch < 120:lr = 0.01
else:lr = 0.001
for param_group in optimizer.param_groups:
param_group['lr'] = lr
optimizer = torch.optim.SGD(net.parameters(),lr=0.1,momentum=0.9,weight_decay=5e-4)
for epoch in range(20):
adjust_learning_rate(optimizer, epoch)
train(...)
validate(...)
scheduler.step()
官方学习率调整函数能一个接一个地应用在前一个调整函数所获得的学习速率上,相当于组合调整
,如:model = [Parameter(torch.randn(2, 2, requires_grad=True))]
optimizer = SGD(model, 0.1)
scheduler1 = ExponentialLR(optimizer, gamma=0.9)
scheduler2 = MultiStepLR(optimizer, milestones=[30,80], gamma=0.1)
for epoch in range(20):
for input, target in dataset:
optimizer.zero_grad()
output = model(input)
loss = loss_fn(output, target)
loss.backward()
optimizer.step()
scheduler1.step() #lr1=lr*0.9**epoch
scheduler2.step() #在第30和80个epoch时,在上面基础上:lr2=lr1*0.9
https://pytorch.org/tutorials ↩︎
https://blog.csdn.net/weixin_43485035/article/details/119062538 ↩︎
https://zhuanlan.zhihu.com/p/48976706 ↩︎
https://www.jianshu.com/p/00ed9abc5555 ↩︎
https://blog.csdn.net/bitcarmanlee/article/details/78819025 ↩︎
https://blog.csdn.net/qq_38410428/article/details/101102075 ↩︎
https://blog.csdn.net/qq_38863413/article/details/104118055 ↩︎
https://blog.csdn.net/qq_27825451/article/details/90550890 ↩︎