模型权重保存
导入必要模块
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
import os
import copy
导入数据
# Dataset root; the "train" and "test" split folders sit directly beneath it.
base_dir = r"./dataset/4Weather"
train_dir, test_dir = (
    os.path.join(base_dir, split) for split in ("train", "test")
)
加载训练集和测试集
# Preprocessing applied to every image of both splits: resize to 96x96,
# convert to a tensor, then normalize each of the three channels with
# mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize((96, 96)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)
# One ImageFolder dataset per split (train first, then test); both share the
# same preprocessing pipeline.
train_ds, test_ds = (
    torchvision.datasets.ImageFolder(split_root, transform=transform)
    for split_root in (train_dir, test_dir)
)
# Mini-batch size used by both loaders.
BATCH_SIZE = 32

# Only the training loader shuffles; the test loader keeps dataset order.
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)
定义网络模型类
class Net(nn.Module):
    """Small CNN classifier: three conv + max-pool stages, then three linear layers.

    The first linear layer takes 64 * 10 * 10 features. That matches 3x96x96
    inputs: each unpadded 3x3 convolution trims 2 pixels and each 2x2 pool
    halves the size (96 -> 94 -> 47 -> 45 -> 22 -> 20 -> 10). The final layer
    emits 4 raw scores (logits) per sample; no softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and registration order define the state_dict keys,
        # so they must stay stable for saved checkpoints to load.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.fc1 = nn.Linear(in_features=64 * 10 * 10, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=4)

    def forward(self, x):
        """Return the 4 class logits for a batch of images ``x``."""
        # Feature extractor: conv -> ReLU -> shared 2x2 max-pool, three times.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))
        # Flatten each sample's feature maps into one vector for the linear head.
        x = x.view(-1, self.fc1.in_features)
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)
定义损失函数和优化器
# Build the network and move it to the GPU when one is available.
model = Net()
if torch.cuda.is_available():
    model = model.to("cuda")

# Cross-entropy over the raw logits, optimized with Adam at lr = 0.001.
# `fit` reads both of these module-level names.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
定义训练和测试函数
def _run_epoch(model, loader, criterion, device, opt=None):
    """Make one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``opt`` is given, the model is put in training mode and updated after
    every batch. Otherwise the model is put in eval mode and gradient tracking
    is disabled for the whole pass.
    """
    training = opt is not None
    model.train(training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.set_grad_enabled(training):
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            loss = criterion(logits, y)
            if training:
                opt.zero_grad()
                loss.backward()
                opt.step()
            batch_size = y.size(0)
            # Assumes `criterion` returns the batch mean (the default for
            # nn.CrossEntropyLoss); weighting by batch size makes the final
            # value a true per-sample average even when the last batch is
            # smaller than the others.
            loss_sum += loss.item() * batch_size
            correct += (logits.argmax(dim=1) == y).sum().item()
            seen += batch_size

    if seen == 0:
        # Empty loader: report zeros instead of dividing by zero.
        return 0.0, 0.0
    return loss_sum / seen, correct / seen


def fit(epoch, model, trainloader, testloader, opt=None, criterion=None):
    """Train ``model`` for one epoch, evaluate it, and print and return the metrics.

    Args:
        epoch: Epoch index; only used in the printed summary.
        model: Network to train. It is left in eval mode on return.
        trainloader: Iterable of ``(inputs, labels)`` batches used for training.
        testloader: Iterable of ``(inputs, labels)`` batches used for evaluation.
        opt: Optimizer that updates ``model``. Defaults to the module-level
            ``optimizer``, which must then be bound to this model's parameters.
        criterion: Loss callable ``criterion(logits, labels)``. Defaults to the
            module-level ``loss_fn``.

    Returns:
        ``(epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc)`` where the
        losses are per-sample averages and the accuracies are fractions in
        ``[0, 1]``. Training accuracy is measured on the predictions made
        before each batch's optimizer step.
    """
    if opt is None:
        opt = optimizer
    if criterion is None:
        criterion = loss_fn

    # Send batches to wherever the model actually lives instead of to "cuda
    # if available"; a CPU model on a CUDA machine would otherwise fail with
    # a device mismatch.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    epoch_loss, epoch_acc = _run_epoch(model, trainloader, criterion, device, opt)
    epoch_test_loss, epoch_test_acc = _run_epoch(model, testloader, criterion, device)

    print("epoch:", epoch,
          "loss:", round(epoch_loss, 3),
          "accuracy:", round(epoch_acc, 3),
          "test_loss:", round(epoch_test_loss, 3),
          "test_accuracy:", round(epoch_test_acc, 3)
          )
    return epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc
开启训练
# Train for a fixed number of epochs, keeping one value per epoch for each of
# the four metrics returned by `fit`.
epochs = 100
train_loss, train_acc, test_loss, test_acc = [], [], [], []

for epoch in range(epochs):
    epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc = fit(
        epoch, model, train_dl, test_dl
    )
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)
    test_loss.append(epoch_test_loss)
    test_acc.append(epoch_test_acc)

# Plot both loss curves against the 1-based epoch number.
for curve_label, curve in (("train_loss", train_loss), ("test_loss", test_loss)):
    plt.plot(range(1, epochs + 1), curve, label=curve_label)
plt.legend()
模型保存
"""
保存模型
state_dict 就是一个简单的Python字典，它把模型的每一层映射到该层的参数张量（比如：weights和biases、batchnorm的running_mean等）；
torch.optim 优化器对象也有自己的 state_dict。借助这种一一映射，可以方便地保存、更新、修改和恢复模型。
"""
# Persist only the learned parameters (the state_dict), not the module object.
PATH = "./my_net.pth"
torch.save(obj=model.state_dict(), f=PATH)
加载模型
# Rebuild the architecture, then restore the saved weights into it.
new_model = Net()
# Load onto the CPU first so a checkpoint written on a GPU machine can still
# be read on a CPU-only one.
new_model.load_state_dict(torch.load(PATH, map_location="cpu"))
# Move to the GPU only when one exists; this matches the guarded device
# handling used for the input batches in the evaluation loop.
if torch.cuda.is_available():
    new_model.to("cuda")
测试
# Measure the accuracy of the reloaded model on the test split.
test_correct = 0
test_total = 0
new_model.eval()
with torch.no_grad():
    for images, labels in test_dl:
        if torch.cuda.is_available():
            images = images.to("cuda")
            labels = labels.to("cuda")
        # Predicted class = index of the largest logit for each sample.
        predicted = new_model(images).argmax(dim=1)
        test_correct += (predicted == labels).sum().item()
        test_total += labels.size(0)
epoch_test_acc = test_correct / test_total
print(epoch_test_acc)
训练函数保存最优参数
# Retrain from scratch, this time remembering the weights of the epoch with
# the best test accuracy.
model = Net()
if torch.cuda.is_available():
    model.to("cuda")
# `fit` steps the module-level `optimizer` by default, so the optimizer for
# the fresh model must be bound to that exact name. Binding it to `optim`
# would leave `fit` updating the previous model's parameters and would also
# shadow the imported `torch.optim` module.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Snapshot of the best weights so far. deepcopy is required because
# state_dict() returns references to the live parameter tensors.
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0

train_loss = []
train_acc = []
test_loss = []
test_acc = []
for epoch in range(epochs):
    epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc = fit(epoch, model, train_dl, test_dl)
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)
    test_loss.append(epoch_test_loss)
    test_acc.append(epoch_test_acc)
    if epoch_test_acc > best_acc:
        # Track the same quantity that is compared (test accuracy); storing
        # the training accuracy here would corrupt later comparisons.
        best_acc = epoch_test_acc
        best_model_wts = copy.deepcopy(model.state_dict())

# Restore the best-performing weights and switch to inference mode.
model.load_state_dict(best_model_wts)
model.eval()
完整模型的保存和加载
# --- Save and reload the entire module object (architecture + weights). ---
# NOTE(review): this pickles the whole object, so loading needs the `Net`
# class to be importable; newer PyTorch releases may also require
# `weights_only=False` on torch.load for this call - confirm against the
# installed version.
PATH = r"./my_whole_model.pth"
torch.save(obj=model, f=PATH)
new_model2 = torch.load(PATH)
new_model2.eval()

# --- Save the weights, then load them onto the CPU. ---
PATH = "./my_gpu_model_wts"
torch.save(obj=model.state_dict(), f=PATH)
device = torch.device("cpu")
model = Net()
# map_location remaps the stored tensors to the CPU while loading.
cpu_state = torch.load(PATH, map_location=device)
model.load_state_dict(cpu_state)
# --- Save the weights, then load them onto the GPU (CPU fallback). ---
PATH = r"./my_gpu_model2_wts"
torch.save(model.state_dict(), PATH)
# The target must be a torch.device, not a model instance. Fall back to the
# CPU so the script still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(PATH, map_location=device))
model.to(device)