import os
import json
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import torchvision
from torchvision import models
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
import visdom
# from tensorboardX import SummaryWriter
from torch.utils.tensorboard import SummaryWriter
另外一种解决办法是应用迁移学习(transfer learning),将从源数据集学到的知识迁移到目标数据集上。
You can construct a model with random weights by calling its constructor:
resnet18 = models.resnet18()
We provide pre-trained models. These can be constructed by passing pretrained=True:
resnet18 = models.resnet18(pretrained=True)
Instancing a pre-trained model will download its weights to a cache directory. This directory can be set using the TORCH_MODEL_ZOO
environment variable. See torch.utils.model_zoo.load_url()
for details.
Some models use modules which have different training and evaluation behavior, such as batch normalization. To switch between these modes, use model.train()
or model.eval()
as appropriate.
All pre-trained models expect input images normalized in the same way, i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), where H and W are expected to be at least 224
. The images have to be loaded in to a range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406]
and std = [0.229, 0.224, 0.225]
关于为什么标准化使用mean = [0.485, 0.456, 0.406]
and std = [0.229, 0.224, 0.225]
The origin of the mean=[0.485, 0.456, 0.406]
and std=[0.229, 0.224, 0.225]
we use for the normalization transforms on almost every model is only partially known. We know that they were calculated on a random subset of the train
split of the ImageNet2012
dataset. Which images were used or even the sample size as well as the used transformation are unfortunately lost.
I’ve tried to reproduce them and found that we probably resized each image to 256
and center cropped it to 224
The process for obtaining the values of mean and std is roughly equivalent to:
# Illustrative sketch of how the ImageNet normalization constants were
# (roughly) obtained.  `subset` is a placeholder for "pick a random subset of
# the dataset and yield its image tensors"; it is not defined in this file.
import torch
from torchvision import datasets, transforms as T

transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()])
dataset = datasets.ImageNet(".", split="train", transform=transform)

means = []
stds = []
for img in subset(dataset):
    # Collect the per-image statistics; without this step the two lists stay
    # empty and the averages below are meaningless.
    means.append(torch.mean(img))
    stds.append(torch.std(img))

# Average the per-image statistics over the sampled subset.
mean = torch.mean(torch.tensor(means))
std = torch.mean(torch.tensor(stds))
models.alexnet(pretrained=False, progress=True, **kwargs)
progress: if True, displays a progress bar of the download to stderr.
models.vgg16_bn(pretrained=False, progress=True, **kwargs)
models.resnet18(pretrained=False, progress=True, **kwargs)
models.densenet169(pretrained=False, progress=True, **kwargs)
models.inception_v3(pretrained=False, progress=True, **kwargs)
Important: In contrast to the other models the inception_v3 expects tensors with a size of N x 3 x 299 x 299
, so ensure your images are sized accordingly.
models.googlenet(pretrained=False, progress=True, **kwargs)
class HotdogData(Dataset):
    """Binary hot-dog / not-hot-dog image dataset read from one split directory.

    ``img_path`` must contain two sub-directories: ``hotdog`` (positive
    samples, label 1) and ``not-hotdog`` (negative samples, label 0).  Images
    are looked up by number as ``<k>.png``; numbering starts at 0 when
    ``img_path`` ends with ``'train'`` and at 1000 otherwise.

    Args:
        img_path: Root directory of the split.
        transforms: Optional callable applied to every loaded PIL image.
    """

    def __init__(self, img_path, transforms=None):
        # Record the directory layout and count the samples of each class.
        self.transforms = transforms
        self.img_path = img_path
        self.pos_dir = img_path + '/hotdog'
        self.neg_dir = img_path + '/not-hotdog'
        self.pos_num = len(os.listdir(self.pos_dir))
        self.neg_num = len(os.listdir(self.neg_dir))

    def __len__(self):
        """Return the total number of samples (positives plus negatives)."""
        return self.pos_num + self.neg_num

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``.

        Indices ``[0, pos_num)`` map to positive samples, the remaining ones
        to negative samples.

        Raises:
            IndexError: If ``index`` is outside ``[0, len(self))``.
        """
        # Fail with the conventional sequence error instead of attempting to
        # open a file that cannot exist.
        if not 0 <= index < len(self):
            raise IndexError('HotdogData index out of range: %r' % (index,))

        # File numbering starts at 0 for the train split, 1000 otherwise.
        first_number = 0 if self.img_path.endswith('train') else 1000

        if index < self.pos_num:  # positive sample
            label = 1
            folder = self.pos_dir
            number = index + first_number
        else:  # negative sample
            label = 0
            folder = self.neg_dir
            number = index - self.pos_num + first_number

        # Close the file handle promptly and force 3-channel RGB so that
        # grayscale / RGBA / palette PNGs still match the 3-channel
        # normalization applied downstream.
        with Image.open(folder + '/' + str(number) + '.png') as handle:
            img = handle.convert('RGB')

        if self.transforms:
            img = self.transforms(img)
        return img, label
# Training-time augmentation followed by ImageNet normalization.
train_transform = transforms.Compose([
    # Random crop covering 80-100% of the image area, resized to 224x224.
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    # Flip the image left-right with probability 0.5.
    transforms.RandomHorizontalFlip(),
    # transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0),
    transforms.RandomRotation(degrees=5, expand=False, fill=None),
    # Convert the PIL image to a float tensor scaled to [0, 1].
    transforms.ToTensor(),
    # Standardize with the ImageNet channel means / standard deviations.
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Evaluation-time preprocessing: no augmentation, only tensor conversion and
# normalization.
# NOTE(review): there is no Resize/CenterCrop here, so this presumably relies
# on all test images already sharing one size (default batching needs equal
# shapes) -- confirm against the dataset.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

train_data = HotdogData('D:/Download/Dataset/hotdog/train', transforms=train_transform)
trainloader = DataLoader(train_data, batch_size=64, shuffle=True)

test_data = HotdogData('D:/Download/Dataset/hotdog/test', transforms=test_transform)
testloader = DataLoader(test_data, batch_size=64, shuffle=True)
# Fetch one random training batch and keep its first ten samples for a preview.
dataiter = iter(trainloader)
# Use the built-in next(): DataLoader iterators follow the iterator protocol
# and do not reliably expose a .next() method.
images, labels = next(dataiter)
images = images[:10]
labels = labels[:10]

# Show the images in the 'hotdog' visdom environment, five per row.  They are
# displayed as loaded, i.e. still normalized (the un-normalize step is disabled).
vis = visdom.Visdom(env='hotdog')
# images = images / 2 + 0.5 # unnormalize
vis.images(images, nrow=5, opts=dict(title='hotdog'))

# Print the matching labels (1 = hotdog, 0 = not-hotdog).
print(' '.join('%d' % label for label in labels))
# Load an ImageNet-pretrained ResNet-18 and place it on the GPU when available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = models.resnet18(pretrained=True, progress=True)
net = net.to(device)

# Visualize the network structure in TensorBoard.  The dummy batch is created
# on the same device as the model; tracing a CUDA model with a CPU tensor
# fails with a device-mismatch error.
dummy_input = torch.rand(13, 3, 224, 224, device=device)
with SummaryWriter('runs/exp-1') as w:
    w.add_graph(net, (dummy_input,))
# Build the fine-tuning model: pretrained ResNet-18 with a fresh 2-class head.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = models.resnet18(pretrained=True, progress=True)

# Number of input features of the original fully connected layer.
num_fc_in = net.fc.in_features
# Replace the classifier head: binary problem, so out_features = 2.
net.fc = nn.Linear(num_fc_in, 2)

# Move the model only AFTER swapping the head; otherwise the new nn.Linear
# stays on the CPU while the backbone lives on the GPU.
net = net.to(device)

criterion = nn.CrossEntropyLoss()

# Base learning rate for the pretrained backbone; the new head uses 10x that.
lr = 0.001 / 10
# ids of the head's parameters, used to separate them from the backbone's.
fc_params = list(map(id, net.fc.parameters()))
# All remaining (backbone) parameters, materialized as a list so the group can
# be inspected or reused instead of being a one-shot iterator.
base_params = [p for p in net.parameters() if id(p) not in fc_params]
optimizer = optim.Adam([
    {'params': base_params},
    {'params': net.fc.parameters(), 'lr': lr * 10}],
    lr=lr, betas=(0.9, 0.999))
# Fine-tune the network, logging the training loss to stdout and TensorBoard.
epoch_num = 2            # number of passes over the training set
evaluate_batch_num = 5   # report the running loss every this many mini-batches

# Make sure BatchNorm layers are in training mode.
net.train()

# One writer for the whole run instead of re-opening the log directory for
# every logged value.
with SummaryWriter('runs/exp-1') as w:
    for epoch in range(epoch_num):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_loss = 0.0
        for i, data in enumerate(trainloader):
            # get the inputs and place them on the same device as the model
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # accumulate statistics
            batch_loss = loss.item()
            running_loss += batch_loss
            epoch_loss += batch_loss

            # report the mean loss of the last `evaluate_batch_num` mini-batches
            if i % evaluate_batch_num == evaluate_batch_num - 1:
                print('[%d, %5d] loss: %.3f' %
                      (epoch, i + 1, running_loss / evaluate_batch_num))
                w.add_scalar('TrainLoss/epoch' + str(epoch),
                             running_loss / evaluate_batch_num,
                             i // evaluate_batch_num)
                running_loss = 0.0

        # mean loss over the whole epoch
        w.add_scalar('TrainLoss/all', epoch_loss / len(trainloader), epoch)
        epoch_loss = 0.0

print('Finished Training')
[0, 5] loss: 0.536
[0, 10] loss: 0.225
[0, 15] loss: 0.208
[0, 20] loss: 0.132
[0, 25] loss: 0.158
[0, 30] loss: 0.249
[1, 5] loss: 0.084
[1, 10] loss: 0.093
[1, 15] loss: 0.100
[1, 20] loss: 0.101
[1, 25] loss: 0.084
[1, 30] loss: 0.089
Finished Training
# Measure top-1 accuracy on the test set.
correct = 0
total = 0

# Switch BatchNorm layers to inference behavior; in training mode they would
# normalize with per-batch statistics and update their running averages.
net.eval()

with torch.no_grad():  # no gradients needed for evaluation
    for data in testloader:
        images, labels = data
        # Keep the batch on the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        outputs = net(images)
        # The predicted class is the index of the largest logit.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %d %%' % (
    100 * correct / total))
Accuracy of the network on the test images: 93 %