import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
# (1) torchvision 是PyTorch中专门用来处理图像的库。这个包中有四个大类:
# torchvision.datasets、torchvision.models、torchvision.transforms、torchvision.utils。
# (2) datasets,这个就是PyTorch已有的数据读取接口(比如torchvision.datasets.ImageFolder)或者自定义的数据接口的输出。
# (3) torchvision.transforms这个包中包含resize、crop等常见的data augmentation操作,基本上PyTorch中的data augmentation操作都可以通过该接口实现。
# Device configuration: run on the GPU when CUDA is available, else the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Hyper parameters
num_epochs = 5 # number of full passes over the training set
num_classes = 10 # one class per digit 0-9
batch_size = 100 # samples per mini-batch
learning_rate = 0.001 # Adam step size
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='data/',  # directory that holds (or will hold) the MNIST files
                                           train=True,  # use the training split
                                           transform=transforms.ToTensor(),  # PIL image / ndarray (HxWxC, 0-255) -> FloatTensor (CxHxW, 0.0-1.0)
                                           download=True)  # fetch the data if it is not already on disk
test_dataset = torchvision.datasets.MNIST(root='data/',
                                          train=False,  # use the test split
                                          transform=transforms.ToTensor(),
                                          download=True)  # consistent with the train set; avoids a crash on a fresh machine
# Data loaders batch the datasets and (for training) reshuffle them every epoch.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)  # evaluation order does not matter
# (1) torchvision.datasets 是用来进行数据加载的,PyTorch团队在这个包中帮我们提前处理好了很多图片数据集。
class ConvNet(nn.Module):
    """Two-stage CNN for 10-class MNIST classification.

    Each stage is a 5x5 same-padding convolution -> batch norm -> ReLU ->
    2x2 max-pool, so a 28x28 input is pooled to 14x14 and then 7x7 before
    the final fully-connected classifier.
    """

    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # After two halving pools: 28 -> 14 -> 7 spatially, with 32 channels.
        self.fc = nn.Linear(7 * 7 * 32, num_classes)

    def forward(self, x):
        features = self.layer2(self.layer1(x))
        flat = features.reshape(features.size(0), -1)  # (N, 7*7*32)
        return self.fc(flat)
# Instantiate the network and move its parameters/buffers to the chosen device.
model = ConvNet(num_classes).to(device)
# loss and optimizer
criterion = nn.CrossEntropyLoss() # cross-entropy loss (log-softmax + NLL in one op)
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)
# (1) 在定义的model/Net后面加上.to(device),该方法将递归遍历所有模块,并将其参数和缓冲区转为CUDA tensors。
# Train the model.
total_step = len(train_loader)  # mini-batches per epoch
for epoch in range(num_epochs):
    for i,(images, labels) in enumerate(train_loader):
        # Move the batch to the same device as the model.
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs,labels)
        # Backward and optimize
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()
        # Report progress every 100 mini-batches.
        if (i + 1) % 100 == 0:
            print('Epoch[{}/{}],Step[{}/{}],Loss:{:.4f}'
                  .format(epoch + 1,num_epochs,i + 1 ,total_step,loss.item()))
# (1) enumerate()用于将可迭代、可遍历的数据对象组合为一个索引序列,同时列出数据和数据下标。
# Test the model.
model.eval() # eval mode (batchnorm uses running mean/variance instead of mini-batch statistics)
with torch.no_grad():  # no autograd graph needed for evaluation
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit in each row.
        _, predicted = torch.max(outputs.data,1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Test Accuracy of the model on the 10000 test images:{}%'.format(100*correct / total))
# Save the model checkpoint (parameters only; the class must be rebuilt to reload).
torch.save(model.state_dict(),'model.ckpt')
# (1) model.train()和model.eval():
# train():启用 BatchNormalization 和 Dropout。
# eval():不启用 BatchNormalization 和 Dropout,保证BN和dropout不发生变化,pytorch框架会自动把BN和Dropout固定住,不会取平均,而是用训练好的值;不然的话,一旦test的batch_size过小,很容易就会被BN层影响结果。
# (2) with是python中的上下文管理器。简单理解:当要进行固定的进入、返回操作时,可以将对应需要的操作放在with所辖的语句块中。
# (3) 关于with torch.no_grad():
# 在使用pytorch时,并不是所有的操作都需要进行计算图的生成(计算过程的构建,以便梯度反向传播等操作)。而对于tensor的计算操作,默认是要进行计算图的构建的;在这种情况下,可以使用 with torch.no_grad():,强制之后的内容不进行计算图构建。
# (4) torch.max(outputs, 1):
# 其中这个 1 代表按行取最大(0则代表按列)。返回值是(每行最大值, 最大值所在位置)的元组;写成 _, predicted 即丢弃最大值、只保留其位置(下标)。
# (5) model.state_dict()仅保存和加载模型参数(推荐使用,需要提前手动构建模型),速度快,占空间少。
import torch
import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms
import numpy as np
class ConvNet(nn.Module):
    """CNN matching the training-script architecture, rebuilt here for inference.

    The module registration order (layer1, layer2, fc) and the Sequential
    layout must match the training script so the saved state_dict loads.
    """

    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        self.layer1 = self._conv_block(1, 16)
        self.layer2 = self._conv_block(16, 32)
        # Two halving pools take a 28x28 input down to 7x7 with 32 channels.
        self.fc = nn.Linear(7 * 7 * 32, num_classes)

    @staticmethod
    def _conv_block(in_channels, out_channels):
        # 5x5 same-padding conv -> batch norm -> ReLU -> 2x2 max-pool (halves H and W).
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        out = self.layer2(self.layer1(x))
        return self.fc(out.reshape(out.size(0), -1))
def test_mydata():
    """Classify the hand-made digit image 'new8.jpg' with the trained ConvNet.

    Loads the preprocessed image, resizes it to the 28x28 grayscale format
    the network was trained on, restores the weights from 'model.ckpt',
    and displays the image titled with the predicted digit.
    """
    im = plt.imread('new8.jpg')  # keep the raw pixels for display
    images = Image.open('new8.jpg')
    images = images.resize((28,28))  # the network expects 28x28 input
    images = images.convert('L')  # single grayscale channel, like MNIST
    transform = transforms.ToTensor()
    images = transform(images)
    # Add the batch dimension: (1, 28, 28) -> (1, 1, 28, 28).
    # reshape replaces the deprecated (and size-unsafe) Tensor.resize call.
    images = images.reshape(1, 1, 28, 28)
    # Rebuild the network and restore the trained weights.
    model = ConvNet()
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load('model.ckpt', map_location='cpu'))
    model.eval()  # inference mode: batch norm uses its running statistics
    outputs = model(images)
    values, indices = outputs.data.max(1)  # max logit and its class index per row
    plt.title('{}'.format((int(indices[0]))))
    plt.imshow(im)
    plt.show()

test_mydata()
import cv2 as cv

# Preprocess the raw digit photo for the classifier: MNIST digits are
# white-on-black, so threshold with inversion; Otsu's method picks the
# threshold value automatically (the 0 passed in is ignored).
source = cv.imread('8.jpg')
grayscale = cv.cvtColor(source, cv.COLOR_BGR2GRAY)
_, inverted = cv.threshold(grayscale, 0, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU)
cv.imwrite('new8.jpg', inverted)