—以用LeNet5训练MNIST为例。所有源码在[github],直接可以运行的项目(下载的数据集、转换成的图片、生成的模型、libtorch-1.2等)在[百度网盘],提取码:nrmo。
Pytorch_LeNet5_MNIST/prepareData/down_mnist_dataset.py
import os
import torchvision
# Re-download only when ./mnist/ is absent or empty (short-circuit keeps
# os.listdir from being called on a missing directory).
DOWNLOAD_MNIST = not os.path.exists('./mnist/') or not os.listdir('./mnist/')

# Fetch the MNIST training split as tensors, downloading when requested.
train_data = torchvision.datasets.MNIST(
    root='./mnist/',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)
在该python文件位置执行该文件,如果当前位置没有mnist文件夹或有一个空的mnist文件夹,就创建 mnist 文件夹并下载、解压MNIST数据集。
下载的二进制数据集方便快速读取数据。但为了看MNIST数据集的图像形式,同时为了展示自定义数据集的方法以便训练自己的数据,这里把数据集转换成图像形式。
Pytorch_LeNet5_MNIST/prepareData/convert_mnist_to_img.py
import os
from skimage import io
import torchvision.datasets.mnist as mnist
import numpy
# Source (raw MNIST idx files) and destination (image dump) directories.
readFrom = 'mnist/MNIST/raw/'
writeTo = '../mnistImgs/'
os.makedirs(writeTo, exist_ok=True)

# Each split is an (images, labels) tensor pair decoded from the idx files.
train_set = (
    mnist.read_image_file(os.path.join(readFrom, 'train-images-idx3-ubyte')),
    mnist.read_label_file(os.path.join(readFrom, 'train-labels-idx1-ubyte')),
)
test_set = (
    mnist.read_image_file(os.path.join(readFrom, 't10k-images-idx3-ubyte')),
    mnist.read_label_file(os.path.join(readFrom, 't10k-labels-idx1-ubyte')),
)
print("train set:", train_set[0].size())
print("test set:", test_set[0].size())
def convert_to_img(train=True):
    """Dump one MNIST split to JPEG files plus a "<relpath> <label>" index txt.

    Args:
        train: True dumps the training split to writeTo/train/ with
            writeTo/train.txt; False dumps the test split analogously.

    Uses module-level globals: train_set, test_set, writeTo, io.
    The original duplicated the whole loop for each branch and leaked the
    index file handle; both branches are unified and `with` closes the file.
    """
    split = 'train' if train else 'test'
    images, labels = train_set if train else test_set
    data_path = writeTo + split + '/'
    if not os.path.exists(data_path):
        os.makedirs(data_path)
    with open(writeTo + split + '.txt', 'w') as f:
        for i, (img, label) in enumerate(zip(images, labels)):
            img_path = data_path + str(i) + '.jpg'
            io.imsave(img_path, img.numpy())
            f.write(split + '/' + str(i) + '.jpg ' + str(int(label)) + '\n')
if __name__ == '__main__':
    # Dump both splits: train first, then test.
    for is_train in (True, False):
        convert_to_img(is_train)
该文件会从readFrom文件夹读取数据并把转换后的结果写到writeTo文件夹。
Pytorch_LeNet5_MNIST/train/LeNet5.py
import torch.nn as nn
class LeNet(nn.Module):
    """Classic LeNet-5 for 28x28 single-channel digit images (10 classes).

    Attribute names and the internal nn.Sequential ordering are kept so
    state_dict keys stay compatible with previously saved checkpoints.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        # 1x28x28 -> 6x28x28 (padding=2 preserves size) -> 6x14x14 pooled.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5,
                      stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # 6x14x14 -> 16x10x10 -> 16x5x5 pooled.
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Fully connected classifier head.
        self.fc1 = nn.Sequential(nn.Linear(16 * 5 * 5, 120), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(120, 84), nn.ReLU())
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, 10)."""
        features = self.conv2(self.conv1(x))
        flat = features.view(features.size(0), -1)
        return self.fc3(self.fc2(self.fc1(flat)))
Pytorch_LeNet5_MNIST/train/dataLoader.py
from PIL import Image
from torch.utils.data import Dataset
def default_loader(path):
    """Open the image file at *path* and return it converted to RGB."""
    img = Image.open(path)
    return img.convert('RGB')
class MyDataset(Dataset):
    """Image dataset backed by an index file of "<relpath> <label>" lines.

    Args:
        root: directory containing both the index txt and the images.
        txt: index file name; each line is "relative/path.jpg label".
        transform: optional callable applied to each loaded image.
        target_transform: kept for API compatibility (currently unused).
        loader: callable mapping a file path to an image (PIL RGB by default).
    """

    def __init__(self, root, txt, transform=None, target_transform=None,
                 loader=default_loader):
        # `with` guarantees the index file is closed (the original leaked
        # the handle); blank lines are skipped instead of raising IndexError.
        with open(root + txt, 'r') as fh:
            self.imgs = [
                (root + words[0], int(words[1]))
                for words in (line.split() for line in fh)
                if words
            ]
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        """Return the (image, label) pair for the sample at *index*."""
        fn, label = self.imgs[index]
        img = self.loader(fn)
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        return len(self.imgs)
首先定义这样一个类,这三个成员函数名是固定的,根据自己的需要定义这三个函数。该类将作为加载数据的一个参数,见Pytorch_LeNet5_MNIST/train/train.py的main函数的第3至6行。
Pytorch_LeNet5_MNIST/train/train.py
import torch
from torch.autograd import Variable
from dataLoader import MyDataset
from torchvision import transforms
from torch.utils.data import DataLoader
from LeNet5 import LeNet
# Preprocessing shared by train and test: collapse the (RGB-loaded) JPEG to a
# single channel, then convert to a [0, 1] float tensor. The commented lines
# show common alternatives (resizing/cropping and mean/std normalization).
mytransform = transforms.Compose([
    # transforms.Resize(224),
    # transforms.CenterCrop(224),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    # transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),
    # transforms.Normalize((0.1307,), (0.3081,))
])
def train(epoch):
    """Run one training epoch over train_loader, logging every 100 batches.

    Args:
        epoch: 1-based epoch number, used only in the progress printout.

    Uses module-level globals: model, device, train_loader, optimizer.
    The Variable() wrappers were removed — Variable has been a no-op alias
    for Tensor since PyTorch 0.4.
    """
    log_interval = 100
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = torch.nn.functional.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.
                  format(epoch, batch_idx * len(data), len(train_loader.dataset),
                         100. * batch_idx / len(train_loader), loss.item()))
def test():
    """Evaluate on test_loader and print average loss plus accuracy.

    Uses module-level globals: model, device, test_loader.
    Fixes: `size_average=False` was deprecated and later removed — use
    reduction='sum'; evaluation now runs under torch.no_grad() so no
    autograd graph is built.
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            # Sum per-sample losses; divided by the dataset size below.
            test_loss += torch.nn.functional.cross_entropy(
                output, target, reduction='sum').item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
          format(test_loss, correct, len(test_loader.dataset),
                 100. * correct / len(test_loader.dataset)))
if __name__ == '__main__':
    # Images and index txt files produced by the dataset-conversion script.
    root = '../mnistImgs/'
    train_data = MyDataset(root, txt='train.txt', transform=mytransform)
    test_data = MyDataset(root, txt='test.txt', transform=mytransform)
    # These names are read as globals by train() and test() above.
    train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
    test_loader = DataLoader(dataset=test_data, batch_size=64)
    model = LeNet()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    epochs = 10
    for epoch in range(1, epochs+1):
        train(epoch)
        test()
    # Two save flavors: whole model (structure + weights) and weights only.
    torch.save(model, 'LeNet5_MNIST_parameter_and_model.pth')
    torch.save(model.state_dict(), 'LeNet5_MNIST_parameter.pth')
在main函数的第3、4行创建了我们自定义的数据读取方法,root和txt分别是图片路径和存放图片路径及标签的文件名,transform是对图片的操作,如裁剪、归一化等。
第5、6行的DataLoader是Pytorch的数据加载器,我们只要把3、4行的自定义数据读取方法作为它的参数就可以按照我们的方式读取数据。
最后我们保存了训练过程:第一种方式保存了网络结构和参数;第二种方式只保存了参数。
Pytorch_LeNet5_MNIST/train/predict.py
import torch
import cv2
from torch.autograd import Variable
import numpy as np
if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # map_location lets a GPU-saved model load on a CPU-only machine;
    # without it torch.load raises when CUDA is unavailable.
    model = torch.load('LeNet5_MNIST_parameter_and_model.pth',
                       map_location=device)
    model = model.to(device)
    model.eval()
    # Read the digit image and shape it to the (1, 1, H, W) float input
    # LeNet expects (grayscale, batch and channel dims added).
    img = cv2.imread("digits/a8.jpg")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = np.array(img).astype(np.float32)
    img = np.expand_dims(img, 0)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).to(device)
    # Variable() is obsolete; run inference under no_grad instead.
    with torch.no_grad():
        output = model(img)
    prob = torch.nn.functional.softmax(output, dim=1)
    prob = prob.cpu().numpy()
    #print(prob)
    pred = np.argmax(prob)
    print(pred.item())
在train.py中保存的结果可以在predict.py中加载并预测。但这些结果还是不能离开定义网络模型的Python文件,更不能被C++ 调用,所以要序列化。
Pytorch_LeNet5_MNIST/torchScript/script/LeNet5.py
import torch.nn as nn
import torch
class LeNet(torch.jit.ScriptModule):
    # LeNet-5 rebuilt as a ScriptModule so that forward() is compiled by the
    # TorchScript compiler and the whole module can be serialized via .save()
    # for later use from C++ (legacy pre-1.0-style scripting API).
    def __init__(self):
        super(LeNet, self).__init__()
        # Identical layers/names to the training-time LeNet so the saved
        # state_dict keys (conv1.0.weight, ...) match when loaded below.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(120, 84),
            nn.ReLU()
        )
        self.fc3 = nn.Linear(84, 10)
    # @torch.jit.script_method marks forward for TorchScript compilation.
    @torch.jit.script_method
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten to (batch, 16*5*5) for the fully connected stack
        x = x.view(x.size()[0], -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
my_script_module = LeNet()
# map_location='cpu' lets GPU-trained weights load on any machine;
# load_state_dict then copies them into this module's own parameters.
my_script_module.load_state_dict(
    torch.load('../../train/LeNet5_MNIST_parameter.pth', map_location='cpu'))
"""
for parameters in my_script_module.parameters():
    print(parameters)
"""
# Serialize the compiled module for loading from C++ (torch::jit::load).
my_script_module.save("script_model.pt")
这种方法让模型继承torch.jit.ScriptModule而不再是torch.nn.Module,且用 @torch.jit.script_method修饰forward函数。然后创建网络、加载参数、保存序列化结果。
Pytorch_LeNet5_MNIST/torchScript/trace/LeNet5.py
# 同 Pytorch_LeNet5_MNIST/train/LeNet5.py
Pytorch_LeNet5_MNIST/torchScript/trace/trace.py
import torch
from LeNet5 import LeNet
model = LeNet()
model.cuda()  # the example input below is CUDA, so the model must be too
# map_location makes loading independent of the device the checkpoint was
# saved from; load_state_dict then copies onto the CUDA parameters.
model.load_state_dict(
    torch.load('../../train/LeNet5_MNIST_parameter.pth', map_location='cuda'))
# Switch to inference mode before tracing, as the torch.jit.trace docs
# recommend (a no-op for this LeNet, which has no dropout/batchnorm, but it
# keeps the recorded graph in eval-time behavior).
model.eval()
"""
for parameters in model.parameters():
    print(parameters)
import cv2
from torch.autograd import Variable
import numpy as np
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
model.eval()
img = cv2.imread("a8.jpg")
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = np.array(img).astype(np.float32)
img = np.expand_dims(img, 0)
img = np.expand_dims(img, 0)
img = torch.from_numpy(img)
img = img.to(device)
output = model(Variable(img))
prob = torch.nn.functional.softmax(output, dim=1)
prob = Variable(prob)
prob = prob.cpu().numpy()
pred = np.argmax(prob)
print(pred.item())
"""
# Trace with a representative input; the recorded graph is then serialized.
example = torch.rand(64, 1, 28, 28).cuda()
traced_script_module = torch.jit.trace(model, example)
traced_script_module.save("trace_model.pt")
这种方法创建网络并加载参数后,生成一个输入数据example,然后使用torch.jit.trace跟踪,最后保存序列化结果。
上面通过两种方法生成了序列化的模型和参数,下面用C++ 调用模型做预测。
Pytorch_LeNet5_MNIST/cpp/main.cpp
#include <torch/script.h>
#include <opencv2/opencv.hpp>

#include <iostream>
#include <memory>
#include <string>
#include <vector>

using namespace std;
using namespace cv;
int main(int argc, char** argv) {
//string model_path = "../../torchScript/script/script_model.pt";
string model_path = "../../torchScript/trace/trace_model.pt";
string image_path = "../../train/digits/" + (string)argv[1] + ".jpg";
torch::jit::script::Module module = torch::jit::load(model_path);
module.to(at::kCUDA);
Mat img = imread(image_path);
cvtColor(img, img, CV_BGR2GRAY);
img.convertTo(img, CV_32F);
auto options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
torch::Tensor img_tensor = torch::from_blob(img.data, {1, img.rows, img.cols, 1}, options);
img_tensor = img_tensor.permute({0, 3, 1, 2}).to(torch::kCUDA);
vector<torch::jit::IValue> inputs;
inputs.emplace_back(img_tensor.to(at::kCUDA));
torch::Tensor output = module.forward(inputs).toTensor();
inputs.pop_back();
//cout << output << endl << endl;
tuple<at::Tensor, at::Tensor> result = output.max(1, true);
int max_index = get<1>(result).item<float>();
cout<<max_index<<endl;
}
这里的C++ 共有如下几步:加载模型和参数文件、预处理图像、图像转张量、输入网络获取结果、整理结果并输出。
Pytorch_LeNet5_MNIST/cpp/CMakeLists.txt
cmake_minimum_required(VERSION 2.8)
project(main)
#add_compile_options(-std=c++11)
# Point CMake at the bundled libtorch and a local OpenCV build. Comment
# these two set() lines out if both libraries are installed system-wide.
set(Torch_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../libtorch12/share/cmake/Torch)
set(OpenCV_DIR /home/XXX/software/OpenCV320/share/OpenCV)
find_package(Torch REQUIRED)
find_package(OpenCV REQUIRED)
include_directories(
${OpenCV_INCLUDE_DIRS}
${TORCH_INCLUDE_DIRS}
)
add_executable(main main.cpp)
target_link_libraries(main
${OpenCV_LIBS}
${TORCH_LIBRARIES}
)
上面的第6、7行的set分别指明了libtorch和OpenCV的安装位置。如果它们被安装在系统库中,可以用 # 注释掉这两行。
// Excerpt from an inference loop (enclosing function not shown): convert
// each listed image to a tensor, run the network, and turn the output back
// into OpenCV types.
for(it=file_list.begin(); it!=file_list.end(); it++) {
    ori_img = imread(dataRoot + *it);
    cvtColor(ori_img, img, CV_BGR2RGB);
    img.convertTo(img, CV_32F, 1.0/255);  // scale pixels to [0, 1]
    img = Normalize(img, mean_data);
    // Mat -> tensor: wrap the NHWC float data, then permute to NCHW on GPU.
    torch::Tensor img_tensor = torch::from_blob(img.data, {1, img.rows, img.cols, 3}, options);
    img_tensor = img_tensor.permute({0, 3, 1, 2}).to(torch::kCUDA);
    inputs.emplace_back(img_tensor.to(at::kCUDA));
    torch::Tensor output = module.forward(inputs).toTensor().cpu();
    inputs.pop_back();  // empty the inputs vector for the next iteration
    // tensor -> Mat: reshape to the image plane and render as a heatmap.
    output = output.reshape({img.rows, img.cols});
    Mat heatmap = drawHeatmap(output);
    output = output.sum()/100.0;
    float result = output.item().toFloat();  // tensor -> plain float
}
第7行把Mat转换成tensor。第10行得到网络输出。第14行把tensor转成Mat。第16行把tensor转float。
// Render a 2-D tensor as a JET-colormapped heatmap image.
// Fix: min-max normalization must divide by (max - min); the original
// divided by (max + eps), which is wrong whenever min != 0.
Mat drawHeatmap(const torch::Tensor output) {
    float max = output.max().item().toFloat();
    float min = output.min().item().toFloat();
    // Map values to [0, 255]; 1e-20 guards against a constant tensor.
    torch::Tensor tensor = 255.0 * (output - min) / (max - min + 1e-20);
    // Convert to a plain CPU float tensor so its memory can back a cv::Mat.
    tensor = tensor.to(at::ones({1, 1}, torch::kFloat));
    Mat im(tensor.size(0), tensor.size(1), CV_32FC1, tensor.data<float>());
    im.convertTo(im, CV_8UC1);
    applyColorMap(im, im, COLORMAP_JET);
    return im;
}
网络输出的tensor类型是Variable类型,第5行把它转换成普通tensor,第7行把tensor转换成Mat。