First, convert the trained PyTorch model to TorchScript format. The conversion script is as follows:
from model.model import parsingNet
from utils.common import merge_config
from utils.dist_utils import dist_print
import torch

if __name__ == "__main__":
    torch.backends.cudnn.benchmark = True
    args, cfg = merge_config()
    dist_print('start testing...')
    assert cfg.backbone in ['18', '34', '50', '101', '152', '50next', '101next', '50wide', '101wide']

    if cfg.dataset == 'CULane':
        cls_num_per_lane = 18
    elif cfg.dataset == 'Tusimple':
        cls_num_per_lane = 56
    else:
        raise NotImplementedError

    net = parsingNet(pretrained=False, backbone=cfg.backbone,
                     cls_dim=(cfg.griding_num + 1, cls_num_per_lane, 4),
                     use_aux=False).cuda()  # we don't need the auxiliary segmentation head in testing

    state_dict = torch.load(cfg.test_model, map_location='cpu')['model']
    compatible_state_dict = {}
    for k, v in state_dict.items():
        # strip the 'module.' prefix left over from DataParallel training
        if 'module.' in k:
            compatible_state_dict[k[7:]] = v
        else:
            compatible_state_dict[k] = v
    net.load_state_dict(compatible_state_dict, strict=False)
    net.eval()

    # Export the model: trace with a dummy input of the expected shape (1, 3, 288, 800)
    trace_model = torch.jit.trace(net, torch.rand(1, 3, 288, 800).cuda())
    print(trace_model.code)
    output = trace_model(torch.ones(1, 3, 288, 800).cuda())
    print(output)
    trace_model.save('./model_099.pt')
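Before moving on to C++, it is worth a quick check that the exported module loads back and produces the expected output shape. A minimal sketch, assuming a CUDA device and the model_099.pt saved above:

import torch

# Load the TorchScript module exported above
ts_net = torch.jit.load('./model_099.pt').cuda().eval()

x = torch.rand(1, 3, 288, 800).cuda()
with torch.no_grad():
    y = ts_net(x)
# Tusimple config: (griding_num + 1, cls_num_per_lane, lanes) = (101, 56, 4)
print(y.shape)  # expected: torch.Size([1, 101, 56, 4])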
1. Add the libtorch environment
find_package(Torch REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
if(NOT Torch_FOUND)
    message(FATAL_ERROR "PyTorch not found")
endif(NOT Torch_FOUND)
# Adjust this to wherever your libtorch is unpacked:
include_directories(/home/hw/Tools/libtorch/include/torch/csrc/api/include)
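For find_package(Torch) to locate libtorch, CMake also needs the install prefix, and the executable has to link against the Torch and OpenCV libraries. A minimal complete CMakeLists.txt sketch (project and source names are placeholders; the libtorch path is an assumption taken from the include path above):

cmake_minimum_required(VERSION 3.10)
project(lane_detection)

list(APPEND CMAKE_PREFIX_PATH "/home/hw/Tools/libtorch")
find_package(Torch REQUIRED)
find_package(OpenCV REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

add_executable(lane_det main.cpp)
target_link_libraries(lane_det ${TORCH_LIBRARIES} ${OpenCV_LIBS})
set_property(TARGET lane_det PROPERTY CXX_STANDARD 14)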
2. C++ implementation
The code the author pushed includes a C++ version of the program, but it has some problems; the following is based on that program with a few modifications:
#include <torch/script.h>
#include <torch/torch.h>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "opencv2/aruco.hpp"
#include "opencv2/aruco/dictionary.hpp"
#include "opencv2/aruco/charuco.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/videoio.hpp"
#include <opencv2/opencv.hpp>
#include <ctime>
using namespace std;
using namespace cv;
using namespace torch::indexing;
std::vector<double> linspace(double start_in, double end_in, int num_in)
{
    std::vector<double> linspaced;
    double start = static_cast<double>(start_in);
    double end = static_cast<double>(end_in);
    double num = static_cast<double>(num_in);
    if (num == 0)
    {
        return linspaced;
    }
    if (num == 1)
    {
        linspaced.push_back(start);
        return linspaced;
    }
    double delta = (end - start) / (num - 1);
    for (int i = 0; i < num - 1; ++i)
    {
        linspaced.push_back(start + delta * i);
    }
    linspaced.push_back(end); // ensure that start and end are exactly
                              // the same as the inputs
    return linspaced;
}
std::vector<int> arrange(int num)
{
    std::vector<int> result;
    for (int i = 1; i < num; i++)
    {
        result.push_back(i);
    }
    return result;
}
std::string GetNowTime()
{
    time_t timep;
    time(&timep);
    char tmp[64];
    strftime(tmp, sizeof(tmp), "%Y-%m-%d-%H-%M-%S", localtime(&timep));
    return tmp;
}
torch::jit::script::Module module_;
int tusimpleGriding_num = 100;
std::vector<double> linSpaceVector = linspace(0, 800 - 1, tusimpleGriding_num);
double linSpace = linSpaceVector[1] - linSpaceVector[0];
int img_w = 1280;
int img_h = 720;
int tusimple_row_anchor[] = {64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112,
116, 120, 124, 128, 132, 136, 140, 144, 148, 152, 156, 160, 164,
168, 172, 176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216,
220, 224, 228, 232, 236, 240, 244, 248, 252, 256, 260, 264, 268,
272, 276, 280, 284};
Mat RunLaneDetection(Mat frame)
{
    cv::Mat img, img_float;
    cv::cvtColor(frame, img, cv::COLOR_BGR2RGB);
    cv::Size scale(800, 288);
    cv::resize(img, img, scale, 0, 0, cv::INTER_LINEAR);
    img.convertTo(img_float, CV_32FC3, 1.0f / 255.0f);
    auto tensor_img = torch::from_blob(img_float.data, {1, img_float.rows, img_float.cols, img_float.channels()}); // convert cv::Mat to tensor
    tensor_img = tensor_img.permute({0, 3, 1, 2}).contiguous(); // BHWC -> BCHW (Batch, Channel, Height, Width)
    tensor_img[0][0] = tensor_img[0][0].sub_(0.485).div_(0.229); // transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    tensor_img[0][1] = tensor_img[0][1].sub_(0.456).div_(0.224);
    tensor_img[0][2] = tensor_img[0][2].sub_(0.406).div_(0.225);
    tensor_img = tensor_img.to(torch::kCUDA);
    torch::jit::IValue output = module_.forward({tensor_img});
    torch::Tensor outputTensor = output.toTensor();
    outputTensor = outputTensor.squeeze(0); // {1, 101, 56, 4} -> {101, 56, 4}
    outputTensor = outputTensor.flip(1);    // reverse the row-anchor order, like out_j[:, ::-1, :]
    torch::Tensor prob = outputTensor.index({Slice(None, -1), Slice(None), Slice(None)}).softmax(0); // drop the "no lane" bin, softmax over the grid axis
    std::vector<int> idx = arrange(tusimpleGriding_num + 1); // 1..100 (unused below; torch::arange plays the same role)
    torch::Tensor c = torch::arange(100) + 1;
    auto arrange_idx = torch::reshape(c, {tusimpleGriding_num, 1, 1}).to(torch::kCUDA);
    auto mult = prob * arrange_idx;
    auto loc = mult.sum(0); // expected grid position; dim=0 sums over the grid axis, dim=1 would sum over rows
    outputTensor = outputTensor.argmax(0);
    for (int i = 0; i < outputTensor.size(1); i++)
    {
        for (int j = 0; j < outputTensor.size(0); j++)
        {
            // index 100 is the "no lane" bin: zero out those locations
            if (outputTensor[j][i].item<long>() == tusimpleGriding_num)
            {
                loc[j][i] = 0;
            }
        }
    }
    for (int i = 0; i < loc.size(1); i++)
    {
        for (int k = 0; k < loc.size(0); k++)
        {
            if (loc[k][i].item<float>() > 0)
            {
                long width = int(loc[k][i].item<float>() * double(linSpace * img_w) / 800) - 1;
                long height = int(img_h * (double(tusimple_row_anchor[56 - 1 - k]) / 288)) - 1;
                circle(frame, Point(width, height), 5, Scalar(0, 255, 0), -1);
            }
        }
    }
    return frame;
}
void RunVideo()
{
    // Note: OpenCV does not expand '~'; use an absolute path here
    VideoCapture cap("~/project/LaneDetection/datasets/video/MAH00146.mp4");
    std::cout << "Prepare to load" << std::endl;
    Mat frame;
    while (true)
    {
        cap.read(frame); // read a new frame from the video
        if (frame.cols > 0 && frame.rows > 0)
        {
            cv::imshow("", RunLaneDetection(frame));
        }
        if (waitKey(10) >= 0)
            break;
    }
}
int main()
{
    // Load the TorchScript module
    try
    {
        module_ = torch::jit::load("~/project/LaneDetection/lane_det/src/lane_detection/src/laneatt/model_099_1.pt");
    }
    catch (const std::exception &e)
    {
        std::cerr << "error loading the model\n";
        std::cerr << e.what() << '\n';
        return -1;
    }
    module_.to(torch::kCUDA);
    module_.eval();
    std::cout << "model loaded successfully" << std::endl;
    RunVideo();
    cv::destroyAllWindows();
    return 0;
}
1. Input image preprocessing
During testing I found that when the PyTorch model and the libtorch model are fed the same raw tensor, the outputs match, but predicting on the same image gives different results. The referenced article resolves this; the key is to be careful in the image preprocessing step:
tensor_img[0][0] = tensor_img[0][0].sub_(0.485).div_(0.229);
tensor_img[0][1] = tensor_img[0][1].sub_(0.456).div_(0.224);
tensor_img[0][2] = tensor_img[0][2].sub_(0.406).div_(0.225);
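For comparison, this is what the Python side does through torchvision's transforms. A minimal standalone sketch of the equivalent preprocessing (the function name is mine; it assumes a BGR frame straight from OpenCV, as in the C++ code above):

import cv2
import numpy as np
import torch

def preprocess(frame_bgr: np.ndarray) -> torch.Tensor:
    # BGR -> RGB, resize to the network input size (800x288), scale to [0, 1]
    img = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (800, 288), interpolation=cv2.INTER_LINEAR)
    img = img.astype(np.float32) / 255.0
    # HWC -> BCHW, then the same per-channel ImageNet normalization
    t = torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0)
    mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
    return (t - mean) / std

If the C++ input tensor and this one agree elementwise on the same frame, any remaining mismatch lies in the model or the post-processing, not in the preprocessing.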
2. Output post-processing
The C++ post-processing in the project's code is wrong; the Python code has to be analyzed carefully before the C++ program can be adjusted:
out_j = trt_outputs[0].reshape((101, 56, 4))
# reshape the flat output to 101 (w+1) x 56 (sample rows) x 4 (number of lanes)
out_j = out_j[:, ::-1, :]
# reverse the row order of each (56, 4) slice
prob = scipy.special.softmax(out_j[:-1, :, :], axis=0)
# drop the 101st group, then softmax over the grid axis; shape (100, 56, 4)
idx = np.arange(100) + 1
# grid indices 1..100 with step 1
idx = idx.reshape(-1, 1, 1)
# reshape idx to (100, 1, 1) for broadcasting
loc = np.sum(prob * idx, axis=0)
# broadcasted product prob * idx: (100, 56, 4) * (100, 1, 1) = (100, 56, 4);
# the 1st (56, 4) slice is weighted by 1, the 2nd by 2, ..., the 100th by 100,
# then summed along axis 0 to give the expected grid position; shape (56, 4)
out_j = np.argmax(out_j, axis=0)
# np.argmax returns the index of the maximum; with axis=0 it compares the 101
# groups of (56, 4) values elementwise; shape (56, 4)
loc[out_j == 100] = 0
# where the argmax is 100 (the "no lane" bin), zero out the location; loc shape (56, 4)
out_j = loc
# out_j[k, i] is now the lane position on a 0-100 grid across the width; k indexes row_anchor
for i in range(out_j.shape[1]):          # out_j.shape[1] = 4 lanes
    if np.sum(out_j[:, i] != 0) > 2:     # keep only lanes with more than 2 nonzero points
        for k in range(out_j.shape[0]):  # out_j.shape[0] = 56 row anchors
            if out_j[k, i] > 0:          # value at row k, lane i
                ppp = (int(out_j[k, i] * col_sample_w * img_w / 800) - 1,
                       # the 800-pixel width is split into 100 cells: multiply the grid
                       # position by the cell width, then rescale to the 1280-pixel frame
                       int(img_h * (row_anchor[cls_num_per_lane - 1 - k] / 288)) - 1)
                       # row_anchor marks 56 rows on the 288-high input: look up the
                       # y of row k, then rescale to the 720-pixel frame height
                cv2.circle(frame, ppp, 5, (0, 255, 0), -1)
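Written as a formula, the decoding above computes, for row anchor $k$ and lane $i$, the expectation of the grid index under the softmax distribution over the 100 width bins, zeroed where the "no lane" bin (index 100) wins the argmax:

\mathrm{loc}_{k,i} = \sum_{j=1}^{100} j \cdot \operatorname{softmax}\!\big(\mathrm{out}_{0:100,\,k,\,i}\big)_j, \qquad \mathrm{loc}_{k,i} = 0 \ \ \text{if} \ \operatorname{argmax}_{j}\, \mathrm{out}_{j,k,i} = 100

The drawn point is then $x = \mathrm{int}(\mathrm{loc}_{k,i} \cdot \mathrm{col\_sample\_w} \cdot 1280 / 800) - 1$ and $y = \mathrm{int}(720 \cdot \mathrm{row\_anchor}[55-k] / 288) - 1$, i.e. the grid position rescaled from the 800x288 network input to the 1280x720 frame.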