Converting the Ultra Fast Lane Detection Model for Use with libtorch

Converting to a TorchScript Model

First, convert the trained PyTorch model to the TorchScript format.

The conversion script is as follows:

from model.model import parsingNet
from utils.common import merge_config
from utils.dist_utils import dist_print
import torch


if __name__ == "__main__":
    torch.backends.cudnn.benchmark = True
    args, cfg = merge_config()
    dist_print('start testing...')
    assert cfg.backbone in ['18', '34', '50', '101', '152', '50next', '101next', '50wide', '101wide']

    if cfg.dataset == 'CULane':
        cls_num_per_lane = 18
    elif cfg.dataset == 'Tusimple':
        cls_num_per_lane = 56
    else:
        raise NotImplementedError

    net = parsingNet(pretrained=False, backbone=cfg.backbone, cls_dim=(cfg.griding_num + 1, cls_num_per_lane, 4),
                     use_aux=False).cuda()  # we don't need auxiliary segmentation in testing

    state_dict = torch.load(cfg.test_model, map_location='cpu')['model']
    compatible_state_dict = {}
    for k, v in state_dict.items():
        if 'module.' in k:
            compatible_state_dict[k[7:]] = v
        else:
            compatible_state_dict[k] = v

    net.load_state_dict(compatible_state_dict, strict=False)
    net.eval()
    # Export the model
    trace_model = torch.jit.trace(net, torch.rand(1, 3, 288, 800).cuda())
    print(trace_model.code)
    output = trace_model(torch.ones(1, 3, 288, 800).cuda())
    print(output)
    trace_model.save('./model_099.pt')
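
Before writing the full C++ program, a minimal libtorch smoke test can confirm that the exported model loads and runs. This is a sketch: the model path follows the export script above, and the expected output shape assumes the Tusimple configuration (cls_dim = (101, 56, 4)).

#include <torch/script.h>
#include <iostream>

int main()
{
  // Load the TorchScript model saved by the export script above.
  torch::jit::script::Module module = torch::jit::load("./model_099.pt");
  module.to(torch::kCUDA);
  module.eval();

  // Same all-ones dummy input that the Python check used.
  auto input = torch::ones({1, 3, 288, 800}).to(torch::kCUDA);
  auto output = module.forward({input}).toTensor();
  std::cout << output.sizes() << std::endl; // expect [1, 101, 56, 4] for Tusimple
  return 0;
}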

C++ Implementation of Model Inference and Result Parsing

1. Setting up the libtorch environment

  • Download libtorch. Follow the official PyTorch instructions and pick the build that matches your machine.
  • Configure CMakeLists.txt:
find_package(Torch REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

if(NOT Torch_FOUND)
  message(FATAL_ERROR "PyTorch not found")
endif(NOT Torch_FOUND)

include_directories(/home/hw/Tools/libtorch/include/torch/csrc/api/include)
# The executable target must also be linked against libtorch, e.g.:
# target_link_libraries(<your_target> "${TORCH_LIBRARIES}")

2. C++ program implementation

The author's repository includes a C++ version of the program, but it has some issues; the version below is based on it with a few fixes:

#include <iostream>
#include <vector>
#include <string>
#include <ctime>
#include <torch/script.h>
#include <torch/torch.h>
#include "opencv2/aruco.hpp"
#include "opencv2/aruco/dictionary.hpp"
#include "opencv2/aruco/charuco.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/videoio.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"


using namespace std;
using namespace cv;
using namespace torch::indexing;

std::vector<double> linspace(double start_in, double end_in, int num_in)
{
  std::vector<double> linspaced;

  double start = static_cast<double>(start_in);
  double end = static_cast<double>(end_in);
  double num = static_cast<double>(num_in);

  if (num == 0)
  {
    return linspaced;
  }
  if (num == 1)
  {
    linspaced.push_back(start);
    return linspaced;
  }

  double delta = (end - start) / (num - 1);

  for (int i = 0; i < num - 1; ++i)
  {
    linspaced.push_back(start + delta * i);
  }
  linspaced.push_back(end); // I want to ensure that start and end
                            // are exactly the same as the input

  return linspaced;
}

std::vector<int> arrange(int num)
{
  std::vector<int> result;
  for (int i = 1; i < num; i++)
  {
    result.push_back(i);
  }
  return result;
}

std::string GetNowTime()
{
  time_t timep;
  time(&timep);
  char tmp[64];
  strftime(tmp, sizeof(tmp), "%Y-%m-%d-%H-%M-%S", localtime(&timep));
  return tmp;
}
torch::jit::script::Module module_;
int tusimpleGriding_num = 100;
std::vector<double> linSpaceVector = linspace(0, 800 - 1, tusimpleGriding_num);
double linSpace = linSpaceVector[1] - linSpaceVector[0]; // grid cell width (col_sample_w in the Python code)

int img_w = 1280;
int img_h = 720;
int tusimple_row_anchor[] = {64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112,
                             116, 120, 124, 128, 132, 136, 140, 144, 148, 152, 156, 160, 164,
                             168, 172, 176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216,
                             220, 224, 228, 232, 236, 240, 244, 248, 252, 256, 260, 264, 268,
                             272, 276, 280, 284};

Mat RunLaneDetection(Mat frame)
{

  cv::Mat img, img_float;
  cv::cvtColor(frame, img, cv::COLOR_BGR2RGB);
  cv::Size scale(800, 288);
  cv::resize(img, img, scale, 0, 0, cv::INTER_LINEAR);
  img.convertTo(img_float, CV_32FC3, 1.0f / 255.0f);

  auto tensor_img = torch::from_blob(img_float.data, {1, img_float.rows, img_float.cols, img_float.channels()}); // cv Mat change to tensor
  tensor_img = tensor_img.permute({0, 3, 1, 2}).contiguous();                                                    // BHWC -> BCHW (Batch, Channel, Height, Width)
  tensor_img[0][0] = tensor_img[0][0].sub_(0.485).div_(0.229);                                                   // transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
  tensor_img[0][1] = tensor_img[0][1].sub_(0.456).div_(0.224);
  tensor_img[0][2] = tensor_img[0][2].sub_(0.406).div_(0.225);
  tensor_img = tensor_img.to(torch::kCUDA);

  torch::jit::IValue output = module_.forward({tensor_img});
  torch::Tensor outputTensor = output.toTensor();

  outputTensor = outputTensor.squeeze(0);                                                          // CUDAHalfType{1,101,56,4} -> CUDAHalfType{101,56,4}
  outputTensor = outputTensor.flip(1);                                                             // reverse the row-anchor dimension (out_j[:, ::-1, :] in Python)
  torch::Tensor prob = outputTensor.index({Slice(None, -1), Slice(None), Slice(None)}).softmax(0); // drop the "no lane" group, then softmax over the grid dimension
  std::vector<int> idx = arrange(tusimpleGriding_num + 1);                                         // grid indices 1..100 (unused; the tensor version below is used instead)
  torch::Tensor c = torch::arange(100) + 1;
  auto arrange_idx = torch::reshape(c, {tusimpleGriding_num, 1, 1}).to(torch::kCUDA);
  auto mult = prob * arrange_idx;
  auto loc = mult.sum(0); // sum over dim 0 (the grid dimension) -> expected lane position, shape {56, 4}

  outputTensor = outputTensor.argmax(0);
  for (int i = 0; i < outputTensor.size(1); i++)
  {
    for (int j = 0; j < outputTensor.size(0); j++)
    {
      if (outputTensor[j][i].item<int>() == tusimpleGriding_num)
      {
        loc[j][i] = 0;
      }
    }
  }
  for (int i = 0; i < loc.size(1); i++)
  {
    for (int k = 0; k < loc.size(0); k++)
    {
      if (loc[k][i].item<float>() > 0)
      {
        long width = int(loc[k][i].item<float>() * double(linSpace * img_w) / 800) - 1;
        long height = int(img_h * (double(tusimple_row_anchor[56 - 1 - k]) / 288)) - 1;
        circle(frame, Point(width, height), 5, Scalar(0, 255, 0), -1);
      }
    }
  }
  return frame;
}

void RunVideo()
{
  VideoCapture cap("~/project/LaneDetection/datasets/video/MAH00146.mp4"); // note: "~" is not expanded here; an absolute path is safer
  std::cout << "Prepare to load" << std::endl;
  Mat frame;
  while (true)
  {
    cap.read(frame); // read a new frame from video
    if (frame.cols > 0 && frame.rows > 0)
    {
      cv::imshow("", RunLaneDetection(frame));
    }
    if (waitKey(10) >= 0)
      break;
  }
}

int main()
{
  // Load JIT
  try
  {
    module_ = torch::jit::load("~/project/LaneDetection/lane_det/src/lane_detection/src/laneatt/model_099_1.pt"); // note: "~" is not expanded here either
  }
  catch (const std::exception &e)
  {
    std::cerr << "error loading the model\n";
    std::cerr << e.what() << '\n';
    return -1; // don't continue with an unloaded module
  }
  module_.to(torch::kCUDA);
  module_.eval();
  std::cout << "load model succees" << std::endl;
  RunVideo();
  cv::destroyAllWindows();
  return 0;
}

Summary of Issues

1. Input image preprocessing

During testing, the PyTorch model and the libtorch model produced identical outputs when fed the same tensor input, but produced different results when given the same image.

This can be fixed as described in a reference article; the main points to watch in the image handling are:

  1. PIL (which torchvision transforms wraps) computes floating-point values slightly differently from OpenCV. The Python and C++ pipelines must stay consistent, so use OpenCV for preprocessing on both sides.
  2. The transforms.Normalize() step in the Python code must be reproduced in C++ (a vectorized alternative is sketched after this list):
    tensor_img[0][0] = tensor_img[0][0].sub_(0.485).div_(0.229);
    tensor_img[0][1] = tensor_img[0][1].sub_(0.456).div_(0.224);
    tensor_img[0][2] = tensor_img[0][2].sub_(0.406).div_(0.225);
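
A more compact, equivalent way to apply the same normalization in libtorch is a broadcasted subtract/divide. A minimal sketch (applied before the tensor is moved to CUDA, as in RunLaneDetection above):

// Vectorized equivalent of transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
auto mean = torch::tensor({0.485f, 0.456f, 0.406f}).view({1, 3, 1, 1});
auto stdv = torch::tensor({0.229f, 0.224f, 0.225f}).view({1, 3, 1, 1});
tensor_img = tensor_img.sub(mean).div(stdv); // broadcasts over the H and W dimensions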

2. Output post-processing

The C++ result-parsing code in the project repository is buggy; it needs to be adjusted after carefully working through the Python version:

out_j = trt_outputs[0].reshape((101, 56, 4))
# reshape the flat output into 101 (griding_num + 1) x 56 (sample rows) x 4 (lanes)
out_j = out_j[:, ::-1, :]
# reverse the row-anchor dimension of each (56, 4) slice
prob = scipy.special.softmax(out_j[:-1, :, :], axis=0)
# drop the 101st "no lane" group, then softmax over the grid dimension, shape (100, 56, 4)
idx = np.arange(100) + 1
# grid indices 1..100
idx = idx.reshape(-1, 1, 1)
# reshape idx to 100 x 1 x 1 so it broadcasts
loc = np.sum(prob * idx, axis=0)
# broadcasted product (100,56,4) * (100,1,1): the 1st (56,4) slice * 1, the 2nd * 2, ..., the 100th * 100,
# then sum over axis 0 -> expected grid position per cell, shape (56, 4)
out_j = np.argmax(out_j, axis=0)
# index of the largest of the 101 groups for each of the 56 x 4 cells, shape (56, 4)
loc[out_j == 100] = 0
# zero out cells whose argmax is the "no lane" class (index 100)
out_j = loc
# out_j[k, i] is now the lane position on the 0-100 grid across the width; k indexes row_anchor
for i in range(out_j.shape[1]):  # out_j.shape[1] = 4 lanes
    if np.sum(out_j[:, i] != 0) > 2:  # only draw lanes with more than 2 detected points
        for k in range(out_j.shape[0]):  # out_j.shape[0] = 56 row anchors
            if out_j[k, i] > 0:
                # x: map the grid position back to the 800-wide network input, then scale to img_w (1280);
                # y: map the k-th row anchor from the 288-high input to img_h (720)
                ppp = (int(out_j[k, i] * col_sample_w * img_w / 800) - 1,
                       int(img_h * (row_anchor[cls_num_per_lane - 1 - k] / 288)) - 1)
                cv2.circle(frame, ppp, 5, (0, 255, 0), -1)
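
For reference, the two nested loops in RunLaneDetection that implement loc[out_j == 100] = 0 can be collapsed into a single vectorized masked fill. A sketch using the same variable names as the C++ program above, applied before outputTensor is overwritten by its argmax:

// Zero out cells whose argmax is the "no lane" class, without per-element loops.
auto max_idx = outputTensor.argmax(0);                    // shape {56, 4}
loc = loc.masked_fill(max_idx == tusimpleGriding_num, 0); // equivalent of loc[out_j == 100] = 0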

 
