libtorch推理
#include
#include
#include
#include
#include
/// Filters raw detector output by score and applies greedy, class-agnostic NMS.
///
/// Each row of an image's prediction is read as
/// [cx, cy, w, h, objectness, class scores...]. Columns 0-3 are converted from
/// centre/size form to corner form, column 4 is multiplied by the best class
/// score, and column 5 is overwritten with the index of that best class.
///
/// @param preds        Tensor indexed as [image, candidate, attribute].
/// @param score_thresh Candidates whose objectness * best class score is not
///                     strictly above this value are dropped before NMS.
/// @param iou_thresh   A candidate is suppressed when its IoU with an already
///                     kept box is strictly above this value.
/// @return One CPU tensor per image that has at least one surviving candidate;
///         rows are [x1, y1, x2, y2, score, class index] in descending score
///         order. Images without any surviving candidate contribute no entry,
///         so positions in the result need not match batch indices.
std::vector<torch::Tensor> non_max_suppression(torch::Tensor preds, float score_thresh = 0.01, float iou_thresh = 0.35)
{
    std::vector<torch::Tensor> output;
    const int64_t batch_size = preds.size(0);
    for (int64_t b = 0; b < batch_size; ++b)
    {
        torch::Tensor pred = preds.select(0, b).to(at::kCPU);

        // Score = objectness * best class score; keep only rows above the threshold.
        const torch::Tensor scores = pred.select(1, 4) * std::get<0>(torch::max(pred.slice(1, 5, pred.size(1)), 1));
        // index_select returns a fresh tensor, so the in-place edits below do not touch `preds`.
        pred = torch::index_select(pred, 0, torch::nonzero(scores > score_thresh).select(1, 0));
        if (pred.size(0) == 0) continue;

        // (cx, cy, w, h) -> (x1, y1, x2, y2). Order matters: x2/y2 reuse the updated x1/y1.
        pred.select(1, 0).sub_(pred.select(1, 2) / 2);
        pred.select(1, 1).sub_(pred.select(1, 3) / 2);
        pred.select(1, 2).add_(pred.select(1, 0));
        pred.select(1, 3).add_(pred.select(1, 1));

        // The class maximum must be taken before column 5 is overwritten with the class index.
        const std::tuple<torch::Tensor, torch::Tensor> best_class = torch::max(pred.slice(1, 5, pred.size(1)), 1);
        pred.select(1, 4).mul_(std::get<0>(best_class));
        pred.select(1, 5).copy_(std::get<1>(best_class));

        const torch::Tensor dets = pred.slice(1, 0, 6);
        const torch::Tensor areas = (dets.select(1, 3) - dets.select(1, 1)) * (dets.select(1, 2) - dets.select(1, 0));

        // Candidate row indices, highest score first.
        torch::Tensor order = std::get<1>(torch::sort(dets.select(1, 4), 0, true));

        // Indices are kept as int64 from the start so they are never rounded through float.
        torch::Tensor keep = torch::empty({ dets.size(0) }, torch::kInt64);
        int64_t kept = 0;
        while (order.size(0) > 0)
        {
            const int64_t best = order[0].item<int64_t>();
            keep[kept] = best;
            ++kept;
            if (order.size(0) == 1) break;

            const torch::Tensor rest = order.slice(0, 1, order.size(0));
            const torch::Tensor others = torch::index_select(dets, 0, rest);
            const torch::Tensor ref = dets[best];

            // Intersection of the best box with every remaining box, computed on whole columns
            // instead of one scalar read per coordinate per box.
            const torch::Tensor lefts = others.select(1, 0).clamp_min(ref[0].item());
            const torch::Tensor tops = others.select(1, 1).clamp_min(ref[1].item());
            const torch::Tensor rights = others.select(1, 2).clamp_max(ref[2].item());
            const torch::Tensor bottoms = others.select(1, 3).clamp_max(ref[3].item());
            const torch::Tensor overlaps = (rights - lefts).clamp_min(0) * (bottoms - tops).clamp_min(0);

            const torch::Tensor ious = overlaps / (areas[best] + torch::index_select(areas, 0, rest) - overlaps);
            // `+ 1` maps positions in `rest` back to positions in `order`.
            order = torch::index_select(order, 0, torch::nonzero(ious <= iou_thresh).select(1, 0) + 1);
        }
        output.push_back(torch::index_select(dets, 0, keep.slice(0, 0, kept)));
    }
    return output;
}
int main(int argc, char* argv[])
{
torch::DeviceType device_type = at::kCPU;
if (torch::cuda::is_available())
{
device_type = at::kCUDA;
std::cout << "cuda::is_available():" << torch::cuda::is_available() << std::endl;
}
torch::jit::script::Module module = torch::jit::load("yolov5n-gpu.torchscript");
module.to(device_type);
std::vector<std::string> classnames;
std::ifstream f("class_det.txt");
std::string name;
while (std::getline(f, name))
{
classnames.push_back(name);
}
cv::Mat frame = cv::imread("bus.jpg"), img;
cv::resize(frame, img, cv::Size(640, 640));
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
img.convertTo(img, CV_32FC3, 1.0f / 255.0f);
auto imgTensor = torch::from_blob(img.data, { 1, img.rows, img.cols, img.channels() }).to(device_type);
imgTensor = imgTensor.permute({ 0, 3, 1, 2 }).contiguous();
std::vector<torch::jit::IValue> inputs;
inputs.emplace_back(imgTensor);
torch::jit::IValue output = module.forward(inputs);
auto preds = output.toTuple()->elements()[0].toTensor();
std::vector<torch::Tensor> dets = non_max_suppression(preds, 0.35, 0.5);
if (dets.size() > 0)
{
for (size_t i = 0; i < dets[0].sizes()[0]; ++i)
{
float left = dets[0][i][0].item().toFloat() * frame.cols / 640;
float top = dets[0][i][1].item().toFloat() * frame.rows / 640;
float right = dets[0][i][2].item().toFloat() * frame.cols / 640;
float bottom = dets[0][i][3].item().toFloat() * frame.rows / 640;
float score = dets[0][i][4].item().toFloat();
int classID = dets[0][i][5].item().toInt();
cv::rectangle(frame, cv::Rect(left, top, (right - left), (bottom - top)), cv::Scalar(0, 255, 0), 2);
cv::putText(frame, classnames[classID] + ": " + cv::format("%.2f", score),
cv::Point(left, top), cv::FONT_HERSHEY_SIMPLEX, (right - left) / 200, cv::Scalar(0, 255, 0), 2);
}
}
cv::imshow("", frame);
cv::waitKey();
return 0;
}
onnxruntime推理
#include
#include
#include
#include
/// Resizes `image` to fit `newShape` and pads the remainder with `color`.
///
/// @param image     Source image.
/// @param outImage  Receives the resized and padded image.
/// @param params    Receives {x ratio, y ratio, left padding, top padding}.
/// @param newShape  Target size.
/// @param autoShape When true, the padding is reduced to its remainder modulo `stride`.
/// @param scaleFill When true (and autoShape is false), the image is stretched to
///                  exactly `newShape` with independent x/y ratios and no padding.
/// @param scaleUp   When false, the scale factor is capped at 1 (shrink only).
/// @param stride    Modulus used by `autoShape`.
/// @param color     Border colour.
void LetterBox(const cv::Mat& image, cv::Mat& outImage,
    cv::Vec4d& params,
    const cv::Size& newShape = cv::Size(640, 640),
    bool autoShape = false,
    bool scaleFill = false,
    bool scaleUp = true,
    int stride = 32,
    const cv::Scalar& color = cv::Scalar(114, 114, 114))
{
    const cv::Size shape = image.size();

    // Largest uniform scale at which the image still fits inside newShape.
    float r = std::min(static_cast<float>(newShape.height) / static_cast<float>(shape.height),
                       static_cast<float>(newShape.width) / static_cast<float>(shape.width));
    if (!scaleUp)
    {
        r = std::min(r, 1.0f);
    }

    float ratio[2]{ r, r };
    int new_un_pad[2] = { static_cast<int>(std::round(static_cast<float>(shape.width) * r)),
                          static_cast<int>(std::round(static_cast<float>(shape.height) * r)) };

    // Total padding along each axis before it is split between the two sides.
    auto dw = static_cast<float>(newShape.width - new_un_pad[0]);
    auto dh = static_cast<float>(newShape.height - new_un_pad[1]);
    if (autoShape)
    {
        dw = static_cast<float>(static_cast<int>(dw) % stride);
        dh = static_cast<float>(static_cast<int>(dh) % stride);
    }
    else if (scaleFill)
    {
        dw = 0.0f;
        dh = 0.0f;
        new_un_pad[0] = newShape.width;
        new_un_pad[1] = newShape.height;
        ratio[0] = static_cast<float>(newShape.width) / static_cast<float>(shape.width);
        ratio[1] = static_cast<float>(newShape.height) / static_cast<float>(shape.height);
    }
    dw /= 2.0f;
    dh /= 2.0f;

    // Resize whenever EITHER dimension differs. Requiring both to differ skipped the
    // resize when only one axis changed (e.g. scaleFill on an image already 640 wide).
    if (shape.width != new_un_pad[0] || shape.height != new_un_pad[1])
        cv::resize(image, outImage, cv::Size(new_un_pad[0], new_un_pad[1]));
    else
        outImage = image.clone();

    // The -/+ 0.1 nudges send an odd total padding's extra pixel to the bottom/right side.
    const int top = static_cast<int>(std::round(dh - 0.1f));
    const int bottom = static_cast<int>(std::round(dh + 0.1f));
    const int left = static_cast<int>(std::round(dw - 0.1f));
    const int right = static_cast<int>(std::round(dw + 0.1f));

    params[0] = ratio[0];
    params[1] = ratio[1];
    params[2] = left;
    params[3] = top;
    cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
/// Greedy, class-agnostic non-maximum suppression performed in place.
///
/// The three vectors are parallel: element i of each describes one detection.
/// On return they contain only the surviving detections, ordered by descending
/// confidence.
///
/// @param boxes     Detection rectangles.
/// @param confs     Confidence of each detection.
/// @param classIds  Class id of each detection; carried along but not used when
///                  deciding which boxes suppress each other.
/// @param threshold A box is suppressed when its IoU with a higher-confidence
///                  surviving box is >= this value.
void nms(std::vector<cv::Rect>& boxes, std::vector<float>& confs, std::vector<int>& classIds, float threshold)
{
    assert(boxes.size() == confs.size());
    // classIds is indexed in lock-step with boxes below, so it must be the same length too.
    assert(boxes.size() == classIds.size());

    struct BoxConf
    {
        cv::Rect box;
        float conf;
        int id;
    };

    std::vector<BoxConf> candidates;
    candidates.reserve(boxes.size());
    for (size_t i = 0; i < boxes.size(); ++i)
    {
        candidates.push_back(BoxConf{ boxes[i], confs[i], classIds[i] });
    }

    // Highest confidence first; compare by reference to avoid copying each element.
    std::sort(candidates.begin(), candidates.end(),
        [](const BoxConf& a, const BoxConf& b) { return a.conf > b.conf; });

    const size_t total = candidates.size();
    std::vector<float> area(total);
    for (size_t i = 0; i < total; ++i)
    {
        area[i] = static_cast<float>(candidates[i].box.width) * static_cast<float>(candidates[i].box.height);
    }

    std::vector<bool> isSuppressed(total, false);
    for (size_t i = 0; i < total; ++i)
    {
        if (isSuppressed[i]) { continue; }
        const cv::Rect& a = candidates[i].box;
        for (size_t j = i + 1; j < total; ++j)
        {
            if (isSuppressed[j]) { continue; }
            const cv::Rect& b = candidates[j].box;

            // Intersection rectangle; width/height are clamped to zero when the boxes do not overlap.
            const float x1 = static_cast<float>((std::max)(a.x, b.x));
            const float y1 = static_cast<float>((std::max)(a.y, b.y));
            const float x2 = static_cast<float>((std::min)(a.x + a.width, b.x + b.width));
            const float y2 = static_cast<float>((std::min)(a.y + a.height, b.y + b.height));
            const float w = (std::max)(0.0f, x2 - x1);
            const float h = (std::max)(0.0f, y2 - y1);
            const float inter = w * h;

            // Two zero-area boxes give 0/0 = NaN, which compares false and so never suppresses.
            const float ovr = inter / (area[i] + area[j] - inter);
            if (ovr >= threshold) { isSuppressed[j] = true; }
        }
    }

    boxes.clear();
    confs.clear();
    classIds.clear();
    for (size_t i = 0; i < total; ++i)
    {
        if (!isSuppressed[i])
        {
            boxes.push_back(candidates[i].box);
            confs.push_back(candidates[i].conf);
            classIds.push_back(candidates[i].id);
        }
    }
}
int main(int argc, char* argv[])
{
std::string imgpath = "zidane.jpg";
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "yolov5s");
Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(1);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
OrtCUDAProviderOptions cuda_option;
cuda_option.device_id = 0;
cuda_option.arena_extend_strategy = 0;
cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
cuda_option.gpu_mem_limit = SIZE_MAX;
cuda_option.do_copy_in_default_stream = 1;
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
session_options.AppendExecutionProvider_CUDA(cuda_option);
const wchar_t* model_path = L"yolov5s-det.onnx";
std::vector<std::string> class_names;
std::string classesFile = "coco.names";
std::ifstream ifs(classesFile.c_str());
std::string line;
while (getline(ifs, line)) class_names.push_back(line);
Ort::Session session(env, model_path, session_options);
Ort::AllocatorWithDefaultOptions allocator;
size_t num_input_nodes = session.GetInputCount();
std::vector<const char*> input_node_names = { "images" };
std::vector<const char*> output_node_names = { "output" };
const size_t input_tensor_size = 3 * 640 * 640;
std::vector<float> input_tensor_values(input_tensor_size);
cv::Mat srcimg = cv::imread(imgpath);
int newh = 0, neww = 0, padh = 0, padw = 0;
cv::Mat dstimg;
cv::Vec4d params;
cv::Mat letterbox;
LetterBox(srcimg, dstimg, params);
for (int c = 0; c < 3; c++)
{
for (int i = 0; i < 640; i++)
{
for (int j = 0; j < 640; j++)
{
float pix = dstimg.ptr<uchar>(i)[j * 3 + 2 - c];
input_tensor_values[c * 640 * 640 + i * 640 + size_t(j)] = pix / 255.0;
}
}
}
std::vector<int64_t> input_node_dims = { 1, 3, 640, 640 };
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), input_node_dims.size());
std::vector<Ort::Value> ort_inputs;
ort_inputs.push_back(std::move(input_tensor));
std::vector<Ort::Value> output_tensors;
for (int i = 0; i < 10; i++)
{
clock_t start_time = clock();
output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), ort_inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size());
clock_t end_time = clock();
std::cout << "inference time: " << static_cast<double>(end_time - start_time) / CLOCKS_PER_SEC << " seconds" << std::endl;
}
const float* rawOutput = output_tensors[0].GetTensorData<float>();
std::vector<int64_t> outputShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
size_t count = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
std::vector<float> output(rawOutput, rawOutput + count);
std::vector<cv::Rect> boxes;
std::vector<float> confs;
std::vector<int> classIds;
int numClasses = (int)outputShape[2] - 5;
int elementsInBatch = (int)(outputShape[1] * outputShape[2]);
float confThreshold = 0.5;
float iouThreshold = 0.5;
for (auto it = output.begin(); it != output.begin() + elementsInBatch; it += outputShape[2])
{
float clsConf = *(it + 4);
if (clsConf > confThreshold)
{
int centerX = (int)(*it);
int centerY = (int)(*(it + 1));
int width = (int)(*(it + 2));
int height = (int)(*(it + 3));
int x1 = centerX - width / 2;
int y1 = centerY - height / 2;
boxes.emplace_back(cv::Rect(x1, y1, width, height));
int bestClassId = -1;
float bestConf = 0.0;
for (int i = 5; i < numClasses + 5; i++)
{
if ((*(it + i)) > bestConf)
{
bestConf = it[i];
bestClassId = i - 5;
}
}
confs.push_back(clsConf);
classIds.push_back(bestClassId);
}
}
nms(boxes, confs, classIds, iouThreshold);
for (size_t i = 0; i < boxes.size(); ++i)
{
cv::rectangle(dstimg, cv::Point(boxes[i].tl().x, boxes[i].tl().y), cv::Point(boxes[i].br().x, boxes[i].br().y), cv::Scalar(0, 0, 255), 1);
cv::putText(dstimg, class_names[classIds[i]] + " " + std::to_string(confs[i]), cv::Point(boxes[i].tl().x, boxes[i].tl().y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 0), 1);
}
cv::imshow("result", dstimg);
cv::waitKey();
return 0;
}