DeepStream applications bring deep neural networks and other complex processing tasks into a stream-processing pipeline to enable near-real-time analytics on video and other sensor data. Extracting meaningful insights from these sensors creates opportunities to improve operational efficiency and safety. For example, cameras are currently the most widely deployed IoT sensor: they can be found in our homes, on streets, in parking lots, shopping malls, warehouses, and factories. The potential uses of video analytics are enormous: access control, loss prevention, automated checkout, surveillance, safety, automated inspection (QA), package sorting (smart logistics), traffic control and engineering, industrial automation, and more.
Most guides online generate the .engine file with trtexec; here we instead generate it with tensorrtx. Reference: https://github.com/wang-xinyu/tensorrtx/tree/master/scaled-yolov4
git clone https://github.com/wang-xinyu/tensorrtx.git
git clone -b yolov4-csp https://github.com/WongKinYiu/ScaledYOLOv4.git
// download yolov4-csp.weights from https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-csp#yolov4-csp
cp {tensorrtx}/scaled-yolov4/gen_wts.py {ScaledYOLOv4/}
cd {ScaledYOLOv4/}
python gen_wts.py yolov4-csp.weights
// a file 'yolov4_csp.wts' will be generated.
mv yolov4_csp.wts {tensorrtx}/scaled-yolov4/
cd {tensorrtx}/scaled-yolov4
mkdir build
cd build
cmake ..
make
sudo ./yolov4csp -s // serialize model to plan file i.e. 'yolov4csp.engine'
sudo ./yolov4csp -d ../../yolov3-spp/samples // deserialize plan file and run inference, the images in samples will be processed.
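Before wiring the engine into DeepStream, it is worth confirming that yolov4csp.engine deserializes on its own. Below is a minimal sketch, assuming TensorRT 7.x and that the tensorrtx plugin sources (yololayer.cu, mish.cu) are compiled into the same binary so the custom layers can be found during deserialization; the logger class and file name are illustrative:

#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>
#include <NvInfer.h>

// Minimal logger the TensorRT runtime requires (TensorRT 8 additionally
// requires 'noexcept' on this override).
class Logger : public nvinfer1::ILogger {
    void log(Severity severity, const char* msg) override {
        if (severity <= Severity::kWARNING) std::cerr << msg << std::endl;
    }
};

int main() {
    std::ifstream file("yolov4csp.engine", std::ios::binary);
    std::vector<char> blob((std::istreambuf_iterator<char>(file)),
                           std::istreambuf_iterator<char>());
    Logger logger;
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
    nvinfer1::ICudaEngine* engine =
        runtime->deserializeCudaEngine(blob.data(), blob.size());
    std::cout << (engine ? "engine deserialized OK" : "deserialization failed")
              << std::endl;
    return engine ? 0 : 1;
}

With the engine verified, the next step is the DeepStream pipeline. On Jetson (PLATFORM_TEGRA) the display path needs an extra transform element before the EGL sink, hence the #ifdef branches below: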
#ifdef PLATFORM_TEGRA
if (!gst_element_link_many(rtppay, parse, decoder, NULL))
{
printf("\nFailed to link elements 0.\n");
return -1;
}
if (!gst_element_link_many(streammux, pgie, nvvidconv, nvosd, transform, sink, NULL))
{
printf("\nFailed to link elements 2.\n");
return -1;
}
#else
if (!gst_element_link_many(rtppay, parse, decoder, sink, NULL))
{
printf("\nFailed to link elements.\n");
return -1;
}
#endif
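For reference, the elements linked above would have been created earlier with gst_element_factory_make. A sketch of the Tegra-branch elements, with factory names assumed from the DeepStream 5.x reference apps (the author's actual code may differ):

/* Assumed factory names, following the deepstream-test1/test3 conventions. */
GstElement *streammux = gst_element_factory_make("nvstreammux", "stream-muxer");
GstElement *pgie      = gst_element_factory_make("nvinfer", "primary-nvinference-engine");
GstElement *nvvidconv = gst_element_factory_make("nvvideoconvert", "nvvideo-converter");
GstElement *nvosd     = gst_element_factory_make("nvdsosd", "nv-onscreendisplay");
GstElement *transform = gst_element_factory_make("nvegltransform", "nvegl-transform");
GstElement *sink      = gst_element_factory_make("nveglglessink", "nvvideo-renderer");

On Tegra builds, a buffer probe is also attached to the OSD sink pad to count detections per frame: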
#ifdef PLATFORM_TEGRA
// Add a probe on the OSD sink pad to access the frame metadata; adapted from deepstream-test1.
osd_sink_pad = gst_element_get_static_pad(nvosd, "sink");
if (!osd_sink_pad) {
    g_print("Unable to get sink pad\n");
} else {
    gst_pad_add_probe(osd_sink_pad, GST_PAD_PROBE_TYPE_BUFFER,
        osd_sink_pad_buffer_probe, NULL, NULL);
    /* Unref inside the else branch: gst_object_unref on a NULL pad would
     * trigger a GLib assertion. */
    gst_object_unref(osd_sink_pad);
}
#endif
static GstPadProbeReturn
osd_sink_pad_buffer_probe (GstPad * pad, GstPadProbeInfo * info,
gpointer u_data)
{
GstBuffer *buf = (GstBuffer *) info->data;
guint num_rects = 0;
NvDsObjectMeta *obj_meta = NULL;
guint vehicle_count = 0;
guint person_count = 0;
NvDsMetaList * l_frame = NULL;
NvDsMetaList * l_obj = NULL;
NvDsDisplayMeta *display_meta = NULL;
NvDsBatchMeta *batch_meta = gst_buffer_get_nvds_batch_meta (buf);
for (l_frame = batch_meta->frame_meta_list; l_frame != NULL;
l_frame = l_frame->next) {
NvDsFrameMeta *frame_meta = (NvDsFrameMeta *) (l_frame->data);
int offset = 0;
for (l_obj = frame_meta->obj_meta_list; l_obj != NULL;
l_obj = l_obj->next) {
obj_meta = (NvDsObjectMeta *) (l_obj->data);
if (obj_meta->class_id == PGIE_CLASS_ID_VEHICLE) {
vehicle_count++;
num_rects++;
}
if (obj_meta->class_id == PGIE_CLASS_ID_PERSON) {
person_count++;
num_rects++;
}
}
display_meta = nvds_acquire_display_meta_from_pool(batch_meta);
NvOSD_TextParams *txt_params = &display_meta->text_params[0];
display_meta->num_labels = 1;
txt_params->display_text = g_malloc0 (MAX_DISPLAY_LEN);
offset = snprintf(txt_params->display_text, MAX_DISPLAY_LEN, "Person = %d ", person_count);
/* Pass the remaining space, not MAX_DISPLAY_LEN, to avoid writing past the buffer. */
offset = snprintf(txt_params->display_text + offset, MAX_DISPLAY_LEN - offset, "Vehicle = %d ", vehicle_count);
/* Now set the offsets where the string should appear */
txt_params->x_offset = 10;
txt_params->y_offset = 12;
/* Font , font-color and font-size */
txt_params->font_params.font_name = "Serif";
txt_params->font_params.font_size = 10;
txt_params->font_params.font_color.red = 1.0;
txt_params->font_params.font_color.green = 1.0;
txt_params->font_params.font_color.blue = 1.0;
txt_params->font_params.font_color.alpha = 1.0;
/* Text background color */
txt_params->set_bg_clr = 1;
txt_params->text_bg_clr.red = 0.0;
txt_params->text_bg_clr.green = 0.0;
txt_params->text_bg_clr.blue = 0.0;
txt_params->text_bg_clr.alpha = 1.0;
nvds_add_display_meta_to_frame(frame_meta, display_meta);
}
g_print ("Frame Number = %d Number of objects = %d "
"Vehicle Count = %d Person Count = %d\n",
frame_number, num_rects, vehicle_count, person_count);
frame_number++;
return GST_PAD_PROBE_OK;
}
The custom bounding-box parser that understands the tensorrtx output tensor is adapted from NVIDIA's yolov4_deepstream sample:
git clone https://github.com/NVIDIA-AI-IOT/yolov4_deepstream
cd yolov4_deepstream/
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>
#include "nvdsinfer_custom_impl.h"
#define BBOX_CONF_THRESH 0.5
#define NMS_THRESH 0.4
static constexpr int LOCATIONS = 4;
struct alignas(float) Detection {
//x y w h
float bbox[LOCATIONS];
float det_confidence;
float class_id;
float class_confidence;
};
static const int NUM_CLASSES_YOLO = 1;
static const int MAX_OUTPUT_BBOX_COUNT = 1000;
static const int DETECTION_SIZE = sizeof(Detection) / sizeof(float);
// Kept from the tensorrtx sample for confidence sorting; unused here because
// clustering/NMS is delegated to DeepStream (cluster-mode=2 in the config below).
bool cmp(const Detection& a, const Detection& b) {
    return a.det_confidence > b.det_confidence;
}
float clamp(const float val, const float minVal, const float maxVal) {
assert(minVal <= maxVal);
return std::min(maxVal, std::max(minVal, val));
}
extern "C" bool NvDsInferParseYoloV4(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>&objectList);
static void convertBBoxYoloV4(const Detection& detect, const uint& netW, const uint& netH, NvDsInferParseObjectInfo& b) {
const float* bbox = detect.bbox;
float xCenter = bbox[0];
float yCenter = bbox[1];
float w = bbox[2];
float h = bbox[3];
float x0 = xCenter - w * 0.5;
float y0 = yCenter - h * 0.5;
float x1 = x0 + w;
float y1 = y0 + h;
x0 = clamp(x0, 0, netW);
y0 = clamp(y0, 0, netH);
x1 = clamp(x1, 0, netW);
y1 = clamp(y1, 0, netH);
b.left = x0;
b.width = clamp(x1 - x0, 0, netW);
b.top = y0;
b.height = clamp(y1 - y0, 0, netH);
}
static void
decodeYoloV4Tensor(std::vector<Detection>& probs, const uint& netW, const uint& netH, std::vector<NvDsInferParseObjectInfo>& objInfos)
{
for (uint i = 0; i < probs.size(); ++i)
{
NvDsInferParseObjectInfo obj;
convertBBoxYoloV4(probs[i], netW, netH, obj);
// Skip (not return): one degenerate box must not drop the remaining detections.
if (obj.width < 1 || obj.height < 1) continue;
obj.detectionConfidence = probs[i].det_confidence;
obj.classId = probs[i].class_id;
objInfos.push_back(obj);
}
}
extern "C" bool NvDsInferParseYoloV4(
std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams,
std::vector<NvDsInferParseObjectInfo>&objectList)
{
const NvDsInferLayerInfo& layer = outputLayersInfo[0];
std::vector<Detection> res;
std::map<float, std::vector<Detection>> m;
float* output = (float*)layer.buffer;
for (int i = 0; i < output[0] && i < MAX_OUTPUT_BBOX_COUNT; i++) {
if (output[1 + DETECTION_SIZE * i + 4] <= BBOX_CONF_THRESH) continue;
Detection det;
memcpy(&det, &output[1 + DETECTION_SIZE * i], DETECTION_SIZE * sizeof(float));
res.push_back(det);
}
decodeYoloV4Tensor(res, networkInfo.width, networkInfo.height, objectList);
return true;
}
/* The two helpers below are carried over from the stock DeepStream YOLO parser;
 * nothing in NvDsInferParseYoloV4 calls them, so they can be removed. */
static NvDsInferParseObjectInfo convertBBox(const float& bx, const float& by, const float& bw,
                                            const float& bh, const int& stride, const uint& netW,
                                            const uint& netH)
{
NvDsInferParseObjectInfo b;
// Restore coordinates to network input resolution
float xCenter = bx * stride;
float yCenter = by * stride;
float x0 = xCenter - bw / 2;
float y0 = yCenter - bh / 2;
float x1 = x0 + bw;
float y1 = y0 + bh;
x0 = clamp(x0, 0, netW);
y0 = clamp(y0, 0, netH);
x1 = clamp(x1, 0, netW);
y1 = clamp(y1, 0, netH);
b.left = x0;
b.width = clamp(x1 - x0, 0, netW);
b.top = y0;
b.height = clamp(y1 - y0, 0, netH);
return b;
}
static void addBBoxProposal(const float bx, const float by, const float bw, const float bh,
const uint stride, const uint& netW, const uint& netH, const int maxIndex,
const float maxProb, std::vector<NvDsInferParseObjectInfo>& binfo)
{
NvDsInferParseObjectInfo bbi = convertBBox(bx, by, bw, bh, stride, netW, netH);
if (bbi.width < 1 || bbi.height < 1) return;
bbi.detectionConfidence = maxProb;
bbi.classId = maxIndex;
binfo.push_back(bbi);
}
/* Check that the custom function has been defined correctly */
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloV4);
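To sanity-check the parser without a running pipeline, a hypothetical stand-alone harness can feed a hand-built tensorrtx-style buffer through NvDsInferParseYoloV4 (all values below are made up; compile against the DeepStream headers and link the parser object file):

#include <iostream>
#include <vector>
#include "nvdsinfer_custom_impl.h"

extern "C" bool NvDsInferParseYoloV4(
    std::vector<NvDsInferLayerInfo> const& outputLayersInfo,
    NvDsInferNetworkInfo const& networkInfo,
    NvDsInferParseDetectionParams const& detectionParams,
    std::vector<NvDsInferParseObjectInfo>& objectList);

int main() {
    // Layout: [count, x, y, w, h, det_conf, class_id, class_conf]
    float buf[8] = {1.0f, 208.0f, 208.0f, 100.0f, 60.0f, 0.9f, 0.0f, 0.9f};
    NvDsInferLayerInfo layer{};
    layer.buffer = buf;
    NvDsInferNetworkInfo net{416, 416, 3};       // width, height, channels
    NvDsInferParseDetectionParams params{};      // unused by this parser
    std::vector<NvDsInferParseObjectInfo> objects;
    NvDsInferParseYoloV4({layer}, net, params, objects);
    if (!objects.empty())
        std::cout << "left=" << objects[0].left << " top=" << objects[0].top
                  << " w=" << objects[0].width << " h=" << objects[0].height << "\n";
    return 0;
}

The nvinfer configuration that loads the compiled parser library then looks like this: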
[property]
gpu-id=0
# 1/255: tensorrtx preprocessing expects pixel values scaled to [0,1]
net-scale-factor=0.0039215697906911373
# 0=RGB, 1=BGR
model-color-format=0
model-engine-file=yolov4-face.engine
labelfile-path=labels.txt
batch-size=1
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=1
gie-unique-id=1
network-type=0
is-classifier=0
## 0=Group Rectangles, 1=DBSCAN, 2=NMS, 3= DBSCAN+NMS Hybrid, 4 = None(No clustering)
cluster-mode=2
maintain-aspect-ratio=1
parse-bbox-func-name=NvDsInferParseYoloV4
custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo_plugins.so
[class-attrs-all]
nms-iou-threshold=0.6
pre-cluster-threshold=0.4
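Two notes on this config. First, labels.txt holds one class name per line; with num-detected-classes=1 and an engine named yolov4-face.engine, it presumably contains the single line face. Second, the parser above already drops boxes at BBOX_CONF_THRESH = 0.5 before DeepStream ever sees them, so pre-cluster-threshold=0.4 here is effectively superseded; lower the hard-coded constant if you want the config value to take effect.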
Finally, two small changes are needed in the tensorrtx plugin sources so that all GPU work runs on DeepStream's stream. DeepStream drives TensorRT on its own CUDA stream, which each plugin receives as the stream argument of enqueue(); kernel launches and memsets issued on the default stream instead can execute out of order with the rest of the network.
yololayer.cu
1.
CalDetection<<< (yolo.width*yolo.height*batchSize + mThreadCount - 1) / mThreadCount, mThreadCount>>>
to:
CalDetection<<< (yolo.width*yolo.height*batchSize + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>>
2.
CUDA_CHECK(cudaMemset(output + idx*outputElem, 0, sizeof(float)));
to:
CUDA_CHECK(cudaMemsetAsync(output + idx*outputElem, 0, sizeof(float), stream));
mish.cu
1.
mish_kernel<<<grid_size, block_size>>>(inputs[0], output, input_size_ * batchSize);
to:
mish_kernel<<<grid_size, block_size, 0, stream>>>(inputs[0], output, input_size_ * batchSize);
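For context, the stream these launches now use originates in the plugin's enqueue() method. A sketch, assuming the tensorrtx MishPlugin layout (member and method names follow the tensorrtx source; verify against your checkout):

// TensorRT (and thus DeepStream) hands enqueue() the stream the whole
// inference graph runs on; forwarding it keeps the plugin's kernels ordered
// with the rest of the network instead of landing on the default stream.
int MishPlugin::enqueue(int batchSize, const void* const* inputs, void** outputs,
                        void* workspace, cudaStream_t stream) {
    forwardGpu((const float* const*)inputs, (float*)outputs[0], stream, batchSize);
    return 0;
}

void MishPlugin::forwardGpu(const float* const* inputs, float* output,
                            cudaStream_t stream, int batchSize) {
    int block_size = thread_count_;
    int grid_size = (input_size_ * batchSize + block_size - 1) / block_size;
    // The fourth launch parameter is the stream: the exact change shown above.
    mish_kernel<<<grid_size, block_size, 0, stream>>>(inputs[0], output,
                                                      input_size_ * batchSize);
}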