FFmpeg + NVIDIA Decoding SDK + GPU: Hardware-Decoding a Video Stream into a cv::Mat

Method Overview

RTSP streams can be decoded in two ways: software decoding and hardware decoding. Software decoding is usually done with the FFmpeg codec library, but CPU usage is high: decoding a single 1080p stream takes roughly 70% CPU, which in practice severely limits how many cameras one machine can decode. Hardware decoding offloads the work to dedicated hardware, and vendors ship the corresponding decode libraries, e.g. NVIDIA's decoding SDK and HiSilicon's hardware decode modules. This consumes GPU memory, but it drastically reduces CPU load. In this post, hardware decoding of one 1080p stream accelerated by an NVIDIA GPU (RTX 2080 Ti) takes about 15% CPU and about 100 MB of GPU memory; a follow-up post will cover decoding on the HiSilicon Hi3559A.

Software Decoding

For software decoding, an RTSP stream can be opened directly with OpenCV: the cv::VideoCapture class wraps FFmpeg's decoder. I won't go into detail here, as it is easy to try yourself; the key lines are:

  string haikang2 = "rtsp://admin:[email protected]/Streaming/Channels/1";
  cv::VideoCapture video1;
  video1.open(haikang2);
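
The snippet above only opens the stream. A minimal read loop that pulls decoded frames into a cv::Mat might look like the following; it is illustrative, not code from the project, and reuses the video1 object from above:

  cv::Mat frame;
  while (video1.read(frame))            // blocks until the next decoded frame is available
  {
      if (frame.empty())                // skip dropped or late frames
          continue;
      cv::imshow("soft decode", frame);
      if (cv::waitKey(1) == 27)         // ESC to quit
          break;
  }
  video1.release();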

Hardware Decoding

The pipeline is as follows:

  • Parse the RTSP stream address and demux it with FFmpeg
  • Initialize the NVIDIA decoder class
  • Drive the decode module (see the sketch after this list)
    Three callback functions do most of the work here:
    1. HandleVideoSequenceProc(): queries decoder capabilities and creates/configures the decoder
    2. HandlePictureDecodeProc(): submits each picture to the decode function
    3. HandlePictureDisplayProc(): post-processes the decoded frame, including resolution and format conversion (NV12 to RGBA)
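
Before the key code, here is a rough sketch of how these pieces fit together. It is not the exact code of this project: the names OpenStream, fmt_ctx, video_stream and the hard-coded cudaVideoCodec_H264 are illustrative, error handling is omitted, and CUDA context/lock creation is assumed to happen during the decoder class's initialization. FFmpeg only demuxes the RTSP URL into compressed packets; cuvidCreateVideoParser() registers the three callbacks, and cuvidParseVideoData() invokes them as the bitstream is consumed.

extern "C" {
#include <libavformat/avformat.h>
}
#include <cuda.h>
#include <nvcuvid.h>

/* Illustrative only -- the real logic lives in the GetVideo class. */
void OpenStream(const char* url, UserInfoData* user)
{
	avformat_network_init();
	AVFormatContext* fmt_ctx = nullptr;
	avformat_open_input(&fmt_ctx, url, nullptr, nullptr);          /* connect to the RTSP source */
	avformat_find_stream_info(fmt_ctx, nullptr);
	int video_stream = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);

	/* Register the three callbacks with the CUVID parser.
	   cuInit(), cuCtxCreate() and cuvidCtxLockCreate() are assumed to have run already. */
	CUVIDPARSERPARAMS parser_params = {};
	parser_params.CodecType = cudaVideoCodec_H264;                 /* assumed; map from the stream's codec id in real code */
	parser_params.ulMaxNumDecodeSurfaces = 20;                     /* matches ulNumDecodeSurfaces below */
	parser_params.ulMaxDisplayDelay = 0;                           /* low latency for a live stream */
	parser_params.pUserData = user;                                /* handed back to every callback */
	parser_params.pfnSequenceCallback = GetVideo::HandleVideoSequenceProc;
	parser_params.pfnDecodePicture = GetVideo::HandlePictureDecodeProc;
	parser_params.pfnDisplayPicture = GetVideo::HandlePictureDisplayProc;
	CUvideoparser parser = nullptr;
	cuvidCreateVideoParser(&parser, &parser_params);

	/* Feed compressed packets; the parser fires the callbacks as it goes. */
	AVPacket* pkt = av_packet_alloc();
	while (av_read_frame(fmt_ctx, pkt) >= 0)
	{
		if (pkt->stream_index == video_stream)
		{
			CUVIDSOURCEDATAPACKET cu_pkt = {};
			cu_pkt.payload = pkt->data;
			cu_pkt.payload_size = pkt->size;
			cu_pkt.flags = CUVID_PKT_TIMESTAMP;
			cu_pkt.timestamp = pkt->pts;
			cuvidParseVideoData(parser, &cu_pkt);
		}
		av_packet_unref(pkt);
	}
	av_packet_free(&pkt);

	cuvidDestroyVideoParser(parser);
	avformat_close_input(&fmt_ctx);
}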

Key Code

int CUDAAPI GetVideo::HandleVideoSequenceProc(void* user, CUVIDEOFORMAT* fmt)
{
	UserInfoData* puser = (UserInfoData*)user;
	if (nullptr == _vdo->m_cuContext)
	{
		printf("The CUcontext is nullptr, you should initialize it before kicking off the decoder.\n");
		exit(EXIT_FAILURE);
	}
	/* Query whether this GPU's NVDEC engine supports the stream's codec, chroma format and bit depth. */
	CUVIDDECODECAPS decode_caps;
	memset((char*)&decode_caps, 0x00, sizeof(decode_caps));
	decode_caps.eCodecType = fmt->codec;
	decode_caps.eChromaFormat = fmt->chroma_format;
	decode_caps.nBitDepthMinus8 = fmt->bit_depth_luma_minus8;
	cuCtxPushCurrent(_vdo->m_cuContext);
	_vdo->r = cuvidGetDecoderCaps(&decode_caps);
	if (CUDA_SUCCESS != _vdo->r)
	{
		cuGetErrorString(_vdo->r, &_vdo->err_str);
		printf("Failed to get decoder caps: %s (exiting).\n", _vdo->err_str);
		exit(EXIT_FAILURE);
	}
	cuCtxPopCurrent(NULL);
	if (!decode_caps.bIsSupported)
	{
		printf("The video file format is not supported by NVDECODE. (exiting).\n");
		exit(EXIT_FAILURE);
	}
	if (puser->m_nWidth && puser->m_nHeight) {

        // cuvidCreateDecoder() has been called before, and now there's possible config change
        return _vdo->ReconfigureDecoder(fmt, puser);
    }
	puser->m_eCodec = fmt->codec;
	puser->m_videoFormat = *fmt;
	
	/* Create decoder context. */
	/* Fill in the decoder creation parameters. */
	CUVIDDECODECREATEINFO videoDecodeCreateInfo = { 0 };
	videoDecodeCreateInfo.CodecType = fmt->codec;
	videoDecodeCreateInfo.ChromaFormat = fmt->chroma_format;
	videoDecodeCreateInfo.OutputFormat = (fmt->bit_depth_luma_minus8) ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
	videoDecodeCreateInfo.bitDepthMinus8 = fmt->bit_depth_luma_minus8;
	videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
	videoDecodeCreateInfo.ulNumOutputSurfaces = 1;
	videoDecodeCreateInfo.ulNumDecodeSurfaces = 20;   /* Size of the internal decode surface pool. */
	videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
	videoDecodeCreateInfo.vidLock = _vdo->m_ctxLock;
	videoDecodeCreateInfo.ulIntraDecodeOnly = 0; /* Set to 1 when the source only has intra frames; memory will be optimized. */
	videoDecodeCreateInfo.ulTargetWidth = fmt->coded_width;
	videoDecodeCreateInfo.ulTargetHeight = fmt->coded_height;
	videoDecodeCreateInfo.ulWidth = fmt->coded_width;
	videoDecodeCreateInfo.ulHeight = fmt->coded_height;
	if (puser->m_nMaxWidth < (int)fmt->coded_width)
        puser->m_nMaxWidth = fmt->coded_width;
    if (puser->m_nMaxHeight < (int)fmt->coded_height)
        puser->m_nMaxHeight = fmt->coded_height;
	videoDecodeCreateInfo.ulMaxWidth = puser->m_nMaxWidth;
    videoDecodeCreateInfo.ulMaxHeight = puser->m_nMaxHeight;
	if (!(puser->m_cropRect.r && puser->m_cropRect.b) && !(puser->m_resizeDim.w && puser->m_resizeDim.h)) {
        puser->m_nWidth = fmt->display_area.right - fmt->display_area.left;
        puser->m_nHeight = fmt->display_area.bottom - fmt->display_area.top;
        videoDecodeCreateInfo.ulTargetWidth = fmt->coded_width;
        videoDecodeCreateInfo.ulTargetHeight = fmt->coded_height;
    } else {
        if (puser->m_resizeDim.w && puser->m_resizeDim.h) {
            videoDecodeCreateInfo.display_area.left = fmt->display_area.left;
            videoDecodeCreateInfo.display_area.top = fmt->display_area.top;
            videoDecodeCreateInfo.display_area.right = fmt->display_area.right;
            videoDecodeCreateInfo.display_area.bottom = fmt->display_area.bottom;
            puser->m_nWidth = puser->m_resizeDim.w;
            puser->m_nHeight = puser->m_resizeDim.h;
        }
        if (puser->m_cropRect.r && puser->m_cropRect.b) {
            videoDecodeCreateInfo.display_area.left = puser->m_cropRect.l;
            videoDecodeCreateInfo.display_area.top = puser->m_cropRect.t;
            videoDecodeCreateInfo.display_area.right = puser->m_cropRect.r;
            videoDecodeCreateInfo.display_area.bottom = puser->m_cropRect.b;
            puser->m_nWidth = puser->m_cropRect.r - puser->m_cropRect.l;
            puser->m_nHeight = puser->m_cropRect.b - puser->m_cropRect.t;
        }
        videoDecodeCreateInfo.ulTargetWidth = puser->m_nWidth;
        videoDecodeCreateInfo.ulTargetHeight = puser->m_nHeight;
    }
	puser->m_nSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;
    puser->m_nSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
	puser->m_displayRect.b = videoDecodeCreateInfo.display_area.bottom;
    puser->m_displayRect.t = videoDecodeCreateInfo.display_area.top;
    puser->m_displayRect.l = videoDecodeCreateInfo.display_area.left;
    puser->m_displayRect.r = videoDecodeCreateInfo.display_area.right;
	cuCtxPushCurrent(_vdo->m_cuContext);
	{
		_vdo->r = cuvidCreateDecoder(&puser->m_hDecoder, &videoDecodeCreateInfo);
		if (CUDA_SUCCESS != _vdo->r) {
			cuGetErrorString(_vdo->r, &_vdo->err_str);
			printf("Failed to create the decoder: %s. (exiting).\n", _vdo->err_str);
			exit(EXIT_FAILURE);
		}
	}
	cuCtxPopCurrent(nullptr);
	printf("Created the decoder.\n");
	return 1;
}
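
When the resolution changes mid-stream, the sequence callback above returns early through ReconfigureDecoder(), which is not listed in this post. Below is a minimal sketch of what it might do with cuvidReconfigureDecoder(), the Video Codec SDK's in-place reconfigure API. It assumes the same GetVideo / UserInfoData members used above and omits crop/resize handling; the real implementation may differ.

/* A sketch only -- the real GetVideo::ReconfigureDecoder() may differ. */
int GetVideo::ReconfigureDecoder(CUVIDEOFORMAT* fmt, UserInfoData* puser)
{
	/* cuvidCreateDecoder() was given ulMaxWidth/ulMaxHeight; anything larger needs a full re-create. */
	if ((int)fmt->coded_width > puser->m_nMaxWidth || (int)fmt->coded_height > puser->m_nMaxHeight)
	{
		printf("New resolution exceeds ulMaxWidth/ulMaxHeight; the decoder must be recreated. (exiting).\n");
		exit(EXIT_FAILURE);
	}

	CUVIDRECONFIGUREDECODERINFO reconfigParams = { 0 };
	reconfigParams.ulWidth = fmt->coded_width;
	reconfigParams.ulHeight = fmt->coded_height;
	reconfigParams.ulTargetWidth = fmt->coded_width;
	reconfigParams.ulTargetHeight = fmt->coded_height;
	reconfigParams.ulNumDecodeSurfaces = 20;
	reconfigParams.display_area.left = fmt->display_area.left;
	reconfigParams.display_area.top = fmt->display_area.top;
	reconfigParams.display_area.right = fmt->display_area.right;
	reconfigParams.display_area.bottom = fmt->display_area.bottom;

	/* Keep the cached geometry in sync with the new stream. */
	puser->m_nWidth = fmt->display_area.right - fmt->display_area.left;
	puser->m_nHeight = fmt->display_area.bottom - fmt->display_area.top;
	puser->m_nSurfaceWidth = reconfigParams.ulTargetWidth;
	puser->m_nSurfaceHeight = reconfigParams.ulTargetHeight;
	puser->m_videoFormat = *fmt;

	cuCtxPushCurrent(m_cuContext);
	r = cuvidReconfigureDecoder(puser->m_hDecoder, &reconfigParams);
	cuCtxPopCurrent(NULL);
	if (CUDA_SUCCESS != r)
	{
		cuGetErrorString(r, &err_str);
		printf("Failed to reconfigure the decoder: %s. (exiting).\n", err_str);
		exit(EXIT_FAILURE);
	}
	return 1;   /* The parser treats a non-zero return from the sequence callback as success. */
}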

int CUDAAPI GetVideo::HandlePictureDecodeProc(void* user, CUVIDPICPARAMS* pic)
{
	UserInfoData* puser = (UserInfoData*)user;
	if (nullptr == puser->m_hDecoder)
	{
		printf("decoder is nullptr. (exiting).");
		exit(EXIT_FAILURE);
	}
	
	_vdo->r = cuvidDecodePicture(puser->m_hDecoder, pic);
	if (CUDA_SUCCESS != _vdo->r) 
	{
		printf("Failed to decode the picture.");
	}
	
	return 1;
}
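
The third callback, HandlePictureDisplayProc(), is where the decoded surface finally becomes a cv::Mat; it is not part of the listings above, so here is a minimal sketch of the usual pattern: map the decoded surface, copy its two NV12 planes to host memory, and convert with OpenCV. It assumes an 8-bit NV12 output surface and the same _vdo / UserInfoData members used above; the hand-off at the end is only a placeholder, and it converts to BGR (OpenCV's native channel order) rather than the RGBA mentioned earlier.

#include <opencv2/opencv.hpp>   /* for cv::Mat and cv::cvtColor */

int CUDAAPI GetVideo::HandlePictureDisplayProc(void* user, CUVIDPARSERDISPINFO* disp)
{
	UserInfoData* puser = (UserInfoData*)user;

	/* Map the decoded surface so it can be read through a device pointer. */
	CUVIDPROCPARAMS proc_params = { 0 };
	proc_params.progressive_frame = disp->progressive_frame;
	proc_params.second_field = disp->repeat_first_field + 1;
	proc_params.top_field_first = disp->top_field_first;
	proc_params.unpaired_field = disp->repeat_first_field < 0;

	CUdeviceptr src_frame = 0;
	unsigned int src_pitch = 0;
	cuCtxPushCurrent(_vdo->m_cuContext);
	_vdo->r = cuvidMapVideoFrame(puser->m_hDecoder, disp->picture_index, &src_frame, &src_pitch, &proc_params);
	if (CUDA_SUCCESS != _vdo->r)
	{
		cuGetErrorString(_vdo->r, &_vdo->err_str);
		printf("Failed to map the video frame: %s. (exiting).\n", _vdo->err_str);
		exit(EXIT_FAILURE);
	}

	/* Copy the NV12 data (full-height luma plane + half-height interleaved chroma plane) to host memory. */
	int width = puser->m_nWidth;
	int height = puser->m_nHeight;
	cv::Mat nv12(height + height / 2, width, CV_8UC1);

	CUDA_MEMCPY2D copy = { 0 };
	copy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
	copy.srcDevice = src_frame;
	copy.srcPitch = src_pitch;
	copy.dstMemoryType = CU_MEMORYTYPE_HOST;
	copy.dstHost = nv12.data;
	copy.dstPitch = width;
	copy.WidthInBytes = width;
	copy.Height = height;
	cuMemcpy2D(&copy);                               /* luma plane */

	copy.srcDevice = src_frame + (size_t)src_pitch * puser->m_nSurfaceHeight;   /* chroma starts after the full allocated surface height */
	copy.dstHost = nv12.data + (size_t)width * height;
	copy.Height = height / 2;
	cuMemcpy2D(&copy);                               /* chroma plane */

	cuvidUnmapVideoFrame(puser->m_hDecoder, src_frame);
	cuCtxPopCurrent(NULL);

	/* NV12 -> BGR on the CPU; a CUDA kernel or NPP can do this on the GPU instead. */
	cv::Mat bgr;
	cv::cvtColor(nv12, bgr, cv::COLOR_YUV2BGR_NV12);
	/* Hand "bgr" to the consumer here, e.g. push it into a frame queue (placeholder). */

	return 1;
}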
