配置好CUDA开发环境,同时确保合适的显卡支持
开发时需要的 Lib 和 Header File
// Link the cutil library variant selected by the build configuration
// (_DEBUG picks cutil32D.lib, otherwise cutil32.lib).
#ifdef _DEBUG
#pragma comment(lib, "cutil32D.lib")
#else
#pragma comment(lib, "cutil32.lib")
#endif
// Link the NVCUVID library (header: nvcuvid.h).
#pragma comment(lib, "nvcuvid.lib")
#include "cuviddec.h"
#include "cutil.h"
#include "cutil_inline_drvapi.h"
#include "cuda_runtime_api.h"
#include "cutil_inline_runtime.h"
#include "nvcuvid.h"
CUcontext m_cuContext; // CUDA driver context; comparable to a Windows handle
CUvideoctxlock m_CtxLock; // Context lock; comparable to a Windows CRITICAL_SECTION
CUvideodecoder m_CUDADecoder; // Video decoder handle
CUvideoparser m_hParser; // Frame parser, used to analyse each frame
CUDAPassStruct m_PassData; // User-defined data structure handed to the callback functions
调用的API
初始化:
// Initialisation: bring up the CUDA driver API, create a context and context
// lock on device 0, then create the H.264 decoder and the video parser.
// Every failure jumps to the EF label (defined outside this snippet).
CUresult cudaRes = CUDA_SUCCESS;
CUdevice device = 0;
int iCount = 0;

// The driver API must be initialised before any other driver call; do not
// continue if this fails.
cudaRes = cuInit(0);
if (cudaRes != CUDA_SUCCESS)
    goto EF;

// Check for a min spec of Compute 1.1 capability before running
if (!cutilDrvCudaCapabilities(1,1))
{
    cutilExit(0, NULL);
    goto EF;
}

cudaRes = cuDeviceGetCount(&iCount);
if (cudaRes != CUDA_SUCCESS)
    goto EF;
// Device 0 is used below, so at least one device has to be present.
if (iCount < 1)
{
    cudaRes = CUDA_ERROR_NO_DEVICE;
    goto EF;
}

cudaRes = cuDeviceGet(&device, 0);
if (cudaRes != CUDA_SUCCESS)
    goto EF;

cudaRes = cuCtxCreate(&m_cuContext, 0, device);
if (cudaRes != CUDA_SUCCESS)
    goto EF;

cudaRes = cuvidCtxLockCreate(&m_CtxLock, m_cuContext);
if (cudaRes != CUDA_SUCCESS)
    goto EF;

// Declared without an initialiser on purpose: the goto statements above jump
// past this declaration, which C++ only permits for uninitialised POD objects.
CUVIDDECODECREATEINFO dci;
// Fill the decoder-create-info struct from the given video-format struct.
memset(&dci, 0, sizeof(CUVIDDECODECREATEINFO));
// Create video decoder
dci.CodecType = cudaVideoCodec_H264;
dci.ulWidth = iW;  // video width
dci.ulHeight = iH; // video height
// NOTE(review): 2 decode surfaces may be too few for H.264 streams that use
// several reference frames - confirm against the streams being decoded.
dci.ulNumDecodeSurfaces = 2;
// Limit decode memory to 24MB (16M pixels at 4:2:0 = 24M bytes)
while (dci.ulNumDecodeSurfaces * dci.ulWidth * dci.ulHeight > 16*1024*1024)
{
    --dci.ulNumDecodeSurfaces;
}
dci.ChromaFormat = cudaVideoChromaFormat_420;   // pixel format of the input video
dci.OutputFormat = cudaVideoSurfaceFormat_NV12; // pixel format of the decoded output (author's note: only NV12 is supported at present)
dci.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
// No scaling
dci.ulTargetWidth = dci.ulWidth;   // output width after decoding
dci.ulTargetHeight = dci.ulHeight; // output height after decoding
dci.ulNumOutputSurfaces = 2;       // author's note: the header comment says only 2 are supported
dci.ulCreationFlags = cudaVideoCreate_Default; // see the header file for the flag definitions
// NOTE(review): m_CtxLock is created above but not handed to the decoder
// (the original had "ctx" commented out here) - confirm this is intended.
dci.vidLock = NULL;
cudaRes = cuvidCreateDecoder(&m_CUDADecoder, &dci); // create the H.264 decoder
if (cudaRes != CUDA_SUCCESS)
    goto EF;

// User structure handed to the parser callbacks.
m_PassData.CUDADecoder = m_CUDADecoder;
m_PassData.iWidth = dci.ulWidth;
m_PassData.iHeight = dci.ulHeight;
m_PassData.pFrameBuffer = m_pFrameBuffer;
m_PassData.puiFrameSize = &m_uiFrameSize;
m_PassData.pLocker = &m_Locker;

// Create the parser.
CUVIDPARSERPARAMS oVideoParserParameters;
memset(&oVideoParserParameters, 0, sizeof(CUVIDPARSERPARAMS));
oVideoParserParameters.CodecType = cudaVideoCodec_H264;
oVideoParserParameters.ulMaxNumDecodeSurfaces = dci.ulNumDecodeSurfaces;
oVideoParserParameters.ulMaxDisplayDelay = 4; // this flag is needed so the parser will push frames out to the decoder as quickly as it can
oVideoParserParameters.pUserData = &m_PassData; // user-defined structure passed to the callbacks
oVideoParserParameters.pfnSequenceCallback = NULL; // Called before decoding frames and/or whenever there is a format change
oVideoParserParameters.pfnDecodePicture = HandlePictureDecode; // Called when a picture is ready to be decoded (decode order)
oVideoParserParameters.pfnDisplayPicture = HandlePictureDisplay; // Called whenever a picture is ready to be displayed (display order)
cudaRes = cuvidCreateVideoParser(&m_hParser, &oVideoParserParameters);
if (cudaRes != CUDA_SUCCESS)
    goto EF;
使用的顺序和 Callback Function(回调函数)
提供给 Parser使用的两个 Callback function:
static int CUDAAPI HandlePictureDecode(void * pUserData, CUVIDPICPARAMS * pPicParams); // Called once a picture has been parsed; the callback can then call cuvidDecodePicture
static int CUDAAPI HandlePictureDisplay(void *pUserData, CUVIDPARSERDISPINFO * pPicParams); // Called once decoding has completed
使用Parser是为了取得CUVIDPICPARAMS资料后传递给 Decode API 使用
// Parser callback for a picture that is ready to be decoded (decode order).
//
// pUserData   - the CUDAPassStruct registered as the parser's pUserData.
// pPicParams  - picture parameters produced by the parser; forwarded
//               unchanged to cuvidDecodePicture.
//
// Returns 1 when the picture was submitted to the decoder, 0 when an argument
// is missing or cuvidDecodePicture reported an error.
int CUDAAPI HandlePictureDecode(void * pUserData, CUVIDPICPARAMS * pPicParams)
{
    CUDAPassStruct* pa = static_cast<CUDAPassStruct*>(pUserData);
    if (pa == NULL || pPicParams == NULL)
        return 0;

    // Report decode failures to the parser instead of always claiming success.
    CUresult cudaRes = cuvidDecodePicture(pa->CUDADecoder, pPicParams);
    return (cudaRes == CUDA_SUCCESS) ? 1 : 0;
}
// Parser callback for a picture that is ready to be displayed (display order).
// Maps the decoded frame, copies it into the host buffer of the user
// structure, and unmaps the frame again.
//
// pUserData   - the CUDAPassStruct registered as the parser's pUserData.
// pPicParams  - display info from the parser; only picture_index is used.
//
// On success *pa->puiFrameSize is set to the number of bytes copied
// (pitch * height * 3/2) and 1 is returned. Returns 0 when an argument is
// missing or when mapping, copying or unmapping fails; the frame size is left
// untouched if the copy did not succeed.
//
// NOTE(review): rows in the copied data are nDecodedPitch bytes apart, not
// iWidth - pFrameBuffer presumably has to be sized for the pitch; verify
// against the allocation site.
// NOTE(review): pa->pLocker is not taken here - confirm whether readers of
// pFrameBuffer need it.
int CUDAAPI HandlePictureDisplay(void *pUserData, CUVIDPARSERDISPINFO * pPicParams)
{
    CUDAPassStruct* pa = static_cast<CUDAPassStruct*>(pUserData);
    if (pa == NULL || pPicParams == NULL)
        return 0;

    CUVIDPROCPARAMS oVideoProcessingParameters;
    memset(&oVideoProcessingParameters, 0, sizeof(CUVIDPROCPARAMS));

    CUdeviceptr pDecodedFrame = 0;
    unsigned int nDecodedPitch = 0;
    CUresult oResult = cuvidMapVideoFrame(pa->CUDADecoder, pPicParams->picture_index, &pDecodedFrame, &nDecodedPitch, &oVideoProcessingParameters);
    // Without a mapped frame there is nothing to copy and nothing to unmap.
    if (oResult != CUDA_SUCCESS)
        return 0;

    unsigned int nv12_size = nDecodedPitch * (pa->iHeight + pa->iHeight/2); // 12bpp
    oResult = cuMemcpyDtoH(pa->pFrameBuffer, pDecodedFrame, nv12_size);
    // Publish the size only once the buffer really holds the frame.
    if (oResult == CUDA_SUCCESS)
        *pa->puiFrameSize = nv12_size;

    // Always unmap a successfully mapped frame, even if the copy failed.
    CUresult oUnmapResult = cuvidUnmapVideoFrame(pa->CUDADecoder, pDecodedFrame);
    return (oResult == CUDA_SUCCESS && oUnmapResult == CUDA_SUCCESS) ? 1 : 0;
}
释放
CUresult cudaRes = CUDA_SUCCESS;
cudaRes = cuvidCtxLockDestroy(m_CtxLock);
cudaRes = cuvidDestroyDecoder(m_CUDADecoder);
cudaRes = cuvidDestroyVideoParser(m_hParser);
cudaRes = cuCtxDestroy(m_cuContext);
m_CtxLock = 0;
m_CUDADecoder = 0;
m_hParser = 0;
m_cuContext = 0;
外部处理
由于目前输出的Pixel Format 为 NV12,在取得解码后的 Raw Data后有两种办法:
(1)用NV12的Render显示
(2)再转换一次 PixelFormat
参考:https://hauhan.wordpress.com/2010/07/17/nvidia-cuda-h-264-decode-%E5%BF%83%E5%BE%97/