Solution: pull the H264 stream with DirectShow + hardware-decode with h264_qsv + render with D3D
I recently found that FFmpeg cannot pull H264 from my USB camera; it only gets the MJPEG stream. And MJPEG can't be used for pushing a stream; as far as I can tell you have to push H264. So below, the H264 stream is pulled directly with DirectShow.
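You can check for yourself which formats the dshow device exposes by enumerating it through FFmpeg first (the device name "USB Camera" below is a placeholder; substitute the name reported by the first command):
ffmpeg -list_devices true -f dshow -i dummy
ffmpeg -f dshow -list_options true -i video="USB Camera"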
For dshow usage, refer to MSDN and the AmCap sample source.
Here I created a CaptureVideo worker class that inherits ISampleGrabberCB, so the H264 packets arrive in the BufferCB callback.
class CaptureVideo : public QObject, public ISampleGrabberCB
{
    Q_OBJECT
public:
    CaptureVideo(QObject *parent = nullptr);
    ~CaptureVideo(void);

    // Lifetime is managed by the owner, so reference counting is a no-op.
    STDMETHODIMP_(ULONG) AddRef() override { return 2; }
    STDMETHODIMP_(ULONG) Release() override { return 1; }
    STDMETHODIMP QueryInterface(REFIID riid, void **ppv) override {
        if (riid == IID_ISampleGrabberCB || riid == IID_IUnknown) {
            *ppv = (void *) static_cast<ISampleGrabberCB *>(this);
            return NOERROR;
        }
        return E_NOINTERFACE;
    }
    STDMETHODIMP SampleCB(double SampleTime, IMediaSample *pSample) override;
    STDMETHODIMP BufferCB(double dblSampleTime, BYTE *pBuffer, long lBufferSize) override;

    HRESULT OpenDevice(IMoniker *pMoniker);
    void Close(); // close all interfaces
    HRESULT ExtensionCtrl(int type, ULONG flag, byte *data, ULONG length);
    IMoniker *GetMoniker();
    HRESULT StartPreview();
    HRESULT StopPreview();
    void StartRecord();
    void StopRecord();
    HRESULT RequestKeyFrame(BYTE byStream);
    HRESULT OpenStream(BYTE byStream);
    HRESULT CloseStream(BYTE byStream);
    HRESULT SetIDR(BYTE byStream, WORD wIDR);
    HRESULT SetBitrate(BYTE byStream, BYTE byRCMode, BYTE byMinQP, BYTE byMaxQP, UINT nBitrate);
    HRESULT SetProfile(BYTE byStream, BYTE byProfile, BYTE byFrameRateMode);
    HRESULT SetVideoSize(BYTE byStream, WORD wWidth, WORD wHeight);
    HRESULT SetFrameRate(BYTE byStream, BYTE byFrameRate);
    HRESULT SetEncodeFormate(BYTE byStream, BYTE byEncode);
    bool initDecoder();
    bool yuv2Rgb(uchar *out, int outWidth, int outHeight);
    bool initD3D_NV12(HWND hwnd, int img_width, int img_height);
    bool initD3D_YUVJ420P(HWND hwnd, int img_width, int img_height);
    bool setParams();

private:
    HRESULT InitializeEnv(); // initialize environment
    HRESULT BindCaptureFilter();
    void TearDownGraph();
    HRESULT BuildPreviewGraph();
    HRESULT BuildPreviewGraphEx();
    HRESULT InitSampleGrabber();

signals:
    void updateImage(QPixmap img);
    void finished();

public slots:
    void process();

private:
    IGraphBuilder *m_pGraphBuilder;                 // filter graph manager
    ICaptureGraphBuilder2 *m_pCaptureGraphBuilder;
    IMediaControl *m_pMediaControl;                 // controls the data flow in the filter graph
    IBaseFilter *m_pVCap;
    IKsControl *m_pKsCtrl;
    IMediaEventEx *m_pME;
    IMoniker *m_pMoniker;                           // device moniker
    IBaseFilter *m_pGrabberFilter;
    ISampleGrabber *m_pSampleGrabber;
    IPin *m_pPinOutCapture;
    IPin *m_pPinInGrabber;
    //IPin *m_pPinOutGrabber;
    //#ifdef __ENABLE_RECORD__
    //CFile m_fileRecorder;
    BOOL m_bFileOpen;
    BOOL m_bFirstKeyFrame;
    //#endif // __ENABLE_RECORD__
    bool m_bRecord;
    AVCodecContext *m_vDecodeCtx = nullptr;
    AVFrame *m_yuvFrame = nullptr;
    AVFrame *m_rgbFrame = nullptr;
    uint8_t *m_rgbFrameBuf = nullptr;
    SwsContext *m_swsCtx = nullptr;
    int m_dstWinWidth;                              // destination window width
    int m_dstWinHeight;                             // destination window height
    CD3DVidRender m_d3d;

public:
    bool m_bPreview;
};
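Registering the callback happens in InitSampleGrabber. Its source isn't shown here, but a minimal sketch built on the standard Sample Grabber API would look roughly like this (MEDIASUBTYPE_H264 comes from newer Windows SDK headers; treat the details as an assumption, not the author's exact code):
HRESULT CaptureVideo::InitSampleGrabber()
{
    // Create the Sample Grabber filter and fetch its ISampleGrabber interface.
    HRESULT hr = CoCreateInstance(CLSID_SampleGrabber, NULL, CLSCTX_INPROC_SERVER,
                                  IID_IBaseFilter, (void **)&m_pGrabberFilter);
    if (FAILED(hr))
        return hr;
    hr = m_pGrabberFilter->QueryInterface(IID_ISampleGrabber, (void **)&m_pSampleGrabber);
    if (FAILED(hr))
        return hr;

    // Only accept H264 video samples on the grabber's input pin.
    AM_MEDIA_TYPE mt;
    ZeroMemory(&mt, sizeof(mt));
    mt.majortype = MEDIATYPE_Video;
    mt.subtype = MEDIASUBTYPE_H264;
    hr = m_pSampleGrabber->SetMediaType(&mt);
    if (FAILED(hr))
        return hr;

    m_pSampleGrabber->SetBufferSamples(FALSE); // no internal buffering, callback only
    m_pSampleGrabber->SetOneShot(FALSE);       // keep running, don't stop after one sample
    // whichMethodToCallback = 1 routes every sample to BufferCB (0 would use SampleCB).
    return m_pSampleGrabber->SetCallback(this, 1);
}
The grabber filter still has to be added to the graph and connected downstream of the capture pin, which is presumably what BuildPreviewGraph / BuildPreviewGraphEx take care of.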
/* data callback */
STDMETHODIMP CaptureVideo::BufferCB(double dblSampleTime, BYTE *pBuffer, long lBufferSize) {
#ifdef __ENABLE_RECORD__
    //if (m_bFileOpen)
    //{
    //    m_fileRecorder.Write(pBuffer, lBufferSize);
    //}
#endif // __ENABLE_RECORD__
    qDebug() << QString("dblSampleTime: %1, lBufferSize: %2").arg(dblSampleTime).arg(lBufferSize);
    //return 0;

    // Drop the first two buffers before decoding.
    static int s_dropFrame = 2;
    if (s_dropFrame > 0)
    {
        --s_dropFrame;
        return 0;
    }

    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = pBuffer;
    pkt.size = lBufferSize;
    pkt.stream_index = 0;

    QTime t = QTime::currentTime();
    int re = 1;
    //while (re)
    //{
    re = avcodec_send_packet(m_vDecodeCtx, &pkt);
    if (0 != re) // sending the packet failed
    {
        qDebug() << QString("avcodec_send_packet failed, %1, pkt.size: %2").arg(parseError(re)).arg(lBufferSize);
        return 0;
    }
    re = avcodec_receive_frame(m_vDecodeCtx, m_yuvFrame);
    if (0 != re) // no decoded frame available
    {
        qDebug() << QString("avcodec_receive_frame failed, %1, pkt.size: %2").arg(parseError(re)).arg(lBufferSize);
        //if (AVERROR(EAGAIN) != re)
        //{
        return 0;
        //}
    }
    //}
    static bool s_singleshot = false;
    if (!s_singleshot)
    {
        s_singleshot = true;
        //avcodec_flush_buffers(m_vDecodeCtx);
    }
    qDebug() << QStringLiteral("decode time: %1 ms, frame->pkt_size: %2").arg(t.elapsed()).arg(m_yuvFrame->pkt_size);
    //av_hwframe_transfer_data
    return 0;
}
pBuffer and lBufferSize are then wrapped in an AVPacket. Once you have the AVPacket, you can push it to a streaming server as-is on the one hand, and decode it into an AVFrame on the other.
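As a sketch of the streaming side (not from the source; it uses the mpegts muxer over UDP, since raw Annex-B H264 can be fed to it without extradata, whereas FLV/RTMP would additionally need AVCC conversion; error checks omitted, URL illustrative):
// One-time setup of the output muxer.
AVFormatContext *ofmt = nullptr;
avformat_alloc_output_context2(&ofmt, nullptr, "mpegts", "udp://127.0.0.1:1234");
AVStream *st = avformat_new_stream(ofmt, nullptr);
st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
st->codecpar->codec_id   = AV_CODEC_ID_H264;
avio_open(&ofmt->pb, "udp://127.0.0.1:1234", AVIO_FLAG_WRITE);
avformat_write_header(ofmt, nullptr);

// Per packet, e.g. inside BufferCB: stamp the packet with the dshow sample time and send it.
pkt.pts = pkt.dts = (int64_t)(dblSampleTime / av_q2d(st->time_base));
av_interleaved_write_frame(ofmt, &pkt);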
In practice, FFmpeg software decoding (and encoding, for that matter) is far too slow and CPU-hungry on low-end machines, especially for 1080p: decoding a single frame can take tens or even hundreds of milliseconds, which causes very high latency. To hold 30 FPS, the whole decode-and-display pipeline has to finish within 33.33 ms per frame. With GPU hardware decoding, even a low-end machine keeps 1080p decode times low.
You can check which H264 hardware decoders your ffmpeg build supports with the following command:
ffmpeg -codecs | findstr "h264"
(decoders: h264 h264_qsv h264_cuvid ) (encoders: libx264 libx264rgb h264_amf h264_nvenc h264_qsv nvenc nvenc_h264 )
Use h264_qsv for Intel GPUs and h264_cuvid for NVIDIA GPUs.
The FFmpeg hardware-decoding code is almost identical to the software path; the main difference is how the decoder is looked up:
decoder = avcodec_find_decoder_by_name("h264_qsv");
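The rest of initDecoder is the usual allocate-and-open sequence; a minimal sketch (the software fallback is my assumption, not from the source):
bool CaptureVideo::initDecoder()
{
    // Prefer the Intel QSV hardware decoder; fall back to the software one.
    const AVCodec *decoder = avcodec_find_decoder_by_name("h264_qsv");
    if (!decoder)
        decoder = avcodec_find_decoder(AV_CODEC_ID_H264);
    if (!decoder)
        return false;

    m_vDecodeCtx = avcodec_alloc_context3(decoder);
    if (!m_vDecodeCtx)
        return false;
    if (avcodec_open2(m_vDecodeCtx, decoder, nullptr) < 0)
        return false;

    m_yuvFrame = av_frame_alloc();
    return m_yuvFrame != nullptr;
}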
Hardware decoding generally produces NV12 frames, while software decoding usually yields a planar YUV format; my USB camera's stream decodes to YUVJ420P. Qt's QImage, however, only accepts RGB data, so the pixel format has to be converted with sws_scale.
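The conversion that yuv2Rgb performs is roughly the following (a sketch; the RGB24 / QImage::Format_RGB888 pairing is my choice for illustration):
// Needs: extern "C" { #include <libswscale/swscale.h> }
bool CaptureVideo::yuv2Rgb(uchar *out, int outWidth, int outHeight)
{
    // Create or reuse a converter from the decoder's pixel format to RGB24.
    m_swsCtx = sws_getCachedContext(m_swsCtx,
                                    m_vDecodeCtx->width, m_vDecodeCtx->height,
                                    (AVPixelFormat)m_yuvFrame->format,
                                    outWidth, outHeight, AV_PIX_FMT_RGB24,
                                    SWS_BILINEAR, nullptr, nullptr, nullptr);
    if (!m_swsCtx)
        return false;

    uint8_t *dstData[4] = { out, nullptr, nullptr, nullptr };
    int dstLinesize[4] = { outWidth * 3, 0, 0, 0 }; // 3 bytes per RGB24 pixel
    sws_scale(m_swsCtx, m_yuvFrame->data, m_yuvFrame->linesize,
              0, m_vDecodeCtx->height, dstData, dstLinesize);
    return true;
}
The resulting buffer can back a QImage directly: QImage(out, outWidth, outHeight, outWidth * 3, QImage::Format_RGB888).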
The sws_scale API is itself very CPU-intensive, though. At first I replaced it with libyuv, which is somewhat faster, but its CPU usage is about the same as sws_scale: a release build still used over 10% CPU on my machine, and it would certainly be worse on a low-end one. So I switched to rendering the NV12 frames directly with D3D, which skips the pixel format conversion entirely and saves that CPU.
Usage is simple: InitD3D_NV12 initializes the renderer with a QLabel's native handle and the frame dimensions, and Render_NV12 takes the AVFrame->data payload.
BOOL CD3DVidRender::InitD3D_NV12(HWND hwnd, int img_width, int img_height)
{
    m_nColor = 1;
    HRESULT lRet;
    if (m_pDirect3D9)
    {
        m_pDirect3D9->Release();
        m_pDirect3D9 = NULL;
    }
    m_pDirect3D9 = Direct3DCreate9(D3D_SDK_VERSION);
    if (m_pDirect3D9 == NULL)
    {
        return FALSE;
    }

    ZeroMemory(&d3dpp, sizeof(d3dpp));
    d3dpp.Windowed = TRUE;
    d3dpp.hDeviceWindow = hwnd;
    d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
    d3dpp.BackBufferFormat = D3DFMT_UNKNOWN;
    d3dpp.EnableAutoDepthStencil = FALSE;
    d3dpp.Flags = D3DPRESENTFLAG_VIDEO;
    d3dpp.FullScreen_RefreshRateInHz = D3DPRESENT_RATE_DEFAULT;
    d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;

    D3DCAPS9 caps;
    DWORD BehaviorFlags = D3DCREATE_SOFTWARE_VERTEXPROCESSING | D3DCREATE_MULTITHREADED;
    HRESULT hr = m_pDirect3D9->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &caps);
    if (SUCCEEDED(hr))
    {
        if (caps.DevCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT)
        {
            BehaviorFlags = D3DCREATE_HARDWARE_VERTEXPROCESSING | D3DCREATE_MULTITHREADED | D3DCREATE_FPU_PRESERVE;
        }
        else
        {
            BehaviorFlags = D3DCREATE_SOFTWARE_VERTEXPROCESSING | D3DCREATE_MULTITHREADED | D3DCREATE_FPU_PRESERVE;
        }
    }

    if (m_pDirect3DDevice)
    {
        m_pDirect3DDevice->Release();
        m_pDirect3DDevice = NULL;
    }
    lRet = m_pDirect3D9->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, hwnd, BehaviorFlags, &d3dpp, &m_pDirect3DDevice);
    if (FAILED(lRet))
    {
        return FALSE;
    }

    if (m_pDirect3DSurfaceRender)
    {
        m_pDirect3DSurfaceRender->Release();
        m_pDirect3DSurfaceRender = NULL;
    }
    lRet = m_pDirect3DDevice->CreateOffscreenPlainSurface(img_width, img_height, (D3DFORMAT)MAKEFOURCC('N', 'V', '1', '2'), D3DPOOL_DEFAULT, &m_pDirect3DSurfaceRender, NULL);
    if (FAILED(lRet))
    {
        return FALSE;
    }
    return TRUE;
}
BOOL CD3DVidRender::Render_NV12(unsigned char *pdata, int img_width, int img_height)
{
    HRESULT lRet = -1;
    D3DLOCKED_RECT d3d_rect;
    lRet = m_pDirect3DSurfaceRender->LockRect(&d3d_rect, NULL, D3DLOCK_DONOTWAIT);
    if (FAILED(lRet))
    {
        return FALSE;
    }

    // Copy the Y plane, then the interleaved UV plane, honoring the surface pitch.
    byte *pSrc = pdata;
    byte *pDest = (BYTE *)d3d_rect.pBits;
    int stride = d3d_rect.Pitch;
    int i = 0;
    for (i = 0; i < img_height; i++)
    {
        memmove(pDest + i * stride, pSrc + i * img_width, img_width);
    }
    for (i = 0; i < img_height / 2; i++)
    {
        memmove(pDest + stride * img_height + i * stride, pSrc + img_width * img_height + i * img_width, img_width);
    }
    lRet = m_pDirect3DSurfaceRender->UnlockRect();
    if (FAILED(lRet))
    {
        return FALSE;
    }

    if (!m_pDirect3DDevice)
        return FALSE;
    m_pDirect3DDevice->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0);
    m_pDirect3DDevice->BeginScene();
    if (m_pBackBuffer)
    {
        m_pBackBuffer->Release();
        m_pBackBuffer = NULL;
    }
    m_pDirect3DDevice->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO, &m_pBackBuffer);
    // Stretch the NV12 surface onto the back buffer, scaled to the window's client area.
    GetClientRect(d3dpp.hDeviceWindow, &m_rtViewport);
    m_pDirect3DDevice->StretchRect(m_pDirect3DSurfaceRender, NULL, m_pBackBuffer, &m_rtViewport, D3DTEXF_LINEAR);
    m_pDirect3DDevice->EndScene();
    m_pDirect3DDevice->Present(NULL, NULL, NULL, NULL);

    // Re-initialize if the window was resized or the device needs a reset.
    RECT rcCurrentClient;
    GetClientRect(d3dpp.hDeviceWindow, &rcCurrentClient);
    if (rcCurrentClient.bottom > 0 && rcCurrentClient.right > 0 &&
        ((UINT)rcCurrentClient.right != d3dpp.BackBufferWidth ||
         (UINT)rcCurrentClient.bottom != d3dpp.BackBufferHeight))
    {
        d3dpp.BackBufferWidth = rcCurrentClient.right;
        d3dpp.BackBufferHeight = rcCurrentClient.bottom;
        InitD3D_NV12(d3dpp.hDeviceWindow, img_width, img_height);
    }
    if (m_pDirect3DDevice && m_pDirect3DDevice->TestCooperativeLevel() == D3DERR_DEVICENOTRESET)
    {
        InitD3D_NV12(d3dpp.hDeviceWindow, img_width, img_height);
    }
    return TRUE;
}
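One caveat: Render_NV12 expects a single contiguous buffer (the full Y plane followed by the interleaved UV plane, with no row padding), while a decoded AVFrame keeps the planes in data[0]/data[1] with a possibly padded linesize. A small packing step like the following bridges the two (a sketch; it assumes the frame really is NV12):
#include <vector>
#include <cstring>

// Pack a decoded NV12 AVFrame into the contiguous layout Render_NV12 expects.
static void renderNV12Frame(CD3DVidRender &d3d, const AVFrame *frame, std::vector<uint8_t> &buf)
{
    const int w = frame->width, h = frame->height;
    buf.resize(w * h * 3 / 2);          // Y plane + half-height interleaved UV plane
    for (int i = 0; i < h; ++i)         // copy Y rows, dropping the linesize padding
        memcpy(buf.data() + i * w, frame->data[0] + i * frame->linesize[0], w);
    uint8_t *uvDst = buf.data() + w * h;
    for (int i = 0; i < h / 2; ++i)     // copy interleaved UV rows
        memcpy(uvDst + i * w, frame->data[1] + i * frame->linesize[1], w);
    d3d.Render_NV12(buf.data(), w, h);
}
If linesize[0] already equals the width and the two planes happen to be contiguous in memory, frame->data[0] can be passed straight through without the copy.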
To hand a Qt widget over to native rendering, the following two flags need to be set:
// avoid flicker
ui.imgLbl->setUpdatesEnabled(false);
// avoid the program appearing unresponsive (spinning busy cursor)
ui.imgLbl->setAttribute(Qt::WA_NativeWindow, true);
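The HWND passed to InitD3D_NV12 then comes straight from the label; with WA_NativeWindow set, winId() returns a real native handle (the 1920x1080 size here is just an example):
m_d3d.InitD3D_NV12((HWND)ui.imgLbl->winId(), 1920, 1080);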
Source code