之前一篇说了采集视频并实时进行H264编码,没有讲音频的实时编码,本篇将讲一个比较完整的例子,从DirectShow采集视音频,然后实时进行视音频的编码,最后封装成MP4。视频编码还使用之前用过的X264编码器,音频使用FAAC,这也是比较常用的音频编码器,可直接在官网下载源码,最后将编码后的H264和AAC封装成MP4。
本篇将在前一篇(5.使用DirectShow进行摄像头采集并进行H264实时编码)的基础上进行,只要增加音频采集、编码以及最后的合成封装。不过视频的采集有和之前不一样的地方就是之前的编码工作直接在回调中进行的,本篇换了一种更好的方式,在回调中把每次过来的帧数据都放在一个array中保存起来,然后开启了一个处理线程,一边采集,一边编码处理,这样做比较合理一点,避免在回调中编码时花费大量时间而可能导致帧丢失问题。回调代码如下:
// SampleGrabber callback: DirectShow invokes this for every captured video
// frame. The frame is copied into the shared queue and, on the very first
// frame, the worker thread that does the actual encoding is started -- the
// callback itself stays fast so no frames are dropped.
HRESULT STDMETHODCALLTYPE CSampleGrabberCB::BufferCB(double SampleTime, BYTE *pBuffer, long BufferLen)
{
    CString str;
    // Not recording yet: ignore the frame.
    if (!m_bBeginEncode)
        return 0;

    // Take a private copy -- DirectShow reuses pBuffer after we return.
    BYTE *pFrameCopy = new BYTE[BufferLen];
    memcpy(pFrameCopy, pBuffer, BufferLen);

    GrabDataInfo sInfo;
    sInfo.pData = pFrameCopy;
    sInfo.nDataSize = BufferLen;
    sInfo.dSampleTime = SampleTime;

    // Queue the frame under the producer/consumer lock.
    m_mxMsgLog.Lock();
    m_arrGrabDataArr.Add(sInfo);
    m_mxMsgLog.Unlock();

    str.Format(_T("\n Video--BufferLen:%ld, SampleTime:%f \n"), BufferLen, SampleTime);
    OutputDebugString(str);

    if (m_bFirst)
    {
        m_bFirst = FALSE;
        str.Format(_T("\n Video--SampleTime:%f \n"), SampleTime);
        OutputDebugString(str);
        // First frame seen: start the consumer/encoder thread.
        AfxBeginThread(VideoDealFunc, this);
    }
    return 0;
}
struct GrabDataInfo
{
BYTE *pData;
int nDataSize;
double dSampleTime;
GrabDataInfo()
{
pData = NULL;
nDataSize = 0;
dSampleTime = 0.0;
};
GrabDataInfo(const GrabDataInfo &other)
{
*this = other;
};
GrabDataInfo& operator = (const GrabDataInfo& other)
{
pData = other.pData;
nDataSize = other.nDataSize;
dSampleTime = other.dSampleTime;
return *this;
};
};
// Dynamic array of queued samples (MFC CArray). The template arguments were
// lost to HTML escaping in the original post; restored to the conventional
// CArray<TYPE, ARG_TYPE> form.
typedef CArray<GrabDataInfo, GrabDataInfo&> ASGrabDataInfoArray;
然后处理的部分全部放到线程中了,代码如下:
// Worker-thread entry point (AfxBeginThread signature): forwards to the
// grabber object's VideoDeal() member.
UINT VideoDealFunc(LPVOID lpVoid)
{
    CSampleGrabberCB *pGrabber = static_cast<CSampleGrabberCB*>(lpVoid);
    if (pGrabber != NULL)
        pGrabber->VideoDeal();
    return 0;
}
void CSampleGrabberCB::VideoDeal()
{
//等待音频正式开始的第一个样本时间有了后再比较
while (!theApp.m_IsBegin)
{
Sleep(200);
}
double dSampleTime = theApp.m_nSampleTime;
m_nFrameIndex = 0;
int csp = X264_CSP_I420;
int width = lWidth;
int height = lHeight;
int y_size = width * height;
ULONG nYUVLen = lWidth * lHeight + (lWidth * lHeight)/2;
USES_CONVERSION;
string strFullPath = W2A(m_sSavePath);
m_fp_dst = fopen(strFullPath.c_str(), "wb");
m_pParam = (x264_param_t*)malloc(sizeof(x264_param_t));
//初始化,是对不正确的参数进行修改,并对各结构体参数和cabac编码,预测等需要的参数进行初始化
x264_param_default(m_pParam);
//如果有编码延迟,可以这样设置就能即时编码
x264_param_default_preset(m_pParam, "fast", "zerolatency");
m_pParam->i_width = width;
m_pParam->i_height = height;
m_pParam->i_csp = X264_CSP_I420;
m_pParam->i_fps_num = 30; // 设置帧率
m_pParam->i_fps_den = 1; // 设置帧率
//m_pParam->b_repeat_headers = 1; // 重复SPS/PPS 放到关键帧前面
//设置Profile,这里有5种级别(编码出来的码流规格),级别越高,清晰度越高,耗费资源越大
x264_param_apply_profile(m_pParam, x264_profile_names[1]);
//x264_picture_t存储压缩编码前的像素数据
m_pPic_in = (x264_picture_t*)malloc(sizeof(x264_picture_t));
m_pPic_out = (x264_picture_t*)malloc(sizeof(x264_picture_t));
x264_picture_init(m_pPic_out);
//为图像结构体x264_picture_t分配内存
x264_picture_alloc(m_pPic_in, csp, m_pParam->i_width, m_pParam->i_height);
//打开编码器
m_pHandle = x264_encoder_open(m_pParam);
if(m_pHandle == NULL)//失败就退出
{
free(m_pPic_in);
m_pPic_in = NULL;
free(m_pPic_out);
m_pPic_out = NULL;
free(m_pParam);
m_pParam = NULL;
return;
}
while (1)
{
DWORD dwRet = WaitForSingleObject(m_hMainExitEvent, 5);
if(dwRet == WAIT_OBJECT_0)
{
if (m_arrGrabDataArr.GetSize() <= 0)
{
break;
}
}
m_mxMsgLog.Lock();
int nCount = m_arrGrabDataArr.GetSize();
if(nCount<=0)
{
m_mxMsgLog.Unlock();
continue;
}
GrabDataInfo sDataInfo = m_arrGrabDataArr.GetAt(0);
m_arrGrabDataArr.RemoveAt(0);
m_mxMsgLog.Unlock();
if (sDataInfo.dSampleTime < dSampleTime)
{
delete[] sDataInfo.pData;
continue;
}
//编码
//每一帧大小
BYTE * yuvByte = new BYTE[nYUVLen];
//先把RGB24转为YUV420
RGB2YUV(sDataInfo.pData, lWidth, lHeight, yuvByte, &nYUVLen);
delete[] sDataInfo.pData;
if (m_pPic_in == NULL || m_pPic_out == NULL || m_pHandle == NULL || m_pParam == NULL)
{
continue;
}
int iNal = 0;
//存储压缩编码后的码流数据
x264_nal_t* pNals = NULL;
//注意写的起始位置和大小,前y_size是Y的数据,然后y_size/4是U的数据,最后y_size/4是V的数据
memcpy(m_pPic_in->img.plane[0], yuvByte, y_size); //先写Y
memcpy(m_pPic_in->img.plane[1], yuvByte + y_size, y_size/4); //再写U
memcpy(m_pPic_in->img.plane[2], yuvByte + y_size + y_size/4, y_size/4); //再写V
m_pPic_in->i_pts = m_nFrameIndex++; //时钟
//编码一帧图像,pNals为返回的码流数据,iNal是返回的pNals中的NAL单元的数目
int ret = x264_encoder_encode(m_pHandle, &pNals, &iNal, m_pPic_in, m_pPic_out);
if (ret < 0)
{
OutputDebugString(_T("\n x264_encoder_encode err"));
delete[] yuvByte;
continue;
}
//写入目标文件
for (int j = 0; j < iNal; ++j)
{
fwrite(pNals[j].p_payload, 1, pNals[j].i_payload, m_fp_dst);
}
delete[] yuvByte; //用完要释放
}
int iNal = 0;
//存储压缩编码后的码流数据
x264_nal_t* pNals = NULL;
//把编码器中剩余的码流数据输出
while (1)
{
int ret = x264_encoder_encode(m_pHandle, &pNals, &iNal, NULL, m_pPic_out);
if (ret == 0)
{
break;
}
for (int j = 0; j < iNal; ++j)
{
fwrite(pNals[j].p_payload, 1, pNals[j].i_payload, m_fp_dst);
}
}
//释放内存
x264_picture_clean(m_pPic_in);
//关闭编码器
x264_encoder_close(m_pHandle);
m_pHandle = NULL;
free(m_pPic_in);
m_pPic_in = NULL;
free(m_pPic_out);
m_pPic_out = NULL;
free(m_pParam);
m_pParam = NULL;
//关闭文件
fclose(m_fp_dst);
m_fp_dst = NULL;
m_nFrameIndex = 0;
m_bEndEncode = TRUE;
}
另外,有些机器好像帧率不是固定的,会随着曝光值的变化而变化,录出来的视频可能播放就像快进一样。这种情况解决方案很简单,把采集设备的曝光值设置为手动固定值即可。设置代码如下:
//Turn off auto exposure: a fixed manual exposure keeps the frame rate constant
//(variable exposure made some cameras record video that plays back too fast).
IAMCameraControl *m_pCtrl;
// NOTE(review): the HRESULTs of QueryInterface/FindInterface/GetStreamCaps
// below are never checked, pAudioConfig is never Release()d and audiPmt
// (AM_MEDIA_TYPE) is never freed with DeleteMediaType -- confirm and clean up.
m_pVideoFilter->QueryInterface(IID_IAMCameraControl, (void **)&m_pCtrl );
m_pCtrl->Set(CameraControl_Exposure, -5, CameraControl_Flags_Manual );
//Hook the sample-grabber interface used to receive raw audio buffers.
m_pAudioGrabberFilter->QueryInterface(IID_ISampleGrabber, (void **)&m_pAudioGrabber);
//Query the audio capture source for its stream-format parameters.
IAMStreamConfig *pAudioConfig = NULL;
m_pCapture->FindInterface(&PIN_CATEGORY_CAPTURE, &MEDIATYPE_Audio,
m_pAudioFilter, IID_IAMStreamConfig, (void **) &pAudioConfig);
AM_MEDIA_TYPE *audiPmt = NULL;
AUDIO_STREAM_CONFIG_CAPS ascc;
pAudioConfig->GetStreamCaps(0, &audiPmt, (BYTE*)&ascc);
// Cache channel count / sample rate / bit depth -- the FAAC encoder setup in
// AudioDeal() reads these back from the callback object.
WAVEFORMATEX *pVih = (WAVEFORMATEX*)audiPmt->pbFormat;
m_audioCB.m_nChannels = pVih->nChannels;
m_audioCB.m_nSamplesPerSec = pVih->nSamplesPerSec;
m_audioCB.m_wBitsPerSample = pVih->wBitsPerSample;
//audiPmt->cbFormat = sizeof(WAVEFORMATEX);
//audiPmt->pbFormat = (BYTE*)pVih;
audiPmt->majortype = MEDIATYPE_Audio;//cf. MEDIATYPE_Video on the video path
audiPmt->subtype = MEDIASUBTYPE_PCM;//cf. MEDIASUBTYPE_RGB24 on the video path
audiPmt->formattype = FORMAT_WaveFormatEx;//not set on the video capture path
pAudioConfig->SetFormat(audiPmt);
hr = m_pAudioGrabber->SetMediaType(audiPmt);
if(FAILED(hr))
{
AfxMessageBox(_T("Fail to set audio media type!"));
return;
}
// FALSE: don't buffer samples internally -- deliver each one via the callback.
m_pAudioGrabber->SetBufferSamples(FALSE);
m_pAudioGrabber->SetOneShot(FALSE);
// 1 selects the BufferCB callback (0 would select SampleCB).
m_pAudioGrabber->SetCallback(&m_audioCB, 1);
// SampleGrabber callback: DirectShow invokes this for every captured audio
// buffer. The PCM data is copied into the shared queue and, on the first
// buffer, the audio encoding worker thread is started.
HRESULT STDMETHODCALLTYPE CAudioSampleGrabber::BufferCB(double SampleTime, BYTE *pBuffer, long BufferLen)
{
    CString str;
    if(m_bBeginEncode)
    {
        // Private copy -- DirectShow reuses pBuffer after we return.
        BYTE * pcmData = new BYTE[BufferLen];
        memcpy(pcmData, pBuffer, BufferLen);
        GrabDataInfo sData;
        sData.pData = pcmData;
        sData.nDataSize = BufferLen;
        sData.dSampleTime = SampleTime;
        // FIX: guard the shared queue with the same mutex AudioDeal() locks;
        // the original Add() was unsynchronized and raced the consumer thread
        // (the video path already locks here -- this makes audio consistent).
        m_mxMsgLog.Lock();
        m_arrAudioDataInfo.Add(sData);
        m_mxMsgLog.Unlock();
        str.Format(_T("\n Audio--BufferLen:%ld, SampleTime:%f \n"), BufferLen, SampleTime);
        OutputDebugString(str);
        if (m_bFirst)
        {
            m_bFirst = FALSE;
            // First buffer seen: start the consumer/encoder thread.
            AfxBeginThread(AudioDealFunc, this);
        }
    }
    return 0;
}
void CAudioSampleGrabber::AudioDeal()
{
USES_CONVERSION;
string strFullPath = W2A(m_sSavePath);
m_fpOut = fopen(strFullPath.c_str(), "wb");
m_hFaacEncHandle = faacEncOpen(m_nSamplesPerSec, m_nChannels, &m_nInputSamples, &m_nMaxOutputBytes);
if(m_hFaacEncHandle == NULL)
{
OutputDebugString(_T("faacEncOpen failed"));
return;
}
//获取配置
m_faacConfigurePtr = faacEncGetCurrentConfiguration(m_hFaacEncHandle);
m_faacConfigurePtr->inputFormat = FAAC_INPUT_16BIT;
// 0 = Raw,1 = ADTS
m_faacConfigurePtr->outputFormat = 1;
m_faacConfigurePtr->aacObjectType = MAIN;
m_faacConfigurePtr->allowMidside = 0;
m_faacConfigurePtr->useLfe = 0;
m_faacConfigurePtr->useTns = 1;
//设置配置
int nRet = faacEncSetConfiguration(m_hFaacEncHandle, m_faacConfigurePtr);
m_bSampleBegin = TRUE;
while (1)
{
DWORD dwRet = WaitForSingleObject(m_hMainExitEvent, 10);
if(dwRet == WAIT_OBJECT_0)
{
if (m_arrAudioDataInfo.GetSize() <= 0)
{
break;
}
}
m_mxMsgLog.Lock();
int nCount = m_arrAudioDataInfo.GetSize();
if(nCount<=0)
{
m_mxMsgLog.Unlock();
continue;
}
GrabDataInfo sDataInfo = m_arrAudioDataInfo.GetAt(0);
m_arrAudioDataInfo.RemoveAt(0);
m_mxMsgLog.Unlock();
if (m_hFaacEncHandle == NULL || m_faacConfigurePtr == NULL)
{
continue;
}
//设定每次能编码的数据大小
int nPCMBufferSize = m_nInputSamples*m_wBitsPerSample / 8;
BYTE* pbPCMBuffer = new BYTE[nPCMBufferSize];
BYTE *pbAACBuffer = new BYTE [m_nMaxOutputBytes];
ULONG ulTotalEncode = 0;
int nTime = 0;
while (1)
{
//pBuffer大小为BufferLen,远大于编码能力nPCMBufferSize,所以这里多分几次编
//每次从pBuffer中取出nPCMBufferSize的大小,直到取完
memcpy(pbPCMBuffer, sDataInfo.pData+ulTotalEncode, nPCMBufferSize);
ulTotalEncode += nPCMBufferSize;
nTime++;
int nRet = faacEncEncode(m_hFaacEncHandle, (int*) pbPCMBuffer, m_nInputSamples, pbAACBuffer, m_nMaxOutputBytes);
if (nRet <= 0) //faac一般需要几个样本缓存,所以相当于丢弃
{
break;
}
//记录正式开始编码的第一个样本时间
if (m_bSampleBegin)
{
m_bSampleBegin = FALSE;
theApp.m_nSampleTime = sDataInfo.dSampleTime; //用来与视频样本同步的时间
theApp.m_IsBegin = TRUE;
}
//写入文件
fwrite(pbAACBuffer, 1, nRet, m_fpOut);
//取到最后一次要注意,大小不是nPCMBufferSize了,而是BufferLen - ulTotalEncode
if (sDataInfo.nDataSize < ulTotalEncode + nPCMBufferSize)
{
int nEndDataSize = sDataInfo.nDataSize - ulTotalEncode;
if (nEndDataSize > 0) //剩余的
{
delete[] pbPCMBuffer;
pbPCMBuffer = new BYTE[nEndDataSize];
memcpy(pbPCMBuffer, sDataInfo.pData+ulTotalEncode, nEndDataSize);
//要修改一下输入采样
int nInputSamples = nEndDataSize / (m_wBitsPerSample/8);
//对剩余的数据编码
nRet = faacEncEncode(m_hFaacEncHandle, (int*)pbPCMBuffer, nInputSamples, pbAACBuffer, m_nMaxOutputBytes);
if (nRet <= 0)
{
break;
}
fwrite(pbAACBuffer, 1, nRet, m_fpOut);
}
break;
}
}
delete [] pbPCMBuffer;
delete [] pbAACBuffer;
delete [] sDataInfo.pData;
}
//最后flush一下编码器中的数据
BYTE *pbAACBuffer = new BYTE [m_nMaxOutputBytes];
while(( nRet=faacEncEncode(m_hFaacEncHandle, NULL, 0, pbAACBuffer,m_nMaxOutputBytes)) > 0)
{
fwrite(pbAACBuffer,1,nRet,m_fpOut);
}
delete [] pbAACBuffer;
faacEncClose(m_hFaacEncHandle);
fclose(m_fpOut);
m_bEndEncode = TRUE;
}
最后是合成封装,将编码后H264和AAC封装成MP4,这个就不多讲了,只要按照MP4的格式规范来写,一般不是问题,当然,最好的方法是用ffmpeg来实现,这里我参考了一下雷霄骅的《最简单的基于FFmpeg的封装格式处理:视音频复用器(muxer)》,雷大神是我非常敬佩的一个程序员,虽然他已离开这个世界,但他的技术博客却永远在这里发着光。具体见下方的工程下载。
最后的界面再次展示一下。
完整工程下载见这里:工程代码下载