参见“海思平台的利用mp4v2库合成mp4原理说明”
m_pHandle = MP4Create(strFileName, 0);
MP4SetTimeScale(m_pHandle, m_nVideoTimeScale); // m_nVideoTimeScale is a fixed value of 90000 (standard 90 kHz video clock)
if (MP4_INVALID_TRACK_ID == m_eVideoId) { // m_eVideoId is the video track ID; create the track only once
int nWidth = 0;
int nHeight = 0;
int nFrameRate = 25;
ih264_decode_sps(aSpsData, nDataSize, nWidth, nHeight, nFrameRate); // aSpsData holds the SPS NAL; parses width/height/frame rate
m_eVideoId = MP4AddH264VideoTrack(m_pHandle,
m_nVideoTimeScale, // timescale units per second
MP4_INVALID_DURATION, // m_nVideoTimeScale / nFrameRate would be the per-frame duration; invalid = variable duration per sample
nWidth, // width
nHeight, // height
aSpsData[1], // sps[1] AVCProfileIndication
aSpsData[2], // sps[2] profile_compat
aSpsData[3], // sps[3] AVCLevelIndication
3); // lengthSizeMinusOne = 3, i.e. a 4-byte length field before each NAL unit
MP4SetVideoProfileLevel(m_pHandle, 0x7F);
}
MP4AddH264SequenceParameterSet(m_pHandle, m_eVideoId, aSpsData, nDataSize);
h264_decode_sps()解析出视频图片的宽、高、帧率,代码参见《雷霄骅: simplest libRTMP example》
// Write one H.264 NAL unit into the MP4 video track.
// nSCPLen is the start-code length and MUST be 4 bytes (0x00 0x00 0x00 0x01),
// because the start code is overwritten in place with a 4-byte big-endian
// length field below (AVCC sample format).
const int nNaluSize = nDataSize - nSCPLen;
const unsigned char *pNalu = aVideoFrame + nSCPLen; // aVideoFrame is the frame buffer; pNalu skips the start code
const unsigned char nNaluType = pNalu[0] & RL_NALU_TYPE_MASK;
const bool isSyncSample = (RL_NALU_TYPE_IDR == nNaluType); // IDR frames are sync (key) samples
switch (nNaluType)
{
case RL_NALU_TYPE_SPS: // 0x07: SPS goes into the track header, not a media sample
    WriteSPS(pNalu, nNaluSize);
    break;
case RL_NALU_TYPE_PPS: // 0x08: PPS
    MP4AddH264PictureParameterSet(m_pHandle, m_eVideoId, pNalu, nNaluSize);
    break;
default:
{
    // Replace the 4-byte start code with the NAL unit length (big-endian).
    aVideoFrame[0] = (nNaluSize >> 24) & 0xFF;
    aVideoFrame[1] = (nNaluSize >> 16) & 0xFF;
    aVideoFrame[2] = (nNaluSize >> 8) & 0xFF;
    aVideoFrame[3] = (nNaluSize >> 0) & 0xFF;
    // A/V sync is driven by sample duration:
    // duration = (current frame timestamp - previous frame timestamp) * 90000 / 1000,
    // where nTimeStamp is in milliseconds and m_nVideoTimeScale is 90000.
    const MP4Duration duration = (nTimeStamp - this->m_nLastVideoTimeStamp) * this->m_nVideoTimeScale / 1000;
    m_nLastVideoTimeStamp = nTimeStamp; // fix: the original assigned this twice (before and after MP4WriteSample)
    MP4WriteSample(m_pHandle, m_eVideoId, aVideoFrame, nNaluSize + nSCPLen,
                   duration, // MP4_INVALID_DURATION would let mp4v2 infer a fixed duration
                   0, isSyncSample);
    break;
}
}
const int nProfileOffset = 2;
const uint8_t nProfileMask = 0xC0;
const uint8_t nSampleRateMask = 0x3C;
const uint32_t aSmapleRates[] = {96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000};
//第2个字节的最高2位,profile, the MPEG-4 Audio Object Type minus 1
uint8_t nProfile = ((aAudioFrame[nProfileOffset] & nProfileMask) >> 6) + 1;
//MPEG-4 Sampling Frequency Index (15 is forbidden)
uint8_t nSampleRate = (aAudioFrame[nProfileOffset] & nSampleRateMask) >> 2;
//第3字节的最低位和第4字节的最高2位, MPEG-4 Channel Configuration (in the case of 0,
//the channel configuration is sent via an inband PCE)
uint8_t nChannel = ((aAudioFrame[nProfileOffset] & 0x1) << 2) | ((aAudioFrame[nProfileOffset + 1] & 0xc0) >> 6);
/*
1. timeScale 采样率 16000 32000 44100
2. sampleDuration 这个参数填写的是每一帧的字节数: sampleDuration * 1000 / timeScale = Duration Time
一帧的持续时间。
3. 参阅:https://www.21ic.com/tougao/article/24188.html
海思平台的利用mp4v2库合成mp4原理说明
*/
m_nAudioTimeScale = aSmapleRates[nSampleRate]; //this->m_nVideoTimeScale
m_eAudioId = MP4AddAudioTrack(m_pHandle, m_nAudioTimeScale,
MP4_INVALID_DURATION, //1024
MP4_MPEG2_AAC_LC_AUDIO_TYPE); //MP4_MPEG4_AUDIO_TYPE
MP4SetAudioProfileLevel(m_pHandle, nProfile); //0x2);
/*
aacObjectType(5bits) -- 就是nProfile.
sampleRateIdx(4bits),
numChannels(4bits)
*/
uint8_t aEsConfig[2] = {0};
aEsConfig[0] = ((nProfile << 3) | (nSampleRate >> 1));
aEsConfig[1] = ((nSampleRate & 0x01) << 7) | (nChannel << 3);
printf("MP4 ES config: 0x%02x 0x%02x\n", aEsConfig[0], aEsConfig[1]);
MP4SetTrackESConfiguration(m_pHandle, m_eAudioId, aEsConfig, sizeof(aEsConfig);
const unsigned int nMetaLen = ((aAudioFrame[1] & 0x01) == 1 ? 7 : 9); // ADTS header length: 7 bytes when protection_absent == 1 (no CRC), otherwise 9
// Audio duration in audio-timescale units: (ms delta between frames) * sampleRate / 1000.
const MP4Duration duration = (nTimeStamp - this->m_nLastAudioTimeStamp) * this->m_nAudioTimeScale / 1000;
m_nLastAudioTimeStamp = nTimeStamp;
// Write the raw AAC payload (ADTS header stripped); every AAC frame is a sync sample.
bool bRet = MP4WriteSample(m_pHandle, m_eAudioId, aAudioFrame + nMetaLen,
nFrameSize - nMetaLen,
duration, // MP4_INVALID_DURATION would let mp4v2 infer a fixed duration
0, 1);
MP4Close(m_pHandle, 0);