2020-08-17

MP4录制学习笔记

1. 时间戳的含义

参见“海思平台的利用mp4v2库合成mp4原理说明”

2. 关键代码

2.1 创建MP4文件

 m_pHandle = MP4Create(strFileName, 0);
 MP4SetTimeScale(m_pHandle, m_nVideoTimeScale);  //m_nVideoTimeScale设置为固定值90000

2.2 H264->MP4

2.2.1 SPS-> MP4

	if (MP4_INVALID_TRACK_ID == m_eVideoId) { // m_eVideoId is the video track ID; the track is created on the first SPS
        int nWidth = 0;
        int nHeight = 0;
        int nFrameRate = 25;  // NOTE(review): presumably kept when the SPS carries no timing info -- confirm in h264_decode_sps
        // aSpsData is the SPS data; the decoder reports width, height and frame rate.
        h264_decode_sps(aSpsData, nDataSize, nWidth, nHeight, nFrameRate);

        m_eVideoId = MP4AddH264VideoTrack(m_pHandle,
            m_nVideoTimeScale,       // timescale ticks per second
            MP4_INVALID_DURATION,    // ticks per frame; alternative: m_nVideoTimeScale / nFrameRate
            nWidth,                  // width
            nHeight,                 // height
            aSpsData[1],             // sps[1] AVCProfileIndication
            aSpsData[2],             // sps[2] profile_compat
            aSpsData[3],             // sps[3] AVCLevelIndication
            3);                      // length-field size minus one: 4 bytes of length before each NAL unit
        MP4SetVideoProfileLevel(m_pHandle, 0x7F);
    }

    // Store the SPS itself in the track's decoder configuration.
    MP4AddH264SequenceParameterSet(m_pHandle, m_eVideoId, aSpsData, nDataSize);

h264_decode_sps()解析出视频图片的宽、高、帧率,代码参见《雷霄骅:simplest libRTMP example》

2.2.2 非SPS帧保存

	// nSCPLen is the start-code-prefix length; this code requires the 4-byte form 0x00 0x00 0x00 0x01.
	const int nNaluSize = nDataSize - nSCPLen;
    const unsigned char *pNalu = aVideoFrame + nSCPLen;  // aVideoFrame is the frame buffer; pNalu skips the start code
    const unsigned char nNaluType = pNalu[0] & RL_NALU_TYPE_MASK;
    const bool isSyncSample = (RL_NALU_TYPE_IDR == nNaluType);

    switch (nNaluType)
    {
    case RL_NALU_TYPE_SPS: // 0x07: SPS
        WriteSPS(pNalu, nNaluSize);
        break;

    case RL_NALU_TYPE_PPS: // 0x08: PPS
        MP4AddH264PictureParameterSet(m_pHandle, m_eVideoId, pNalu, nNaluSize);
        break;

    default:
    {
        // Overwrite the 4-byte start code in place with the NALU length, most significant byte first.
        aVideoFrame[0] = (nNaluSize>>24)&0xFF;
        aVideoFrame[1] = (nNaluSize>>16)&0xFF;
        aVideoFrame[2] = (nNaluSize>>8)&0xFF;
        aVideoFrame[3] = (nNaluSize>>0)&0xFF;

        // The duration is the key parameter for playback pacing and A/V sync:
        // (current frame timestamp - previous frame timestamp) * 90000 / 1000.
        // NOTE(review): the division by 1000 suggests timestamps are in milliseconds -- confirm with the caller.
        const MP4Duration duration = (nTimeStamp - this->m_nLastVideoTimeStamp) * this->m_nVideoTimeScale / 1000;
        m_nLastVideoTimeStamp = nTimeStamp;  // remembered once; the next frame's duration is measured from here

        MP4WriteSample(m_pHandle, m_eVideoId, aVideoFrame, nNaluSize + nSCPLen,
            duration, // alternative: MP4_INVALID_DURATION
            0, isSyncSample);
        break;
    }
    }

2.3 AAC->MP4

2.3.1 添加音频轨道

        const int nProfileOffset = 2;
        const uint8_t nProfileMask = 0xC0;
        const uint8_t nSampleRateMask = 0x3C;
        const uint32_t aSmapleRates[] = {96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000};
        //第2个字节的最高2位,profile, the MPEG-4 Audio Object Type minus 1
        uint8_t nProfile = ((aAudioFrame[nProfileOffset] & nProfileMask) >> 6) + 1;
    
        //MPEG-4 Sampling Frequency Index (15 is forbidden)
        uint8_t nSampleRate = (aAudioFrame[nProfileOffset] & nSampleRateMask) >> 2;

        //第3字节的最低位和第4字节的最高2位, MPEG-4 Channel Configuration (in the case of 0, 
        //the channel configuration is sent via an inband PCE)
        uint8_t nChannel = ((aAudioFrame[nProfileOffset] & 0x1) << 2) | ((aAudioFrame[nProfileOffset + 1] & 0xc0) >> 6); 
        /*
        1. timeScale 采样率 16000 32000 44100
        2. sampleDuration  这个参数填写的是每一帧的字节数: sampleDuration * 1000 / timeScale = Duration Time 
        一帧的持续时间。
        3. 参阅:https://www.21ic.com/tougao/article/24188.html
              海思平台的利用mp4v2库合成mp4原理说明
        */
        m_nAudioTimeScale = aSmapleRates[nSampleRate]; //this->m_nVideoTimeScale
        m_eAudioId = MP4AddAudioTrack(m_pHandle, m_nAudioTimeScale, 
            MP4_INVALID_DURATION, //1024
            MP4_MPEG2_AAC_LC_AUDIO_TYPE);  //MP4_MPEG4_AUDIO_TYPE
        MP4SetAudioProfileLevel(m_pHandle, nProfile); //0x2); 

        /*
        aacObjectType(5bits) -- 就是nProfile.
        sampleRateIdx(4bits),
        numChannels(4bits)
        */
        uint8_t aEsConfig[2] = {0};
        aEsConfig[0] = ((nProfile << 3) | (nSampleRate >> 1));
        aEsConfig[1] = ((nSampleRate & 0x01) << 7) | (nChannel << 3);
        printf("MP4 ES config: 0x%02x 0x%02x\n", aEsConfig[0], aEsConfig[1]);
        MP4SetTrackESConfiguration(m_pHandle, m_eAudioId, aEsConfig, sizeof(aEsConfig);

2.3.2 AAC帧->MP4

	// Metadata length, i.e. the ADTS header length: 7 bytes when the low bit of
	// byte 1 is set, otherwise 9 (presumably the ADTS protection_absent flag -- confirm).
	const unsigned int nMetaLen = ((aAudioFrame[1] & 0x01) == 1 ? 7 : 9);
	// Sample duration in audio timescale ticks: timestamp delta since the previous
	// audio frame, scaled by m_nAudioTimeScale / 1000.
	// NOTE(review): the division by 1000 suggests timestamps are in milliseconds -- confirm with the caller.
	const MP4Duration duration = (nTimeStamp - this->m_nLastAudioTimeStamp) * this->m_nAudioTimeScale / 1000;
    m_nLastAudioTimeStamp = nTimeStamp;

    // Write only the payload: skip the ADTS header and shrink the size accordingly.
    bool bRet = MP4WriteSample(m_pHandle, m_eAudioId, aAudioFrame + nMetaLen, 
        nFrameSize - nMetaLen, 
        duration, // alternative: MP4_INVALID_DURATION
        0, 1);

2.4 关闭MP4文件

MP4Close(m_pHandle, 0);

你可能感兴趣的:(音视频)