AAC ADTS格式分析 + 代码实战

AAC音频格式:Advanced Audio Coding(高级音频编码),是一种由MPEG-4
标准定义的有损音频压缩格式,由Fraunhofer发展,Dolby, Sony和AT&T是主
要的贡献者。

AAC分两种格式:
ADIF(Audio Data Interchange Format)⾳频数据交换格式。
ADTS(Audio Data Transport Stream) 音频数据传输流。
它们之间的主要区别是:ADTS可以在任意帧解码,也就是说它每一帧都有头信息。ADIF只有一个统一的头,所以必须得到所有的数据后才能解码。

这篇文章主要讲解ADTS这种格式。

AAC音频文件的每一帧由ADTS Header和AAC Audio Data组成,每一帧的ADTS头部都包含了音频的采样率、声道、帧长度等信息,这样解码器才能解析读取。

ADTS的头信息分两部分:其⼀为固定头信息,紧接着是可变头信息。固定头信息中的数据每⼀帧都相同,⽽可变头信息则在帧与帧之间可变。

ADTS Header的长度可能为7字节或9字节,protection_absent=0时占9个字节,protection_absent=1时占7个字节。

固定头部分说明:

位域 占位数 说明
syncword 12 同步头,总是0xFFF,代表一个ADTS帧的开始
ID 1 MPEG标识符,0标识MPEG-4,1标识MPEG-2
Layer 2 总为00
protection_absent 1 表示是否做校验,0表示做校验,1表示不做校验
profile 2 表示使⽤哪个级别的AAC
sampling_frequency_index 4 表示使⽤的采样率下标
private_bit 1 私有位
channel_configuration 3 表示声道数
original_copy 1 原始数据位
home 1 home位

可变头部部分说明:

copyright_identification_bit 1 拷贝权限识别位
copyright_identification_start 1 拷贝权限识别开始
aac_frame_length 13 ⼀个ADTS帧的⻓度包括ADTS头和AAC原始流,注意头部可能为7或9个字节大小
adts_buffer_fullness 11 0x7FF 说明是码率可变的码流
number_of_raw_data_blocks_in_frame 2 表示ADTS帧中有number_of_raw_data_blocks_in_frame + 1个AAC原始帧,number_of_raw_data_blocks_in_frame一般都是0

解析aac代码:

#include <stdint.h>
#include <stdio.h>

#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/log.h>

// Size in bytes of an ADTS header without CRC (protection_absent = 1).
// No trailing semicolon: the macro must be usable inside expressions.
#define ADTS_HEADER_LEN  7

// Sampling rates in Hz, ordered so that the array position equals the value
// written into the 4-bit sampling_frequency_index field of the ADTS header.
const int sampling_frequencies[] = {
    96000,  // 0x0
    88200,  // 0x1
    64000,  // 0x2
    48000,  // 0x3
    44100,  // 0x4
    32000,  // 0x5
    24000,  // 0x6
    22050,  // 0x7
    16000,  // 0x8
    12000,  // 0x9
    11025,  // 0xa
    8000    // 0xb
    // 0xc, 0xd, 0xe and 0xf are reserved
};

// Build the 7-byte ADTS header (no CRC, protection_absent = 1) that precedes
// one raw AAC frame.
//
// p_adts_header  output buffer; exactly 7 bytes are written on success
// data_length    size in bytes of the raw AAC payload that follows the header
// profile        value for the 2-bit profile field (only the low 2 bits are stored)
// samplerate     sampling rate in Hz; must be listed in sampling_frequencies[]
// channels       value for the 3-bit channel_configuration field
//                (only the low 3 bits are stored)
//
// Returns 0 on success. Returns -1 without touching the buffer when the
// buffer is NULL, the sampling rate is not in the table, or header + payload
// does not fit the 13-bit aac_frame_length field.
int adts_header(char * const p_adts_header, const int data_length,
                const int profile, const int samplerate,
                const int channels)
{
    enum { HEADER_LEN = 7, MAX_FRAME_LEN = 0x1fff };

    const int table_len = (int)(sizeof(sampling_frequencies) / sizeof(sampling_frequencies[0]));
    int freq_index = -1;
    int i;

    if(p_adts_header == NULL)
    {
        return -1;
    }

    for(i = 0; i < table_len; i++)
    {
        if(sampling_frequencies[i] == samplerate)
        {
            freq_index = i;
            break;
        }
    }
    if(freq_index < 0)
    {
        printf("unsupport samplerate:%d\n", samplerate);
        return -1;
    }

    // aac_frame_length counts header + payload and is only 13 bits wide.
    // Compare before adding so a huge data_length cannot overflow the int.
    if(data_length < 0 || data_length > MAX_FRAME_LEN - HEADER_LEN)
    {
        printf("invalid aac frame length:%d\n", data_length);
        return -1;
    }

    {
        // Work on unsigned values: storing 0xff through a signed char or
        // shifting a negative int would not be portable.
        uint8_t *hdr = (uint8_t *)p_adts_header;
        const unsigned frame_len    = (unsigned)data_length + HEADER_LEN;
        const unsigned profile_bits = (unsigned)profile & 0x03u;
        const unsigned channel_bits = (unsigned)channels & 0x07u;

        hdr[0] = 0xff;  // syncword 0xfff, upper 8 bits
        // syncword lower 4 bits | MPEG version 0 (1 bit) | layer 0 (2 bits)
        // | protection_absent 1 (1 bit)
        hdr[1] = 0xf1;

        // profile (2 bits) | sampling_frequency_index (4 bits)
        // | private bit 0 (1 bit) | channel_configuration, top bit
        hdr[2] = (uint8_t)((profile_bits << 6)
                           | ((unsigned)freq_index << 2)
                           | (channel_bits >> 2));

        // channel_configuration, low 2 bits | original 0 | home 0
        // | copyright id bit 0 | copyright id start 0
        // | aac_frame_length, top 2 bits
        hdr[3] = (uint8_t)(((channel_bits & 0x03u) << 6) | (frame_len >> 11));

        hdr[4] = (uint8_t)((frame_len >> 3) & 0xffu);   // aac_frame_length, middle 8 bits
        // aac_frame_length, low 3 bits | buffer fullness 0x7ff, top 5 bits
        hdr[5] = (uint8_t)(((frame_len & 0x07u) << 5) | 0x1fu);
        // buffer fullness 0x7ff, low 6 bits | number_of_raw_data_blocks_in_frame 0
        // (the frame holds number_of_raw_data_blocks_in_frame + 1 raw AAC frames)
        hdr[6] = 0xfc;
    }

    return 0;
}

// Extract the audio stream of a media file into a raw AAC file.
//
// Usage: <program> <input media file> <output aac file>
//
// Each packet of the selected audio stream is written to the output file
// preceded by a 7-byte ADTS header built by adts_header(). The stream must
// be AAC. Returns 0 on success and a negative value on any failure; the
// input context and the output file are released on every path past the
// argument checks.
int main(int argc, char *argv[])
{
    int status = -1;    // exit status; becomes 0 only when everything succeeded
    int ret = -1;       // result of the most recent FFmpeg call
    char errors[1024];

    char *in_filename = NULL;
    char *aac_filename = NULL;

    FILE *aac_fd = NULL;

    int audio_index = -1;
    int len = 0;

    AVFormatContext *ifmt_ctx = NULL;
    AVPacket pkt;

    // Make every av_log() message below visible.
    av_log_set_level(AV_LOG_DEBUG);

    if(argc < 3)
    {
        av_log(NULL, AV_LOG_DEBUG, "usage: %s <input file> <output aac file>\n",
               argc > 0 ? argv[0] : "program");
        return -1;
    }

    in_filename = argv[1];      // input file
    aac_filename = argv[2];     // output file

    if(in_filename == NULL || aac_filename == NULL)
    {
        av_log(NULL, AV_LOG_DEBUG, "src or dts file is null, plz check them!\n");
        return -1;
    }

    aac_fd = fopen(aac_filename, "wb");
    if (!aac_fd)
    {
        av_log(NULL, AV_LOG_DEBUG, "Could not open destination file %s\n", aac_filename);
        return -1;
    }

    // Open the input file.
    if((ret = avformat_open_input(&ifmt_ctx, in_filename, NULL, NULL)) < 0)
    {
        av_strerror(ret, errors, sizeof(errors));
        av_log(NULL, AV_LOG_DEBUG, "Could not open source file: %s, %d(%s)\n",
               in_filename,
               ret,
               errors);
        goto cleanup;
    }

    // Probe the streams.
    if((ret = avformat_find_stream_info(ifmt_ctx, NULL)) < 0)
    {
        av_strerror(ret, errors, sizeof(errors));
        av_log(NULL, AV_LOG_DEBUG, "failed to find stream information: %s, %d(%s)\n",
               in_filename,
               ret,
               errors);
        goto cleanup;
    }

    // Dump the media information.
    av_dump_format(ifmt_ctx, 0, in_filename, 0);

    // Initialise the packet; data and size are cleared explicitly as well.
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    // Find the index of the audio stream.
    audio_index = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if(audio_index < 0)
    {
        av_log(NULL, AV_LOG_DEBUG, "Could not find %s stream in input file %s\n",
               av_get_media_type_string(AVMEDIA_TYPE_AUDIO),
               in_filename);
        status = AVERROR(EINVAL);
        goto cleanup;
    }

    // Print the AAC profile.
    printf("audio profile:%d, FF_PROFILE_AAC_LOW:%d\n",
           ifmt_ctx->streams[audio_index]->codecpar->profile,
           FF_PROFILE_AAC_LOW);

    if(ifmt_ctx->streams[audio_index]->codecpar->codec_id != AV_CODEC_ID_AAC)
    {
        printf("the media file no contain AAC stream, it's codec_id is %d\n",
               ifmt_ctx->streams[audio_index]->codecpar->codec_id);
        goto cleanup;   // status is still -1: nothing was extracted
    }

    // Read the media file and write every AAC frame to the output file.
    status = 0;
    while(status == 0 && av_read_frame(ifmt_ctx, &pkt) >= 0)
    {
        if(pkt.stream_index == audio_index)
        {
            char adts_header_buf[7] = {0};

            if(adts_header(adts_header_buf, pkt.size,
                           ifmt_ctx->streams[audio_index]->codecpar->profile,
                           ifmt_ctx->streams[audio_index]->codecpar->sample_rate,
                           ifmt_ctx->streams[audio_index]->codecpar->channels) < 0)
            {
                // Never write a zeroed header: the output would be unusable.
                av_log(NULL, AV_LOG_DEBUG, "failed to build adts header for packet of size %d\n",
                       pkt.size);
                status = -1;
            }
            // Write the ADTS header. Per the original author's note this does
            // not apply to TS input, whose demuxed packets already carry one.
            else if(fwrite(adts_header_buf, 1, sizeof(adts_header_buf), aac_fd)
                    != sizeof(adts_header_buf))
            {
                av_log(NULL, AV_LOG_DEBUG, "failed to write adts header to %s\n", aac_filename);
                status = -1;
            }
            else
            {
                // Write the raw AAC payload.
                len = (int)fwrite(pkt.data, 1, (size_t)pkt.size, aac_fd);
                if(len != pkt.size)
                {
                    av_log(NULL, AV_LOG_DEBUG, "warning, length of writed data isn't equal pkt.size(%d, %d)\n",
                           len,
                           pkt.size);
                    // A truncated frame corrupts the stream; stop here.
                    status = -1;
                }
            }
        }
        av_packet_unref(&pkt);
    }

cleanup:
    // Close the input file.
    if(ifmt_ctx)
    {
        avformat_close_input(&ifmt_ctx);
    }
    // fclose() flushes buffered data, so its failure means lost output.
    if(aac_fd && fclose(aac_fd) != 0)
    {
        av_log(NULL, AV_LOG_DEBUG, "failed to close destination file %s\n", aac_filename);
        if(status == 0)
        {
            status = -1;
        }
    }

    return status;
}

输入:

believe.flv out.aac

输出:

[NULL @ 00dd8540] Opening 'believe.flv' for reading
[file @ 00dc3680] Setting default whitelist 'file,crypto'
[flv @ 00dd8540] Format flv probed with size=2048 and score=100
[flv @ 00dd8540] Before avformat_find_stream_info() pos: 13 bytes read:32768 seeks:0 nb_streams:0
[NULL @ 00dd4100] nal_unit_type: 7(SPS), nal_ref_idc: 3
[NULL @ 00dd4100] nal_unit_type: 8(PPS), nal_ref_idc: 3
[h264 @ 00dd4100] nal_unit_type: 7(SPS), nal_ref_idc: 3
[h264 @ 00dd4100] nal_unit_type: 8(PPS), nal_ref_idc: 3
[h264 @ 00dd4100] nal_unit_type: 6(SEI), nal_ref_idc: 0
[h264 @ 00dd4100] nal_unit_type: 5(IDR), nal_ref_idc: 3
[h264 @ 00dd4100] nal_unit_type: 5(IDR), nal_ref_idc: 3
[h264 @ 00dd4100] nal_unit_type: 5(IDR), nal_ref_idc: 3
[h264 @ 00dd4100] nal_unit_type: 5(IDR), nal_ref_idc: 3
[h264 @ 00dd4100] nal_unit_type: 5(IDR), nal_ref_idc: 3
[h264 @ 00dd4100] nal_unit_type: 5(IDR), nal_ref_idc: 3
[h264 @ 00dd4100] nal_unit_type: 5(IDR), nal_ref_idc: 3
[h264 @ 00dd4100] nal_unit_type: 5(IDR), nal_ref_idc: 3
[h264 @ 00dd4100] Format yuv420p chosen by get_format().
[h264 @ 00dd4100] Reinit context to 1920x1088, pix_fmt: yuv420p
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[h264 @ 00dd4100] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2
[flv @ 00dd8540] All info found
[flv @ 00dd8540] rfps: 14.833333 0.017658
[flv @ 00dd8540] rfps: 14.916667 0.004437
[flv @ 00dd8540] rfps: 15.000000 0.000018
[flv @ 00dd8540] rfps: 15.000000 0.000017
[flv @ 00dd8540] rfps: 15.083333 0.004399
[flv @ 00dd8540] rfps: 15.083333 0.004399
[flv @ 00dd8540] rfps: 15.166667 0.017581
[flv @ 00dd8540] rfps: 29.833333 0.017749
[flv @ 00dd8540] rfps: 29.916667 0.004509
[flv @ 00dd8540] rfps: 29.916667 0.004509
[flv @ 00dd8540] rfps: 30.000000 0.000070
[flv @ 00dd8540] rfps: 30.000000 0.000070
[flv @ 00dd8540] rfps: 45.000000 0.000157
[flv @ 00dd8540] rfps: 45.000000 0.000157
[flv @ 00dd8540] rfps: 60.000000 0.000280
[flv @ 00dd8540] rfps: 60.000000 0.000280
[flv @ 00dd8540] rfps: 120.000000 0.001120
[flv @ 00dd8540] rfps: 120.000000 0.001120
[flv @ 00dd8540] rfps: 240.000000 0.004480
[flv @ 00dd8540] rfps: 240.000000 0.004480
[flv @ 00dd8540] rfps: 29.970030 0.000653
[flv @ 00dd8540] rfps: 29.970030 0.000653
[flv @ 00dd8540] rfps: 59.940060 0.002612
[flv @ 00dd8540] rfps: 59.940060 0.002612
[flv @ 00dd8540] rfps: 14.985015 0.000163
[flv @ 00dd8540] rfps: 14.985015 0.000163
[flv @ 00dd8540] After avformat_find_stream_info() pos: 177501 bytes read:198684 seeks:0 frames:170
Input #0, flv, from 'believe.flv':
  Metadata:
    major_brand     : isom
    minor_version   : 512
    compatible_brands: isomiso2avc1mp41
    comment         : www.ieway.cn
    encoder         : Lavf58.29.100
  Duration: 00:03:42.53, start: 0.000000, bitrate: 286 kb/s
    Stream #0:0, 41, 1/1000: Video: h264 (Constrained Baseline), 1 reference frame, yuv420p(progressive, left), 1920x1080 (1920x1088), 0/1, 150 kb/s, 14.46 fps, 15 tbr, 1k tbn, 30 tbc
    Stream #0:1, 129, 1/1000: Audio: aac (LC), 48000 Hz, stereo, fltp, 128 kb/s
audio profile:1, FF_PROFILE_AAC_LOW:1
[AVIOContext @ 00dca700] Statistics: 7972715 bytes read, 0 seeks
Press <RETURN> to close this window...

你可能感兴趣的:(AAC ADTS格式分析 + 代码实战)