已知在 MP4 格式中,AAC 的 extradata 是以 AudioSpecificConfig 的形式存在的。搜索到的相关资料:
https://stackoverflow.com/questions/3987850/mp4-atom-how-to-discriminate-the-audio-codec-is-it-aac-or-mp3
该回答指出,esds atom 中的 ASC 就是 AudioSpecificConfig。下面对照 ffmpeg 的代码,看 ffmpeg 是如何写入 esds 的:
/libavformat/movenc.c
/**
 * Write an 'esds' box for the given track.
 *
 * The box holds an ES descriptor (tag 0x03) that contains a DecoderConfig
 * descriptor (tag 0x04), an optional DecoderSpecificInfo descriptor
 * (tag 0x05, emitted only when track->vos_len is non-zero) and an SL
 * descriptor (tag 0x06).
 *
 * @param pb    output I/O context
 * @param track track whose codec parameters and vos_data are serialized
 * @return whatever update_size() returns for the box start position
 */
static int mov_write_esds_tag(AVIOContext *pb, MOVTrack *track) // Basic
{
    struct mpeg4_bit_rate_values rates = calculate_mpeg4_bit_rates(track);
    int64_t box_start = avio_tell(pb);
    /* 5 extra bytes accompany the payload of every nested descriptor
     * (presumably the header emitted by put_descr -- confirm there). */
    int dsi_descr_len = 0;
    /* 1 object type + 1 stream type + 3 buffer size + 4 max + 4 avg bitrate */
    int dec_config_len;
    int es_descr_len;
    int use_11172_3_type;
    int stream_flags;

    if (track->vos_len)
        dsi_descr_len = 5 + track->vos_len;
    dec_config_len = 13 + dsi_descr_len;
    /* 3 = 16-bit track id + 8-bit flags; (5 + 1) = SL descriptor with its
     * single payload byte. */
    es_descr_len = 3 + (5 + dec_config_len) + (5 + 1);

    avio_wb32(pb, 0); /* size, written as 0 here; box start goes to update_size() */
    ffio_wfourcc(pb, "esds");
    avio_wb32(pb, 0); /* Version */

    /* ES descriptor */
    put_descr(pb, 0x03, es_descr_len);
    avio_wb16(pb, track->track_id);
    avio_w8(pb, 0x00); /* flags (= no flags) */

    /* DecoderConfig descriptor */
    put_descr(pb, 0x04, dec_config_len);

    /* Object type indication: MP2/MP3 above 24 kHz is signalled as 11172-3,
     * everything else is looked up in ff_mp4_obj_type. */
    use_11172_3_type = (track->par->codec_id == AV_CODEC_ID_MP2 ||
                        track->par->codec_id == AV_CODEC_ID_MP3) &&
                       track->par->sample_rate > 24000;
    if (use_11172_3_type)
        avio_w8(pb, 0x6B); /* 11172-3 */
    else
        avio_w8(pb, ff_codec_get_tag(ff_mp4_obj_type, track->par->codec_id));

    /* Next byte: 6 bits of stream type (4 for video, 5 for audio), then
     * 1 upstream bit and 1 reserved bit that is set to 1. */
    if (track->par->codec_id == AV_CODEC_ID_DVD_SUBTITLE)
        stream_flags = (0x38 << 2) | 1; /* NeroSubpicStream */
    else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO)
        stream_flags = 0x15;            /* Audiostream */
    else
        stream_flags = 0x11;            /* Visualstream */
    avio_w8(pb, stream_flags);

    avio_wb24(pb, rates.buffer_size);  /* Buffersize DB */
    avio_wb32(pb, rates.max_bit_rate); /* maxbitrate */
    avio_wb32(pb, rates.avg_bit_rate); /* average bitrate */

    /* DecoderSpecific info descriptor: the raw vos_data bytes */
    if (track->vos_len) {
        put_descr(pb, 0x05, track->vos_len);
        avio_write(pb, track->vos_data, track->vos_len);
    }

    /* SL descriptor */
    put_descr(pb, 0x06, 1);
    avio_w8(pb, 0x02);

    return update_size(pb, box_start);
}
从代码可以看出,track->vos_data 被原样写入 DecoderSpecificInfo 描述符(tag 0x05),对 AAC 而言它就是 AudioSpecificConfig。mov_write_esds_tag 的调用链为:
mov_write_trailer -->
mov_write_moov_tag -->
mov_write_trak_tag -->
mov_write_mdia_tag -->
mov_write_minf_tag -->
mov_write_stbl_tag -->
mov_write_stsd_tag -->
mov_write_audio_tag -->
mov_write_wave_tag -->
mov_write_esds_tag
不得不吐槽一下,mp4格式的套娃现象也非常严重。继续看track->vos_data是怎么来的
/*
 * Excerpt of mov_write_header(): parts of the body are elided ("...") and
 * the function is not shown to its end. The visible loop fills
 * track->vos_data / track->vos_len from each stream's codec extradata.
 */
static int mov_write_header(AVFormatContext *s)
{
...
/* One MOVTrack per stream: mov->tracks[i] pairs with s->streams[i]. */
for (i = 0; i < s->nb_streams; i++) {
int j;
AVStream *st= s->streams[i];
MOVTrack *track= &mov->tracks[i];
/* copy extradata if it exists */
if (st->codecpar->extradata_size) {
/* DVD subtitles go through a dedicated helper instead of a raw copy. */
if (st->codecpar->codec_id == AV_CODEC_ID_DVD_SUBTITLE)
mov_create_dvd_sub_decoder_specific_info(track, st);
/* No copy for tags matched by TAG_IS_AVCI (presumably AVC-Intra -- confirm
 * against the macro) or for DNxHD. */
else if (!TAG_IS_AVCI(track->tag) && st->codecpar->codec_id != AV_CODEC_ID_DNXHD) {
track->vos_len = st->codecpar->extradata_size;
/* Allocate the payload plus AV_INPUT_BUFFER_PADDING_SIZE trailing bytes. */
track->vos_data = av_malloc(track->vos_len + AV_INPUT_BUFFER_PADDING_SIZE);
if (!track->vos_data) {
return AVERROR(ENOMEM);
}
/* Byte-for-byte copy of the extradata, then zero the padding area. */
memcpy(track->vos_data, st->codecpar->extradata, track->vos_len);
memset(track->vos_data + track->vos_len, 0, AV_INPUT_BUFFER_PADDING_SIZE);
}
}
可见 vos_data 就是 codec 的 extradata 的一份拷贝。顺便看一下 mov 解复用时是怎么处理 esds tag 的:
libavformat/mov_esds.c
/**
 * Read the payload of an 'esds' box and apply it to the last stream of fc.
 *
 * @param fc demuxer context; nothing is read when it has no streams
 * @param pb input I/O context positioned at the start of the box payload
 * @return 0 when there are no streams or no DecoderConfig descriptor is
 *         found, otherwise the result of ff_mp4_read_dec_config_descr()
 */
int ff_mov_read_esds(AVFormatContext *fc, AVIOContext *pb)
{
    AVStream *st;
    int descr_tag;

    if (fc->nb_streams < 1)
        return 0;
    st = fc->streams[fc->nb_streams - 1];

    avio_rb32(pb); /* version + flags */

    /* First descriptor: parse it as an ES descriptor when tagged as one,
     * otherwise just consume a 16-bit ID. */
    ff_mp4_read_descr(fc, pb, &descr_tag);
    if (descr_tag != MP4ESDescrTag)
        avio_rb16(pb); /* ID */
    else
        ff_mp4_parse_es_descr(pb, NULL);

    /* Second descriptor: only a DecoderConfig descriptor is acted upon. */
    ff_mp4_read_descr(fc, pb, &descr_tag);
    if (descr_tag != MP4DecConfigDescrTag)
        return 0;

    return ff_mp4_read_dec_config_descr(fc, st, pb);
}
libavformat/isom.c
/**
 * Parse the body of a DecoderConfig descriptor and, if one follows, the
 * DecoderSpecificInfo descriptor, filling in st->codecpar.
 *
 * The average bitrate and (when the object type maps to a known codec) the
 * codec id are always set. When a DecoderSpecificInfo descriptor is present
 * its payload becomes the stream extradata; for AAC that extradata is further
 * parsed to derive channel count, sample rate and the final codec id.
 *
 * @param fc demuxer context, used for logging and extradata allocation
 * @param st stream whose codec parameters are updated
 * @param pb input I/O context positioned at the object type byte
 * @return 0 on success, AVERROR_INVALIDDATA for an empty or oversized
 *         DecoderSpecificInfo, or the negative error returned by
 *         ff_get_extradata() / avpriv_mpeg4audio_get_config2()
 */
int ff_mp4_read_dec_config_descr(AVFormatContext *fc, AVStream *st, AVIOContext *pb)
{
    int obj_type = avio_r8(pb);
    enum AVCodecID mapped_id;
    int descr_len, descr_tag;
    int err;

    avio_r8(pb);   /* stream type */
    avio_rb24(pb); /* buffer size db */
    avio_rb32(pb); /* rc_max_rate */
    st->codecpar->bit_rate = avio_rb32(pb); /* avg bitrate */

    /* Keep the current codec id when the object type has no mapping. */
    mapped_id = ff_codec_get_id(ff_mp4_obj_type, obj_type);
    if (mapped_id)
        st->codecpar->codec_id = mapped_id;
    av_log(fc, AV_LOG_TRACE, "esds object type id 0x%02x\n", obj_type);

    descr_len = ff_mp4_read_descr(fc, pb, &descr_tag);
    if (descr_tag != MP4DecSpecificDescrTag)
        return 0;

    av_log(fc, AV_LOG_TRACE, "Specific MPEG-4 header len=%d\n", descr_len);

    /* As per 14496-3:2009 9.D.2.2, no decSpecificInfo is defined for
     * MPEG-1 Audio or MPEG-2 Audio (MPEG-2 AAC excluded), so it is ignored
     * for these object types. */
    if (obj_type == 0x69 || obj_type == 0x6b)
        return 0;

    if (!descr_len || (uint64_t)descr_len > (1<<30))
        return AVERROR_INVALIDDATA;

    err = ff_get_extradata(fc, st->codecpar, pb, descr_len);
    if (err < 0)
        return err;

    /* Only AAC extradata is inspected any further. */
    if (st->codecpar->codec_id != AV_CODEC_ID_AAC)
        return 0;

    {
        MPEG4AudioConfig cfg = {0};
        int rate;

        err = avpriv_mpeg4audio_get_config2(&cfg, st->codecpar->extradata,
                                            st->codecpar->extradata_size, 1, fc);
        if (err < 0)
            return err;

        st->codecpar->ch_layout.order       = AV_CHANNEL_ORDER_UNSPEC;
        st->codecpar->ch_layout.nb_channels = cfg.channels;

        /* Sample rate priority: old mp3on4 table entry, then the extension
         * sample rate when non-zero, then the base sample rate. */
        if (cfg.object_type == 29 && cfg.sampling_index < 3) // old mp3on4
            rate = ff_mpa_freq_tab[cfg.sampling_index];
        else
            rate = cfg.ext_sample_rate ? cfg.ext_sample_rate : cfg.sample_rate;
        st->codecpar->sample_rate = rate;

        av_log(fc, AV_LOG_TRACE, "mp4a config channels %d obj %d ext obj %d "
               "sample rate %d ext sample rate %d\n", cfg.channels,
               cfg.object_type, cfg.ext_object_type,
               cfg.sample_rate, cfg.ext_sample_rate);

        /* Refine the codec id from the audio object type; fall back to AAC
         * when the lookup yields no codec. */
        mapped_id = ff_codec_get_id(mp4_audio_types, cfg.object_type);
        st->codecpar->codec_id = mapped_id ? mapped_id : AV_CODEC_ID_AAC;
    }

    return 0;
}
通过 ff_get_extradata,esds 中 DecoderSpecificInfo 描述符的内容(对 AAC 即 AudioSpecificConfig)已经被读入 st->codecpar->extradata 了。