这个问题是这样产生的:一位同事反映,ijk获取到的aac文件的duration会随机出现不准的情况。拿到文件一看,确实不准:在AE或者系统mediaplayer中得到的都是8.4秒(准确时间是MtkAACExtractor: ADTS: duration = 8448000 us),ijk得到的却是9.3秒,而实际播放到8秒多的时候流就结束了;放到自己编译的ffmpeg中一看,得到的也是9.3秒。
下面开始分析这个问题,命令行看下这个文件,ffmpeg中获取到的确实是9.3秒
仔细看下红色箭头所指,这个意思是获取到的duration是根据比特率计算的,可能不准确。这种获取音视频info有问题的我们一般可以从avformat_find_stream_info函数开始分析。
这里直接从log开始看,这条warning出现在utils.c/libavformat下:
/**
 * Fallback duration estimate: derive each stream's duration from the file
 * size and the overall bit rate.
 *
 * If ic->bit_rate is not set, it is first computed as the sum of the
 * per-stream bit rates (or left at 0 when that sum is unknown or would
 * overflow). Then, if the container duration is still unknown and a bit rate
 * is available, every stream without a duration gets
 * 8 * payload_bytes / bit_rate, rescaled to the stream time base.
 *
 * The result is only meaningful for constant-bit-rate data; for variable
 * bit rate content it can be noticeably off, which is why a warning is
 * emitted whenever this estimate is used.
 *
 * The "hxk-->" log lines are debug traces; they print int64_t values with
 * PRId64 so the output is correct on every platform.
 *
 * @param ic format context whose streams are updated in place
 */
static void estimate_timings_from_bit_rate(AVFormatContext *ic)
{
    int64_t filesize, duration;
    int i, show_warning = 0;
    AVStream *st;

    /* Debug trace: the author's log showed 0 here, i.e. the demuxer had not
     * provided any bit rate. */
    av_log(ic, AV_LOG_WARNING,
           "hxk-->ic->bit_rate:%"PRId64"\n", (int64_t) ic->bit_rate);

    /* if bit_rate is already set, we believe it */
    if (ic->bit_rate <= 0) {
        int64_t bit_rate = 0;
        for (i = 0; i < ic->nb_streams; i++) {
            st = ic->streams[i];
            if (st->codecpar->bit_rate <= 0 && st->internal->avctx->bit_rate > 0)
                st->codecpar->bit_rate = st->internal->avctx->bit_rate;
            if (st->codecpar->bit_rate > 0) {
                /* refuse to overflow the running sum */
                if (INT64_MAX - st->codecpar->bit_rate < bit_rate) {
                    bit_rate = 0;
                    break;
                }
                bit_rate += st->codecpar->bit_rate;
            } else if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && st->codec_info_nb_frames > 1) {
                // If we have a videostream with packets but without a bitrate
                // then consider the sum not known
                bit_rate = 0;
                break;
            }
        }
        /* store the bit rate computed from the streams */
        ic->bit_rate = bit_rate;
        av_log(ic, AV_LOG_WARNING,
               "hxk-->ic->bit_rate:%"PRId64"\n", (int64_t) ic->bit_rate);
    }

    /* Debug trace: the author's log showed that no duration was known at
     * this point either. */
    /* if duration is already set, we believe it */
    av_log(ic, AV_LOG_WARNING,
           "hxk-->ic->duration:%"PRId64"\n", (int64_t) ic->duration);
    if (ic->duration == AV_NOPTS_VALUE &&
        ic->bit_rate != 0) {
        filesize = ic->pb ? avio_size(ic->pb) : 0;
        av_log(ic, AV_LOG_WARNING,
               "hxk-->ic->filesize:%"PRId64"\n", filesize);
        if (filesize > ic->internal->data_offset) {
            filesize -= ic->internal->data_offset;
            for (i = 0; i < ic->nb_streams; i++) {
                st = ic->streams[i];
                if (   st->time_base.num <= INT64_MAX / ic->bit_rate
                    && st->duration == AV_NOPTS_VALUE) {
                    /* duration = bytes * 8 / bit rate. This holds for CBR
                     * data, but for VBR data (bit rate changes over time)
                     * the value is inaccurate. */
                    duration = av_rescale(8 * filesize, st->time_base.den,
                                          ic->bit_rate *
                                          (int64_t) st->time_base.num);
                    st->duration = duration;
                    show_warning = 1;
                }
            }
        }
    }
    if (show_warning)
        av_log(ic, AV_LOG_WARNING,
               "Estimating duration from bitrate, this may be inaccurate\n");
}
调用上面这个函数的地方是utils.c/libavformat:
/**
 * Choose how the stream and container durations are determined and apply it.
 *
 * Three strategies are tried in order:
 *  1. MPEG-PS/TS on a seekable input with a known size: read the PTS values
 *     (estimate_timings_from_pts).
 *  2. At least one stream already has timing information from the demuxer:
 *     propagate it to all streams (fill_all_stream_timings).
 *  3. Otherwise: estimate from the bit rate, which is imprecise
 *     (estimate_timings_from_bit_rate).
 *
 * The "hxk->" log lines are debug traces. The one printing pb->seekable is
 * guarded because ic->pb is not guaranteed to be set (see the AVFMT_NOFILE
 * branch below); the selection condition itself is protected by the
 * preceding "file_size &&" test.
 *
 * @param ic         format context to update
 * @param old_offset passed through to estimate_timings_from_pts()
 */
static void estimate_timings(AVFormatContext *ic, int64_t old_offset)
{
    int64_t file_size;

    /* get the file size, if possible */
    if (ic->iformat->flags & AVFMT_NOFILE) {
        file_size = 0;
    } else {
        file_size = avio_size(ic->pb);
        file_size = FFMAX(0, file_size);
    }

    av_log(ic, AV_LOG_WARNING, "hxk->ic->iformat->name:%s\n", ic->iformat->name);
    av_log(ic, AV_LOG_WARNING, "hxk->file_size:%"PRId64"\n", file_size);
    if (ic->pb)
        av_log(ic, AV_LOG_WARNING, "hxk->ic->pb->seekable:%d\n", ic->pb->seekable);

    if ((!strcmp(ic->iformat->name, "mpeg") ||
         !strcmp(ic->iformat->name, "mpegts")) &&
        file_size && (ic->pb->seekable & AVIO_SEEKABLE_NORMAL)) {
        /* get accurate estimate from the PTSes */
        estimate_timings_from_pts(ic, old_offset);
        ic->duration_estimation_method = AVFMT_DURATION_FROM_PTS;
    } else if (has_duration(ic)) {
        /* the demuxer already provided a duration */
        /* at least one component has timings - we use them for all
         * the components */
        fill_all_stream_timings(ic);
        ic->duration_estimation_method = AVFMT_DURATION_FROM_STREAM;
    } else {
        /* the problematic ADTS file ends up here because the demuxer did
         * not provide any duration */
        /* less precise: use bitrate info */
        estimate_timings_from_bit_rate(ic);
        ic->duration_estimation_method = AVFMT_DURATION_FROM_BITRATE;
    }
    update_stream_timings(ic);

    {
        int i;
        AVStream av_unused *st;
        for (i = 0; i < ic->nb_streams; i++) {
            st = ic->streams[i];
            av_log(ic, AV_LOG_TRACE, "stream %d: start_time: %0.3f duration: %0.3f\n", i,
                   (double) st->start_time * av_q2d(st->time_base),
                   (double) st->duration   * av_q2d(st->time_base));
        }
        av_log(ic, AV_LOG_TRACE,
               "format: start_time: %0.3f duration: %0.3f bitrate=%"PRId64" kb/s\n",
               (double) ic->start_time / AV_TIME_BASE,
               (double) ic->duration   / AV_TIME_BASE,
               (int64_t)ic->bit_rate / 1000);
    }
}
调用上面这个方法是在avformat_find_stream_info/utils.c/libavformat函数中。
原因已经知道了,那么可以如何解决这个问题呢?
aac的duration可以如何获取呢?
我们看下android系统中libstagefright框架中AACExtractor的实现
AACExtractor.cpp/libstagefright
// Constructs the extractor for an ADTS AAC data source.
//
// It reads the profile, sampling-frequency index and channel configuration
// from the ADTS header found at the sniffed "offset", builds the codec
// specific data from them, and then walks the whole stream frame by frame to
// record every frame offset and count the frames. The duration stored under
// kKeyDuration is frame count * per-frame duration.
//
// mInitCheck stays NO_INIT on every early return and is set to OK only at
// the very end.
//
// NOTE(review): the template arguments of `sp` appear to have been lost in
// this listing (e.g. `const sp &source`) — confirm against the original file.
AACExtractor::AACExtractor(
const sp &source, const sp &_meta)
: mDataSource(source),
mInitCheck(NO_INIT),
mFrameDurationUs(0) {
sp meta = _meta;
// No metadata supplied by the caller: sniff the source to obtain it.
if (meta == NULL) {
String8 mimeType;
float confidence;
sp _meta;
if (!SniffAAC(mDataSource, &mimeType, &confidence, &meta)) {
return;
}
}
int64_t offset;
CHECK(meta->findInt64("offset", &offset));
uint8_t profile, sf_index, channel, header[2];
// Read bytes 2 and 3 of the header located at `offset`.
if (mDataSource->readAt(offset + 2, &header, 2) < 2) {
return;
}
// profile: top two bits of the first byte read
profile = (header[0] >> 6) & 0x3;
// sampling-frequency index: the next four bits
sf_index = (header[0] >> 2) & 0xf;
// translate the index into a sample rate; 0 means the index is not supported
uint32_t sr = get_sample_rate(sf_index);
if (sr == 0) {
return;
}
// channel configuration: lowest bit of the first byte plus the top two bits of the second
channel = (header[0] & 0x1) << 2 | (header[1] >> 6);
mMeta = MakeAACCodecSpecificData(profile, sf_index, channel);
off64_t streamSize, numFrames = 0;
size_t frameSize = 0;
int64_t duration = 0;
// The duration can only be computed when the total stream size is known.
if (mDataSource->getSize(&streamSize) == OK) {
while (offset < streamSize) {
// length of the ADTS frame at `offset`; 0 aborts construction
if ((frameSize = getAdtsFrameLength(source, offset, NULL)) == 0) {
return;
}
mOffsetVector.push(offset);
offset += frameSize;// advance to the next frame
numFrames ++;// count the frames
}
// Key part: the total duration is derived from the frame count.
// Round up and get the duration
mFrameDurationUs = (1024 * 1000000ll + (sr - 1)) / sr;
duration = numFrames * mFrameDurationUs;// frame count x playback time of one AAC frame
mMeta->setInt64(kKeyDuration, duration);
}
mInitCheck = OK;
}
我们再看下getAdtsFrameLength/AACExtractor.cpp/libstagefright函数,这个函数其实就是根据adts头来计算出每一个frame的大小的
// Returns the total length in bytes (header included) of the ADTS frame that
// starts at `offset` in `source`, or 0 if the frame cannot be read or is not
// a valid ADTS frame.
//
// source:     data source to read from
// offset:     byte position of the frame's first header byte
// headerSize: optional out-parameter; when non-NULL and the frame is valid it
//             receives the header length (7 bytes without CRC, 9 with CRC)
static size_t getAdtsFrameLength(const sp &source, off64_t offset, size_t* headerSize) {
    const size_t kAdtsHeaderLengthNoCrc = 7;
    const size_t kAdtsHeaderLengthWithCrc = 9;

    // Bytes 0-1 carry the sync pattern. The 0xf6 mask requires the low sync
    // nibble to be all ones and the two layer bits to be zero, while ignoring
    // the ID bit and the protection_absent bit.
    uint8_t sync[2];
    if (source->readAt(offset, &sync, 2) != 2) {
        return 0;
    }
    const bool syncMatches = (sync[0] == 0xff) && ((sync[1] & 0xf6) == 0xf0);
    if (!syncMatches) {
        return 0;
    }

    // protection_absent is the lowest bit of byte 1:
    // 1 = no CRC follows the header, 0 = a CRC is present.
    uint8_t flags;
    if (source->readAt(offset + 1, &flags, 1) < 1) {
        return 0;
    }
    const bool crcAbsent = (flags & 0x1) != 0;

    // The 13-bit frame length is spread over bytes 3, 4 and 5.
    uint8_t lengthBytes[3];
    if (source->readAt(offset + 3, &lengthBytes, 3) < 3) {
        return 0;
    }
    const size_t totalLength =
            (lengthBytes[0] & 0x3) << 11 | lengthBytes[1] << 3 | lengthBytes[2] >> 5;

    // A frame shorter than its own header is invalid.
    const size_t headerLength =
            crcAbsent ? kAdtsHeaderLengthNoCrc : kAdtsHeaderLengthWithCrc;
    if (headerLength > totalLength) {
        return 0;
    }

    if (headerSize != NULL) {
        *headerSize = headerLength;
    }
    return totalLength;
}
上面的实现原理是:一个AAC原始帧包含一段时间内的1024个采样及相关数据,一个AAC音频帧的播放时间=一个AAC帧对应的采样样本个数/采样率,所以aac音频文件总时间t=总帧数x一个AAC音频帧的播放时间。例如采样率为44100Hz时,一帧的播放时间约为1024/44100≈23.22毫秒。
下面看一下aac的demuxer,在aacdec.c/libavformat下,发现里面连对ADIF头的处理都没有,这个先不管了。
下面我们先简单看下aac的格式:
详细的AAC格式参考下这篇文章吧,实在懒得写。
AAC文件格式与音频文件时长计算
下面我们看下ffmpeg中这个格式的demuxer,这个文件封装格式raw ADTS AAC,下面我们看下aacdec.c/libavformat
修改aacdec.c文件,新加函数
//add by hxk
/**
 * Return the total length in bytes (header included) of the ADTS frame that
 * starts at @p offset.
 *
 * The six header bytes needed (sync pattern, protection_absent flag and the
 * 13-bit frame length) are fetched with a single seek and a single read.
 *
 * The function moves the read position of s->pb and does not restore it; the
 * caller has to seek back to wherever demuxing should continue.
 *
 * @param s          format context whose I/O context is read
 * @param offset     absolute byte position of the frame's first header byte
 * @param headerSize if not NULL, receives the header length on success
 *                   (7 bytes without CRC, 9 bytes with CRC)
 * @return frame length in bytes, or 0 if seeking/reading fails, the sync
 *         pattern does not match, or the length is smaller than the header
 */
static int getAdtsFrameLength(AVFormatContext *s, int64_t offset, int *headerSize)
{
    enum {
        ADTS_PROBE_BYTES     = 6, /* bytes 0..5 hold everything needed here */
        ADTS_HEADER_NO_CRC   = 7,
        ADTS_HEADER_WITH_CRC = 9
    };
    uint8_t buf[ADTS_PROBE_BYTES];
    int frame_size, head_size;

    if (avio_seek(s->pb, offset, SEEK_SET) < 0)
        return 0;
    if (avio_read(s->pb, buf, ADTS_PROBE_BYTES) != ADTS_PROBE_BYTES)
        return 0;

    /* Sync pattern: 0xFF followed by a byte whose upper nibble is all ones
     * and whose two layer bits are zero (mask 0xf6 ignores the ID bit and
     * the protection_absent bit). */
    if (buf[0] != 0xff || (buf[1] & 0xf6) != 0xf0)
        return 0;

    /* 13-bit frame length: 2 bits of byte 3, all of byte 4, 3 bits of byte 5 */
    frame_size = (buf[3] & 0x3) << 11 | buf[4] << 3 | buf[5] >> 5;

    /* protection_absent (lowest bit of byte 1) is 0 if there is a CRC */
    head_size = (buf[1] & 0x1) ? ADTS_HEADER_NO_CRC : ADTS_HEADER_WITH_CRC;
    if (head_size > frame_size)
        return 0;

    if (headerSize != NULL)
        *headerSize = head_size;
    return frame_size;
}
/**
 * Map an ADTS/MPEG-4 sampling_frequency_index to a sample rate in Hz.
 *
 * Indices 0..12 are the defined rates (index 12 is 7350 Hz); every other
 * value has no fixed rate in an ADTS header and yields 0.
 *
 * @param sf_index 4-bit sampling_frequency_index taken from the ADTS header
 * @return sample rate in Hz, or 0 if the index has no defined rate
 */
static uint32_t get_sample_rate(const uint8_t sf_index)
{
    static const uint32_t sample_rates[] =
    {
        96000, 88200, 64000, 48000, 44100, 32000,
        24000, 22050, 16000, 12000, 11025, 8000,
        7350
    };

    if (sf_index < sizeof(sample_rates) / sizeof(sample_rates[0])) {
        return sample_rates[sf_index];
    }
    return 0;
}
//add end
修改adts_aac_read_header函数
/*
 * First attempt at computing the duration inside the ADTS demuxer's
 * read_header callback.
 *
 * After the unchanged part (create the stream, read ID3v1/APE tags, skip to
 * the first ADTS sync word) the added section re-reads the header fields at
 * absolute byte offset 2, fills profile / sample rate / channels, switches
 * the time base to 1/sample_rate and then walks every frame from offset 0 to
 * the end of the file to count frames and set st->duration.
 *
 * NOTE(review): nothing in this version seeks back after the frame walk, so
 * the function returns with the read position wherever the last
 * getAdtsFrameLength() call left it. All failure paths in the added section
 * also return 0, i.e. report success.
 */
static int adts_aac_read_header(AVFormatContext *s)
{
av_log(NULL, AV_LOG_WARNING, "hxk->adts_aac_read_header!\n");
AVStream *st;
uint16_t state;
st = avformat_new_stream(s, NULL);
if (!st)
return AVERROR(ENOMEM);
st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
st->codecpar->codec_id = s->iformat->raw_codec_id;
st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
ff_id3v1_read(s);
// Only look for an APE tag on seekable input and when no metadata was found yet;
// the position is saved and restored around the call.
if ((s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
!av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) {
int64_t cur = avio_tell(s->pb);
ff_ape_parse_tag(s);
avio_seek(s->pb, cur, SEEK_SET);
}
// skip data until the first ADTS frame is found
state = avio_r8(s->pb);
while (!avio_feof(s->pb) && avio_tell(s->pb) < s->probesize) {
state = (state << 8) | avio_r8(s->pb);
if ((state >> 4) != 0xFFF)
continue;
avio_seek(s->pb, -2, SEEK_CUR);
break;
}
if ((state >> 4) != 0xFFF)
return AVERROR_INVALIDDATA;
// LCM of all possible ADTS sample rates
// avpriv_set_pts_info(st, 64, 1, 28224000);
//add by hxk
#if 1
// move the read position back to the start of the file
avio_seek(s->pb, 0, SEEK_SET);
uint8_t profile, sf_index, channel, header[2];
// move the read position to absolute byte offset 2 (third header byte)
avio_seek(s->pb, 2, SEEK_SET);
if (avio_read(s->pb,&header, 2) < 2) {
av_log(NULL, AV_LOG_ERROR, "avio_read header error!\n");
return 0;
}
int64_t offset = 0;
// profile: top two bits of the first byte read
profile = (header[0] >> 6) & 0x3;
st->codecpar->profile = profile;
//av_log(NULL, AV_LOG_WARNING, "hxk->profile:%d!\n",profile);
sf_index = (header[0] >> 2) & 0xf;
// translate the sampling-frequency index into a sample rate
uint32_t sr = get_sample_rate(sf_index);
//av_log(NULL, AV_LOG_WARNING, "hxk->samplerate:%d!\n",sr);
if (sr == 0) {
av_log(NULL, AV_LOG_ERROR, "avio_read read sampletare error!\n");
return 0;
}
// store the sample rate in the codec parameters
st->codecpar->sample_rate = sr;
// channel configuration: lowest bit of the first byte plus the top two bits of the second
channel = (header[0] & 0x1) << 2 | (header[1] >> 6);
if (channel == 0) {
av_log(NULL, AV_LOG_ERROR, "adts_aac_read_header read channel error!\n");
return 0;
}
// store the channel count in the codec parameters
st->codecpar->channels = channel;
//av_log(NULL, AV_LOG_WARNING, "hxk->channel:%d!\n",channel);
sf_index = (header[0] >> 2) & 0xf;
int frameSize = 0;
int64_t mFrameDurationUs = 0;
int64_t duration = 0;
// store the sample rate in the codec parameters (repeats the assignment above)
st->codecpar->sample_rate = sr;
int64_t streamSize, numFrames = 0;
avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
// total size of the input
streamSize = avio_size(s->pb);
// av_log(NULL, AV_LOG_WARNING, "hxk->streamSize:%d!\n",streamSize);
if (streamSize > 0) {
while (offset < streamSize) {
if ((frameSize = getAdtsFrameLength(s, offset, NULL)) == 0) {
return 0;
}
offset += frameSize;// advance to the next frame
numFrames ++;// count the frame to obtain the total number of frames
// av_log(NULL, AV_LOG_WARNING, "hxk->frameSize:%d!\n",frameSize);
}
// av_log(NULL, AV_LOG_WARNING, "hxk->numFrames:%lld!\n",numFrames);
// Round up and get the duration of a single frame in microseconds
mFrameDurationUs = (1024 * 1000000ll + (sr - 1)) / sr;
duration = numFrames * mFrameDurationUs;//us
// convert from microseconds (AV_TIME_BASE_Q) to the stream's time base
duration = av_rescale_q(duration,AV_TIME_BASE_Q, st->time_base);
st->duration = duration;
// av_log(NULL, AV_LOG_WARNING, "hxk->duration:%d!\n",duration);
}
#endif
//add end
return 0;
}
这样在demuxer中就获得了duration,在上面的estimate_timings函数中就直接走has_duration这个判断了,得到的duration也是比较准确的了。
运行一下修改后的代码,从下图可以看到时间已经改变了,和android中MtkAACExtractor获取的duration是一样的了。
正满心欢喜解决了问题后,把改动的代码移植到ijk上的时候,发现不能播放,没报任何错误,文件info读取都是正确的,seek一下的时候报了这么一行错误
IJKMEDIA: /storage/emulated/0/3ee807175fc2488d8264ac014ccf55ff.aac: error while seeking
原来忘记把句柄置回去了
修改如下:
/**
 * read_header callback of the raw ADTS AAC demuxer, extended to compute an
 * exact duration by counting ADTS frames.
 *
 * Steps:
 *  1. Create the audio stream, read ID3v1/APE tags and skip forward to the
 *     first ADTS sync word.
 *  2. Parse profile, sample rate and channel configuration from the header
 *     of that first frame. The header is read at the position where the
 *     sync word was found, so leading junk or tags do not break parsing.
 *  3. On seekable input, walk the frames from the first one to the end of
 *     the file and set st->duration to frame_count * 1024 in a 1/sample_rate
 *     time base (one AAC frame carries 1024 samples). The walk stops at the
 *     first position that is not a complete ADTS frame (e.g. a trailing tag),
 *     keeping the frames counted so far.
 *  4. Always seek back to the first frame so that packet reading starts at
 *     the right place.
 *
 * If the header cannot be parsed, the stream keeps the generic ADTS time
 * base and no duration, so the generic timing estimation takes over.
 *
 * Cost: the frame walk performs one seek and one small read per frame.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int adts_aac_read_header(AVFormatContext *s)
{
    AVStream *st;
    uint16_t state;
    uint8_t hdr[4];
    uint32_t sr;
    int channel_cfg, frame_size;
    int64_t first_frame, stream_size, offset, num_frames = 0;

    st = avformat_new_stream(s, NULL);
    if (!st)
        return AVERROR(ENOMEM);

    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
    st->codecpar->codec_id   = s->iformat->raw_codec_id;
    st->need_parsing         = AVSTREAM_PARSE_FULL_RAW;

    ff_id3v1_read(s);
    if ((s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
        !av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) {
        int64_t cur = avio_tell(s->pb);
        ff_ape_parse_tag(s);
        avio_seek(s->pb, cur, SEEK_SET);
    }

    // skip data until the first ADTS frame is found
    state = avio_r8(s->pb);
    while (!avio_feof(s->pb) && avio_tell(s->pb) < s->probesize) {
        state = (state << 8) | avio_r8(s->pb);
        if ((state >> 4) != 0xFFF)
            continue;
        avio_seek(s->pb, -2, SEEK_CUR);
        break;
    }
    if ((state >> 4) != 0xFFF)
        return AVERROR_INVALIDDATA;

    // LCM of all possible ADTS sample rates; used unless the header below
    // yields the actual sample rate
    avpriv_set_pts_info(st, 64, 1, 28224000);

    //add by hxk
    /* The read position now points at the sync word of the first frame. */
    first_frame = avio_tell(s->pb);

    if (avio_read(s->pb, hdr, (int) sizeof(hdr)) != (int) sizeof(hdr)) {
        av_log(s, AV_LOG_WARNING,
               "adts_aac_read_header: short read on the first ADTS header\n");
        goto restore;
    }

    /* Byte 2: profile (2 bits), sampling frequency index (4 bits),
     * private bit, top bit of the channel configuration.
     * Byte 3: remaining two channel configuration bits in its top bits. */
    sr = get_sample_rate((hdr[2] >> 2) & 0xf);
    if (sr == 0) {
        av_log(s, AV_LOG_WARNING,
               "adts_aac_read_header: unsupported sampling frequency index\n");
        goto restore;
    }
    st->codecpar->profile     = (hdr[2] >> 6) & 0x3;
    st->codecpar->sample_rate = sr;

    /* Configuration 0 means the layout is signalled inside the bitstream, so
     * the channel count is left for the parser/decoder to fill in.
     * Configuration 7 denotes 8 channels. */
    channel_cfg = (hdr[2] & 0x1) << 2 | hdr[3] >> 6;
    if (channel_cfg > 0)
        st->codecpar->channels = channel_cfg == 7 ? 8 : channel_cfg;

    avpriv_set_pts_info(st, 64, 1, sr);

    /* Counting frames needs random access and a known file size. */
    stream_size = avio_size(s->pb);
    if ((s->pb->seekable & AVIO_SEEKABLE_NORMAL) && stream_size > first_frame) {
        for (offset = first_frame; offset < stream_size; offset += frame_size) {
            frame_size = getAdtsFrameLength(s, offset, NULL);
            /* Stop at anything that is not a complete frame (trailing tag,
             * garbage, truncated last frame) but keep what was counted. */
            if (frame_size <= 0 || frame_size > stream_size - offset)
                break;
            num_frames++;
        }
        /* Time base is 1/sample_rate, so the duration is simply the number
         * of samples: 1024 per frame, without any rounding error. */
        if (num_frames > 0)
            st->duration = num_frames * 1024;
    }

restore:
    /* Put the read position back on the first frame; without this, packet
     * reading would start wherever the header read or frame walk stopped. */
    if (avio_seek(s->pb, first_frame, SEEK_SET) < 0) {
        av_log(s, AV_LOG_ERROR,
               "adts_aac_read_header: cannot seek back to the first ADTS frame\n");
        return AVERROR(EIO);
    }
    //add end
    return 0;
}
嗯,可以获取正确的时间来正常播放了。