FFmpeg使用av_read_frame()方法读取音频流、视频流、字幕流,得到AVPacket数据包。FFmpeg官方提供的samples有使用示例,或者在ffplay.c代码中:打开文件/网络流后,while循环调用av_read_frame()读取帧数据,也就是解封装demux过程,直到文件末尾EOF。
av_read_frame()的调用链如下图所示:
av_read_frame方法的声明位于libavformat/avformat.h,如下所示:
/**
* Return the next frame of a stream.
* This function returns what is stored in the file, and does not validate
* that what is there are valid frames for the decoder. It will split what is
* stored in the file into frames and return one for each call. It will not
* omit invalid data between valid frames so as to give the decoder the maximum
* information possible for decoding.
*
* On success, the returned packet is reference-counted (pkt->buf is set) and
* valid indefinitely. The packet must be freed with av_packet_unref() when
* it is no longer needed. For video, the packet contains exactly one frame.
* For audio, it contains an integer number of frames if each frame has
* a known fixed size (e.g. PCM or ADPCM data). If the audio frames have
* a variable size (e.g. MPEG audio), then it contains one frame.
*
* pkt->pts, pkt->dts and pkt->duration are always set to correct
* values in AVStream.time_base units (and guessed if the format cannot
* provide them). pkt->pts can be AV_NOPTS_VALUE if the video format
* has B-frames, so it is better to rely on pkt->dts if you do not
* decompress the payload.
*
* @return 0 if OK, < 0 on error or end of file. On error, pkt will be blank
* (as if it came from av_packet_alloc()).
*
* @note pkt will be initialized, so it may be uninitialized, but it must not
* contain data that needs to be freed.
*/
int av_read_frame(AVFormatContext *s, AVPacket *pkt);
中文大意为:该方法返回文件存储的帧数据,但不会为解码器校验帧是否有效。把文件拆分为若干个帧,每次调用返回一帧数据包。
返回的数据包被引用计数,并且永久有效。如果不再需要该packet包,必须使用av_packet_unref()进行释放。对于视频,每个packet只包含一帧。对于音频,如果是固定大小(比如PCM或ADPCM),返回若干帧;如果是可变大小,它只包含一帧。
pkt->dts、pkt->pts、pkt->duration使用timebase作为单位,总是被设为准确值。如果视频中存在B帧,pkt->pts可能为AV_NOPTS_VALUE,所以最好使用pkt->dts作为依赖。
返回0代表成功,小于0代表错误或文件结束。如果是错误,pkt将会为空。
av_read_frame()方法主要调用avpriv_packet_list_get()和av_read_frame_internal(),分为无genpts与有genpts两种情况进行读取:
int av_read_frame(AVFormatContext *s, AVPacket *pkt)
{
const int genpts = s->flags & AVFMT_FLAG_GENPTS;
int eof = 0;
int ret;
AVStream *st;
// 没有设置genpts,直接读取一帧数据
if (!genpts) {
ret = s->internal->packet_buffer
? avpriv_packet_list_get(&s->internal->packet_buffer,
&s->internal->packet_buffer_end, pkt)
: read_frame_internal(s, pkt);
if (ret < 0)
return ret;
goto return_packet;
}
for (;;) {
PacketList *pktl = s->internal->packet_buffer;
if (pktl) {
AVPacket *next_pkt = &pktl->pkt;
if (next_pkt->dts != AV_NOPTS_VALUE) {
int wrap_bits = s->streams[next_pkt->stream_index]->pts_wrap_bits;
int64_t last_dts = next_pkt->dts;
while (pktl && next_pkt->pts == AV_NOPTS_VALUE) {
if (pktl->pkt.stream_index == next_pkt->stream_index &&
av_compare_mod(next_pkt->dts, pktl->pkt.dts, 2ULL << (wrap_bits - 1)) < 0) {
if (av_compare_mod(pktl->pkt.pts, pktl->pkt.dts, 2ULL << (wrap_bits - 1))) {
// 没有B帧
next_pkt->pts = pktl->pkt.dts;
}
if (last_dts != AV_NOPTS_VALUE) {
last_dts = pktl->pkt.dts;
}
}
pktl = pktl->next;
}
if (eof && next_pkt->pts == AV_NOPTS_VALUE && last_dts != AV_NOPTS_VALUE) {
next_pkt->pts = last_dts + next_pkt->duration;
}
pktl = s->internal->packet_buffer;
}
// 从packet缓冲区读取packet
st = s->streams[next_pkt->stream_index];
if (!(next_pkt->pts == AV_NOPTS_VALUE && st->discard < AVDISCARD_ALL &&
next_pkt->dts != AV_NOPTS_VALUE && !eof)) {
ret = avpriv_packet_list_get(&s->internal->packet_buffer,
&s->internal->packet_buffer_end, pkt);
goto return_packet;
}
}
// 调用内部方法读取一帧
ret = read_frame_internal(s, pkt);
if (ret < 0) {
if (pktl && ret != AVERROR(EAGAIN)) {
eof = 1;
continue;
} else
return ret;
}
ret = avpriv_packet_list_put(&s->internal->packet_buffer,
&s->internal->packet_buffer_end,
pkt, NULL, 0);
if (ret < 0) {
av_packet_unref(pkt);
return ret;
}
}
return_packet:
......
return ret;
}
av_read_frame_internal()为内部读取一帧数据,主要调用ff_read_packet()和parse_packet(),具体代码如下:
static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
{
int ret, i, got_packet = 0;
AVDictionary *metadata = NULL;
while (!got_packet && !s->internal->parse_queue) {
AVStream *st;
// 读取下一个数据包
ret = ff_read_packet(s, pkt);
if (ret < 0) {
if (ret == AVERROR(EAGAIN))
return ret;
// 刷新解析器
for (i = 0; i < s->nb_streams; i++) {
st = s->streams[i];
if (st->parser && st->need_parsing)
parse_packet(s, pkt, st->index, 1);
}
break;
}
ret = 0;
st = s->streams[pkt->stream_index];
st->event_flags |= AVSTREAM_EVENT_FLAG_NEW_PACKETS;
if (st->internal->need_context_update) {
if (avcodec_is_open(st->internal->avctx)) {
avcodec_close(st->internal->avctx);
st->internal->info->found_decoder = 0;
}
// 关闭解析器,因为它依赖于codec
if (st->parser && st->internal->avctx->codec_id != st->codecpar->codec_id) {
av_parser_close(st->parser);
st->parser = NULL;
}
ret = avcodec_parameters_to_context(st->internal->avctx, st->codecpar);
if (ret < 0) {
av_packet_unref(pkt);
return ret;
}
st->internal->need_context_update = 0;
}
if (st->need_parsing && !st->parser && !(s->flags & AVFMT_FLAG_NOPARSE)) {
// 初始化解析器
st->parser = av_parser_init(st->codecpar->codec_id);
if (!st->parser) {
st->need_parsing = AVSTREAM_PARSE_NONE;
} else if (st->need_parsing == AVSTREAM_PARSE_HEADERS)
st->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
else if (st->need_parsing == AVSTREAM_PARSE_FULL_ONCE)
st->parser->flags |= PARSER_FLAG_ONCE;
else if (st->need_parsing == AVSTREAM_PARSE_FULL_RAW)
st->parser->flags |= PARSER_FLAG_USE_CODEC_TS;
}
if (!st->need_parsing || !st->parser) {
// 不需要解析,直接输出packet
compute_pkt_fields(s, st, NULL, pkt, AV_NOPTS_VALUE, AV_NOPTS_VALUE);
if ((s->iformat->flags & AVFMT_GENERIC_INDEX) &&
(pkt->flags & AV_PKT_FLAG_KEY) && pkt->dts != AV_NOPTS_VALUE) {
ff_reduce_index(s, st->index);
av_add_index_entry(st, pkt->pos, pkt->dts,
0, 0, AVINDEX_KEYFRAME);
}
got_packet = 1;
} else if (st->discard < AVDISCARD_ALL) {
// 解析packet
if ((ret = parse_packet(s, pkt, pkt->stream_index, 0)) < 0)
return ret;
st->codecpar->sample_rate = st->internal->avctx->sample_rate;
st->codecpar->bit_rate = st->internal->avctx->bit_rate;
st->codecpar->channels = st->internal->avctx->channels;
st->codecpar->channel_layout = st->internal->avctx->channel_layout;
st->codecpar->codec_id = st->internal->avctx->codec_id;
} else {
av_packet_unref(pkt);
}
if (pkt->flags & AV_PKT_FLAG_KEY)
st->internal->skip_to_keyframe = 0;
if (st->internal->skip_to_keyframe) {
av_packet_unref(pkt);
got_packet = 0;
}
}
......
return ret;
}
ff_read_packet()主要是调用AVInputFormat指向的read_packet()方法来读取数据包:
int ff_read_packet(AVFormatContext *s, AVPacket *pkt)
{
......
for (;;) {
PacketList *pktl = s->internal->raw_packet_buffer;
const AVPacket *pkt1;
if (pktl) {
st = s->streams[pktl->pkt.stream_index];
if (s->internal->raw_packet_buffer_remaining_size <= 0)
if ((err = probe_codec(s, st, NULL)) < 0)
return err;
if (st->internal->request_probe <= 0) {
avpriv_packet_list_get(&s->internal->raw_packet_buffer,
&s->internal->raw_packet_buffer_end, pkt);
s->internal->raw_packet_buffer_remaining_size += pkt->size;
return 0;
}
}
// 读取数据包
ret = s->iformat->read_packet(s, pkt);
if (ret < 0) {
av_packet_unref(pkt);
if (ret == FFERROR_REDO)
continue;
if (!pktl || ret == AVERROR(EAGAIN))
return ret;
for (i = 0; i < s->nb_streams; i++) {
st = s->streams[i];
if (st->probe_packets || st->internal->request_probe > 0)
if ((err = probe_codec(s, st, NULL)) < 0)
return err;
}
continue;
}
err = av_packet_make_refcounted(pkt);
if (err < 0) {
av_packet_unref(pkt);
return err;
}
if (pkt->flags & AV_PKT_FLAG_CORRUPT) {
if (s->flags & AVFMT_FLAG_DISCARD_CORRUPT) {
av_log(s, AV_LOG_WARNING, ", dropping it.\n");
av_packet_unref(pkt);
continue;
}
}
st = s->streams[pkt->stream_index];
if (update_wrap_reference(s, st, pkt->stream_index, pkt)
&& st->internal->pts_wrap_behavior == AV_PTS_WRAP_SUB_OFFSET) {
// 校正第一个时间戳
if (!is_relative(st->first_dts))
st->first_dts = wrap_timestamp(st, st->first_dts);
if (!is_relative(st->start_time))
st->start_time = wrap_timestamp(st, st->start_time);
if (!is_relative(st->cur_dts))
st->cur_dts = wrap_timestamp(st, st->cur_dts);
}
pkt->dts = wrap_timestamp(st, pkt->dts);
pkt->pts = wrap_timestamp(st, pkt->pts);
force_codec_ids(s, st);
/* TODO: audio: time filter; video: frame reordering (pts != dts) */
if (s->use_wallclock_as_timestamps)
pkt->dts = pkt->pts = av_rescale_q(av_gettime(), AV_TIME_BASE_Q, st->time_base);
if (!pktl && st->internal->request_probe <= 0)
return ret;
err = avpriv_packet_list_put(&s->internal->raw_packet_buffer,
&s->internal->raw_packet_buffer_end,
pkt, NULL, 0);
if (err < 0) {
av_packet_unref(pkt);
return err;
}
pkt1 = &s->internal->raw_packet_buffer_end->pkt;
s->internal->raw_packet_buffer_remaining_size -= pkt1->size;
if ((err = probe_codec(s, st, pkt1)) < 0)
return err;
}
}
其中,read_packet()为函数指针。以mp4的解封装过程为例,位于libavformat/mov.c,对应的AVInputFormat如下:
AVInputFormat ff_mov_demuxer = {
.name = "mov,mp4,m4a,3gp,3g2,mj2",
.long_name = NULL_IF_CONFIG_SMALL("QuickTime / MOV"),
.priv_class = &mov_class,
.priv_data_size = sizeof(MOVContext),
.extensions = "mov,mp4,m4a,3gp,3g2,mj2,psp,m4b,ism,ismv,isma,f4v",
.read_probe = mov_probe,
.read_header = mov_read_header,
.read_packet = mov_read_packet,
.read_close = mov_read_close,
.read_seek = mov_read_seek,
.flags = AVFMT_NO_BYTE_SEEK | AVFMT_SEEK_TO_PTS,
};
那么iformat->read_packet()所指向的函数为mov_read_packet(),具体代码如下:
static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
{
MOVContext *mov = s->priv_data;
MOVStreamContext *sc;
AVIndexEntry *sample;
AVStream *st = NULL;
int64_t current_index;
int ret;
mov->fc = s;
retry:
// 查找下一个sample
sample = mov_find_next_sample(s, &st);
......
if (st->discard != AVDISCARD_ALL) {
// 读取数据包
if (st->codecpar->codec_id == AV_CODEC_ID_EIA_608 && sample->size > 8)
ret = get_eia608_packet(sc->pb, pkt, sample->size);
else
ret = av_get_packet(sc->pb, pkt, sample->size);
if (ret < 0) {
if (should_retry(sc->pb, ret)) {
mov_current_sample_dec(sc);
}
return ret;
}
}
......
if (sc->ctts_data && sc->ctts_index < sc->ctts_count) {
pkt->pts = pkt->dts + sc->dts_shift + sc->ctts_data[sc->ctts_index].duration;
// 更新ctts上下文
sc->ctts_sample++;
if (sc->ctts_index < sc->ctts_count &&
sc->ctts_data[sc->ctts_index].count == sc->ctts_sample) {
sc->ctts_index++;
sc->ctts_sample = 0;
}
} else {
int64_t next_dts = (sc->current_sample < st->nb_index_entries) ?
st->index_entries[sc->current_sample].timestamp : st->duration;
if (next_dts >= pkt->dts)
pkt->duration = next_dts - pkt->dts;
pkt->pts = pkt->dts;
}
......
return 0;
}
parse_packet()主要调用av_parser_parse2()来实现解析,代码如下:
static int parse_packet(AVFormatContext *s, AVPacket *pkt,
int stream_index, int flush)
{
......
while (size > 0 || (flush && got_output)) {
int len;
int64_t next_pts = pkt->pts;
int64_t next_dts = pkt->dts;
// 真正去解析数据包
len = av_parser_parse2(st->parser, st->internal->avctx,
&out_pkt->data, &out_pkt->size, data, size,
pkt->pts, pkt->dts, pkt->pos);
......
// 设置时长
out_pkt->duration = (st->parser->flags & PARSER_FLAG_COMPLETE_FRAMES) ? pkt->duration : 0;
if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
if (st->internal->avctx->sample_rate > 0) {
out_pkt->duration =
av_rescale_q_rnd(st->parser->duration,
(AVRational) { 1, st->internal->avctx->sample_rate },
st->time_base,
AV_ROUND_DOWN);
}
}
......
// flag设为关键帧
if (st->parser->key_frame == 1 ||
(st->parser->key_frame == -1 &&
st->parser->pict_type == AV_PICTURE_TYPE_I))
out_pkt->flags |= AV_PKT_FLAG_KEY;
if (st->parser->key_frame == -1 && st->parser->pict_type ==AV_PICTURE_TYPE_NONE && (pkt->flags&AV_PKT_FLAG_KEY))
out_pkt->flags |= AV_PKT_FLAG_KEY;
......
}
if (flush) {
av_parser_close(st->parser);
st->parser = NULL;
}
fail:
av_packet_unref(pkt);
return ret;
}
最终是调用libavcodec/parser.c的av_parser_parse2()方法去解析数据包。至于数据包解析,在下一篇文章与大家探讨分析。