ffmpeg中的av_read_frame()的作用是读取码流中的音频若干帧或者视频一帧。例如,解码视频的时候,每解码一个视频帧,需要先调用 av_read_frame()获得一帧视频的压缩数据,然后才能对该数据进行解码(例如H.264中一帧压缩数据通常对应一个NAL)。
av_read_frame()的声明位于libavformat\avformat.h,如下所示。
/**
* Return the next frame of a stream.
* This function returns what is stored in the file, and does not validate
* that what is there are valid frames for the decoder. It will split what is
* stored in the file into frames and return one for each call. It will not
* omit invalid data between valid frames so as to give the decoder the maximum
* information possible for decoding.
*
* If pkt->buf is NULL, then the packet is valid until the next
* av_read_frame() or until avformat_close_input(). Otherwise the packet
* is valid indefinitely. In both cases the packet must be freed with
* av_packet_unref when it is no longer needed. For video, the packet contains
* exactly one frame. For audio, it contains an integer number of frames if each
* frame has a known fixed size (e.g. PCM or ADPCM data). If the audio frames
* have a variable size (e.g. MPEG audio), then it contains one frame.
*
* pkt->pts, pkt->dts and pkt->duration are always set to correct
* values in AVStream.time_base units (and guessed if the format cannot
* provide them). pkt->pts can be AV_NOPTS_VALUE if the video format
* has B-frames, so it is better to rely on pkt->dts if you do not
* decompress the payload.
*
* @return 0 if OK, < 0 on error or end of file
*/
int av_read_frame(AVFormatContext *s, AVPacket *pkt);
av_read_frame()的定义位于libavformat\utils.c,如下所示:
int av_read_frame(AVFormatContext *s, AVPacket *pkt)
{
const int genpts = s->flags & AVFMT_FLAG_GENPTS;
int eof = 0;
int ret;
AVStream *st;
if (!genpts) {
ret = s->internal->packet_buffer
? ff_packet_list_get(&s->internal->packet_buffer,
&s->internal->packet_buffer_end, pkt)
: read_frame_internal(s, pkt);
if (ret < 0)
return ret;
goto return_packet;
}
for (;;) {
AVPacketList *pktl = s->internal->packet_buffer;
if (pktl) {
AVPacket *next_pkt = &pktl->pkt;
if (next_pkt->dts != AV_NOPTS_VALUE) {
int wrap_bits = s->streams[next_pkt->stream_index]->pts_wrap_bits;
// last dts seen for this stream. if any of packets following
// current one had no dts, we will set this to AV_NOPTS_VALUE.
int64_t last_dts = next_pkt->dts;
av_assert2(wrap_bits <= 64);
while (pktl && next_pkt->pts == AV_NOPTS_VALUE) {
if (pktl->pkt.stream_index == next_pkt->stream_index &&
av_compare_mod(next_pkt->dts, pktl->pkt.dts, 2ULL << (wrap_bits - 1)) < 0) {
if (av_compare_mod(pktl->pkt.pts, pktl->pkt.dts, 2ULL << (wrap_bits - 1))) {
// not B-frame
next_pkt->pts = pktl->pkt.dts;
}
if (last_dts != AV_NOPTS_VALUE) {
// Once last dts was set to AV_NOPTS_VALUE, we don't change it.
last_dts = pktl->pkt.dts;
}
}
pktl = pktl->next;
}
if (eof && next_pkt->pts == AV_NOPTS_VALUE && last_dts != AV_NOPTS_VALUE) {
// Fixing the last reference frame had none pts issue (For MXF etc).
// We only do this when
// 1. eof.
// 2. we are not able to resolve a pts value for current packet.
// 3. the packets for this stream at the end of the files had valid dts.
next_pkt->pts = last_dts + next_pkt->duration;
}
pktl = s->internal->packet_buffer;
}
/* read packet from packet buffer, if there is data */
st = s->streams[next_pkt->stream_index];
if (!(next_pkt->pts == AV_NOPTS_VALUE && st->discard < AVDISCARD_ALL &&
next_pkt->dts != AV_NOPTS_VALUE && !eof)) {
ret = ff_packet_list_get(&s->internal->packet_buffer,
&s->internal->packet_buffer_end, pkt);
goto return_packet;
}
}
ret = read_frame_internal(s, pkt);
if (ret < 0) {
if (pktl && ret != AVERROR(EAGAIN)) {
eof = 1;
continue;
} else
return ret;
}
ret = ff_packet_list_put(&s->internal->packet_buffer,
&s->internal->packet_buffer_end,
pkt, FF_PACKETLIST_FLAG_REF_PACKET);
av_packet_unref(pkt);
if (ret < 0)
return ret;
}
return_packet:
st = s->streams[pkt->stream_index];
if ((s->iformat->flags & AVFMT_GENERIC_INDEX) && pkt->flags & AV_PKT_FLAG_KEY) {
ff_reduce_index(s, st->index);
av_add_index_entry(st, pkt->pos, pkt->dts, 0, 0, AVINDEX_KEYFRAME);
}
if (is_relative(pkt->dts))
pkt->dts -= RELATIVE_TS_BASE;
if (is_relative(pkt->pts))
pkt->pts -= RELATIVE_TS_BASE;
return ret;
}
可以从源代码中看出,av_read_frame()调用了read_frame_internal()。
static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
{
int ret = 0, i, got_packet = 0;
AVDictionary *metadata = NULL;
av_init_packet(pkt);
while (!got_packet && !s->internal->parse_queue) {
AVStream *st;
AVPacket cur_pkt;
/* read next packet */
ret = ff_read_packet(s, &cur_pkt);
if (ret < 0) {
if (ret == AVERROR(EAGAIN))
return ret;
/* flush the parsers */
for (i = 0; i < s->nb_streams; i++) {
st = s->streams[i];
if (st->parser && st->need_parsing)
parse_packet(s, NULL, st->index);
}
/* all remaining packets are now in parse_queue =>
* really terminate parsing */
break;
}
ret = 0;
st = s->streams[cur_pkt.stream_index];
/* update context if required */
if (st->internal->need_context_update) {
if (avcodec_is_open(st->internal->avctx)) {
av_log(s, AV_LOG_DEBUG, "Demuxer context update while decoder is open, closing and trying to re-open\n");
avcodec_close(st->internal->avctx);
st->info->found_decoder = 0;
}
/* close parser, because it depends on the codec */
if (st->parser && st->internal->avctx->codec_id != st->codecpar->codec_id) {
av_parser_close(st->parser);
st->parser = NULL;
}
ret = avcodec_parameters_to_context(st->internal->avctx, st->codecpar);
if (ret < 0) {
av_packet_unref(&cur_pkt);
return ret;
}
#if FF_API_LAVF_AVCTX
FF_DISABLE_DEPRECATION_WARNINGS
/* update deprecated public codec context */
ret = avcodec_parameters_to_context(st->codec, st->codecpar);
if (ret < 0) {
av_packet_unref(&cur_pkt);
return ret;
}
FF_ENABLE_DEPRECATION_WARNINGS
#endif
st->internal->need_context_update = 0;
}
if (cur_pkt.pts != AV_NOPTS_VALUE &&
cur_pkt.dts != AV_NOPTS_VALUE &&
cur_pkt.pts < cur_pkt.dts) {
av_log(s, AV_LOG_WARNING,
"Invalid timestamps stream=%d, pts=%s, dts=%s, size=%d\n",
cur_pkt.stream_index,
av_ts2str(cur_pkt.pts),
av_ts2str(cur_pkt.dts),
cur_pkt.size);
}
if (s->debug & FF_FDEBUG_TS)
av_log(s, AV_LOG_DEBUG,
"ff_read_packet stream=%d, pts=%s, dts=%s, size=%d, duration=%"PRId64", flags=%d\n",
cur_pkt.stream_index,
av_ts2str(cur_pkt.pts),
av_ts2str(cur_pkt.dts),
cur_pkt.size, cur_pkt.duration, cur_pkt.flags);
if (st->need_parsing && !st->parser && !(s->flags & AVFMT_FLAG_NOPARSE)) {
st->parser = av_parser_init(st->codecpar->codec_id);
if (!st->parser) {
av_log(s, AV_LOG_VERBOSE, "parser not found for codec "
"%s, packets or times may be invalid.\n",
avcodec_get_name(st->codecpar->codec_id));
/* no parser available: just output the raw packets */
st->need_parsing = AVSTREAM_PARSE_NONE;
} else if (st->need_parsing == AVSTREAM_PARSE_HEADERS)
st->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
else if (st->need_parsing == AVSTREAM_PARSE_FULL_ONCE)
st->parser->flags |= PARSER_FLAG_ONCE;
else if (st->need_parsing == AVSTREAM_PARSE_FULL_RAW)
st->parser->flags |= PARSER_FLAG_USE_CODEC_TS;
}
if (!st->need_parsing || !st->parser) {
/* no parsing needed: we just output the packet as is */
*pkt = cur_pkt;
compute_pkt_fields(s, st, NULL, pkt, AV_NOPTS_VALUE, AV_NOPTS_VALUE);
if ((s->iformat->flags & AVFMT_GENERIC_INDEX) &&
(pkt->flags & AV_PKT_FLAG_KEY) && pkt->dts != AV_NOPTS_VALUE) {
ff_reduce_index(s, st->index);
av_add_index_entry(st, pkt->pos, pkt->dts,
0, 0, AVINDEX_KEYFRAME);
}
got_packet = 1;
} else if (st->discard < AVDISCARD_ALL) {
if ((ret = parse_packet(s, &cur_pkt, cur_pkt.stream_index)) < 0)
return ret;
st->codecpar->sample_rate = st->internal->avctx->sample_rate;
st->codecpar->bit_rate = st->internal->avctx->bit_rate;
st->codecpar->channels = st->internal->avctx->channels;
st->codecpar->channel_layout = st->internal->avctx->channel_layout;
st->codecpar->codec_id = st->internal->avctx->codec_id;
} else {
/* free packet */
av_packet_unref(&cur_pkt);
}
if (pkt->flags & AV_PKT_FLAG_KEY)
st->skip_to_keyframe = 0;
if (st->skip_to_keyframe) {
av_packet_unref(&cur_pkt);
if (got_packet) {
*pkt = cur_pkt;
}
got_packet = 0;
}
}
if (!got_packet && s->internal->parse_queue)
ret = ff_packet_list_get(&s->internal->parse_queue, &s->internal->parse_queue_end, pkt);
if (ret >= 0) {
AVStream *st = s->streams[pkt->stream_index];
int discard_padding = 0;
if (st->first_discard_sample && pkt->pts != AV_NOPTS_VALUE) {
int64_t pts = pkt->pts - (is_relative(pkt->pts) ? RELATIVE_TS_BASE : 0);
int64_t sample = ts_to_samples(st, pts);
int duration = ts_to_samples(st, pkt->duration);
int64_t end_sample = sample + duration;
if (duration > 0 && end_sample >= st->first_discard_sample &&
sample < st->last_discard_sample)
discard_padding = FFMIN(end_sample - st->first_discard_sample, duration);
}
if (st->start_skip_samples && (pkt->pts == 0 || pkt->pts == RELATIVE_TS_BASE))
st->skip_samples = st->start_skip_samples;
if (st->skip_samples || discard_padding) {
uint8_t *p = av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
if (p) {
AV_WL32(p, st->skip_samples);
AV_WL32(p + 4, discard_padding);
av_log(s, AV_LOG_DEBUG, "demuxer injecting skip %d / discard %d\n", st->skip_samples, discard_padding);
}
st->skip_samples = 0;
}
if (st->inject_global_side_data) {
for (i = 0; i < st->nb_side_data; i++) {
AVPacketSideData *src_sd = &st->side_data[i];
uint8_t *dst_data;
if (av_packet_get_side_data(pkt, src_sd->type, NULL))
continue;
dst_data = av_packet_new_side_data(pkt, src_sd->type, src_sd->size);
if (!dst_data) {
av_log(s, AV_LOG_WARNING, "Could not inject global side data\n");
continue;
}
memcpy(dst_data, src_sd->data, src_sd->size);
}
st->inject_global_side_data = 0;
}
}
av_opt_get_dict_val(s, "metadata", AV_OPT_SEARCH_CHILDREN, &metadata);
if (metadata) {
s->event_flags |= AVFMT_EVENT_FLAG_METADATA_UPDATED;
av_dict_copy(&s->metadata, metadata, 0);
av_dict_free(&metadata);
av_opt_set_dict_val(s, "metadata", NULL, AV_OPT_SEARCH_CHILDREN);
}
#if FF_API_LAVF_AVCTX
update_stream_avctx(s);
#endif
if (s->debug & FF_FDEBUG_TS)
av_log(s, AV_LOG_DEBUG,
"read_frame_internal stream=%d, pts=%s, dts=%s, "
"size=%d, duration=%"PRId64", flags=%d\n",
pkt->stream_index,
av_ts2str(pkt->pts),
av_ts2str(pkt->dts),
pkt->size, pkt->duration, pkt->flags);
return ret;
}
read_frame_internal()代码比较长,这里只简单看一下它前面的部分。它前面部分有2步是十分关键的:
(1)调用了ff_read_packet()从相应的AVInputFormat读取数据。
(2)如果媒体频流需要使用AVCodecParser,则调用parse_packet()解析相应的AVPacket。
下面我们分成分别看一下ff_read_packet()和parse_packet()的源代码。
ff_read_packet()的代码比较长,如下所示。
int ff_read_packet(AVFormatContext *s, AVPacket *pkt)
{
int ret, i, err;
AVStream *st;
for (;;) {
AVPacketList *pktl = s->internal->raw_packet_buffer;
if (pktl) {
*pkt = pktl->pkt;
st = s->streams[pkt->stream_index];
if (s->internal->raw_packet_buffer_remaining_size <= 0)
if ((err = probe_codec(s, st, NULL)) < 0)
return err;
if (st->request_probe <= 0) {
s->internal->raw_packet_buffer = pktl->next;
s->internal->raw_packet_buffer_remaining_size += pkt->size;
av_free(pktl);
return 0;
}
}
pkt->data = NULL;
pkt->size = 0;
av_init_packet(pkt);
ret = s->iformat->read_packet(s, pkt);
if (ret < 0) {
/* Some demuxers return FFERROR_REDO when they consume
data and discard it (ignored streams, junk, extradata).
We must re-call the demuxer to get the real packet. */
if (ret == FFERROR_REDO)
continue;
if (!pktl || ret == AVERROR(EAGAIN))
return ret;
for (i = 0; i < s->nb_streams; i++) {
st = s->streams[i];
if (st->probe_packets || st->request_probe > 0)
if ((err = probe_codec(s, st, NULL)) < 0)
return err;
av_assert0(st->request_probe <= 0);
}
continue;
}
err = av_packet_make_refcounted(pkt);
if (err < 0) {
av_packet_unref(pkt);
return err;
}
if ((s->flags & AVFMT_FLAG_DISCARD_CORRUPT) &&
(pkt->flags & AV_PKT_FLAG_CORRUPT)) {
av_log(s, AV_LOG_WARNING,
"Dropped corrupted packet (stream = %d)\n",
pkt->stream_index);
av_packet_unref(pkt);
continue;
}
if (pkt->stream_index >= (unsigned)s->nb_streams) {
av_log(s, AV_LOG_ERROR, "Invalid stream index %d\n", pkt->stream_index);
continue;
}
st = s->streams[pkt->stream_index];
if (update_wrap_reference(s, st, pkt->stream_index, pkt) && st->pts_wrap_behavior == AV_PTS_WRAP_SUB_OFFSET) {
// correct first time stamps to negative values
if (!is_relative(st->first_dts))
st->first_dts = wrap_timestamp(st, st->first_dts);
if (!is_relative(st->start_time))
st->start_time = wrap_timestamp(st, st->start_time);
if (!is_relative(st->cur_dts))
st->cur_dts = wrap_timestamp(st, st->cur_dts);
}
pkt->dts = wrap_timestamp(st, pkt->dts);
pkt->pts = wrap_timestamp(st, pkt->pts);
force_codec_ids(s, st);
/* TODO: audio: time filter; video: frame reordering (pts != dts) */
if (s->use_wallclock_as_timestamps)
pkt->dts = pkt->pts = av_rescale_q(av_gettime(), AV_TIME_BASE_Q, st->time_base);
if (!pktl && st->request_probe <= 0)
return ret;
err = ff_packet_list_put(&s->internal->raw_packet_buffer,
&s->internal->raw_packet_buffer_end,
pkt, 0);
if (err < 0) {
av_packet_unref(pkt);
return err;
}
s->internal->raw_packet_buffer_remaining_size -= pkt->size;
if ((err = probe_codec(s, st, pkt)) < 0)
return err;
}
}
ff_read_packet()中最关键的地方就是调用了AVInputFormat的read_packet()方法。AVInputFormat的read_packet()是一个函数指针,指向当前的AVInputFormat的读取数据的函数。在这里我们以FLV封装格式对应的AVInputFormat为例,看看read_packet()的实现函数是什么样子的。
FLV封装格式对应的AVInputFormat的定义位于libavformat\flvdec.c,如下所示。
AVInputFormat ff_flv_demuxer = {
.name = "flv",
.long_name = NULL_IF_CONFIG_SMALL("FLV (Flash Video)"),
.priv_data_size = sizeof(FLVContext),
.read_probe = flv_probe,
.read_header = flv_read_header,
.read_packet = flv_read_packet,
.read_seek = flv_read_seek,
.read_close = flv_read_close,
.extensions = "flv",
.priv_class = &flv_class,
};
我们先通过文章《xxx》了解一下FLV的基本格式。了解了FLV的基本格式之后,就可以看一下FLV解析Tag的函数flv_read_packet()了。
static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
{
FLVContext *flv = s->priv_data;
int ret, i, type, size, flags;
int stream_type=-1;
int64_t next, pos, meta_pos;
int64_t dts, pts = AV_NOPTS_VALUE;
int av_uninit(channels);
int av_uninit(sample_rate);
AVStream *st = NULL;
/* pkt size is repeated at end. skip it */
for (;; avio_skip(s->pb, 4)) {
pos = avio_tell(s->pb);
//解析Tag Header==========
//Tag类型
type = (avio_r8(s->pb) & 0x1F);
//Datasize数据大小
size = avio_rb24(s->pb);
//Timstamp时间戳
dts = avio_rb24(s->pb);
dts |= avio_r8(s->pb) << 24;
av_dlog(s, "type:%d, size:%d, dts:%"PRId64" pos:%"PRId64"\n", type, size, dts, avio_tell(s->pb));
if (avio_feof(s->pb))
return AVERROR_EOF;
//StreamID
avio_skip(s->pb, 3); /* stream id, always 0 */
flags = 0;
//========================
if (flv->validate_next < flv->validate_count) {
int64_t validate_pos = flv->validate_index[flv->validate_next].pos;
if (pos == validate_pos) {
if (FFABS(dts - flv->validate_index[flv->validate_next].dts) <=
VALIDATE_INDEX_TS_THRESH) {
flv->validate_next++;
} else {
clear_index_entries(s, validate_pos);
flv->validate_count = 0;
}
} else if (pos > validate_pos) {
clear_index_entries(s, validate_pos);
flv->validate_count = 0;
}
}
if (size == 0)
continue;
next = size + avio_tell(s->pb);
if (type == FLV_TAG_TYPE_AUDIO) {
//Type是音频
stream_type = FLV_STREAM_TYPE_AUDIO;
//Tag Data的第一个字节
flags = avio_r8(s->pb);
size--;
} else if (type == FLV_TAG_TYPE_VIDEO) {
//Type是音频
stream_type = FLV_STREAM_TYPE_VIDEO;
//Tag Data的第一个字节
flags = avio_r8(s->pb);
size--;
if ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_VIDEO_INFO_CMD)
goto skip;
} else if (type == FLV_TAG_TYPE_META) {
stream_type=FLV_STREAM_TYPE_DATA;
if (size > 13 + 1 + 4 && dts == 0) { // Header-type metadata stuff
meta_pos = avio_tell(s->pb);
if (flv_read_metabody(s, next) <= 0) {
goto skip;
}
avio_seek(s->pb, meta_pos, SEEK_SET);
}
} else {
av_log(s, AV_LOG_DEBUG,
"Skipping flv packet: type %d, size %d, flags %d.\n",
type, size, flags);
skip:
avio_seek(s->pb, next, SEEK_SET);
continue;
}
/* skip empty data packets */
if (!size)
continue;
/* now find stream */
for (i = 0; i < s->nb_streams; i++) {
st = s->streams[i];
if (stream_type == FLV_STREAM_TYPE_AUDIO) {
if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO &&
(s->audio_codec_id || flv_same_audio_codec(st->codec, flags)))
break;
} else if (stream_type == FLV_STREAM_TYPE_VIDEO) {
if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
(s->video_codec_id || flv_same_video_codec(st->codec, flags)))
break;
} else if (stream_type == FLV_STREAM_TYPE_DATA) {
if (st->codec->codec_type == AVMEDIA_TYPE_DATA)
break;
}
}
if (i == s->nb_streams) {
static const enum AVMediaType stream_types[] = {AVMEDIA_TYPE_VIDEO, AVMEDIA_TYPE_AUDIO, AVMEDIA_TYPE_DATA};
av_log(s, AV_LOG_WARNING, "Stream discovered after head already parsed\n");
st = create_stream(s, stream_types[stream_type]);
if (!st)
return AVERROR(ENOMEM);
}
av_dlog(s, "%d %X %d \n", stream_type, flags, st->discard);
if ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY ||
stream_type == FLV_STREAM_TYPE_AUDIO)
av_add_index_entry(st, pos, dts, size, 0, AVINDEX_KEYFRAME);
if ( (st->discard >= AVDISCARD_NONKEY && !((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY || (stream_type == FLV_STREAM_TYPE_AUDIO)))
||(st->discard >= AVDISCARD_BIDIR && ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_DISP_INTER && (stream_type == FLV_STREAM_TYPE_VIDEO)))
|| st->discard >= AVDISCARD_ALL
) {
avio_seek(s->pb, next, SEEK_SET);
continue;
}
break;
}
// if not streamed and no duration from metadata then seek to end to find
// the duration from the timestamps
if (s->pb->seekable && (!s->duration || s->duration == AV_NOPTS_VALUE) && !flv->searched_for_end) {
int size;
const int64_t pos = avio_tell(s->pb);
// Read the last 4 bytes of the file, this should be the size of the
// previous FLV tag. Use the timestamp of its payload as duration.
int64_t fsize = avio_size(s->pb);
retry_duration:
avio_seek(s->pb, fsize - 4, SEEK_SET);
size = avio_rb32(s->pb);
// Seek to the start of the last FLV tag at position (fsize - 4 - size)
// but skip the byte indicating the type.
avio_seek(s->pb, fsize - 3 - size, SEEK_SET);
if (size == avio_rb24(s->pb) + 11) {
uint32_t ts = avio_rb24(s->pb);
ts |= avio_r8(s->pb) << 24;
if (ts)
s->duration = ts * (int64_t)AV_TIME_BASE / 1000;
else if (fsize >= 8 && fsize - 8 >= size) {
fsize -= size+4;
goto retry_duration;
}
}
avio_seek(s->pb, pos, SEEK_SET);
flv->searched_for_end = 1;
}
if (stream_type == FLV_STREAM_TYPE_AUDIO) {
int bits_per_coded_sample;
channels = (flags & FLV_AUDIO_CHANNEL_MASK) == FLV_STEREO ? 2 : 1;
sample_rate = 44100 << ((flags & FLV_AUDIO_SAMPLERATE_MASK) >>
FLV_AUDIO_SAMPLERATE_OFFSET) >> 3;
bits_per_coded_sample = (flags & FLV_AUDIO_SAMPLESIZE_MASK) ? 16 : 8;
if (!st->codec->channels || !st->codec->sample_rate ||
!st->codec->bits_per_coded_sample) {
st->codec->channels = channels;
st->codec->channel_layout = channels == 1
? AV_CH_LAYOUT_MONO
: AV_CH_LAYOUT_STEREO;
st->codec->sample_rate = sample_rate;
st->codec->bits_per_coded_sample = bits_per_coded_sample;
}
if (!st->codec->codec_id) {
flv_set_audio_codec(s, st, st->codec,
flags & FLV_AUDIO_CODECID_MASK);
flv->last_sample_rate =
sample_rate = st->codec->sample_rate;
flv->last_channels =
channels = st->codec->channels;
} else {
AVCodecContext ctx = {0};
ctx.sample_rate = sample_rate;
ctx.bits_per_coded_sample = bits_per_coded_sample;
flv_set_audio_codec(s, st, &ctx, flags & FLV_AUDIO_CODECID_MASK);
sample_rate = ctx.sample_rate;
}
} else if (stream_type == FLV_STREAM_TYPE_VIDEO) {
size -= flv_set_video_codec(s, st, flags & FLV_VIDEO_CODECID_MASK, 1);
}
//几种特殊的格式
if (st->codec->codec_id == AV_CODEC_ID_AAC ||
st->codec->codec_id == AV_CODEC_ID_H264 ||
st->codec->codec_id == AV_CODEC_ID_MPEG4) {
//对应AACPacketType或者AVCPacketType
int type = avio_r8(s->pb);
size--;
//H.264
if (st->codec->codec_id == AV_CODEC_ID_H264 || st->codec->codec_id == AV_CODEC_ID_MPEG4) {
// sign extension
//对应CompositionTime
int32_t cts = (avio_rb24(s->pb) + 0xff800000) ^ 0xff800000;
//计算PTS
pts = dts + cts;
if (cts < 0) { // dts might be wrong
if (!flv->wrong_dts)
av_log(s, AV_LOG_WARNING,
"Negative cts, previous timestamps might be wrong.\n");
flv->wrong_dts = 1;
} else if (FFABS(dts - pts) > 1000*60*15) {
av_log(s, AV_LOG_WARNING,
"invalid timestamps %"PRId64" %"PRId64"\n", dts, pts);
dts = pts = AV_NOPTS_VALUE;
}
}
//如果编码器是AAC或者H.264
if (type == 0 && (!st->codec->extradata || st->codec->codec_id == AV_CODEC_ID_AAC ||
st->codec->codec_id == AV_CODEC_ID_H264)) {
AVDictionaryEntry *t;
if (st->codec->extradata) {
if ((ret = flv_queue_extradata(flv, s->pb, stream_type, size)) < 0)
return ret;
ret = AVERROR(EAGAIN);
goto leave;
}
if ((ret = flv_get_extradata(s, st, size)) < 0)
return ret;
/* Workaround for buggy Omnia A/XE encoder */
t = av_dict_get(s->metadata, "Encoder", NULL, 0);
if (st->codec->codec_id == AV_CODEC_ID_AAC && t && !strcmp(t->value, "Omnia A/XE"))
st->codec->extradata_size = 2;
//AAC
if (st->codec->codec_id == AV_CODEC_ID_AAC && 0) {
MPEG4AudioConfig cfg;
if (avpriv_mpeg4audio_get_config(&cfg, st->codec->extradata,
st->codec->extradata_size * 8, 1) >= 0) {
st->codec->channels = cfg.channels;
st->codec->channel_layout = 0;
if (cfg.ext_sample_rate)
st->codec->sample_rate = cfg.ext_sample_rate;
else
st->codec->sample_rate = cfg.sample_rate;
av_dlog(s, "mp4a config channels %d sample rate %d\n",
st->codec->channels, st->codec->sample_rate);
}
}
ret = AVERROR(EAGAIN);
goto leave;
}
}
/* skip empty data packets */
if (!size) {
ret = AVERROR(EAGAIN);
goto leave;
}
ret = av_get_packet(s->pb, pkt, size);
if (ret < 0)
return ret;
//设置PTS、DTS等等
pkt->dts = dts;
pkt->pts = pts == AV_NOPTS_VALUE ? dts : pts;
pkt->stream_index = st->index;
if (flv->new_extradata[stream_type]) {
uint8_t *side = av_packet_new_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA,
flv->new_extradata_size[stream_type]);
if (side) {
memcpy(side, flv->new_extradata[stream_type],
flv->new_extradata_size[stream_type]);
av_freep(&flv->new_extradata[stream_type]);
flv->new_extradata_size[stream_type] = 0;
}
}
if (stream_type == FLV_STREAM_TYPE_AUDIO &&
(sample_rate != flv->last_sample_rate ||
channels != flv->last_channels)) {
flv->last_sample_rate = sample_rate;
flv->last_channels = channels;
ff_add_param_change(pkt, channels, 0, sample_rate, 0, 0);
}
//标记上Keyframe
if ( stream_type == FLV_STREAM_TYPE_AUDIO ||
((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY) ||
stream_type == FLV_STREAM_TYPE_DATA)
pkt->flags |= AV_PKT_FLAG_KEY;
leave:
avio_skip(s->pb, 4);
return ret;
}
flv_read_packet()的代码比较长,但是逻辑比较简单。它的主要功能就是根据FLV文件格式的规范,逐层解析Tag以及TagData,获取Tag以及TagData中的信息。比较关键的地方已经写上了注释,不再详细叙述。
parse_packet()给需要AVCodecParser的媒体流提供解析AVPacket的功能。它的代码如下所示:
/**
* Parse a packet, add all split parts to parse_queue.
*
* @param pkt Packet to parse, NULL when flushing the parser at end of stream.
*/
static int parse_packet(AVFormatContext *s, AVPacket *pkt, int stream_index)
{
AVPacket out_pkt = { 0 }, flush_pkt = { 0 };
AVStream *st = s->streams[stream_index];
uint8_t *data = pkt ? pkt->data : NULL;
int size = pkt ? pkt->size : 0;
int ret = 0, got_output = 0;
if (!pkt) {
av_init_packet(&flush_pkt);
pkt = &flush_pkt;
got_output = 1;
} else if (!size && st->parser->flags & PARSER_FLAG_COMPLETE_FRAMES) {
// preserve 0-size sync packets
compute_pkt_fields(s, st, st->parser, pkt);
}
while (size > 0 || (pkt == &flush_pkt && got_output)) {
int len;
av_init_packet(&out_pkt);
//解析
len = av_parser_parse2(st->parser, st->codec,
&out_pkt.data, &out_pkt.size, data, size,
pkt->pts, pkt->dts, pkt->pos);
pkt->pts = pkt->dts = AV_NOPTS_VALUE;
pkt->pos = -1;
/* increment read pointer */
data += len;
size -= len;
got_output = !!out_pkt.size;
//继续
if (!out_pkt.size)
continue;
if (pkt->side_data) {
out_pkt.side_data = pkt->side_data;
out_pkt.side_data_elems = pkt->side_data_elems;
pkt->side_data = NULL;
pkt->side_data_elems = 0;
}
/* set the duration */
out_pkt.duration = 0;
if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
if (st->codec->sample_rate > 0) {
out_pkt.duration =
av_rescale_q_rnd(st->parser->duration,
(AVRational) { 1, st->codec->sample_rate },
st->time_base,
AV_ROUND_DOWN);
}
}
//设置属性值
out_pkt.stream_index = st->index;
out_pkt.pts = st->parser->pts;
out_pkt.dts = st->parser->dts;
out_pkt.pos = st->parser->pos;
if (st->need_parsing == AVSTREAM_PARSE_FULL_RAW)
out_pkt.pos = st->parser->frame_offset;
if (st->parser->key_frame == 1 ||
(st->parser->key_frame == -1 &&
st->parser->pict_type == AV_PICTURE_TYPE_I))
out_pkt.flags |= AV_PKT_FLAG_KEY;
if (st->parser->key_frame == -1 && st->parser->pict_type ==AV_PICTURE_TYPE_NONE && (pkt->flags&AV_PKT_FLAG_KEY))
out_pkt.flags |= AV_PKT_FLAG_KEY;
compute_pkt_fields(s, st, st->parser, &out_pkt);
if (out_pkt.data == pkt->data && out_pkt.size == pkt->size) {
out_pkt.buf = pkt->buf;
pkt->buf = NULL;
#if FF_API_DESTRUCT_PACKET
FF_DISABLE_DEPRECATION_WARNINGS
out_pkt.destruct = pkt->destruct;
pkt->destruct = NULL;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
}
if ((ret = av_dup_packet(&out_pkt)) < 0)
goto fail;
if (!add_to_pktbuf(&s->parse_queue, &out_pkt, &s->parse_queue_end)) {
av_free_packet(&out_pkt);
ret = AVERROR(ENOMEM);
goto fail;
}
}
/* end of the stream => close and free the parser */
if (pkt == &flush_pkt) {
av_parser_close(st->parser);
st->parser = NULL;
}
fail:
av_free_packet(pkt);
return ret;
}
从代码中可以看出,最终调用了相应AVCodecParser的av_parser_parse2()函数,解析出来AVPacket。此后根据解析的信息还进行了一系列的赋值工作,不再详细叙述。
参考文献:
https://blog.csdn.net/leixiaohua1020/article/details/12678577