一、什么是音视频解封装
解封装就是把视频文件(flv,mp4,rmvb,avi)按照一定的规则进行拆分,把该视频的音频流、视频流、字幕流等信息拆分出来。通过解封装,我们可以得到视频文件详细参数指标,包括:编码格式、文件大小、播放时长、分辨率、音频采样率、单声道/双声道等音视频信息。
三、实现源代码
#include <stdio.h>
#include <libavformat/avformat.h>
int main(int argc, char **argv)
{
const char *in_filename = "believe.mp4";
AVFormatContext *ifmt_ctx = NULL; // 描述一个媒体文件或媒体流的构成和基本信息的结构体
// 打开文件
int ret = avformat_open_input(&ifmt_ctx, in_filename, NULL, NULL);
if (ret < 0)
{
char buf[1024] = { 0 };
av_strerror(ret, buf, sizeof(buf) - 1);
printf("open %s failed:%s\n", in_filename, buf);
goto failed;
}
//获取视频文件信息
ret = avformat_find_stream_info(ifmt_ctx, NULL);
if (ret < 0)
{
char buf[1024] = { 0 };
av_strerror(ret, buf, sizeof(buf) - 1);
printf("avformat_find_stream_info %s failed:%s\n", in_filename, buf);
goto failed;
}
//打印显示媒体文件信息
av_dump_format(ifmt_ctx, 0, in_filename, 0);
/*
* 老版本通过遍历的方式读取媒体文件视频和音频的信息
* 新版本的FFmpeg新增加了函数av_find_best_stream,也可以取得同样的效果
*/
int i = 0;
for (i = 0; i < ifmt_ctx->nb_streams; i++)
{
AVStream *in_stream = ifmt_ctx->streams[i];// 音频流、视频流、字幕流
if (AVMEDIA_TYPE_AUDIO == in_stream->codecpar->codec_type)//音频流
{
if (AV_SAMPLE_FMT_FLTP == in_stream->codecpar->format)
{
printf("sampleformat:AV_SAMPLE_FMT_FLTP\n");
}
else if (AV_SAMPLE_FMT_S16P == in_stream->codecpar->format)
{
printf("sampleformat:AV_SAMPLE_FMT_S16P\n");
}
// codec_id: 音频压缩编码格式
if (AV_CODEC_ID_AAC == in_stream->codecpar->codec_id)
{
printf("audio codec:AAC\n");
}
else if (AV_CODEC_ID_MP3 == in_stream->codecpar->codec_id)
{
printf("audio codec:MP3\n");
}
else
{
printf("audio codec_id:%d\n", in_stream->codecpar->codec_id);
}
// 音频总时长,单位为秒
if(in_stream->duration != AV_NOPTS_VALUE)
{
int duration_audio = (in_stream->duration) * av_q2d(in_stream->time_base);
printf("audio duration: %02d:%02d:%02d\n",
duration_audio / 3600, (duration_audio % 3600) / 60, (duration_audio % 60));
}
else
{
printf("audio duration unknown");
}
}
else if (AVMEDIA_TYPE_VIDEO == in_stream->codecpar->codec_type) //视频流
{
if (AV_CODEC_ID_MPEG4 == in_stream->codecpar->codec_id) //视频压缩编码格式
{
printf("video codec:MPEG4\n");
}
else if (AV_CODEC_ID_H264 == in_stream->codecpar->codec_id) //视频压缩编码格式
{
printf("video codec:H264\n");
}
else
{
printf("video codec_id:%d\n", in_stream->codecpar->codec_id);
}
//视频总时长,单位为秒
if(in_stream->duration != AV_NOPTS_VALUE)
{
int duration_video = (in_stream->duration) * av_q2d(in_stream->time_base);
printf("video duration: %02d:%02d:%02d\n",
duration_video / 3600,
(duration_video % 3600) / 60,
(duration_video % 60));
}
else
{
printf("video duration unknown");
}
}
}
failed:
if(ifmt_ctx)
avformat_close_input(&ifmt_ctx);
return 0;
}
1、avformat_open_input函数
int avformat_open_input(AVFormatContext **ps, const char *url, ff_const59 AVInputFormat *fmt, AVDictionary **options);
功能:打开输入视频文件。使用该函数注意以下事项:
(1)如果使用的是FFmpeg 4.0之前的版本,需要先调用av_register_all()函数进行注册;4.0及之后的版本会自动完成注册,用户无需手动调用。
(2)如果打开的是网络流,需要调用avformat_network_init()函数。
参数:
AVFormatContext **ps, 指向AVFormatContext指针的指针。*ps可以为NULL,也可以指向用户通过avformat_alloc_context()预先分配的上下文。如果*ps为NULL,则由FFmpeg在内部分配上下文并写回*ps;如果传入的是用户自己分配的上下文,打开失败时该上下文会被函数释放。无论哪种方式,打开成功后都需要调用avformat_close_input()来关闭并释放。上述例子我们传入的是空指针。
const char *url, 传入的地址。支持http,RTSP,以及普通的本地文件。地址最终会存入到AVFormatContext结构体当中。
AVInputFormat *fmt, 指定输入的封装格式。一般传NULL,由FFmpeg自行探测。
AVDictionary **options, 其它参数设置。它是一个字典,用于参数传递,不传则写NULL。
2、avformat_find_stream_info函数
int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options);
功能:获取视频文件信息。
参数:
AVFormatContext *ic,结构体上下文。由上面avformat_open_input函数的输出值作为传入值。
AVDictionary **options,字典选项,一般可以设置为NULL。
3、av_dump_format函数
void av_dump_format(AVFormatContext *ic, int index, const char *url, int is_output);
功能:将音视频的格式信息通过av_log输出到日志(默认打印到控制台),方便开发者了解输入的音视频格式。该函数仅用于打印信息,删除对它的调用不会影响程序的其他功能。
参数:
AVFormatContext *ic,要分析的上下文,即上面由avformat_open_input打开、并经avformat_find_stream_info填充了流信息的上下文。
int index,要转储有关信息的流的索引。
const char *url,要打印的url,例如源文件或目标文件。
int is_output,选择指定的上下文是输入,还是输出,如果是输入赋值0,输出则赋值1。
4、avformat_close_input函数
void avformat_close_input(AVFormatContext **s);
功能:关闭已打开的输入AVFormatContext。清理释放结构体的所有内容,并将*s设置为NULL。
参数:
AVFormatContext **s,要关闭的上下文。
五、AVFormatContext数据结构说明
AVFormatContext结构体定义:
/*
 * Format I/O context: the top-level structure describing an opened media
 * file or stream (container format, I/O context, streams, timing, options).
 *
 * NOTE(review): this appears to be an abridged copy of the definition in
 * libavformat/avformat.h from an FFmpeg 4.x release (it uses ff_const59 and
 * FF_API_* deprecation guards); field order must match the real header, so
 * treat the header itself as authoritative.
 */
typedef struct AVFormatContext {
/* A class for logging and @ref avoptions. Set by avformat_alloc_context().*/
const AVClass *av_class;
/*The input container format.*/
ff_const59 struct AVInputFormat *iformat;
/*The output container format. */
ff_const59 struct AVOutputFormat *oformat;
/* Format private data. This is an AVOptions-enabled struct
* if and only if iformat/oformat.priv_class is not NULL.*/
void *priv_data;
/*I/O context.*/
AVIOContext *pb;
/* stream info */
/*Flags signalling stream properties. A combination of AVFMTCTX_*.
* Set by libavformat.*/
int ctx_flags;
/* Number of elements in AVFormatContext.streams.*/
unsigned int nb_streams;
/* A list of all streams in the file. New streams are created with
* avformat_new_stream().*/
AVStream **streams;
#if FF_API_FORMAT_FILENAME
/*input or output filename*/
attribute_deprecated
char filename[1024];
#endif
/* input or output URL. Unlike the old filename field, this field has no
* length restriction.*/
char *url;
/*Position of the first frame of the component, in
* AV_TIME_BASE fractional seconds.*/
int64_t start_time;
/*Duration of the stream, in AV_TIME_BASE fractional seconds.*/
int64_t duration;
/*Total stream bitrate in bit/s, 0 if not available.*/
int64_t bit_rate;
/* NOTE(review): packet_size and max_delay are undocumented in this excerpt;
* see libavformat/avformat.h for their semantics.*/
unsigned int packet_size;
int max_delay;
/* Flags modifying the (de)muxer behaviour. A combination of AVFMT_FLAG_*.
* Set by the user before avformat_open_input() / avformat_write_header().*/
int flags;
#define AVFMT_FLAG_GENPTS 0x0001 ///< Generate missing pts even if it requires parsing future frames.
#define AVFMT_FLAG_IGNIDX 0x0002 ///< Ignore index.
#define AVFMT_FLAG_NONBLOCK 0x0004 ///< Do not block when reading packets from input.
#define AVFMT_FLAG_IGNDTS 0x0008 ///< Ignore DTS on frames that contain both DTS & PTS
#define AVFMT_FLAG_NOFILLIN 0x0010 ///< Do not infer any values from other values, just return what is stored in the container
#define AVFMT_FLAG_NOPARSE 0x0020 ///< Do not use AVParsers, you also must set AVFMT_FLAG_NOFILLIN as the fillin code works on frames and no parsing -> no frames. Also seeking to frames can not work if parsing to find frame boundaries has been disabled
#define AVFMT_FLAG_NOBUFFER 0x0040 ///< Do not buffer frames when possible
#define AVFMT_FLAG_CUSTOM_IO 0x0080 ///< The caller has supplied a custom AVIOContext, don't avio_close() it.
#define AVFMT_FLAG_DISCARD_CORRUPT 0x0100 ///< Discard frames marked corrupted
#define AVFMT_FLAG_FLUSH_PACKETS 0x0200 ///< Flush the AVIOContext every packet.
#define AVFMT_FLAG_BITEXACT 0x0400
#if FF_API_LAVF_MP4A_LATM
#define AVFMT_FLAG_MP4A_LATM 0x8000 ///< Deprecated, does nothing.
#endif
#define AVFMT_FLAG_SORT_DTS 0x10000 ///< try to interleave outputted packets by dts (using this flag can slow demuxing down)
#define AVFMT_FLAG_PRIV_OPT 0x20000 ///< Enable use of private options by delaying codec open (this could be made default once all code is converted)
#if FF_API_LAVF_KEEPSIDE_FLAG
#define AVFMT_FLAG_KEEP_SIDE_DATA 0x40000 ///< Deprecated, does nothing.
#endif
#define AVFMT_FLAG_FAST_SEEK 0x80000 ///< Enable fast, but inaccurate seeks for some formats
#define AVFMT_FLAG_SHORTEST 0x100000 ///< Stop muxing when the shortest stream stops.
#define AVFMT_FLAG_AUTO_BSF 0x200000 ///< Add bitstream filters as requested by the muxer
/*Maximum size of the data read from input for determining
* the input container format.*/
int64_t probesize;
/* Maximum duration (in AV_TIME_BASE units) of the data read
* from input in avformat_find_stream_info().*/
int64_t max_analyze_duration;
/* NOTE(review): key/keylen are undocumented in this excerpt; presumably
* keylen is the length of the buffer key points to - confirm in the header.*/
const uint8_t *key;
int keylen;
/* NOTE(review): presumably nb_programs is the number of entries in programs,
* by analogy with nb_streams/streams - confirm in the header.*/
unsigned int nb_programs;
AVProgram **programs;
/*Forced video codec_id.*/
enum AVCodecID video_codec_id;
/*Forced audio codec_id.*/
enum AVCodecID audio_codec_id;
/*Forced subtitle codec_id.*/
enum AVCodecID subtitle_codec_id;
/* Maximum amount of memory in bytes to use for the index of each stream.*/
unsigned int max_index_size;
/* Maximum amount of memory in bytes to use for buffering frames
* obtained from realtime capture devices.*/
unsigned int max_picture_buffer;
/*Number of chapters in AVChapter array.*/
unsigned int nb_chapters;
AVChapter **chapters;
/*Metadata that applies to the whole file.*/
AVDictionary *metadata;
/*Start time of the stream in real world time, in microseconds*/
int64_t start_time_realtime;
/* The number of frames used for determining the framerate in
* avformat_find_stream_info().*/
int fps_probe_size;
/*Error recognition; higher values will detect more errors but may
* misdetect some more or less valid parts as errors.*/
int error_recognition;
/*Custom interrupt callbacks for the I/O layer.*/
AVIOInterruptCB interrupt_callback;
/* Flags to enable debugging. */
int debug;
#define FF_FDEBUG_TS 0x0001
/*Maximum buffering duration for interleaving.*/
int64_t max_interleave_delta;
/*Allow non-standard and experimental extension
* @see AVCodecContext.strict_std_compliance*/
int strict_std_compliance;
/*Flags for the user to detect events happening on the file. Flags must
* be cleared by the user once the event has been handled.
* A combination of AVFMT_EVENT_FLAG_*.*/
int event_flags;
#define AVFMT_EVENT_FLAG_METADATA_UPDATED 0x0001 ///< The call resulted in updated metadata.
/*Maximum number of packets to read while waiting for the first timestamp.
* Decoding only.*/
int max_ts_probe;
/*Avoid negative timestamps during muxing.*/
int avoid_negative_ts;
#define AVFMT_AVOID_NEG_TS_AUTO -1 ///< Enabled when required by target format
#define AVFMT_AVOID_NEG_TS_MAKE_NON_NEGATIVE 1 ///< Shift timestamps so they are non negative
#define AVFMT_AVOID_NEG_TS_MAKE_ZERO 2 ///< Shift timestamps so that they start at 0
/* Transport stream id*/
int ts_id;
/*Audio preload in microseconds.*/
int audio_preload;
/*Max chunk time in microseconds.*/
int max_chunk_duration;
/* Max chunk size in bytes*/
int max_chunk_size;
/*forces the use of wallclock timestamps as pts/dts of packets*/
int use_wallclock_as_timestamps;
/*avio flags, used to force AVIO_FLAG_DIRECT.*/
int avio_flags;
/* NOTE(review): undocumented in this excerpt; by its type name this records
* how the duration was estimated - confirm in the header.*/
enum AVDurationEstimationMethod duration_estimation_method;
/*Skip initial bytes when opening stream*/
int64_t skip_initial_bytes;
/* Correct single timestamp overflows*/
unsigned int correct_ts_overflow;
/*Force seeking to any (also non key) frames.*/
int seek2any;
/*Flush the I/O context after each packet.*/
int flush_packets;
/*format probing score.*/
int probe_score;
/* number of bytes to read maximally to identify format.*/
int format_probesize;
/* ',' separated list of allowed decoders.*/
char *codec_whitelist;
/*',' separated list of allowed demuxers.*/
char *format_whitelist;
/*An opaque field for libavformat internal usage.
* Must not be accessed in any way by callers.*/
AVFormatInternal *internal;
/*IO repositioned flag.*/
int io_repositioned;
/* Forced video codec.*/
AVCodec *video_codec;
/*Forced audio codec.*/
AVCodec *audio_codec;
/* Forced subtitle codec.*/
AVCodec *subtitle_codec;
/* Forced data codec.*/
AVCodec *data_codec;
/* Number of bytes to be written as padding in a metadata header.*/
int metadata_header_padding;
/* User data.
* This is a place for some private data of the user.*/
void *opaque;
/*Callback used by devices to communicate with application.*/
av_format_control_message control_message_cb;
/* Output timestamp offset, in microseconds.*/
int64_t output_ts_offset;
/* dump format separator.
* can be ", " or "\n " or anything else*/
uint8_t *dump_separator;
/*Forced Data codec_id.*/
enum AVCodecID data_codec_id;
#if FF_API_OLD_OPEN_CALLBACKS
/* Called to open further IO contexts when needed for demuxing.*/
attribute_deprecated
int (*open_cb)(struct AVFormatContext *s, AVIOContext **p, const char *url, int flags, const AVIOInterruptCB *int_cb, AVDictionary **options);
#endif
/*',' separated list of allowed protocols.*/
char *protocol_whitelist;
/* A callback for opening new IO streams.*/
int (*io_open)(struct AVFormatContext *s, AVIOContext **pb, const char *url,
int flags, AVDictionary **options);
/* A callback for closing the streams opened with AVFormatContext.io_open(). */
void (*io_close)(struct AVFormatContext *s, AVIOContext *pb);
/* ',' separated list of disallowed protocols.*/
char *protocol_blacklist;
/* The maximum number of streams.*/
int max_streams;
/* Skip duration calculation in estimate_timings_from_pts.*/
int skip_estimate_duration_from_pts;
} AVFormatContext;
解封装一些主要参数:
char *url:调用avformat_open_input读取到的媒体文件的路径/名字
unsigned int nb_streams:媒体流数量
int64_t bit_rate:媒体文件的码率,单位:bit/s,转换为kbps需要除以1000
int64_t duration:媒体文件时长,单位:微秒(即以AV_TIME_BASE为单位),转换为秒需要除以1000000
AVStream **streams:媒体流,可以是音频流、视频流、字幕流等,信息包含:音频编解码器的采样率、音频采样格式、音频信道数目、音频压缩编码格式、视频帧率、视频压缩编码格式(MPEG4、H264)、视频帧宽度和帧高度以及视频总时长等。