/**
 * Audio sample formats.
 *
 * The non-planar formats are listed first, followed by their planar
 * variants (suffix "P"); the 64-bit pair comes last. Enumerator values are
 * spelled out explicitly and match the implicit numbering that starts at 0
 * right after AV_SAMPLE_FMT_NONE.
 */
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,

    AV_SAMPLE_FMT_U8   = 0,   ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16  = 1,   ///< signed 16 bits
    AV_SAMPLE_FMT_S32  = 2,   ///< signed 32 bits
    AV_SAMPLE_FMT_FLT  = 3,   ///< float
    AV_SAMPLE_FMT_DBL  = 4,   ///< double

    AV_SAMPLE_FMT_U8P  = 5,   ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P = 6,   ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P = 7,   ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP = 8,   ///< float, planar
    AV_SAMPLE_FMT_DBLP = 9,   ///< double, planar

    AV_SAMPLE_FMT_S64  = 10,  ///< signed 64 bits
    AV_SAMPLE_FMT_S64P = 11,  ///< signed 64 bits, planar

    /* Left implicit on purpose so it always follows the last real format. */
    AV_SAMPLE_FMT_NB          ///< Number of sample formats. DO NOT USE if linking dynamically
};
/**
 * @defgroup channel_masks Audio channel masks
 *
 * A channel layout is a 64-bit integer with a bit set for every channel.
 * The number of bits set must be equal to the number of channels.
 * The value 0 means that the channel layout is not known.
 * @note this data structure is not powerful enough to handle channels
 * combinations that have the same channel multiple times, such as
 * dual-mono.
 *
 * @{
 */
#define AV_CH_FRONT_LEFT             0x00000001
#define AV_CH_FRONT_RIGHT            0x00000002
#define AV_CH_FRONT_CENTER           0x00000004
#define AV_CH_LOW_FREQUENCY          0x00000008
#define AV_CH_BACK_LEFT              0x00000010
#define AV_CH_BACK_RIGHT             0x00000020
#define AV_CH_FRONT_LEFT_OF_CENTER   0x00000040
#define AV_CH_FRONT_RIGHT_OF_CENTER  0x00000080
#define AV_CH_BACK_CENTER            0x00000100
#define AV_CH_SIDE_LEFT              0x00000200
#define AV_CH_SIDE_RIGHT             0x00000400
#define AV_CH_TOP_CENTER             0x00000800
#define AV_CH_TOP_FRONT_LEFT         0x00001000
#define AV_CH_TOP_FRONT_CENTER       0x00002000
#define AV_CH_TOP_FRONT_RIGHT        0x00004000
#define AV_CH_TOP_BACK_LEFT          0x00008000
#define AV_CH_TOP_BACK_CENTER        0x00010000
#define AV_CH_TOP_BACK_RIGHT         0x00020000
#define AV_CH_STEREO_LEFT            0x20000000  ///< Stereo downmix.
#define AV_CH_STEREO_RIGHT           0x40000000  ///< See AV_CH_STEREO_LEFT.
/* Masks from bit 31 upwards carry the ULL suffix so they are 64-bit constants. */
#define AV_CH_WIDE_LEFT              0x0000000080000000ULL
#define AV_CH_WIDE_RIGHT             0x0000000100000000ULL
#define AV_CH_SURROUND_DIRECT_LEFT   0x0000000200000000ULL
#define AV_CH_SURROUND_DIRECT_RIGHT  0x0000000400000000ULL
#define AV_CH_LOW_FREQUENCY_2        0x0000000800000000ULL

/** Channel mask value used for AVCodecContext.request_channel_layout
    to indicate that the user requests the channel order of the decoder output
    to be the native codec channel order. */
#define AV_CH_LAYOUT_NATIVE          0x8000000000000000ULL

/**
 * @}
 * @defgroup channel_mask_c Audio channel layouts
 * @{
 * */
#define AV_CH_LAYOUT_MONO              (AV_CH_FRONT_CENTER)
#define AV_CH_LAYOUT_STEREO            (AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT)
#define AV_CH_LAYOUT_2POINT1           (AV_CH_LAYOUT_STEREO|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_2_1               (AV_CH_LAYOUT_STEREO|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_SURROUND          (AV_CH_LAYOUT_STEREO|AV_CH_FRONT_CENTER)
#define AV_CH_LAYOUT_3POINT1           (AV_CH_LAYOUT_SURROUND|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_4POINT0           (AV_CH_LAYOUT_SURROUND|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_4POINT1           (AV_CH_LAYOUT_4POINT0|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_2_2               (AV_CH_LAYOUT_STEREO|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)
#define AV_CH_LAYOUT_QUAD              (AV_CH_LAYOUT_STEREO|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_5POINT0           (AV_CH_LAYOUT_SURROUND|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)
#define AV_CH_LAYOUT_5POINT1           (AV_CH_LAYOUT_5POINT0|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_5POINT0_BACK      (AV_CH_LAYOUT_SURROUND|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_5POINT1_BACK      (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_6POINT0           (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT0_FRONT     (AV_CH_LAYOUT_2_2|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_HEXAGONAL         (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1           (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1_BACK      (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1_FRONT     (AV_CH_LAYOUT_6POINT0_FRONT|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_7POINT0           (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_7POINT0_FRONT     (AV_CH_LAYOUT_5POINT0|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_7POINT1           (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_7POINT1_WIDE      (AV_CH_LAYOUT_5POINT1|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_7POINT1_WIDE_BACK (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_OCTAGONAL         (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_CENTER|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_HEXADECAGONAL     (AV_CH_LAYOUT_OCTAGONAL|AV_CH_WIDE_LEFT|AV_CH_WIDE_RIGHT|AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT|AV_CH_TOP_BACK_CENTER|AV_CH_TOP_FRONT_CENTER|AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT)
#define AV_CH_LAYOUT_STEREO_DOWNMIX    (AV_CH_STEREO_LEFT|AV_CH_STEREO_RIGHT)
这里有个官方例子:FFmpeg: decode_audio.c
以下是跟音频相关的常用参数
/**
 * Excerpt of AVCodecContext showing only the audio-related fields
 * discussed in this document.
 */
typedef struct AVCodecContext {
    /* audio only */
    int sample_rate; ///< samples per second
    int channels;    ///< number of audio channels

    /**
     * audio sample format
     * - encoding: Set by user.
     * - decoding: Set by libavcodec.
     */
    enum AVSampleFormat sample_fmt;  ///< sample format

    /* The following data should not be initialized. */
    /**
     * Number of samples per channel in an audio frame.
     *
     * - encoding: set by libavcodec in avcodec_open2(). Each submitted frame
     *   except the last must contain exactly frame_size samples per channel.
     *   May be 0 when the codec has AV_CODEC_CAP_VARIABLE_FRAME_SIZE set, then the
     *   frame size is not restricted.
     * - decoding: may be set by some decoders to indicate constant frame size
     */
    int frame_size;

    /**
     * Audio cutoff bandwidth (0 means "automatic")
     * - encoding: Set by user.
     * - decoding: unused
     */
    int cutoff;

    /**
     * Audio channel layout.
     * - encoding: set by user.
     * - decoding: set by user, may be overwritten by libavcodec.
     */
    uint64_t channel_layout;

    /**
     * Request decoder to use this channel layout if it can (0 for default)
     * - encoding: unused
     * - decoding: Set by user.
     */
    uint64_t request_channel_layout;

    /**
     * Type of service that the audio stream conveys.
     * - encoding: Set by user.
     * - decoding: Set by libavcodec.
     */
    enum AVAudioServiceType audio_service_type;

    /**
     * desired sample format
     * - encoding: Not used.
     * - decoding: Set by user.
     *   Decoder will decode to this format if it can.
     */
    enum AVSampleFormat request_sample_fmt;
} AVCodecContext;
libswresample主要是用于音频的重采样和格式转换的,包含如下功能:
当音频的采样率与播放器的采样率不一致时,需要先对音频进行重采样,才能在播放器中正常播放,否则可能会出现音频变速的问题。
/*
 * Dump the decoded frame to outfile: for every sample index, write one
 * sample (data_size bytes) from each channel's buffer in turn.
 * NOTE(review): indexing frame->data[] per channel presumes a planar
 * sample format - confirm against the decoder in use.
 */
data_size = av_get_bytes_per_sample(dec_ctx->sample_fmt);
for (i = 0; i < frame->nb_samples; i++) {
    for (ch = 0; ch < dec_ctx->channels; ch++) {
        /* Sample i of channel ch starts data_size * i bytes into its buffer. */
        fwrite(&frame->data[ch][data_size * i], 1, data_size, outfile);
    }
}
uint8_t **input;
int in_samples;
//第一种方式创建SwrContext
//SwrContext *swr = swr_alloc();
// av_opt_set_channel_layout(swr, "in_channel_layout", AV_CH_LAYOUT_5POINT1, 0);
// av_opt_set_channel_layout(swr, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
// av_opt_set_int(swr, "in_sample_rate", 48000, 0);
// av_opt_set_int(swr, "out_sample_rate", 44100, 0);
// av_opt_set_sample_fmt(swr, "in_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
// av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S32, 0);
//第二种方式创建SwrContext,以下代码作用等同于上面的
SwrContext *swr = swr_alloc_set_opts(NULL, // we're allocating a new context
AV_CH_LAYOUT_STEREO, // out_ch_layout
AV_SAMPLE_FMT_S32, // out_sample_fmt
44100, // out_sample_rate
AV_CH_LAYOUT_5POINT1, // in_ch_layout
AV_SAMPLE_FMT_FLTP, // in_sample_fmt
48000, // in_sample_rate
0, // log_offset
NULL); // log_ctx
//在得到SwrContext后就要进行初始化 ,如果SwrContext的参数有任何变化,则必须再次调用以下初始化函数
swr_init(swr);
//这里演示修改了第三个参数为AV_SAMPLE_FMT_S16,则需要再次调用swr_init
swr = swr_alloc_set_opts(swr,
AV_CH_LAYOUT_STEREO, // out_ch_layout
AV_SAMPLE_FMT_S16, // out_sample_fmt
44100, // out_sample_rate
AV_CH_LAYOUT_5POINT1, // in_ch_layout
AV_SAMPLE_FMT_FLTP, // in_sample_fmt
48000, // in_sample_rate
0, // log_offset
NULL); // log_ctx
swr_init(swr);//再次调用
//计算转换后的采样数samples,计算公式为 in_samples*out_sample_rate=out_samples*in_sample_rate
//该运算在数学上等价于a * b / c,最后一个参数可以支持多种取舍
int out_samples = av_rescale_rnd(swr_get_delay(swr, //获取下一个输入样本相对于下一个输出样本将经历的延迟
48000)//输入采样率
+in_samples,
44100, //输出采样率
48000, //输入采样率
AV_ROUND_UP);//表示向上取整,如3/2=2
//根据转换后的音频参数分配一块缓冲来存储数据
uint8_t * output[8];//用于存储转换后的数据
//分配一个样本缓冲区,并相应地填充数据指针和行大小
//可以使用av_freep(&output [0])释放分配的样本缓冲区
av_samples_alloc(&output,//[out]
NULL, //[out]
2, //通道数
out_samples,//采样数
AV_SAMPLE_FMT_S16, //采样格式
0);//对齐,0--默认,1--不对齐
out_samples = swr_convert(swr,
&output, //转换后的数据
out_samples,
input, //要转换的数据
in_samples);
if(swr_get_out_samples(swr,0)>0){//表示有缓冲数据
//通过设置in和in_count为0将缓存中的全部处理完毕,这通常是最后一步,如果没有这步,则可能最后的音频数据会存在缓冲中没有全部转换出来
out_samples = swr_convert(swr,
&output,
out_samples,
NULL,
0);
}
swr_free(&swr);//最后释放
A HE-AAC v1 or v2 audio frame contains 2048 PCM samples per channel
(there is also one mode with 1920 samples per channel but this is
only for special purposes such as DAB+ digital radio). These
bits/frame figures are average figures where each AAC frame generally
has a different size in bytes. To calculate the same for AAC-LC just
use 1024 instead of 2048 PCM samples per frame and channel. For
AAC-LD/ELD it is either 480 or 512 PCM samples per frame and channel.