在FFmpeg/libavfilter/formats.c中定义了negotiate_video和negotiate_audio。在格式协商时,对于video,如果需要scale,就会自动创建scale作为conversion filter;对于audio,如果需要重采样,则会自动创建aresample。
static const AVFilterNegotiation negotiate_video = {
.nb_mergers = FF_ARRAY_ELEMS(mergers_video),
.mergers = mergers_video,
.conversion_filter = "scale",
.conversion_opts_offset = offsetof(AVFilterGraph, scale_sws_opts),
};
static const AVFilterNegotiation negotiate_audio = {
.nb_mergers = FF_ARRAY_ELEMS(mergers_audio),
.mergers = mergers_audio,
.conversion_filter = "aresample",
.conversion_opts_offset = offsetof(AVFilterGraph, aresample_swr_opts),
};
格式协商merge的时候,video有merge_pix_fmts,audio有merge_channel_layouts、merge_samplerates、merge_sample_fmts。
static const AVFilterFormatsMerger mergers_video[] = {
{
.offset = offsetof(AVFilterFormatsConfig, formats),
.merge = merge_pix_fmts,
.can_merge = can_merge_pix_fmts,
},
};
static const AVFilterFormatsMerger mergers_audio[] = {
{
.offset = offsetof(AVFilterFormatsConfig, channel_layouts),
.merge = merge_channel_layouts,
.can_merge = NULL,
},
{
.offset = offsetof(AVFilterFormatsConfig, samplerates),
.merge = merge_samplerates,
.can_merge = can_merge_samplerates,
},
{
.offset = offsetof(AVFilterFormatsConfig, formats),
.merge = merge_sample_fmts,
.can_merge = can_merge_sample_fmts,
},
};
在ffmpeg_opt.c中有这个定义:
int auto_conversion_filters = 1;
如果设置为0,自动插入的conversion filter(包括audio的aresample)都会被关掉。下面这段代码在configure_filtergraph()函数中:把flag设置为AVFILTER_AUTO_CONVERT_NONE之后,所有的自动转换都会被禁用。
if (!auto_conversion_filters)
avfilter_graph_set_auto_convert(fg->graph, AVFILTER_AUTO_CONVERT_NONE);
-af aresample=resampler=soxr
ffmpeg -i chd-44100.wav -af aresample=resampler=soxr -ar 48000 chd-48000.wav -v 56
FFmpeg命令中,不指定aresample时默认使用swr重采样;如果要使用soxr,就需要手动指定-af aresample=resampler=soxr(前提是FFmpeg编译时开启了--enable-libsoxr)。
ret= s->resampler->multiple_resample(s->resample, &out, out_count, &in, FFMAX(in_count-padless, 0), &consumed);
out_count -= ret;
ret_sum += ret;
buf_set(&out, &out, ret);
in_count -= consumed;
buf_set(&in, &in, consumed);
s->resampler->multiple_resample
返回实际重采样输出的sample数(ret),consumed返回实际消耗的input sample数。
随后out_count减去ret、in_count减去consumed,分别得到剩余的输出空间和剩余的输入sample数。
buf_set(&out, &out, ret)把out各通道的数据指针向后移动ret个sample,跳过已经写入的输出数据;buf_set(&in, &in, consumed)把in各通道的数据指针向后移动consumed个sample,跳过已经消耗的输入数据。
首先要看下internal format的来历:
struct SwrContext {
const AVClass *av_class; ///< AVClass used for AVOption and av_log()
int log_level_offset; ///< logging level offset
void *log_ctx; ///< parent logging context
enum AVSampleFormat in_sample_fmt; ///< input sample format
enum AVSampleFormat int_sample_fmt; ///< internal sample format (AV_SAMPLE_FMT_FLTP or AV_SAMPLE_FMT_S16P)
enum AVSampleFormat out_sample_fmt; ///< output sample format
SwrContext中,定义了in_sample_fmt、int_sample_fmt、out_sample_fmt,其中int_sample_fmt就表示internal format,顾名思义,就是swresample内部使用的sample format。它有四种取值:
AV_SAMPLE_FMT_S16P
AV_SAMPLE_FMT_S32P
AV_SAMPLE_FMT_FLTP
AV_SAMPLE_FMT_DBLP
实际的代码是:
if(s->int_sample_fmt == AV_SAMPLE_FMT_NONE){
if( av_get_bytes_per_sample(s-> in_sample_fmt) <= 2
&& av_get_bytes_per_sample(s->out_sample_fmt) <= 2){
s->int_sample_fmt= AV_SAMPLE_FMT_S16P;
}else if( av_get_bytes_per_sample(s-> in_sample_fmt) <= 2
&& !s->rematrix
&& s->out_sample_rate==s->in_sample_rate
&& !(s->flags & SWR_FLAG_RESAMPLE)){
s->int_sample_fmt= AV_SAMPLE_FMT_S16P;
}else if( av_get_planar_sample_fmt(s-> in_sample_fmt) == AV_SAMPLE_FMT_S32P
&& av_get_planar_sample_fmt(s->out_sample_fmt) == AV_SAMPLE_FMT_S32P
&& !s->rematrix
&& s->out_sample_rate == s->in_sample_rate
&& !(s->flags & SWR_FLAG_RESAMPLE)
&& s->engine != SWR_ENGINE_SOXR){
s->int_sample_fmt= AV_SAMPLE_FMT_S32P;
}else if(av_get_bytes_per_sample(s->in_sample_fmt) <= 4){
s->int_sample_fmt= AV_SAMPLE_FMT_FLTP;
}else{
s->int_sample_fmt= AV_SAMPLE_FMT_DBLP;
}
}
首先判断int_sample_fmt是否已经指定;如果未指定(值为AV_SAMPLE_FMT_NONE),则根据以下规则选择一个合适的内部采样格式:
第一个if语句块:如果输入和输出采样格式每个采样点的字节数都小于等于2,则选择AV_SAMPLE_FMT_S16P作为内部采样格式。
第一个else if语句块:如果输入采样格式每个采样点的字节数小于等于2,且不需要重新混音(rematrix为false)、输出采样率等于输入采样率、没有设置强制重采样标志(SWR_FLAG_RESAMPLE),则选择AV_SAMPLE_FMT_S16P作为内部采样格式。
第二个else if语句块:如果输入和输出采样格式对应的planar格式都是AV_SAMPLE_FMT_S32P(即输入输出为s32或s32p),且不需要重新混音、输出采样率等于输入采样率、没有设置强制重采样标志、使用的引擎不是SOXR,则选择AV_SAMPLE_FMT_S32P作为内部采样格式。
第三个else if语句块:如果输入采样格式每个采样点的字节数小于等于4,则选择AV_SAMPLE_FMT_FLTP作为内部采样格式。
else语句块:如果以上条件都不满足,则选择AV_SAMPLE_FMT_DBLP作为内部采样格式。
确定内部采样格式之后,分别创建输入和输出的convert:
s->in_convert = swri_audio_convert_alloc(s->int_sample_fmt,
s-> in_sample_fmt, s->used_ch_count, s->channel_map, 0);
s->out_convert = swri_audio_convert_alloc(s->out_sample_fmt,
s->int_sample_fmt, s->out.ch_count, NULL, 0);
比如internal sample fmt是fltp,输入输出sample fmt没有指定,输入文件是s16,那么输入输出默认就是s16,此时in_convert和out_convert的conv_f值如下:
s->in_convert:把输入格式s16转换成内部格式fltp
s->out_convert:把内部格式fltp转换成输出格式s16
convert初始化,不同的平台对应不同的版本:
#if ARCH_X86 && HAVE_X86ASM && HAVE_MMX
swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels);
#elif ARCH_ARM
swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);
#elif ARCH_AARCH64
swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);
#endif
ffmpeg -y -i 2ch-16k.wav -af aresample=resampler=swr -ac 2 -ar 48000 -f f32le out.pcm
不写aresample,默认会走swr:
ffmpeg -y -i 2ch-16k.wav -ac 2 -ar 48000 -f f32le out.pcm
-f f32le:指定了保存的文件格式是裸PCM(f32le),不是wav,所以保存出来的文件按wav来解析是不对的,即使文件名为out.wav也不行。
ffmpeg -y -i 2ch-16k.wav -af aresample=resampler=soxr -ac 2 -ar 48000 -f f32le out.pcm
ffmpeg -y -i 2ch-16k.wav -af "aresample=resampler=src" -filter_type sinc_best \
-ac 2 -ar 48000 -acodec pcm_f32le out.wav
ffmpeg -y -i 2ch-16k.wav -af "aresample=resampler=src" -ac 2 -ar 48000 -f f32le out.pcm -v 56
-acodec pcm_f32le:指定输出的codec是pcm_f32le;这里没有显式指定-f wav,实际上会根据输出文件名使用wav muxer。
-f f32le:f32le参数在指定输出格式的同时,也保证了src重采样使用的内部数据格式是fltp。
指定-acodec pcm_f32le之后,输出的codec格式是pcm_f32le,所以aresample的输出采样格式会设置为flt(对应f32le):
./ffmpeg -y -i 2ch-16k.wav -af "aresample=resampler=src:filter_type=sinc_best" -ac 2 -ar 48000 -acodec pcm_f32le out.wav -v 56
./ffmpeg -y -i 2ch-16k.wav -af "aresample=resampler=src:filter_type=sinc_fast" -ac 2 -ar 48000 -acodec pcm_f32le out.wav -v 56
./ffmpeg -y -i 2ch-16k.wav -af "aresample=resampler=src:filter_type=sinc_fast:internal_sample_fmt=fltp" -ac 2 -ar 48000 out.wav -v 56
./ffmpeg -y -i 2ch-16k.wav -af "aresample=resampler=src:filter_type=linear:internal_sample_fmt=fltp" -ac 2 -ar 48000 out.wav -v 56
./ffmpeg -y -i 2ch-16k.wav -af "aresample=resampler=src:filter_type=zoh:internal_sample_fmt=fltp" -ac 2 -ar 48000 out.wav -v 56
如果没有指定-acodec pcm_f32le,而是通过aresample的option指定out_sample_fmt=flt,这时候flt只是一个中间格式,最后还会转换成和输入格式一样的s16le:
ffmpeg -y -i 2ch-16k.wav -af "aresample=resampler=src:filter_type=sinc_best:out_sample_fmt=flt" \
-ac 2 -ar 48000 out.wav -v 56
可以看到如下log:
[ap] ch:2 chl:stereo fmt:s16 r:16000Hz -> ch:2 chl:stereo fmt:flt r:48000Hz
[ap] Using fltp internally between filters
[ap] ch:2 chl:stereo fmt:flt r:48000Hz -> ch:2 chl:stereo fmt:s16 r:48000Hz