ffmpeg音频重采样分析

主机环境:Windows XP

开发环境:CodeBlocks13.12

ffmpeg版本:ffmpeg2.4

在学习ffmpeg教程 http://dranger.com/ffmpeg/ffmpeg.html 的过程中,由于教程使用的ffmpeg版本较低,与ffmpeg2.4的API有些许出入,解码后的音频不能直接播放了。而ffplay.c中对解码后的音频都进行了重采样操作,于是去了解了一下重采样的相关知识。学习的例程是ffmpeg2.4源代码目录下的doc/examples/resampling_audio.c文件,为便于学习,修改后的代码如下:

/*
 * Copyright (c) 2012 Stefano Sabatini
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * @example resampling_audio.c
 * libswresample API use example.
 */

#include <errno.h>

#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>

/*
 * Look up the raw-audio format name that corresponds to a sample format.
 *
 * Each table row carries a big-endian and a little-endian spelling of the
 * name; AV_NE() picks which of the two is stored in *fmt.
 *
 * Returns 0 on success.  When sample_fmt is not in the table, *fmt is left
 * as NULL, a diagnostic is printed to stderr and AVERROR(EINVAL) is returned.
 */
static int get_format_from_sample_fmt(const char **fmt,
                                      enum AVSampleFormat sample_fmt)
{
    struct sample_fmt_entry {
        enum AVSampleFormat sample_fmt;
        const char *fmt_be;
        const char *fmt_le;
    } table[] = {
        { AV_SAMPLE_FMT_U8,  "u8",    "u8"    },
        { AV_SAMPLE_FMT_S16, "s16be", "s16le" },
        { AV_SAMPLE_FMT_S32, "s32be", "s32le" },
        { AV_SAMPLE_FMT_FLT, "f32be", "f32le" },
        { AV_SAMPLE_FMT_DBL, "f64be", "f64le" },
    };
    struct sample_fmt_entry *cur = table;
    struct sample_fmt_entry *const stop = table + FF_ARRAY_ELEMS(table);

    /* Start from "not found" so callers never see a stale pointer. */
    *fmt = NULL;

    while (cur != stop) {
        if (cur->sample_fmt == sample_fmt) {
            *fmt = AV_NE(cur->fmt_be, cur->fmt_le);
            return 0;
        }
        cur++;
    }

    fprintf(stderr,
            "Sample format %s not supported as output format\n",
            av_get_sample_fmt_name(sample_fmt));
    return AVERROR(EINVAL);
}

/**
 * Fill dst buffer with nb_samples, generated starting from t.
 *使用nb_samples 填充dst buffer,确保从t开始
 *在440hz的曲线上以sample_rate的频率取nb_samples个样本点存储在dst中,nb_channels通道数据都一样
 */
static void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t)
{
    int i, j;
    double tincr = 1.0 / sample_rate, *dstp = dst;//tincr是时间间隔
    const double c = 2 * M_PI * 440.0;//频率440Hz
    /* generate sin tone with 440Hz frequency and duplicated channels */
    for (i = 0; i < nb_samples; i++) {
        *dstp = sin(c * *t);//得到曲线上的采样点
        for (j = 1; j < nb_channels; j++)
            dstp[j] = dstp[0];//每一个通道都与第一个通道写一样的值
        dstp += nb_channels;//顺序写满通道后移动采样点
        *t += tincr;//时间向后移动
    }
}

/**
 * Generate a synthetic 440 Hz tone, resample it with libswresample and dump
 * both the generated source samples and the resampled output as raw audio.
 *
 * Source:      stereo, 48000 Hz, AV_SAMPLE_FMT_DBL.
 * Destination: AV_CH_LAYOUT_SURROUND, 44100 Hz, AV_SAMPLE_FMT_S16.
 * Frames of 1024 source samples are produced until the generated time
 * reaches one second.
 *
 * Usage: <program> input_file output_file
 *   input_file   receives the generated source samples
 *   output_file  receives the resampled samples
 *
 * @return 0 on success, 1 on any failure (bad arguments, file, allocation,
 *         conversion or write error)
 */
int main(int argc, char **argv)
{
    /* source layout is stereo, destination layout is "surround" */
    int64_t src_ch_layout = AV_CH_LAYOUT_STEREO, dst_ch_layout = AV_CH_LAYOUT_SURROUND;
    int src_rate = 48000, dst_rate = 44100;        /* sample rates in Hz */
    uint8_t **src_data = NULL, **dst_data = NULL;  /* sample buffers, allocated below */
    int src_nb_channels = 0, dst_nb_channels = 0;  /* channel counts, derived from the layouts */
    int src_linesize, dst_linesize;                /* buffer sizes reported by the allocators */
    int src_nb_samples = 1024, dst_nb_samples, max_dst_nb_samples; /* samples per channel */
    /* the sample format determines how many bytes one sample occupies */
    enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL, dst_sample_fmt = AV_SAMPLE_FMT_S16;
    const char *src_filename = NULL, *dst_filename = NULL;
    FILE *src_file = NULL, *dst_file = NULL;
    int dst_bufsize;                    /* bytes actually produced by one conversion */
    const char *fmt;
    struct SwrContext *swr_ctx = NULL;  /* opaque: members are set through the AVOptions API */
    double t;
    int ret;

    if (argc != 3) {
        fprintf(stderr, "Usage: %s input_file output_file\n"
                "API example program to show how to resample an audio stream with libswresample.\n"
                "This program generates a series of audio frames, resamples them to a specified "
                "output format and rate and saves them to a input file named input_file and an output file named output_file.\n",
            argv[0]);
        return 1;
    }
    src_filename = argv[1];
    dst_filename = argv[2];

    /* both files are written by this program, so both are opened in binary write mode */
    src_file = fopen(src_filename, "wb");
    if (!src_file) {
        fprintf(stderr, "Could not open src file %s\n", src_filename);
        return 1;
    }
    dst_file = fopen(dst_filename, "wb");
    if (!dst_file) {
        fprintf(stderr, "Could not open destination file %s\n", dst_filename);
        fclose(src_file);  /* do not leave the first file open on the error path */
        return 1;
    }

    /* create resampler context */
    swr_ctx = swr_alloc();
    if (!swr_ctx) {
        fprintf(stderr, "Could not allocate resampler context\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    /* set options (indirectly, through the AVOptions API) */
    av_opt_set_int(swr_ctx, "in_channel_layout",    src_ch_layout, 0);
    av_opt_set_int(swr_ctx, "in_sample_rate",       src_rate, 0);
    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);

    av_opt_set_int(swr_ctx, "out_channel_layout",    dst_ch_layout, 0);
    av_opt_set_int(swr_ctx, "out_sample_rate",       dst_rate, 0);
    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);

    /* initialize the resampling context; the options take effect here */
    if ((ret = swr_init(swr_ctx)) < 0) {
        fprintf(stderr, "Failed to initialize the resampling context\n");
        goto end;
    }

    /* allocate source and destination samples buffers */

    src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
                                             src_nb_samples, src_sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate source samples\n");
        goto end;
    }
    /* only print the size once the allocation is known to have succeeded */
    printf("src_linesize:%d\n",src_linesize);

    /* compute the number of converted samples: buffering is avoided
     * ensuring that the output buffer will contain at least all the
     * converted input samples. Both streams cover the same duration, so
     * dst_nb_samples / dst_rate == src_nb_samples / src_rate. */
    max_dst_nb_samples = dst_nb_samples =
        av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
    printf("max_dst_nb_samples:%d\n",max_dst_nb_samples);

    /* buffer is going to be directly written to a rawaudio file, no alignment */
    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
    ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,
                                             dst_nb_samples, dst_sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate destination samples\n");
        goto end;
    }
    printf("dst_linesize:%d\n",dst_linesize);

    t = 0;
    do {
        /* generate synthetic audio to serve as the input */
        fill_samples((double *)src_data[0], src_nb_samples, src_nb_channels, src_rate, &t);

        /* compute destination number of samples, including what the
         * resampler still holds from earlier calls (swr_get_delay) */
        dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) +
                                        src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
        if (dst_nb_samples > max_dst_nb_samples) {
            /* more output than the current buffer can hold: grow it */
            av_freep(&dst_data[0]);
            ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
                                   dst_nb_samples, dst_sample_fmt, 1);
            if (ret < 0) {
                /* bail out as a failure instead of falling through to the success message */
                fprintf(stderr, "Could not reallocate destination samples\n");
                goto end;
            }
            max_dst_nb_samples = dst_nb_samples;
        }

        /* convert to destination format */
        ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples);
        if (ret < 0) {
            fprintf(stderr, "Error while converting\n");
            goto end;
        }
        /* size in bytes of the ret samples that were actually produced */
        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
                                                 ret, dst_sample_fmt, 1);
        if (dst_bufsize < 0) {
            fprintf(stderr, "Could not get sample buffer size\n");
            ret = dst_bufsize;  /* ret held a sample count; propagate the error instead */
            goto end;
        }
        printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);

        if (fwrite(src_data[0], 1, src_linesize, src_file) != (size_t)src_linesize ||
            fwrite(dst_data[0], 1, dst_bufsize, dst_file) != (size_t)dst_bufsize) {
            fprintf(stderr, "Could not write samples to file\n");
            ret = AVERROR(EIO);
            goto end;
        }
    } while (t < 1);

    if ((ret = get_format_from_sample_fmt(&fmt, dst_sample_fmt)) < 0)
        goto end;
    fprintf(stderr, "Resampling succeeded. Play the output file with the command:\n"
            "ffplay -f %s -channel_layout %"PRId64" -channels %d -ar %d %s\n",
            fmt, dst_ch_layout, dst_nb_channels, dst_rate, dst_filename);

end:
    if (src_file)
        fclose(src_file);
    if (dst_file)
        fclose(dst_file);

    /* free the sample data first, then the pointer arrays that reference it */
    if (src_data)
        av_freep(&src_data[0]);
    av_freep(&src_data);

    if (dst_data)
        av_freep(&dst_data[0]);
    av_freep(&dst_data);

    swr_free(&swr_ctx);
    return ret < 0;
}

这里面一个很重要的结构体是SwrContext其说明如下

/**
 * The libswresample context. Unlike libavcodec and libavformat, this structure
 * is opaque. This means that if you would like to set options, you must use
 * the @ref avoptions API and cannot directly set values to members of the
 * structure.
 */
typedef struct SwrContext SwrContext;

说明中提到不能对其成员直接进行操作,需借助avoptions API来实现对其成员变量的赋值。

在主函数中声明了源数据以及目的数据的一些相关信息:源数据通道布局为STEREO,目的数据通道布局为SURROUND,增加了一个通道;源数据采样率为48000,目的数据采样率为44100;源数据每次处理的样本数为1024,样本格式为DBL类型,目的数据的样本格式为S16。在初始化SwrContext结构体之前分别打开了源文件以及目的文件。SwrContext有两种初始化方法:

 * @code
 * SwrContext *swr = swr_alloc();
 * av_opt_set_channel_layout(swr, "in_channel_layout",  AV_CH_LAYOUT_5POINT1, 0);
 * av_opt_set_channel_layout(swr, "out_channel_layout", AV_CH_LAYOUT_STEREO,  0);
 * av_opt_set_int(swr, "in_sample_rate",     48000,                0);
 * av_opt_set_int(swr, "out_sample_rate",    44100,                0);
 * av_opt_set_sample_fmt(swr, "in_sample_fmt",  AV_SAMPLE_FMT_FLTP, 0);
 * av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16,  0);
 * @endcode
 *
 * The same job can be done using swr_alloc_set_opts() as well:
 * @code
 * SwrContext *swr = swr_alloc_set_opts(NULL,  // we're allocating a new context
 *                       AV_CH_LAYOUT_STEREO,  // out_ch_layout
 *                       AV_SAMPLE_FMT_S16,    // out_sample_fmt
 *                       44100,                // out_sample_rate
 *                       AV_CH_LAYOUT_5POINT1, // in_ch_layout
 *                       AV_SAMPLE_FMT_FLTP,   // in_sample_fmt
 *                       48000,                // in_sample_rate
 *                       0,                    // log_offset
 *                       NULL);                // log_ctx
 * @endcode
 *

例程中是采用的前者,更直观一些,初始化完毕后,分别对源数据以及目的数据进行了参数的设置(通道布局、采样率、采样格式),通过调用swr_init函数来使其生效。

接着为源数据申请空间

// Allocate the source sample buffer; src_linesize receives the buffer size.
src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);// number of source channels; the value is 2 for this source layout
ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
										 src_nb_samples, src_sample_fmt, 0);
printf("src_linesize:%d\n",src_linesize);// prints 16384: src_linesize = src_nb_samples * size of one sample * src_nb_channels = 1024*8*2

首先通过av_get_channel_layout_nb_channels函数得到源数据的通道数,接着通过av_samples_alloc_array_and_samples函数为源数据申请缓存空间(同时通过src_linesize得到缓存的大小),然后根据源数据的样本数计算所需要的目的样本数,这一步通过av_rescale_rnd函数来实现

/**
 * Rescale a 64-bit integer with specified rounding.
 * A simple a*b/c isn't possible as it can overflow.
 *
 * @return rescaled value a, or if AV_ROUND_PASS_MINMAX is set and a is
 *         INT64_MIN or INT64_MAX then a is passed through unchanged.
 */
int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding) av_const;

可以看到其返回的值是a*b/c(按指定的方式取整)。因为源数据与目的数据所表示的时长是相同的,即src_nb_samples/src_rate=dst_nb_samples/dst_rate,因此dst_nb_samples=src_nb_samples*dst_rate/src_rate。同时也为目的数据申请所需要的空间,该空间是理论上计算所得。接下来进入了do-while循环,do-while循环中做了两件事,一个是通过fill_samples来填充源数据,一个是通过swr_convert对源数据进行重采样转成目的数据。

/**
 * Fill dst buffer with nb_samples, generated starting from t.
 *使用nb_samples 填充dst buffer,确保从t开始
 *在440hz的曲线上以sample_rate的频率取nb_samples个样本点存储在dst中,nb_channels通道数据都一样
 */
static void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t)
{
    int i, j;
    double tincr = 1.0 / sample_rate, *dstp = dst;//tincr是时间间隔
    const double c = 2 * M_PI * 440.0;//频率440Hz
    /* generate sin tone with 440Hz frequency and duplicated channels */
    for (i = 0; i < nb_samples; i++) {
        *dstp = sin(c * *t);//得到曲线上的采样点
        for (j = 1; j < nb_channels; j++)
            dstp[j] = dstp[0];//每一个通道都与第一个通道写一样的值
        dstp += nb_channels;//顺序写满通道后移动采样点
        *t += tincr;//时间向后移动
    }
}

fill_samples是以正弦波形来填充源数据的。正弦函数的一般形式为 y=Asin(ωx+φ)+h,其中A为振幅,h为波形在y轴方向上的偏移量,φ为初相位(决定波形沿x轴方向的平移),ω为角频率,其最小正周期T=2π/|ω|。函数中ω=2π×440,周期T=1/440秒,因此fill_samples实际上产生了一个频率为440Hz的正弦波形。函数中以sample_rate的采样率在该波形上提取了nb_samples个样本存储在源数据中,且源数据中两个通道的数据是相同的。当源数据的nb_samples个样本采样完毕后,又一次计算了目的数据的样本数,这次加上了重采样器内部缓存的延迟(swr_get_delay)。如果该次计算的样本数大于之前计算所得的样本数,则对目的数据重新进行空间申请,防止目的数据溢出,同时更新max_dst_nb_samples数值。swr_convert是对源数据的转换,

/** Convert audio.
 *
 * in and in_count can be set to 0 to flush the last few samples out at the
 * end.
 *
 * If more input is provided than output space then the input will be buffered.
 * You can avoid this buffering by providing more output space than input.
 * Conversion will run directly without copying whenever possible.
 *
 * @param s         allocated Swr context, with parameters set
 * @param out       output buffers, only the first one need be set in case of packed audio
 * @param out_count amount of space available for output in samples per channel
 * @param in        input buffers, only the first one need to be set in case of packed audio
 * @param in_count  number of input samples available in one channel
 *
 * @return number of samples output per channel, negative value on error
 */
int swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
                                const uint8_t **in , int in_count);

转换后的目的数据存储于out中,需注意的是out_count要足够大,避免转换后的数据溢出。该函数返回每个通道实际输出的样本数(出错时返回负值),例程运行结果如下:

ffmpeg音频重采样分析_第1张图片

由图中可以看出一开始计算的目的样本数以及空间是可以满足需要的,不过还是小心谨慎为好。哈哈

你可能感兴趣的:(ffmpeg音频重采样分析)