FFmpeg libswresample对音频进行重采样

还是之前的项目，见这篇文章https://segmentfault.com/a/11...，libsox的使用没有成功，还差最后一步，到现在依然没有找到问题。为了尽快完成，我只好改用了FFmpeg提供的库libswresample来完成对音频数据的重采样。还是这个背景，应用场景是对已解码的8K采样率、16bit采样深度、单声道、无文件头的raw格式语音数据进行重采样，将采样率变为16K，其余参数不变。用FFmpeg库做的目前可以用，代码记录如下：

extern "C" {
#ifdef __cplusplus      // A C++ program calling the C library needs this stdint.h setup, otherwise compilation fails
#define __STDC_CONSTANT_MACROS
#ifdef _STDINT_H
#undef _STDINT_H
#endif
#include <stdint.h>
#endif

#include "libavutil/opt.h"
#include "libavutil/channel_layout.h"
#include "libavutil/samplefmt.h"
#include "libswresample/swresample.h"
}

#include <limits.h>     // INT_MAX
#include <stdio.h>      // printf
#include <string.h>     // memcpy

/**
 * Resample headerless mono signed-16-bit PCM from 8 kHz to 16 kHz using
 * FFmpeg's libswresample.
 *
 * @param pWavBuf   Input samples (8 kHz, mono, S16). Must not be NULL.
 * @param wavLen    Number of input samples (NOT the number of bytes). Must be > 0.
 * @param pWav16k   Output buffer receiving the 16 kHz samples. The function
 *                  writes at most
 *                  av_rescale_rnd(swr_get_delay() + wavLen, 16000, 8000, AV_ROUND_UP)
 *                  samples into it (2 * wavLen when the resampler reports no
 *                  initial delay), so the caller must provide at least that much room.
 * @param wavLen16k Number of output samples. NOTE(review): this parameter is
 *                  passed by value, so the sample count stored into it below is
 *                  NOT visible to the caller. If callers need the count, the
 *                  signature has to become `int&` / `int*` -- confirm against
 *                  the call sites before changing it.
 * @return true on success; false on any failure (a message is printed to
 *         stdout). All FFmpeg resources are released on every path.
 *
 * NOTE(review): swr_alloc_set_opts() and av_get_channel_layout_nb_channels()
 * belong to FFmpeg's legacy channel-layout API; check the FFmpeg version in use
 * before upgrading.
 */
bool Resample (short* pWavBuf, int wavLen, short* pWav16k, int wavLen16k) {
    // Source and destination formats: only the sample rate differs.
    const int64_t src_ch_layout = AV_CH_LAYOUT_MONO;
    const int64_t dst_ch_layout = AV_CH_LAYOUT_MONO;
    const int src_rate = 8000;
    const int dst_rate = 16000;
    const enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_S16;
    const enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16;

    // Every local is declared (and initialised) before the first `goto`, so the
    // jumps to `cleanup` never bypass an initialisation (ill-formed in C++).
    struct SwrContext *swr_ctx = NULL;
    uint8_t **src_data = NULL;      // plane-pointer array + sample buffer (input)
    uint8_t **dst_data = NULL;      // plane-pointer array + sample buffer (output)
    int src_nb_channels = 0;
    int dst_nb_channels = 0;
    int src_linesize = 0;
    int dst_linesize = 0;
    int src_nb_samples = wavLen;    // samples per channel
    int dst_nb_samples = 0;
    int64_t dst_count64 = 0;
    int dst_bufsize = 0;            // bytes produced by the main conversion
    int rest_bufsize = 0;           // bytes produced by the flush
    int remaining = 0;
    int ret = 0;
    bool ok = false;

    if (!pWavBuf || !pWav16k || wavLen <= 0) {
        printf("Invalid resample arguments!\n");
        return false;
    }

    // Create the resampler context with the conversion options.
    swr_ctx = swr_alloc_set_opts(NULL, dst_ch_layout, dst_sample_fmt, dst_rate,
                                 src_ch_layout, src_sample_fmt, src_rate, 0, NULL);
    if (!swr_ctx) {
        printf("Cannot allocate resampler context!\n");
        goto cleanup;
    }

    // Options only take effect once the context has been (re)initialised.
    if ((ret = swr_init(swr_ctx)) < 0) {
        printf("Initialize the resampling context failed!\n");
        goto cleanup;
    }

    // Allocate the source buffer and copy the caller's samples into it.
    src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
                                             src_nb_samples, src_sample_fmt, 0);
    if (ret < 0) {
        printf("allocate source samples failed!\n");
        goto cleanup;
    }
    memcpy(src_data[0], pWavBuf, (size_t)wavLen * sizeof(short));

    // Upper bound on the output sample count (rounded up), including whatever
    // delay the resampler currently reports. Computed in 64 bits and range
    // checked before narrowing to int.
    dst_count64 = av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) + src_nb_samples,
                                 dst_rate, src_rate, AV_ROUND_UP);
    if (dst_count64 <= 0 || dst_count64 > INT_MAX) {
        printf("allocate destination samples failed!\n");
        goto cleanup;
    }
    dst_nb_samples = (int)dst_count64;

    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
    ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,
                                             dst_nb_samples, dst_sample_fmt, 0);
    if (ret < 0) {
        printf("allocate destination samples failed!\n");
        goto cleanup;
    }

    // Main conversion.
    ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples);
    if (ret < 0) {
        printf("Error while converting.\n");
        goto cleanup;
    }
    wavLen16k = ret;    // samples converted so far (local copy only, see header note)
    if (ret > 0) {
        // av_samples_get_buffer_size() rejects a sample count of 0, so it is
        // only queried when something was actually produced.
        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels, ret, dst_sample_fmt, 1);
        if (dst_bufsize < 0) {
            printf("Cannot get sample buffer size!\n");
            goto cleanup;
        }
        memcpy(pWav16k, dst_data[0], (size_t)dst_bufsize);
    }

    // Flush: the resampler keeps some converted samples buffered while it waits
    // for more input. Passing NULL input with a count of 0 signals end of input
    // and drains them.
    remaining = dst_nb_samples - ret;
    if (remaining > 0) {
        ret = swr_convert(swr_ctx, dst_data, remaining, NULL, 0);
        if (ret < 0) {
            printf("Error while flushing the resampler.\n");
            goto cleanup;
        }
        if (ret > 0) {
            rest_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels, ret, dst_sample_fmt, 1);
            if (rest_bufsize < 0) {
                printf("Can not get sample buffer size!\n");
                goto cleanup;
            }
            memcpy((uint8_t *)pWav16k + dst_bufsize, dst_data[0], (size_t)rest_bufsize);
            wavLen16k += ret;   // flushed samples (local copy only, see header note)
        }
    }

    ok = true;

cleanup:
    // Release the sample buffers first, then the plane-pointer arrays. Note the
    // `&`: av_freep() takes the ADDRESS of the pointer to free.
    if (src_data) {
        av_freep(&src_data[0]);
    }
    av_freep(&src_data);
    if (dst_data) {
        av_freep(&dst_data[0]);
    }
    av_freep(&dst_data);
    swr_free(&swr_ctx);

    return ok;
}

另外，C++调用的时候，还需要在libavutil下的common.h头文件中增加以下定义，否则依然会报错。

/* Fallback definitions of the 64-bit integer-constant macros, added only when
 * INT64_C is not already defined. They append the LL / ULL suffix to the
 * literal passed in. */
#ifndef INT64_C
#define INT64_C(c) (c##LL)
#define UINT64_C(c) (c##ULL)
#endif

FFmpeg libswresample对音频进行重采样

你可能感兴趣的:(c++,语音)