FFmpeg版本:3.4.2
FFmpeg的官方例子中的resampling_audio,从名字上看它是一个对音频重采样的例子。但实际上除了重采样,它还能转换采样格式和声道数。
重采样:举个例子,将一个采样率为44100的音频转换成采样率为8000的音频,这个过程就叫做音频的重采样。由于源音频和目标音频的采样率之比不一定是整数,为了重采样后的目标音频尽可能地不失真,其中涉及到的算法是很复杂的。
声道:常见的音频有立体声(stereo)和单声道(mono)两种类型,另外还有环绕立体声等其它不太常用的类型。立体声包含左声道和右声道。
采样格式:数字音频本质上是由很多个“采样”组成的。以不同声道的采样排列方式来区分,采样格式可以分为平坦(planar)和非平坦两种类型;除此之外还会以采样的数值精度和数值类型来区分采样格式。
平坦形式是指各个声道的采样都由各自的数组储存,有多少个声道就有多少个数组;非平坦形式则只有一个数组,所有声道的采样都交错储存在同一个数组中。由于单声道只有一个声道,所以平坦和非平坦储存都是一样的。以立体声为例(L、R 分别表示左、右声道的一个采样):平坦格式有两个数组,分别是 LLLL… 和 RRRR…;非平坦格式只有一个数组 LRLRLRLR…。
另外,根据采样的取值范围和类型来区分不同的格式(下列均为非平坦格式;对应的平坦格式在名称后加 P,例如 AV_SAMPLE_FMT_S16P、AV_SAMPLE_FMT_FLTP):
AV_SAMPLE_FMT_U8:无符号8位整型
AV_SAMPLE_FMT_S16: 带符号16位整型
AV_SAMPLE_FMT_S32: 带符号32位整型
AV_SAMPLE_FMT_S64: 带符号64位整型
AV_SAMPLE_FMT_FLT: float
AV_SAMPLE_FMT_DBL: double
知道上面这些概念,代码就很好理解了。
- 初始化
int64_t src_ch_layout = AV_CH_LAYOUT_STEREO; // 输入立体声
int64_t dst_ch_layout = AV_CH_LAYOUT_SURROUND; // 输出环绕立体声
int src_rate = 48000;
Int dst_rate = 11025; // 例子中的输出采样率为44100,但为了更直观地看出转换后的变化,所以选择一个小一点的输出采样率。
/* 创建Context */
struct SwrContext *swr_ctx = swr_alloc();
/* 设置参数 */
av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0);
av_opt_set_int(swr_ctx, "in_sample_rate", src_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);
av_opt_set_int(swr_ctx, "out_channel_layout", dst_ch_layout, 0);
av_opt_set_int(swr_ctx, "out_sample_rate", dst_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);
- 分配缓冲数组空间
/* 分配输入缓冲空间 */
uint8_t **src_data = NULL;
src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
src_nb_samples, src_sample_fmt, 0);
/* 分配输出缓冲空间 */
Uint8_t **dst_data = NULL;
dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,
dst_nb_samples, dst_sample_fmt, 0);
- 填充输入数据
/**
* 将一段正弦波作为测试音频填充到输入数组中
*/
static void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t)
{
int i, j;
double tincr = 1.0 / sample_rate, *dstp = dst;
const double c = 2 * M_PI * 440.0;
/* generate sin tone with 440Hz frequency and duplicated channels */
for (i = 0; i < nb_samples; i++) {
*dstp = sin(c * *t);
for (j = 1; j < nb_channels; j++)
dstp[j] = dstp[0];
dstp += nb_channels;
*t += tincr;
}
}
/* Generate synthetic audio: fill the source buffer with the test tone.
 * Only src_data[0] is passed, viewed as an array of doubles; t carries the
 * running time across calls. */
fill_samples((double *)src_data[0], src_nb_samples, src_nb_channels, src_rate, &t);
例子中的输入采样格式是AV_SAMPLE_FMT_DBL,采样数值类型为double,为非平坦格式,所有数据都存在src_data[0]这个数组中。
- 转换并输出数据
/* Convert the source samples into the destination buffers. A negative
 * result is treated as an error; otherwise ret is reused below as the
 * sample count. */
ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples);
if (ret < 0) {
fprintf(stderr, "Error while converting\n");
goto end;
}
// Compute the size of the converted data from the sample count in ret
dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
ret, dst_sample_fmt, 1);
if (dst_bufsize < 0) {
fprintf(stderr, "Could not get sample buffer size\n");
goto end;
}
printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);
// Write the converted data to the output file (only dst_data[0] is written)
fwrite(dst_data[0], 1, dst_bufsize, dst_file);
由于例子中目标采样格式是AV_SAMPLE_FMT_S16,为非平坦格式,所有输出数据都存在dst_data[0]这个数组中
- 结束
/* Free the sample buffers. For each side, the storage referenced by element 0
 * is freed only when the pointer array exists; the pointer array itself is
 * then passed to av_freep unconditionally. */
if (src_data)
av_freep(&src_data[0]);
av_freep(&src_data);
if (dst_data)
av_freep(&dst_data[0]);
av_freep(&dst_data);
/* Free the resampler context. */
swr_free(&swr_ctx);
- 将输入和输出的音频用Audacity打开,可以直观地看到转换前后的变化。这里有一个问题:为什么输出音频的第三声道的波形是一条直线?
- 总结:这个例子只展示非平坦采样格式的音频转换,没有包含平坦与非平坦采样格式的音频转换功能,刚开始我并不熟悉这两者的区别,导致音频转换失败。下面我对这个例子进行重构,让它能适用于更多种音频格式的转换。
convertor.h
//
// convertor.h
// SoundEditor
//
// Created by Kidon Liang on 2018/4/1.
// Copyright © 2018年 Kidon Liang. All rights reserved.
//
#ifndef convertor_h
#define convertor_h
#include
#include
#include
#include
/**
* 初始化
*
* @param src_ch_layout 输入声道类型
* @param src_sample_fmt 输入采样格式
* @param src_sample_rate 输入采样率
* @param dst_ch_layout 输出声道类型
* @param dst_sample_fmt 输出采样格式
* @param dst_sample_rate 输出采样率
**/
int convertor_init(int64_t src_ch_layout, enum AVSampleFormat src_sample_fmt, int src_sample_rate,
int64_t dst_ch_layout, enum AVSampleFormat dst_sample_fmt, int dst_sample_rate);
/**
* 输入数据
*
* @param data 音频数据
* @param len 数据长度
**/
int convertor_feed_data(uint8_t **data, int len);
/**
* 获取已转换数据长度
**/
int convertor_get_converted_size(void);
/**
* 接收已转换数据
*
* @param data 接收数据的数组
**/
int convertor_receive_converted_data(uint8_t **data);
/**
* 排空所有数据
**/
void convertor_flush(void);
/**
* 关闭转换器
**/
void convertor_close(void);
#endif /* convertor_h */
convertor.c
//
// convertor.c
// SoundEditor
//
// Created by Kidon Liang on 2018/4/1.
// Copyright © 2018年 Kidon Liang. All rights reserved.
//
#include "convertor.h"
// Module-level state: this file implements a single, global converter instance.
// Scratch variable holding the result of the most recent FFmpeg call.
static int tmp_ret = 0;
// Resampler context, allocated by convertor_init.
static struct SwrContext *swr_ctx = NULL;
// Internal input and output sample buffers (arrays of buffer pointers).
static uint8_t **src_buffers = NULL;
static uint8_t **dst_buffers = NULL;
// Sample formats and sample rates recorded by convertor_init.
static enum AVSampleFormat src_sample_fmt;
static enum AVSampleFormat dst_sample_fmt;
static int src_sample_rate;
static int dst_sample_rate;
static int src_nb_samples = 1024; // original author's note: FFmpeg's default number of samples per call is 1024
static int dst_nb_samples;
static int max_dst_nb_samples; // largest output sample count allocated so far; tracked to avoid overrunning the output buffers
// Line sizes reported by the FFmpeg allocation helpers.
static int src_linesize;
static int dst_linesize;
// Channel counts, and buffer counts (channel count for planar formats, 1 otherwise).
static int src_nb_channels;
static int src_nb_buffers;
static int dst_nb_channels;
static int dst_nb_buffers;
/**
 * Initialise the module-level converter.
 *
 * Records the source and destination formats, creates and configures the
 * SwrContext, then allocates the internal input buffers (src_nb_samples
 * samples) and output buffers (sized from the rate ratio, rounded up).
 * On any failure convertor_close() is called before returning.
 *
 * @param src_ch_layout  input channel layout
 * @param src_sp_fmt     input sample format
 * @param src_sp_rate    input sample rate
 * @param dst_ch_layout  output channel layout
 * @param dst_sp_fmt     output sample format
 * @param dst_sp_rate    output sample rate
 * @return 0 on success; -1 if the context cannot be allocated, -2 if
 *         swr_init() fails, -3 if the source buffers cannot be allocated,
 *         -4 if the destination buffers cannot be sized or allocated.
 */
int convertor_init(int64_t src_ch_layout, enum AVSampleFormat src_sp_fmt, int src_sp_rate,
                   int64_t dst_ch_layout, enum AVSampleFormat dst_sp_fmt, int dst_sp_rate) {
    /* A second init would otherwise overwrite (and leak) the previous context
     * and buffers. This relies on convertor_close() releasing them, as the
     * failure paths below already do. */
    if (swr_ctx) {
        convertor_close();
    }

    src_sample_fmt = src_sp_fmt;
    dst_sample_fmt = dst_sp_fmt;
    src_sample_rate = src_sp_rate;
    dst_sample_rate = dst_sp_rate;

    /* Planar formats use one buffer per channel, packed formats a single one. */
    src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    src_nb_buffers = av_sample_fmt_is_planar(src_sample_fmt) ? src_nb_channels : 1;
    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
    dst_nb_buffers = av_sample_fmt_is_planar(dst_sample_fmt) ? dst_nb_channels : 1;

    swr_ctx = swr_alloc();
    if (!swr_ctx) {
        printf("can not alloc SwrContext.");
        convertor_close();
        return -1;
    }

    /* set options */
    av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0);
    av_opt_set_int(swr_ctx, "in_sample_rate", src_sample_rate, 0);
    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);
    av_opt_set_int(swr_ctx, "out_channel_layout", dst_ch_layout, 0);
    av_opt_set_int(swr_ctx, "out_sample_rate", dst_sample_rate, 0);
    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);

    /* initialize the resampling context */
    if (swr_init(swr_ctx) < 0) {
        printf("Failed to initialize the resampling context\n");
        convertor_close();
        return -2;
    }

    /* allocate the source sample buffers */
    tmp_ret = av_samples_alloc_array_and_samples(&src_buffers, &src_linesize, src_nb_channels,
                                                 src_nb_samples, src_sample_fmt, 0);
    if (tmp_ret < 0) {
        printf("Could not allocate source samples\n");
        convertor_close();
        return -3;
    }
    /* Only report the line size once it is known to be valid. */
    printf("src_linesize=%d\n", src_linesize);

    /* av_rescale_rnd() returns a 64-bit value; reject results that are not
     * positive or do not survive the narrowing to int instead of silently
     * truncating them. */
    const int64_t rescaled =
        av_rescale_rnd(src_nb_samples, dst_sample_rate, src_sample_rate, AV_ROUND_UP);
    if (rescaled <= 0 || rescaled != (int64_t)(int)rescaled) {
        printf("Could not allocate destination samples\n");
        convertor_close();
        return -4;
    }
    max_dst_nb_samples = dst_nb_samples = (int)rescaled;

    tmp_ret = av_samples_alloc_array_and_samples(&dst_buffers, &dst_linesize, dst_nb_channels,
                                                 dst_nb_samples, dst_sample_fmt, 0);
    if (tmp_ret < 0) {
        printf("Could not allocate destination samples\n");
        convertor_close();
        return -4;
    }
    printf("dst_linesize=%d\n", dst_linesize);

    return 0;
}
static int converted_size = 0;
/**
* 输入数据,需要确保每次输入1024个样本数据。
**/
int convertor_feed_data(uint8_t **data, int len) {
// 填充数据
for (int i=0;i max_dst_nb_samples) {
// 重新分配输出buffer
if (dst_buffers) {
av_freep(&dst_buffers[0]);
}
tmp_ret = av_samples_alloc(dst_buffers, &dst_linesize, dst_nb_channels,
dst_nb_samples, dst_sample_fmt, 1);
printf("realloc dst_linesize=%d\n", dst_linesize);
if (tmp_ret < 0) {
printf("重新分配输出buffer失败\n");
convertor_close();
return -1;
}
max_dst_nb_samples = dst_nb_samples;
}
/* convert to destination format */
tmp_ret = swr_convert(swr_ctx, dst_buffers, dst_nb_samples, (const uint8_t **)src_buffers, src_nb_samples);
if (tmp_ret < 0) {
printf("Error while converting\n");
convertor_close();
return -2;
}
converted_size = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
tmp_ret, dst_sample_fmt, 1);
if (converted_size < 0) {
printf("Could not get sample buffer size\n");
convertor_close();
return -3;
}
return 0;
}
/**
 * Report the size recorded by the most recent conversion.
 *
 * @return the current value of converted_size
 */
int convertor_get_converted_size(void) {
    const int pending_size = converted_size;
    return pending_size;
}
// Per convertor.h, this hands the converted data to the caller.
// NOTE(review): this copy of the file is cut off inside the loop header below. The rest of this
// function, and the definitions of convertor_flush() and convertor_close() (both declared in
// convertor.h), are missing here and must be restored from the original project.
int convertor_receive_converted_data(uint8_t **data) {
tmp_ret = converted_size;
for (int i=0;i
main.c
//
// main.c
// SoundEditor
//
// Created by Kidon Liang on 2018/3/31.
// Copyright © 2018年 Kidon Liang. All rights reserved.
//
/* NOTE(review): the header name of the first #include was lost in this copy;
 * the standard headers below cover the libc names main.c uses
 * (sin/M_PI, printf/fopen, malloc). Confirm against the original project. */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "convertor.h"
/**
 * Fill float sample buffers with a 440 Hz sine tone; every channel gets the
 * same value.
 *
 * For planar formats data[ch] holds channel ch; otherwise all channels are
 * interleaved in data[0]. `*time` (seconds) is advanced by 1 / sample_rate
 * per generated sample so consecutive calls continue the waveform.
 *
 * @param data        buffer pointers to fill
 * @param sample_fmt  sample format, used only to select planar vs. packed
 * @param nb_samples  samples to generate per channel
 * @param nb_channels number of channels
 * @param sample_rate sampling rate in Hz
 * @param time        in/out: current time in seconds
 */
static void fill_samples_float(float **data, enum AVSampleFormat sample_fmt,
                               int nb_samples, int nb_channels, int sample_rate, double *time) {
    const double omega = 2 * M_PI * 440.0;
    const double dt = 1.0 / sample_rate;

    if (!av_sample_fmt_is_planar(sample_fmt)) {
        /* Packed: step a cursor through the single buffer, frame by frame. */
        float *frame = data[0];
        for (int n = 0; n < nb_samples; n++, frame += nb_channels) {
            const double sample = sin(omega * *time);
            for (int ch = 0; ch < nb_channels; ch++) {
                frame[ch] = sample;
            }
            *time += dt;
        }
        return;
    }

    /* Planar: write position n of every channel buffer. */
    for (int n = 0; n < nb_samples; n++) {
        const double sample = sin(omega * *time);
        for (int ch = 0; ch < nb_channels; ch++) {
            data[ch][n] = sample;
        }
        *time += dt;
    }
}
/**
 * Fill double sample buffers with a 440 Hz sine tone; every channel gets the
 * same value.
 *
 * For planar formats data[ch] holds channel ch; otherwise all channels are
 * interleaved in data[0]. `*time` (seconds) is advanced by 1 / sample_rate
 * per generated sample so consecutive calls continue the waveform.
 *
 * @param data        buffer pointers to fill
 * @param sample_fmt  sample format, used only to select planar vs. packed
 * @param nb_samples  samples to generate per channel
 * @param nb_channels number of channels
 * @param sample_rate sampling rate in Hz
 * @param time        in/out: current time in seconds
 */
static void fill_samples_double(double **data, enum AVSampleFormat sample_fmt,
                                int nb_samples, int nb_channels, int sample_rate, double *time) {
    const double omega = 2 * M_PI * 440.0;
    const double dt = 1.0 / sample_rate;

    if (!av_sample_fmt_is_planar(sample_fmt)) {
        /* Packed: step a cursor through the single buffer, frame by frame. */
        double *frame = data[0];
        for (int n = 0; n < nb_samples; n++, frame += nb_channels) {
            const double sample = sin(omega * *time);
            for (int ch = 0; ch < nb_channels; ch++) {
                frame[ch] = sample;
            }
            *time += dt;
        }
        return;
    }

    /* Planar: write position n of every channel buffer. */
    for (int n = 0; n < nb_samples; n++) {
        const double sample = sin(omega * *time);
        for (int ch = 0; ch < nb_channels; ch++) {
            data[ch][n] = sample;
        }
        *time += dt;
    }
}
/**
 * Fill 16-bit signed sample buffers with a 440 Hz sine tone scaled to an
 * amplitude of 32767; every channel gets the same value.
 *
 * For planar formats data[ch] holds channel ch; otherwise all channels are
 * interleaved in data[0]. `*time` (seconds) is advanced by 1 / sample_rate
 * per generated sample so consecutive calls continue the waveform.
 *
 * @param data        buffer pointers to fill
 * @param sample_fmt  sample format, used only to select planar vs. packed
 * @param nb_samples  samples to generate per channel
 * @param nb_channels number of channels
 * @param sample_rate sampling rate in Hz
 * @param time        in/out: current time in seconds
 */
static void fill_samples_16(int16_t **data, enum AVSampleFormat sample_fmt,
                            int nb_samples, int nb_channels, int sample_rate, double *time) {
    const double omega = 2 * M_PI * 440.0;
    const double dt = 1.0 / sample_rate;
    const int peak = 32768 - 1;

    if (!av_sample_fmt_is_planar(sample_fmt)) {
        /* Packed: step a cursor through the single buffer, frame by frame. */
        int16_t *frame = data[0];
        for (int n = 0; n < nb_samples; n++, frame += nb_channels) {
            const double sample = peak * sin(omega * *time);
            for (int ch = 0; ch < nb_channels; ch++) {
                frame[ch] = sample;
            }
            *time += dt;
        }
        return;
    }

    /* Planar: write position n of every channel buffer. */
    for (int n = 0; n < nb_samples; n++) {
        const double sample = peak * sin(omega * *time);
        for (int ch = 0; ch < nb_channels; ch++) {
            data[ch][n] = sample;
        }
        *time += dt;
    }
}
/**
 * Dispatch to the sine-tone generator that matches the sample format.
 *
 * 16-bit, float and double formats (packed or planar) are supported; for any
 * other format a message is printed and the buffers are left untouched.
 *
 * @param data        buffer pointers to fill, reinterpreted per format
 * @param sample_fmt  sample format of the buffers
 * @param nb_samples  samples to generate per channel
 * @param nb_channels number of channels
 * @param sample_rate sampling rate in Hz
 * @param time        in/out: current time in seconds
 */
static void fill_samples(uint8_t **data, enum AVSampleFormat sample_fmt,
                         int nb_samples, int nb_channels, int sample_rate, double *time) {
    if (sample_fmt == AV_SAMPLE_FMT_S16 || sample_fmt == AV_SAMPLE_FMT_S16P) {
        fill_samples_16((int16_t **) data, sample_fmt, nb_samples, nb_channels, sample_rate, time);
    } else if (sample_fmt == AV_SAMPLE_FMT_FLT || sample_fmt == AV_SAMPLE_FMT_FLTP) {
        fill_samples_float((float **) data, sample_fmt, nb_samples, nb_channels, sample_rate, time);
    } else if (sample_fmt == AV_SAMPLE_FMT_DBL || sample_fmt == AV_SAMPLE_FMT_DBLP) {
        fill_samples_double((double **) data, sample_fmt, nb_samples, nb_channels, sample_rate, time);
    } else {
        /* Filling other sample formats is not implemented. */
        printf("其它格式的采样填充方法就不写了。");
    }
}
int main() {
char *src_path = "/Users/kidonliang/Desktop/src-0.pcm";
FILE *src_file = fopen(src_path, "wb");
char *dst_path = "/Users/kidonliang/Desktop/dst-0.pcm";
FILE *dst_file = fopen(dst_path, "wb");
enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_S16;
enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_FLTP;
int64_t src_ch_layout = AV_CH_LAYOUT_MONO;
int64_t dst_ch_layout = AV_CH_LAYOUT_MONO;
int src_sample_rate = 44100;
int dst_sample_rate = 16000;
int src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
int dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
convertor_init(src_ch_layout, src_sample_fmt, src_sample_rate,
dst_ch_layout, dst_sample_fmt, dst_sample_rate);
double time = 0;
uint8_t **src_buffers = NULL;
int src_bytes_ps = av_get_bytes_per_sample(src_sample_fmt);
// alloc source buffers
if (av_sample_fmt_is_planar(src_sample_fmt)) {
src_buffers = malloc(src_nb_channels * sizeof(void *));
for (int i=0;i