创建ffmpeg avcontext相关
这里注意 mAudioCodecCtx = mAudioStream->codec这个已经过时了,不建议这么用,改用下面的方式更好:
AVCodecParameters *codecpar = mAudioStream->codecpar;
mAudioCodecCtx = avcodec_alloc_context3(mAudioCodec);
avcodec_parameters_to_context(mAudioCodecCtx, codecpar);
全部代码如下:
// Set up demuxing and audio decoding: open mVideoFile, select the first audio
// stream and open a decoder context for it. Every failure path calls destroy()
// and then throws std::logic_error.
av_log_set_level(AV_LOG_WARNING);
// NOTE(review): av_register_all() is deprecated in newer FFmpeg releases; it is
// kept here for the FFmpeg version this code was written against.
av_register_all();
avformat_network_init();

// NOTE(review): FFmpeg offers av_packet_alloc()/av_frame_alloc() for these two
// objects. The allocation is left unchanged because it has to match the way
// destroy() releases them, and destroy() is not visible here - confirm.
mPacket = new AVPacket();
mFrame = new AVFrame();

mFormatCtx = avformat_alloc_context();
if (!mFormatCtx) {
    destroy();
    throw logic_error(string("alloc avformat context failed."));
}
if (avformat_open_input(&mFormatCtx, mVideoFile.c_str(), nullptr, nullptr) != 0) {
    destroy();
    throw logic_error(string("Could not open input file: ") + mVideoFile);
}
if (avformat_find_stream_info(mFormatCtx, nullptr) < 0) {
    destroy();
    throw logic_error("Could not find stream information");
}

// nb_streams is an unsigned int; an 8-bit index would wrap around (and loop
// forever) on inputs with more than 255 streams.
for (unsigned int i = 0; i < mFormatCtx->nb_streams; ++i) {
    const AVCodecParameters *params = mFormatCtx->streams[i]->codecpar;
    if (params->codec_type == AVMEDIA_TYPE_AUDIO) {
        mAudioTrackIdx = static_cast<int>(i);
        break;
    }
}
if (mAudioTrackIdx < 0) {
    destroy();
    throw logic_error("No audio track found");
}

// mAudioStream->codec is deprecated, so the decoder context is built from
// the stream's codecpar instead of using: mAudioCodecCtx = mAudioStream->codec;
mAudioStream = mFormatCtx->streams[mAudioTrackIdx];
AVCodecParameters *codecpar = mAudioStream->codecpar;

// Validate the decoder before it is used to allocate the codec context.
mAudioCodec = avcodec_find_decoder(codecpar->codec_id);
if (mAudioCodec == nullptr) {
    destroy();
    throw logic_error("Audio Codec not found");
}
mAudioCodecCtx = avcodec_alloc_context3(mAudioCodec);
if (mAudioCodecCtx == nullptr) {
    destroy();
    throw logic_error("Could not allocate Audio codec context");
}
if (avcodec_parameters_to_context(mAudioCodecCtx, codecpar) < 0) {
    destroy();
    throw logic_error("Could not copy Audio codec parameters");
}
if (avcodec_open2(mAudioCodecCtx, mAudioCodec, nullptr) < 0) {
    destroy();
    throw logic_error("Could not open Audio codec");
}
创建resample的swrcontext
获取输入源的相关参数:channels, sample rate, sample count, sample format
初始化输出的相关参数:channels, sample rate, sample count, sample format
初始化wav writer需要的参数:channels,sample rate, sample bits
初始化SwrContext
注意:从 mAudioCodecCtx->frame_size 得到的 sample count 可能是0,如果是0,就改从 mFrame->nb_samples 获得(mFrame 是解码后的帧)。重采样后的采样数也会变化,每次都应该重新计算。
if (mSwrCtxInit)
return;
// init resample context
int in_channels = mAudioCodecCtx->channels;
int in_sample_rate = mAudioCodecCtx->sample_rate;
int src_nb_samples = mAudioCodecCtx->frame_size != 0 ? mAudioCodecCtx->frame_size : mFrame->nb_samples;
enum AVSampleFormat in_sample_format = mAudioCodecCtx->sample_fmt;
int64_t in_ch_layout = av_get_default_channel_layout(mAudioCodecCtx->channels);
// init out format
if (src_nb_samples > 0)
{
mOutSampeRate = SAMPLE_RATE_16K;
mOutSmapleFormat = AV_SAMPLE_FMT_S16;
mOutChannelLayout = av_get_default_channel_layout(mOutChannelNum);
mOutSampleNum = av_rescale_rnd(src_nb_samples, mOutSampeRate, in_sample_rate, AV_ROUND_UP);
mOutSize = av_samples_get_buffer_size(NULL, mOutChannelNum, mOutSampleNum, mOutSmapleFormat, 1);
// init writer format
if (!mWriterFormat)
{
mWriterFormat = new wave_writer_format;
mWriterFormat->num_channels = mOutChannelNum;
mWriterFormat->sample_rate = mOutSampeRate;
mWriterFormat->sample_bits = (mOutSampeRate == SAMPLE_RATE_16K ? SAMPLE_BITS_16 : SAMPLE_BITS_32);
}
if (!mSwrCtx)
{
mSwrCtx = swr_alloc();
swr_alloc_set_opts(mSwrCtx,
mOutChannelLayout, mOutSmapleFormat, mOutSampeRate,
in_ch_layout, in_sample_format, in_sample_rate,
0, NULL);
swr_init(mSwrCtx);
GlobalLog::getInstance()->getLog()->info("init swr context done.");
}
}
mSwrCtxInit = true;
解码并且进行resample然后写到wav文件中
int ret = 0;
bool isAudioFrame = false;
av_packet_unref(mPacket);
// 循环读取audio packet,然后send给decoder解码
// 读完退出(AVERROR_EOF)
while (true) {
ret = av_read_frame(mFormatCtx, mPacket);
if (ret >= 0) {
isAudioFrame = mPacket->stream_index == mAudioTrackIdx;
if (!isAudioFrame) {
av_packet_unref(mPacket);
continue;
}
int rec = avcodec_send_packet(mAudioCodecCtx, mPacket);
if (ret < 0) {
break;
}
// 循环获取解码的frame,然后对frame进行dump, resample, write wav
// 取完(EAGAIN)退出
while (true) {
av_frame_unref(mFrame);
rec = avcodec_receive_frame(mAudioCodecCtx, mFrame);
if (rec >= 0) {
if (!mSwrCtxInit) {
initSwrContext();
}
#ifdef DUMP_PCM
dumpDecodePCM();
#endif
resample();
writeWav();
} else {
if (rec == AVERROR(EAGAIN))
break;
else if (rec < 0) {
throw logic_error("Error to get decoded frame.\n");
}
}
}
} else {
if (AVERROR_EOF == ret) {
cout << "read frame, got EOF." << endl;
break;
}
}
}
重采样的过程
if (mOutFrame) {
av_frame_unref(mOutFrame);
}
// 每次对输出的nb_samples(mOutSampleNum)重新计算
int in_sample_rate = mFrame->sample_rate;
int src_nb_samples = mFrame->nb_samples;
mOutSampleNum = av_rescale_rnd(swr_get_delay(mSwrCtx, in_sample_rate)
+ src_nb_samples, mOutSampeRate, in_sample_rate, AV_ROUND_UP);
mOutFrame = av_frame_alloc();
mOutFrame->nb_samples = mOutSampleNum;
mOutFrame->format = mOutSmapleFormat;
mOutFrame->channels = mOutChannelNum;
mOutFrame->sample_rate = mOutSampeRate;
// // 每次对输出的out buffer size(mOutSize)重新计算
mOutSize = av_samples_get_buffer_size(NULL, mOutChannelNum, mOutSampleNum, mOutSmapleFormat, 1);
uint8_t *data = (uint8_t *)malloc(mOutSize);
avcodec_fill_audio_frame(mOutFrame, mOutChannelNum,
mOutSmapleFormat, (const uint8_t *)data, mOutSize, 1);
// mResampleCount最后swr_convert返回值才是真正的采样后的sample数,这个是有变化的
// 如果用前面计算得来的mOutSampleNum,那么后面保存的pcm, wav文件可能出现噪音。
if (mSwrCtx != nullptr) {
mResampleCount = swr_convert(mSwrCtx,
(uint8_t **)mOutFrame->data, mOutFrame->nb_samples,
(const uint8_t **)mFrame->data, mFrame->nb_samples);
if (mResampleCount < 0) {
throw logic_error("Error while converting!");
}
#ifdef DUMP_PCM_AUDIO
dumpResamplePCM();
#endif
}
dump pcm的代码
planar格式,dump一个声道的,dump的size就等于nb_samples * sample_size;
packed(交错)格式,dump一个声道:对于单声道,和planar一样,一次写size个字节的数据
非单声道的,需要从交错数据中取出来第一个声道的数据,循环写完
同一个声道相邻两个采样之间的偏移量为sample_size * channels
// Write the first channel of the decoded frame (mFrame) to in_fp as raw PCM.
// Does nothing when in_fp is null.
if (in_fp != nullptr) {
    const enum AVSampleFormat decodedFmt = mAudioCodecCtx->sample_fmt;
    const int sampleCount = mFrame->nb_samples;
    const int bytesPerSample = av_get_bytes_per_sample(decodedFmt);
    const int channelBytes = sampleCount * bytesPerSample;

    // Planar data keeps each channel in its own plane, and packed mono has
    // only one channel, so in both cases data[0] holds the first channel as
    // one contiguous run of nb_samples * sample_size bytes.
    const bool firstChannelContiguous =
        av_sample_fmt_is_planar(decodedFmt) || mFrame->channel_layout == AV_CH_LAYOUT_MONO;

    if (firstChannelContiguous) {
        fwrite(mFrame->data[0], channelBytes, 1, in_fp);
    } else {
        // Packed multi-channel data is interleaved: consecutive samples of
        // the same channel are sample_size * channels bytes apart, so the
        // first channel is picked out one sample at a time.
        const int stride = bytesPerSample * mFrame->channels;
        for (int n = 0; n < sampleCount; ++n) {
            fwrite(mFrame->data[0] + n * stride, bytesPerSample, 1, in_fp);
        }
    }
}
dump重采样后的PCM数据
因为程序的目标是输出单声道的wav文件,所以dump pcm就很简单了:把frame中的数据全部写到文件里就可以了。
// Write the resampled samples held in mOutFrame to out_fp as raw PCM.
// Does nothing when out_fp is null.
if (out_fp != nullptr)
{
    // Bytes per output sample: 4 for FLT and S32, 2 for S16 and any other format.
    int sampleBytes = 2;
    if (mOutSmapleFormat == AV_SAMPLE_FMT_FLT || mOutSmapleFormat == AV_SAMPLE_FMT_S32) {
        sampleBytes = 4;
    }
    // mResampleCount is the sample count used to size the write.
    fwrite(mOutFrame->data[0], mResampleCount * sampleBytes, 1, out_fp);
}
总结
重采样的采样数计算
44.1k采样到16k采样后,采样前是1024个采样,重采样后时长不变,采样率降低,采样数相应的减少了,所以不是不变,需要通过下面的计算得到新的采样数。
out_nb_samples = av_rescale_rnd(swr_get_delay(swrctx, in_sample_rate)
+ src_nb_samples, out_sample_rate, in_sample_rate, AV_ROUND_UP);
out_nb_samples = av_rescale_rnd(src_nb_samples, out_sample_rate, in_sample_rate, AV_ROUND_UP);
一个小细节
out_buffer_size = av_samples_get_buffer_size(NULL, out_channels, out_nb_samples, out_sample_format, 1);
在这个计算输出buffersize的时候,最后一个参数align设置为0的时候会crash,设置为1的时候,就可以工作了。
重采样后的杂音问题
44.1k采样到16k采样后,采样数通过计算为372,我在写wav或者pcm文件的时候,用的size就是372 * bytes_per_sample,分析后发现这个是产生杂音的原因:实际swr_convert返回的采样数在这个例子里面是371、372交替变化,如果写的长度固定为372,而返回的值是371,那就多写了一个采样的无效数据,从而产生了杂音。
重采样分析输入输出
用AC3的源输入如下
Stream #0:0: Audio: ac3, 48000 Hz, stereo, fltp, 384 kb/s
src nb_samples : 1536
calculate out nb_samples : 528
calculate out buffer size : 1056
after resample nb_samples : 512
可以验证,采样前的计算出来的理论值和采样后的实际值是有出入的。
用flac的非planar,双声道源
Stream #0:0: Audio: flac, 48000 Hz, stereo, s16
src channels: 2
src sample_size: 2
calculate buffer size: 9216
calculate nb_samples: 4608
resample为wav格式:
src nb_samples : 4608
calculate out nb_samples : 1552
calculate out buffer size : 3104
after resample nb_samples : 1536
再次验证,采样前的计算出来的理论值和采样后的实际值是有出入的。