提取audio重采样为wav文件

创建ffmpeg avcontext相关

这里注意 mAudioCodecCtx = mAudioStream->codec这个已经过时了,不建议这么用,改用下面的方式更好:

    // Read the stream's codec parameters instead of the deprecated mAudioStream->codec.
    AVCodecParameters *codecpar = mAudioStream->codecpar;
    // Allocate a codec context for mAudioCodec, then copy the stream parameters into it.
    mAudioCodecCtx = avcodec_alloc_context3(mAudioCodec);
    avcodec_parameters_to_context(mAudioCodecCtx, codecpar);

全部代码如下:

// Open mVideoFile, locate its first audio stream and open a decoder for it.
// Every failure path releases the partially built state through destroy()
// and then throws std::logic_error.
    av_log_set_level(AV_LOG_WARNING);
    // NOTE(review): av_register_all() is deprecated on newer FFmpeg releases —
    // confirm against the FFmpeg version this project targets.
    av_register_all();
    avformat_network_init();

    // NOTE(review): FFmpeg documents av_packet_alloc()/av_frame_alloc() as the
    // supported way to create these objects. Left unchanged because destroy()
    // (not visible here) has to release them with the matching call.
    mPacket = new AVPacket();
    mFrame = new AVFrame();
    mFormatCtx = avformat_alloc_context();
    if (!mFormatCtx) {
        destroy();
        throw logic_error(string("alloc avformat context failed."));
    }

    if (avformat_open_input(&mFormatCtx, mVideoFile.c_str(), nullptr, nullptr) != 0) {
        destroy();
        throw logic_error(string("Could not open input file: ") + mVideoFile);
    }

    if (avformat_find_stream_info(mFormatCtx, nullptr) < 0) {
        destroy();
        throw logic_error("Could not find stream information");
    }

    // nb_streams is an unsigned int; a narrower index would wrap around and
    // never terminate on inputs with more than 255 streams.
    for (unsigned int i = 0; i < mFormatCtx->nb_streams; ++i) {
        if (mFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            mAudioTrackIdx = static_cast<int>(i);
            break;
        }
    }

    // Assumes mAudioTrackIdx starts out negative; its initialisation is not
    // visible in this snippet.
    if (mAudioTrackIdx < 0) {
        destroy();
        throw logic_error("No audio track found");
    }

    // mAudioStream->codec is deprecated, so the decoder context is built from
    // the stream's codecpar instead.
    mAudioStream = mFormatCtx->streams[mAudioTrackIdx];
    AVCodecParameters *codecpar = mAudioStream->codecpar;

    // Look the decoder up first so no context is allocated for an unknown codec.
    mAudioCodec = avcodec_find_decoder(codecpar->codec_id);
    if (mAudioCodec == nullptr) {
        destroy();
        throw logic_error("Audio Codec not found");
    }

    mAudioCodecCtx = avcodec_alloc_context3(mAudioCodec);
    if (mAudioCodecCtx == nullptr) {
        destroy();
        throw logic_error("Could not allocate Audio codec context");
    }

    if (avcodec_parameters_to_context(mAudioCodecCtx, codecpar) < 0) {
        destroy();
        throw logic_error("Could not copy Audio codec parameters");
    }

    if (avcodec_open2(mAudioCodecCtx, mAudioCodec, nullptr) < 0) {
        destroy();
        throw logic_error("Could not open Audio codec");
    }

创建resample的swrcontext

获取输入源的相关参数:channels, sample rate, sample count, sample format
初始化输出的相关参数:channels, sample rate, sample count, sample format
初始化wav writer需要的参数:channels,sample rate, sample bits
初始化SwrContext

注意:从 mAudioCodecCtx->frame_size 得到的 sample count 可能是0,如果是0,就改从 mFrame->nb_samples 获得(mFrame 是解码后的帧)。重采样后的采样数也是会变化的,每次都应该重新计算。

    // One-time setup of the output format, the wav writer format and the
    // resampler. Does nothing once mSwrCtxInit is set.
    // Throws std::logic_error if the resampler cannot be allocated or initialised.
    if (mSwrCtxInit)
        return;

    // Input side: taken from the opened decoder context. frame_size may be 0,
    // in which case the sample count of the decoded frame is used instead.
    int in_channels = mAudioCodecCtx->channels;
    int in_sample_rate = mAudioCodecCtx->sample_rate;
    int src_nb_samples = mAudioCodecCtx->frame_size != 0 ? mAudioCodecCtx->frame_size : mFrame->nb_samples;
    enum AVSampleFormat in_sample_format = mAudioCodecCtx->sample_fmt;
    int64_t in_ch_layout = av_get_default_channel_layout(in_channels);

    if (src_nb_samples > 0)
    {
        // Output side: 16 kHz, signed 16-bit, mOutChannelNum channels.
        mOutSampeRate = SAMPLE_RATE_16K;
        mOutSmapleFormat = AV_SAMPLE_FMT_S16;
        mOutChannelLayout = av_get_default_channel_layout(mOutChannelNum);
        mOutSampleNum = av_rescale_rnd(src_nb_samples, mOutSampeRate, in_sample_rate, AV_ROUND_UP);
        mOutSize = av_samples_get_buffer_size(NULL, mOutChannelNum, mOutSampleNum, mOutSmapleFormat, 1);

        // Format handed to the wav writer.
        if (!mWriterFormat)
        {
            mWriterFormat = new wave_writer_format;
            mWriterFormat->num_channels = mOutChannelNum;
            mWriterFormat->sample_rate = mOutSampeRate;
            // NOTE(review): the bit depth is selected from the sample rate,
            // not from mOutSmapleFormat — confirm this is intended.
            mWriterFormat->sample_bits = (mOutSampeRate == SAMPLE_RATE_16K ? SAMPLE_BITS_16 : SAMPLE_BITS_32);
        }

        if (!mSwrCtx)
        {
            // With a null first argument swr_alloc_set_opts() allocates the
            // context itself; it returns null on failure.
            mSwrCtx = swr_alloc_set_opts(nullptr,
                            mOutChannelLayout, mOutSmapleFormat, mOutSampeRate,
                            in_ch_layout, in_sample_format, in_sample_rate,
                            0, NULL);
            if (!mSwrCtx)
            {
                throw logic_error("Could not allocate swr context");
            }
            if (swr_init(mSwrCtx) < 0)
            {
                swr_free(&mSwrCtx);
                throw logic_error("Could not init swr context");
            }
            GlobalLog::getInstance()->getLog()->info("init swr context done.");
        }

        // Only mark the setup as done when it really happened, so a call made
        // without a usable sample count can be retried later.
        mSwrCtxInit = true;
    }

解码并且进行resample然后写到wav文件中

// Read packets from the container, feed the audio ones to the decoder and run
// every decoded frame through (optional dump) -> resample() -> writeWav().
// At end of file the decoder is drained so buffered frames are not lost.
// Throws std::logic_error when the decoder reports an error while returning frames.
    int ret = 0;
    bool isAudioFrame = false;
    bool draining = false;

    av_packet_unref(mPacket);
    while (!draining) {
        ret = av_read_frame(mFormatCtx, mPacket);
        if (ret == AVERROR(EAGAIN)) {
            // No packet available right now; try again.
            continue;
        }
        if (ret == AVERROR_EOF) {
            cout << "read frame, got EOF." << endl;
            // A null packet switches the decoder into draining mode.
            draining = true;
        } else if (ret < 0) {
            // Any other read error is fatal for this input; stop instead of
            // retrying forever.
            cerr << "read frame failed, error " << ret << endl;
            break;
        } else {
            isAudioFrame = mPacket->stream_index == mAudioTrackIdx;
            if (!isAudioFrame) {
                av_packet_unref(mPacket);
                continue;
            }
        }

        int rec = avcodec_send_packet(mAudioCodecCtx, draining ? nullptr : mPacket);
        // The decoder keeps its own reference; release ours so the packet
        // payload is not leaked on the next read.
        av_packet_unref(mPacket);
        if (rec < 0) {
            break;
        }

        // Pull every frame the decoder can currently produce.
        // EAGAIN: needs more input. AVERROR_EOF: fully drained.
        while (true) {
            av_frame_unref(mFrame);
            rec = avcodec_receive_frame(mAudioCodecCtx, mFrame);
            if (rec == AVERROR(EAGAIN) || rec == AVERROR_EOF) {
                break;
            }
            if (rec < 0) {
                throw logic_error("Error to get decoded frame.\n");
            }

            if (!mSwrCtxInit) {
                initSwrContext();
            }
#ifdef DUMP_PCM
            dumpDecodePCM();
#endif
            resample();
            writeWav();
        }
    }

重采样的过程

if (mOutFrame) {
        av_frame_unref(mOutFrame);
    }

    // 每次对输出的nb_samples(mOutSampleNum)重新计算
    int in_sample_rate = mFrame->sample_rate;
    int src_nb_samples = mFrame->nb_samples;
    mOutSampleNum = av_rescale_rnd(swr_get_delay(mSwrCtx, in_sample_rate)
         + src_nb_samples, mOutSampeRate, in_sample_rate, AV_ROUND_UP);

    mOutFrame = av_frame_alloc();
    mOutFrame->nb_samples = mOutSampleNum;
    mOutFrame->format = mOutSmapleFormat;
    mOutFrame->channels = mOutChannelNum;
    mOutFrame->sample_rate = mOutSampeRate;
    // // 每次对输出的out buffer size(mOutSize)重新计算
    mOutSize = av_samples_get_buffer_size(NULL, mOutChannelNum, mOutSampleNum, mOutSmapleFormat, 1);
    uint8_t *data = (uint8_t *)malloc(mOutSize);
    avcodec_fill_audio_frame(mOutFrame, mOutChannelNum,
                             mOutSmapleFormat, (const uint8_t *)data, mOutSize, 1);

    // mResampleCount最后swr_convert返回值才是真正的采样后的sample数,这个是有变化的
    // 如果用前面计算得来的mOutSampleNum,那么后面保存的pcm, wav文件可能出现噪音。
    if (mSwrCtx != nullptr) {
        mResampleCount = swr_convert(mSwrCtx,
                                     (uint8_t **)mOutFrame->data, mOutFrame->nb_samples,
                                     (const uint8_t **)mFrame->data, mFrame->nb_samples);
        if (mResampleCount < 0) {
            throw logic_error("Error while converting!");
        }
#ifdef DUMP_PCM_AUDIO
        dumpResamplePCM();
#endif
    }

dump pcm的代码

planar格式,dump一个声道的,dump的size就等于nb_samples * sample_size;
packed(交错)格式,dump一个声道:对于单声道,和planar一样,一次写size个数据
非单声道的,需要从交错数据中取出来第一个声道的数据,循环写完
同一个声道相邻两个采样之间的偏移量为sample_size * channels

 // Write the first channel of the decoded frame (mFrame) to in_fp as raw PCM.
 // Planar data and mono packed data hold that channel contiguously in data[0];
 // packed multi-channel data is interleaved, so one sample is copied per
 // stride of sample_size * channels bytes.
    if (in_fp != nullptr) {
        const int bytesPerSample = av_get_bytes_per_sample(mAudioCodecCtx->sample_fmt);
        const int sampleCount = mFrame->nb_samples;
        const bool contiguous = av_sample_fmt_is_planar(mAudioCodecCtx->sample_fmt)
                                || mFrame->channel_layout == AV_CH_LAYOUT_MONO;

        if (contiguous) {
            fwrite(mFrame->data[0], sampleCount * bytesPerSample, 1, in_fp);
        } else {
            const int stride = bytesPerSample * mFrame->channels;
            for (int idx = 0; idx < sampleCount; ++idx) {
                fwrite(mFrame->data[0] + idx * stride, bytesPerSample, 1, in_fp);
            }
        }
    }

dump重采样后的PCM数据

因为程序中写目标是输出wav文件单声道,所以dump pcm就很简单了,将frame的数据全部写到文件中就OK了。

// Append the samples produced by the last swr_convert() call to out_fp as raw
// PCM. Only mResampleCount samples are written, because that is the number of
// samples swr_convert() actually returned.
    if (out_fp != nullptr && mResampleCount > 0)
    {
        // Ask libavutil for the sample size instead of hard-coding a table.
        size_t bytes = (size_t)av_get_bytes_per_sample(mOutSmapleFormat);
        // Packed formats interleave every channel inside data[0]; planar
        // formats keep a single channel there.
        if (!av_sample_fmt_is_planar(mOutSmapleFormat))
        {
            bytes *= (size_t)mOutChannelNum;
        }
        fwrite(mOutFrame->data[0], (size_t)mResampleCount * bytes, 1, out_fp);
    }

总结

重采样的采样数计算

44.1k重采样到16k时,重采样前是1024个采样;重采样后时长不变、采样率降低,采样数也相应地减少,而不是保持不变,所以需要通过下面的计算得到新的采样数。

// Variant 1: add the value returned by swr_get_delay() to the input sample count before rescaling.
out_nb_samples = av_rescale_rnd(swr_get_delay(swrctx, in_sample_rate)
     + src_nb_samples, out_sample_rate, in_sample_rate, AV_ROUND_UP);
// Variant 2: rescale only the input sample count.
out_nb_samples = av_rescale_rnd(src_nb_samples, out_sample_rate, in_sample_rate, AV_ROUND_UP);

一个小细节

// The last argument is `align`; it is passed as 1 here.
out_buffer_size = av_samples_get_buffer_size(NULL, out_channels, out_nb_samples, out_sample_format, 1);

在这个计算输出buffersize的时候,最后一个参数align设置为0的时候会crash,设置为1的时候,就可以工作了。

重采样后的杂音问题

44.1k采样到16k采样后,采样数通过计算为372,我在写wav或者pcm文件的时候,用的size就是372 * bytes_per_sample,分析后发现这个是产生杂音的原因:实际swr_convert返回的采样数在这个例子里面是371、372交替变化,写的长度固定为372的话,如果返回的值是371,那就多写了一个采样(bytes_per_sample个字节)的无效数据,就产生了杂音。

重采样分析输入输出

用AC3的源输入如下

Stream #0:0: Audio: ac3, 48000 Hz, stereo, fltp, 384 kb/s
src            nb_samples : 1536
calculate out  nb_samples : 528
calculate out buffer size : 1056
after resample nb_samples : 512

可以验证,采样前的计算出来的理论值和采样后的实际值是有出入的。

用flac的非planar,双声道源

Stream #0:0: Audio: flac, 48000 Hz, stereo, s16
src          channels: 2
src       sample_size: 2
calculate buffer size: 9216
calculate  nb_samples: 4608

resample为wav格式:

src            nb_samples : 4608
calculate out  nb_samples : 1552
calculate out buffer size : 3104
after resample nb_samples : 1536

再次验证,采样前的计算出来的理论值和采样后的实际值是有出入的。

你可能感兴趣的:(提取audio重采样为wav文件)