ffplay.c 源码分析- 音频部分

FFmpeg 代码 version 3.3:

ffplay中的线程模型

音频的线程模型.png

概述

主要的负责音频播放的线程，主要有如上几个

1. 读取线程-read_thread
在main方法中会启动的读取的线程。
这个和视频的线程模型中是一致的。不同的是，循环读取的数据是音频数据。

循环读取
这个线程中，会进行读取的循环。不断的通过av_read_frame方法，读取解码前的数据packet。
送入队列
最后将得到的数据，送入对应的流的packet队列（视频/音频/字幕都对应各自流的队列）

2. 对应流的解码线程-audio_thread
在读取线程中，对AVFormatContext进行初始化，获取AVStream信息后，对应不同的码流会开启对应的解码线程Decode Thread。
ffplay中这里包括了3种流：视频流、音频流和字幕流。

循环读取
会从对应流的packet队列中，得到数据。
然后送入解码器，音频通过avcodec_decode_audio4(旧的API)进行解码（视频对应的是avcodec_decode_video2）。
送入队列
解码之后，得到解码后的数据AVFrame，并确定对应的pts。
最后将其再次送入队列当中。

3.播放的设置
SDL的音频播放，主要是设置一个audiocallback,在callback当中，将我们解码后的数据设置给传入buff地址中。
后续SDL会再将这个buff地址，传给对应的音频播放设备，进行播放。
具体可见 SDL2库(4)-Android 端源码简要分析（AudioSubSystem）文章中所述。

整体的流程就是这样简单。

音频参数

定义了一个结构体，来简单的保存音频的参数。

// Compact description of an audio configuration (used for both the
// source side and the target/hardware side of playback).
typedef struct AudioParams {
    // Sample rate in Hz.
    int freq;
    // Number of channels.
    int channels;
    // Channel layout that goes with `channels`; audio_open derives a channel
    // count from a layout via av_get_channel_layout_nb_channels().
    int64_t channel_layout;
    // Sample format.
    enum AVSampleFormat fmt;
    // Size in bytes of one sample across all channels; audio_open computes it
    // with av_samples_get_buffer_size(NULL, channels, 1, fmt, 1).
int frame_size;
    // Bytes of audio per second; audio_open computes it with
    // av_samples_get_buffer_size(NULL, channels, freq, fmt, 1).
    int bytes_per_sec;
} AudioParams;

ffplay初始化(main_thread)

进行初始化的整体流程，大部分和上一篇文章（ffplay.c 源码分析- 视频部分）相似。
（其中包括对FFmpeg的初始化，对传递的参数进行初始化，SDL的初始化，通过stream_open函数，开启read_thread读取线程）
这里就不做过多描述了。
具体来看一下音频相关的部分。

开启对应的解码线程

通过stream_component_open打开对应的AVStream，并开启对应的解码线程。
ffplay中对应三种码流。（视频、音频和字幕，对应打开自己的解码线程）

stream_component_open中的音频部分

   switch (avctx->codec_type) {
    case AVMEDIA_TYPE_AUDIO:
        // Audio-filter setup is omitted from this excerpt.
        sample_rate    = avctx->sample_rate;
        nb_channels    = avctx->channels;
        channel_layout = avctx->channel_layout;

        /* prepare audio output */
        // Open the audio device. On success the return value is the size of
        // the device buffer and is->audio_tgt holds the obtained parameters.
        if ((ret = audio_open(is, channel_layout, nb_channels, sample_rate, &is->audio_tgt)) < 0)
            goto fail;
        // Remember the hardware buffer size; start with source parameters
        // equal to the target ones and with an empty output buffer.
        is->audio_hw_buf_size = ret;
        is->audio_src = is->audio_tgt;
        is->audio_buf_size  = 0;
        is->audio_buf_index = 0;

        /* init averaging filter */
        // NOTE(review): these fields appear to feed the audio/video sync
        // logic (synchronize_audio); not covered by this excerpt.
        is->audio_diff_avg_coef  = exp(log(0.01) / AUDIO_DIFF_AVG_NB);
        is->audio_diff_avg_count = 0;
        /* since we do not have a precise enough audio FIFO fullness,
           we correct audio sync only if larger than this threshold */
        is->audio_diff_threshold = (double)(is->audio_hw_buf_size) / is->audio_tgt.bytes_per_sec;

        is->audio_stream = stream_index;
        is->audio_st = ic->streams[stream_index];
        // Initialise the audio decoder state on top of the audio packet queue.
        decoder_init(&is->auddec, avctx, &is->audioq, is->continue_read_thread);
        if ((is->ic->iformat->flags & (AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH | AVFMT_NO_BYTE_SEEK)) && !is->ic->iformat->read_seek) {
            is->auddec.start_pts = is->audio_st->start_time;
            is->auddec.start_pts_tb = is->audio_st->time_base;
        }
        // Start the audio decoding thread (runs audio_thread).
        if ((ret = decoder_start(&is->auddec, audio_thread, is)) < 0)
            goto out;
        // Unpause the device so SDL starts invoking the audio callback.
        SDL_PauseAudioDevice(audio_dev, 0);
        break;

这里重点来看一下打开音频设备的部分
audio_open

/*
 * Open the SDL audio device with parameters as close as possible to the
 * requested ones, and record what was actually obtained.
 *
 * opaque                 stored as the SDL callback userdata
 * wanted_channel_layout  requested channel layout (replaced by a default
 *                        layout when 0 or inconsistent with the count)
 * wanted_nb_channels     requested channel count
 * wanted_sample_rate     requested sample rate
 * audio_hw_params        filled with the parameters of the opened device
 *
 * Returns spec.size as reported by SDL on success, -1 on failure.
 */
static int audio_open(void *opaque, int64_t wanted_channel_layout, int wanted_nb_channels, int wanted_sample_rate, struct AudioParams *audio_hw_params)
{
    // SDL's description of an audio configuration: what we ask for
    // (wanted_spec) and what the device actually gives us (spec).
    SDL_AudioSpec wanted_spec, spec;
    const char *env;
    // Fallback tables. next_nb_channels is indexed by the channel count that
    // just failed and yields the next count to try (0 = none left);
    // next_sample_rates is walked downwards, 0 terminating the search.
    static const int next_nb_channels[] = {0, 0, 1, 6, 2, 6, 4, 6};
    static const int next_sample_rates[] = {0, 44100, 48000, 96000, 192000};
    int next_sample_rate_idx = FF_ARRAY_ELEMS(next_sample_rates) - 1;

    // The SDL_AUDIO_CHANNELS environment variable overrides the channel count.
    env = SDL_getenv("SDL_AUDIO_CHANNELS");
    if (env) {
        wanted_nb_channels = atoi(env);
        wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
    }
    if (!wanted_channel_layout || wanted_nb_channels != av_get_channel_layout_nb_channels(wanted_channel_layout)) {
        wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
        wanted_channel_layout &= ~AV_CH_LAYOUT_STEREO_DOWNMIX;
    }
    // The channel count is always recomputed from the (possibly adjusted)
    // layout via av_get_channel_layout_nb_channels().
    wanted_nb_channels = av_get_channel_layout_nb_channels(wanted_channel_layout);
    wanted_spec.channels = wanted_nb_channels;
    wanted_spec.freq = wanted_sample_rate;
    if (wanted_spec.freq <= 0 || wanted_spec.channels <= 0) {
        av_log(NULL, AV_LOG_ERROR, "Invalid sample rate or channel count!\n");
        return -1;
    }
    // Position the fallback index on the highest table rate strictly below
    // the wanted rate, so retries only ever go to lower rates.
    while (next_sample_rate_idx && next_sample_rates[next_sample_rate_idx] >= wanted_spec.freq)
        next_sample_rate_idx--;
    // The output sample format is hard-coded to signed 16-bit, native endian.
    wanted_spec.format = AUDIO_S16SYS;
    wanted_spec.silence = 0;
    // Requested device buffer size: a power of two derived from
    // freq / SDL_AUDIO_MAX_CALLBACKS_PER_SEC, never below
    // SDL_AUDIO_MIN_BUFFER_SIZE. This is a buffer length, not a per-second
    // count. NOTE(review): SDL documents the unit as sample frames.
    wanted_spec.samples = FFMAX(SDL_AUDIO_MIN_BUFFER_SIZE, 2 << av_log2(wanted_spec.freq / SDL_AUDIO_MAX_CALLBACKS_PER_SEC));
    // SDL pulls audio through this callback: it hands the callback a buffer
    // pointer and a length, and the callback must fill it with the data to play.
    wanted_spec.callback = sdl_audio_callback;
    wanted_spec.userdata = opaque;
    // Try to open the device; on failure step through the fallback channel
    // counts first, then through lower sample rates, until one combination
    // works or the tables are exhausted.
    while (!(audio_dev = SDL_OpenAudioDevice(NULL, 0, &wanted_spec, &spec, SDL_AUDIO_ALLOW_FREQUENCY_CHANGE | SDL_AUDIO_ALLOW_CHANNELS_CHANGE))) {
        av_log(NULL, AV_LOG_WARNING, "SDL_OpenAudio (%d channels, %d Hz): %s\n",
               wanted_spec.channels, wanted_spec.freq, SDL_GetError());
        wanted_spec.channels = next_nb_channels[FFMIN(7, wanted_spec.channels)];
        if (!wanted_spec.channels) {
            wanted_spec.freq = next_sample_rates[next_sample_rate_idx--];
            wanted_spec.channels = wanted_nb_channels;
            if (!wanted_spec.freq) {
                av_log(NULL, AV_LOG_ERROR,
                       "No more combinations to try, audio open failed\n");
                return -1;
            }
        }
        wanted_channel_layout = av_get_default_channel_layout(wanted_spec.channels);
    }
    // Only S16 output is handled by the rest of the player.
    if (spec.format != AUDIO_S16SYS) {
        av_log(NULL, AV_LOG_ERROR,
               "SDL advised audio format %d is not supported!\n", spec.format);
        return -1;
    }
    // SDL was allowed to change the channel count; if it did, pick the
    // default layout for the count it chose.
    if (spec.channels != wanted_spec.channels) {
        wanted_channel_layout = av_get_default_channel_layout(spec.channels);
        if (!wanted_channel_layout) {
            av_log(NULL, AV_LOG_ERROR,
                   "SDL advised channel count %d is not supported!\n", spec.channels);
            return -1;
        }
    }
    // Record the parameters the device was actually opened with.
    audio_hw_params->fmt = AV_SAMPLE_FMT_S16;
    audio_hw_params->freq = spec.freq;
    audio_hw_params->channel_layout = wanted_channel_layout;
    audio_hw_params->channels =  spec.channels;
    audio_hw_params->frame_size = av_samples_get_buffer_size(NULL, audio_hw_params->channels, 1, audio_hw_params->fmt, 1);
    audio_hw_params->bytes_per_sec = av_samples_get_buffer_size(NULL, audio_hw_params->channels, audio_hw_params->freq, audio_hw_params->fmt, 1);
    if (audio_hw_params->bytes_per_sec <= 0 || audio_hw_params->frame_size <= 0) {
        av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size failed\n");
        return -1;
    }
    return spec.size;
}

这里最值得注意的就是
wanted_spec.callback = sdl_audio_callback;
SDL播放音频的话，会将每次需要播放的数据，根据callback将数据位置传递给callback,
我们需要在callback中自定义对音频数据的填充，就可以完成播放。

音频解码线程`audio_thread`

在read_thread中对应音频流时，初始化好了AVCodec和AVCodecContext。通过decoder_start方法，开启了audio_thread。
在audio_thread中需要创建AVFrame来接受解码后的数据。（视频线程video_thread还会额外确定视频的帧率，下面的代码片段即来自video_thread。）
然后开启解码循环。
不断的从队列中获取解码前的数据，然后送入解码器解码。
再得到解码后的数据，再送入对应的队列当中。

初始化参数

创建AVFrame和得到大致的视频帧率

    // NOTE(review): this excerpt reads is->video_st and guesses a frame rate,
    // so it appears to be taken from video_thread rather than audio_thread.
    // Allocate the AVFrame that will receive decoded data.
    AVFrame *frame = av_frame_alloc();
    // Time base of the stream.
    AVRational tb = is->video_st->time_base;
    // Best-effort guess of the video frame rate.
    AVRational frame_rate = av_guess_frame_rate(is->ic, is->video_st, NULL);

开始循环解码

解码的方式和视频播放相同。这儿就不做过多解释了。解码完，同样送入队列当中。

音频设置部分

正如上面所述，我们需要在传入的callback中对我们的数据进行处理。callback也同样运行在SDL中创建的RunAudio线程。
SDL具体的运行方式，可见SDL2库(4)-Android 端源码简要分析（AudioSubSystem）;

/* prepare a new audio buffer */
/*
 * SDL audio callback: fill `stream` with exactly `len` bytes.
 *
 * opaque  the VideoState registered as userdata in audio_open
 * stream  start of the buffer SDL wants filled
 * len     number of bytes SDL wants
 *
 * Data comes from is->audio_buf, which is refilled via audio_decode_frame()
 * whenever it has been fully consumed. The audio clock is updated at the end.
 */
static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
{
    VideoState *is = opaque;
    int audio_size, len1;

    audio_callback_time = av_gettime_relative();

    // Keep going until the whole SDL buffer has been filled.
    while (len > 0) {
        if (is->audio_buf_index >= is->audio_buf_size) {
           // Current buffer is used up: fetch the next decoded frame.
           audio_size = audio_decode_frame(is);
           // A negative size means no data is available (error or paused).
           if (audio_size < 0) {
                /* if error, just output silence */
               is->audio_buf = NULL;
               is->audio_buf_size = SDL_AUDIO_MIN_BUFFER_SIZE / is->audio_tgt.frame_size * is->audio_tgt.frame_size;
           } else {
               if (is->show_mode != SHOW_MODE_VIDEO)
                   update_sample_display(is, (int16_t *)is->audio_buf, audio_size);
               // Remember how many bytes the new buffer holds.
               is->audio_buf_size = audio_size;
           }
           // Start reading the new buffer from the beginning.
           is->audio_buf_index = 0;
        }
        // len1 = bytes still unread in audio_buf ...
        len1 = is->audio_buf_size - is->audio_buf_index;
        // ... clamped to the space SDL still wants filled.
        if (len1 > len)
            len1 = len;
        // At full volume and not muted the data can be copied straight through.
        if (!is->muted && is->audio_buf && is->audio_volume == SDL_MIX_MAXVOLUME)
            memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1);
        else {
            // Otherwise start from silence and, unless muted or there is no
            // data, mix the samples in at the requested volume.
            memset(stream, 0, len1);
            if (!is->muted && is->audio_buf)
                // SDL_MixAudioFormat applies is->audio_volume while mixing.
                SDL_MixAudioFormat(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, AUDIO_S16SYS, len1, is->audio_volume);
        }
        // Advance; if SDL's buffer still has room the loop fetches more data.
        len -= len1;
        stream += len1;
        is->audio_buf_index += len1;
    }

    // Bytes left in audio_buf that have not yet been handed to SDL.
    is->audio_write_buf_size = is->audio_buf_size - is->audio_buf_index;
    /* Let's assume the audio driver that is used by SDL has two periods. */
    if (!isnan(is->audio_clock)) {
        // Set the audio clock to audio_clock minus the duration of the data
        // not yet played (two hardware buffers plus the unread remainder),
        // stamped with the time this callback was entered.
        set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
        sync_clock_to_slave(&is->extclk, &is->audclk);
    }
}

audio_decode_frame

/*
 * Take the next decoded audio frame from is->sampq, convert it to the
 * target format when necessary, and expose the result through is->audio_buf.
 *
 * Also updates is->audio_clock and is->audio_clock_serial from the frame.
 *
 * Returns the size in bytes of the data in is->audio_buf, or a negative
 * value when paused, when no frame is available, or on conversion failure.
 */
static int audio_decode_frame(VideoState *is)
{
    int data_size, resampled_data_size;
    int64_t dec_channel_layout;
    av_unused double audio_clock0;
    int wanted_nb_samples;
    Frame *af;

    if (is->paused)
        return -1;

    do {
        // Pull frames from the sample queue, dropping any whose serial does
        // not match the packet queue's current serial.
        // NOTE(review): presumably these are stale frames from before a seek.
        if (!(af = frame_queue_peek_readable(&is->sampq)))
            return -1;
        frame_queue_next(&is->sampq);
    } while (af->serial != is->audioq.serial);

    // Size in bytes of the decoded frame as-is.
    data_size = av_samples_get_buffer_size(NULL, af->frame->channels,
                                           af->frame->nb_samples,
                                           af->frame->format, 1);

    // Use the frame's channel layout when it is set and consistent with the
    // channel count; otherwise fall back to the default layout for that count.
    dec_channel_layout =
        (af->frame->channel_layout && af->frame->channels == av_get_channel_layout_nb_channels(af->frame->channel_layout)) ?
        af->frame->channel_layout : av_get_default_channel_layout(af->frame->channels);
    wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);

    // (Re)create the resampler when the frame's format, layout or rate
    // differs from the recorded source parameters, or when the sample count
    // must be adjusted and no resampler exists yet.
    if (af->frame->format        != is->audio_src.fmt            ||
        dec_channel_layout       != is->audio_src.channel_layout ||
        af->frame->sample_rate   != is->audio_src.freq           ||
        (wanted_nb_samples       != af->frame->nb_samples && !is->swr_ctx)) {
        swr_free(&is->swr_ctx);
        // Allocate and configure a converter: frame parameters -> target.
        is->swr_ctx = swr_alloc_set_opts(NULL,
                                         is->audio_tgt.channel_layout, is->audio_tgt.fmt, is->audio_tgt.freq,
                                         dec_channel_layout,           af->frame->format, af->frame->sample_rate,
                                         0, NULL);
        if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
            av_log(NULL, AV_LOG_ERROR,
                   "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
                    af->frame->sample_rate, av_get_sample_fmt_name(af->frame->format), af->frame->channels,
                    is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
            swr_free(&is->swr_ctx);
            return -1;
        }
        is->audio_src.channel_layout = dec_channel_layout;
        is->audio_src.channels       = af->frame->channels;
        is->audio_src.freq = af->frame->sample_rate;
        is->audio_src.fmt = af->frame->format;
    }

    // Convert through the resampler when one is active.
    if (is->swr_ctx) {
        const uint8_t **in = (const uint8_t **)af->frame->extended_data;
        uint8_t **out = &is->audio_buf1;
        // NOTE(review): the +256 looks like fixed headroom on top of the
        // rate-scaled sample count; the rationale is not visible here.
        int out_count = (int64_t)wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate + 256;
        int out_size  = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, out_count, is->audio_tgt.fmt, 0);
        int len2;
        if (out_size < 0) {
            av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
            return -1;
        }
        // Ask the resampler to stretch/shrink the output when the sync logic
        // wants a different number of samples than the frame contains.
        if (wanted_nb_samples != af->frame->nb_samples) {
            if (swr_set_compensation(is->swr_ctx, (wanted_nb_samples - af->frame->nb_samples) * is->audio_tgt.freq / af->frame->sample_rate,
                                        wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate) < 0) {
                av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
                return -1;
            }
        }
        av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, out_size);
        if (!is->audio_buf1)
            return AVERROR(ENOMEM);
        // Run the conversion; len2 is the number of samples produced per channel.
        len2 = swr_convert(is->swr_ctx, out, out_count, in, af->frame->nb_samples);
        if (len2 < 0) {
            av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
            return -1;
        }
        // Output filled the whole buffer: warn and reinitialise the resampler.
        if (len2 == out_count) {
            av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
            if (swr_init(is->swr_ctx) < 0)
                swr_free(&is->swr_ctx);
        }
        is->audio_buf = is->audio_buf1;
        // Byte size of the converted data, which is what gets returned.
        resampled_data_size = len2 * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);
    } else {
        // No conversion needed: play straight from the frame's first plane.
        is->audio_buf = af->frame->data[0];
        resampled_data_size = data_size;
    }

    audio_clock0 = is->audio_clock;
    /* update the audio clock with the pts */
    if (!isnan(af->pts))
        // audio_clock = frame pts + frame duration, i.e. the time at which
        // this frame will have finished playing.
        is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
    else
        is->audio_clock = NAN;
    is->audio_clock_serial = af->serial;
#ifdef DEBUG
    {
        static double last_clock;
        printf("audio: delay=%0.3f clock=%0.3f clock0=%0.3f\n",
               is->audio_clock - last_clock,
               is->audio_clock, audio_clock0);
        last_clock = is->audio_clock;
    }
#endif
    return resampled_data_size;
}

这里值得注意的有两点：

如果解码出来的音频数据不是Output（目标输出）的类型，是需要进行转码（重采样）的
设置时间戳。为当前一帧播放完，所有音频数据的时间。
在之前的sdl_audio_callback中，我们可以看到最后的同步时间戳。

  set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);

最后传入同步的时间戳，是当前帧播放完时的时间戳(audio_clock)，减去「2个硬件buffer + audio_buf中尚未写入的剩余数据(audio_write_buf_size)」所对应的播放时长。
因为这部分数据虽然已经解码，但还没有被真正播放出来；预期情况下，如果audio_buf恰好被全部写入(audio_write_buf_size为0)，则这里就是原来的pts减去硬件延迟的时间。