FFmpeg 代码 version 3.3:
ffplay中的线程模型
概述
负责音频播放的线程,主要有如下几个:
1. 读取线程-read_thread
在main方法中会启动的读取的线程。
这个和视频的线程模型中是一致的。不同的是,循环读取的数据是音频数据。
- 循环读取
这个线程中,会进行读取的循环,不断地通过av_read_frame方法,读取解码前的数据packet。
- 送入队列
最后将得到的数据,送入对应流的packet队列(视频/音频/字幕流都对应各自的队列)。
2. 对应流的解码线程-audio_thread
在读取线程中,对AVFormatContext
进行初始化,获取AVStream
信息后,对应不同的码流会开启对应的解码线程Decode Thread。
ffplay
中这里包括了3种流。视频流。音频流和字幕流。
- 循环读取
会从对应流的packet队列中,得到数据。
然后送入解码器,通过avcodec_decode_video2(旧的API;音频流对应的是avcodec_decode_audio4)进行解码。
- 送入队列
解码之后,得到解码后的数据AVFrame,并确定对应的pts。
最后将其再次送入队列当中。
3.播放的设置
SDL
的音频播放,主要是设置一个audiocallback
,在callback当中,将我们解码后的数据设置给传入buff
地址中。
后续SDL会再将这个buff地址,传给对应的音频播放设备,进行播放。
具体可见 SDL2库(4)-Android 端源码简要分析(AudioSubSystem) 文章中所述。
整体的流程就是这样简单。
音频参数
定义了一个结构体,来简单的保存音频的参数。
/* Compact description of one audio configuration (used below for both the
 * source format and the hardware/target format). */
typedef struct AudioParams {
// Sample rate in Hz.
int freq;
// Number of audio channels.
int channels;
// Channel layout. audio_open recomputes the channel count from this value
// with av_get_channel_layout_nb_channels().
int64_t channel_layout;
// Sample format of the audio data.
enum AVSampleFormat fmt;
// Size in bytes of one sample across all channels
// (audio_open computes it with nb_samples = 1).
int frame_size;
// Number of bytes per second of audio
// (audio_open computes it with nb_samples = freq).
int bytes_per_sec;
} AudioParams;
ffplay初始化(main_thread)
进行初始化的整体流程,大部分和上一篇文章相似:ffplay.c 源码分析- 视频部分。
(其中包括对FFmpeg的初始化
,对传递的参数进行初始化
,SDL的初始化
,通过
stream_open函数
,开启
read_thread读取线程
)
这里就不做过多描述了。
具体来看一下音频相关的部分。
开启对应的解码线程
通过stream_component_open打开对应的AVStream,并开启对应的解码线程。
ffplay
中对应三种码流。(视频、音频和字幕,对应打开自己的解码线程)
stream_component_open
中的音频部分
/* Excerpt: audio branch of stream_component_open. */
switch (avctx->codec_type) {
case AVMEDIA_TYPE_AUDIO:
// The audio filter setup is omitted from this excerpt.
sample_rate = avctx->sample_rate;
nb_channels = avctx->channels;
channel_layout = avctx->channel_layout;
/* prepare audio output */
// Open the audio device. A negative return is an error; otherwise the
// return value is the size of the device's audio buffer.
if ((ret = audio_open(is, channel_layout, nb_channels, sample_rate, &is->audio_tgt)) < 0)
goto fail;
// Remember the hardware buffer size and start with the source parameters
// equal to the target (hardware) parameters; the playback buffer is empty.
is->audio_hw_buf_size = ret;
is->audio_src = is->audio_tgt;
is->audio_buf_size = 0;
is->audio_buf_index = 0;
/* init averaging filter */
// Per the article, these fields belong to the audio/video synchronisation
// algorithm, which is not covered here.
is->audio_diff_avg_coef = exp(log(0.01) / AUDIO_DIFF_AVG_NB);
is->audio_diff_avg_count = 0;
/* since we do not have a precise enough audio FIFO fullness,
we correct audio sync only if larger than this threshold */
is->audio_diff_threshold = (double)(is->audio_hw_buf_size) / is->audio_tgt.bytes_per_sec;
is->audio_stream = stream_index;
is->audio_st = ic->streams[stream_index];
// Initialise the audio decoder state on top of the audio packet queue.
decoder_init(&is->auddec, avctx, &is->audioq, is->continue_read_thread);
if ((is->ic->iformat->flags & (AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH | AVFMT_NO_BYTE_SEEK)) && !is->ic->iformat->read_seek) {
is->auddec.start_pts = is->audio_st->start_time;
is->auddec.start_pts_tb = is->audio_st->time_base;
}
// Start the audio decoding thread (audio_thread).
if ((ret = decoder_start(&is->auddec, audio_thread, is)) < 0)
goto out;
// Unpause the audio device (pause_on = 0) so that playback starts.
SDL_PauseAudioDevice(audio_dev, 0);
break;
这里重点来看一下打开音频流的部分
audio_open
/**
 * Open the SDL audio device and report the format it will be fed with.
 *
 * The requested channel count may be overridden by the SDL_AUDIO_CHANNELS
 * environment variable. If the device cannot be opened with the requested
 * parameters, other channel counts and then lower sample rates are tried
 * until one works or the fallback tables are exhausted. The opened device
 * id is stored in the file-level audio_dev.
 *
 * @param opaque                 user pointer handed to sdl_audio_callback
 * @param wanted_channel_layout  preferred channel layout (0 = derive from count)
 * @param wanted_nb_channels     preferred number of channels
 * @param wanted_sample_rate     preferred sample rate in Hz
 * @param audio_hw_params        filled with the parameters actually in use
 * @return the size field of the obtained SDL spec on success, -1 on failure
 */
static int audio_open(void *opaque, int64_t wanted_channel_layout, int wanted_nb_channels, int wanted_sample_rate, struct AudioParams *audio_hw_params)
{
    /* Indexed by the channel count that just failed (clamped to 7): the next
     * count to try. 0 means "no more channel counts for this sample rate". */
    static const int fallback_channels[] = {0, 0, 1, 6, 2, 6, 4, 6};
    /* Sample rates to fall back to, walked downwards; 0 terminates the search. */
    static const int fallback_rates[] = {0, 44100, 48000, 96000, 192000};
    SDL_AudioSpec desired, obtained;
    int rate_idx;
    const char *channels_env = SDL_getenv("SDL_AUDIO_CHANNELS");

    /* Environment override for the channel count. */
    if (channels_env) {
        wanted_nb_channels    = atoi(channels_env);
        wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
    }
    /* Make layout and channel count agree; fall back to the default layout. */
    if (!wanted_channel_layout ||
        wanted_nb_channels != av_get_channel_layout_nb_channels(wanted_channel_layout)) {
        wanted_channel_layout  = av_get_default_channel_layout(wanted_nb_channels);
        wanted_channel_layout &= ~AV_CH_LAYOUT_STEREO_DOWNMIX;
    }
    /* From here on the channel count is whatever the layout implies. */
    wanted_nb_channels = av_get_channel_layout_nb_channels(wanted_channel_layout);

    desired.channels = wanted_nb_channels;
    desired.freq     = wanted_sample_rate;
    if (desired.freq <= 0 || desired.channels <= 0) {
        av_log(NULL, AV_LOG_ERROR, "Invalid sample rate or channel count!\n");
        return -1;
    }

    /* Position the fallback index on the highest listed rate that is
     * strictly below the requested one (or on the 0 terminator). */
    for (rate_idx = FF_ARRAY_ELEMS(fallback_rates) - 1;
         rate_idx && fallback_rates[rate_idx] >= desired.freq;
         rate_idx--)
        ;

    /* The output format is fixed to signed 16-bit, native endianness. */
    desired.format   = AUDIO_S16SYS;
    desired.silence  = 0;
    /* Buffer size in sample frames: a power of two derived from
     * freq / SDL_AUDIO_MAX_CALLBACKS_PER_SEC, but never below
     * SDL_AUDIO_MIN_BUFFER_SIZE. */
    desired.samples  = FFMAX(SDL_AUDIO_MIN_BUFFER_SIZE, 2 << av_log2(desired.freq / SDL_AUDIO_MAX_CALLBACKS_PER_SEC));
    /* SDL calls back with a buffer pointer and length; the callback fills it. */
    desired.callback = sdl_audio_callback;
    desired.userdata = opaque;

    /* Keep trying until a device opens or every combination was tested. */
    for (;;) {
        audio_dev = SDL_OpenAudioDevice(NULL, 0, &desired, &obtained,
                                        SDL_AUDIO_ALLOW_FREQUENCY_CHANGE | SDL_AUDIO_ALLOW_CHANNELS_CHANGE);
        if (audio_dev)
            break;

        av_log(NULL, AV_LOG_WARNING, "SDL_OpenAudio (%d channels, %d Hz): %s\n",
               desired.channels, desired.freq, SDL_GetError());

        desired.channels = fallback_channels[FFMIN(7, desired.channels)];
        if (!desired.channels) {
            /* Channel options exhausted: step down one sample rate and
             * restart from the originally wanted channel count. */
            desired.freq     = fallback_rates[rate_idx--];
            desired.channels = wanted_nb_channels;
            if (!desired.freq) {
                av_log(NULL, AV_LOG_ERROR,
                       "No more combinations to try, audio open failed\n");
                return -1;
            }
        }
        wanted_channel_layout = av_get_default_channel_layout(desired.channels);
    }

    /* Only the sample format that was asked for is supported. */
    if (obtained.format != AUDIO_S16SYS) {
        av_log(NULL, AV_LOG_ERROR,
               "SDL advised audio format %d is not supported!\n", obtained.format);
        return -1;
    }
    /* SDL may have changed the channel count; pick a matching layout. */
    if (obtained.channels != desired.channels) {
        wanted_channel_layout = av_get_default_channel_layout(obtained.channels);
        if (!wanted_channel_layout) {
            av_log(NULL, AV_LOG_ERROR,
                   "SDL advised channel count %d is not supported!\n", obtained.channels);
            return -1;
        }
    }

    /* Record what the device actually uses. */
    audio_hw_params->fmt            = AV_SAMPLE_FMT_S16;
    audio_hw_params->freq           = obtained.freq;
    audio_hw_params->channel_layout = wanted_channel_layout;
    audio_hw_params->channels       = obtained.channels;
    /* Bytes for one sample of every channel, and bytes for one second. */
    audio_hw_params->frame_size     = av_samples_get_buffer_size(NULL, audio_hw_params->channels, 1, audio_hw_params->fmt, 1);
    audio_hw_params->bytes_per_sec  = av_samples_get_buffer_size(NULL, audio_hw_params->channels, audio_hw_params->freq, audio_hw_params->fmt, 1);
    if (audio_hw_params->bytes_per_sec <= 0 || audio_hw_params->frame_size <= 0) {
        av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size failed\n");
        return -1;
    }

    return obtained.size;
}
这里最值得注意的就是
wanted_spec.callback = sdl_audio_callback;
SDL播放音频的话,会将每次需要播放的数据,根据callback将数据位置传递给callback,
我们需要在callback
中自定义对音频数据的填充,就可以完成播放。
音频解码线程audio_thread
在read_thread中,对应音频流时,初始化好了AVCodec和AVCodecContext,并通过decoder_start方法,开启了audio_thread。
其整体流程与视频部分的video_thread类似:在video_thread中需要创建AVFrame来接收解码后的数据,并确定视频的帧率。
然后开启解码循环:
不断地从队列中获取解码前的数据,然后送入解码器解码;
再得到解码后的数据,再送入对应的队列当中。
初始化参数
创建AVFrame和得到大致的视频帧率
// Allocate the AVFrame that will receive the decoded data.
AVFrame *frame = av_frame_alloc();
// Take the time base from the video stream (this snippet reads is->video_st).
AVRational tb = is->video_st->time_base;
// Guess the frame rate of the video stream.
AVRational frame_rate = av_guess_frame_rate(is->ic, is->video_st, NULL);
开始循环解码
解码的方式和视频播放相同。这儿就不做过多解释了。解码完,同样送入队列当中。
音频设置部分
正如上面所述,我们需要在传入的callback
中对我们的数据进行处理。callback也同样运行在SDL中创建的RunAudio线程。
SDL具体的运行方式,可见SDL2库(4)-Android 端源码简要分析(AudioSubSystem);
/**
 * SDL audio callback: fill the device buffer with decoded audio.
 *
 * Decoded data is taken from is->audio_buf; whenever that buffer is used up,
 * audio_decode_frame() is called for more. On decode failure a block of
 * silence is emitted instead. Afterwards the audio clock is updated.
 *
 * @param opaque  the VideoState passed as SDL userdata
 * @param stream  start of the buffer to fill
 * @param len     number of bytes to write into stream
 */
static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
{
    VideoState *is = opaque;
    Uint8 *dst     = stream;
    int remaining  = len;

    audio_callback_time = av_gettime_relative();

    while (remaining > 0) {
        int chunk;

        /* Current buffer fully consumed: fetch the next decoded frame. */
        if (is->audio_buf_index >= is->audio_buf_size) {
            int decoded = audio_decode_frame(is);

            if (decoded >= 0) {
                if (is->show_mode != SHOW_MODE_VIDEO)
                    update_sample_display(is, (int16_t *)is->audio_buf, decoded);
                is->audio_buf_size = decoded;
            } else {
                /* if error, just output silence: no buffer, and a size that
                 * is a whole number of sample frames */
                is->audio_buf      = NULL;
                is->audio_buf_size = SDL_AUDIO_MIN_BUFFER_SIZE / is->audio_tgt.frame_size * is->audio_tgt.frame_size;
            }
            is->audio_buf_index = 0;
        }

        /* Bytes still available in the current buffer, capped to the space
         * left in the SDL buffer. */
        chunk = is->audio_buf_size - is->audio_buf_index;
        if (chunk > remaining)
            chunk = remaining;

        if (is->muted || !is->audio_buf) {
            /* Muted or nothing decoded: silence. */
            memset(dst, 0, chunk);
        } else if (is->audio_volume == SDL_MIX_MAXVOLUME) {
            /* Full volume: plain copy, no mixing needed. */
            memcpy(dst, (uint8_t *)is->audio_buf + is->audio_buf_index, chunk);
        } else {
            /* Reduced volume: mix onto silence with the requested volume. */
            memset(dst, 0, chunk);
            SDL_MixAudioFormat(dst, (uint8_t *)is->audio_buf + is->audio_buf_index, AUDIO_S16SYS, chunk, is->audio_volume);
        }

        /* Advance; loop again if the SDL buffer still has room. */
        remaining           -= chunk;
        dst                 += chunk;
        is->audio_buf_index += chunk;
    }

    /* Bytes of the current decoded buffer not yet handed to SDL. */
    is->audio_write_buf_size = is->audio_buf_size - is->audio_buf_index;

    /* Let's assume the audio driver that is used by SDL has two periods. */
    if (!isnan(is->audio_clock)) {
        /* The clock is the end time of the last decoded frame minus the
         * duration of everything that has not been played yet. */
        set_clock_at(&is->audclk,
                     is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec,
                     is->audio_clock_serial,
                     audio_callback_time / 1000000.0);
        sync_clock_to_slave(&is->extclk, &is->audclk);
    }
}
audio_decode_frame
/**
 * Take one decoded audio frame from the sample queue and make it available
 * in is->audio_buf, converting it with libswresample when its format does
 * not match the current source parameters or when sample-count compensation
 * is requested by synchronize_audio().
 *
 * Also updates is->audio_clock to the end time of the frame (or NAN when the
 * frame has no pts) and is->audio_clock_serial.
 *
 * @return number of bytes now available in is->audio_buf, or a negative
 *         value (-1 or AVERROR(ENOMEM)) when no data could be produced
 */
static int audio_decode_frame(VideoState *is)
{
    Frame *entry;
    int64_t src_layout;
    int target_samples;
    int raw_bytes, out_bytes;
    int reconfigure;
    av_unused double prev_clock;

    if (is->paused)
        return -1;

    /* Pop frames until one belongs to the current packet-queue serial. */
    for (;;) {
        entry = frame_queue_peek_readable(&is->sampq);
        if (!entry)
            return -1;
        frame_queue_next(&is->sampq);
        if (entry->serial == is->audioq.serial)
            break;
    }

    /* Size in bytes of the frame as decoded. */
    raw_bytes = av_samples_get_buffer_size(NULL, entry->frame->channels,
                                           entry->frame->nb_samples,
                                           entry->frame->format, 1);

    /* Trust the frame's layout only if it matches its channel count. */
    if (entry->frame->channel_layout &&
        entry->frame->channels == av_get_channel_layout_nb_channels(entry->frame->channel_layout))
        src_layout = entry->frame->channel_layout;
    else
        src_layout = av_get_default_channel_layout(entry->frame->channels);

    target_samples = synchronize_audio(is, entry->frame->nb_samples);

    /* (Re)create the resampler when the incoming format changed, or when
     * compensation is wanted and no resampler exists yet. */
    reconfigure = entry->frame->format      != is->audio_src.fmt            ||
                  src_layout                != is->audio_src.channel_layout ||
                  entry->frame->sample_rate != is->audio_src.freq           ||
                  (target_samples != entry->frame->nb_samples && !is->swr_ctx);
    if (reconfigure) {
        swr_free(&is->swr_ctx);
        is->swr_ctx = swr_alloc_set_opts(NULL,
                                         is->audio_tgt.channel_layout, is->audio_tgt.fmt, is->audio_tgt.freq,
                                         src_layout, entry->frame->format, entry->frame->sample_rate,
                                         0, NULL);
        if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
            av_log(NULL, AV_LOG_ERROR,
                   "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
                   entry->frame->sample_rate, av_get_sample_fmt_name(entry->frame->format), entry->frame->channels,
                   is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
            swr_free(&is->swr_ctx);
            return -1;
        }
        /* Remember the source format the resampler was built for. */
        is->audio_src.channel_layout = src_layout;
        is->audio_src.channels       = entry->frame->channels;
        is->audio_src.freq           = entry->frame->sample_rate;
        is->audio_src.fmt            = entry->frame->format;
    }

    if (!is->swr_ctx) {
        /* No conversion needed: play straight from the frame. */
        is->audio_buf = entry->frame->data[0];
        out_bytes     = raw_bytes;
    } else {
        const uint8_t **src_planes = (const uint8_t **)entry->frame->extended_data;
        uint8_t **dst_planes       = &is->audio_buf1;
        /* Expected output sample count plus 256 samples of headroom.
         * NOTE(review): presumably room for samples buffered inside the
         * resampler — confirm. */
        int max_out   = (int64_t)target_samples * is->audio_tgt.freq / entry->frame->sample_rate + 256;
        int dst_bytes = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, max_out, is->audio_tgt.fmt, 0);
        int converted;

        if (dst_bytes < 0) {
            av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
            return -1;
        }
        /* Ask the resampler to stretch/shrink towards the wanted count. */
        if (target_samples != entry->frame->nb_samples) {
            int delta    = (target_samples - entry->frame->nb_samples) * is->audio_tgt.freq / entry->frame->sample_rate;
            int distance = target_samples * is->audio_tgt.freq / entry->frame->sample_rate;

            if (swr_set_compensation(is->swr_ctx, delta, distance) < 0) {
                av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
                return -1;
            }
        }
        av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, dst_bytes);
        if (!is->audio_buf1)
            return AVERROR(ENOMEM);

        converted = swr_convert(is->swr_ctx, dst_planes, max_out, src_planes, entry->frame->nb_samples);
        if (converted < 0) {
            av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
            return -1;
        }
        if (converted == max_out) {
            /* Output filled completely: re-initialise the resampler, and
             * drop it if that fails. */
            av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
            if (swr_init(is->swr_ctx) < 0)
                swr_free(&is->swr_ctx);
        }
        is->audio_buf = is->audio_buf1;
        /* Bytes actually produced by the conversion. */
        out_bytes = converted * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);
    }

    prev_clock = is->audio_clock;
    /* update the audio clock with the pts: frame pts plus frame duration,
     * i.e. the time at which this frame's last sample ends */
    is->audio_clock = isnan(entry->pts)
                    ? NAN
                    : entry->pts + (double) entry->frame->nb_samples / entry->frame->sample_rate;
    is->audio_clock_serial = entry->serial;
#ifdef DEBUG
    {
        static double last_clock;
        printf("audio: delay=%0.3f clock=%0.3f clock0=%0.3f\n",
               is->audio_clock - last_clock,
               is->audio_clock, prev_clock);
        last_clock = is->audio_clock;
    }
#endif
    return out_bytes;
}
这里值得注意的有两点:
- 如果解码出来的音频数据不是Output(输出设备)所需的格式,是需要进行转码(重采样)的
- 设置时间戳。其值为当前这一帧的所有音频数据播放完时的时间(pts + nb_samples / sample_rate)。
在之前的sdl_audio_callback
中,我们可以看到最后的同步时间戳。
set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
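最后传入同步的时间戳,是当前完整帧播放结束时的时间戳(audio_clock),减去尚未真正播放的数据所对应的时长:即audio_buf中还未写入SDL的剩余字节数(audio_write_buf_size)加上2个硬件buffer的大小(2 * audio_hw_buf_size),再除以每秒的字节数(bytes_per_sec)。
因为我们在写入的时候,还需要考虑传入的buffer的大小;预期情况下,如果解码出的数据恰好全部写入(audio_write_buf_size为0),则这里就是原来的pts减去硬件延迟的时间。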