In earlier posts we decoded video with FFmpeg and rendered it with OpenGL ES. In this post we will play audio natively by feeding it into OpenSL ES.
OpenSL ES is an audio specification, likewise defined by The Khronos Group Inc. There is plenty of material about it online, and the Android NDK ships a sample,
native-audio, which demonstrates the various OpenSL ES features in detail, so this post will not go over the theory again; the calls essentially follow a fixed sequence.
When decoding with FFmpeg, the most suitable approach is buffer-queue management: decoded PCM data is continuously enqueued into the player's buffers, and the audio hardware then plays the PCM. For audio fundamentals such as sample rate and per-channel sample width, please read up separately.
The overall flow still resembles the video playback design from the earlier posts, but it is simpler because no surface management is involved. Note the following points:
1. createEngine() and createBufferQueueAudioPlayer() are called after the media file has been opened, because they depend on the media's audio properties (channel count, sample rate, and so on);
2. The player's PCM data is read from the media packet queue and decoded inside a callback, rather than pushed by a dedicated thread; this differs from the video decoding path.
3. Because the playback speed of audio is determined by its sample rate, there is no need to pace it; the too-fast/too-slow frame timing problem of video playback does not arise.
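To make point 3 concrete, here is a quick back-of-the-envelope calculation (the stream parameters are illustrative, not taken from this project): the hardware drains each enqueued buffer at a rate fixed entirely by the PCM format, so the enqueue callback needs no extra timing logic.
#include <cstdio>

int main() {
    // Illustrative stream: 44.1 kHz, stereo, 16-bit samples (AV_SAMPLE_FMT_S16).
    const int sample_rate = 44100;     // samples per second, per channel
    const int channels = 2;
    const int bytes_per_sample = 2;    // 16-bit
    const int bytes_per_second = sample_rate * channels * bytes_per_sample;  // 176400

    const int buffer_bytes = 4096;     // size of one enqueued PCM buffer
    const double buffer_ms = 1000.0 * buffer_bytes / bytes_per_second;

    // Prints roughly 23.2 ms: the chip consumes bytes at exactly
    // bytes_per_second, and that alone paces playback.
    printf("a %d-byte buffer plays for %.1f ms\n", buffer_bytes, buffer_ms);
    return 0;
}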
audio-jni.cpp implements the JNI-facing functions; the native code is built into the libaudio-jni.so library.
audio.cpp handles the audio management based on OpenSL ES.
player.cpp opens and plays the media file (based on FFmpeg).
Below is the source of audio-jni.cpp, the core of this project. Pay special attention to the following points, or audio playback will not succeed:
1. In createBufferQueueAudioPlayer(), the second field of SLDataFormat_PCM format_pcm is the channel count and the third is the sample rate, which OpenSL ES expresses in milliHertz, i.e. the FFmpeg value multiplied by 1000. Also watch the channelMask field: it must match the channel count. For stereo, the two speakers must be ORed together (SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT) rather than simply set to SL_SPEAKER_FRONT_CENTER, otherwise CreateAudioPlayer fails; I ran into exactly this problem (a condensed sketch follows this list).
2. After SetPlayState switches the player to SL_PLAYSTATE_PLAYING, it does not invoke the callback on its own to fetch data, so we must call the callback once ourselves (via the fireOnPlayer() function) to kick off the reading.
3. Unlike SDL, the callback is not told how much data the lower layer needs, so we can keep things simple: read a single packet, decode it, and enqueue the result. Once that data is consumed the callback fires again to read more PCM, so efficiency does not suffer.
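The sketch below condenses point 1, mirroring the full createBufferQueueAudioPlayer() further down; channels and sample_rate stand in for the values taken from the opened stream's codec context:
#include <SLES/OpenSLES.h>
#include <SLES/OpenSLES_Android.h>

// Condensed from createBufferQueueAudioPlayer() below. channels and
// sample_rate stand in for the opened stream's codec parameters.
static SLDataFormat_PCM make_pcm_format(int channels, int sample_rate) {
    // The mask must agree with the channel count: for stereo, OR the two
    // front speakers; SL_SPEAKER_FRONT_CENTER alone makes CreateAudioPlayer fail.
    SLuint32 channelMask = (channels == 2)
            ? (SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT)
            : SL_SPEAKER_FRONT_CENTER;
    SLDataFormat_PCM format_pcm = {
        SL_DATAFORMAT_PCM,
        (SLuint32) channels,               // channel count
        (SLuint32) (sample_rate * 1000),   // OpenSL ES wants milliHertz: Hz * 1000
        SL_PCMSAMPLEFORMAT_FIXED_16,       // bits per sample
        SL_PCMSAMPLEFORMAT_FIXED_16,       // container size
        channelMask,                       // must correspond to the channel count
        SL_BYTEORDER_LITTLEENDIAN
    };
    return format_pcm;
}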
#include <jni.h>
#include <pthread.h>
#include <unistd.h>
#include "com_opensles_ffmpeg_MainActivity.h"
#include "player.h"
// for native audio
#include <SLES/OpenSLES.h>
#include <SLES/OpenSLES_Android.h>
// engine interfaces
static SLObjectItf engineObject = NULL;
static SLEngineItf engineEngine;
// output mix interfaces
static SLObjectItf outputMixObject = NULL;
static SLEnvironmentalReverbItf outputMixEnvironmentalReverb = NULL;
// buffer queue player interfaces
static SLObjectItf bqPlayerObject = NULL;
static SLPlayItf bqPlayerPlay;
static SLAndroidSimpleBufferQueueItf bqPlayerBufferQueue;
static SLEffectSendItf bqPlayerEffectSend;
static SLVolumeItf bqPlayerVolume;
static uint8_t decoded_audio_buf[AVCODEC_MAX_AUDIO_FRAME_SIZE];
// this callback handler is called every time a buffer finishes playing
void bqPlayerCallback(SLAndroidSimpleBufferQueueItf bq, void *context) {
SLresult result;
//LOGV2("bqPlayerCallback...");
if (bq != bqPlayerBufferQueue) {
LOGV2("bqPlayerCallback : not the same player object.");
return;
}
int decoded_size = audio_decode_frame(decoded_audio_buf,
sizeof(decoded_audio_buf));
if (decoded_size > 0) {
result = (*bqPlayerBufferQueue)->Enqueue(bqPlayerBufferQueue,
decoded_audio_buf, decoded_size);
// the most likely other result is SL_RESULT_BUFFER_INSUFFICIENT,
// which for this code example would indicate a programming error
if (SL_RESULT_SUCCESS != result) {
LOGV2("bqPlayerCallback : bqPlayerBufferQueue Enqueue failure.");
}
}
}
int createEngine() {
SLresult result;
// create engine
result = slCreateEngine(&engineObject, 0, NULL, 0, NULL, NULL);
if (SL_RESULT_SUCCESS != result) {
LOGV2("slCreateEngine failure.");
return -1;
}
// realize the engine
result = (*engineObject)->Realize(engineObject, SL_BOOLEAN_FALSE );
if (SL_RESULT_SUCCESS != result) {
LOGV2("engineObject Realize failure.");
(*engineObject)->Destroy(engineObject);
engineObject = NULL;
engineEngine = NULL;
return -1;
}
// get the engine interface, which is needed in order to create other objects
result = (*engineObject)->GetInterface(engineObject, SL_IID_ENGINE,
&engineEngine);
if (SL_RESULT_SUCCESS != result) {
LOGV2("engineObject GetInterface failure.");
(*engineObject)->Destroy(engineObject);
engineObject = NULL;
engineEngine = NULL;
return -1;
}
// create output mix, with environmental reverb specified as a non-required interface
const SLInterfaceID ids[1] = { SL_IID_ENVIRONMENTALREVERB };
const SLboolean req[1] = { SL_BOOLEAN_FALSE };
result = (*engineEngine)->CreateOutputMix(engineEngine, &outputMixObject, 1,
ids, req);
if (SL_RESULT_SUCCESS != result) {
LOGV2("engineObject CreateOutputMix failure.");
(*engineObject)->Destroy(engineObject);
engineObject = NULL;
engineEngine = NULL;
return -1;
}
// realize the output mix
result = (*outputMixObject)->Realize(outputMixObject, SL_BOOLEAN_FALSE );
if (SL_RESULT_SUCCESS != result) {
LOGV2("outputMixObject Realize failure.");
(*outputMixObject)->Destroy(outputMixObject);
outputMixObject = NULL;
outputMixEnvironmentalReverb = NULL;
(*engineObject)->Destroy(engineObject);
engineObject = NULL;
engineEngine = NULL;
return -1;
}
// get the environmental reverb interface
// this could fail if the environmental reverb effect is not available,
// either because the feature is not present, excessive CPU load, or
// the required MODIFY_AUDIO_SETTINGS permission was not requested and granted
result = (*outputMixObject)->GetInterface(outputMixObject,
SL_IID_ENVIRONMENTALREVERB, &outputMixEnvironmentalReverb);
if (SL_RESULT_SUCCESS != result) {
LOGV2("outputMixObject Realize failure.");
(*outputMixObject)->Destroy(outputMixObject);
outputMixObject = NULL;
outputMixEnvironmentalReverb = NULL;
(*engineObject)->Destroy(engineObject);
engineObject = NULL;
engineEngine = NULL;
return -1;
}
LOGV2("OpenSL ES createEngine success.");
return 0;
}
int createBufferQueueAudioPlayer() {
SLresult result;
SLuint32 channelMask;
// configure audio source
SLDataLocator_AndroidSimpleBufferQueue loc_bufq = {
SL_DATALOCATOR_ANDROIDSIMPLEBUFFERQUEUE, 2 };
if (global_context.acodec_ctx->channels == 2)
channelMask = SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT;
else
channelMask = SL_SPEAKER_FRONT_CENTER;
SLDataFormat_PCM format_pcm = { SL_DATAFORMAT_PCM,
global_context.acodec_ctx->channels,
global_context.acodec_ctx->sample_rate * 1000,
SL_PCMSAMPLEFORMAT_FIXED_16, SL_PCMSAMPLEFORMAT_FIXED_16,
channelMask, SL_BYTEORDER_LITTLEENDIAN };
SLDataSource audioSrc = { &loc_bufq, &format_pcm };
// configure audio sink
SLDataLocator_OutputMix loc_outmix = { SL_DATALOCATOR_OUTPUTMIX,
outputMixObject };
SLDataSink audioSnk = { &loc_outmix, NULL };
// create audio player
const SLInterfaceID ids[3] = { SL_IID_BUFFERQUEUE, SL_IID_EFFECTSEND,
SL_IID_VOLUME };
const SLboolean req[3] =
{ SL_BOOLEAN_TRUE, SL_BOOLEAN_TRUE, SL_BOOLEAN_TRUE };
result = (*engineEngine)->CreateAudioPlayer(engineEngine, &bqPlayerObject,
&audioSrc, &audioSnk, 3, ids, req);
if (SL_RESULT_SUCCESS != result) {
LOGV2("CreateAudioPlayer failure.");
return -1;
}
// realize the player
result = (*bqPlayerObject)->Realize(bqPlayerObject, SL_BOOLEAN_FALSE );
if (SL_RESULT_SUCCESS != result) {
LOGV2("bqPlayerObject Realize failure.");
return -1;
}
// get the play interface
result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_PLAY,
&bqPlayerPlay);
if (SL_RESULT_SUCCESS != result) {
LOGV2("bqPlayerObject GetInterface failure.");
return -1;
}
// get the buffer queue interface
result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_BUFFERQUEUE,
&bqPlayerBufferQueue);
if (SL_RESULT_SUCCESS != result) {
LOGV2("bqPlayerObject GetInterface failure.");
return -1;
}
// register callback on the buffer queue
result = (*bqPlayerBufferQueue)->RegisterCallback(bqPlayerBufferQueue,
bqPlayerCallback, NULL);
if (SL_RESULT_SUCCESS != result) {
LOGV2("bqPlayerObject RegisterCallback failure.");
return -1;
}
// get the effect send interface
result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_EFFECTSEND,
&bqPlayerEffectSend);
if (SL_RESULT_SUCCESS != result) {
LOGV2("bqPlayerObject GetInterface SL_IID_EFFECTSEND failure.");
return -1;
}
// get the volume interface
result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_VOLUME,
&bqPlayerVolume);
if (SL_RESULT_SUCCESS != result) {
LOGV2("bqPlayerObject GetInterface SL_IID_VOLUME failure.");
return -1;
}
// set the player's state to playing
result = (*bqPlayerPlay)->SetPlayState(bqPlayerPlay, SL_PLAYSTATE_PLAYING );
if (SL_RESULT_SUCCESS != result) {
LOGV2("bqPlayerObject SetPlayState SL_PLAYSTATE_PLAYING failure.");
return -1;
}
LOGV2("OpenSL ES CreateAudioPlayer success.");
return 0;
}
void fireOnPlayer() {
bqPlayerCallback(bqPlayerBufferQueue, NULL);
}
/**
* Destroys the given object instance.
*
* @param object object instance. [IN/OUT]
*/
static void DestroyObject(SLObjectItf& object) {
if (0 != object)
(*object)->Destroy(object);
object = 0;
}
void destroyPlayerAndEngine() {
// Destroy audio player object
DestroyObject(bqPlayerObject);
// Destroy output mix object
DestroyObject(outputMixObject);
// Destroy the engine instance
DestroyObject(engineObject);
}
/*
* Class: com_opensles_ffmpeg_MainActivity
* Method: startAudioPlayer
* Signature: ()I
 */
JNIEXPORT jint JNICALL Java_com_opensles_ffmpeg_MainActivity_startAudioPlayer(
JNIEnv *, jclass) {
pthread_t thread;
pthread_create(&thread, NULL, open_media, NULL);
return 0;
}
/*
* Class: com_opensles_ffmpeg_MainActivity
* Method: destroyEngine
* Signature: ()I
 */
JNIEXPORT jint JNICALL Java_com_opensles_ffmpeg_MainActivity_destroyEngine(
JNIEnv *, jclass) {
destroyPlayerAndEngine();
return 0;
}
/*
* Class: com_opensles_ffmpeg_MainActivity
* Method: stopAudioPlayer
* Signature: ()I
 */
JNIEXPORT jint JNICALL Java_com_opensles_ffmpeg_MainActivity_stopAudioPlayer(
JNIEnv *, jclass) {
(*bqPlayerPlay)->SetPlayState(bqPlayerPlay, SL_PLAYSTATE_STOPPED );
global_context.pause = 1;
global_context.quit = 1;
usleep(50000);
return 0;
}
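player.cpp itself is not listed in this post. As a rough sketch of how points 1 and 2 above fit together, the open_media() thread might wire things up like this; apart from the functions shown above, the helper names here are hypothetical:
// Hypothetical outline of the open_media() thread in player.cpp: the engine
// and player are created only after the file is opened, because their
// parameters come from the stream, and the first callback is fired by hand.
void *open_media(void *arg) {
    if (open_input_and_find_streams() < 0)   // hypothetical FFmpeg setup helper
        return NULL;
    if (createEngine() < 0)                  // from audio-jni.cpp above
        return NULL;
    if (createBufferQueueAudioPlayer() < 0)  // needs channels / sample_rate
        return NULL;
    start_packet_reader_thread();            // hypothetical: fills the audio queue
    fireOnPlayer();                          // prime the buffer-queue callback once
    return NULL;
}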
Below is the source of audio.cpp. audio_decode_frame() decodes the audio data with FFmpeg's avcodec_decode_audio4(), while init_filter_graph(), av_buffersrc_add_frame() and av_buffersink_get_frame() convert every decoded frame to a single storage format, AV_SAMPLE_FMT_S16 (the sample rate and channel count keep the original stream's properties). The benefit is that we no longer have to care that different audio codecs decode into different PCM sample formats.
#include "player.h"
#define DECODE_AUDIO_BUFFER_SIZE ((AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) )
static AVFilterContext *in_audio_filter; // the first filter in the audio chain
static AVFilterContext *out_audio_filter; // the last filter in the audio chain
static AVFilterGraph *agraph; // audio filter graph
static struct AudioParams audio_filter_src;
static int init_filter_graph(AVFilterGraph **graph, AVFilterContext **src,
AVFilterContext **sink) {
AVFilterGraph *filter_graph;
AVFilterContext *abuffer_ctx;
AVFilter *abuffer;
AVFilterContext *aformat_ctx;
AVFilter *aformat;
AVFilterContext *abuffersink_ctx;
AVFilter *abuffersink;
char options_str[1024];
char ch_layout[64];
int err;
/* Create a new filtergraph, which will contain all the filters. */
filter_graph = avfilter_graph_alloc();
if (!filter_graph) {
av_log(NULL, AV_LOG_ERROR, "Unable to create filter graph.\n");
return AVERROR(ENOMEM);
}
/* Create the abuffer filter;
* it will be used for feeding the data into the graph. */
abuffer = avfilter_get_by_name("abuffer");
if (!abuffer) {
av_log(NULL, AV_LOG_ERROR, "Could not find the abuffer filter.\n");
return AVERROR_FILTER_NOT_FOUND ;
}
abuffer_ctx = avfilter_graph_alloc_filter(filter_graph, abuffer, "src");
if (!abuffer_ctx) {
av_log(NULL, AV_LOG_ERROR,
"Could not allocate the abuffer instance.\n");
return AVERROR(ENOMEM);
}
/* Set the filter options through the AVOptions API. */
av_get_channel_layout_string(ch_layout, sizeof(ch_layout), (int) 0,
audio_filter_src.channel_layout);
av_opt_set(abuffer_ctx, "channel_layout", ch_layout,
AV_OPT_SEARCH_CHILDREN);
av_opt_set(abuffer_ctx, "sample_fmt",
av_get_sample_fmt_name(audio_filter_src.fmt),
AV_OPT_SEARCH_CHILDREN);
av_opt_set_q(abuffer_ctx, "time_base",
(AVRational ) { 1, audio_filter_src.freq },
AV_OPT_SEARCH_CHILDREN);
av_opt_set_int(abuffer_ctx, "sample_rate", audio_filter_src.freq,
AV_OPT_SEARCH_CHILDREN);
/* Now initialize the filter; we pass NULL options, since we have already
* set all the options above. */
err = avfilter_init_str(abuffer_ctx, NULL);
if (err < 0) {
av_log(NULL, AV_LOG_ERROR,
"Could not initialize the abuffer filter.\n");
return err;
}
/* Create the aformat filter;
* it ensures that the output is of the format we want. */
aformat = avfilter_get_by_name("aformat");
if (!aformat) {
av_log(NULL, AV_LOG_ERROR, "Could not find the aformat filter.\n");
return AVERROR_FILTER_NOT_FOUND ;
}
aformat_ctx = avfilter_graph_alloc_filter(filter_graph, aformat, "aformat");
if (!aformat_ctx) {
av_log(NULL, AV_LOG_ERROR,
"Could not allocate the aformat instance.\n");
return AVERROR(ENOMEM);
}
/* A third way of passing the options is in a string of the form
* key1=value1:key2=value2.... */
snprintf(options_str, sizeof(options_str),
"sample_fmts=%s:sample_rates=%d:channel_layouts=0x%llx",
av_get_sample_fmt_name(AV_SAMPLE_FMT_S16), audio_filter_src.freq,
(unsigned long long) audio_filter_src.channel_layout);
err = avfilter_init_str(aformat_ctx, options_str);
if (err < 0) {
av_log(NULL, AV_LOG_ERROR,
"Could not initialize the aformat filter.\n");
return err;
}
/* Finally create the abuffersink filter;
* it will be used to get the filtered data out of the graph. */
abuffersink = avfilter_get_by_name("abuffersink");
if (!abuffersink) {
av_log(NULL, AV_LOG_ERROR, "Could not find the abuffersink filter.\n");
return AVERROR_FILTER_NOT_FOUND ;
}
abuffersink_ctx = avfilter_graph_alloc_filter(filter_graph, abuffersink,
"sink");
if (!abuffersink_ctx) {
av_log(NULL, AV_LOG_ERROR,
"Could not allocate the abuffersink instance.\n");
return AVERROR(ENOMEM);
}
/* This filter takes no options. */
err = avfilter_init_str(abuffersink_ctx, NULL);
if (err < 0) {
av_log(NULL, AV_LOG_ERROR,
"Could not initialize the abuffersink instance.\n");
return err;
}
/* Connect the filters;
* in this simple case the filters just form a linear chain. */
err = avfilter_link(abuffer_ctx, 0, aformat_ctx, 0);
if (err >= 0) {
err = avfilter_link(aformat_ctx, 0, abuffersink_ctx, 0);
}
if (err < 0) {
av_log(NULL, AV_LOG_ERROR, "Error connecting filters\n");
return err;
}
/* Configure the graph. */
err = avfilter_graph_config(filter_graph, NULL);
if (err < 0) {
av_log(NULL, AV_LOG_ERROR, "Error configuring the filter graph\n");
return err;
}
*graph = filter_graph;
*src = abuffer_ctx;
*sink = abuffersink_ctx;
return 0;
}
static inline int64_t get_valid_channel_layout(int64_t channel_layout,
int channels) {
if (channel_layout
&& av_get_channel_layout_nb_channels(channel_layout) == channels) {
return channel_layout;
} else {
return 0;
}
}
// decode a new packet (not multi-frame)
// returns the decoded frame size, not the consumed packet size
int audio_decode_frame(uint8_t *audio_buf, int buf_size) {
static AVPacket pkt;
static uint8_t *audio_pkt_data = NULL;
static int audio_pkt_size = 0;
int len1, data_size;
int got_frame;
AVFrame * frame = NULL;
static int reconfigure = 1;
int ret = -1;
for (;;) {
while (audio_pkt_size > 0) {
if (NULL == frame) {
frame = av_frame_alloc();
}
data_size = buf_size;
got_frame = 0;
// len1 is the number of packet bytes consumed by the decoder;
// < 0 means failure or error, so drop this packet and
// break to get a new one
len1 = avcodec_decode_audio4(global_context.acodec_ctx, frame,
&got_frame, &pkt);
if (len1 < 0) {
audio_pkt_size = 0;
av_log(NULL, AV_LOG_ERROR,
"avcodec_decode_audio4 failure. \n");
break;
}
if (got_frame) {
if (reconfigure) {
reconfigure = 0;
int64_t dec_channel_layout = get_valid_channel_layout(
frame->channel_layout,
av_frame_get_channels(frame));
// used by init_filter_graph()
audio_filter_src.fmt = (enum AVSampleFormat) frame->format;
audio_filter_src.channels = av_frame_get_channels(frame);
audio_filter_src.channel_layout = dec_channel_layout;
audio_filter_src.freq = frame->sample_rate;
init_filter_graph(&agraph, &in_audio_filter,
&out_audio_filter);
}
if ((ret = av_buffersrc_add_frame(in_audio_filter, frame))
< 0) {
av_log(NULL, AV_LOG_ERROR,
"av_buffersrc_add_frame : failure. \n");
return ret;
}
if ((ret = av_buffersink_get_frame(out_audio_filter, frame))
< 0) {
av_log(NULL, AV_LOG_ERROR,
"av_buffersink_get_frame : failure. \n");
continue;
}
data_size = av_samples_get_buffer_size(NULL, frame->channels,
frame->nb_samples, (enum AVSampleFormat) frame->format,
1);
// decoded data to audio buf
memcpy(audio_buf, frame->data[0], data_size);
audio_pkt_data += len1;
audio_pkt_size -= len1;
int n = 2 * global_context.acodec_ctx->channels;
/*audio_clock += (double) data_size
/ (double) (n * global_context.acodec_ctx->sample_rate); // add bytes offset */
av_free_packet(&pkt);
av_frame_free(&frame);
return data_size;
}
}
av_free_packet(&pkt);
av_frame_free(&frame);
// get a new packet
if (packet_queue_get(&global_context.audio_queue, &pkt) < 0) {
return -1;
}
//LOGV2("pkt.size is %d", pkt.size);
audio_pkt_data = pkt.data;
audio_pkt_size = pkt.size;
}
return ret;
}
The complete source is available at:
https://github.com/ericbars/FFmpegOpenSLES