Stock Android only supports 2-channel recording. Yet multi-mic requirements keep showing up, voice recognition being the obvious one. We already know the TDM protocol can carry multi-mic data from the kernel to the HAL, i.e. move it from kernel space into user space. The stock AudioRecord API, however, has no support whatsoever for capturing multi-channel data. What needs to change to make it support this?
Let's dig in, starting from the AudioRecord constructor. Whether it works out or not, we will at least understand why! A recording app typically uses AudioRecord like this:
int sampleRateInHz = 8000;
int channelConfiguration = AudioFormat.CHANNEL_IN_STEREO; // stereo, to start with
int audioEncodingBits = AudioFormat.ENCODING_PCM_16BIT;
int recordBufferSize = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfiguration, audioEncodingBits);
mAudioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC,
        sampleRateInHz, channelConfiguration, audioEncodingBits,
        recordBufferSize);
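For context, a minimal capture loop around that constructor might look like this (a sketch; isRecording is an assumed flag toggled elsewhere, and error handling is omitted):
mAudioRecord.startRecording();
byte[] buffer = new byte[recordBufferSize];
while (isRecording) {
    int read = mAudioRecord.read(buffer, 0, buffer.length); // blocking read of interleaved PCM16
    if (read > 0) {
        // hand the interleaved data to the consumer, e.g. a recognizer front end
    }
}
mAudioRecord.stop();
mAudioRecord.release();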
Let's start with the last constructor argument, recordBufferSize. It comes from getMinBufferSize:
//AudioRecord.java
static public int getMinBufferSize(int sampleRateInHz, int channelConfig, int audioFormat) {
int channelCount = 0;
...
//derive channelCount from the channel mask
//interestingly, a 6-channel case already exists here; useful as a reference
case AudioFormat.CHANNEL_IN_5POINT1:
channelCount = 6;
...
int size = native_get_min_buff_size(sampleRateInHz, channelCount, audioFormat);
...
}
native_get_min_buff_size maps to android_media_AudioRecord_get_min_buff_size:
//android_media_AudioRecord.cpp
static jint android_media_AudioRecord_get_min_buff_size(JNIEnv *env, jobject thiz,jint sampleRateInHertz, jint channelCount, jint audioFormat) {
size_t frameCount = 0;
audio_format_t format = audioFormatToNative(audioFormat);
status_t result = AudioRecord::getMinFrameCount(&frameCount,
sampleRateInHertz,
format,
audio_channel_in_mask_from_count(channelCount));
return frameCount * channelCount * audio_bytes_per_sample(format);
}
The format passed in is AudioFormat.ENCODING_PCM_16BIT; per audio_bytes_per_sample:
//audio.h
static inline size_t audio_bytes_per_sample(audio_format_t format)
{
...
case AUDIO_FORMAT_PCM_16_BIT:
case AUDIO_FORMAT_IEC61937:
size = sizeof(int16_t);
...
}
So audio_bytes_per_sample returns sizeof(int16_t) = 2.
status_t AudioRecord::getMinFrameCount(
size_t* frameCount,
uint32_t sampleRate,
audio_format_t format,
audio_channel_mask_t channelMask)
{
size_t size = 0;
status_t status = AudioSystem::getInputBufferSize(sampleRate, format, channelMask, &size);
...
//note the doubling here
// We double the size of input buffer for ping pong use of record buffer.
// Assumes audio_is_linear_pcm(format)
if ((*frameCount = (size * 2) / (audio_channel_count_from_in_mask(channelMask) *
audio_bytes_per_sample(format))) == 0) {
ALOGE("Unsupported configuration: sampleRate %u, format %#x, channelMask %#x",
sampleRate, format, channelMask);
return BAD_VALUE;
}
}
For getInputBufferSize, let's jump straight to the HAL implementation:
//audio_hw.c
static size_t get_input_buffer_size(uint32_t sample_rate,
audio_format_t format,
int channel_count,
bool is_low_latency)
{
...
//with our numbers: (8000 * 20) / 1000
size = (sample_rate * AUDIO_CAPTURE_PERIOD_DURATION_MSEC) / 1000;
size *= sizeof(short) * channel_count;
...
}
size = (8000 * 20) / 1000 * 2 * 2 = 640, so get_input_buffer_size returns 640.
In this scenario getMinFrameCount therefore yields frameCount = (640 * 2) / (2 * 2) = 320.
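Replaying that math end to end as code (a quick sanity check; the constants mirror the HAL defines quoted above):
// sanity-check the min-buffer math for 8000 Hz / stereo / PCM16
int sampleRate = 8000, periodMs = 20;  // AUDIO_CAPTURE_PERIOD_DURATION_MSEC
int channels = 2, bytesPerSample = 2;
int halSize = (sampleRate * periodMs / 1000) * bytesPerSample * channels; // 640
int frameCount = (halSize * 2) / (channels * bytesPerSample);             // 320, after the ping-pong doubling
int minBufferSize = frameCount * channels * bytesPerSample;               // 1280
System.out.println(halSize + " " + frameCount + " " + minBufferSize);     // 640 320 1280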
getMinBufferSize will thus return 320 * 2 * 2 = 1280. After the constructor runs, AudioRecord validates and latches this value in audioBuffSizeCheck (the name says "check", but it also stores the value, which strikes me as misleading):
private void audioBuffSizeCheck(int audioBufferSize) throws IllegalArgumentException {
// NB: this section is only valid with PCM data.
// To update when supporting compressed formats
//only uncompressed PCM is handled here
int frameSizeInBytes = mChannelCount
* (AudioFormat.getBytesPerSample(mAudioFormat));
//check that the caller-supplied value is an integer multiple of frameSizeInBytes
if ((audioBufferSize % frameSizeInBytes != 0) || (audioBufferSize < 1)) {
throw new IllegalArgumentException("Invalid audio buffer size " + audioBufferSize
+ " (frame size " + frameSizeInBytes + ")");
}
//stored here; it sizes the capture data buffer
mNativeBufferSizeInBytes = audioBufferSize;
}
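The practical consequence: whatever you pass as bufferSizeInBytes must be frame-aligned, or the constructor throws. For illustration (assuming the stereo/PCM16 setup above, frame size 2 * 2 = 4 bytes):
new AudioRecord(MediaRecorder.AudioSource.MIC, 8000,
        AudioFormat.CHANNEL_IN_STEREO, AudioFormat.ENCODING_PCM_16BIT, 1280); // OK: multiple of 4
new AudioRecord(MediaRecorder.AudioSource.MIC, 8000,
        AudioFormat.CHANNEL_IN_STEREO, AudioFormat.ENCODING_PCM_16BIT, 1282); // throws IllegalArgumentException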
The value is then handed down to the native layer via native_setup.
//android_media_AudioRecord.cpp
static jint
android_media_AudioRecord_setup(/* args omitted */)
{
...
size_t frameSize = channelCount * bytesPerSample;
//still the 320 frames computed above
size_t frameCount = buffSizeInBytes / frameSize;
...
const status_t status = lpRecorder->set(
...
frameCount
...
);
}
This function then calls the AudioRecord set() interface.
//AudioRecord.cpp
status_t AudioRecord::set(/* args omitted */)
{
...
//the frameCount requested by the upper layer
// mFrameCount is initialized in openRecord_l
mReqFrameCount = frameCount;
...
size_t frameCount = mReqFrameCount;
...
//temp may be revised by openRecord
size_t temp = frameCount;
...
sp<IAudioRecord> record = audioFlinger->openRecord(
...
&temp,
...
);
}
The request then lands on the AudioFlinger side, where frameCount ends up sizing the server-side proxy (the excerpt below is the playback Track constructor; the capture path's RecordThread::RecordTrack does the analogous thing with an AudioRecordServerProxy):
//services/audioflinger/Tracks.cpp
AudioFlinger::PlaybackThread::Track::Track(/* args omitted */)
{
...
if (sharedBuffer == 0) {
mAudioTrackServerProxy = new AudioTrackServerProxy(mCblk, mBuffer, frameCount,mFrameSize, !isExternalTrack(), sampleRate);
} else {
mAudioTrackServerProxy = new StaticAudioTrackServerProxy(mCblk, mBuffer, frameCount,mFrameSize);
}
...
}
Since the topic of this article is multi-channel recording, let's stop going deeper here.
Back to the earlier question: supporting more channels hinges on the channel-config argument of the constructor, channelConfiguration. Its legal values are only these:
public static final int CHANNEL_IN_DEFAULT = 1;
// These directly match native
public static final int CHANNEL_IN_LEFT = 0x4;
public static final int CHANNEL_IN_RIGHT = 0x8;
public static final int CHANNEL_IN_FRONT = 0x10;
public static final int CHANNEL_IN_BACK = 0x20;
public static final int CHANNEL_IN_LEFT_PROCESSED = 0x40;
public static final int CHANNEL_IN_RIGHT_PROCESSED = 0x80;
public static final int CHANNEL_IN_FRONT_PROCESSED = 0x100;
public static final int CHANNEL_IN_BACK_PROCESSED = 0x200;
public static final int CHANNEL_IN_PRESSURE = 0x400;
public static final int CHANNEL_IN_X_AXIS = 0x800;
public static final int CHANNEL_IN_Y_AXIS = 0x1000;
public static final int CHANNEL_IN_Z_AXIS = 0x2000;
public static final int CHANNEL_IN_VOICE_UPLINK = 0x4000;
public static final int CHANNEL_IN_VOICE_DNLINK = 0x8000;
public static final int CHANNEL_IN_MONO = CHANNEL_IN_FRONT;
public static final int CHANNEL_IN_STEREO = (CHANNEL_IN_LEFT | CHANNEL_IN_RIGHT);
/** @hide */
public static final int CHANNEL_IN_FRONT_BACK = CHANNEL_IN_FRONT | CHANNEL_IN_BACK;
// CHANNEL_IN_ALL is not yet defined; if added then it should match AUDIO_CHANNEL_IN_ALL
At first I could not see why they are defined this way. It only clicked when I saw... read on, it is explained below.
//AudioRecord.java
public AudioRecord(int audioSource, int sampleRateInHz, int channelConfig, int audioFormat,
int bufferSizeInBytes)
throws IllegalArgumentException {
//delegates to another overloaded constructor
this((new AudioAttributes.Builder())
.setInternalCapturePreset(audioSource)
.build(),
            (new AudioFormat.Builder())
                    .setChannelMask(getChannelMaskFromLegacyConfig(channelConfig,
                            true/*allow legacy configurations*/))
.setEncoding(audioFormat)
.setSampleRate(sampleRateInHz)
.build(),
bufferSizeInBytes,
AudioManager.AUDIO_SESSION_ID_GENERATE);
}
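Note that the overload it delegates to takes an AudioFormat, on which the channel mask is set directly rather than via the legacy conversion. From the app side the same path is reachable through AudioRecord.Builder, for example (a sketch; CHANNEL_IN_5POINT1 is @hide, so a normal app would have to pass the raw mask value):
AudioRecord rec = new AudioRecord.Builder()
        .setAudioSource(MediaRecorder.AudioSource.MIC)
        .setAudioFormat(new AudioFormat.Builder()
                .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
                .setSampleRate(8000)
                .setChannelMask(AudioFormat.CHANNEL_IN_5POINT1) // @hide; shown for illustration
                .build())
        .setBufferSizeInBytes(recordBufferSize)
        .build();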
Pay close attention to this line:
.setChannelMask(getChannelMaskFromLegacyConfig(channelConfig,
true/*allow legacy configurations*/))
It performs a legacy-compatibility conversion, and the result is still one of the constants above. The key word is mask, i.e. a bitmask; Android uses this pattern all over the place.
private static int getChannelMaskFromLegacyConfig(int inChannelConfig,
boolean allowLegacyConfig) {
int mask;
switch (inChannelConfig) {
case AudioFormat.CHANNEL_IN_DEFAULT: // AudioFormat.CHANNEL_CONFIGURATION_DEFAULT
case AudioFormat.CHANNEL_IN_MONO:
case AudioFormat.CHANNEL_CONFIGURATION_MONO:
mask = AudioFormat.CHANNEL_IN_MONO;
break;
case AudioFormat.CHANNEL_IN_STEREO:
case AudioFormat.CHANNEL_CONFIGURATION_STEREO:
mask = AudioFormat.CHANNEL_IN_STEREO;
break;
case (AudioFormat.CHANNEL_IN_FRONT | AudioFormat.CHANNEL_IN_BACK):
mask = inChannelConfig;
break;
default:
throw new IllegalArgumentException("Unsupported channel configuration.");
}
if (!allowLegacyConfig && ((inChannelConfig == AudioFormat.CHANNEL_CONFIGURATION_MONO)
|| (inChannelConfig == AudioFormat.CHANNEL_CONFIGURATION_STEREO))) {
// only happens with the constructor that uses AudioAttributes and AudioFormat
throw new IllegalArgumentException("Unsupported deprecated configuration.");
}
return mask;
}
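Because these configs are bitmasks, the channel count is simply the number of set bits, which is what the native audio_channel_count_from_in_mask boils down to for positional masks. A quick illustration:
// channel count == population count of the mask bits
int stereo = AudioFormat.CHANNEL_IN_LEFT | AudioFormat.CHANNEL_IN_RIGHT; // 0x4 | 0x8 = 0xc
System.out.println(Integer.bitCount(stereo));                            // 2
System.out.println(Integer.bitCount(AudioFormat.CHANNEL_IN_MONO));       // 1 (CHANNEL_IN_FRONT = 0x10)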
getChannelMaskFromLegacyConfig does nothing at all for configs with more than 2 channels. That includes the @hide constant in AudioFormat:
/** @hide */
public static final int CHANNEL_IN_5POINT1 = (CHANNEL_IN_LEFT |
CHANNEL_IN_RIGHT | CHANNEL_IN_FRONT | CHANNEL_IN_BACK |
CHANNEL_IN_LEFT_PROCESSED | CHANNEL_IN_RIGHT_PROCESSED);
It looks like a placeholder, with 5.1-channel capture to be supported some day. So what stops us from adding an analogous definition, say for 7.1 channels?
/** @hide */
public static final int CHANNEL_IN_7POINT1 = (CHANNEL_IN_LEFT |
CHANNEL_IN_RIGHT | CHANNEL_IN_FRONT | CHANNEL_IN_BACK |
CHANNEL_IN_LEFT_PROCESSED | CHANNEL_IN_RIGHT_PROCESSED|
CHANNEL_IN_FRONT_PROCESSED | CHANNEL_IN_BACK_PROCESSED);
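Before moving on, it is worth checking what this OR actually produces, because the value resurfaces later in the logs (channelMask 0x3fc) and in audio-base.h (1020u):
int in7point1 = 0x4 | 0x8 | 0x10 | 0x20 | 0x40 | 0x80 | 0x100 | 0x200;
System.out.println(Integer.toHexString(in7point1)); // 3fc
System.out.println(in7point1);                      // 1020
System.out.println(Integer.bitCount(in7point1));    // 8 channels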
The particular grouping of bits feels a bit arbitrary, but let's run with it. Correspondingly, getChannelMaskFromLegacyConfig needs a new case added, otherwise it simply throws IllegalArgumentException:
//AudioRecord.java-getChannelMaskFromLegacyConfig
case AudioFormat.CHANNEL_IN_7POINT1:
mask = AudioFormat.CHANNEL_IN_7POINT1;
break;
Then the channelCount switch that feeds the bufferSize computation needs the same treatment:
case AudioFormat.CHANNEL_IN_7POINT1:
channelCount = 8;
Further down, nothing else seems to trip over the parameter checks.
Only with the right channelCount do frameCount and bufferSize come out right; otherwise the data simply gets scrambled, because channel count and sample format together determine how many bytes one frame of audio occupies.
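Concretely, one frame of 8-channel PCM16 audio is 8 * 2 = 16 bytes of interleaved samples, one per mic. A sketch of pulling a single frame apart (uses java.nio.ByteBuffer and java.nio.ByteOrder):
// one interleaved frame: [ch0][ch1]...[ch7], one 16-bit sample per channel
int channelCount = 8;
int frameSizeInBytes = channelCount * 2;                   // 16 bytes per frame
byte[] raw = new byte[frameSizeInBytes];                   // imagine AudioRecord.read() filled this
ByteBuffer pcm = ByteBuffer.wrap(raw).order(ByteOrder.LITTLE_ENDIAN);
short[] oneFrame = new short[channelCount];
for (int ch = 0; ch < channelCount; ch++) {
    oneFrame[ch] = pcm.getShort();                         // sample for mic 'ch' in this frame
}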
There is one more place where channel_count gets set, in the HAL:
//hal/audio_hw.c
struct pcm_config pcm_config_audio_capture = {
.channels = 2,
.period_count = AUDIO_CAPTURE_PERIOD_COUNT,
.format = PCM_FORMAT_S16_LE,
};
The ordinary capture scenario is given a default of 2 channels. Then:
static int adev_open_input_stream(struct audio_hw_device *dev,
audio_io_handle_t handle,
audio_devices_t devices,
//note this parameter:
struct audio_config *config,
struct audio_stream_in **stream_in,
audio_input_flags_t flags __unused,
const char *address __unused,
audio_source_t source)
{
...
in->config = pcm_config_audio_capture; //starts as the 2-channel default
...
//fetch the channel_count the application layer configured
int channel_count = audio_channel_count_from_in_mask(config->channel_mask);
//if the app asked for something other than 2, overwrite the default
in->config.channels = channel_count;
...
}
With all of that changed, do a full build, and then:
int channelConfiguration = AudioFormat.CHANNEL_IN_7POINT1;
int audioEncodingBits = AudioFormat.ENCODING_PCM_16BIT;
int sampleRateInHz = 8000;
int recordBufferSize = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfiguration, audioEncodingBits);
LogD("recordBufferSize = " + String.valueOf(recordBufferSize));
recordBufferSize = 5120 (four times the 1280 we got for stereo),
which means the change works. Oh yeah! Note that after this change frameCount is still 320 (5120 / (channel count × bytes per sample)). Now you see why the frame concept is defined the way it is.
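For a multi-mic consumer such as a speech-recognition front end, the interleaved 8-channel stream usually has to be split per mic. A sketch, with sizes following the values above:
// split one interleaved 8-channel PCM16 read() into per-mic arrays
int channels = 8;
short[] interleaved = new short[recordBufferSize / 2];     // 2560 shorts = 320 frames
int read = mAudioRecord.read(interleaved, 0, interleaved.length);
int frames = read / channels;
short[][] perMic = new short[channels][frames];
for (int f = 0; f < frames; f++) {
    for (int ch = 0; ch < channels; ch++) {
        perMic[ch][f] = interleaved[f * channels + ch];
    }
}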
Hold on, though. Here comes a nasty pit:
AudioRecord: set(): inputSource 6, sampleRate 8000, format 0x1, channelMask 0x3fc
...
audio_hw_primary: adev_open_input_stream: enter: sample_rate(8000) channel_mask(0xc)
Per this log, the channelMask changes from 0x3fc to 0xc (plain stereo) somewhere after AudioRecord::set and before adev_open_input_stream. Tracing the code, this is the culprit:
audio_io_handle_t AudioPolicyManager::getInputForDevice(
...
audio_channel_mask_t channelMask,
...)
{
...
audio_channel_mask_t profileChannelMask = channelMask;
for (;;) {
//right here
profile = getInputProfile(device, address, profileSamplingRate,
                          profileFormat, profileChannelMask, profileFlags);
if (profile != 0) {
break; // success
} else if (profileFlags & AUDIO_INPUT_FLAG_RAW) {
profileFlags = (audio_input_flags_t) (profileFlags & ~AUDIO_INPUT_FLAG_RAW); // retry
} else if (profileFlags != AUDIO_INPUT_FLAG_NONE) {
profileFlags = AUDIO_INPUT_FLAG_NONE; // retry
} else { // fail
return input;
}
}
...
}
Let's take a look at getInputProfile:
sp<IOProfile> AudioPolicyManager::getInputProfile(audio_devices_t device,
        const String8& address, uint32_t& samplingRate, audio_format_t& format,
        audio_channel_mask_t& channelMask, audio_input_flags_t flags)
{
// Choose an input profile based on the requested capture parameters: select the first available
// profile supporting all requested parameters.
for (size_t i = 0; i < mHwModules.size(); i++)
{
if (mHwModules[i]->mHandle == 0) {
continue;
}
for (size_t j = 0; j < mHwModules[i]->mInputProfiles.size(); j++)
{
sp<IOProfile> profile = mHwModules[i]->mInputProfiles[j];
// profile->log();
if (profile->isCompatibleProfile(/* a pile of args */)) {
return profile;
}
}
//forgive these old eyes, but this looks identical to the loop above????
//(the difference hides in the elided arguments: the second pass relaxes the input-flags matching)
for (size_t j = 0; j < mHwModules[i]->mInputProfiles.size(); j++)
{
sp<IOProfile> profile = mHwModules[i]->mInputProfiles[j];
// profile->log();
if (profile->isCompatibleProfile(/* same pile of args as above */)) {
return profile;
}
}
}
return NULL;
}
Based on the requested capture parameters, it selects the first available input profile that supports them all.
After staring at the related code for a while (nearly to the point of nausea), the conclusion is that this boils down to editing audio_policy_configuration.xml (introduced in Android O) or the older audio_policy.conf to declare an 8-channel input profile, for example like the sketch below.
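(Illustrative only; the mixPort name and sampling rates are device-specific, and the mask string must be one that TypeConverter can parse, which is exactly the next problem:)
<!-- audio_policy_configuration.xml -->
<mixPort name="primary input" role="sink">
    <profile name="" format="AUDIO_FORMAT_PCM_16_BIT"
             samplingRates="8000,16000,48000"
             channelMasks="AUDIO_CHANNEL_IN_MONO,AUDIO_CHANNEL_IN_STEREO,AUDIO_CHANNEL_IN_8"/>
</mixPort>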
Correspondingly, audio-base.h has to change as well:
//audio-base.h
//this value must line up with the 0x3fc mask defined on the Java side
AUDIO_CHANNEL_IN_8 = 1020u,
Later on I found the parser also needs an entry here for when the XML is read in; without it the new mask string cannot be recognized:
//libmedia/TypeConverter.cpp
template <>
const InputChannelConverter::Table InputChannelConverter::mTable[] = {
MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_MONO),
MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_STEREO),
MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_FRONT_BACK),
MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_6),
MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_UPLINK_MONO),
MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_DNLINK_MONO),
MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_CALL_MONO),
MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_8),
TERMINATOR
};
That is because of this:
//Serializer.cpp
status_t AudioProfileTraits::deserialize(_xmlDoc */*doc*/, const _xmlNode *root, PtrElement &profile,
PtrSerializingCtx /*serializingContext*/)
{
string samplingRates = getXmlAttribute(root, Attributes::samplingRates);
string format = getXmlAttribute(root, Attributes::format);
string channels = getXmlAttribute(root, Attributes::channelMasks);
profile = new Element(formatFromString(format, gDynamicFormat),
//here
channelMasksFromString(channels, ","),
samplingRatesFromString(samplingRates, ","));
profile->setDynamicFormat(profile->getFormat() == gDynamicFormat);
profile->setDynamicChannels(profile->getChannels().isEmpty());
profile->setDynamicRate(profile->getSampleRates().isEmpty());
return NO_ERROR;
}
Once all of these changes are in, 8-channel recording works. Of course... if the tinyalsa implementation underneath does not support it, pcm_open will presumably fail at that point, but that is another topic entirely.
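From the app side, a simple way to detect such lower-layer rejections is to check the initialization state, since a failure below (pcm_open included) surfaces as a half-initialized AudioRecord (a sketch; TAG is an assumed log tag):
AudioRecord rec = new AudioRecord(MediaRecorder.AudioSource.MIC, 8000,
        AudioFormat.CHANNEL_IN_7POINT1, AudioFormat.ENCODING_PCM_16BIT, recordBufferSize);
if (rec.getState() != AudioRecord.STATE_INITIALIZED) {
    // the HAL or driver refused the 8-channel configuration
    Log.e(TAG, "8-channel AudioRecord failed to initialize");
    rec.release();
}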