Stock Android only supports 2-channel recording. Yet multi-mic use cases are real, speech recognition being one. We already know the TDM protocol can carry multi-mic data from the kernel to the HAL, i.e. move it from kernel space into user space. The stock AudioRecord API, however, has no support whatsoever for capturing multi-channel data. What would it take to change that?
Let's start from the AudioRecord constructor and work our way down. Whether it pans out or not, we'll get to the bottom of it! In a recording app, AudioRecord is typically used like this:
```java
int sampleRateInHz = 8000;
int channelConfiguration = AudioFormat.CHANNEL_IN_STEREO; // this declaration was missing from the snippet
int audioEncodingBits = AudioFormat.ENCODING_PCM_16BIT;
int recordBufferSize = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfiguration,
        audioEncodingBits);
mAudioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC, sampleRateInHz,
        channelConfiguration, audioEncodingBits, recordBufferSize);
```
Let's start with the constructor's last argument, recordBufferSize. It comes from getMinBufferSize:
```java
//AudioRecord.java
static public int getMinBufferSize(int sampleRateInHz, int channelConfig, int audioFormat) {
    int channelCount = 0;
    ...
    // channelCount is derived from the channel mask
    // interestingly, there's already a 6-channel case here; worth using as a reference
    case AudioFormat.CHANNEL_IN_5POINT1:
        channelCount = 6;
    ...
    int size = native_get_min_buff_size(sampleRateInHz, channelCount, audioFormat);
    ...
}
```
native_get_min_buff_size maps to android_media_AudioRecord_get_min_buff_size:
```cpp
//android_media_AudioRecord.cpp
static jint android_media_AudioRecord_get_min_buff_size(JNIEnv *env, jobject thiz,
        jint sampleRateInHertz, jint channelCount, jint audioFormat) {
    size_t frameCount = 0;
    audio_format_t format = audioFormatToNative(audioFormat);
    status_t result = AudioRecord::getMinFrameCount(&frameCount, sampleRateInHertz,
            format, audio_channel_in_mask_from_count(channelCount));
    return frameCount * channelCount * audio_bytes_per_sample(format);
}
```
The format passed in is AudioFormat.ENCODING_PCM_16BIT; looking at audio_bytes_per_sample:
```c
//audio.h
static inline size_t audio_bytes_per_sample(audio_format_t format) {
    ...
    case AUDIO_FORMAT_PCM_16_BIT:
    case AUDIO_FORMAT_IEC61937:
        size = sizeof(int16_t);
    ...
}
```
audio_bytes_per_sample thus returns sizeof(int16_t) = 2.
```cpp
//AudioRecord.cpp
status_t AudioRecord::getMinFrameCount(size_t* frameCount, uint32_t sampleRate,
        audio_format_t format, audio_channel_mask_t channelMask)
{
    size_t size;
    status_t status = AudioSystem::getInputBufferSize(sampleRate, format, channelMask, &size);
    ...
    // the size gets doubled here:
    // We double the size of input buffer for ping pong use of record buffer.
    // Assumes audio_is_linear_pcm(format)
    if ((*frameCount = (size * 2) / (audio_channel_count_from_in_mask(channelMask) *
            audio_bytes_per_sample(format))) == 0) {
        ALOGE("Unsupported configuration: sampleRate %u, format %#x, channelMask %#x",
                sampleRate, format, channelMask);
        return BAD_VALUE;
    }
}
```
For getInputBufferSize, let's look straight at the HAL layer:
```c
//audio_hw.c
static size_t get_input_buffer_size(uint32_t sample_rate, audio_format_t format,
        int channel_count, bool is_low_latency) {
    ...
    // here: (8000 * 20) / 1000
    size = (sample_rate * AUDIO_CAPTURE_PERIOD_DURATION_MSEC) / 1000;
    size *= sizeof(short) * channel_count;
    ...
}
```
So size = (8000 * 20) / 1000 * 2 * 2 = 640, and get_input_buffer_size returns 640.
In this scenario getMinFrameCount therefore arrives at frameCount = (640 * 2) / (2 * 2) = 320.
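To keep the numbers straight, here is the whole stereo chain so far as a quick sketch (plain Java; the 20 ms period is the AUDIO_CAPTURE_PERIOD_DURATION_MSEC value quoted from the HAL above):

```java
// Worked numbers for the 8 kHz / 16-bit / stereo case traced above.
int sampleRate = 8000, channels = 2, bytesPerSample = 2, periodMs = 20;
int halSize = (sampleRate * periodMs / 1000) * bytesPerSample * channels; // 640 bytes
int frameCount = (halSize * 2) / (channels * bytesPerSample);             // 320 (doubled for ping-pong)
```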
getMinBufferSize will accordingly return 320 * 2 * 2 = 1280. Once the constructor has been invoked, AudioRecord makes this value take effect via audioBuffSizeCheck (the name says "check", so storing the value there doesn't feel quite right to me):
```java
//AudioRecord.java
private void audioBuffSizeCheck(int audioBufferSize) throws IllegalArgumentException {
    // NB: this section is only valid with PCM data.
    //     To update when supporting compressed formats
    // i.e. only uncompressed PCM is supported
    int frameSizeInBytes = mChannelCount
            * (AudioFormat.getBytesPerSample(mAudioFormat));
    // check that the size the user supplied is a multiple of frameSizeInBytes
    if ((audioBufferSize % frameSizeInBytes != 0) || (audioBufferSize < 1)) {
        throw new IllegalArgumentException("Invalid audio buffer size " + audioBufferSize
                + " (frame size " + frameSizeInBytes + ")");
    }
    // stored here, to serve as the buffer size for recorded data
    mNativeBufferSizeInBytes = audioBufferSize;
}
```
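The same check will matter once we go multi-channel: with 8 channels of 16-bit samples the frame size becomes 16 bytes, so whatever buffer size we pass must be a multiple of 16. A minimal sketch of the rule (variable names are mine, not AudioRecord's):

```java
// The constraint audioBuffSizeCheck enforces, instantiated for 8-channel 16-bit PCM.
int channelCount = 8, bytesPerSample = 2;
int frameSizeInBytes = channelCount * bytesPerSample;                      // 16
int bufferSize = 5120;                                                     // the value we'll meet later
boolean valid = (bufferSize >= 1) && (bufferSize % frameSizeInBytes == 0); // true: 5120 = 320 * 16
```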
The value is then handed down to the native layer through native_setup:
```cpp
//android_media_AudioRecord.cpp
static jint android_media_AudioRecord_setup(...)
{
    ...
    size_t frameSize = channelCount * bytesPerSample;
    // still the 320 frames discussed above
    size_t frameCount = buffSizeInBytes / frameSize;
    ...
    const status_t status = lpRecorder->set(
        ...
        frameCount
        ...
    );
}
```
That function then calls the AudioRecord::set interface:
```cpp
//AudioRecord.cpp
status_t AudioRecord::set(/* params omitted */)
{
    ...
    // the frameCount requested from above
    // mFrameCount is initialized in openRecord_l
    mReqFrameCount = frameCount;
    ...
    size_t frameCount = mReqFrameCount;
    ...
    // temp may be revised by openRecord
    size_t temp = frameCount;
    ...
    sp<IAudioRecord> record = audioFlinger->openRecord(
        ...
        &temp,
        ...
    );
}
```
From there it lands on the AudioFlinger side. (The snippet below is the playback-side Track constructor; the record path builds its server proxy analogously in RecordThread::RecordTrack.)
```cpp
//services/audioflinger/Tracks.cpp
AudioFlinger::PlaybackThread::Track::Track(/* params omitted */)
{
    ...
    if (sharedBuffer == 0) {
        mAudioTrackServerProxy = new AudioTrackServerProxy(mCblk, mBuffer, frameCount,
                mFrameSize, !isExternalTrack(), sampleRate);
    } else {
        mAudioTrackServerProxy = new StaticAudioTrackServerProxy(mCblk, mBuffer, frameCount,
                mFrameSize);
    }
    ...
}
```
Since the topic of this article is multi-channel recording, we'll stop digging here.
Back to the constructor: to support more channels, we need to look at the channelConfiguration argument. Its only possible values are these:
```java
public static final int CHANNEL_IN_DEFAULT = 1;
// These directly match native
public static final int CHANNEL_IN_LEFT = 0x4;
public static final int CHANNEL_IN_RIGHT = 0x8;
public static final int CHANNEL_IN_FRONT = 0x10;
public static final int CHANNEL_IN_BACK = 0x20;
public static final int CHANNEL_IN_LEFT_PROCESSED = 0x40;
public static final int CHANNEL_IN_RIGHT_PROCESSED = 0x80;
public static final int CHANNEL_IN_FRONT_PROCESSED = 0x100;
public static final int CHANNEL_IN_BACK_PROCESSED = 0x200;
public static final int CHANNEL_IN_PRESSURE = 0x400;
public static final int CHANNEL_IN_X_AXIS = 0x800;
public static final int CHANNEL_IN_Y_AXIS = 0x1000;
public static final int CHANNEL_IN_Z_AXIS = 0x2000;
public static final int CHANNEL_IN_VOICE_UPLINK = 0x4000;
public static final int CHANNEL_IN_VOICE_DNLINK = 0x8000;
public static final int CHANNEL_IN_MONO = CHANNEL_IN_FRONT;
public static final int CHANNEL_IN_STEREO = (CHANNEL_IN_LEFT | CHANNEL_IN_RIGHT);
/** @hide */
public static final int CHANNEL_IN_FRONT_BACK = CHANNEL_IN_FRONT | CHANNEL_IN_BACK;
// CHANNEL_IN_ALL is not yet defined; if added then it should match AUDIO_CHANNEL_IN_ALL
```
At first I couldn't figure out why they're defined this way. Then I saw... well, keep reading; it comes up below.
```java
//AudioRecord.java
public AudioRecord(int audioSource, int sampleRateInHz, int channelConfig, int audioFormat,
        int bufferSizeInBytes) throws IllegalArgumentException {
    // delegates to another, overloaded constructor
    this((new AudioAttributes.Builder())
                .setInternalCapturePreset(audioSource)
                .build(),
            (new AudioFormat.Builder())
                .setChannelMask(getChannelMaskFromLegacyConfig(channelConfig,
                        true/*allow legacy configurations*/))
                .setEncoding(audioFormat)
                .setSampleRate(sampleRateInHz)
                .build(),
            bufferSizeInBytes,
            AudioManager.AUDIO_SESSION_ID_GENERATE);
}
```
Note this line:
```java
.setChannelMask(getChannelMaskFromLegacyConfig(channelConfig, true/*allow legacy configurations*/))
```
It performs a compatibility conversion, and the outcome is still one of the values listed above. The key point is that these are masks (bitmasks), a pattern Android uses all over the place.
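The mask idea itself is simple: every input channel owns one bit, configurations are built by OR-ing bits together, and the channel count is just the number of set bits (which is exactly what the native helper audio_channel_count_from_in_mask computes). A quick illustration of the bit math:

```java
// Each input channel is one bit; a configuration is the OR of its channels.
int stereo = AudioFormat.CHANNEL_IN_LEFT | AudioFormat.CHANNEL_IN_RIGHT; // 0x4 | 0x8 = 0xC
int channelCount = Integer.bitCount(stereo);                              // 2
```

With that in mind, here is getChannelMaskFromLegacyConfig in full: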
```java
private static int getChannelMaskFromLegacyConfig(int inChannelConfig, boolean allowLegacyConfig) {
    int mask;
    switch (inChannelConfig) {
        case AudioFormat.CHANNEL_IN_DEFAULT: // AudioFormat.CHANNEL_CONFIGURATION_DEFAULT
        case AudioFormat.CHANNEL_IN_MONO:
        case AudioFormat.CHANNEL_CONFIGURATION_MONO:
            mask = AudioFormat.CHANNEL_IN_MONO;
            break;
        case AudioFormat.CHANNEL_IN_STEREO:
        case AudioFormat.CHANNEL_CONFIGURATION_STEREO:
            mask = AudioFormat.CHANNEL_IN_STEREO;
            break;
        case (AudioFormat.CHANNEL_IN_FRONT | AudioFormat.CHANNEL_IN_BACK):
            mask = inChannelConfig;
            break;
        default:
            throw new IllegalArgumentException("Unsupported channel configuration.");
    }
    if (!allowLegacyConfig && ((inChannelConfig == AudioFormat.CHANNEL_CONFIGURATION_MONO)
            || (inChannelConfig == AudioFormat.CHANNEL_CONFIGURATION_STEREO))) {
        // only happens with the constructor that uses AudioAttributes and AudioFormat
        throw new IllegalArgumentException("Unsupported deprecated configuration.");
    }
    return mask;
}
```
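You can see the rejection from the app side before any patching; a tiny demonstration (0x3FC is the 7.1 mask value we are about to define):

```java
// Pre-patch behavior: anything beyond mono/stereo/front-back hits the default branch above.
try {
    AudioRecord rec = new AudioRecord(MediaRecorder.AudioSource.MIC, 8000,
            0x3FC /* our future 7.1 mask */, AudioFormat.ENCODING_PCM_16BIT, 5120);
} catch (IllegalArgumentException e) {
    // "Unsupported channel configuration."
}
```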
So getChannelMaskFromLegacyConfig simply does not handle anything beyond 2 channels. That includes the @hide constant in AudioFormat:
```java
/** @hide */
public static final int CHANNEL_IN_5POINT1 = (CHANNEL_IN_LEFT | CHANNEL_IN_RIGHT
        | CHANNEL_IN_FRONT | CHANNEL_IN_BACK | CHANNEL_IN_LEFT_PROCESSED | CHANNEL_IN_RIGHT_PROCESSED);
```
It looks like a placeholder, put there so that 5.1 capture can be supported some day. In that case, couldn't we add a definition of our own in the same spirit, say for 7.1?
```java
/** @hide */
public static final int CHANNEL_IN_7POINT1 = (CHANNEL_IN_LEFT | CHANNEL_IN_RIGHT
        | CHANNEL_IN_FRONT | CHANNEL_IN_BACK | CHANNEL_IN_LEFT_PROCESSED | CHANNEL_IN_RIGHT_PROCESSED
        | CHANNEL_IN_FRONT_PROCESSED | CHANNEL_IN_BACK_PROCESSED);
```
It feels slightly wrong (we're borrowing the *_PROCESSED bits), but let's run with it. Correspondingly, getChannelMaskFromLegacyConfig needs a matching case, or it will throw IllegalArgumentException right away:
```java
//AudioRecord.java, getChannelMaskFromLegacyConfig
case AudioFormat.CHANNEL_IN_7POINT1:
    mask = AudioFormat.CHANNEL_IN_7POINT1;
    break;
```
The place that computes bufferSize has to change as well:
```java
//AudioRecord.java, getMinBufferSize
case AudioFormat.CHANNEL_IN_7POINT1:
    channelCount = 8;
    break;
```
Below that point, nothing seems to intercept these parameters any more.
Only with channelCount right can frameCount come out right, and bufferSize with it; otherwise the data gets scrambled, since channel count, sample format and so on are what determine how much buffer space each frame occupies!
There is one more place where channel_count is set:
```c
//hal/audio_hw.c
struct pcm_config pcm_config_audio_capture = {
    .channels = 2,
    .period_count = AUDIO_CAPTURE_PERIOD_COUNT,
    .format = PCM_FORMAT_S16_LE,
};
```
The ordinary recording path gets a default of 2 channels here. Then:
```c
static int adev_open_input_stream(struct audio_hw_device *dev,
                                  audio_io_handle_t handle,
                                  audio_devices_t devices,
                                  struct audio_config *config, // note this one
                                  struct audio_stream_in **stream_in,
                                  audio_input_flags_t flags __unused,
                                  const char *address __unused,
                                  audio_source_t source)
{
    ...
    in->config = pcm_config_audio_capture; // still the default of 2 at this point
    ...
    // fetch the channel count the app layer configured
    int channel_count = audio_channel_count_from_in_mask(config->channel_mask);
    // if the app asked for something other than 2, override it
    in->config.channels = channel_count;
    ...
}
```
With all of these changes in, do a full build, then:
```java
int channelConfiguration = AudioFormat.CHANNEL_IN_7POINT1;
int audioEncodingBits = AudioFormat.ENCODING_PCM_16BIT;
int sampleRateInHz = 8000;
int recordBufferSize = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfiguration,
        audioEncodingBits);
LogD("recordBufferSize = " + String.valueOf(recordBufferSize));
```
recordBufferSize = 5120 (four times the stereo value of 1280).
So the change works. Oh yeah! Note that frameCount is still 320 after this change: 5120 / (channel count * bytes per sample) = 5120 / (8 * 2). Now you can see why the frame concept is defined the way it is.
Not so fast, though. Here comes a nasty pit!
```
AudioRecord: set(): inputSource 6, sampleRate 8000, format 0x1, channelMask 0x3fc
...
audio_hw_primary: adev_open_input_stream: enter: sample_rate(8000) channel_mask(0xc)
```
Per this log, somewhere after AudioRecord::set and before adev_open_input_stream the channelMask is rewritten from 0x3fc to 0xc. Tracing through the code, this turns out to be the troublemaker:
```cpp
audio_io_handle_t AudioPolicyManager::getInputForDevice(
        ...
        audio_channel_mask_t channelMask,
        ...)
{
    ...
    audio_channel_mask_t profileChannelMask = channelMask;
    for (;;) {
        // right here
        profile = getInputProfile(device, address, profileSamplingRate, profileFormat,
                                  profileChannelMask, profileFlags);
        if (profile != 0) {
            break; // success
        } else if (profileFlags & AUDIO_INPUT_FLAG_RAW) {
            profileFlags = (audio_input_flags_t) (profileFlags & ~AUDIO_INPUT_FLAG_RAW); // retry
        } else if (profileFlags != AUDIO_INPUT_FLAG_NONE) {
            profileFlags = AUDIO_INPUT_FLAG_NONE; // retry
        } else {
            // fail
            return input;
        }
    }
    ...
}
```
Let's take a look at getInputProfile:
```cpp
sp<IOProfile> AudioPolicyManager::getInputProfile(audio_devices_t device,
                                                  const String8& address,
                                                  uint32_t& samplingRate,
                                                  audio_format_t& format,
                                                  audio_channel_mask_t& channelMask,
                                                  audio_input_flags_t flags)
{
    // Choose an input profile based on the requested capture parameters: select the first available
    // profile supporting all requested parameters.
    for (size_t i = 0; i < mHwModules.size(); i++)
    {
        if (mHwModules[i]->mHandle == 0) {
            continue;
        }
        for (size_t j = 0; j < mHwModules[i]->mInputProfiles.size(); j++)
        {
            sp<IOProfile> profile = mHwModules[i]->mInputProfiles[j];
            // profile->log();
            if (profile->isCompatibleProfile(/* a pile of arguments */)) {
                return profile;
            }
        }
        // I honestly couldn't see how this differs from the loop above?????
        // (the difference hides in the elided arguments: the second pass relaxes
        // the requirement of an exact match on the input flags)
        for (size_t j = 0; j < mHwModules[i]->mInputProfiles.size(); j++)
        {
            sp<IOProfile> profile = mHwModules[i]->mInputProfiles[j];
            // profile->log();
            if (profile->isCompatibleProfile(/* a pile of arguments, almost as above */)) {
                return profile;
            }
        }
    }
    return NULL;
}
```
So, based on the requested capture parameters, it selects the first available input profile that supports them all.
After staring at the surrounding code until I almost threw up, my conclusion is that the fix basically means editing audio_policy_configuration.xml (new in Android O) or the older audio_policy.conf to declare support for 8-channel input. For example:
```xml
<mixPort name="primary input" role="sink">
    <profile name="" format="AUDIO_FORMAT_PCM_16_BIT"
             samplingRates="8000,11025,12000,16000,22050,24000,32000,44100,48000"
             channelMasks="AUDIO_CHANNEL_IN_MONO,AUDIO_CHANNEL_IN_STEREO,AUDIO_CHANNEL_IN_FRONT_BACK,AUDIO_CHANNEL_IN_8"/>
</mixPort>
```
Correspondingly, audio-base.h needs a matching entry:
```c
//audio-base.h
// this value must correspond to the Java-side definition, 0x3fc
AUDIO_CHANNEL_IN_8 = 1020u,
```
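A quick sanity check that the two sides line up: OR-ing together the eight Java channel bits from our CHANNEL_IN_7POINT1 gives exactly this value.

```java
// LEFT | RIGHT | FRONT | BACK | LEFT_PROCESSED | RIGHT_PROCESSED | FRONT_PROCESSED | BACK_PROCESSED
int mask = 0x4 | 0x8 | 0x10 | 0x20 | 0x40 | 0x80 | 0x100 | 0x200;
System.out.printf("0x%X = %d, bits = %d%n", mask, mask, Integer.bitCount(mask));
// prints: 0x3FC = 1020, bits = 8, matching AUDIO_CHANNEL_IN_8 = 1020u and the 0x3fc in the log
```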
Later still, it turned out the parsing side needs an entry too, otherwise the new mask string isn't recognized when the XML is read in:
```cpp
//libmedia/TypeConverter.cpp
template <>
const InputChannelConverter::Table InputChannelConverter::mTable[] = {
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_MONO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_STEREO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_FRONT_BACK),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_6),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_UPLINK_MONO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_DNLINK_MONO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_CALL_MONO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_8),
    TERMINATOR
};
```
Here's why:
```cpp
//Serializer.cpp
status_t AudioProfileTraits::deserialize(_xmlDoc */*doc*/, const _xmlNode *root,
        PtrElement &profile, PtrSerializingCtx /*serializingContext*/)
{
    string samplingRates = getXmlAttribute(root, Attributes::samplingRates);
    string format = getXmlAttribute(root, Attributes::format);
    string channels = getXmlAttribute(root, Attributes::channelMasks);

    profile = new Element(formatFromString(format, gDynamicFormat),
            channelMasksFromString(channels, ","), // here
            samplingRatesFromString(samplingRates, ","));

    profile->setDynamicFormat(profile->getFormat() == gDynamicFormat);
    profile->setDynamicChannels(profile->getChannels().isEmpty());
    profile->setDynamicRate(profile->getSampleRates().isEmpty());
    return NO_ERROR;
}
```
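channelMasksFromString resolves each comma-separated name through the converter table shown earlier; a name without a table entry resolves to nothing, which is exactly why AUDIO_CHANNEL_IN_8 had to be added there. Conceptually it behaves like this hypothetical sketch (not the actual TypeConverter code):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

class ChannelMaskParser {
    // String -> enum lookup, mirroring InputChannelConverter::mTable.
    static final Map<String, Integer> TABLE = Map.of(
            "AUDIO_CHANNEL_IN_MONO",   0x10,
            "AUDIO_CHANNEL_IN_STEREO", 0xC,
            "AUDIO_CHANNEL_IN_8",      0x3FC); // the entry we just added

    static List<Integer> masksFromString(String csv) {
        List<Integer> masks = new ArrayList<>();
        for (String name : csv.split(",")) {
            Integer mask = TABLE.get(name.trim());
            if (mask != null) masks.add(mask); // unknown names are silently dropped
        }
        return masks;
    }
}
```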
With all of these changes in place, 8-channel recording is supported. Of course... if the tinyalsa implementation underneath doesn't support it, pcm_open will presumably error out. But that's a topic for another day.
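To wrap up, here is roughly what the app side can look like with everything above in place. A sketch, assuming our @hide CHANNEL_IN_7POINT1 is visible to the app (platform app, or hardcode 0x3FC); the data comes back interleaved, one 16-bit sample per channel per frame:

```java
int sampleRateInHz = 8000;
int channelConfiguration = AudioFormat.CHANNEL_IN_7POINT1; // our patched-in constant (0x3FC)
int audioEncodingBits = AudioFormat.ENCODING_PCM_16BIT;
int recordBufferSize = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfiguration,
        audioEncodingBits); // 5120 on our build
AudioRecord recorder = new AudioRecord(MediaRecorder.AudioSource.MIC, sampleRateInHz,
        channelConfiguration, audioEncodingBits, recordBufferSize);

short[] buf = new short[recordBufferSize / 2]; // 16-bit samples
recorder.startRecording();
int read = recorder.read(buf, 0, buf.length);
// Interleaved layout: the sample for frame n, mic c sits at buf[n * 8 + c].
recorder.stop();
recorder.release();
```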