支持多通道录音

原生Android只支持2 channel的录音。可是偏偏会有多mic的需求,比如说语音识别。目前已知TDM协议可以将多mic数据从kernel送到hal,从内核空间搬运到用户空间中。可是原生AudioRecord接口是完全不支持多channel录音数据的采集的,怎么修改,才能让原生进行支持呢?

我们就从AudioRecord的构造函数开始往下研究。无论行不行,都要研究出个所以然来!我们如果写个录音app,我们一般这么使用AudioRecord:


   
   
   
   
  1. int sampleRateInHz = 8000;
  2. int audioEncodingBits = AudioFormat.ENCODING_PCM_16BIT;
  3. int recordBufferSize = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfiguration, audioEncodingBits);
  4. mAudioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC,
  5.    sampleRateInHz, channelConfiguration, audioEncodingBits,
  6.                    recordBufferSize);

先说AudioRecord构造函数最后一个参数recordBufferSize。来自:

getMinBufferSize


   
   
   
   
  1. //AudioRecord.java
  2. static public int getMinBufferSize(int sampleRateInHz, int channelConfig, int audioFormat) {
  3.        int channelCount = 0;
  4.   ...
  5.        //根据channelMask得出channelCount
  6.        //这里竟然有个6声道的,估计可以参考下
  7.        case AudioFormat.CHANNEL_IN_5POINT1:
  8.            channelCount = 6;
  9.       ...
  10.        int size = native_get_min_buff_size(sampleRateInHz, channelCount, audioFormat);
  11.   ...
  12. }

native_get_min_buff_size对应android_media_AudioRecord_get_min_buff_size:


   
   
   
   
  1. //android_media_AudioRecord.cpp
  2. static jint android_media_AudioRecord_get_min_buff_size(JNIEnv *env,  jobject thiz,jint sampleRateInHertz, jint channelCount, jint audioFormat) {
  3.    size_t frameCount = 0;
  4.    audio_format_t format = audioFormatToNative(audioFormat);
  5.    status_t result = AudioRecord::getMinFrameCount(&frameCount,
  6.            sampleRateInHertz,
  7.            format,
  8.            audio_channel_in_mask_from_count(channelCount));
  9.    return frameCount * channelCount * audio_bytes_per_sample(format);
  10. }

这里传入的format是AudioFormat.ENCODING_PCM_16BIT,根据audio_bytes_per_sample:


   
   
   
   
  1. //audio.h
  2. static inline size_t audio_bytes_per_sample(audio_format_t format)
  3. {
  4.   ...
  5.    case AUDIO_FORMAT_PCM_16_BIT:
  6.      case AUDIO_FORMAT_IEC61937:
  7.          size = sizeof(int16_t);
  8.   ...
  9. }

audio_bytes_per_sample返回的是sizeof(signed short) = 2.


   
   
   
   
  1. status_t AudioRecord::getMinFrameCount(  
  2.   size_t* frameCount,
  3.        uint32_t sampleRate,
  4.        audio_format_t format,
  5.        audio_channel_mask_t channelMask)
  6. {
  7.    status_t status = AudioSystem::getInputBufferSize(sampleRate, format, channelMask, &size);
  8.   ...
  9.     //这里需要double一下
  10.    // We double the size of input buffer for ping pong use of record buffer.
  11.    // Assumes audio_is_linear_pcm(format)
  12.    if ((*frameCount = (size * 2) / (audio_channel_count_from_in_mask(channelMask) *
  13.            audio_bytes_per_sample(format))) == 0) {
  14.        ALOGE("Unsupported configuration: sampleRate %u, format %#x, channelMask %#x",
  15.            sampleRate, format, channelMask);
  16.        return BAD_VALUE;
  17.   }
  18. }

getInputBufferSize直接看hal层:


   
   
   
   
  1. //audio_hw.c
  2. static size_t get_input_buffer_size(uint32_t sample_rate,
  3.                                    audio_format_t format,
  4.                                    int channel_count,
  5.                                    bool is_low_latency)
  6. {
  7.   ...
  8.        //这里是(8000*20)/1000
  9.        size = (sample_rate * AUDIO_CAPTURE_PERIOD_DURATION_MSEC) / 1000;
  10.   size *= sizeof(short) * channel_count;
  11.   ...
  12. }

size = (8000*20)/1000 * 2 * 2 = 640,get_input_buffer_size返回640.

目前这种场景getMinFrameCount取得frameCount = (640 *2) / (2 * 2) = 320

getMinBufferSize将返回320 * 2 * 2 = 1280,调用完构造函数之后,AudioRecord将通过audioBuffSizeCheck将这个值设置生效(函数名字是check,我觉得这个地方不太合理。)


   
   
   
   
  1. private void audioBuffSizeCheck(int audioBufferSize) throws IllegalArgumentException {                                                                                  
  2.        // NB: this section is only valid with PCM data.
  3.        // To update when supporting compressed formats
  4.   //只支持无压缩的pcm
  5.        int frameSizeInBytes = mChannelCount
  6.            * (AudioFormat.getBytesPerSample(mAudioFormat));
  7.   //检查用户设置的这个值是不是frameSizeInBytes的整数倍
  8.        if ((audioBufferSize % frameSizeInBytes != 0) || (audioBufferSize < 1)) {
  9.            throw new IllegalArgumentException("Invalid audio buffer size " + audioBufferSize
  10.                    + " (frame size " + frameSizeInBytes + ")");
  11.       }
  12. //存到这里。作为录音数据的buffer
  13.        mNativeBufferSizeInBytes = audioBufferSize;
  14.   }

然后,通过调用native_setup将值传入native层。


   
   
   
   
  1. //android_media_AudioRecord.cpp
  2. static jint
  3. android_media_AudioRecord_setup
  4. {
  5. ...
  6. size_t frameSize = channelCount * bytesPerSample;
  7. //这里还是上文说的320
  8.    size_t frameCount = buffSizeInBytes / frameSize;
  9.   ...
  10.    const status_t status = lpRecorder->set(
  11.   ...
  12.            frameCount
  13.           ...
  14.           );
  15. }

然后这个函数会调用AudioRecord set接口.


   
   
   
   
  1. //AudioRecord.cpp
  2. status_t AudioRecord::set(//参数省略)
  3. {
  4.   ...
  5.    //上层请求的frameCount
  6.    // mFrameCount is initialized in openRecord_l
  7.    mReqFrameCount = frameCount;
  8.   ...
  9.    size_t frameCount = mReqFrameCount;
  10.   ...
  11.    //temp有可能会被openRecord修订
  12.    size_t temp = frameCount;
  13.   ...
  14.    sp <IAudioRecord> record = audioFlinger->openRecord(
  15.   ...
  16.       &temp,
  17.       ...
  18.   );
  19. }

然后设置到AudioFlinger端:


   
   
   
   
  1. //services/audioflinger/Tracks.cpp
  2. AudioFlinger::PlaybackThread::Track::Track(/*省略参数*/)
  3. {
  4.   ...
  5.   if (sharedBuffer == 0) {
  6.        mAudioTrackServerProxy = new AudioTrackServerProxy(mCblk, mBuffer, frameCount,mFrameSize, !isExternalTrack(), sampleRate);
  7.   } else {
  8.        mAudioTrackServerProxy = new StaticAudioTrackServerProxy(mCblk, mBuffer, frameCount,mFrameSize);
  9.   }
  10.   ...
  11. }

本文的主题是研究多声道录音,所以先就此打住。

回到前文,如果需要支持多声道,需要看看构造函数的第三个参数

channelConfiguration

取值范围只有这些:


   
   
   
   
  1. public static final int CHANNEL_IN_DEFAULT = 1;
  2.    // These directly match native
  3.    public static final int CHANNEL_IN_LEFT = 0x4;
  4.    public static final int CHANNEL_IN_RIGHT = 0x8;
  5.    public static final int CHANNEL_IN_FRONT = 0x10;
  6.    public static final int CHANNEL_IN_BACK = 0x20;
  7.    public static final int CHANNEL_IN_LEFT_PROCESSED = 0x40;
  8.    public static final int CHANNEL_IN_RIGHT_PROCESSED = 0x80;
  9.    public static final int CHANNEL_IN_FRONT_PROCESSED = 0x100;
  10.    public static final int CHANNEL_IN_BACK_PROCESSED = 0x200;
  11.    public static final int CHANNEL_IN_PRESSURE = 0x400;
  12.    public static final int CHANNEL_IN_X_AXIS = 0x800;
  13.    public static final int CHANNEL_IN_Y_AXIS = 0x1000;
  14.    public static final int CHANNEL_IN_Z_AXIS = 0x2000;
  15.    public static final int CHANNEL_IN_VOICE_UPLINK = 0x4000;
  16.    public static final int CHANNEL_IN_VOICE_DNLINK = 0x8000;
  17.    public static final int CHANNEL_IN_MONO = CHANNEL_IN_FRONT;
  18.    public static final int CHANNEL_IN_STEREO = (CHANNEL_IN_LEFT | CHANNEL_IN_RIGHT);
  19.    /** @hide */
  20.    public static final int CHANNEL_IN_FRONT_BACK = CHANNEL_IN_FRONT | CHANNEL_IN_BACK;
  21.    // CHANNEL_IN_ALL is not yet defined; if added then it should match AUDIO_CHANNEL_IN_ALL

刚开始没看明白为什么这么定义。直到看到了...往下看,后面会说


   
   
   
   
  1. //AudioRecord.java
  2. public AudioRecord(int audioSource, int sampleRateInHz, int channelConfig, int audioFormat,                                                                              
  3.            int bufferSizeInBytes)
  4.    throws IllegalArgumentException {
  5.   //调用另外一个重载的构造函数
  6.        this((new AudioAttributes.Builder())
  7.                   .setInternalCapturePreset(audioSource)
  8.                   .build(),
  9.               (new AudioFormat.Builder())       .setChannelMask(getChannelMaskFromLegacyConfig(channelConfig,
  10.                                        true/*allow legacy configurations*/))
  11.                   .setEncoding(audioFormat)
  12.                   .setSampleRate(sampleRateInHz)
  13.                   .build(),
  14.                bufferSizeInBytes,
  15.                AudioManager.AUDIO_SESSION_ID_GENERATE);
  16.   }

注意看这一行:


   
   
   
   
  1. .setChannelMask(getChannelMaskFromLegacyConfig(channelConfig,
  2.                                        true/*allow legacy configurations*/))

做一个兼容性转换。最终结果还是前面那些。关键是这里是Mask(中文叫掩码)。Android中有很多这种用法


   
   
   
   
  1. private static int getChannelMaskFromLegacyConfig(int inChannelConfig,
  2.            boolean allowLegacyConfig) {
  3.        int mask;
  4.        switch (inChannelConfig) {
  5.        case AudioFormat.CHANNEL_IN_DEFAULT: // AudioFormat.CHANNEL_CONFIGURATION_DEFAULT
  6.        case AudioFormat.CHANNEL_IN_MONO:
  7.        case AudioFormat.CHANNEL_CONFIGURATION_MONO:
  8.            mask = AudioFormat.CHANNEL_IN_MONO;
  9.            break;
  10.        case AudioFormat.CHANNEL_IN_STEREO:
  11.        case AudioFormat.CHANNEL_CONFIGURATION_STEREO:
  12.            mask = AudioFormat.CHANNEL_IN_STEREO;
  13.            break;
  14.        case (AudioFormat.CHANNEL_IN_FRONT | AudioFormat.CHANNEL_IN_BACK):
  15.            mask = inChannelConfig;
  16.            break;
  17.        default:
  18.            throw new IllegalArgumentException("Unsupported channel configuration.");
  19.       }
  20.        if (!allowLegacyConfig && ((inChannelConfig == AudioFormat.CHANNEL_CONFIGURATION_MONO)
  21.                || (inChannelConfig == AudioFormat.CHANNEL_CONFIGURATION_STEREO))) {
  22.            // only happens with the constructor that uses AudioAttributes and AudioFormat
  23.            throw new IllegalArgumentException("Unsupported deprecated configuration.");
  24.       }
  25.        return mask;
  26.   }

getChannelMaskFromLegacyConfig根本没对超过2个声道的配置进行处理。包括AudioFormat里的hide参数:


   
   
   
   
  1. /** @hide */
  2.    public static final int CHANNEL_IN_5POINT1 = (CHANNEL_IN_LEFT |
  3.            CHANNEL_IN_RIGHT | CHANNEL_IN_FRONT | CHANNEL_IN_BACK |
  4.            CHANNEL_IN_LEFT_PROCESSED | CHANNEL_IN_RIGHT_PROCESSED);

看来是打算先占个位置,将来会支持这种5.1声道的方式。那我们岂不是可以加上个同样的定义,比如说7.1声道:


   
   
   
   
  1. /** @hide */
  2.    public static final int CHANNEL_IN_7POINT1 = (CHANNEL_IN_LEFT |
  3.            CHANNEL_IN_RIGHT | CHANNEL_IN_FRONT | CHANNEL_IN_BACK |
  4.            CHANNEL_IN_LEFT_PROCESSED | CHANNEL_IN_RIGHT_PROCESSED|
  5.            CHANNEL_IN_FRONT_PROCESSED | CHANNEL_IN_BACK_PROCESSED);

虽然感觉不太对。对应的,getChannelMaskFromLegacyConfig就需要做添加,不然直接抛出IllegalArgumentException:


   
   
   
   
  1. //AudioRecord.java-getChannelMaskFromLegacyConfig
  2. case AudioFormat.CHANNEL_IN_7POINT1:
  3.            mask = AudioFormat.CHANNEL_IN_7POINT1;
  4.            break;

然后,计算bufferSize的地方也要修改:


   
   
   
   
  1. case AudioFormat.CHANNEL_IN_7POINT1:
  2.            channelCount = 8;

再往下,貌似是不会被参数检查所拦截了。

只有channelCount够了,frameCount才能对,bufferSize才能对应。不然数据就错乱了。因为声道数,格式等决定了每一帧所需要的缓存空间!

另外一个设置channel_count的地方:


   
   
   
   
  1. //hal/audio_hw.c
  2. struct pcm_config pcm_config_audio_capture = {
  3.   .channels = 2,
  4.   .period_count = AUDIO_CAPTURE_PERIOD_COUNT,
  5.   .format = PCM_FORMAT_S16_LE,
  6. };

普通录音场景给了个默认的2 channel.然后


   
   
   
   
  1. static int adev_open_input_stream(struct audio_hw_device *dev,
  2.                                  audio_io_handle_t handle,
  3.                                  audio_devices_t devices,
  4.                                  //注意这个:
  5.                                  struct audio_config *config,
  6.                                  struct audio_stream_in **stream_in,
  7.                                  audio_input_flags_t flags __unused,
  8.                                  const char *address __unused,
  9.                                  audio_source_t source)
  10. {
  11.   ...
  12.    in->config = pcm_config_audio_capture;//此时是默认值2
  13.   ...
  14.    //这里会取出应用层设置下来的channel_count
  15.    int channel_count = audio_channel_count_from_in_mask(config->channel_mask);
  16.    //如果应用设置的不是2.修改之
  17.    in->config.channels = channel_count;
  18.   ...
  19. }

这些都改完之后,全编译一把,然后:


   
   
   
   
  1. int channelConfiguration = AudioFormat.CHANNEL_IN_7POINT1;
  2. int audioEncodingBits = AudioFormat.ENCODING_PCM_16BIT;
  3. int sampleRateInHz = 8000;
  4. int recordBufferSize = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfiguration, audioEncodingBits);
  5. LogD("recordBufferSize = " + String.valueOf(recordBufferSize));

recordBufferSize = 5120(之前立体声1280的四倍)

表明,修改成功!Oh yeah!当然,这么修改之后,frameCount还是320(5120/(声道数×每个采样的字节数)).知道为啥定义帧的概念了吧。

稍等,好坑!


   
   
   
   
  1. AudioRecord: set(): inputSource 6, sampleRate 8000, format 0x1, channelMask 0x3fc
  2. ...
  3. audio_hw_primary: adev_open_input_stream: enter: sample_rate(8000) channel_mask(0xc)

看这log,AudioRecord::set时请求的channelMask还是0x3fc(8声道),可到了adev_open_input_stream时却变成了0xc(CHANNEL_IN_STEREO,也就是退回了立体声)。跟了下代码,发现是这里在搞怪:


   
   
   
   
  1. audio_io_handle_t AudioPolicyManager::getInputForDevice(
  2.   ...
  3.    audio_channel_mask_t channelMask,
  4.   ...)
  5. {
  6.   ...
  7.    audio_channel_mask_t profileChannelMask = channelMask;
  8.    for (;;) {
  9.        //就是这里
  10.        profile = getInputProfile(device, address,profileSamplingRate, profileFormat, profileChannelMask,profileFlags);
  11.        if (profile != 0) {
  12.            break; // success
  13.       } else if (profileFlags & AUDIO_INPUT_FLAG_RAW) {
  14.            profileFlags = (audio_input_flags_t) (profileFlags & ~AUDIO_INPUT_FLAG_RAW); // retry
  15.       } else if (profileFlags != AUDIO_INPUT_FLAG_NONE) {
  16.            profileFlags = AUDIO_INPUT_FLAG_NONE; // retry
  17.       } else { // fail
  18.            return input;
  19.       }
  20.   }
  21.   ...
  22. }

我们来看看getInputProfile


   
   
   
   
  1. sp <IOProfile> AudioPolicyManager::getInputProfile(audio_devices_t device,const String8& address,uint32_t& samplingRate,audio_format_t& format,audio_channel_mask_t& channelMask,audio_input_flags_t flags)
  2. {
  3.    // Choose an input profile based on the requested capture parameters: select the first available
  4.    // profile supporting all requested parameters.
  5.    for (size_t i = 0; i < mHwModules.size(); i++)
  6.   {
  7.        if (mHwModules[i]->mHandle == 0) {
  8.            continue;
  9.       }  
  10.        for (size_t j = 0; j < mHwModules[i]->mInputProfiles.size(); j++)
  11.       {
  12.            sp <IOProfile> profile = mHwModules[i]->mInputProfiles[j];
  13.            // profile->log();
  14.            if (profile->isCompatibleProfile(/*一堆参数*/) {
  15.                return profile;
  16.           }
  17.       }
  18. //恕老夫眼拙,没看出来和上面的for循环有什么区别?????
  19.        for (size_t j = 0; j < mHwModules[i]->mInputProfiles.size(); j++)
  20.       {
  21.            sp <IOProfile> profile = mHwModules[i]->mInputProfiles[j];
  22.            // profile->log();
  23.            if (profile->isCompatibleProfile(/*一堆参数同上*/) {
  24.                                              
  25.                return profile;
  26.           }  
  27.       }  
  28.   }  
  29.    return NULL;
  30. }

基于请求的capture参数,选择一个input profile,选中第一个可用的。

看了会儿相关代码,都要跟吐了。不过,我感觉基本就是改

audio_policy_configuration.xml(Android O新加入的)或者audio_policy.conf了,加入8声道(即7.1声道)的支持。比如这样:


   
   
   
   
  1. <mixPort name="primary input" role="sink">
  2.                     <profile name="" format="AUDIO_FORMAT_PCM_16_BIT"
  3.                             samplingRates= "8000,11025,12000,16000,22050,24000,32000,44100,48000"
  4.                             channelMasks= "AUDIO_CHANNEL_IN_MONO,AUDIO_CHANNEL_IN_STEREO,AUDIO_CHANNEL_IN_FRONT_BACK,AUDIO_CHANNEL_IN_8"/>

相应的,audio-base.h也要改一下。


   
   
   
   
  1. //audio-base.h
  2. //这个值应该要和java定义的对应0x3fc
  3. AUDIO_CHANNEL_IN_8 = 1020u

再后来,发现,读入的时候,需要在这里加一下,不然,没法识别


   
   
   
   
  1. //libmedia/TypeConverter.cpp
  2. template <>
  3. const InputChannelConverter::Table InputChannelConverter::mTable[] = {
  4.    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_MONO),
  5.    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_STEREO),
  6.    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_FRONT_BACK),
  7.    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_6),
  8.    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_UPLINK_MONO),
  9.    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_DNLINK_MONO),
  10.    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_CALL_MONO),
  11.    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_8),
  12.    TERMINATOR
  13. };

因为:


   
   
   
   
  1. //Serializer.cpp
  2. status_t AudioProfileTraits::deserialize(_xmlDoc */*doc*/, const _xmlNode *root, PtrElement &profile,
  3.                                         PtrSerializingCtx /*serializingContext*/)
  4. {
  5.    string samplingRates = getXmlAttribute(root, Attributes::samplingRates);
  6.    string format = getXmlAttribute(root, Attributes::format);
  7.    string channels = getXmlAttribute(root, Attributes::channelMasks);
  8.    profile = new Element(formatFromString(format, gDynamicFormat),
  9.                          //这里
  10.                          channelMasksFromString(channels, ","),
  11.                          samplingRatesFromString(samplingRates, ","));
  12.    
  13.    profile->setDynamicFormat(profile->getFormat() == gDynamicFormat);
  14.    profile->setDynamicChannels(profile->getChannels().isEmpty());
  15.    profile->setDynamicRate(profile->getSampleRates().isEmpty());
  16.    
  17.    return NO_ERROR;
  18. }

这样改完之后。就支持8channel录音了。当然了。。。如果tinyalsa的实现不支持,pcm_open的时候恐怕是要报错的。那就是另外一个话题了.

你可能感兴趣的:(android-audio)