WebRTC Audio Capture-to-Send Flow (iOS)

These are my own working notes, so they are rough and not carefully organized; read at your own risk.

src/audio/audio_state.cc

void AudioState::AddSendingStream(webrtc::AudioSendStream* stream,
                                  int sample_rate_hz,
                                  size_t num_channels) {
  RTC_DCHECK(thread_checker_.CalledOnValidThread());
  auto& properties = sending_streams_[stream];
  properties.sample_rate_hz = sample_rate_hz;
  properties.num_channels = num_channels;
  UpdateAudioTransportWithSendingStreams();

  // Make sure recording is initialized; start recording if enabled.
  auto* adm = config_.audio_device_module.get();
  if (!adm->Recording()) {
    if (adm->InitRecording() == 0) {
      if (recording_enabled_) {
        adm->StartRecording();
      }
    } else {
      RTC_DLOG_F(LS_ERROR) << "Failed to initialize recording.";
    }
  }
}
src/media/engine/webrtcvoiceengine.cc

void WebRtcVoiceEngine::Init() {
...
#if defined(WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE)
  // No ADM supplied? Create a default one.
  if (!adm_) {
    adm_ = webrtc::AudioDeviceModule::Create(
        webrtc::AudioDeviceModule::kPlatformDefaultAudio);
  }
#endif  // WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE
  RTC_CHECK(adm());
  webrtc::adm_helpers::Init(adm());
  webrtc::apm_helpers::Init(apm());

  // Set up AudioState.
  {
    webrtc::AudioState::Config config;
    if (audio_mixer_) {
      config.audio_mixer = audio_mixer_;
    } else {
      config.audio_mixer = webrtc::AudioMixerImpl::Create();
    }
    config.audio_processing = apm_;
    config.audio_device_module = adm_;
    audio_state_ = webrtc::AudioState::Create(config);
  }

  // Connect the ADM to our audio path.
  adm()->RegisterAudioCallback(audio_state()->audio_transport());

  // Save the default AGC configuration settings. This must happen before
  // calling ApplyOptions or the default will be overwritten.
  default_agc_config_ = webrtc::apm_helpers::GetAgcConfig(apm());

  // Set default engine options.
  {
    AudioOptions options;
    options.echo_cancellation = true;
    options.auto_gain_control = true;
    options.noise_suppression = true;
    options.highpass_filter = true;
    options.stereo_swapping = false;
    options.audio_jitter_buffer_max_packets = 50;
    options.audio_jitter_buffer_fast_accelerate = false;
    options.audio_jitter_buffer_min_delay_ms = 0;
    options.typing_detection = true;
    options.experimental_agc = false;
    options.extended_filter_aec = false;
    options.delay_agnostic_aec = false;
    options.experimental_ns = false;
    options.residual_echo_detector = true;
    bool error = ApplyOptions(options);
    RTC_DCHECK(error);
  }

  initialized_ = true;
}

The WebRtcVoiceEngine class is only used when the HAVE_WEBRTC_VOICE macro is defined.
adm_ can be created externally and passed into WebRtcVoiceEngine's constructor; only when adm_ is null and the WEBRTC_INCLUDE_INTERNAL_AUDIO_DEVICE macro is defined does the engine fall back to creating adm_ itself.

The AudioLayer types:

  enum AudioLayer {
    kPlatformDefaultAudio = 0,
    kWindowsCoreAudio,
    kWindowsCoreAudio2,  // experimental
    kLinuxAlsaAudio,
    kLinuxPulseAudio,
    kAndroidJavaAudio,
    kAndroidOpenSLESAudio,
    kAndroidJavaInputAndOpenSLESOutputAudio,
    kAndroidAAudioAudio,
    kAndroidJavaInputAndAAudioOutputAudio,
    kDummyAudio,
  };

AudioDeviceModule::Create creates an AudioDeviceModuleImpl:

rtc::scoped_refptr<AudioDeviceModule> AudioDeviceModule::Create(
    const AudioLayer audio_layer) {
  RTC_LOG(INFO) << __FUNCTION__;
  return AudioDeviceModule::CreateForTest(audio_layer);
}
rtc::scoped_refptr<AudioDeviceModuleForTest> AudioDeviceModule::CreateForTest(
    const AudioLayer audio_layer) {
  RTC_LOG(INFO) << __FUNCTION__;

  // The "AudioDeviceModule::kWindowsCoreAudio2" audio layer has its own
  // dedicated factory method which should be used instead.
  if (audio_layer == AudioDeviceModule::kWindowsCoreAudio2) {
    RTC_LOG(LS_ERROR) << "Use the CreateWindowsCoreAudioAudioDeviceModule() "
                         "factory method instead for this option.";
    return nullptr;
  }

  // Create the generic reference counted (platform independent) implementation.
  rtc::scoped_refptr<AudioDeviceModuleImpl> audioDevice(
      new rtc::RefCountedObject<AudioDeviceModuleImpl>(audio_layer));

  // Ensure that the current platform is supported.
  if (audioDevice->CheckPlatform() == -1) {
    return nullptr;
  }

  // Create the platform-dependent implementation.
  if (audioDevice->CreatePlatformSpecificObjects() == -1) {
    return nullptr;
  }

  // Ensure that the generic audio buffer can communicate with the platform
  // specific parts.
  if (audioDevice->AttachAudioBuffer() == -1) {
    return nullptr;
  }

  return audioDevice;
}
src/modules/audio_device/audio_device_impl.cc
int32_t AudioDeviceModuleImpl::StartRecording() {
  RTC_LOG(INFO) << __FUNCTION__;
  CHECKinitialized_();
  if (Recording()) {
    return 0;
  }
  audio_device_buffer_.StartRecording();
  int32_t result = audio_device_->StartRecording();
  RTC_LOG(INFO) << "output: " << result;
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.StartRecordingSuccess",
                        static_cast<int>(result == 0));
  return result;
}

audio_device_ is instantiated with a different implementation class on each platform; on iOS it is AudioDeviceIOS. In practice, however, the AudioDevice on iOS is not created through this path: on iOS the adm_ object is created by the upper layer and passed in, so webrtc::AudioDeviceModule::Create is never called here. The creation of adm_ is analyzed below.

Creating WebRtcVoiceEngine


Let's step back and look at how WebRtcVoiceEngine is created.

src/sdk/objc/api/peerconnection/RTCPeerConnectionFactory.mm

- (instancetype)
    initWithNativeAudioEncoderFactory:... {
...
    std::unique_ptr<cricket::MediaEngineInterface> media_engine =
        cricket::WebRtcMediaEngineFactory::Create(audioDeviceModule,
                                                  audioEncoderFactory,
                                                  audioDecoderFactory,
                                                  std::move(videoEncoderFactory),
                                                  std::move(videoDecoderFactory),
                                                  nullptr,  // audio mixer
                                                  audioProcessingModule);

...
  return self;
#endif
}

- (rtc::scoped_refptr<webrtc::AudioDeviceModule>)audioDeviceModule {
#if defined(WEBRTC_IOS)
  return webrtc::CreateAudioDeviceModule();
#else
  return nullptr;
#endif
}

When RTCPeerConnectionFactory is initialized via initWithNativeAudioEncoderFactory:..., it calls WebRtcMediaEngineFactory::Create.
The audioDeviceModule here is created on iOS by webrtc::CreateAudioDeviceModule(); on macOS it is nullptr and gets created internally.

src/sdk/objc/native/api/audio_device_module.mm
namespace webrtc {

rtc::scoped_refptr<AudioDeviceModule> CreateAudioDeviceModule() {
  RTC_LOG(INFO) << __FUNCTION__;
#if defined(WEBRTC_IOS)
  return new rtc::RefCountedObject<ios_adm::AudioDeviceModuleIOS>();
#else
  RTC_LOG(LERROR)
      << "current platform is not supported => this module will self destruct!";
  return nullptr;
#endif
}
}

As you can see, on iOS an AudioDeviceModuleIOS is created, and its Init method in turn creates the AudioDeviceIOS:

src/sdk/objc/native/src/audio/audio_device_module_ios.mm

  int32_t AudioDeviceModuleIOS::Init() {
    RTC_LOG(INFO) << __FUNCTION__;
    if (initialized_)
      return 0;

    audio_device_buffer_.reset(new webrtc::AudioDeviceBuffer());
    audio_device_.reset(new ios_adm::AudioDeviceIOS());
    RTC_CHECK(audio_device_);

    this->AttachAudioBuffer();

    AudioDeviceGeneric::InitStatus status = audio_device_->Init();
    RTC_HISTOGRAM_ENUMERATION(
        "WebRTC.Audio.InitializationResult", static_cast(status),
        static_cast(AudioDeviceGeneric::InitStatus::NUM_STATUSES));
    if (status != AudioDeviceGeneric::InitStatus::OK) {
      RTC_LOG(LS_ERROR) << "Audio device initialization failed.";
      return -1;
    }
    initialized_ = true;
    return 0;
  }

At the same time AudioDeviceIOS is attached to the AudioDeviceBuffer, and the recorded audio data is written into the AudioDeviceBuffer.
The recorded audio is not written into the AudioDeviceBuffer directly, though: it passes through a FineAudioBuffer first.
FineAudioBuffer converts the captured audio and delivers it to the AudioDeviceBuffer in 10 ms frames.
FineAudioBuffer is used not only for capture but also for playout.

src/sdk/objc/native/src/audio/audio_device_ios.mm
  fine_audio_buffer_.reset(new FineAudioBuffer(audio_device_buffer_));

System audio capture and its callback


iOS audio capture uses an Audio Unit; the implementation class is VoiceProcessingAudioUnit, located in src/sdk/objc/native/src/audio/voice_processing_audio_unit.mm.
The audio data captured by the system is delivered to AudioDeviceIOS::OnDeliverRecordedData:

src/sdk/objc/native/src/audio/audio_device_ios.mm

OSStatus AudioDeviceIOS::OnDeliverRecordedData(AudioUnitRenderActionFlags* flags,
                                               const AudioTimeStamp* time_stamp,
                                               UInt32 bus_number,
                                               UInt32 num_frames,
                                               AudioBufferList* /* io_data */) {
  RTC_DCHECK_RUN_ON(&io_thread_checker_);
  OSStatus result = noErr;
  // Simply return if recording is not enabled.
  if (!rtc::AtomicOps::AcquireLoad(&recording_)) return result;

  // Set the size of our own audio buffer and clear it first to avoid copying
  // in combination with potential reallocations.
  // On real iOS devices, the size will only be set once (at first callback).
  record_audio_buffer_.Clear();
  record_audio_buffer_.SetSize(num_frames);

  // Allocate AudioBuffers to be used as storage for the received audio.
  // The AudioBufferList structure works as a placeholder for the
  // AudioBuffer structure, which holds a pointer to the actual data buffer
  // in |record_audio_buffer_|. Recorded audio will be rendered into this memory
  // at each input callback when calling AudioUnitRender().
  AudioBufferList audio_buffer_list;
  audio_buffer_list.mNumberBuffers = 1;
  AudioBuffer* audio_buffer = &audio_buffer_list.mBuffers[0];
  audio_buffer->mNumberChannels = record_parameters_.channels();
  audio_buffer->mDataByteSize =
      record_audio_buffer_.size() * VoiceProcessingAudioUnit::kBytesPerSample;
  audio_buffer->mData = reinterpret_cast<int8_t*>(record_audio_buffer_.data());

  // Obtain the recorded audio samples by initiating a rendering cycle.
  // Since it happens on the input bus, the |io_data| parameter is a reference
  // to the preallocated audio buffer list that the audio unit renders into.
  // We can make the audio unit provide a buffer instead in io_data, but we
  // currently just use our own.
  // TODO(henrika): should error handling be improved?
  result = audio_unit_->Render(flags, time_stamp, bus_number, num_frames, &audio_buffer_list);
  if (result != noErr) {
    RTCLogError(@"Failed to render audio.");
    return result;
  }

  // Get a pointer to the recorded audio and send it to the WebRTC ADB.
  // Use the FineAudioBuffer instance to convert between native buffer size
  // and the 10ms buffer size used by WebRTC.
  fine_audio_buffer_->DeliverRecordedData(record_audio_buffer_, kFixedRecordDelayEstimate);
  return noErr;
}

As the code above shows, it uses its own AudioBufferList whose real backing buffer is record_audio_buffer_, and then calls audio_unit_->Render, which uses AudioUnitRender to write the data into this WebRTC-provided buffer:

src/sdk/objc/native/src/audio/voice_processing_audio_unit.mm

OSStatus VoiceProcessingAudioUnit::Render(AudioUnitRenderActionFlags* flags,
                                          const AudioTimeStamp* time_stamp,
                                          UInt32 output_bus_number,
                                          UInt32 num_frames,
                                          AudioBufferList* io_data) {
  RTC_DCHECK(vpio_unit_) << "Init() not called.";

  OSStatus result = AudioUnitRender(vpio_unit_, flags, time_stamp,
                                    output_bus_number, num_frames, io_data);
  if (result != noErr) {
    RTCLogError(@"Failed to render audio unit. Error=%ld", (long)result);
  }
  return result;
}

record_audio_buffer_ is then handed to FineAudioBuffer for processing.

FineAudioBuffer


src/modules/audio_device/fine_audio_buffer.cc

void FineAudioBuffer::DeliverRecordedData(
    rtc::ArrayView<const int16_t> audio_buffer,
    int record_delay_ms) {
  RTC_DCHECK(IsReadyForRecord());
  // Always append new data and grow the buffer when needed.
  record_buffer_.AppendData(audio_buffer.data(), audio_buffer.size());
  // Consume samples from buffer in chunks of 10ms until there is not
  // enough data left. The number of remaining samples in the cache is given by
  // the new size of the internal |record_buffer_|.
  const size_t num_elements_10ms =
      record_channels_ * record_samples_per_channel_10ms_;
  while (record_buffer_.size() >= num_elements_10ms) {
    audio_device_buffer_->SetRecordedBuffer(record_buffer_.data(),
                                            record_samples_per_channel_10ms_);
    audio_device_buffer_->SetVQEData(playout_delay_ms_, record_delay_ms);
    audio_device_buffer_->DeliverRecordedData();
    memmove(record_buffer_.data(), record_buffer_.data() + num_elements_10ms,
            (record_buffer_.size() - num_elements_10ms) * sizeof(int16_t));
    record_buffer_.SetSize(record_buffer_.size() - num_elements_10ms);
  }
}

When FineAudioBuffer receives audio data from the system, it appends it to its own record_buffer_ and then delivers every complete 10 ms chunk in that buffer to the AudioDeviceBuffer:

src/modules/audio_device/audio_device_buffer.cc

int32_t AudioDeviceBuffer::DeliverRecordedData() {
  if (!audio_transport_cb_) {
    RTC_LOG(LS_WARNING) << "Invalid audio transport";
    return 0;
  }
  const size_t frames = rec_buffer_.size() / rec_channels_;
  const size_t bytes_per_frame = rec_channels_ * sizeof(int16_t);
  uint32_t new_mic_level_dummy = 0;
  uint32_t total_delay_ms = play_delay_ms_ + rec_delay_ms_;
  int32_t res = audio_transport_cb_->RecordedDataIsAvailable(
      rec_buffer_.data(), frames, bytes_per_frame, rec_channels_,
      rec_sample_rate_, total_delay_ms, 0, 0, typing_status_,
      new_mic_level_dummy);
  if (res == -1) {
    RTC_LOG(LS_ERROR) << "RecordedDataIsAvailable() failed";
  }
  return 0;
}

Once AudioDeviceBuffer has the data, it calls back into AudioTransport::RecordedDataIsAvailable.
The AudioTransport is registered via AudioDeviceBuffer::RegisterAudioCallback:

src/media/engine/webrtcvoiceengine.cc
void WebRtcVoiceEngine::Init() {
...
  // Connect the ADM to our audio path.
  adm()->RegisterAudioCallback(audio_state()->audio_transport());
...
}

audio_state() returns the audio_state_ object, which is created with:
audio_state_ = webrtc::AudioState::Create(config);

src/audio/audio_state.cc
AudioState::AudioState(const AudioState::Config& config)
    : config_(config),
      audio_transport_(config_.audio_mixer, config_.audio_processing.get()) {
  process_thread_checker_.DetachFromThread();
  RTC_DCHECK(config_.audio_mixer);
  RTC_DCHECK(config_.audio_device_module);
}

audio_transport_ is an AudioTransportImpl instance.

AudioTransportImpl


AudioTransportImpl does quite a bit of work once it receives the captured audio data:

src/audio/audio_transport_impl.cc

// Not used in Chromium. Process captured audio and distribute to all sending
// streams, and try to do this at the lowest possible sample rate.
int32_t AudioTransportImpl::RecordedDataIsAvailable(
    const void* audio_data,
    const size_t number_of_frames,
    const size_t bytes_per_sample,
    const size_t number_of_channels,
    const uint32_t sample_rate,
    const uint32_t audio_delay_milliseconds,
    const int32_t /*clock_drift*/,
    const uint32_t /*volume*/,
    const bool key_pressed,
    uint32_t& /*new_mic_volume*/) {  // NOLINT: to avoid changing APIs
  RTC_DCHECK(audio_data);
  RTC_DCHECK_GE(number_of_channels, 1);
  RTC_DCHECK_LE(number_of_channels, 2);
  RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample);
  RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
  // 100 = 1 second / data duration (10 ms).
  RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
  RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels,
                AudioFrame::kMaxDataSizeBytes);

  int send_sample_rate_hz = 0;
  size_t send_num_channels = 0;
  bool swap_stereo_channels = false;
  {
    rtc::CritScope lock(&capture_lock_);
    send_sample_rate_hz = send_sample_rate_hz_;
    send_num_channels = send_num_channels_;
    swap_stereo_channels = swap_stereo_channels_;
  }

  std::unique_ptr<AudioFrame> audio_frame(new AudioFrame());
  InitializeCaptureFrame(sample_rate, send_sample_rate_hz, number_of_channels,
                         send_num_channels, audio_frame.get());
  voe::RemixAndResample(static_cast<const int16_t*>(audio_data),
                        number_of_frames, number_of_channels, sample_rate,
                        &capture_resampler_, audio_frame.get());
  ProcessCaptureFrame(audio_delay_milliseconds, key_pressed,
                      swap_stereo_channels, audio_processing_,
                      audio_frame.get());

  // Typing detection (utilizes the APM/VAD decision). We let the VAD determine
  // if we're using this feature or not.
  // TODO(solenberg): is_enabled() takes a lock. Work around that.
  bool typing_detected = false;
  if (audio_processing_->voice_detection()->is_enabled()) {
    if (audio_frame->vad_activity_ != AudioFrame::kVadUnknown) {
      bool vad_active = audio_frame->vad_activity_ == AudioFrame::kVadActive;
      typing_detected = typing_detection_.Process(key_pressed, vad_active);
    }
  }

  // Measure audio level of speech after all processing.
  double sample_duration = static_cast<double>(number_of_frames) / sample_rate;
  audio_level_.ComputeLevel(*audio_frame.get(), sample_duration);

  // Copy frame and push to each sending stream. The copy is required since an
  // encoding task will be posted internally to each stream.
  {
    rtc::CritScope lock(&capture_lock_);
    typing_noise_detected_ = typing_detected;

    RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
    if (!sending_streams_.empty()) {
      auto it = sending_streams_.begin();
      while (++it != sending_streams_.end()) {
        std::unique_ptr<AudioFrame> audio_frame_copy(new AudioFrame());
        audio_frame_copy->CopyFrom(*audio_frame.get());
        (*it)->SendAudioData(std::move(audio_frame_copy));
      }
      // Send the original frame to the first stream w/o copying.
      (*sending_streams_.begin())->SendAudioData(std::move(audio_frame));
    }
  }

  return 0;
}

voe::RemixAndResample first checks whether the source channel count exceeds the target channel count and, if so, downmixes to the target channel count; it then resamples to the target sample rate.
Finally, if the source is mono and the target is stereo, the audio is upmixed to stereo.
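To make that ordering concrete, here is a minimal, self-contained sketch of the downmix -> resample -> upmix sequence. It is only an illustration, not WebRTC's actual voe::RemixAndResample (which uses AudioFrameOperations and PushResampler internally); the naive linear-interpolation resampler, and the assumption that the resampled data is mono, are simplifications of my own.

#include <cstdint>
#include <vector>

// Downmix interleaved stereo to mono by averaging the two channels.
static std::vector<int16_t> DownmixToMono(const std::vector<int16_t>& stereo) {
  std::vector<int16_t> mono(stereo.size() / 2);
  for (size_t i = 0; i < mono.size(); ++i) {
    mono[i] = static_cast<int16_t>((stereo[2 * i] + stereo[2 * i + 1]) / 2);
  }
  return mono;
}

// Naive linear-interpolation resampler for mono data; a stand-in for the
// real PushResampler.
static std::vector<int16_t> ResampleMono(const std::vector<int16_t>& in,
                                         int in_rate, int out_rate) {
  std::vector<int16_t> out(in.size() * out_rate / in_rate);
  for (size_t i = 0; i < out.size(); ++i) {
    const double pos = static_cast<double>(i) * in_rate / out_rate;
    const size_t j = static_cast<size_t>(pos);
    const size_t k = (j + 1 < in.size()) ? j + 1 : j;
    const double frac = pos - j;
    out[i] = static_cast<int16_t>(in[j] * (1.0 - frac) + in[k] * frac);
  }
  return out;
}

// Duplicate a mono signal into interleaved stereo.
static std::vector<int16_t> MonoToStereo(const std::vector<int16_t>& mono) {
  std::vector<int16_t> stereo(mono.size() * 2);
  for (size_t i = 0; i < mono.size(); ++i) {
    stereo[2 * i] = stereo[2 * i + 1] = mono[i];
  }
  return stereo;
}

// Same ordering as described above: downmix before resampling,
// upmix (mono -> stereo) after resampling.
std::vector<int16_t> RemixAndResampleSketch(std::vector<int16_t> audio,
                                            size_t src_channels, int src_rate,
                                            size_t dst_channels, int dst_rate) {
  if (src_channels > dst_channels) {
    audio = DownmixToMono(audio);  // e.g. stereo capture, mono send codec
  }
  audio = ResampleMono(audio, src_rate, dst_rate);
  if (src_channels == 1 && dst_channels == 2) {
    audio = MonoToStereo(audio);
  }
  return audio;
}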

After resampling and channel conversion, the next stage of audio processing follows:

  ProcessCaptureFrame(audio_delay_milliseconds, key_pressed,
                      swap_stereo_channels, audio_processing_,
                      audio_frame.get());

audio_processing_ is config_.audio_processing, which is the same object as WebRtcVoiceEngine's rtc::scoped_refptr<webrtc::AudioProcessing> apm_.
In RTCPeerConnectionFactory it arrives as the (rtc::scoped_refptr<webrtc::AudioProcessing>)audioProcessingModule parameter:

    if (!audioProcessingModule) audioProcessingModule = webrtc::AudioProcessingBuilder().Create();

So audio_processing_ is created by webrtc::AudioProcessingBuilder().Create().

src/modules/audio_processing/audio_processing_impl.cc

AudioProcessing* AudioProcessingBuilder::Create(const webrtc::Config& config) {
  AudioProcessingImpl* apm = new rtc::RefCountedObject<AudioProcessingImpl>(
      config, std::move(capture_post_processing_),
      std::move(render_pre_processing_), std::move(echo_control_factory_),
      std::move(echo_detector_), std::move(capture_analyzer_));
  if (apm->Initialize() != AudioProcessing::kNoError) {
    delete apm;
    apm = nullptr;
  }
  return apm;
}

AudioProcessingImpl aggregates the many audio-processing submodules.

ProcessCaptureFrame calls AudioProcessingImpl::ProcessStream(AudioFrame* frame):

src/modules/audio_processing/audio_processing_impl.cc
int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
...
  capture_.capture_audio->DeinterleaveFrom(frame);
  RETURN_ON_ERR(ProcessCaptureStreamLocked());
  capture_.capture_audio->InterleaveTo(
      frame, submodule_states_.CaptureMultiBandProcessingActive() ||
                 submodule_states_.CaptureFullBandProcessingActive());

DeinterleaveFrom splits the interleaved audio by channel into capture_.capture_audio.
ProcessCaptureStreamLocked() performs the actual audio processing; the details are covered in the companion article WebRTC采集音频后的音频处理算法 (WebRTC's post-capture audio processing).
Finally, InterleaveTo merges the separated channels back into the frame.
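To illustrate what the de-interleave/interleave step means (a minimal sketch, not the actual AudioBuffer code): interleaved samples L0 R0 L1 R1 ... are split into one contiguous buffer per channel so the submodules can process each channel separately, then merged back afterwards.

#include <cstdint>
#include <vector>

// Split interleaved samples (L0 R0 L1 R1 ...) into one buffer per channel.
std::vector<std::vector<int16_t>> Deinterleave(const int16_t* interleaved,
                                               size_t samples_per_channel,
                                               size_t num_channels) {
  std::vector<std::vector<int16_t>> planar(
      num_channels, std::vector<int16_t>(samples_per_channel));
  for (size_t i = 0; i < samples_per_channel; ++i) {
    for (size_t ch = 0; ch < num_channels; ++ch) {
      planar[ch][i] = interleaved[i * num_channels + ch];
    }
  }
  return planar;
}

// Merge the per-channel buffers back into the interleaved layout.
void Interleave(const std::vector<std::vector<int16_t>>& planar,
                int16_t* interleaved) {
  const size_t num_channels = planar.size();
  const size_t samples_per_channel = planar.empty() ? 0 : planar[0].size();
  for (size_t i = 0; i < samples_per_channel; ++i) {
    for (size_t ch = 0; ch < num_channels; ++ch) {
      interleaved[i * num_channels + ch] = planar[ch][i];
    }
  }
}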

typing_detection_.Process detects the sound of keyboard typing.

audio_level_.ComputeLevel computes the audio level.

In the final SendAudioData stage, sending_streams_ is iterated starting from the second AudioSendStream; for each of those a new AudioFrame is created and the data of audio_frame is copied into it. The copy is required because every AudioSendStream encodes the audio frame independently. The first AudioSendStream needs no copy: audio_frame is handed to it directly.

ProcessCaptureStreamLocked audio processing


That is covered in a separate article: WebRTC采集音频后的音频处理算法 (WebRTC's post-capture audio processing).

AudioSendStream


sending_streams_ stores AudioSendStream objects; the AudioSendStream is created and held by WebRtcVoiceMediaChannel::WebRtcAudioSendStream:

src/media/engine/webrtcvoiceengine.cc
WebRtcAudioSendStream(...){
...
    stream_ = call_->CreateAudioSendStream(config_);
...
}

WebRtcAudioSendStream is created in WebRtcVoiceMediaChannel::AddSendStream.
The call chain that reaches AddSendStream is:

iOS: RTCPeerConnection::setLocalDescription->
C++:
PeerConnection::SetLocalDescription->
PeerConnection::ApplyLocalDescription->
PeerConnection::UpdateSessionState->
PeerConnection::PushdownMediaDescription->
BaseChannel::SetLocalContent->
VoiceChannel::SetLocalContent_w->
BaseChannel::UpdateLocalStreams_w->
WebRtcVoiceMediaChannel::AddSendStream

The actual sending behind AudioSendStream::SendAudioData is done by the ChannelSend created via voe::CreateChannelSend, which calls ProcessAndEncodeAudio:

void ChannelSend::ProcessAndEncodeAudio(
    std::unique_ptr<AudioFrame> audio_frame) {
  RTC_DCHECK_RUNS_SERIALIZED(&audio_thread_race_checker_);
  // Avoid posting any new tasks if sending was already stopped in StopSend().
  rtc::CritScope cs(&encoder_queue_lock_);
  if (!encoder_queue_is_active_) {
    return;
  }
  // Profile time between when the audio frame is added to the task queue and
  // when the task is actually executed.
  audio_frame->UpdateProfileTimeStamp();
  encoder_queue_->PostTask(std::unique_ptr<rtc::QueuedTask>(
      new ProcessAndEncodeAudioTask(std::move(audio_frame), this)));
}

At this point it is worth introducing a fairly central object: Call. The encoder_queue_ above is obtained from its member RtpTransportControllerSendInterface via transport_send_ptr_->GetWorkerQueue(). Call itself is originally created by PeerConnectionFactory:

src/pc/peerconnectionfactory.cc

rtc::scoped_refptr<PeerConnectionInterface>
PeerConnectionFactory::CreatePeerConnection(
    const PeerConnectionInterface::RTCConfiguration& configuration,
    PeerConnectionDependencies dependencies) {
  RTC_DCHECK(signaling_thread_->IsCurrent());

  // Set internal defaults if optional dependencies are not set.
  if (!dependencies.cert_generator) {
    dependencies.cert_generator =
        absl::make_unique<rtc::RTCCertificateGenerator>(signaling_thread_,
                                                        network_thread_);
  }
  if (!dependencies.allocator) {
    network_thread_->Invoke<void>(RTC_FROM_HERE, [this, &configuration,
                                                  &dependencies]() {
      dependencies.allocator = absl::make_unique<cricket::BasicPortAllocator>(
          default_network_manager_.get(), default_socket_factory_.get(),
          configuration.turn_customizer);
    });
  }

  // TODO(zstein): Once chromium injects its own AsyncResolverFactory, set
  // |dependencies.async_resolver_factory| to a new
  // |rtc::BasicAsyncResolverFactory| if no factory is provided.

  network_thread_->Invoke<void>(
      RTC_FROM_HERE,
      rtc::Bind(&cricket::PortAllocator::SetNetworkIgnoreMask,
                dependencies.allocator.get(), options_.network_ignore_mask));

  std::unique_ptr<RtcEventLog> event_log =
      worker_thread_->Invoke<std::unique_ptr<RtcEventLog>>(
          RTC_FROM_HERE,
          rtc::Bind(&PeerConnectionFactory::CreateRtcEventLog_w, this));

  std::unique_ptr<Call> call = worker_thread_->Invoke<std::unique_ptr<Call>>(
      RTC_FROM_HERE,
      rtc::Bind(&PeerConnectionFactory::CreateCall_w, this, event_log.get()));

  rtc::scoped_refptr<PeerConnection> pc(
      new rtc::RefCountedObject<PeerConnection>(this, std::move(event_log),
                                                std::move(call)));
  ActionsBeforeInitializeForTesting(pc);
  if (!pc->Initialize(configuration, std::move(dependencies))) {
    return nullptr;
  }
  return PeerConnectionProxy::Create(signaling_thread(), pc);
}

Creation of the member RtpTransportControllerSendInterface* transport_send_ptr_:

Call* Call::Create(const Call::Config& config) {
  return new internal::Call(
      config, absl::make_unique<RtpTransportControllerSend>(
                  Clock::GetRealTimeClock(), config.event_log,
                  config.network_controller_factory, config.bitrate_config));
}

src/call/rtp_transport_controller_send.cc:
RtpTransportControllerSend's constructor creates a thread, process_thread_(ProcessThread::Create("SendControllerThread")),
and registers two Modules on process_thread_:

  process_thread_->RegisterModule(&pacer_, RTC_FROM_HERE);
  process_thread_->RegisterModule(send_side_cc_.get(), RTC_FROM_HERE);

process_thread_
The implementation class of process_thread_ is ProcessThreadImpl; the underlying thread is created with the platform thread class rtc::PlatformThread.
While the thread runs, it loops over ProcessThreadImpl::Process().
ProcessThreadImpl::Process() iterates over all registered modules; each module checks its own timer, and when the time is due the module's Process method is executed.
ProcessThreadImpl::Process() also takes every task out of its queue_, runs each one with task->Run(), and deletes the task once it has finished.
It then suspends the thread with wake_up_.Wait for the shortest wait time required by the modules, until the timeout expires or WakeUp, PostTask, RegisterModule, or Stop wakes it up.
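A heavily simplified sketch of that loop shape follows. It is not the real ProcessThreadImpl (which also handles delayed tasks, module attach/detach and process-thread registration); it only shows the timer-driven module processing, the task draining, and the conditional wait described above, using minimal Module and QueuedTask stand-ins.

#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <deque>
#include <memory>
#include <mutex>
#include <vector>

// Minimal stand-ins for webrtc::Module and rtc::QueuedTask.
struct Module {
  virtual ~Module() = default;
  virtual int64_t TimeUntilNextProcess() = 0;  // ms until Process() is due
  virtual void Process() = 0;
};
struct QueuedTask {
  virtual ~QueuedTask() = default;
  virtual void Run() = 0;
};

class ProcessLoopSketch {
 public:
  void RegisterModule(Module* module) {
    std::lock_guard<std::mutex> lock(mutex_);
    modules_.push_back(module);
    wake_up_.notify_one();  // like RegisterModule(): re-evaluate the wait time
  }
  void PostTask(std::unique_ptr<QueuedTask> task) {
    std::lock_guard<std::mutex> lock(mutex_);
    queue_.push_back(std::move(task));
    wake_up_.notify_one();  // like PostTask(): wake the loop up immediately
  }
  // Loop body; the real code runs this on an rtc::PlatformThread.
  void Process() {
    std::unique_lock<std::mutex> lock(mutex_);
    while (!stopped_) {
      int64_t min_wait_ms = 100;  // arbitrary upper bound for the sketch
      for (Module* module : modules_) {
        int64_t wait_ms = module->TimeUntilNextProcess();
        if (wait_ms <= 0) {
          module->Process();  // the module's timer expired: run it
          wait_ms = module->TimeUntilNextProcess();
        }
        min_wait_ms = std::min(min_wait_ms, std::max<int64_t>(wait_ms, 0));
      }
      while (!queue_.empty()) {  // run and delete every queued task
        std::unique_ptr<QueuedTask> task = std::move(queue_.front());
        queue_.pop_front();
        lock.unlock();
        task->Run();
        lock.lock();
      }
      // Suspend until the earliest module deadline or an explicit wake-up.
      wake_up_.wait_for(lock, std::chrono::milliseconds(min_wait_ms));
    }
  }

 private:
  std::mutex mutex_;
  std::condition_variable wake_up_;
  std::vector<Module*> modules_;
  std::deque<std::unique_ptr<QueuedTask>> queue_;
  bool stopped_ = false;
};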

TaskQueue:
RtpTransportControllerSend's member rtc::TaskQueue task_queue_ has a different implementation on each platform:
on iOS it is implemented in src/rtc_base/task_queue_gcd.cc; the other platform variants are:
src/rtc_base/task_queue_win.cc
src/rtc_base/task_queue_libevent.cc
src/rtc_base/task_queue_stdlib.cc
In short, a TaskQueue maintains its own thread to process the tasks in its queue.
On iOS it is implemented directly on top of GCD; the internal queue_ is a serial queue, queue_(dispatch_queue_create(queue_name, DISPATCH_QUEUE_SERIAL)), and PostTask calls dispatch_async_f to enqueue the work asynchronously.
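A minimal sketch of that GCD pattern, simplified from the real task_queue_gcd.cc (which also tracks queue contexts and task-queue lifetime), with a minimal QueuedTask stand-in:

#include <dispatch/dispatch.h>
#include <memory>

// Same shape as rtc::QueuedTask: something with a Run() method.
struct QueuedTask {
  virtual ~QueuedTask() = default;
  virtual void Run() = 0;
};

class GcdTaskQueueSketch {
 public:
  explicit GcdTaskQueueSketch(const char* queue_name)
      : queue_(dispatch_queue_create(queue_name, DISPATCH_QUEUE_SERIAL)) {}
  ~GcdTaskQueueSketch() {
    dispatch_release(queue_);  // not needed when GCD objects are managed by ARC
  }

  // Hand ownership of the task to GCD; RunTask deletes it after running.
  void PostTask(std::unique_ptr<QueuedTask> task) {
    dispatch_async_f(queue_, task.release(), &RunTask);
  }

 private:
  static void RunTask(void* context) {
    std::unique_ptr<QueuedTask> task(static_cast<QueuedTask*>(context));
    task->Run();  // runs serially, in FIFO order, on the GCD queue
  }

  dispatch_queue_t queue_;
};

Because dispatch_async_f takes only a plain function pointer plus a context pointer, the task is released to a raw pointer and re-wrapped inside RunTask; the serial dispatch queue plays the role of the TaskQueue's worker thread.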

encoder_queue_->PostTask(std::unique_ptr<rtc::QueuedTask>(
      new ProcessAndEncodeAudioTask(std::move(audio_frame), this)));

Once the audio data has been captured and processed, the AudioFrame is pushed onto a TaskQueue and handled one frame at a time; the handler is ProcessAndEncodeAudioOnTaskQueue:

src/audio/channel_send.cc

void ChannelSend::ProcessAndEncodeAudioOnTaskQueue(AudioFrame* audio_input) {
  RTC_DCHECK_RUN_ON(encoder_queue_);
  RTC_DCHECK_GT(audio_input->samples_per_channel_, 0);
  RTC_DCHECK_LE(audio_input->num_channels_, 2);

  // Measure time between when the audio frame is added to the task queue and
  // when the task is actually executed. Goal is to keep track of unwanted
  // extra latency added by the task queue.
  RTC_HISTOGRAM_COUNTS_10000("WebRTC.Audio.EncodingTaskQueueLatencyMs",
                             audio_input->ElapsedProfileTimeMs());

  bool is_muted = InputMute();
  AudioFrameOperations::Mute(audio_input, previous_frame_muted_, is_muted);

  if (_includeAudioLevelIndication) {
    size_t length =
        audio_input->samples_per_channel_ * audio_input->num_channels_;
    RTC_CHECK_LE(length, AudioFrame::kMaxDataSizeBytes);
    if (is_muted && previous_frame_muted_) {
      rms_level_.AnalyzeMuted(length);
    } else {
      rms_level_.Analyze(
          rtc::ArrayView<const int16_t>(audio_input->data(), length));
    }
  }
  previous_frame_muted_ = is_muted;

  // Add 10ms of raw (PCM) audio data to the encoder @ 32kHz.

  // The ACM resamples internally.
  audio_input->timestamp_ = _timeStamp;
  // This call will trigger AudioPacketizationCallback::SendData if encoding
  // is done and payload is ready for packetization and transmission.
  // Otherwise, it will return without invoking the callback.
  if (audio_coding_->Add10MsData(*audio_input) < 0) {
    RTC_DLOG(LS_ERROR) << "ACM::Add10MsData() failed.";
    return;
  }

  _timeStamp += static_cast<uint32_t>(audio_input->samples_per_channel_);
}

audio_coding_ is the audio coding module; once a frame has been encoded, it calls back into ChannelSend::SendData:

int32_t ChannelSend::SendData(FrameType frameType,
                              uint8_t payloadType,
                              uint32_t timeStamp,
                              const uint8_t* payloadData,
                              size_t payloadSize,
                              const RTPFragmentationHeader* fragmentation) {
  RTC_DCHECK_RUN_ON(encoder_queue_);
  rtc::ArrayView<const uint8_t> payload(payloadData, payloadSize);

  if (media_transport() != nullptr) {
    return SendMediaTransportAudio(frameType, payloadType, timeStamp, payload,
                                   fragmentation);
  } else {
    return SendRtpAudio(frameType, payloadType, timeStamp, payload,
                        fragmentation);
  }
}

Tracing the creation of media_transport() back to its origin shows that it is determined by PeerConnectionInterface::use_media_transport:

src/api/peerconnectioninterface.h
PeerConnectionInterface::
    // If MediaTransportFactory is provided in PeerConnectionFactory, this flag
    // informs PeerConnection that it should use the MediaTransportInterface.
    // It's invalid to set it to |true| if the MediaTransportFactory wasn't
    // provided.
    bool use_media_transport = false;

By default WebRTC creates the PeerConnection with use_media_transport = false, so under the default settings media_transport() is null and ChannelSend::SendData therefore executes SendRtpAudio.
SendRtpAudio packs the encoded data into RTP packets, which are encrypted with SRTP and then sent out.
The implementation class of _rtpRtcpModule:
src/modules/rtp_rtcp/source/rtp_rtcp_impl.cc: ModuleRtpRtcpImpl
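To make the "RTP packaging" step concrete, here is a minimal sketch of the packet layout for one encoded audio frame (fixed 12-byte RTP header per RFC 3550, no extensions or CSRCs). It only illustrates the format; it is not WebRTC's ModuleRtpRtcpImpl code, and the SRTP step that would encrypt the payload and append an authentication tag is left out.

#include <cstdint>
#include <cstring>
#include <vector>

// Build a bare RTP packet around one encoded audio frame.
std::vector<uint8_t> BuildRtpPacket(uint8_t payload_type,
                                    uint16_t sequence_number,
                                    uint32_t rtp_timestamp,
                                    uint32_t ssrc,
                                    const uint8_t* payload,
                                    size_t payload_size) {
  std::vector<uint8_t> packet(12 + payload_size);
  packet[0] = 0x80;                        // V=2, P=0, X=0, CC=0
  packet[1] = payload_type & 0x7f;         // M=0, PT (e.g. a dynamic Opus PT)
  packet[2] = sequence_number >> 8;        // incremented per packet
  packet[3] = sequence_number & 0xff;
  packet[4] = rtp_timestamp >> 24;         // timestamp in audio sample units
  packet[5] = (rtp_timestamp >> 16) & 0xff;
  packet[6] = (rtp_timestamp >> 8) & 0xff;
  packet[7] = rtp_timestamp & 0xff;
  packet[8] = ssrc >> 24;                  // stream identifier
  packet[9] = (ssrc >> 16) & 0xff;
  packet[10] = (ssrc >> 8) & 0xff;
  packet[11] = ssrc & 0xff;
  std::memcpy(packet.data() + 12, payload, payload_size);
  return packet;
}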
