WebRTC :NetEQ源码分析

参考:  https://www.cnblogs.com/talkaudiodev/p/9142192.html
             https://www.cnblogs.com/talkaudiodev/p/9185067.html
             https://www.cnblogs.com/talkaudiodev/p/9231526.html
             https://www.cnblogs.com/talkaudiodev/p/9459010.html
             https://www.cnblogs.com/talkaudiodev/p/9501438.html

     随着WebRTC版本的不断更新,内部代码结构也有了很大的变化,但是对于底层的音视频引擎来讲,却没有什么太大的变化,因为引擎早很早之前就已经达到了一个非常成熟稳定的状态(早期的QQ音频就已经使用了GIPS的NetEQ)。读研期间我就研究过NetEQ中的一些理论,不过那时候研究的很浅,主要是通过查找各种资料来学习NetEQ,其实我也很奇怪,如此有大名鼎鼎的技术,并且已经开源了,为什么网上研究它的资料少之又少,除了西安电子大学吴江锐的那篇硕士论文,基本上就没有任何有价值的资料了。估计自己以后还会持续的研究NetEQ,所以准备一边研究一边记录。

      NetEQ是GIPS公司的核心音频引擎技术,后来GIPS公司被Google收购,这项技术落到了Google手里并随着WebRTC的开源一起被公之于众。NetEQ是从接收端来处理语音包的,主要的功能是抖动消除,丢包隐藏,在网络延迟大的时候降低丢包率,在网络条件好的时候减小时延。

      M66版本的WebRTC中NetEQ代码接口的定义在src/modules/audio_coding/neteq/include/neteq.h头文件中,而具体的实现是在src/modules/audio_coding/neteq/neteq_impl.cc文件中,neteq.h文件中定义了相当多的接口,但其实最重要的接口就只有两个,第一个是向NetEQ模块中插入解析过后的从网络中来到的RTP数据包,二是从NetEQ模块中取解码过后的pcm音频数据。所以其实我们可以很简单的将NetEQ看成是一个黑盒,我们往里面扔网络中接收到的RTP数据包,它会给我们吐出解码或者经过其它处理过后的pcm音频数据,然后我们拿去播放,至于中间的一些过程,例如抖动消除,解码,丢包隐藏,语音的拉伸和压缩以及它们之间如何配合,都是我们可以不用去关心的,其实也就是NetEQ真正的价值所在。

      neteq.h的中两个重要函数分别是InsertPacket()和GetAudio(),在neteq_impl.cc中的具体实现InsertPacketInternal()和GetAudioInternal(),与前面的塞包和取包的描述一一对应。下面主要介绍这两个函数的主要流程,这两个函数的流程也就体现了NetEQ整个处理语音包的流程。

int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header,
                                    rtc::ArrayView payload,
                                    uint32_t receive_timestamp) {
  if (payload.empty()) {
    RTC_LOG_F(LS_ERROR) << "payload is empty";
    return kInvalidPointer;
  }
  PacketList packet_list;
  // Insert packet in a packet list.
  packet_list.push_back([&rtp_header, &payload] {
    // Convert to Packet.
    Packet packet;
    packet.payload_type = rtp_header.payloadType;
    packet.sequence_number = rtp_header.sequenceNumber;
    packet.timestamp = rtp_header.timestamp;

    packet.payload.SetData(payload.data(), payload.size());
    // Waiting time will be set upon inserting the packet in the buffer.
    RTC_DCHECK(!packet.waiting_time);
    return packet;
  }());

  bool update_sample_rate_and_channels =
      first_packet_ || (rtp_header.ssrc != ssrc_);
  
  if (update_sample_rate_and_channels) {
    // Reset timestamp scaling.
    timestamp_scaler_->Reset();
  }
//  RTC_LOG(LS_WARNING) << "external timestamp = " << rtp_header.timestamp;
  if (!decoder_database_->IsRed(rtp_header.payloadType)) {
    // Scale timestamp to internal domain (only for some codecs).
    timestamp_scaler_->ToInternal(&packet_list);
//    RTC_LOG(LS_WARNING) << "internal timestamp = " << packet_list.front().timestamp;
  }

  // Store these for later use, since the first packet may very well disappear
  // before we need these values.
  uint32_t main_timestamp = packet_list.front().timestamp;
  uint8_t main_payload_type = packet_list.front().payload_type;
  uint16_t main_sequence_number = packet_list.front().sequence_number;

  // Reinitialize NetEq if it's needed (changed SSRC or first call).
  if (update_sample_rate_and_channels) {
    // Note: |first_packet_| will be cleared further down in this method, once
    // the packet has been successfully inserted into the packet buffer.

    rtcp_.Init(rtp_header.sequenceNumber);

    // Flush the packet buffer and DTMF buffer.
    packet_buffer_->Flush();
    dtmf_buffer_->Flush();

    // Store new SSRC.
    ssrc_ = rtp_header.ssrc;

    // Update audio buffer timestamp.
    sync_buffer_->IncreaseEndTimestamp(main_timestamp - timestamp_);

    // Update codecs.
    timestamp_ = main_timestamp;
  }

  // Update RTCP statistics, only for regular packets.
  rtcp_.Update(rtp_header, receive_timestamp);

  if (nack_enabled_) {
    RTC_DCHECK(nack_);
    if (update_sample_rate_and_channels) {
      nack_->Reset();
    }
    nack_->UpdateLastReceivedPacket(rtp_header.sequenceNumber,
                                    rtp_header.timestamp);
  }

  // Check for RED payload type, and separate payloads into several packets.
  if (decoder_database_->IsRed(rtp_header.payloadType)) {
    if (!red_payload_splitter_->SplitRed(&packet_list)) {
      return kRedundancySplitError;
    }
    // Only accept a few RED payloads of the same type as the main data,
    // DTMF events and CNG.
    red_payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_);
  }

  // Check payload types.
  if (decoder_database_->CheckPayloadTypes(packet_list) ==
      DecoderDatabase::kDecoderNotFound) {
    return kUnknownRtpPayloadType;
  }

  RTC_DCHECK(!packet_list.empty());

  // Update main_timestamp, if new packets appear in the list
  // after RED splitting.
  if (decoder_database_->IsRed(rtp_header.payloadType)) {
    timestamp_scaler_->ToInternal(&packet_list);
    main_timestamp = packet_list.front().timestamp;
    main_payload_type = packet_list.front().payload_type;
    main_sequence_number = packet_list.front().sequence_number;
  }

  // Process DTMF payloads. Cycle through the list of packets, and pick out any
  // DTMF payloads found.
  PacketList::iterator it = packet_list.begin();
  while (it != packet_list.end()) {
    const Packet& current_packet = (*it);
    RTC_DCHECK(!current_packet.payload.empty());
    if (decoder_database_->IsDtmf(current_packet.payload_type)) {
      DtmfEvent event;
      int ret = DtmfBuffer::ParseEvent(current_packet.timestamp,
                                       current_packet.payload.data(),
                                       current_packet.payload.size(), &event);
      if (ret != DtmfBuffer::kOK) {
        return kDtmfParsingError;
      }
      if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) {
        return kDtmfInsertError;
      }
      it = packet_list.erase(it);
    } else {
      ++it;
    }
  }

  // Update bandwidth estimate, if the packet is not comfort noise.
  if (!packet_list.empty() &&
      !decoder_database_->IsComfortNoise(main_payload_type)) {
    // The list can be empty here if we got nothing but DTMF payloads.
    AudioDecoder* decoder = decoder_database_->GetDecoder(main_payload_type);
    RTC_DCHECK(decoder);  // Should always get a valid object, since we have
                          // already checked that the payload types are known.
    decoder->IncomingPacket(packet_list.front().payload.data(),
                            packet_list.front().payload.size(),
                            packet_list.front().sequence_number,
                            packet_list.front().timestamp, receive_timestamp);
  }

  PacketList parsed_packet_list;
  while (!packet_list.empty()) {
    Packet& packet = packet_list.front();
    const DecoderDatabase::DecoderInfo* info =
        decoder_database_->GetDecoderInfo(packet.payload_type);
    if (!info) {
      RTC_LOG(LS_WARNING) << "SplitAudio unknown payload type";
      return kUnknownRtpPayloadType;
    }

    if (info->IsComfortNoise()) {
      // Carry comfort noise packets along.
      parsed_packet_list.splice(parsed_packet_list.end(), packet_list,
                                packet_list.begin());
    } else {
      const auto sequence_number = packet.sequence_number;
      const auto payload_type = packet.payload_type;
      const Packet::Priority original_priority = packet.priority;
      auto packet_from_result = [&](AudioDecoder::ParseResult& result) {
        Packet new_packet;
        new_packet.sequence_number = sequence_number;
        new_packet.payload_type = payload_type;
        new_packet.timestamp = result.timestamp;
        new_packet.priority.codec_level = result.priority;
        new_packet.priority.red_level = original_priority.red_level;
        new_packet.frame = std::move(result.frame);
        return new_packet;
      };

      std::vector results =
          info->GetDecoder()->ParsePayload(std::move(packet.payload),
                                           packet.timestamp);
      if (results.empty()) {
        packet_list.pop_front();
      } else {
        bool first = true;
        for (auto& result : results) {
          RTC_DCHECK(result.frame);
          RTC_DCHECK_GE(result.priority, 0);
          if (first) {
            // Re-use the node and move it to parsed_packet_list.
            packet_list.front() = packet_from_result(result);
            parsed_packet_list.splice(parsed_packet_list.end(), packet_list,
                                      packet_list.begin());
            first = false;
          } else {
            parsed_packet_list.push_back(packet_from_result(result));
          }
        }
      }
    }
  }

  // Calculate the number of primary (non-FEC/RED) packets.
  const int number_of_primary_packets = std::count_if(
      parsed_packet_list.begin(), parsed_packet_list.end(),
      [](const Packet& in) { return in.priority.codec_level == 0; });

  // Insert packets in buffer.
  const int ret = packet_buffer_->InsertPacketList(
      &parsed_packet_list, *decoder_database_, ¤t_rtp_payload_type_,
      ¤t_cng_rtp_payload_type_, &stats_);
  if (ret == PacketBuffer::kFlushed) {
    // Reset DSP timestamp etc. if packet buffer flushed.
    new_codec_ = true;
    update_sample_rate_and_channels = true;
  } else if (ret != PacketBuffer::kOK) {
    return kOtherError;
  }

  if (first_packet_) {
    first_packet_ = false;
    // Update the codec on the next GetAudio call.
    new_codec_ = true;
  }

  if (current_rtp_payload_type_) {
    RTC_DCHECK(decoder_database_->GetDecoderInfo(*current_rtp_payload_type_))
        << "Payload type " << static_cast(*current_rtp_payload_type_)
        << " is unknown where it shouldn't be";
  }

  if (update_sample_rate_and_channels && !packet_buffer_->Empty()) {
    // We do not use |current_rtp_payload_type_| to |set payload_type|, but
    // get the next RTP header from |packet_buffer_| to obtain the payload type.
    // The reason for it is the following corner case. If NetEq receives a
    // CNG packet with a sample rate different than the current CNG then it
    // flushes its buffer, assuming send codec must have been changed. However,
    // payload type of the hypothetically new send codec is not known.
    const Packet* next_packet = packet_buffer_->PeekNextPacket();
    RTC_DCHECK(next_packet);
    const int payload_type = next_packet->payload_type;
    size_t channels = 1;
    if (!decoder_database_->IsComfortNoise(payload_type)) {
      AudioDecoder* decoder = decoder_database_->GetDecoder(payload_type);
      assert(decoder);  // Payloads are already checked to be valid.
      channels = decoder->Channels();
    }
    const DecoderDatabase::DecoderInfo* decoder_info =
        decoder_database_->GetDecoderInfo(payload_type);
    assert(decoder_info);
    if (decoder_info->SampleRateHz() != fs_hz_ ||
        channels != algorithm_buffer_->Channels()) {
      SetSampleRateAndChannels(decoder_info->SampleRateHz(), channels);
    }
    if (nack_enabled_) {
      RTC_DCHECK(nack_);
      // Update the sample rate even if the rate is not new, because of Reset().
      nack_->UpdateSampleRate(fs_hz_);
    }
  }

  // TODO(hlundin): Move this code to DelayManager class.
  const DecoderDatabase::DecoderInfo* dec_info =
      decoder_database_->GetDecoderInfo(main_payload_type);
  assert(dec_info);  // Already checked that the payload type is known.
  delay_manager_->LastDecodedWasCngOrDtmf(dec_info->IsComfortNoise() ||
                                          dec_info->IsDtmf());
  if (delay_manager_->last_pack_cng_or_dtmf() == 0) {
    // Calculate the total speech length carried in each packet.
    if (number_of_primary_packets > 0) {
      const size_t packet_length_samples =
          number_of_primary_packets * decoder_frame_length_;
      if (packet_length_samples != decision_logic_->packet_length_samples()) {
        decision_logic_->set_packet_length_samples(packet_length_samples);
        delay_manager_->SetPacketAudioLength(
            rtc::dchecked_cast((1000 * packet_length_samples) / fs_hz_));
      }
    }

    // Update statistics.
    if ((int32_t)(main_timestamp - timestamp_) >= 0 && !new_codec_) {
      // Only update statistics if incoming packet is not older than last played
      // out packet, and if new codec flag is not set.
      delay_manager_->Update(main_sequence_number, main_timestamp, fs_hz_);
    }
  } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) {
    // This is first "normal" packet after CNG or DTMF.
    // Reset packet time counter and measure time until next packet,
    // but don't update statistics.
    delay_manager_->set_last_pack_cng_or_dtmf(0);
    delay_manager_->ResetPacketIatCount();
  }
  return 0;
}

 

你可能感兴趣的:(WebRTC,WebRTC)