References:
https://www.cnblogs.com/talkaudiodev/p/9142192.html
https://www.cnblogs.com/talkaudiodev/p/9185067.html
https://www.cnblogs.com/talkaudiodev/p/9231526.html
https://www.cnblogs.com/talkaudiodev/p/9459010.html
https://www.cnblogs.com/talkaudiodev/p/9501438.html
Although the internal code structure of WebRTC keeps changing from version to version, the underlying audio and video engines have changed very little, because they reached a mature and stable state long ago (early versions of QQ's audio already used GIPS's NetEQ). I studied some of the theory behind NetEQ back in graduate school, but only superficially, mostly by piecing the picture together from whatever material I could find. It still puzzles me that a technology this well known, and open source at that, has so little published research about it: apart from the master's thesis by Wu Jiangrui of Xidian University, there is essentially nothing of value online. Since I expect to keep studying NetEQ, I plan to take notes as I go.
NetEQ was the core audio engine technology of GIPS. After Google acquired GIPS, the technology passed into Google's hands and was made public along with the open-sourcing of WebRTC. NetEQ processes speech packets on the receiving side. Its main jobs are jitter removal and packet loss concealment: it keeps the packet loss rate down when network delay is high, and reduces latency when network conditions are good.
In WebRTC M66, the NetEQ interface is declared in the header src/modules/audio_coding/neteq/include/neteq.h, and the implementation lives in src/modules/audio_coding/neteq/neteq_impl.cc. neteq.h declares quite a few interfaces, but only two really matter: one inserts parsed RTP packets arriving from the network into the NetEQ module, and the other pulls decoded PCM audio data out of it. We can therefore treat NetEQ as a black box: we throw in the RTP packets received from the network, it spits out PCM audio that has been decoded (or otherwise processed), and we hand that audio to the playback device. Everything that happens in between, such as jitter removal, decoding, packet loss concealment, and the stretching and compression of speech, and how these pieces cooperate, is something we never have to worry about. That is exactly where NetEQ's real value lies.
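To make the black-box picture concrete, here is a minimal sketch (my own illustration against the M66 headers, not code from WebRTC) of how an application could drive NetEQ through exactly these two calls. Only NetEq::Create(), InsertPacket() and GetAudio() are the real interface; the surrounding glue, such as where the RTP header and payload come from and what is done with the decoded frame, is hypothetical.
#include <memory>

#include "api/array_view.h"
#include "api/audio_codecs/builtin_audio_decoder_factory.h"
#include "modules/audio_coding/neteq/include/neteq.h"
#include "modules/include/module_common_types.h"  // AudioFrame (header location varies by version)

// Hypothetical helper: feed one parsed RTP packet and pull 10 ms of PCM.
void PushAndPullOnce(webrtc::NetEq* neteq,
                     const webrtc::RTPHeader& rtp_header,
                     rtc::ArrayView<const uint8_t> payload,
                     uint32_t receive_timestamp) {
  // 1) Hand the parsed RTP packet to NetEQ; internally this lands in
  //    NetEqImpl::InsertPacketInternal().
  neteq->InsertPacket(rtp_header, payload, receive_timestamp);

  // 2) Ask NetEQ for 10 ms of audio; internally this lands in
  //    NetEqImpl::GetAudioInternal(). The frame may contain decoded speech,
  //    concealment, or comfort noise; the caller does not care which.
  webrtc::AudioFrame frame;
  bool muted = false;
  neteq->GetAudio(&frame, &muted);
  // frame.data(), frame.samples_per_channel_ and frame.num_channels_ can now
  // be handed to the audio device for playback.
}

int main() {
  webrtc::NetEq::Config config;
  config.sample_rate_hz = 16000;  // initial rate; NetEQ follows the decoder later
  std::unique_ptr<webrtc::NetEq> neteq(webrtc::NetEq::Create(
      config, webrtc::CreateBuiltinAudioDecoderFactory()));
  // In real code the RTP payload types must be registered first (via
  // NetEq::RegisterPayloadType()), so that NetEQ knows which decoder to use.
  // ... receive packets, then call PushAndPullOnce() every 10 ms ...
  return 0;
}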
The two important functions in neteq.h are InsertPacket() and GetAudio(); their concrete implementations in neteq_impl.cc are InsertPacketInternal() and GetAudioInternal(), corresponding one-to-one to the "push a packet" and "pull audio" operations described above. The following walks through the main flow of these two functions, which together make up NetEQ's entire speech-packet processing pipeline.
int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header,
rtc::ArrayView<const uint8_t> payload,
uint32_t receive_timestamp) {
if (payload.empty()) {
RTC_LOG_F(LS_ERROR) << "payload is empty";
return kInvalidPointer;
}
PacketList packet_list;
// Insert packet in a packet list.
packet_list.push_back([&rtp_header, &payload] {
// Convert to Packet.
Packet packet;
packet.payload_type = rtp_header.payloadType;
packet.sequence_number = rtp_header.sequenceNumber;
packet.timestamp = rtp_header.timestamp;
packet.payload.SetData(payload.data(), payload.size());
// Waiting time will be set upon inserting the packet in the buffer.
RTC_DCHECK(!packet.waiting_time);
return packet;
}());
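// Walkthrough note: |update_sample_rate_and_channels| below is true for the
// very first packet or when the SSRC changes; in that case NetEQ reinitializes
// itself further down (timestamp scaler, RTCP, packet/DTMF buffers, and the
// sync buffer's end timestamp).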
bool update_sample_rate_and_channels =
first_packet_ || (rtp_header.ssrc != ssrc_);
if (update_sample_rate_and_channels) {
// Reset timestamp scaling.
timestamp_scaler_->Reset();
}
// RTC_LOG(LS_WARNING) << "external timestamp = " << rtp_header.timestamp;
if (!decoder_database_->IsRed(rtp_header.payloadType)) {
// Scale timestamp to internal domain (only for some codecs).
timestamp_scaler_->ToInternal(&packet_list);
// RTC_LOG(LS_WARNING) << "internal timestamp = " << packet_list.front().timestamp;
}
// Store these for later use, since the first packet may very well disappear
// before we need these values.
uint32_t main_timestamp = packet_list.front().timestamp;
uint8_t main_payload_type = packet_list.front().payload_type;
uint16_t main_sequence_number = packet_list.front().sequence_number;
// Reinitialize NetEq if it's needed (changed SSRC or first call).
if (update_sample_rate_and_channels) {
// Note: |first_packet_| will be cleared further down in this method, once
// the packet has been successfully inserted into the packet buffer.
rtcp_.Init(rtp_header.sequenceNumber);
// Flush the packet buffer and DTMF buffer.
packet_buffer_->Flush();
dtmf_buffer_->Flush();
// Store new SSRC.
ssrc_ = rtp_header.ssrc;
// Update audio buffer timestamp.
sync_buffer_->IncreaseEndTimestamp(main_timestamp - timestamp_);
// Update codecs.
timestamp_ = main_timestamp;
}
// Update RTCP statistics, only for regular packets.
rtcp_.Update(rtp_header, receive_timestamp);
if (nack_enabled_) {
RTC_DCHECK(nack_);
if (update_sample_rate_and_channels) {
nack_->Reset();
}
nack_->UpdateLastReceivedPacket(rtp_header.sequenceNumber,
rtp_header.timestamp);
}
// Check for RED payload type, and separate payloads into several packets.
if (decoder_database_->IsRed(rtp_header.payloadType)) {
if (!red_payload_splitter_->SplitRed(&packet_list)) {
return kRedundancySplitError;
}
// Only accept a few RED payloads of the same type as the main data,
// DTMF events and CNG.
red_payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_);
}
// Check payload types.
if (decoder_database_->CheckPayloadTypes(packet_list) ==
DecoderDatabase::kDecoderNotFound) {
return kUnknownRtpPayloadType;
}
RTC_DCHECK(!packet_list.empty());
// Update main_timestamp, if new packets appear in the list
// after RED splitting.
if (decoder_database_->IsRed(rtp_header.payloadType)) {
timestamp_scaler_->ToInternal(&packet_list);
main_timestamp = packet_list.front().timestamp;
main_payload_type = packet_list.front().payload_type;
main_sequence_number = packet_list.front().sequence_number;
}
// Process DTMF payloads. Cycle through the list of packets, and pick out any
// DTMF payloads found.
PacketList::iterator it = packet_list.begin();
while (it != packet_list.end()) {
const Packet& current_packet = (*it);
RTC_DCHECK(!current_packet.payload.empty());
if (decoder_database_->IsDtmf(current_packet.payload_type)) {
DtmfEvent event;
int ret = DtmfBuffer::ParseEvent(current_packet.timestamp,
current_packet.payload.data(),
current_packet.payload.size(), &event);
if (ret != DtmfBuffer::kOK) {
return kDtmfParsingError;
}
if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) {
return kDtmfInsertError;
}
it = packet_list.erase(it);
} else {
++it;
}
}
// Update bandwidth estimate, if the packet is not comfort noise.
if (!packet_list.empty() &&
!decoder_database_->IsComfortNoise(main_payload_type)) {
// The list can be empty here if we got nothing but DTMF payloads.
AudioDecoder* decoder = decoder_database_->GetDecoder(main_payload_type);
RTC_DCHECK(decoder); // Should always get a valid object, since we have
// already checked that the payload types are known.
decoder->IncomingPacket(packet_list.front().payload.data(),
packet_list.front().payload.size(),
packet_list.front().sequence_number,
packet_list.front().timestamp, receive_timestamp);
}
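// Walkthrough note: each remaining payload is now handed to its decoder's
// ParsePayload(), which may split it into one or more frames (a nonzero
// priority.codec_level marks redundant data); comfort noise packets are
// carried along without parsing.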
PacketList parsed_packet_list;
while (!packet_list.empty()) {
Packet& packet = packet_list.front();
const DecoderDatabase::DecoderInfo* info =
decoder_database_->GetDecoderInfo(packet.payload_type);
if (!info) {
RTC_LOG(LS_WARNING) << "SplitAudio unknown payload type";
return kUnknownRtpPayloadType;
}
if (info->IsComfortNoise()) {
// Carry comfort noise packets along.
parsed_packet_list.splice(parsed_packet_list.end(), packet_list,
packet_list.begin());
} else {
const auto sequence_number = packet.sequence_number;
const auto payload_type = packet.payload_type;
const Packet::Priority original_priority = packet.priority;
auto packet_from_result = [&](AudioDecoder::ParseResult& result) {
Packet new_packet;
new_packet.sequence_number = sequence_number;
new_packet.payload_type = payload_type;
new_packet.timestamp = result.timestamp;
new_packet.priority.codec_level = result.priority;
new_packet.priority.red_level = original_priority.red_level;
new_packet.frame = std::move(result.frame);
return new_packet;
};
std::vector<AudioDecoder::ParseResult> results =
info->GetDecoder()->ParsePayload(std::move(packet.payload),
packet.timestamp);
if (results.empty()) {
packet_list.pop_front();
} else {
bool first = true;
for (auto& result : results) {
RTC_DCHECK(result.frame);
RTC_DCHECK_GE(result.priority, 0);
if (first) {
// Re-use the node and move it to parsed_packet_list.
packet_list.front() = packet_from_result(result);
parsed_packet_list.splice(parsed_packet_list.end(), packet_list,
packet_list.begin());
first = false;
} else {
parsed_packet_list.push_back(packet_from_result(result));
}
}
}
}
}
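// Walkthrough note: the parsed frames are now counted and moved into the
// packet buffer. InsertPacketList() may flush the buffer (e.g. when it is
// full), which is then treated like a codec change via |new_codec_|.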
// Calculate the number of primary (non-FEC/RED) packets.
const int number_of_primary_packets = std::count_if(
parsed_packet_list.begin(), parsed_packet_list.end(),
[](const Packet& in) { return in.priority.codec_level == 0; });
// Insert packets in buffer.
const int ret = packet_buffer_->InsertPacketList(
&parsed_packet_list, *decoder_database_, &current_rtp_payload_type_,
&current_cng_rtp_payload_type_, &stats_);
if (ret == PacketBuffer::kFlushed) {
// Reset DSP timestamp etc. if packet buffer flushed.
new_codec_ = true;
update_sample_rate_and_channels = true;
} else if (ret != PacketBuffer::kOK) {
return kOtherError;
}
if (first_packet_) {
first_packet_ = false;
// Update the codec on the next GetAudio call.
new_codec_ = true;
}
if (current_rtp_payload_type_) {
RTC_DCHECK(decoder_database_->GetDecoderInfo(*current_rtp_payload_type_))
<< "Payload type " << static_cast(*current_rtp_payload_type_)
<< " is unknown where it shouldn't be";
}
if (update_sample_rate_and_channels && !packet_buffer_->Empty()) {
// We do not use |current_rtp_payload_type_| to |set payload_type|, but
// get the next RTP header from |packet_buffer_| to obtain the payload type.
// The reason for it is the following corner case. If NetEq receives a
// CNG packet with a sample rate different than the current CNG then it
// flushes its buffer, assuming send codec must have been changed. However,
// payload type of the hypothetically new send codec is not known.
const Packet* next_packet = packet_buffer_->PeekNextPacket();
RTC_DCHECK(next_packet);
const int payload_type = next_packet->payload_type;
size_t channels = 1;
if (!decoder_database_->IsComfortNoise(payload_type)) {
AudioDecoder* decoder = decoder_database_->GetDecoder(payload_type);
assert(decoder); // Payloads are already checked to be valid.
channels = decoder->Channels();
}
const DecoderDatabase::DecoderInfo* decoder_info =
decoder_database_->GetDecoderInfo(payload_type);
assert(decoder_info);
if (decoder_info->SampleRateHz() != fs_hz_ ||
channels != algorithm_buffer_->Channels()) {
SetSampleRateAndChannels(decoder_info->SampleRateHz(), channels);
}
if (nack_enabled_) {
RTC_DCHECK(nack_);
// Update the sample rate even if the rate is not new, because of Reset().
nack_->UpdateSampleRate(fs_hz_);
}
}
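// Walkthrough note: the remaining code feeds the packet's audio length and
// inter-arrival statistics to DelayManager, which maintains the target
// jitter-buffer delay; CNG and DTMF packets are excluded from the statistics.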
// TODO(hlundin): Move this code to DelayManager class.
const DecoderDatabase::DecoderInfo* dec_info =
decoder_database_->GetDecoderInfo(main_payload_type);
assert(dec_info); // Already checked that the payload type is known.
delay_manager_->LastDecodedWasCngOrDtmf(dec_info->IsComfortNoise() ||
dec_info->IsDtmf());
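// Walkthrough note: last_pack_cng_or_dtmf() is 1 when the last packet was CNG
// or DTMF, -1 for the first "normal" packet after CNG/DTMF, and 0 otherwise,
// which is what the two branches below distinguish.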
if (delay_manager_->last_pack_cng_or_dtmf() == 0) {
// Calculate the total speech length carried in each packet.
if (number_of_primary_packets > 0) {
const size_t packet_length_samples =
number_of_primary_packets * decoder_frame_length_;
if (packet_length_samples != decision_logic_->packet_length_samples()) {
decision_logic_->set_packet_length_samples(packet_length_samples);
delay_manager_->SetPacketAudioLength(
rtc::dchecked_cast<int>((1000 * packet_length_samples) / fs_hz_));
}
}
// Update statistics.
if ((int32_t)(main_timestamp - timestamp_) >= 0 && !new_codec_) {
// Only update statistics if incoming packet is not older than last played
// out packet, and if new codec flag is not set.
delay_manager_->Update(main_sequence_number, main_timestamp, fs_hz_);
}
} else if (delay_manager_->last_pack_cng_or_dtmf() == -1) {
// This is first "normal" packet after CNG or DTMF.
// Reset packet time counter and measure time until next packet,
// but don't update statistics.
delay_manager_->set_last_pack_cng_or_dtmf(0);
delay_manager_->ResetPacketIatCount();
}
return 0;
}