// Insert a frame into the frame buffer. Returns the picture id
// of the last continuous frame or -1 if there is no continuous frame.
// Every early return below (invalid references, full buffer, frame not newer
// than the last decoded one, duplicate frame, failed frame-info update)
// returns the current value of |last_continuous_picture_id| without storing
// the frame.
// NOTE(review): this listing appears to have lost lines and template
// arguments (the OnCompleteFrame() call is cut off, several closing braces
// and RTC_LOG statement heads are missing) - confirm against the full file.
// TODO(philipel): Return a VideoLayerFrameId and not only the picture id.
int64_t FrameBuffer::InsertFrame(std::unique_ptr frame) {
TRACE_EVENT0("webrtc", "FrameBuffer::InsertFrame");
if (stats_callback_)
stats_callback_->OnCompleteFrame(frame->is_keyframe(), frame->size(),
const VideoLayerFrameId& id = frame->id;
rtc::CritScope lock(&crit_);
// -1 means that no frame has been marked continuous so far.
int64_t last_continuous_picture_id =
!last_continuous_frame_ ? -1 : last_continuous_frame_->picture_id;
// If the frame's references are not valid, drop the frame and return the
// previous last_continuous_picture_id.
if (!ValidReferences(*frame)) {
RTC_LOG(LS_WARNING) << "Frame with (picture_id:spatial_id) ("
<< id.picture_id << ":"
<< static_cast(id.spatial_layer)
<< ") has invalid frame references, dropping frame.";
return last_continuous_picture_id;
// If the buffer is full, check whether the incoming frame is a keyframe.
// A keyframe is still inserted; per the log message the buffer is cleared
// first (the clearing call itself is not visible in this listing).
// Any other frame is dropped and the previous last_continuous_picture_id
// is returned.
if (frames_.size() >= kMaxFramesBuffered) {
if (frame->is_keyframe()) {
RTC_LOG(LS_WARNING) << "Inserting keyframe (picture_id:spatial_id) ("
<< id.picture_id << ":"
<< static_cast(id.spatial_layer)
<< ") but buffer is full, clearing"
<< " buffer and inserting the frame.";
} else {
RTC_LOG(LS_WARNING) << "Frame with (picture_id:spatial_id) ("
<< id.picture_id << ":"
<< static_cast(id.spatial_layer)
<< ") could not be inserted due to the frame "
<< "buffer being full, dropping frame.";
return last_continuous_picture_id;
// Compare against the most recently decoded frame: a frame whose id is not
// newer than it is dropped, unless it is a keyframe with a newer timestamp.
auto last_decoded_frame = decoded_frames_history_.GetLastDecodedFrameId();
auto last_decoded_frame_timestamp =
if (last_decoded_frame && id <= *last_decoded_frame) {
if (AheadOf(frame->Timestamp(), *last_decoded_frame_timestamp) &&
frame->is_keyframe()) {
// If this frame has a newer timestamp but an earlier picture id then we
// assume there has been a jump in the picture id due to some encoder
// reconfiguration or some other reason. Even though this is not according
// to spec we can still continue to decode from this frame if it is a
// keyframe.
<< "A jump in picture id was detected, clearing buffer.";
// After the jump no previously seen frame counts as continuous.
last_continuous_picture_id = -1;
} else {
RTC_LOG(LS_WARNING) << "Frame with (picture_id:spatial_id) ("
<< id.picture_id << ":"
<< static_cast(id.spatial_layer)
<< ") inserted after frame ("
<< last_decoded_frame->picture_id << ":"
<< static_cast(last_decoded_frame->spatial_layer)
<< ") was handed off for decoding, dropping frame.";
return last_continuous_picture_id;
// Test if inserting this frame would cause the order of the frames to become
// ambiguous (covering more than half the interval of 2^16). This can happen
// when the picture id make large jumps mid stream.
if (!frames_.empty() && id < frames_.begin()->first &&
frames_.rbegin()->first < id) {
<< "A jump in picture id was detected, clearing buffer.";
last_continuous_picture_id = -1;
// Find or create the map entry for this id. An entry that already holds a
// frame means the same frame was inserted before.
auto info = frames_.emplace(id, FrameInfo()).first;
if (info->second.frame) {
RTC_LOG(LS_WARNING) << "Frame with (picture_id:spatial_id) ("
<< id.picture_id << ":"
<< static_cast(id.spatial_layer)
<< ") already inserted, dropping frame.";
return last_continuous_picture_id;
if (stats_callback_)
stats_callback_->OnFrameInserted(frame->is_keyframe(), frame->size(),
frame->contentType(), frames_.size());
if (!UpdateFrameInfoWithIncomingFrame(*frame, info))
return last_continuous_picture_id;
// From here on the map entry owns the frame.
info->second.frame = std::move(frame);
// A frame with no missing continuous references is continuous right away.
if (info->second.num_missing_continuous == 0) {
info->second.continuous = true;
// NOTE(review): |last_continuous_frame_| is presumably updated by a
// propagation step that is not visible in this listing - confirm.
last_continuous_picture_id = last_continuous_frame_->picture_id;
// Since we now have new continuous frames there might be a better frame
// to return from NextFrame. Signal that thread so that it again can choose
// which frame to return.
return last_continuous_picture_id;
// Get the next frame for decoding. Will return at latest after
// |max_wait_time_ms|.
// - If a frame is available within |max_wait_time_ms| it will return
// kFrameFound and set |frame_out| to the resulting frame.
// - If no frame is available after |max_wait_time_ms| it will return
// kTimeout.
// - If the FrameBuffer is stopped then it will return kStopped.
// |keyframe_required| is stored in |keyframe_required_| on every pass of the
// wait loop, where FindNextFrame() reads it.
// NOTE(review): the line that fills |frame_out| and several closing braces
// are not visible in this listing - confirm against the full file.
FrameBuffer::ReturnReason FrameBuffer::NextFrame(
int64_t max_wait_time_ms,
std::unique_ptr* frame_out,
bool keyframe_required) {
// Deadline: the latest point in time at which this function may return.
int64_t latest_return_time_ms =
clock_->TimeInMilliseconds() + max_wait_time_ms;
int64_t wait_ms = max_wait_time_ms;
int64_t now_ms = 0;
// Each pass re-evaluates the buffer under |crit_| and then waits on
// |new_continuous_frame_event_| for the time FindNextFrame() asked for.
// The loop repeats for as long as Wait() returns true (presumably: the event
// was signaled before the wait timed out - confirm).
do {
now_ms = clock_->TimeInMilliseconds();
rtc::CritScope lock(&crit_);
if (stopped_)
return kStopped;
keyframe_required_ = keyframe_required;
latest_return_time_ms_ = latest_return_time_ms;
wait_ms = FindNextFrame(now_ms);
} while (new_continuous_frame_event_.Wait(wait_ms));
rtc::CritScope lock(&crit_);
if (!frames_to_decode_.empty()) {
return kFrameFound;
if (latest_return_time_ms - clock_->TimeInMilliseconds() > 0) {
// If |next_frame_it_ == frames_.end()| and there is still time left, it
// means that the frame buffer was cleared as the thread in this function
// was waiting to acquire |crit_| in order to return. Wait for the
// remaining time and then return.
return NextFrame(latest_return_time_ms - now_ms, frame_out,
return kTimeout;
// Walks |frames_| in order, up to |last_continuous_frame_|, looking for a
// complete superframe that can be decoded. When one is found its map
// iterators are moved into |frames_to_decode_|.
// Returns the number of milliseconds the caller should wait. The value
// starts as the time left until |latest_return_time_ms_| and ends up clamped
// to the range [0, latest_return_time_ms_ - now_ms].
// NOTE(review): many statements (the skip/stop statements after the checks,
// closing braces) are not visible in this listing - confirm against the
// full file.
int64_t FrameBuffer::FindNextFrame(int64_t now_ms) {
int64_t wait_ms = latest_return_time_ms_ - now_ms;
// |last_continuous_frame_| may be empty below, but nullopt is smaller
// than everything else and loop will immediately terminate as expected.
for (auto frame_it = frames_.begin();
frame_it != frames_.end() && frame_it->first <= last_continuous_frame_;
++frame_it) {
// Candidate check: the frame must be continuous and have no references
// that are still missing for decoding.
if (!frame_it->second.continuous ||
frame_it->second.num_missing_decodable > 0) {
EncodedFrame* frame = frame_it->second.frame.get();
// Waiting for a keyframe: non-keyframes are not acceptable while
// |keyframe_required_| is set.
if (keyframe_required_ && !frame->is_keyframe())
auto last_decoded_frame_timestamp =
// TODO(https://bugs.webrtc.org/9974): consider removing this check
// as it may make a stream undecodable after a very long delay between
// frames.
if (last_decoded_frame_timestamp &&
AheadOf(*last_decoded_frame_timestamp, frame->Timestamp())) {
// Only ever return all parts of a superframe. Therefore skip this
// frame if it's not a beginning of a superframe.
if (frame->inter_layer_predicted) {
// Gather all remaining frames for the same superframe.
std::vector current_superframe;
bool last_layer_completed = frame_it->second.frame->is_last_spatial_layer;
FrameMap::iterator next_frame_it = frame_it;
while (true) {
// The superframe ends at the end of the map, at the first entry with a
// different picture id, or at the first non-continuous entry.
if (next_frame_it == frames_.end() ||
next_frame_it->first.picture_id != frame->id.picture_id ||
!next_frame_it->second.continuous) {
// Check if the next frame has some undecoded references other than
// the previous frame in the same superframe.
size_t num_allowed_undecoded_refs =
(next_frame_it->second.frame->inter_layer_predicted) ? 1 : 0;
if (next_frame_it->second.num_missing_decodable >
num_allowed_undecoded_refs) {
// All frames in the superframe should have the same timestamp.
if (frame->Timestamp() != next_frame_it->second.frame->Timestamp()) {
RTC_LOG(LS_WARNING) << "Frames in a single superframe have different"
" timestamps. Skipping undecodable superframe.";
last_layer_completed = next_frame_it->second.frame->is_last_spatial_layer;
// Check if the current superframe is complete.
// TODO(bugs.webrtc.org/10064): consider returning all available to
// decode frames even if the superframe is not complete yet.
if (!last_layer_completed) {
frames_to_decode_ = std::move(current_superframe);
// A render time of -1 is treated as "not set yet"; compute one from the
// frame's timestamp.
if (frame->RenderTime() == -1) {
frame->SetRenderTime(timing_->RenderTimeMs(frame->Timestamp(), now_ms));
wait_ms = timing_->MaxWaitingTime(frame->RenderTime(), now_ms);
// This will cause the frame buffer to prefer high framerate rather
// than high resolution in the case of the decoder not decoding fast
// enough and the stream has multiple spatial and temporal layers.
// For multiple temporal layers it may cause non-base layer frames to be
// skipped if they are late.
if (wait_ms < -kMaxAllowedFrameDelayMs)
// Never wait past the caller's deadline, and never return a negative wait.
wait_ms = std::min(wait_ms, latest_return_time_ms_ - now_ms);
wait_ms = std::max(wait_ms, 0);
return wait_ms;
## 4 UpdatePlayoutDelays
// Reads the playout delay carried by |frame|'s encoded image and, unless the
// frame was delayed by retransmission, reports the frame's RTP timestamp and
// receive time to |timing_|.
// NOTE(review): the statements guarded by the two playout-delay checks below
// are not visible in this listing - confirm against the full file.
void FrameBuffer::UpdatePlayoutDelays(const EncodedFrame& frame) {
TRACE_EVENT0("webrtc", "FrameBuffer::UpdatePlayoutDelays");
PlayoutDelay playout_delay = frame.EncodedImage().playout_delay_;
// Only non-negative min/max values are acted on.
if (playout_delay.min_ms >= 0)
if (playout_delay.max_ms >= 0)
// Was any packet of this frame retransmitted? A frame's times_nacked_ holds
// the highest retransmission count among all of its packets.
// Only frames that were NOT delayed by retransmission are reported to
// |timing_|. Put simply, this builds the mapping between the media timestamp
// timeline and the receiver's system time.
// Used to report that a frame is passed to decoding. Updates the timestamp
// filter which is used to map between timestamps and receiver system time.
if (!frame.delayed_by_retransmission())
timing_->IncomingTimestamp(frame.Timestamp(), frame.ReceivedTime());
## 5 GetNextFrame()
// Hands the frames referenced by |frames_to_decode_| over for decoding.
// For each of them the frame is released from its map entry, recorded in
// |decoded_frames_history_|, and erased from |frames_| together with every
// entry before it. Jitter and timing state is then updated and the result is
// returned: the single frame directly, or the output of
// CombineAndDeleteFrames() when there are several.
// Reads |frames_to_decode_[0]| unconditionally, so the list must not be
// empty when this is called.
// NOTE(review): several lines (filling |frames_out|, applying the render
// time, closing braces, parts of the jitter/timing calls) are not visible in
// this listing - confirm against the full file.
EncodedFrame* FrameBuffer::GetNextFrame() {
int64_t now_ms = clock_->TimeInMilliseconds();
// TODO(ilnik): remove |frames_out| use frames_to_decode_ directly.
std::vector frames_out;
bool superframe_delayed_by_retransmission = false;
size_t superframe_size = 0;
EncodedFrame* first_frame = frames_to_decode_[0]->second.frame.get();
int64_t render_time_ms = first_frame->RenderTime();
int64_t receive_time_ms = first_frame->ReceivedTime();
// Gracefully handle bad RTP timestamps and render time issues.
if (HasBadRenderTiming(*first_frame, now_ms)) {
render_time_ms = timing_->RenderTimeMs(first_frame->Timestamp(), now_ms);
// Core logic: process every frame of the superframe.
for (FrameMap::iterator& frame_it : frames_to_decode_) {
RTC_DCHECK(frame_it != frames_.end());
// Ownership leaves the map entry here; the raw pointer is what gets
// returned to the caller.
EncodedFrame* frame = frame_it->second.frame.release();
// Every frame of the superframe being decoded uses the render time of the
// first frame (the call applying it is not visible in this listing).
// The superframe counts as delayed if any of its frames was, its receive
// time is the latest one, and its size is the sum of the frame sizes.
superframe_delayed_by_retransmission |= frame->delayed_by_retransmission();
receive_time_ms = std::max(receive_time_ms, frame->ReceivedTime());
superframe_size += frame->size();
decoded_frames_history_.InsertDecoded(frame_it->first, frame->Timestamp());
// Remove decoded frame and all undecoded frames before it.
if (stats_callback_) {
// Entries before |frame_it| that still hold a frame are erased below
// without having been decoded; they are counted as dropped.
unsigned int dropped_frames = std::count_if(
frames_.begin(), frame_it,
[](const std::pair& frame) {
return frame.second.frame != nullptr;
if (dropped_frames > 0) {
frames_.erase(frames_.begin(), ++frame_it);
// The jitter estimate is only fed by superframes that were not delayed by
// retransmission.
if (!superframe_delayed_by_retransmission) {
int64_t frame_delay;
if (inter_frame_delay_.CalculateDelay(first_frame->Timestamp(),
&frame_delay, receive_time_ms)) {
jitter_estimator_.UpdateEstimate(frame_delay, superframe_size);
// The RTT multiplier is 0 in NACK+FEC protection mode and 1 otherwise,
// unless |rtt_mult_settings_| provides an explicit value.
float rtt_mult = protection_mode_ == kProtectionNackFEC ? 0.0 : 1.0;
absl::optional rtt_mult_add_cap_ms = absl::nullopt;
if (rtt_mult_settings_.has_value()) {
rtt_mult = rtt_mult_settings_->rtt_mult_setting;
rtt_mult_add_cap_ms = rtt_mult_settings_->rtt_mult_add_cap_ms;
jitter_estimator_.GetJitterEstimate(rtt_mult, rtt_mult_add_cap_ms));
timing_->UpdateCurrentDelay(render_time_ms, now_ms);
} else {
if (RttMultExperiment::RttMultEnabled() || add_rtt_to_playout_delay_)
// Note: an array holding all frames could be returned here instead.
if (frames_out.size() == 1) {
return frames_out[0];
} else {
return CombineAndDeleteFrames(frames_out);
// An EncodedFrame described in RTP terms: the sequence-number range it
// covers, how often its packets were nacked, packet receive times, and the
// RTP video header / generic frame descriptor that came with it.
// NOTE(review): presumably assembled from packets held in a PacketBuffer;
// access specifiers, some members and the closing brace are not visible in
// this listing - confirm against the full header.
class RtpFrameObject : public EncodedFrame {
RtpFrameObject(PacketBuffer* packet_buffer,
uint16_t first_seq_num,
uint16_t last_seq_num,
size_t frame_size,
int times_nacked,
int64_t first_packet_received_time,
int64_t last_packet_received_time,
RtpPacketInfos packet_infos);
~RtpFrameObject() override;
uint16_t first_seq_num() const;
uint16_t last_seq_num() const;
int times_nacked() const;
VideoFrameType frame_type() const;
VideoCodecType codec_type() const;
// Overrides of the EncodedFrame interface.
int64_t ReceivedTime() const override;
int64_t RenderTime() const override;
bool delayed_by_retransmission() const override;
const RTPVideoHeader& GetRtpVideoHeader() const;
const absl::optional& GetGenericFrameDescriptor()
const FrameMarking& GetFrameMarking() const;
RTPVideoHeader rtp_video_header_;
absl::optional rtp_generic_frame_descriptor_;
VideoCodecType codec_type_;
uint16_t first_seq_num_; // First sequence number of the frame.
uint16_t last_seq_num_; // Last sequence number of the frame.
int64_t last_packet_received_time_; // Time at which the last packet was received.
// Equal to times nacked of the packet with the highest times nacked
// belonging to this frame.
int times_nacked_; // Highest retransmission count among this frame's packets.