上一篇BLOG已经介绍了revolver RUDP的传输性能、基本的框架和接口,这篇文章我重点讲述RUDP的实现细节。在RUDP的模块中最为重要的是其收发缓冲控制和CCC发送窗口控制、CCC发送慢启动控制、CCC快恢复控制等几个过程。(关于RUDP源代码实现在revolver开源项目的RUDP目录:点击打开链接)
//发送数据片
typedef struct tagRUDPSendSegment
{
uint64_t seq_; //块序号
uint64_t push_ts_; //进入发送队列的时刻
uint64_t last_send_ts_; //最后一次发送的时刻
uint16_t send_count_; //发送的次数
uint8_t data_[MAX_SEGMENT_SIZE]; //块数据
uint16_t data_size_; //块数据长度
}RUDPSendSegment;
typedef struct tagRUDPRecvSegment
{
uint64_t seq_; //块序号
uint8_t data_[MAX_SEGMENT_SIZE]; //块数据
uint16_t data_size_; //块数据长度
}RUDPRecvSegment;
块的最大尺寸为MAX_SEGMENT_SIZE = 1408(不能大于MTU,一般MTU是1492)。为了加快内存分配的速度,RUDP模块中使用了对象池来保证块对象的快速申请,对象池定义:
ObjectPool SENDSEGPOOL;
ObjectPool RECVSEGPOOL;
#define GAIN_SEND_SEG(seg) \
RUDPSendSegment* seg = SENDSEGPOOL.pop_obj();\
seg->reset()
#define RETURN_SEND_SEG(seg) \
if(seg != NULL)\
SENDSEGPOOL.push_obj(seg)
#define GAIN_RECV_SEG(seg) \
RUDPRecvSegment* seg = RECVSEGPOOL.pop_obj(); \
seg->reset()
#define RETURN_RECV_SEG(seg) \
if(seg != NULL)\
RECVSEGPOOL.push_obj(seg)
class RUDPSendBuffer
{
public:
...
//发送数据接口
int32_t send(const uint8_t* data, int32_t data_size);
//ACK处理
void on_ack(uint64_t ack_seq);
//NACK处理
void on_nack(uint64_t base_seq, const LossIDArray& loss_ids);
//定时器接口
void on_timer(uint64_t now_ts);
//检查BUFFER是否可以写入数据
void check_buffer();
...
public:
uint64_t get_buffer_seq() {return buffer_seq_;};
//设置NAGLE算法
void set_nagle(bool nagle = true){nagle_ = nagle;};
bool get_nagle() const {return nagle_;};
//设置发送缓冲区的大小
void set_buffer_size(int32_t buffer_size){buffer_size_ = buffer_size;};
int32_t get_buffer_size() const {return buffer_size_;};
...
protected:
IRUDPNetChannel* net_channel_;
//正在发送的数据片
SendWindowMap send_window_;
//正在发送的报文的丢包集合
LossIDSet loss_set_;
//等待发送的数据片
SendDataList send_data_;
//发送缓冲区的大小
int32_t buffer_size_;
//当前缓冲数据的大小
int32_t buffer_data_size_;
//当前BUFFER中最大的SEQ
uint64_t buffer_seq_;
//当前WINDOW中最大的SEQ
uint64_t cwnd_max_seq_;
//接收端最大的SEQ
uint64_t dest_max_seq_;
//速度控制器
RUDPCCCObject* ccc_;
//是否启动NAGLE算法
bool nagle_;
}
int32_t RUDPSendBuffer::send(const uint8_t* data, int32_t data_size)
{
int32_t copy_pos = 0;
int32_t copy_size = 0;
uint8_t* pos = (uint8_t *)data;
uint64_t now_timer = CBaseTimeValue::get_time_value().msec();
if(!send_data_.empty()) //拼接报文,让其接近MAX_SEGMENT_SIZE
{
//取出send_data_中的最后一片,如果它没有达到MAX_SEGMENT_SIZE,数据追加到MAX_SEGMENT_SIZE大小为止。
RUDPSendSegment* last_seg = send_data_.back();
if(last_seg != NULL && last_seg->data_size_ < MAX_SEGMENT_SIZE)
{
copy_size = MAX_SEGMENT_SIZE - last_seg->data_size_;
if( copy_size > data_size)
copy_size = data_size;
memcpy(last_seg->data_ + last_seg->data_size_, pos, copy_size);
copy_pos += copy_size;
pos += copy_size;
last_seg->data_size_ += copy_size;
}
}
//剩余数据分成MAX_SEGMENT_SIZE为单位的若干分片
while(copy_pos < data_size)
{
GAIN_SEND_SEG(last_seg);
//设置初始化的的时刻
last_seg->push_ts_ = now_timer; //记录压入时间戳
last_seg->seq_ = buffer_seq_;
buffer_seq_ ++;
//确定拷贝的块长度
copy_size = (data_size - copy_pos);
if(copy_size > MAX_SEGMENT_SIZE)
copy_size = MAX_SEGMENT_SIZE;
memcpy(last_seg->data_, pos, copy_size);
copy_pos += copy_size;
pos += copy_size;
last_seg->data_size_ = copy_size;
//压入发送队列
send_data_.push_back(last_seg);
}
//记录缓冲区的数据长度
buffer_data_size_ += copy_pos;
//尝试发送,立即发送
attempt_send(now_timer);
return copy_pos;
}
这里会触发attempt_send()函数。这个函数是尝试发送的核心函数。在后面的几个过程里面也会调用到这个函数。以上就是发送函数的过程。
void RUDPSendBuffer::attempt_send(uint64_t now_timer)
{
uint32_t cwnd_size = send_window_.size();
uint32_t rtt = ccc_->get_rtt();
uint32_t ccc_cwnd_size = ccc_->get_send_window_size();
RUDPSendSegment* seg = NULL;
uint32_t send_packet_number = 0;
if(!loss_set_.empty()) //重发丢失的片段
{
//发送丢包队列中的报文
uint64_t loss_last_ts = 0;
uint64_t loss_last_seq = 0;
for(LossIDSet::iterator it = loss_set_.begin(); it != loss_set_.end();) //检查丢失报文是否要重发
{
if(send_packet_number >= ccc_cwnd_size) //超过发送窗口
break;
SendWindowMap::iterator cwnd_it = send_window_.find(*it);
if(cwnd_it != send_window_.end() && cwnd_it->second->last_send_ts_ + rtt < now_timer) //丢失报文必须在窗口中
{
seg = cwnd_it->second;
//UDP网络发送
net_channel_->send_data(0, seg->seq_, seg->data_, seg->data_size_, now_timer);
if(cwnd_max_seq_ < seg->seq_)
cwnd_max_seq_ = seg->seq_;
//判断是否可以更改TS
if(loss_last_ts < seg->last_send_ts_)
{
loss_last_ts = seg->last_send_ts_;
if(loss_last_seq < *it)
loss_last_seq = *it;
}
seg->last_send_ts_ = now_timer;
seg->send_count_ ++;
send_packet_number ++;
loss_set_.erase(it ++);
//报告CCC有重发
ccc_->add_resend();
}
else
++ it;
}
//更新重发包范围内未重发报文的时刻,防止下一次定时器到来时重复发送
for(SendWindowMap::iterator it = send_window_.begin(); it != send_window_.end(); ++it)
{
if(it->second->push_ts_ < loss_last_ts && loss_last_seq >= it->first)
it->second->last_send_ts_ = now_timer;
else if(loss_last_seq < it->first)
break;
}
}
else if(send_window_.size() > 0)//丢包队列为空,重发所有窗口中超时的分片
{
//发送间时间隔阈值
uint32_t rtt_threshold = (uint32_t)ceil(rtt * 1.25);
rtt_threshold = (core_max(rtt_threshold, 30));
SendWindowMap::iterator end_it = send_window_.end();
for(SendWindowMap::iterator it = send_window_.begin(); it != end_it; ++it)
{
if(send_packet_number >= ccc_cwnd_size || (it->second->push_ts_ + rtt_threshold > now_timer))
break;
seg = it->second;
//重发块的触发条件是上一次发送的时间距离现在大于特定的阈值或者压入时间很长并且是属于发送缓冲区靠前的块
if(seg->last_send_ts_ + rtt_threshold < now_timer
|| (seg->push_ts_ + rtt_threshold * 5 < now_timer && seg->seq_ < dest_max_seq_ + 3 && seg->last_send_ts_ + rtt_threshold / 2 < now_timer))
{
net_channel_->send_data(0, seg->seq_, seg->data_, seg->data_size_, now_timer);
if(cwnd_max_seq_ < seg->seq_)
cwnd_max_seq_ = seg->seq_;
seg->last_send_ts_ = now_timer;
seg->send_count_ ++;
send_packet_number ++;
//报告CCC有重发块
ccc_->add_resend();
}
}
}
//判断是否可以发送新的报文
if(ccc_cwnd_size > send_packet_number)
{
while(!send_data_.empty())
{
RUDPSendSegment* seg = send_data_.front();
//判断NAGLE算法,NAGLE最少需要在100MS凑1024个字节报文
if(cwnd_size > 0 && nagle_ && seg->push_ts_ + NAGLE_DELAY > now_timer && seg->data_size_ < MAX_SEGMENT_SIZE - 256)
break;
//判断发送窗口
if(cwnd_size < ccc_cwnd_size)
{
send_data_.pop_front();
send_window_.insert(SendWindowMap::value_type(seg->seq_, seg));
cwnd_size ++;
seg->push_ts_ = now_timer;
seg->last_send_ts_ = now_timer;
seg->send_count_ = 1;
//UDP网络发送
net_channel_->send_data(0, seg->seq_, seg->data_, seg->data_size_, now_timer);
if(cwnd_max_seq_ < seg->seq_)
cwnd_max_seq_ = seg->seq_;
}
else //发送窗口满,则停止发送
break;
}
}
}
从上可得知,attempt_send是首先检查是否可以发送丢失的报文,然后再检查窗口中太老的报文是否要重发,最后才加入新的发送报文。所有的前提约束是不超过发送窗口。这个函数里CCC决定的发送窗口大小和RTT直接控制着发送速度和发送策略。
在这里值得一提的是NAGLE的实现,RUDP为了防止小包过多,实现了一个nagle算法,如果设置了此开关,假如只有1个块在缓冲队列中,会等数据达到1024的长度才进行发送。如果等100MS没到1024长度也会发送,也就是最大等100MS.开关可以通过rudp interface设置的。
class RUDPRecvBuffer
{
public:
...
//来自网络中的数据
int32_t on_data(uint64_t seq, const uint8_t* data, int32_t data_size);
//定时事件
void on_timer(uint64_t now_timer, uint32_t rtc);
//读取BUFFER中的数据
int32_t read(uint8_t* data, int32_t data_size);
//检查缓冲区是否可读
void check_buffer();
//检查丢包
bool check_loss(uint64_t now_timer, uint32_t rtc);
...
protected:
IRUDPNetChannel* net_channel_;
//接收窗口
RecvWindowMap recv_window_;
//已完成的连续数据片
RecvDataList recv_data_;
//丢包序列
LossIDTSMap loss_map_;
//当前BUFFER中最大连续数据片的SEQ
uint64_t first_seq_;
//当期BUFFER中受到的最大的数据片ID
uint64_t max_seq_;
//最后一次发送ACK的时刻
uint64_t last_ack_ts_;
//在上次发送ACK到现在,受到新的连续报文的标志
bool recv_new_packet_;
...
};
在上面定义中,核心的函数主要是on_data和on_timer。on_data是接收来自发送端的RUDP数据报文,在这个函数里面首先会进行接收到报文和缓冲去里面的报文进行比较判断是否丢包和重复包。如果有丢包,记录到loss_map中。如果是重复包,则丢弃。如果接收到的包和缓冲区里的报文可以组成连续的块序列。则对上层触发on_read读事件。一下是这个函数的伪代码:
int32_t RUDPRecvBuffer::on_data(uint64_t seq, const uint8_t* data, int32_t data_size)
{
//报文合法性检测
if(seq > first_seq_ + MAX_SEQ_INTNAL || data_size > MAX_SEGMENT_SIZE)
{
//报告异常
RUDP_RECV_DEBUG("on data exception!!");
net_channel_->on_exception();
return -1;
}
RUDPRecvSegment* seg = NULL;
if(first_seq_ + 1 == seq)//连续报文
{
recv_new_packet_= true;
//将数据缓冲到队列中
GAIN_RECV_SEG(seg);
seg->seq_ = seq;
seg->data_size_ = data_size;
memcpy(seg->data_, data, data_size);
recv_data_.push_back(seg);
first_seq_ = seq;
//判断缓冲区中的块是否连续,并进行排序
check_recv_window();
//触发可读事件
net_channel_->on_read();
//删除丢包
loss_map_.erase(seq);
}
else if(seq > first_seq_ + 1) //非连续报文
{
RecvWindowMap::iterator it = recv_window_.find(seq);
if(it == recv_window_.end()) //记录到接收窗口中
{
//将数据缓冲到队列中
GAIN_RECV_SEG(seg);
seg->seq_ = seq;
seg->data_size_ = data_size;
memcpy(seg->data_, data, data_size);
recv_window_[seq] = seg;
}
//判断丢包
if(seq > max_seq_ + 1)
{
uint64_t ts = CBaseTimeValue::get_time_value().msec();
for(uint64_t i = max_seq_ + 1; i < seq; ++ i) //缓冲区中最大的报文和收到的报文之间的报文全部列入丢包范围中,并记录丢包时刻
loss_map_[i] = ts;
}
else
{
//删除丢包
loss_map_.erase(seq);
}
}
//更新缓冲区最大SEQ
if(max_seq_ < seq)
max_seq_ = seq;
return 0;
}
void RUDPRecvBuffer::on_timer(uint64_t now_timer, uint32_t rtc)
{ //检查丢包
if(check_loss(now_timer, rtc))
recv_new_packet_ = false;
//检查是否需要发送ack
uint32_t rtc_threshold = core_min(20, rtc / 2);
if(last_ack_ts_ + rtc_threshold <= now_timer && recv_new_packet_)
send_ack();
//检查缓冲区是否可读
if(!recv_data_.empty() && net_channel_ != NULL)
net_channel_->on_read();
}
void RUDPCCCObject::set_rtt(uint32_t keep_live_rtt)
{
...
//第一次计算rtt和rtt修正
if(rtt_first_)
{
rtt_first_ = false;
rtt_ = keep_live_rtt;
rtt_var_ = rtt_ / 2;
}
else //参考了tcp的rtt计算
{
rtt_var_ = (rtt_var_ * 3 + core_abs(rtt_, keep_live_rtt)) / 4;
rtt_ = (7 * rtt_ + keep_live_rtt) / 8;
}
rtt_ = core_max(5, rtt_);
rtt_var_ = core_max(3, rtt_var_);
...
}