TCP在发送数据的时候会携带ACK标记,但这里我们要研究的是TCP在收到数据时发送不带数据的ACK报文的情况,这时ACK的发送是通过tcp_send_ack函数完成的:
3027 void tcp_send_ack(struct sock *sk) 3028 { 3029 struct sk_buff *buff; 3030 3031 /* If we have been reset, we may not send again. */ 3032 if (sk->sk_state == TCP_CLOSE) 3033 return; 3034 3035 /* We are not putting this on the write queue, so 3036 * tcp_transmit_skb() will set the ownership to this 3037 * sock. 3038 */ 3039 buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); //申请一个只能容纳TCP首部的skb 3040 if (buff == NULL) { //申请失败 3041 inet_csk_schedule_ack(sk); 3042 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; //记录用来计算延时确认的估值 3043 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 3044 TCP_DELACK_MAX, TCP_RTO_MAX); //使用延迟ACK定时器发送ACK 3045 return; 3046 } 3047 3048 /* Reserve space for headers and prepare control bits. */ 3049 skb_reserve(buff, MAX_TCP_HEADER); 3050 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); //设置ACK标记 3051 3052 /* Send it off, this clears delayed acks for us. */ 3053 TCP_SKB_CB(buff)->when = tcp_time_stamp; //记录发送时间,用于计算RTT 3054 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); //发送无数据的ACK 3055 }
tcp_transmit_skb函数会将ACK标记写入TCP报头,并设置ack_seq:
828 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, 829 gfp_t gfp_mask) 830 { 831 const struct inet_connection_sock *icsk = inet_csk(sk); ... 899 th->ack_seq = htonl(tp->rcv_nxt); 900 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | 901 tcb->tcp_flags); ...899:tp->rcv_nxt是“下次希望接收的数据的序列号”,tp->rcv_nxt - 1则是“已经接收到的数据的序列号”。
发送ACK的时机有:
(1)进程将TCP收到的数据读取(通过tcp_recvmsg、tcp_splice、DMA等等)完毕,会调用tcp_cleanup_rbuf函数发送ACK来通知数据发送端更新窗口:
1323 void tcp_cleanup_rbuf(struct sock *sk, int copied) 1324 { 1325 struct tcp_sock *tp = tcp_sk(sk); 1326 bool time_to_ack = false; 1327 1328 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); 1329 1330 WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), 1331 "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", 1332 tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); 1333 1334 if (inet_csk_ack_scheduled(sk)) { //需要发送ACK 1335 const struct inet_connection_sock *icsk = inet_csk(sk); 1336 /* Delayed ACKs frequently hit locked sockets during bulk 1337 * receive. */ 1338 if (icsk->icsk_ack.blocked || //延迟ACK定时器超时时进程锁定了socket,导致ACK无法发送 1339 /* Once-per-two-segments ACK was not sent by tcp_input.c */ 1340 tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || //自上次发送ACK以来已收到超过一个MSS的数据但还未发送ACK 1341 /* 1342 * If this read emptied read buffer, we send ACK, if 1343 * connection is not bidirectional, user drained 1344 * receive buffer and there was a small segment 1345 * in queue. 1346 */ 1347 (copied > 0 && //进程至少copy了1字节数据 1348 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || 1349 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && 1350 !icsk->icsk_ack.pingpong)) && //socket不处于交互模式(交互模式允许延迟发送ACK) 1351 !atomic_read(&sk->sk_rmem_alloc))) //接收缓存为空 1352 time_to_ack = true; 1353 } 1354 1355 /* We send an ACK if we can now advertise a non-zero window 1356 * which has been raised "significantly". 1357 * 1358 * Even if window raised up to infinity, do not send window open ACK 1359 * in states, where we will not receive more. It is useless. 1360 */ 1361 if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) { 1362 __u32 rcv_window_now = tcp_receive_window(tp); //得到当前接收窗口的值,即对端计算的发送窗口 1363 1364 /* Optimize, __tcp_select_window() is not cheap. */ 1365 if (2*rcv_window_now <= tp->window_clamp) { //当前接收窗口不超过最大接收窗口的一半 1366 __u32 new_window = __tcp_select_window(sk); //根据当前缓存情况得出真正的通告窗口 1367 1368 /* Send ACK now, if this read freed lots of space 1369 * in our buffer. 
Certainly, new_window is new window. 1370 * We can advertise it now, if it is not less than current one. 1371 * "Lots" means "at least twice" here. 1372 */ 1373 if (new_window && new_window >= 2 * rcv_window_now) //真正的通告窗口是现在对端计算的发送窗口的2倍以上 1374 time_to_ack = true; 1375 } 1376 } 1377 if (time_to_ack) //决定发送ACK 1378 tcp_send_ack(sk); 1379 }1348-1349:看一下icsk->icsk_ack.pending中标记位的含义:
ICSK_ACK_SCHED:有ACK需要发送,是立即发送还是延时发送,还需要看其他标志,也是能否发送确认的前提
ICSK_ACK_TIMER:延时发送ACK定时器已经启动
ICSK_ACK_PUSHED:只要有ACK需要发送,并且pingpong为0时,ACK可以立即发送
ICSK_ACK_PUSHED2:只要有ACK需要发送,都可以立即发送,不管是否处于pingpong(交互)模式
如果经过上面流程决定无需发送ACK,则还要进行下面的检查:
1361-1374:如果有数据copy到进程中,且socket的接收方向没有关闭(未设置RCV_SHUTDOWN),则通过计算新的窗口值来决定是否发送ACK
综上,进程读取数据后发送ACK的基本条件是:进程成功地copy了数据并且接收缓存为空,或当前可以通告给对端的窗口是对端计算的发送窗口的2倍以上。
(2)报文的序列号或确认号非法导致调用tcp_send_challenge_ack发送挑战ACK
3254 static void tcp_send_challenge_ack(struct sock *sk) 3255 { 3256 /* unprotected vars, we dont care of overwrites */ 3257 static u32 challenge_timestamp; 3258 static unsigned int challenge_count; 3259 u32 now = jiffies / HZ; 3260 3261 if (now != challenge_timestamp) { 3262 challenge_timestamp = now; 3263 challenge_count = 0; 3264 } 3265 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { 3266 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); 3267 tcp_send_ack(sk); 3268 } 3269 }(3)使用tcp_send_dupack发送重复ACK时:
3895 static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) 3896 { 3897 struct tcp_sock *tp = tcp_sk(sk); 3898 3899 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && //包中有数据或SYN或FIN标记位 3900 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { //包中有旧数据 3901 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); 3902 tcp_enter_quickack_mode(sk); //进入快速ACK模式 3903 3904 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { //处理SACK相关信息 3905 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 3906 3907 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) 3908 end_seq = tp->rcv_nxt; 3909 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq); 3910 } 3911 } 3912 3913 tcp_send_ack(sk); //发送ACK 3914 }3902:在包中有部分(也许是全部)旧数据的情况下TCP会调用tcp_enter_quickack_mode进入快速ACK模式。现简要讨论一下快速ACK模式,这个模式下几乎每收到一个包都会发送ACK。相关代码如下:
173 static void tcp_incr_quickack(struct sock *sk) 174 { 175 struct inet_connection_sock *icsk = inet_csk(sk); 176 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); //按照每两个MSS一个ACK来计算ACK的数量 177 178 if (quickacks == 0) 179 quickacks = 2; //至少发送2个ACK 180 if (quickacks > icsk->icsk_ack.quick) 181 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); //允许快速发送的ACK的最大数量是TCP_MAX_QUICKACKS(16) 182 } 183 184 static void tcp_enter_quickack_mode(struct sock *sk) 185 { 186 struct inet_connection_sock *icsk = inet_csk(sk); 187 tcp_incr_quickack(sk); //增加可以快速发送ACK段的数量 188 icsk->icsk_ack.pingpong = 0; //禁用乒乓模式(即交互模式) 189 icsk->icsk_ack.ato = TCP_ATO_MIN; 190 } 191 192 /* Send ACKs quickly, if "quick" count is not exhausted 193 * and the session is not interactive. 194 */ 195 196 static inline bool tcp_in_quickack_mode(const struct sock *sk) //这个函数用于判断是否处于快速ACK模式 197 { 198 const struct inet_connection_sock *icsk = inet_csk(sk); 199 200 return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; 201 }__tcp_ack_snd_check会调用tcp_in_quickack_mode函数判断是否处于快速ACK模式,如果是则立即发送ACK,否则延迟发送。
调用tcp_send_dupack有两种情况:PAWS检查失败和序列号非法,这时需要丢包并发送ACK:
4985 static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, 4986 const struct tcphdr *th, int syn_inerr) 4987 { 4988 struct tcp_sock *tp = tcp_sk(sk); 4989 4990 /* RFC1323: H1. Apply PAWS check first. */ 4991 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && 4992 tcp_paws_discard(sk, skb)) { 4993 if (!th->rst) { 4994 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); 4995 tcp_send_dupack(sk, skb); //PAWS检查失败,skb是旧包 4996 goto discard; 4997 } 4998 /* Reset is accepted even if it did not pass PAWS. */ 4999 } 5000 5001 /* Step 1: check sequence number */ 5002 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { 5003 /* RFC793, page 37: "In all states except SYN-SENT, all reset 5004 * (RST) segments are validated by checking their SEQ-fields." 5005 * And page 69: "If an incoming segment is not acceptable, 5006 * an acknowledgment should be sent in reply (unless the RST 5007 * bit is set, if so drop the segment and return)". 5008 */ 5009 if (!th->rst) { 5010 if (th->syn) 5011 goto syn_challenge; 5012 tcp_send_dupack(sk, skb); //序列号非法 5013 } 5014 goto discard; ...(4)__tcp_ack_snd_check函数:
4758 static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) 4759 { 4760 struct tcp_sock *tp = tcp_sk(sk); 4761 4762 /* More than one full frame received... */ 4763 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && 4764 /* ... and right edge of window advances far enough. 4765 * (tcp_recvmsg() will send ACK otherwise). Or... 4766 */ 4767 __tcp_select_window(sk) >= tp->rcv_wnd) || //收到超过一个MSS的数据且通告窗口不会缩小 4768 /* We ACK each frame or... */ 4769 tcp_in_quickack_mode(sk) || //在快速ACK模式下 4770 /* We have out of order data. */ 4771 (ofo_possible && skb_peek(&tp->out_of_order_queue))) { //有乱序数据 4772 /* Then ack it now */ 4773 tcp_send_ack(sk); 4774 } else { 4775 /* Else, send delayed ack. */ 4776 tcp_send_delayed_ack(sk); 4777 } 4778 }快速路径处理模式下每收到一个包就会调用一次__tcp_ack_snd_check,而在快速ACK模式下就会对每个收到的包都发送ACK:
5076 int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, 5077 const struct tcphdr *th, unsigned int len) 5078 { 5079 struct tcp_sock *tp = tcp_sk(sk); ... 5236 if (!copied_early || tp->rcv_nxt != tp->rcv_wup) //如果没有数据通过DMA直接copy给进程,或接收了新的数据 5237 __tcp_ack_snd_check(sk, 0); ...
tcp_send_delayed_ack是使用延迟ACK定时器延迟发送ACK:
2974 void tcp_send_delayed_ack(struct sock *sk) 2975 { 2976 struct inet_connection_sock *icsk = inet_csk(sk); ... 3021 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; 3022 icsk->icsk_ack.timeout = timeout; 3023 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); 3024 }
(5)tcp_ack_snd_check函数:
4780 static inline void tcp_ack_snd_check(struct sock *sk) 4781 { 4782 if (!inet_csk_ack_scheduled(sk)) { 4783 /* We sent a data segment already. */ 4784 return; 4785 } 4786 __tcp_ack_snd_check(sk, 1); 4787 }
只有当inet_csk_ack_scheduled为真时tcp_ack_snd_check函数才会发送ACK:
166 static inline void inet_csk_schedule_ack(struct sock *sk) 167 { 168 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; 169 } 170 171 static inline int inet_csk_ack_scheduled(const struct sock *sk) 172 { 173 return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; 174 }这时意味着有发送ACK的必要,除了tcp_send_delayed_ack设置延迟ACK定时器之外,调用inet_csk_schedule_ack也会使inet_csk_ack_scheduled为真,而调用inet_csk_schedule_ack的条件有:
1)tcp_event_data_recv函数
584 static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) 585 { 586 struct tcp_sock *tp = tcp_sk(sk); 587 struct inet_connection_sock *icsk = inet_csk(sk); 588 u32 now; 589 590 inet_csk_schedule_ack(sk); ...而tcp_event_data_recv被调用的条件是收到了按序到达的新数据(例如在tcp_rcv_established的快速路径或tcp_data_queue中将数据放入接收队列之后)。
2)收到乱序数据:
4121 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) 4122 { ... 4134 4135 /* Disable header prediction. */ 4136 tp->pred_flags = 0; 4137 inet_csk_schedule_ack(sk); ...3)收到窗口之外的数据:
4300 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) 4301 { ... 4380 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { 4381 /* A retransmit, 2nd most common case. Force an immediate ack. */ 4382 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); 4383 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); 4384 4385 out_of_window: 4386 tcp_enter_quickack_mode(sk); 4387 inet_csk_schedule_ack(sk); 4388 drop: 4389 __kfree_skb(skb); 4390 return; 4391 } ...(6)延迟ACK定时器会在超时时发送ACK;使用延迟ACK机制可以一次累计确认多个报文段,这样就减少了ACK报文段的发送,从而减轻了网络拥塞。除了tcp_send_delayed_ack会设置延迟ACK定时器外,TCP在处理prequeue时也会设置:
1919 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) 1920 { ... 1931 __skb_queue_tail(&tp->ucopy.prequeue, skb); 1932 tp->ucopy.memory += skb->truesize; 1933 if (tp->ucopy.memory > sk->sk_rcvbuf) { 1934 struct sk_buff *skb1; 1935 1936 BUG_ON(sock_owned_by_user(sk)); 1937 1938 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { 1939 sk_backlog_rcv(sk, skb1); 1940 NET_INC_STATS_BH(sock_net(sk), 1941 LINUX_MIB_TCPPREQUEUEDROPPED); 1942 } 1943 1944 tp->ucopy.memory = 0; 1945 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { //队列中仅有一个skb 1946 wake_up_interruptible_sync_poll(sk_sleep(sk), 1947 POLLIN | POLLRDNORM | POLLRDBAND); 1948 if (!inet_csk_ack_scheduled(sk)) 1949 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 1950 (3 * tcp_rto_min(sk)) / 4, 1951 TCP_RTO_MAX); 1952 } 1953 return true; 1954 }关于prequeue后续章节会详细讨论
(7)TCP在将承载进程传入内核数据的skb放入发送队列时会设置ACK标记:
596 static inline void skb_entail(struct sock *sk, struct sk_buff *skb) //由tcp_sendmsg和do_tcp_sendpages调用 597 { 598 struct tcp_sock *tp = tcp_sk(sk); 599 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); 600 601 skb->csum = 0; 602 tcb->seq = tcb->end_seq = tp->write_seq; 603 tcb->tcp_flags = TCPHDR_ACK; //设置ACK标记 604 tcb->sacked = 0; 605 skb_header_release(skb); 606 tcp_add_write_queue_tail(sk, skb); 607 sk->sk_wmem_queued += skb->truesize; 608 sk_mem_charge(sk, skb->truesize); 609 if (tp->nonagle & TCP_NAGLE_PUSH) 610 tp->nonagle &= ~TCP_NAGLE_PUSH; 611 }调用tcp_transmit_skb发送数据时这个标记会被设置到TCP报头上:
828 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, 829 gfp_t gfp_mask) 830 { ... 900 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | 901 tcb->tcp_flags); ... 940 if (likely(tcb->tcp_flags & TCPHDR_ACK)) 941 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); ...这样TCP发送的数据就会携带ACK标记,tcp_event_ack_sent函数会卸载延迟ACK定时器,并可能会取消快速ACK模式,从而减少ACK报文段的发送。这种方法被称为“捎带确认”:
178 static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) 179 { 180 tcp_dec_quickack_mode(sk, pkts); //减少quickack计数,减小到0则快速ACK模式会被禁用 181 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); //卸载延迟ACK定时器 182 }
(8)另外,TCP收到SYN|ACK、FIN也会发送ACK,此处不做讨论
总之,TCP发送ACK的情况分为以下几类:
(1)收到非法报文时
(2)进程从TCP接收缓存读取数据后;这时通告窗口可能会变大
(3)收到数据时;这时通告窗口可能会变小,发送ACK会告知对端更新窗口,并让对端释放其发送缓存中已被确认的数据
(4)发送数据段时
(5)收到带SYN、FIN标记的包时
TCP在接收ACK时,ACK标记的处理由tcp_ack函数完成:
3325 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) 3326 { 3327 struct inet_connection_sock *icsk = inet_csk(sk); 3328 struct tcp_sock *tp = tcp_sk(sk); 3329 u32 prior_snd_una = tp->snd_una; 3330 u32 ack_seq = TCP_SKB_CB(skb)->seq; 3331 u32 ack = TCP_SKB_CB(skb)->ack_seq; 3332 bool is_dupack = false; 3333 u32 prior_in_flight; 3334 u32 prior_fackets; 3335 int prior_packets = tp->packets_out; 3336 int prior_sacked = tp->sacked_out; 3337 int pkts_acked = 0; 3338 int previous_packets_out = 0; 3339 3340 /* If the ack is older than previous acks 3341 * then we can probably ignore it. 3342 */ 3343 if (before(ack, prior_snd_una)) { //ack_seq是旧的 3344 /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ 3345 if (before(ack, prior_snd_una - tp->max_window)) {//如果ACK序列号太旧了 3346 tcp_send_challenge_ack(sk);//发送挑战ACK给对端,如果连接已经结束则对端会发送RST 3347 return -1; 3348 } 3349 goto old_ack; 3350 } 3351 3352 /* If the ack includes data we haven't sent yet, discard 3353 * this segment (RFC793 Section 3.9). 3354 */ 3355 if (after(ack, tp->snd_nxt)) //ack_seq太超前了 3356 goto invalid_ack; 3357 3358 if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || //设置了早期重传定时器 3359 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) //设置了丢失探测定时器 3360 tcp_rearm_rto(sk); //取消或重新设置重传定时器 3361 3362 if (after(ack, prior_snd_una))//至少会确认1字节数据 3363 flag |= FLAG_SND_UNA_ADVANCED; 3364 3365 prior_fackets = tp->fackets_out; 3366 prior_in_flight = tcp_packets_in_flight(tp); //在“网络中”的报文的数量,即“已发送的 - 已确认的 - 丢失的 + 重传的” 3367 3368 /* ts_recent update must be made after we are sure that the packet 3369 * is in window. 3370 */ 3371 if (flag & FLAG_UPDATE_TS_RECENT) 3372 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); //更新时间戳 3373 3374 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {//不是慢速路径且至少会确认1字节数据 3375 /* Window is constant, pure forward advance. 3376 * No more checks are required. 3377 * Note, we use the fact that SND.UNA>=SND.WL2. 
3378 */ 3379 tcp_update_wl(tp, ack_seq); //记录更新窗口的ACK的序列号 3380 tp->snd_una = ack; //更新“已发送但未确认”的序列号,tp->snd_una - 1即已经被确认的数据的序列号 3381 flag |= FLAG_WIN_UPDATE; 3382 3383 tcp_ca_event(sk, CA_EVENT_FAST_ACK); //进行拥塞控制 3384 3385 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS); 3386 } else { 3387 if (ack_seq != TCP_SKB_CB(skb)->end_seq)//ACK包中有数据 3388 flag |= FLAG_DATA; 3389 else 3390 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS); 3391 3392 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);//更新发送窗口 3393 3394 if (TCP_SKB_CB(skb)->sacked) 3395 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); //更新发送队列中的SACK信息 3396 3397 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))//收到显式拥塞通告 3398 flag |= FLAG_ECE; 3399 3400 tcp_ca_event(sk, CA_EVENT_SLOW_ACK); 3401 } 3402 3403 /* We passed data and got it acked, remove any soft error 3404 * log. Something worked... 3405 */ 3406 sk->sk_err_soft = 0; 3407 icsk->icsk_probes_out = 0; 3408 tp->rcv_tstamp = tcp_time_stamp; 3409 if (!prior_packets) //没有已发送但未被确认的数据 3410 goto no_queue; 3411 3412 /* See if we can take anything off of the retransmit queue. */ 3413 previous_packets_out = tp->packets_out; 3414 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);//将已经确认的数据移出发送缓冲 3415 3416 pkts_acked = previous_packets_out - tp->packets_out;//计算被确认的包的个数 3417 3418 if (tcp_ack_is_dubious(sk, flag)) {//如果ACK是重复的,或要进入拥塞状态,或已经在拥塞状态 3419 /* Advance CWND, if state allows this. 
*/ 3420 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) //确认了数据且可以增大拥塞窗口 3421 tcp_cong_avoid(sk, ack, prior_in_flight); //执行拥塞避免算法 3422 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3423 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, 3424 prior_packets, is_dupack, flag); //更新拥塞控制信息,并决定是否执行快速重传 3425 } else { 3426 if (flag & FLAG_DATA_ACKED) //确认了数据 3427 tcp_cong_avoid(sk, ack, prior_in_flight); 3428 } 3429 3430 if (tp->tlp_high_seq) //Tail Loss Probe (TLP)算法相关 3431 tcp_process_tlp_ack(sk, ack, flag); 3432 3433 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { 3434 struct dst_entry *dst = __sk_dst_get(sk); 3435 if (dst) 3436 dst_confirm(dst); 3437 } 3438 3439 if (icsk->icsk_pending == ICSK_TIME_RETRANS) //如果设置了重传定时器 3440 tcp_schedule_loss_probe(sk); //将重传定时器转换为丢失探测定时器 3441 return 1; 3442 3443 no_queue: 3444 /* If data was DSACKed, see if we can undo a cwnd reduction. */ 3445 if (flag & FLAG_DSACKING_ACK) 3446 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, 3447 prior_packets, is_dupack, flag); 3448 /* If this ack opens up a zero window, clear backoff. It was 3449 * being used to time the probes, and is probably far higher than 3450 * it needs to be for normal retransmission. 3451 */ 3452 if (tcp_send_head(sk)) //在数据已经全部被对方接收且发送队列中有数据未发送,这通常意味着发送窗口太小 3453 tcp_ack_probe(sk); //重新设置窗口探测定时器(坚持定时器) 3454 3455 if (tp->tlp_high_seq) 3456 tcp_process_tlp_ack(sk, ack, flag); 3457 return 1; 3458 3459 invalid_ack: 3460 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt); 3461 return -1; 3462 3463 old_ack: 3464 /* If data was SACKed, tag it and see if we should send more data. 3465 * If data was DSACKed, see if we can undo a cwnd reduction. 
3466 */ 3467 if (TCP_SKB_CB(skb)->sacked) { 3468 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); 3469 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, 3470 prior_packets, is_dupack, flag); 3471 } 3472 3473 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); 3474 return 0; 3475 }tcp_clean_rtx_queue函数用于清除发送队列中已经被确认的skb:
3001 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, 3002 u32 prior_snd_una) 3003 { 3004 struct tcp_sock *tp = tcp_sk(sk); 3005 const struct inet_connection_sock *icsk = inet_csk(sk); 3006 struct sk_buff *skb; 3007 u32 now = tcp_time_stamp; 3008 int fully_acked = true; 3009 int flag = 0; 3010 u32 pkts_acked = 0; 3011 u32 reord = tp->packets_out; 3012 u32 prior_sacked = tp->sacked_out; 3013 s32 seq_rtt = -1; 3014 s32 ca_seq_rtt = -1; 3015 ktime_t last_ackt = net_invalid_timestamp(); 3016 3017 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {//获取发送缓存中最旧的已发送数据 3018 struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 3019 u32 acked_pcount; 3020 u8 sacked = scb->sacked; 3021 3022 /* Determine how many packets and what bytes were acked, tso and else */ 3023 if (after(scb->end_seq, tp->snd_una)) { //skb有一部分数据未确认 3024 if (tcp_skb_pcount(skb) == 1 ||//skb能够被TSO作为一个包发送 3025 !after(tp->snd_una, scb->seq))//当前数据未被确认 3026 break;//当前包不能删除 3027 3028 acked_pcount = tcp_tso_acked(sk, skb);//计算有多少个TSO分割后的包能被确认 3029 if (!acked_pcount) 3030 break;//不能删除当前包的任何一部分 3031 3032 fully_acked = false;//确认了当前包中的一部分 3033 } else {//当前数据全部被确认 3034 acked_pcount = tcp_skb_pcount(skb);//被确认的包的个数是当前包经TSO分割后的包的数量 3035 } ... 3070 if (!(scb->tcp_flags & TCPHDR_SYN)) { 3071 flag |= FLAG_DATA_ACKED;//有数据被确认 3072 } else { 3073 flag |= FLAG_SYN_ACKED;//SYN包不算被确认的数据 3074 tp->retrans_stamp = 0; 3075 } 3076 3077 if (!fully_acked) //如果skb未被完全确认,则不能删除 3078 break; 3079 3080 tcp_unlink_write_queue(skb, sk);//将skb移出发送缓存 3081 sk_wmem_free_skb(sk, skb);//释放skb 3082 tp->scoreboard_skb_hint = NULL; 3083 if (skb == tp->retransmit_skb_hint) 3084 tp->retransmit_skb_hint = NULL; 3085 if (skb == tp->lost_skb_hint) 3086 tp->lost_skb_hint = NULL; 3087 } 3088 3089 if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))//可能确认了紧急数据 3090 tp->snd_up = tp->snd_una;//更新紧急指针;tp->snd_una == tp->snd_up意味着没有发送紧急数据 ... 3165 return flag; 3166 }
代码省略的部分主要是与拥塞控制有关。
当TCP把数据从发送缓冲中清除时,这些数据的传输才真正完成。使用TCP发送数据的应用进程不需要关心这些过程,它们把数据成功写入TCP发送缓存之后就可以不保存数据的副本了,数据的可靠交付由TCP协议完成。
从ACK的发送和接收的流程中可以看出,ACK的主要功能为:
(1)通知数据发送端清除其发送缓存中已被接收端确认收到的数据
(2)更新通告窗口
(3)参与拥塞控制(快速重传、拥塞避免等等)
TCP数据接收端发送ACK通知发送端可以删除发送缓存中的数据时,这些数据可能只是放入了接收端的接收队列中,并没有交付应用进程。应用进程收取数据是通过收包系统调用完成的,详见下节。