9.5 尾部丢失探测(Tail Loss Probe)定时器

9.5.1 Why

        在9.4节中,我们了解到:如果拥塞窗口较小,且丢失的恰好是最后一段数据,快速重传算法会因为收不到足够数量的重复ACK而无法及时重传丢失的报文。尾部丢失探测(Tail Loss Probe,TLP)定时器就是为了解决这个问题而设计的。

9.5.2 When

       TLP在tcp_schedule_loss_probe函数中安装:

1913 bool tcp_schedule_loss_probe(struct sock *sk)
1914 {
1915     struct inet_connection_sock *icsk = inet_csk(sk);
1916     struct tcp_sock *tp = tcp_sk(sk);
1917     u32 timeout, tlp_time_stamp, rto_time_stamp;
1918     u32 rtt = tp->srtt >> 3;
1919 
1920     if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
1921         return false;
1922     /* No consecutive loss probes. */
1923     if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
1924         tcp_rearm_rto(sk);
1925         return false;
1926     }
1927     /* Don't do any loss probe on a Fast Open connection before 3WHS
1928      * finishes.
1929      */
1930     if (sk->sk_state == TCP_SYN_RECV)
1931         return false;
1932 
1933     /* TLP is only scheduled when next timer event is RTO. */
1934     if (icsk->icsk_pending != ICSK_TIME_RETRANS)
1935         return false;
1936 
1937     /* Schedule a loss probe in 2*RTT for SACK capable connections
1938      * in Open state, that are either limited by cwnd or application.
1939      */
1940     if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
1941         !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
1942         return false;
1943 
1944     if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
1945          tcp_send_head(sk))
1946         return false;
1947 
1948     /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
1949      * for delayed ack when there's one outstanding packet.
1950      */
1951     timeout = rtt << 1;
1952     if (tp->packets_out == 1)
1953         timeout = max_t(u32, timeout,
1954                 (rtt + (rtt >> 1) + TCP_DELACK_MAX));
1955     timeout = max_t(u32, timeout, msecs_to_jiffies(10));
1956 
1957     /* If RTO is shorter, just schedule TLP in its place. */
1958     tlp_time_stamp = tcp_time_stamp + timeout;
1959     rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
1960     if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
1961         s32 delta = rto_time_stamp - tcp_time_stamp;
1962         if (delta > 0)
1963             timeout = delta;
1964     }
1965 
1966     inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
1967                   TCP_RTO_MAX);
1968     return true;
1969 }
        TCP在收到ACK时会调用tcp_schedule_loss_probe:
3325 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3326 {
...
3439     if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3440         tcp_schedule_loss_probe(sk);
...
         tcp_write_xmit成功发送数据后(只要不是在TLP定时器超时函数中发起的发送),也会调用tcp_schedule_loss_probe尝试安装TLP:

1811 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1812                int push_one, gfp_t gfp)
1813 {
...
1900     if (likely(sent_pkts)) {
...
1904         /* Send one loss probe per tail loss episode. */
1905         if (push_one != 2)         //不是在TLP定时器超时函数中发送的数据
1906             tcp_schedule_loss_probe(sk); 

        下面总结一下TLP安装的条件:

(1)安装了重传定时器,这时可能需要拆除重传定时器,改装TLP;如果没有安装重传定时器,说明没有数据需要重传,也就不需要TLP了

(2)没有安装ER定时器;ER定时器负责重传丢失的中间数据,只有将中间数据补全了才能重传尾部数据

(3)没有安装TLP定时器;重新安装TLP定时器相当于延长定时器超时时间

(4)在TFO模式下server端会在发送SYN|ACK后设置重传定时器以便重传SYN|ACK,但在三次握手完成之前不能进行丢失探测

(5)net.ipv4.tcp_early_retrans内核参数 >= 3(代码中该参数 < 3时直接返回false,不安装TLP)

(6)tp->srtt >= 8

(7)有包在网络中

(8)开启SACK

(9)拥塞状态为TCP_CA_Open

(10)网络中的包的数量 >= 拥塞窗口大小(发送受拥塞窗口限制),或没有数据等待发送(发送受应用限制);反之,如果拥塞窗口还有余量且还有数据等待发送,则不安装TLP

        TCP在安装重传定时器、ER定时器或坚持定时器的同时,TLP定时器就会被拆除。

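        TLP的超时时间是根据RTT动态计算的:默认为2*RTT;如果网络中只有一个包,则至少为1.5*RTT + TCP_DELACK_MAX(为对端的延迟ACK留出时间);超时时间不小于10ms;如果按此计算的超时时刻晚于重传定时器原定的超时时刻,则改用重传定时器的剩余时间。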

9.5.3 What

        TLP的超时函数是tcp_send_loss_probe:

1974 void tcp_send_loss_probe(struct sock *sk)
1975 {      
1976     struct tcp_sock *tp = tcp_sk(sk);
1977     struct sk_buff *skb;
1978     int pcount;
1979     int mss = tcp_current_mss(sk);
1980     int err = -1;
1981    
1982     if (tcp_send_head(sk) != NULL) {    //如果还有数据发送
1983         err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);  //则发送之
1984         goto rearm_timer;
1985     }
1986 
1987     /* At most one outstanding TLP retransmission. */
1988     if (tp->tlp_high_seq)    //已经有一个由TLP定时器发送的报文在网络中了
1989         goto rearm_timer;
1990 
1991     /* Retransmit last segment. */
1992     skb = tcp_write_queue_tail(sk);    //得到发送队列最后一个skb,即已发送的最后一个skb
1993     if (WARN_ON(!skb))
1994         goto rearm_timer;
1995 
1996     pcount = tcp_skb_pcount(skb);
1997     if (WARN_ON(!pcount))
1998         goto rearm_timer;
1999 
2000     if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2001         if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
2002             goto rearm_timer;
2003         skb = tcp_write_queue_tail(sk);
2004     }
2005 
2006     if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2007         goto rearm_timer;
2008 
2009     /* Probe with zero data doesn't trigger fast recovery. */
2010     if (skb->len > 0)
2011         err = __tcp_retransmit_skb(sk, skb);  //重传最后一个数据段
2012 
2013     /* Record snd_nxt for loss detection. */
2014     if (likely(!err))
2015         tp->tlp_high_seq = tp->snd_nxt;
2016 
2017 rearm_timer:
2018     inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2019                   inet_csk(sk)->icsk_rto,
2020                   TCP_RTO_MAX);        //设置重传定时器
2021 
2022     if (likely(!err))
2023         NET_INC_STATS_BH(sock_net(sk),
2024                  LINUX_MIB_TCPLOSSPROBES);
2025     return;
2026 }
        2000-2003:如果skb使用了GSO导致其由多个段构成(pcount > 1),且其数据长度超过(pcount - 1) * mss,则将其分割后再取发送队列中的最后一个skb,这样只需重传最后一个段
        可见TLP定时器的超时动作主要是:如果还有新数据待发送则发送新数据,否则重传最后一个报文段;无论走哪条路径,最后都会重新设置重传定时器。

你可能感兴趣的:(tcp,网络,linux内核)