7.6 TIME_WAIT状态处理

        TCP在以下情况下可能会进入TIME_WAIT状态:

(1)在TCP_FIN_WAIT2状态时调用close系统调用时;

(2)TCP_FIN_WAIT2收到对端的FIN时(一定会进入TIME_WAIT状态);

(3)成为orphan sock并且在TCP_FIN_WAIT1状态下收到ACK时;

(4)TCP_CLOSING状态下收到ACK时(一定会进入TIME_WAIT状态);

(5)FIN_WAIT2定时器超时时。

        处于TIME_WAIT状态的TCP连接会保持2MSL(Maximum Segment Lifetime)时间,即2倍的报文段最大生存时间。在这段时间内,具有相同四元组(源IP、目的IP、源端口、目的端口)的新TCP连接通常无法建立。这样做的目的主要有两个:1)禁止旧连接的报文危害新连接;2)收到对端重传的FIN时回应ACK,使对端尽快释放连接资源。

        进入TIME_WAIT状态函数为tcp_time_wait:

266 void tcp_time_wait(struct sock *sk, int state, int timeo)
267 {
268     struct inet_timewait_sock *tw = NULL;
269     const struct inet_connection_sock *icsk = inet_csk(sk);
270     const struct tcp_sock *tp = tcp_sk(sk);
271     bool recycle_ok = false;
272
273     if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) //设置了快速回收tw sock并且开启了时间戳
274         recycle_ok = tcp_remember_stamp(sk);//记录时间戳信息到对端IP地址对应的信息管理块中;如果找到信息管理块,则recycle_ok为1
275
276     if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) //正在使用的tw sock的数量未超过限制
277         tw = inet_twsk_alloc(sk, state); //申请tw sock
278
279     if (tw != NULL) {
280         struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
281         const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);  //rto = 3.5 * icsk->icsk_rto
282         struct inet_sock *inet = inet_sk(sk);
283         //将socket中的信息记录到tw sock中
284         tw->tw_transparent  = inet->transparent;
285         tw->tw_rcv_wscale   = tp->rx_opt.rcv_wscale;
286         tcptw->tw_rcv_nxt   = tp->rcv_nxt;
287         tcptw->tw_snd_nxt   = tp->snd_nxt;
288         tcptw->tw_rcv_wnd   = tcp_receive_window(tp);
289         tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
290         tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
291         tcptw->tw_ts_offset = tp->tsoffset;
...
327         __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); //将tw sock放入ESTABLESHED hash表中,将sk从hash表中移除
328
329         /* Get the TIME_WAIT timeout firing. */
330         if (timeo < rto)
331             timeo = rto;
332
333         if (recycle_ok) { //可以快速回收
334             tw->tw_timeout = rto; //设置较短的超时时间
335         } else {
336             tw->tw_timeout = TCP_TIMEWAIT_LEN;
337             if (state == TCP_TIME_WAIT)
338                 timeo = TCP_TIMEWAIT_LEN;
339         }
340
341         inet_twsk_schedule(tw, &tcp_death_row, timeo,
342                    TCP_TIMEWAIT_LEN); //启动TIME WAIT定时器
343         inet_twsk_put(tw);
344     } else {
345         /* Sorry, if we're out of memory, just CLOSE this
346          * socket up.  We've got bigger problems than
347          * non-graceful socket closings.
348          */
349         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
350     }
351
352     tcp_update_metrics(sk); //更新管理信息,如果没有则创建
353     tcp_done(sk); //关闭TCP控制块
354 }
        进入TIME_WAIT状态后,TCP生成了一个tw sock代替socket存放在hash表中,如果这时应用进程调用close系统调用则socket结构体就可以释放。tw sock占用空间比socket小,从而能节约内存空间。在tw sock超时前如果有之前连接的数据到来,则会匹配到tw sock,在TCPv4入口函数tcp_v4_rcv中会做如下处理:
 1961 int tcp_v4_rcv(struct sk_buff *skb)
1962 {
1963     const struct iphdr *iph;
1964     const struct tcphdr *th;
1965     struct sock *sk;
1966     int ret;
1967     struct net *net = dev_net(skb->dev);
...
2002     sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
2003     if (!sk)
2004         goto no_tcp_socket;
2005
2006 process:
2007     if (sk->sk_state == TCP_TIME_WAIT)
2008         goto do_time_wait;
...
2073 do_time_wait:
2074     if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
2075         inet_twsk_put(inet_twsk(sk));
2076         goto discard_it;
2077     }
2078
2079     if (skb->len < (th->doff << 2)) { //长度异常
2080         inet_twsk_put(inet_twsk(sk));
2081         goto bad_packet;
2082     }
2083     if (tcp_checksum_complete(skb)) { //检验和异常
2084         inet_twsk_put(inet_twsk(sk));
2085         goto csum_error;
2086     }
2087     switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
2088     case TCP_TW_SYN: {//有SYN请求到来
2089         struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
2090                             &tcp_hashinfo,
2091                             iph->saddr, th->source,
2092                             iph->daddr, th->dest,
2093                             inet_iif(skb));//查询listening socket
2094         if (sk2) {
2095             inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);//删除TIME_WAIT定时器
2096             inet_twsk_put(inet_twsk(sk));//释放tw sock
2097             sk = sk2;
2098             goto process;//进入正常处理流程
2099         }
2100         /* Fall through to ACK */
2101     }
2102     case TCP_TW_ACK://发送ACK
2103         tcp_v4_timewait_ack(sk, skb);
2104         break;
2105     case TCP_TW_RST://发送RST
2106         goto no_tcp_socket;
2107     case TCP_TW_SUCCESS:;//不做任何处理
2108     }
2109     goto discard_it;
2110 }
        tcp_timewait_state_process函数:
  91 enum tcp_tw_status
 92 tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 93                const struct tcphdr *th)
 94 {
 95     struct tcp_options_received tmp_opt;
 96     struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 97     bool paws_reject = false;
 98
 99     tmp_opt.saw_tstamp = 0;
100     if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { //TCP头中有选项且旧连接开启了时间戳选项
101         tcp_parse_options(skb, &tmp_opt, 0, NULL);//解析选项
102
103         if (tmp_opt.saw_tstamp) {          
104             tmp_opt.rcv_tsecr   -= tcptw->tw_ts_offset;
105             tmp_opt.ts_recent   = tcptw->tw_ts_recent;
106             tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
107             paws_reject = tcp_paws_reject(&tmp_opt, th->rst);//检查是否发生了时间戳回绕
108         }
109     }
110
111     if (tw->tw_substate == TCP_FIN_WAIT2) { //当前tw连接是在TCP_FIN_WAIT2状态下被强制进入time wait的,当时的sock已经是orphan sock
112         /* Just repeat all the checks of tcp_rcv_state_process() */
113
114         /* Out of window, send ACK */
115         if (paws_reject || //发生了回绕,是旧包
116             !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
117                    tcptw->tw_rcv_nxt,
118                    tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))//数据在窗口之外
119             return TCP_TW_ACK;//发送ACK
120
121         if (th->rst)
122             goto kill;//删除TIME_WAIT定时器,释放tw sock
123
124         if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) //是SYN包且序列号比旧连接中要接收的下一个序列号大
125             goto kill_with_rst;//删除TIME_WAIT定时器,释放tw sock并发送RST
126
127         /* Dup ACK? */
128         if (!th->ack || //没有ACK标记
129             !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||//没有新数据
130             TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {//没有数据
131             inet_twsk_put(tw);
132             return TCP_TW_SUCCESS;
133         }
134
135         /* New data or FIN. If new data arrive after half-duplex close,
136          * reset.
137          */
138         if (!th->fin ||
139             TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
140 kill_with_rst:
141             inet_twsk_deschedule(tw, &tcp_death_row);
142             inet_twsk_put(tw);
143             return TCP_TW_RST;
144         }
145
146         /* FIN arrived, enter true time-wait state. */
147         tw->tw_substate   = TCP_TIME_WAIT;
148         tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
149         if (tmp_opt.saw_tstamp) {
150             tcptw->tw_ts_recent_stamp = get_seconds();
151             tcptw->tw_ts_recent   = tmp_opt.rcv_tsval;
152         }
153
154         if (tcp_death_row.sysctl_tw_recycle && //开启快速回收tw sock功能
155             tcptw->tw_ts_recent_stamp && //开启时间戳
156             tcp_tw_remember_stamp(tw))//将时间戳记录在管理信息块中
157             inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
158                        TCP_TIMEWAIT_LEN); //使用短的超时时间
159         else
160             inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
161                        TCP_TIMEWAIT_LEN);
162         return TCP_TW_ACK;
163     }
164
165     /*
166      *  Now real TIME-WAIT state.
167      *
168      *  RFC 1122:
169      *  "When a connection is [...] on TIME-WAIT state [...]
170      *  [a TCP] MAY accept a new SYN from the remote TCP to
171      *  reopen the connection directly, if it:
172      *
173      *  (1)  assigns its initial sequence number for the new
174      *  connection to be larger than the largest sequence
175      *  number it used on the previous connection incarnation,
176      *  and
177      *
178      *  (2)  returns to TIME-WAIT state if the SYN turns out
179      *  to be an old duplicate".
180      */
181
182     if (!paws_reject && //没有发生时间戳回绕
183         (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt && //序列号是下一个要接收的序列号
184          (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {//没有数据或设置了RST标记位
185         /* In window segment, it may be only reset or bare ack. */
186
187         if (th->rst) {
188             /* This is TIME_WAIT assassination, in two flavors.
189              * Oh well... nobody has a sufficient solution to this
190              * protocol bug yet.
191              */
192             if (sysctl_tcp_rfc1337 == 0) { //不开启这个选项当RST到来时会立即回收tw sock,但这样做是有风险的
193 kill:
194                 inet_twsk_deschedule(tw, &tcp_death_row); //删除time wait定时器
195                 inet_twsk_put(tw);//释放tw sock
196                 return TCP_TW_SUCCESS;
197             }
198         }
199         inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
200                    TCP_TIMEWAIT_LEN); //设置长的超时时间
201
202         if (tmp_opt.saw_tstamp) {
203             tcptw->tw_ts_recent   = tmp_opt.rcv_tsval;
204             tcptw->tw_ts_recent_stamp = get_seconds();
205         }
206
207         inet_twsk_put(tw);
208         return TCP_TW_SUCCESS;
209     }
210
211     /* Out of window segment.
212
213        All the segments are ACKed immediately.
214
215        The only exception is new SYN. We accept it, if it is
216        not old duplicate and we are not in danger to be killed
217        by delayed old duplicates. RFC check is that it has
218        newer sequence number works at rates <40Mbit/sec.
219        However, if paws works, it is reliable AND even more,
220        we even may relax silly seq space cutoff.
221
222        RED-PEN: we violate main RFC requirement, if this SYN will appear
223        old duplicate (i.e. we receive RST in reply to SYN-ACK),
224        we must return socket to time-wait state. It is not good,
225        but not fatal yet.
226      */
227
228     if (th->syn && !th->rst && !th->ack && !paws_reject && //是SYN包、没有RST也没有ACK、没有回绕
229         (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) || 
230          (tmp_opt.saw_tstamp && //新连接开启了时间戳
231           (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { //没有回绕
232         u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
233         if (isn == 0)
234             isn++;
235         TCP_SKB_CB(skb)->when = isn;
236         return TCP_TW_SYN; //允许新连接建立并替代tw sock,这时就依靠新旧序列号空间的不一致性来防止旧包对新连接的危害
237     }
238
239     if (paws_reject)
240         NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
241
242     if (!th->rst) {
243         /* In this case we must reset the TIMEWAIT timer.
244          *
245          * If it is ACKless SYN it may be both old duplicate
246          * and new good SYN with random sequence number <rcv_nxt.
247          * Do not reschedule in the last case.
248          */
249         if (paws_reject || th->ack)  //是回绕包或ACK
250             inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
251                        TCP_TIMEWAIT_LEN); //重新设置time wait定时器
252
253         /* Send ACK. Note, we do not put the bucket,
254          * it will be released by caller.
255          */
256         return TCP_TW_ACK;
257     }
258     inet_twsk_put(tw);
259     return TCP_TW_SUCCESS;
260 }
        第229行:SYN的序列号大于旧连接中下一个要接收的序列号,这样旧连接的数据在新连接中就会被认为是旧包而丢弃。

        从代码中得知,有一些情况会导致TIME_WAIT状态的提前终结:

(1)tw sock处于TCP_FIN_WAIT2子状态(原socket已成为孤儿sock)时,对端有SYN或新数据发送过来;

(2)收到RST且没有开启sysctl_tcp_rfc1337拒绝reset;

(3)收到新的SYN请求且允许建立新连接。

        除了这些情况外,tw sock会一直保持到TIME_WAIT定时器超时。

        在tw sock的生存时间内,TCP会处理属于已关闭连接的所有旧数据包,期望它们能够在新的连接建立之前全部消失在网络中。为什么关闭连接后TCP通信两端只有一端处于TIME_WAIT状态,而另一端却可以快速释放连接?因为只要有一端处于TIME_WAIT状态,则相同四元组的连接就无法建立,所以另一端无需担心旧报文混入新连接中。

你可能感兴趣的:(tcp,linux内核)