TCP在以下情况下可能会进入TIME_WAIT状态:
(1)在TCP_FIN_WAIT2状态下调用close系统调用时;
(2)TCP_FIN_WAIT2收到对端的FIN时(一定会进入TIME_WAIT状态);
(3)成为orphan sock并且在TCP_FIN_WAIT1状态下收到ACK时;
(4)TCP_CLOSING状态下收到ACK时(一定会进入TIME_WAIT状态);
(5)FIN_WAIT2定时器超时时。
处于TIME_WAIT状态的TCP连接会保持2MSL(Maximum Segment Lifetime)时间,即2倍的报文最大生存时间。在这段时间内,具有相同源|目的IP和源|目的端口的TCP连接无法建立。这样做的目的主要有两个:1)禁止旧连接的报文危害新连接;2)收到对端重传的FIN时回应ACK,使对端尽快释放连接资源。
进入TIME_WAIT状态的函数为tcp_time_wait:
266 void tcp_time_wait(struct sock *sk, int state, int timeo) 267 { 268 struct inet_timewait_sock *tw = NULL; 269 const struct inet_connection_sock *icsk = inet_csk(sk); 270 const struct tcp_sock *tp = tcp_sk(sk); 271 bool recycle_ok = false; 272 273 if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) //设置了快速回收tw sock并且开启了时间戳 274 recycle_ok = tcp_remember_stamp(sk);//记录时间戳信息到对端IP地址对应的信息管理块中;如果找到信息管理块,则recycle_ok为1 275 276 if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) //正在使用的tw sock的数量未超过限制 277 tw = inet_twsk_alloc(sk, state); //申请tw sock 278 279 if (tw != NULL) { 280 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 281 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); //rto = 3.5 * icsk->icsk_rto 282 struct inet_sock *inet = inet_sk(sk); 283 //将socket中的信息记录到tw sock中 284 tw->tw_transparent = inet->transparent; 285 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; 286 tcptw->tw_rcv_nxt = tp->rcv_nxt; 287 tcptw->tw_snd_nxt = tp->snd_nxt; 288 tcptw->tw_rcv_wnd = tcp_receive_window(tp); 289 tcptw->tw_ts_recent = tp->rx_opt.ts_recent; 290 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; 291 tcptw->tw_ts_offset = tp->tsoffset; ... 327 __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); //将tw sock放入ESTABLISHED hash表中,将sk从hash表中移除 328 329 /* Get the TIME_WAIT timeout firing. */ 330 if (timeo < rto) 331 timeo = rto; 332 333 if (recycle_ok) { //可以快速回收 334 tw->tw_timeout = rto; //设置较短的超时时间 335 } else { 336 tw->tw_timeout = TCP_TIMEWAIT_LEN; 337 if (state == TCP_TIME_WAIT) 338 timeo = TCP_TIMEWAIT_LEN; 339 } 340 341 inet_twsk_schedule(tw, &tcp_death_row, timeo, 342 TCP_TIMEWAIT_LEN); //启动TIME WAIT定时器 343 inet_twsk_put(tw); 344 } else { 345 /* Sorry, if we're out of memory, just CLOSE this 346 * socket up. We've got bigger problems than 347 * non-graceful socket closings. 
348 */ 349 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); 350 } 351 352 tcp_update_metrics(sk); //更新管理信息,如果没有则创建 353 tcp_done(sk); //关闭TCP控制块 354 }进入TIME_WAIT状态后,TCP生成了一个tw sock代替socket存放在hash表中,如果这时应用进程调用close系统调用则socket结构体就可以释放。tw sock占用空间比socket小,从而能节约内存空间。在tw sock超时前如果有之前连接的数据到来,则会匹配到tw sock,在TCPv4入口函数tcp_v4_rcv中会做如下处理:
1961 int tcp_v4_rcv(struct sk_buff *skb) 1962 { 1963 const struct iphdr *iph; 1964 const struct tcphdr *th; 1965 struct sock *sk; 1966 int ret; 1967 struct net *net = dev_net(skb->dev); ... 2002 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 2003 if (!sk) 2004 goto no_tcp_socket; 2005 2006 process: 2007 if (sk->sk_state == TCP_TIME_WAIT) 2008 goto do_time_wait; ... 2073 do_time_wait: 2074 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 2075 inet_twsk_put(inet_twsk(sk)); 2076 goto discard_it; 2077 } 2078 2079 if (skb->len < (th->doff << 2)) { //长度异常 2080 inet_twsk_put(inet_twsk(sk)); 2081 goto bad_packet; 2082 } 2083 if (tcp_checksum_complete(skb)) { //检验和异常 2084 inet_twsk_put(inet_twsk(sk)); 2085 goto csum_error; 2086 } 2087 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 2088 case TCP_TW_SYN: {//有SYN请求到来 2089 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), 2090 &tcp_hashinfo, 2091 iph->saddr, th->source, 2092 iph->daddr, th->dest, 2093 inet_iif(skb));//查询listening socket 2094 if (sk2) { 2095 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);//删除TIME_WAIT定时器 2096 inet_twsk_put(inet_twsk(sk));//释放tw sock 2097 sk = sk2; 2098 goto process;//进入正常处理流程 2099 } 2100 /* Fall through to ACK */ 2101 } 2102 case TCP_TW_ACK://发送ACK 2103 tcp_v4_timewait_ack(sk, skb); 2104 break; 2105 case TCP_TW_RST://发送RST 2106 goto no_tcp_socket; 2107 case TCP_TW_SUCCESS:;//不做任何处理 2108 } 2109 goto discard_it; 2110 }tcp_timewait_state_process函数:
91 enum tcp_tw_status 92 tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, 93 const struct tcphdr *th) 94 { 95 struct tcp_options_received tmp_opt; 96 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 97 bool paws_reject = false; 98 99 tmp_opt.saw_tstamp = 0; 100 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { //TCP头中有选项且旧连接开启了时间戳选项 101 tcp_parse_options(skb, &tmp_opt, 0, NULL);//解析选项 102 103 if (tmp_opt.saw_tstamp) { 104 tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; 105 tmp_opt.ts_recent = tcptw->tw_ts_recent; 106 tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 107 paws_reject = tcp_paws_reject(&tmp_opt, th->rst);//检查是否发生了时间戳回绕 108 } 109 } 110 111 if (tw->tw_substate == TCP_FIN_WAIT2) { //当前tw连接是在TCP_FIN_WAIT2状态下被强制进入time wait的,当时的sock已经是orphan sock 112 /* Just repeat all the checks of tcp_rcv_state_process() */ 113 114 /* Out of window, send ACK */ 115 if (paws_reject || //发生了回绕,是旧包 116 !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, 117 tcptw->tw_rcv_nxt, 118 tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))//数据在窗口之外 119 return TCP_TW_ACK;//发送ACK 120 121 if (th->rst) 122 goto kill;//删除TIME_WAIT定时器,释放tw sock 123 124 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) //是SYN包且序列号不小于旧连接中要接收的下一个序列号 125 goto kill_with_rst;//删除TIME_WAIT定时器,释放tw sock并发送RST 126 127 /* Dup ACK? */ 128 if (!th->ack || //没有ACK标记 129 !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||//没有新数据 130 TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {//没有数据 131 inet_twsk_put(tw); 132 return TCP_TW_SUCCESS; 133 } 134 135 /* New data or FIN. If new data arrive after half-duplex close, 136 * reset. 137 */ 138 if (!th->fin || 139 TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { 140 kill_with_rst: 141 inet_twsk_deschedule(tw, &tcp_death_row); 142 inet_twsk_put(tw); 143 return TCP_TW_RST; 144 } 145 146 /* FIN arrived, enter true time-wait state. 
*/ 147 tw->tw_substate = TCP_TIME_WAIT; 148 tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; 149 if (tmp_opt.saw_tstamp) { 150 tcptw->tw_ts_recent_stamp = get_seconds(); 151 tcptw->tw_ts_recent = tmp_opt.rcv_tsval; 152 } 153 154 if (tcp_death_row.sysctl_tw_recycle && //开启快速回收tw sock功能 155 tcptw->tw_ts_recent_stamp && //开启时间戳 156 tcp_tw_remember_stamp(tw))//将时间戳记录在管理信息块中 157 inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout, 158 TCP_TIMEWAIT_LEN); //使用短的超时时间 159 else 160 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, 161 TCP_TIMEWAIT_LEN); 162 return TCP_TW_ACK; 163 } 164 165 /* 166 * Now real TIME-WAIT state. 167 * 168 * RFC 1122: 169 * "When a connection is [...] on TIME-WAIT state [...] 170 * [a TCP] MAY accept a new SYN from the remote TCP to 171 * reopen the connection directly, if it: 172 * 173 * (1) assigns its initial sequence number for the new 174 * connection to be larger than the largest sequence 175 * number it used on the previous connection incarnation, 176 * and 177 * 178 * (2) returns to TIME-WAIT state if the SYN turns out 179 * to be an old duplicate". 180 */ 181 182 if (!paws_reject && //没有发生时间戳回绕 183 (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt && //序列号是下一个要接收的序列号 184 (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {//没有数据或设置了RST标记位 185 /* In window segment, it may be only reset or bare ack. */ 186 187 if (th->rst) { 188 /* This is TIME_WAIT assassination, in two flavors. 189 * Oh well... nobody has a sufficient solution to this 190 * protocol bug yet. 
191 */ 192 if (sysctl_tcp_rfc1337 == 0) { //不开启这个选项当RST到来时会立即回收tw sock,但这样做是有风险的 193 kill: 194 inet_twsk_deschedule(tw, &tcp_death_row); //删除time wait定时器 195 inet_twsk_put(tw);//释放tw sock 196 return TCP_TW_SUCCESS; 197 } 198 } 199 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, 200 TCP_TIMEWAIT_LEN); //设置长的超时时间 201 202 if (tmp_opt.saw_tstamp) { 203 tcptw->tw_ts_recent = tmp_opt.rcv_tsval; 204 tcptw->tw_ts_recent_stamp = get_seconds(); 205 } 206 207 inet_twsk_put(tw); 208 return TCP_TW_SUCCESS; 209 } 210 211 /* Out of window segment. 212 213 All the segments are ACKed immediately. 214 215 The only exception is new SYN. We accept it, if it is 216 not old duplicate and we are not in danger to be killed 217 by delayed old duplicates. RFC check is that it has 218 newer sequence number works at rates <40Mbit/sec. 219 However, if paws works, it is reliable AND even more, 220 we even may relax silly seq space cutoff. 221 222 RED-PEN: we violate main RFC requirement, if this SYN will appear 223 old duplicate (i.e. we receive RST in reply to SYN-ACK), 224 we must return socket to time-wait state. It is not good, 225 but not fatal yet. 226 */ 227 228 if (th->syn && !th->rst && !th->ack && !paws_reject && //是SYN包、没有RST也没有ACK、没有回绕 229 (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) || 230 (tmp_opt.saw_tstamp && //新连接开启了时间戳 231 (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { //没有回绕 232 u32 isn = tcptw->tw_snd_nxt + 65535 + 2; 233 if (isn == 0) 234 isn++; 235 TCP_SKB_CB(skb)->when = isn; 236 return TCP_TW_SYN; //允许新连接建立并替代tw sock,这时就依靠新旧序列号空间的不一致性来防止旧包对新连接的危害 237 } 238 239 if (paws_reject) 240 NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); 241 242 if (!th->rst) { 243 /* In this case we must reset the TIMEWAIT timer. 244 * 245 * If it is ACKless SYN it may be both old duplicate 246 * and new good SYN with random sequence number <rcv_nxt. 247 * Do not reschedule in the last case. 
248 */ 249 if (paws_reject || th->ack) //是回绕包或ACK 250 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, 251 TCP_TIMEWAIT_LEN); //重新设置time wait定时器 252 253 /* Send ACK. Note, we do not put the bucket, 254 * it will be released by caller. 255 */ 256 return TCP_TW_ACK; 257 } 258 inet_twsk_put(tw); 259 return TCP_TW_SUCCESS; 260 }229:序列号大于旧连接中下一个要接收的序列号,这样旧连接的数据在新连接中就会被认为是旧包而丢弃
从代码中得知,有一些情况会导致TIME_WAIT状态的提前终结:
(1)tw sock处于TCP_FIN_WAIT2子状态(原sock已成为孤儿sock)时,对端有SYN或新数据发送过来;
(2)收到RST且没有开启sysctl_tcp_rfc1337拒绝reset;
(3)收到新的SYN请求且允许建立新连接。
除了这些情况外,tw sock会一直保持到TIME_WAIT定时器超时。
在tw sock的生存时间内,TCP会处理与已关闭连接相关的所有旧数据包,期望它们能够在新的连接建立之前全部消失在网络中。为什么关闭连接后TCP通信两端只有一端处于TIME_WAIT状态而另一端却可以快速释放连接?因为只要有一端处于TIME_WAIT状态,则相同四元组的连接就无法建立,所以另一端无需担心旧报文混入新连接中。