前文说过,发送RST会快速关闭一条TCP连接。RST的发送由tcp_send_active_reset函数完成:
2586 void tcp_send_active_reset(struct sock *sk, gfp_t priority) 2587 { 2588 struct sk_buff *skb; 2589 2590 /* NOTE: No TCP options attached and we never retransmit this. */ 2591 skb = alloc_skb(MAX_TCP_HEADER, priority); 2592 if (!skb) { 2593 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); 2594 return; 2595 } 2596 2597 /* Reserve space for headers and prepare control bits. */ 2598 skb_reserve(skb, MAX_TCP_HEADER); 2599 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), 2600 TCPHDR_ACK | TCPHDR_RST); 2601 /* Send it off. */ 2602 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2603 if (tcp_transmit_skb(sk, skb, 0, priority)) 2604 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); 2605 2606 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); 2607 }RST报文没有被放入发送队列,丢失时不会重传。
发送RST的时机有:
(1)调用close系统调用时socket仍有未读数据:
2059 void tcp_close(struct sock *sk, long timeout) 2060 { ... 2101 if (unlikely(tcp_sk(sk)->repair)) { 2102 sk->sk_prot->disconnect(sk, 0); 2103 } else if (data_was_unread) { 2104 /* Unread data was tossed, zap the connection. */ 2105 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); 2106 tcp_set_state(sk, TCP_CLOSE); 2107 tcp_send_active_reset(sk, sk->sk_allocation); ...(2)调用close系统调用时连接处于FIN_WAIT2状态,且socket的TCP_LINGER2选项被设置为负值:
2059 void tcp_close(struct sock *sk, long timeout) 2060 { ... 2183 if (sk->sk_state == TCP_FIN_WAIT2) { 2184 struct tcp_sock *tp = tcp_sk(sk); 2185 if (tp->linger2 < 0) { 2186 tcp_set_state(sk, TCP_CLOSE); 2187 tcp_send_active_reset(sk, GFP_ATOMIC); ...(3) 孤儿套接字(orphan sock)的数量超过上限,或发送缓存的内存占用过大:
2059 void tcp_close(struct sock *sk, long timeout) 2060 { ... 2202 if (sk->sk_state != TCP_CLOSE) { 2203 sk_mem_reclaim(sk); 2204 if (tcp_check_oom(sk, 0)) { 2205 tcp_set_state(sk, TCP_CLOSE); 2206 tcp_send_active_reset(sk, GFP_ATOMIC);
55 static int tcp_out_of_resources(struct sock *sk, int do_reset) 56 { 57 struct tcp_sock *tp = tcp_sk(sk); 58 int shift = 0; 59 60 /* If peer does not open window for long time, or did not transmit 61 * anything for long time, penalize it. */ 62 if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) 63 shift++; 64 65 /* If some dubious ICMP arrived, penalize even more. */ 66 if (sk->sk_err_soft) 67 shift++; 68 69 if (tcp_check_oom(sk, shift)) { 70 /* Catch exceptional cases, when connection requires reset. 71 * 1. Last segment was sent recently. */ 72 if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || 73 /* 2. Window is closed. */ 74 (!tp->snd_wnd && !tp->packets_out)) 75 do_reset = 1; 76 if (do_reset) 77 tcp_send_active_reset(sk, GFP_ATOMIC); 78 tcp_done(sk); ...(4)连接出现异常、调用tcp_disconnect断开连接时,连接所处的状态需要复位(tcp_need_reset返回真),或连接处于CLOSING/LAST_ACK状态且仍有数据尚未发送(snd_nxt != write_seq):
2240 int tcp_disconnect(struct sock *sk, int flags) 2241 { ... 2256 } else if (tcp_need_reset(old_state) || 2257 (tp->snd_nxt != tp->write_seq && 2258 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { 2259 /* The last check adjusts for discrepancy of Linux wrt. RFC 2260 * states 2261 */ 2262 tcp_send_active_reset(sk, gfp_any()); ...(5)keepalive定时器超时时socket处于TCP_FIN_WAIT2状态且已经是孤儿socket:
558 static void tcp_keepalive_timer (unsigned long data) 559 { ... 578 if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { 579 if (tp->linger2 >= 0) { 580 const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; 581 582 if (tmo > 0) { 583 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); 584 goto out; 585 } 586 } 587 tcp_send_active_reset(sk, GFP_ATOMIC); 588 goto death; 589 } ...(6)keepalive定时器超时时,连接的空闲时间已超过进程通过TCP_USER_TIMEOUT socket选项设置的时间且已发送过探测报文,或者(未设置该选项时)探测次数达到上限:
558 static void tcp_keepalive_timer (unsigned long data) 559 { ... 600 elapsed = keepalive_time_elapsed(tp); 601 602 if (elapsed >= keepalive_time_when(tp)) { 603 /* If the TCP_USER_TIMEOUT option is enabled, use that 604 * to determine when to timeout instead. 605 */ 606 if ((icsk->icsk_user_timeout != 0 && 607 elapsed >= icsk->icsk_user_timeout && 608 icsk->icsk_probes_out > 0) || 609 (icsk->icsk_user_timeout == 0 && 610 icsk->icsk_probes_out >= keepalive_probes(tp))) { 611 tcp_send_active_reset(sk, GFP_ATOMIC); ...在收到RST包时,tcp_validate_incoming函数会对其进行处理:
4985 static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, 4986 const struct tcphdr *th, int syn_inerr) 4987 { ... 5018 if (th->rst) { 5019 /* RFC 5961 3.2 : 5020 * If sequence number exactly matches RCV.NXT, then 5021 * RESET the connection 5022 * else 5023 * Send a challenge ACK 5024 */ 5025 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) 5026 tcp_reset(sk); 5027 else 5028 tcp_send_challenge_ack(sk); 5029 goto discard; 5030 } ...只有当RST报文的序列号恰好等于rcv_nxt时才会调用tcp_reset函数,否则只回复一个challenge ACK:
3745 void tcp_reset(struct sock *sk) 3746 { 3747 /* We want the right error as BSD sees it (and indeed as we do). */ 3748 switch (sk->sk_state) { 3749 case TCP_SYN_SENT: 3750 sk->sk_err = ECONNREFUSED; 3751 break; 3752 case TCP_CLOSE_WAIT: 3753 sk->sk_err = EPIPE; 3754 break; 3755 case TCP_CLOSE: 3756 return; 3757 default: 3758 sk->sk_err = ECONNRESET; 3759 } 3760 /* This barrier is coupled with smp_rmb() in tcp_poll() */ 3761 smp_wmb(); 3762 3763 if (!sock_flag(sk, SOCK_DEAD)) 3764 sk->sk_error_report(sk); //唤醒进程 3765 3766 tcp_done(sk); //关闭本端TCP连接,清除定时器,状态跳转为TCP_CLOSE 3767 }tcp_done函数:
3327 void tcp_done(struct sock *sk) 3328 { 3329 struct request_sock *req = tcp_sk(sk)->fastopen_rsk; 3330 3331 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) 3332 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); 3333 3334 tcp_set_state(sk, TCP_CLOSE); 3335 tcp_clear_xmit_timers(sk); 3336 if (req != NULL) 3337 reqsk_fastopen_remove(sk, req, false); 3338 3339 sk->sk_shutdown = SHUTDOWN_MASK; 3340 3341 if (!sock_flag(sk, SOCK_DEAD)) 3342 sk->sk_state_change(sk); 3343 else 3344 inet_csk_destroy_sock(sk); 3345 }在TCP收到RST报文后,连接被复位,不再允许读写,进程调用tcp_recvmsg收数据时会产生错误:
1545 int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 1546 size_t len, int nonblock, int flags, int *addr_len) 1547 { ... 1671 if (sk->sk_err) { 1672 copied = sock_error(sk); 1673 break; 1674 } ...同样,调用tcp_sendmsg发送数据时也会产生错误:
1016 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 1017 size_t size) 1018 { ... 1074 err = -EPIPE; 1075 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 1076 goto out_err; ...可见,使用RST快速复位TCP连接可能会导致数据丢失。TCP连接只有在出现异常时才会使用RST进行关闭。