7.5 RST的发送与接收

        前文说过,发送RST会快速关闭一条TCP连接。RST的发送由tcp_send_active_reset函数完成:

/*
 * tcp_send_active_reset - build one RST|ACK segment for sk and hand it to
 * tcp_transmit_skb().
 * @sk: socket whose connection is being reset
 * @priority: gfp flags used for alloc_skb() and passed on to tcp_transmit_skb()
 *
 * Returns nothing. An skb allocation failure or a non-zero result from
 * tcp_transmit_skb() is only recorded in LINUX_MIB_TCPABORTFAILED; the caller
 * gets no indication of it.
 */
2586 void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2587 {
2588     struct sk_buff *skb;
2589 
2590     /* NOTE: No TCP options attached and we never retransmit this. */
2591     skb = alloc_skb(MAX_TCP_HEADER, priority);
2592     if (!skb) {
2593         NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2594         return; /* no skb available: give up, nothing is sent */
2595     }
2596 
2597     /* Reserve space for headers and prepare control bits. */
2598     skb_reserve(skb, MAX_TCP_HEADER);
2599     tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2600                  TCPHDR_ACK | TCPHDR_RST);
2601     /* Send it off. */
2602     TCP_SKB_CB(skb)->when = tcp_time_stamp;
2603     if (tcp_transmit_skb(sk, skb, 0, priority))
2604         NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2605 
2606     TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); /* counted once an skb was allocated, even if the transmit call returned non-zero */
2607 }
        RST报文没有被放入发送队列,丢失时不会重传。

        发送RST的时机有:

(1)调用close系统调用时socket仍有未读数据:

2059 void tcp_close(struct sock *sk, long timeout)
2060 {
...
2101     if (unlikely(tcp_sk(sk)->repair)) {
2102         sk->sk_prot->disconnect(sk, 0);
2103     } else if (data_was_unread) {
2104         /* Unread data was tossed, zap the connection. */
2105         NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
2106         tcp_set_state(sk, TCP_CLOSE);
2107         tcp_send_active_reset(sk, sk->sk_allocation);
...
(2)调用close系统调用时socket处于FIN_WAIT2状态,且已通过TCP_LINGER2选项将linger2设置为负值:

2059 void tcp_close(struct sock *sk, long timeout)
2060 {
...
2183     if (sk->sk_state == TCP_FIN_WAIT2) {
2184         struct tcp_sock *tp = tcp_sk(sk);
2185         if (tp->linger2 < 0) {
2186             tcp_set_state(sk, TCP_CLOSE);
2187             tcp_send_active_reset(sk, GFP_ATOMIC);
...
(3)孤儿套接字(orphan sock)的数量超过上限,或发送缓存的内存占用过大(tcp_close和tcp_out_of_resources中都会通过tcp_check_oom进行检查):
2059 void tcp_close(struct sock *sk, long timeout)
2060 {
...
2202     if (sk->sk_state != TCP_CLOSE) {
2203         sk_mem_reclaim(sk);
2204         if (tcp_check_oom(sk, 0)) {
2205             tcp_set_state(sk, TCP_CLOSE);
2206             tcp_send_active_reset(sk, GFP_ATOMIC);
 55 static int tcp_out_of_resources(struct sock *sk, int do_reset)
 56 {
 57     struct tcp_sock *tp = tcp_sk(sk);
 58     int shift = 0;
 59 
 60     /* If peer does not open window for long time, or did not transmit
 61      * anything for long time, penalize it. */
 62     if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
 63         shift++;
 64 
 65     /* If some dubious ICMP arrived, penalize even more. */
 66     if (sk->sk_err_soft)
 67         shift++;
 68 
 69     if (tcp_check_oom(sk, shift)) {
 70         /* Catch exceptional cases, when connection requires reset.
 71          *      1. Last segment was sent recently. */
 72         if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
 73             /*  2. Window is closed. */
 74             (!tp->snd_wnd && !tp->packets_out))
 75             do_reset = 1;
 76         if (do_reset)
 77             tcp_send_active_reset(sk, GFP_ATOMIC);
 78         tcp_done(sk);
...
(4)调用tcp_disconnect断开连接时,连接处于需要复位的状态(tcp_need_reset返回真),或处于CLOSING/LAST_ACK状态但仍有数据尚未发送(snd_nxt != write_seq):

2240 int tcp_disconnect(struct sock *sk, int flags)
2241 {
...
2256     } else if (tcp_need_reset(old_state) ||
2257            (tp->snd_nxt != tp->write_seq &&
2258             (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
2259         /* The last check adjusts for discrepancy of Linux wrt. RFC
2260          * states
2261          */
2262         tcp_send_active_reset(sk, gfp_any());
...
(5)keepalive定时器超时时socket处于TCP_FIN_WAIT2状态且已经是孤儿socket:

558 static void tcp_keepalive_timer (unsigned long data)
559 {
...
578     if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
579         if (tp->linger2 >= 0) {
580             const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
581     
582             if (tmo > 0) {
583                 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
584                 goto out;
585             }
586         }   
587         tcp_send_active_reset(sk, GFP_ATOMIC);
588         goto death;
589     }  
...

(6)keepalive定时器超时时,若设置了TCP_USER_TIMEOUT选项,则空闲时间已超过该超时值且已发送过探测报文;若未设置该选项,则探测次数已达到上限:

558 static void tcp_keepalive_timer (unsigned long data)
559 {
...
600     elapsed = keepalive_time_elapsed(tp);
601 
602     if (elapsed >= keepalive_time_when(tp)) {
603         /* If the TCP_USER_TIMEOUT option is enabled, use that
604          * to determine when to timeout instead.
605          */
606         if ((icsk->icsk_user_timeout != 0 &&
607             elapsed >= icsk->icsk_user_timeout &&
608             icsk->icsk_probes_out > 0) ||
609             (icsk->icsk_user_timeout == 0 &&
610             icsk->icsk_probes_out >= keepalive_probes(tp))) {
611             tcp_send_active_reset(sk, GFP_ATOMIC);
...
        在收到RST包时,tcp_validate_incoming函数会对其进行处理:

4985 static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
4986                   const struct tcphdr *th, int syn_inerr)
4987 {
...
5018     if (th->rst) {
5019         /* RFC 5961 3.2 :
5020          * If sequence number exactly matches RCV.NXT, then
5021          *     RESET the connection
5022          * else
5023          *     Send a challenge ACK
5024          */
5025         if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)
5026             tcp_reset(sk);
5027         else
5028             tcp_send_challenge_ack(sk);
5029         goto discard;
5030     }
...
      在RST报文的序列号恰好等于rcv_nxt时会调用tcp_reset函数(否则只回复一个challenge ACK):

/*
 * tcp_reset - record the error for a reset connection and finish the socket.
 * @sk: socket that is being reset
 *
 * Sets sk->sk_err according to the current state, calls sk_error_report()
 * unless the socket is flagged SOCK_DEAD, then calls tcp_done(). A socket
 * already in TCP_CLOSE is left untouched.
 */
3745 void tcp_reset(struct sock *sk)
3746 {
3747     /* We want the right error as BSD sees it (and indeed as we do). */
3748     switch (sk->sk_state) {
3749     case TCP_SYN_SENT:
3750         sk->sk_err = ECONNREFUSED;
3751         break;
3752     case TCP_CLOSE_WAIT:
3753         sk->sk_err = EPIPE;
3754         break;
3755     case TCP_CLOSE:
3756         return; /* already closed: no error is set and tcp_done() is not called */
3757     default:
3758         sk->sk_err = ECONNRESET;
3759     }
3760     /* This barrier is coupled with smp_rmb() in tcp_poll() */
3761     smp_wmb();
3762 
3763     if (!sock_flag(sk, SOCK_DEAD))
3764         sk->sk_error_report(sk);  // wake up the process
3765 
3766     tcp_done(sk); // close the local end of the connection: clear the timers and move to TCP_CLOSE
3767 }
        tcp_done函数:

/*
 * tcp_done - move sk to TCP_CLOSE and either notify or destroy it.
 * @sk: socket to finish
 *
 * Bumps TCP_MIB_ATTEMPTFAILS if the socket was still in TCP_SYN_SENT or
 * TCP_SYN_RECV, sets the state to TCP_CLOSE, calls tcp_clear_xmit_timers(),
 * calls reqsk_fastopen_remove() when fastopen_rsk is set, and sets
 * sk_shutdown to SHUTDOWN_MASK. A socket not flagged SOCK_DEAD then gets
 * sk_state_change(); otherwise inet_csk_destroy_sock() is called.
 */
3327 void tcp_done(struct sock *sk)
3328 {
3329     struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
3330     
3331     if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
3332         TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); /* state is tested before it is overwritten below */
3333     
3334     tcp_set_state(sk, TCP_CLOSE);
3335     tcp_clear_xmit_timers(sk);
3336     if (req != NULL)
3337         reqsk_fastopen_remove(sk, req, false);
3338     
3339     sk->sk_shutdown = SHUTDOWN_MASK;
3340  
3341     if (!sock_flag(sk, SOCK_DEAD))
3342         sk->sk_state_change(sk); /* socket not flagged SOCK_DEAD: invoke its state-change callback */
3343     else 
3344         inet_csk_destroy_sock(sk);
3345 }
        在TCP收到RST报文后,连接被复位,不再允许读写,进程调用tcp_recvmsg收数据时会产生错误:

1545 int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1546         size_t len, int nonblock, int flags, int *addr_len)
1547 {
...
1671             if (sk->sk_err) {
1672                 copied = sock_error(sk);
1673                 break;
1674             }
...
        同样,调用tcp_sendmsg发送数据时也会产生错误:
1016 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1017         size_t size)
1018 {
...
1074     err = -EPIPE;
1075     if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
1076         goto out_err;
...
      可见,使用RST快速复位TCP连接可能会导致数据丢失。TCP连接只有在出现异常时才会使用RST进行关闭。

你可能感兴趣的:(tcp,linux内核)