7.2 Shutdown系统调用

        当应用进程不想再发送或接收数据时,就可以关闭TCP连接。关闭的方式有两种:如果进程既不想发送数据,也不想接收数据,则可以选择完全关闭;如果进程不想再发送数据,但仍需要接收数据,则可以执行“半关闭”。

        关闭TCP连接可以使用shutdown系统调用:

int shutdown(int sockfd, int how);
        sockfd是要关闭的TCP socket的文件描述符,how用来指定关闭方式:为SHUT_RD则关闭接收方向,为SHUT_WR则关闭发送方向,为SHUT_RDWR则两个方向都关闭。

        shutdown系统调用对应的内核函数为:


1932 SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1933 {
1934     int err, fput_needed;
1935     struct socket *sock;
1936
1937     sock = sockfd_lookup_light(fd, &err, &fput_needed);
1938     if (sock != NULL) {  
1939         err = security_socket_shutdown(sock, how);
1940         if (!err)
1941             err = sock->ops->shutdown(sock, how); //指向inet_shutdown函数
1942         fput_light(sock->file, fput_needed);
1943     }
1944     return err;
1945 }
            inet_shutdown函数:
 811 int inet_shutdown(struct socket *sock, int how)
 812 {   
 813     struct sock *sk = sock->sk;
 814     int err = 0;   
 815     
 816     /* This should really check to make sure
 817      * the socket is a TCP socket. (WHY AC...)
 818      */
 819     how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
 820                1->2 bit 2 snds.         
 821                2->3 */
 822     if ((how & ~SHUTDOWN_MASK) || !how) /* MAXINT->0 */
 823         return -EINVAL;
 824     
 825     lock_sock(sk);
 826     if (sock->state == SS_CONNECTING) {  //连接建立尚未完成
 827         if ((1 << sk->sk_state) &
 828             (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
 829             sock->state = SS_DISCONNECTING;
 830         else
 831             sock->state = SS_CONNECTED;
 832     }
 833     
 834     switch (sk->sk_state) {
 835     case TCP_CLOSE:
 836         err = -ENOTCONN;
 837         /* Hack to wake up other listeners, who can poll for
 838            POLLHUP, even on eg. unconnected UDP sockets -- RR */
 839     default: //TCP_ESTABLISHED状态下关闭连接会到达这里
 840         sk->sk_shutdown |= how;
 841         if (sk->sk_prot->shutdown)
 842             sk->sk_prot->shutdown(sk, how); //指向tcp_shutdown
 843         break;
 844
 845     /* Remaining two branches are temporary solution for missing
 846      * close() in multithreaded environment. It is _not_ a good idea,
 847      * but we have no choice until close() is repaired at VFS level.
 848      */
 849     case TCP_LISTEN:
 850         if (!(how & RCV_SHUTDOWN)) //listening socket不能发包,故非RCV_SHUTDOWN是没有意义的
 851             break;
 852         /* Fall through */
 853     case TCP_SYN_SENT:
 854         err = sk->sk_prot->disconnect(sk, O_NONBLOCK);//指向tcp_disconnect
 855         sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
 856         break;
 857     }
 858
 859     /* Wake up anyone sleeping in poll. */
 860     sk->sk_state_change(sk);  //唤醒睡眠的进程
 861     release_sock(sk);
 862     return err;
 863 }
        tcp_shutdown函数用来关闭TCP连接:
2025 void tcp_shutdown(struct sock *sk, int how)
2026 {   
2027     /*  We need to grab some memory, and put together a FIN,
2028      *  and then put it into the queue to be sent.
2029      *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
2030      */
2031     if (!(how & SEND_SHUTDOWN)) //不是SEND_SHUTDOWN则不需要发送FIN
2032         return;     
2033     
2034     /* If we've already sent a FIN, or it's a closed state, skip this. */
2035     if ((1 << sk->sk_state) &
2036         (TCPF_ESTABLISHED | TCPF_SYN_SENT |
2037          TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
2038         /* Clear out any half completed packets.  FIN if needed. */
2039         if (tcp_close_state(sk)) //可以发送FIN
2040             tcp_send_fin(sk);//发送FIN
2041     }
2042 }   
        tcp_close_state函数根据当前状态查new_state表完成状态跳转,并返回是否需要发送FIN:

1994 static const unsigned char new_state[16] = {
1995   /* current state:        new state:      action:  */
1996   /* (Invalid)      */ TCP_CLOSE,
1997   /* TCP_ESTABLISHED    */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
1998   /* TCP_SYN_SENT   */ TCP_CLOSE,
1999   /* TCP_SYN_RECV   */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
2000   /* TCP_FIN_WAIT1  */ TCP_FIN_WAIT1,
2001   /* TCP_FIN_WAIT2  */ TCP_FIN_WAIT2,
2002   /* TCP_TIME_WAIT  */ TCP_CLOSE,
2003   /* TCP_CLOSE      */ TCP_CLOSE,
2004   /* TCP_CLOSE_WAIT */ TCP_LAST_ACK  | TCP_ACTION_FIN,
2005   /* TCP_LAST_ACK   */ TCP_LAST_ACK,
2006   /* TCP_LISTEN     */ TCP_CLOSE,
2007   /* TCP_CLOSING    */ TCP_CLOSING,
2008 };
2009
2010 static int tcp_close_state(struct sock *sk)
2011 {
2012     int next = (int)new_state[sk->sk_state];
2013     int ns = next & TCP_STATE_MASK;
2014
2015     tcp_set_state(sk, ns); //跳转状态
2016
2017     return next & TCP_ACTION_FIN;
2018 }
        如果在 TCP_ESTABLISHED 或 TCP_SYN_RECV 状态下调用shutdown,则状态会跳转到 TCP_FIN_WAIT1 ;如果是 TCP_CLOSE_WAIT 状态,则跳转到 TCP_LAST_ACK 。这几种情况下new_state中对应的表项都带有TCP_ACTION_FIN标记,因此都会调用tcp_send_fin函数发送FIN(TCP_SYN_SENT状态则直接跳转到TCP_CLOSE,不发送FIN):
2545 void tcp_send_fin(struct sock *sk)
2546 {
2547     struct tcp_sock *tp = tcp_sk(sk);
2548     struct sk_buff *skb = tcp_write_queue_tail(sk);
2549     int mss_now;
2550
2551     /* Optimization, tack on the FIN if we have a queue of
2552      * unsent frames.  But be careful about outgoing SACKS
2553      * and IP options.
2554      */
2555     mss_now = tcp_current_mss(sk);
2556
2557     if (tcp_send_head(sk) != NULL) { //队列中还有尚未发送的数据
2558         TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; //将FIN标志位放在数据中
2559         TCP_SKB_CB(skb)->end_seq++; //FIN标志位占用一个序列号
2560         tp->write_seq++;
2561     } else {  //没有尚未发送的数据,则新建一个只携带FIN的包
2562         /* Socket is locked, keep trying until memory is available. */
2563         for (;;) {
2564             skb = alloc_skb_fclone(MAX_TCP_HEADER,
2565                            sk->sk_allocation);
2566             if (skb)
2567                 break;
2568             yield();
2569         }
2570
2571         /* Reserve space for headers and prepare control bits. */
2572         skb_reserve(skb, MAX_TCP_HEADER);
2573         /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
2574         tcp_init_nondata_skb(skb, tp->write_seq,
2575                      TCPHDR_ACK | TCPHDR_FIN); //设置FIN|ACK标记
2576         tcp_queue_skb(sk, skb); //将包放入发送队列
2577     }
2578     __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); //发送FIN;对于队列中所有的包关闭Nagle算法再发送
2579 }        

        可见关闭连接时如果发送缓存中还有尚未发送的数据,则TCP会负责将其传送到对端,并由其中最后一个数据包携带FIN标记;如果没有未发送的数据,则会单独构造一个FIN包放入发送队列再发送。

        在TCP_SYN_SENT状态(连接建立尚未完成)或TCP_LISTEN状态(且指定了关闭接收)下调用shutdown,则会使用tcp_disconnect函数来断开连接:

2233 static inline bool tcp_need_reset(int state)
2234 {
2235     return (1 << state) &
2236            (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
2237         TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
2238 }       
2239         
2240 int tcp_disconnect(struct sock *sk, int flags)
2241 {
2242     struct inet_sock *inet = inet_sk(sk);
2243     struct inet_connection_sock *icsk = inet_csk(sk);
2244     struct tcp_sock *tp = tcp_sk(sk);
2245     int err = 0;
2246     int old_state = sk->sk_state;
2247
2248     if (old_state != TCP_CLOSE)        
2249         tcp_set_state(sk, TCP_CLOSE); //将socket移出hash表,解除bind,状态跳转到TCP_CLOSE
2250
2251     /* ABORT function of RFC793 */
2252     if (old_state == TCP_LISTEN) {     
2253         inet_csk_listen_stop(sk);  
2254     } else if (unlikely(tp->repair)) {
2255         sk->sk_err = ECONNABORTED;
2256     } else if (tcp_need_reset(old_state) ||
2257            (tp->snd_nxt != tp->write_seq && //有数据未发送完毕
2258             (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
2259         /* The last check adjusts for discrepancy of Linux wrt. RFC
2260          * states
2261          */
2262         tcp_send_active_reset(sk, gfp_any()); //发送RST包
2263         sk->sk_err = ECONNRESET;   
2264     } else if (old_state == TCP_SYN_SENT)
2265         sk->sk_err = ECONNRESET;
2266
2267     tcp_clear_xmit_timers(sk); //清除定时器
2268     __skb_queue_purge(&sk->sk_receive_queue); //清空接收队列
2269     tcp_write_queue_purge(sk);//清空发送队列
2270     __skb_queue_purge(&tp->out_of_order_queue);//清空乱序队列
2271 #ifdef CONFIG_NET_DMA    
2272     __skb_queue_purge(&sk->sk_async_wait_queue);//清空异步等待队列
2273 #endif
2274
2275     inet->inet_dport = 0;
2276
2277     if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
2278         inet_reset_saddr(sk);
2279
2280     sk->sk_shutdown = 0;
2281     sock_reset_flag(sk, SOCK_DONE);
2282     tp->srtt = 0;
2283     if ((tp->write_seq += tp->max_window + 2) == 0)
2284         tp->write_seq = 1;
2285     icsk->icsk_backoff = 0;
2286     tp->snd_cwnd = 2;
2287     icsk->icsk_probes_out = 0;
2288     tp->packets_out = 0;
2289     tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
2290     tp->snd_cwnd_cnt = 0;
2291     tp->window_clamp = 0;
2292     tcp_set_ca_state(sk, TCP_CA_Open);
2293     tcp_clear_retrans(tp);
2294     inet_csk_delack_init(sk);
2295     tcp_init_send_head(sk);
2296     memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
2297     __sk_dst_reset(sk);
2298
2299     WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
2300
2301     sk->sk_error_report(sk);
2302     return err;
2303 }
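        可见tcp_disconnect函数并不会等待对端回复报文,而是先行清空本端连接的资源与状态信息;它不发送FIN,但可能会发送RST。在socket既不是listening socket、也不处于repair模式的前提下,发送RST的条件有两种:一是TCP状态机处于ESTABLISHED、CLOSE_WAIT、FIN_WAIT1、FIN_WAIT2、SYN_RECV这5个状态之一(即tcp_need_reset返回真);二是处于CLOSING或LAST_ACK状态,并且发送队列中还有未发送的数据(tp->snd_nxt != tp->write_seq)。shutdown系统调用不会满足这些条件,因为inet_shutdown函数只会在TCP_LISTEN和TCP_SYN_SENT这两个状态下调用tcp_disconnect函数:前者执行inet_csk_listen_stop,后者只是把sk->sk_err设置为ECONNRESET,并不发送RST。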

        在连接建立完成后再调用shutdown系统调用的话,在调用结束后,如果应用进程选择了关闭读(包括关闭读写)的模式,则tcp_recvmsg函数在检查到sk->sk_shutdown中设置了RCV_SHUTDOWN标志(即inet_shutdown中加1之后保存下来的how值)时就会跳出循环返回,不再继续读取数据:

1545 int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1546         size_t len, int nonblock, int flags, int *addr_len)
1547 {
...
1660         if (copied) {
1661             if (sk->sk_err ||
1662                 sk->sk_state == TCP_CLOSE ||
1663                 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1664                 !timeo ||
1665                 signal_pending(current))
1666                 break;
1667         } else {
1668             if (sock_flag(sk, SOCK_DONE))
1669                 break;
1670 
1671             if (sk->sk_err) {
1672                 copied = sock_error(sk);
1673                 break;
1674             }
1675 
1676             if (sk->sk_shutdown & RCV_SHUTDOWN)
1677                 break;
...
        如果应用进程选择了关闭写(包括关闭读写)的模式,则tcp_sendmsg函数在检查到sk->sk_shutdown中设置了SEND_SHUTDOWN标志后会跳转到out_err,返回错误:
1016 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1017         size_t size)
1018 {
...
1075     if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
1076         goto out_err;
...
即只关闭写时不允许再发送数据,但仍然可以读取数据;当收到对端的FIN(读到0字节)后,就需要调用close系统调用释放socket。

        如果应用进程选择了同时关闭读写的模式,则在shutdown之后可以直接调用close系统调用来释放socket。

        应用进程也可以不使用shutdown系统调用而是仅使用close系统调用就可以完成连接的关闭(同时关闭读写)和socket资源的释放。




你可能感兴趣的:(tcp,linux内核)