关闭TCP连接可以使用shutdown系统调用:
int shutdown(int sockfd, int how); 其中,sockfd是要关闭的TCP socket的文件描述符,how用来指定关闭方式:为SHUT_RD则关闭收包,为SHUT_WR则关闭发包,为SHUT_RDWR则两者都关闭。
shutdown系统调用对应的内核函数为:
1932 SYSCALL_DEFINE2(shutdown, int, fd, int, how) 1933 { 1934 int err, fput_needed; 1935 struct socket *sock; 1936 1937 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1938 if (sock != NULL) { 1939 err = security_socket_shutdown(sock, how); 1940 if (!err) 1941 err = sock->ops->shutdown(sock, how); //指向inet_shutdown函数 1942 fput_light(sock->file, fput_needed); 1943 } 1944 return err; 1945 }inet_shutdown函数:
811 int inet_shutdown(struct socket *sock, int how) 812 { 813 struct sock *sk = sock->sk; 814 int err = 0; 815 816 /* This should really check to make sure 817 * the socket is a TCP socket. (WHY AC...) 818 */ 819 how++; /* maps 0->1 has the advantage of making bit 1 rcvs and 820 1->2 bit 2 snds. 821 2->3 */ 822 if ((how & ~SHUTDOWN_MASK) || !how) /* MAXINT->0 */ 823 return -EINVAL; 824 825 lock_sock(sk); 826 if (sock->state == SS_CONNECTING) { //连接建立尚未完成 827 if ((1 << sk->sk_state) & 828 (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)) 829 sock->state = SS_DISCONNECTING; 830 else 831 sock->state = SS_CONNECTED; 832 } 833 834 switch (sk->sk_state) { 835 case TCP_CLOSE: 836 err = -ENOTCONN; 837 /* Hack to wake up other listeners, who can poll for 838 POLLHUP, even on eg. unconnected UDP sockets -- RR */ 839 default: //TCP_ESTABLISHED状态下关闭连接会到达这里 840 sk->sk_shutdown |= how; 841 if (sk->sk_prot->shutdown) 842 sk->sk_prot->shutdown(sk, how); //指向tcp_shutdown 843 break; 844 845 /* Remaining two branches are temporary solution for missing 846 * close() in multithreaded environment. It is _not_ a good idea, 847 * but we have no choice until close() is repaired at VFS level. 848 */ 849 case TCP_LISTEN: 850 if (!(how & RCV_SHUTDOWN)) //listening socket不能发包,故非RCV_SHUTDOWN是没有意义的 851 break; 852 /* Fall through */ 853 case TCP_SYN_SENT: 854 err = sk->sk_prot->disconnect(sk, O_NONBLOCK);//指向tcp_disconnect 855 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; 856 break; 857 } 858 859 /* Wake up anyone sleeping in poll. */ 860 sk->sk_state_change(sk); //唤醒睡眠的进程 861 release_sock(sk); 862 return err; 863 }tcp_shutdown函数用来关闭TCP连接:
2025 void tcp_shutdown(struct sock *sk, int how) 2026 { 2027 /* We need to grab some memory, and put together a FIN, 2028 * and then put it into the queue to be sent. 2029 * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92. 2030 */ 2031 if (!(how & SEND_SHUTDOWN)) //不是SEND_SHUTDOWN则不需要发送FIN 2032 return; 2033 2034 /* If we've already sent a FIN, or it's a closed state, skip this. */ 2035 if ((1 << sk->sk_state) & 2036 (TCPF_ESTABLISHED | TCPF_SYN_SENT | 2037 TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { 2038 /* Clear out any half completed packets. FIN if needed. */ 2039 if (tcp_close_state(sk)) //可以发送FIN 2040 tcp_send_fin(sk);//发送FIN 2041 } 2042 }tcp_close_state函数进行状态跳转和是否发送FIN的检查 :
1994 static const unsigned char new_state[16] = { 1995 /* current state: new state: action: */ 1996 /* (Invalid) */ TCP_CLOSE, 1997 /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, 1998 /* TCP_SYN_SENT */ TCP_CLOSE, 1999 /* TCP_SYN_RECV */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, 2000 /* TCP_FIN_WAIT1 */ TCP_FIN_WAIT1, 2001 /* TCP_FIN_WAIT2 */ TCP_FIN_WAIT2, 2002 /* TCP_TIME_WAIT */ TCP_CLOSE, 2003 /* TCP_CLOSE */ TCP_CLOSE, 2004 /* TCP_CLOSE_WAIT */ TCP_LAST_ACK | TCP_ACTION_FIN, 2005 /* TCP_LAST_ACK */ TCP_LAST_ACK, 2006 /* TCP_LISTEN */ TCP_CLOSE, 2007 /* TCP_CLOSING */ TCP_CLOSING, 2008 }; 2009 2010 static int tcp_close_state(struct sock *sk) 2011 { 2012 int next = (int)new_state[sk->sk_state]; 2013 int ns = next & TCP_STATE_MASK; 2014 2015 tcp_set_state(sk, ns); //跳转状态 2016 2017 return next & TCP_ACTION_FIN; 2018 }如果在 TCP_ESTABLISHED 状态下调用shutdown则状态会跳转到 TCP_FIN_WAIT1 ,如果是 TCP_CLOSE_WAIT 则跳转到 TCP_LAST_ACK 。这两种情况下都会调用tcp_send_fin函数发送FIN:
2545 void tcp_send_fin(struct sock *sk) 2546 { 2547 struct tcp_sock *tp = tcp_sk(sk); 2548 struct sk_buff *skb = tcp_write_queue_tail(sk); 2549 int mss_now; 2550 2551 /* Optimization, tack on the FIN if we have a queue of 2552 * unsent frames. But be careful about outgoing SACKS 2553 * and IP options. 2554 */ 2555 mss_now = tcp_current_mss(sk); 2556 2557 if (tcp_send_head(sk) != NULL) { //队列中还有尚未发送的数据 2558 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; //将FIN标志位放在数据中 2559 TCP_SKB_CB(skb)->end_seq++; //FIN标志位占用一个序列号 2560 tp->write_seq++; 2561 } else { //队列为空,则新建一个包 2562 /* Socket is locked, keep trying until memory is available. */ 2563 for (;;) { 2564 skb = alloc_skb_fclone(MAX_TCP_HEADER, 2565 sk->sk_allocation); 2566 if (skb) 2567 break; 2568 yield(); 2569 } 2570 2571 /* Reserve space for headers and prepare control bits. */ 2572 skb_reserve(skb, MAX_TCP_HEADER); 2573 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ 2574 tcp_init_nondata_skb(skb, tp->write_seq, 2575 TCPHDR_ACK | TCPHDR_FIN); //设置FIN|ACK标记 2576 tcp_queue_skb(sk, skb); //将包放入发送队列 2577 } 2578 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); //发送FIN;对于队列中所有的包关闭Nagle算法再发送 2579 }
可见关闭连接时如果发送缓存中有数据则TCP会负责将其传送到对端,而这些数据包中的最后一个会携带FIN标记。
连接建立尚未完成(处于TCP_SYN_SENT状态)时调用shutdown,或者对处于TCP_LISTEN状态的socket以关闭读的方式调用shutdown,则会使用tcp_disconnect函数来断开连接:
2233 static inline bool tcp_need_reset(int state) 2234 { 2235 return (1 << state) & 2236 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 | 2237 TCPF_FIN_WAIT2 | TCPF_SYN_RECV); 2238 } 2239 2240 int tcp_disconnect(struct sock *sk, int flags) 2241 { 2242 struct inet_sock *inet = inet_sk(sk); 2243 struct inet_connection_sock *icsk = inet_csk(sk); 2244 struct tcp_sock *tp = tcp_sk(sk); 2245 int err = 0; 2246 int old_state = sk->sk_state; 2247 2248 if (old_state != TCP_CLOSE) 2249 tcp_set_state(sk, TCP_CLOSE); //将socket移出hash表,解除bind,状态跳转到TCP_CLOSE 2250 2251 /* ABORT function of RFC793 */ 2252 if (old_state == TCP_LISTEN) { 2253 inet_csk_listen_stop(sk); 2254 } else if (unlikely(tp->repair)) { 2255 sk->sk_err = ECONNABORTED; 2256 } else if (tcp_need_reset(old_state) || 2257 (tp->snd_nxt != tp->write_seq && //有数据未发送完毕 2258 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { 2259 /* The last check adjusts for discrepancy of Linux wrt. RFC 2260 * states 2261 */ 2262 tcp_send_active_reset(sk, gfp_any()); //发送RST包 2263 sk->sk_err = ECONNRESET; 2264 } else if (old_state == TCP_SYN_SENT) 2265 sk->sk_err = ECONNRESET; 2266 2267 tcp_clear_xmit_timers(sk); //清除定时器 2268 __skb_queue_purge(&sk->sk_receive_queue); //清空接收队列 2269 tcp_write_queue_purge(sk);//清空发送队列 2270 __skb_queue_purge(&tp->out_of_order_queue);//清空乱序队列 2271 #ifdef CONFIG_NET_DMA 2272 __skb_queue_purge(&sk->sk_async_wait_queue);//清空异步等待队列 2273 #endif 2274 2275 inet->inet_dport = 0; 2276 2277 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 2278 inet_reset_saddr(sk); 2279 2280 sk->sk_shutdown = 0; 2281 sock_reset_flag(sk, SOCK_DONE); 2282 tp->srtt = 0; 2283 if ((tp->write_seq += tp->max_window + 2) == 0) 2284 tp->write_seq = 1; 2285 icsk->icsk_backoff = 0; 2286 tp->snd_cwnd = 2; 2287 icsk->icsk_probes_out = 0; 2288 tp->packets_out = 0; 2289 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 2290 tp->snd_cwnd_cnt = 0; 2291 tp->window_clamp = 0; 2292 tcp_set_ca_state(sk, TCP_CA_Open); 2293 tcp_clear_retrans(tp); 2294 
inet_csk_delack_init(sk); 2295 tcp_init_send_head(sk); 2296 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); 2297 __sk_dst_reset(sk); 2298 2299 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); 2300 2301 sk->sk_error_report(sk); 2302 return err; 2303 }可见tcp_disconnect函数并不会等待对端回复报文,而是先行清空本端连接的资源与状态信息,并且不发送FIN,但可能会发送RST。发送RST的条件有两种:一是TCP状态机处于ESTABLISHED、CLOSE_WAIT、FIN_WAIT1、FIN_WAIT2、SYN_RECV这5个状态之一(即tcp_need_reset返回真);二是TCP状态机处于CLOSING或LAST_ACK状态,并且发送队列中还有未发送的数据(tp->snd_nxt != tp->write_seq)。shutdown系统调用不会满足这些条件,因为inet_shutdown函数只会在TCP_LISTEN和TCP_SYN_SENT这两个状态下调用tcp_disconnect函数。
在连接建立完成后再调用shutdown系统调用的话,调用结束后,如果应用进程选择了关闭读(包括关闭读写)的模式,则tcp_recvmsg函数在检查sk->sk_shutdown中保存的how的值时就会返回,不再读取数据:
1545 int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 1546 size_t len, int nonblock, int flags, int *addr_len) 1547 { ... 1660 if (copied) { 1661 if (sk->sk_err || 1662 sk->sk_state == TCP_CLOSE || 1663 (sk->sk_shutdown & RCV_SHUTDOWN) || 1664 !timeo || 1665 signal_pending(current)) 1666 break; 1667 } else { 1668 if (sock_flag(sk, SOCK_DONE)) 1669 break; 1670 1671 if (sk->sk_err) { 1672 copied = sock_error(sk); 1673 break; 1674 } 1675 1676 if (sk->sk_shutdown & RCV_SHUTDOWN) 1677 break; ...如果应用进程选择了关闭写(包括关闭读写)的模式,则在tcp_sendmsg函数中会返回错误:
1016 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 1017 size_t size) 1018 { ... 1075 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 1076 goto out_err; ...即不允许再发送数据,但仍然可读数据(只关闭写),当收到FIN(读到0字节)时就需要调用close系统调用释放socket。
如果应用进程选择了关闭读写的模式,则可以直接调用close系统调用。
应用进程也可以不使用shutdown系统调用,仅使用close系统调用即可完成连接的关闭(同时关闭读写)和socket资源的释放。