TFO(TCP Fast Open)是一种能够在TCP连接建立阶段传输数据的机制。使用这种机制可以将数据交互提前,降低应用层事务的延迟。其基本步骤如下:
1、客户端发送一个SYN包到服务器,这个包中携带了Fast Open Cookie请求的TCP选项;
2、服务器生成一个cookie,这个cookie是通过使用密钥加密客户端的IP地址生成的。服务器给客户端发送SYN|ACK响应,在响应包的选项中包含了这个cookie;
3、客户端存储这个cookie以便将来再次与这个服务器的IP建立TFO连接时使用;
也就是说,第一次TCP连接只是交换cookie信息,无法在SYN包中携带数据。在完成上述步骤后,接下来的TCP连接就可以在SYN中携带数据了。流程如下:
1、客户端发送一个携带应用数据和以TCP选项方式存储的Fast Open cookie的SYN包;
2、服务器验证这个cookie,如果合法,服务器发送一个SYN|ACK确认SYN和数据,然后数据被传递到应用进程;如果不合法,服务器丢弃数据,发送一个SYN|ACK只确认SYN,接下来走三次握手的普通流程;
3、如果接收了SYN包中的数据,服务器在接收到客户端的第一个ACK前可以发送其它响应数据;
4、客户端发送ACK确认了服务器的SYN;如果客户端的数据没有被确认,数据会在ACK包中重传;
5、下面的流程与普通的TCP交互流程无异。
客户端使用TFO的方法:内核功能选项sysctl -w net.ipv4.tcp_fastopen=1;客户端代码:
int sockfd, n; char recvline[4096], sendline[4096]; struct sockaddr_in servaddr; char buf[20] = {"aaabbbccc"}; int ret = 0; if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { printf ("create socket error: %s(errno: %d)\n", strerror (errno), errno); exit (0); } memset (&servaddr, 0, sizeof (servaddr)); servaddr.sin_family = AF_INET; servaddr.sin_port = htons (6666); servaddr.sin_addr.s_addr = inet_addr("127.0.0.1"); ret = sendto(sockfd, buf, strlen(buf), MSG_FASTOPEN, (struct sockaddr *)&servaddr, sizeof(servaddr)); if (ret < 0) { printf ("send msg error: %s(errno: %d)\n", strerror (errno), errno); } close (sockfd);即客户端在发送数据时,生成socket后直接使用sendto发送数据,不用connect系统调用。第一次交互时只是向服务器申请一个TFO cookie,数据并不在连接建立过程中送达;TFO cookie交互完成后,以后客户端每次用同样方式发送数据时都会在SYN包中携带数据。
服务器端开启TFO功能的方法:内核功能选项sysctl -w net.ipv4.tcp_fastopen=2;服务器端代码:
int listenfd, connfd, fd; struct sockaddr_in servaddr; char buff[MAXLINE]; int n; if ((listenfd = socket (AF_INET, SOCK_STREAM, 0)) == -1) { printf ("create socket error: %s(errno: %d)\n", strerror (errno), errno); exit (0); } memset (&servaddr, 0, sizeof (servaddr)); servaddr.sin_family = AF_INET; servaddr.sin_addr.s_addr = inet_addr("127.0.0.1"); servaddr.sin_port = htons (6666); if (bind (listenfd, (struct sockaddr *) &servaddr, sizeof (servaddr)) == -1) { printf ("bind socket error: %s(errno: %d)\n", strerror (errno), errno); exit (0); } if (listen (listenfd, 10) == -1) { printf ("listen socket error: %s(errno: %d)\n", strerror (errno), errno); exit (0); } int qlen = 5; ret = setsockopt(listenfd, 6, TCP_FASTOPEN, &qlen, sizeof(qlen)); if (ret < 0) { printf ("setsockopt error: %s(errno: %d)\n", strerror (errno), errno); } while (1) { printf("Before accpet!\n"); if ((connfd = accept (listenfd, (struct sockaddr *) NULL, NULL)) == -1) ...如果想要同时开启客户端和服务端的TFO功能,可以用“ sysctl -w net.ipv4.tcp_fastopen=3”。
TFO功能的客户端支持在Linux 3.6内核中开始集成,服务器端支持在Linux 3.7内核中加入。
下面通过分析内核代码来了解TFO的运行机制。开启TFO功能后,server端进程在调用listen系统调用时会初始化TFO队列:
195 int inet_listen(struct socket *sock, int backlog) 196 { 197 struct sock *sk = sock->sk; 198 unsigned char old_state; 199 int err; ... 214 if (old_state != TCP_LISTEN) { ... 222 if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && 223 inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) { 224 if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) 225 err = fastopen_init_queue(sk, backlog); 226 else if ((sysctl_tcp_fastopen & 227 TFO_SERVER_WO_SOCKOPT2) != 0) 228 err = fastopen_init_queue(sk, 229 ((uint)sysctl_tcp_fastopen) >> 16); 230 else 231 err = 0; 232 if (err) 233 goto out; 234 } 235 err = inet_csk_listen_start(sk, backlog); ...fastopen_init_queue函数:
373 static inline int fastopen_init_queue(struct sock *sk, int backlog) 374 { 375 struct request_sock_queue *queue = 376 &inet_csk(sk)->icsk_accept_queue; 377 378 if (queue->fastopenq == NULL) { 379 queue->fastopenq = kzalloc( 380 sizeof(struct fastopen_queue), 381 sk->sk_allocation); 382 if (queue->fastopenq == NULL) 383 return -ENOMEM; 384 385 sk->sk_destruct = tcp_sock_destruct; 386 spin_lock_init(&queue->fastopenq->lock); 387 } 388 queue->fastopenq->max_qlen = backlog; 389 return 0; 390 }如果net.ipv4.tcp_fastopen & (TFO_SERVER_WO_SOCKOPT1|TFO_SERVER_WO_SOCKOPT2)的结果为0,则listen系统调用不会初始化TFO队列。但setsockopt函数也可以初始化TFO队列:
2371 static int do_tcp_setsockopt(struct sock *sk, int level, 2372 int optname, char __user *optval, unsigned int optlen) 2373 { 2374 struct tcp_sock *tp = tcp_sk(sk); 2375 struct inet_connection_sock *icsk = inet_csk(sk); 2376 int val; 2377 int err = 0; ... 2621 case TCP_FASTOPEN: 2622 if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | 2623 TCPF_LISTEN))) 2624 err = fastopen_init_queue(sk, val); 2625 else 2626 err = -EINVAL; 2627 break; ...如果inet_csk(sk)->icsk_accept_queue.fastopenq为NULL的话意味着TFO功能未开启。
轮到client端出场了!client端的sendto系统调用在内核中对应的TCP函数是tcp_sendmsg:
1016 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 1017 size_t size) 1018 { 1019 struct iovec *iov; 1020 struct tcp_sock *tp = tcp_sk(sk); 1021 struct sk_buff *skb; 1022 int iovlen, flags, err, copied = 0; 1023 int mss_now = 0, size_goal, copied_syn = 0, offset = 0; 1024 bool sg; 1025 long timeo; 1026 1027 lock_sock(sk); 1028 1029 flags = msg->msg_flags; 1030 if (flags & MSG_FASTOPEN) {//要使用TFO功能 1031 err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);//发送TFO数据 1032 if (err == -EINPROGRESS && copied_syn > 0) 1033 goto out; 1034 else if (err) 1035 goto out_err; 1036 offset = copied_syn; 1037 }tcp_sendmsg_fastopen函数用于发送带TFO请求的SYN或携带数据的SYN:
992 static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) 993 { 994 struct tcp_sock *tp = tcp_sk(sk); 995 int err, flags; 996 997 if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) 998 return -EOPNOTSUPP; 999 if (tp->fastopen_req != NULL) 1000 return -EALREADY; /* Another Fast Open is in progress */ 1001 1002 tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), 1003 sk->sk_allocation); 1004 if (unlikely(tp->fastopen_req == NULL)) 1005 return -ENOBUFS; 1006 tp->fastopen_req->data = msg; 1007 1008 flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; 1009 err = __inet_stream_connect(sk->sk_socket, msg->msg_name, 1010 msg->msg_namelen, flags); //发送连接请求 1011 *size = tp->fastopen_req->copied; //记录发送了多少数据,如果发送的是TFO请求则*size为0 1012 tcp_free_fastopen_req(tp); 1013 return err; 1014 }__inet_stream_connect函数会调用tcp_connect函数发送SYN:
2925 int tcp_connect(struct sock *sk) 2926 { 2927 struct tcp_sock *tp = tcp_sk(sk); 2928 struct sk_buff *buff; 2929 int err; ... 2950 /* Send off SYN; include data in Fast Open. */ 2951 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : 2952 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); //如果使用TFO,则会调用tcp_send_syn_data发送SYNtcp_send_syn_data函数:
2842 static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) 2843 { 2844 struct tcp_sock *tp = tcp_sk(sk); 2845 struct tcp_fastopen_request *fo = tp->fastopen_req; 2846 int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen; 2847 struct sk_buff *syn_data = NULL, *data; 2848 unsigned long last_syn_loss = 0; 2849 2850 tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ 2851 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, 2852 &syn_loss, &last_syn_loss);//查询缓存的TFO cookie信息 2853 /* Recurring FO SYN losses: revert to regular handshake temporarily */ 2854 if (syn_loss > 1 && 2855 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) { 2856 fo->cookie.len = -1; 2857 goto fallback; 2858 } 2859 2860 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)//无论有没有cookie,都发送携带数据的SYN 2861 fo->cookie.len = -1; 2862 else if (fo->cookie.len <= 0) //没有cookie,发送携带TFO请求选项的SYN 2863 goto fallback; 2864 2865 /* MSS for SYN-data is based on cached MSS and bounded by PMTU and 2866 * user-MSS. Reserve maximum option space for middleboxes that add 2867 * private TCP options. 
The cost is reduced data space in SYN :( 2868 */ 2869 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp) 2870 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; 2871 space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - 2872 MAX_TCP_OPTION_SPACE;//计算SYN包中的能够携带的数据的最大大小 2873 2874 syn_data = skb_copy_expand(syn, skb_headroom(syn), space, 2875 sk->sk_allocation);//复制SYN包中的内容,并扩展SKB中的空间 2876 if (syn_data == NULL) 2877 goto fallback; 2878 2879 for (i = 0; i < iovlen && syn_data->len < space; ++i) {//将用户态中缓存的数据copy到内核 2880 struct iovec *iov = &fo->data->msg_iov[i]; 2881 unsigned char __user *from = iov->iov_base; 2882 int len = iov->iov_len; 2883 2884 if (syn_data->len + len > space)//数据总长度大于SKB中空间的总大小 2885 len = space - syn_data->len; 2886 else if (i + 1 == iovlen) 2887 /* No more data pending in inet_wait_for_connect() */ 2888 fo->data = NULL;//数据全部发送完毕,不需要在inet_wait_for_connect中等待时发送 2889 2890 if (skb_add_data(syn_data, from, len))//将用户数据copy到SKB中 2891 goto fallback; 2892 } 2893 2894 /* Queue a data-only packet after the regular SYN for retransmission */ 2895 data = pskb_copy(syn_data, sk->sk_allocation); 2896 if (data == NULL) 2897 goto fallback; 2898 TCP_SKB_CB(data)->seq++; 2899 TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN; 2900 TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH); 2901 tcp_connect_queue_skb(sk, data); 2902 fo->copied = data->len; 2903 2904 if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {//发送携带数据的SYN 2905 tp->syn_data = (fo->copied > 0); 2906 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); 2907 goto done; 2908 } 2909 syn_data = NULL; 2910 2911 fallback: 2912 /* Send a regular SYN with Fast Open cookie request option */ 2913 if (fo->cookie.len > 0) 2914 fo->cookie.len = 0; 2915 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); 2916 if (err) 2917 tp->syn_fastopen = 0; 2918 kfree_skb(syn_data); 2919 done: 2920 fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ 2921 return err; 2922 }
如果client是发送TFO请求,则tcp_send_syn_data函数会发送一个不带数据的SYN包,数据部分则会由tcp_sendmsg函数放入发送队列中,等待三次握手完成后再发送。
tcp_transmit_skb函数会调用tcp_syn_options函数构建选项信息,tcp_options_write函数负责将选项写入TCP报头中:
498 static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, 499 struct tcp_out_options *opts, 500 struct tcp_md5sig_key **md5) 501 { 502 struct tcp_sock *tp = tcp_sk(sk); 503 unsigned int remaining = MAX_TCP_OPTION_SPACE; 504 struct tcp_fastopen_request *fastopen = tp->fastopen_req; ... 545 if (fastopen && fastopen->cookie.len >= 0) { 546 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len; 547 need = (need + 3) & ~3U; /* Align to 32 bits */ 548 if (remaining >= need) { 549 opts->options |= OPTION_FAST_OPEN_COOKIE; 550 opts->fastopen_cookie = &fastopen->cookie; 551 remaining -= need; 552 tp->syn_fastopen = 1; 553 } 554 }
409 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, 410 struct tcp_out_options *opts) 411 { 412 u16 options = opts->options; /* mungable copy */ ... 479 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { 480 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie; 481 482 *ptr++ = htonl((TCPOPT_EXP << 24) | 483 ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) | 484 TCPOPT_FASTOPEN_MAGIC); 485 486 memcpy(ptr, foc->val, foc->len); //如果找到了TFO cookie,则写入;没有TFO cookie则仅仅是一个TFO请求 487 if ((foc->len & 3) == 2) { 488 u8 *align = ((u8 *)ptr) + foc->len; 489 align[0] = align[1] = TCPOPT_NOP; 490 } 491 ptr += (foc->len + 3) >> 2; 492 }
client端在每次使用TFO功能时都会在TCP的选项中添加一个TFO选项,与server端进行第一次TFO交互时TFO选项只有4字节长,其值是一个“MAGIC”,这种TFO被称为“TFO请求”;后续的TFO选项长度会增加一个从服务器端获得的TFO cookie的长度值,并且在这个SYN中会携带数据。
server收到SYN后,会在tcp_v4_conn_request中进行处理:
1465 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 1466 { 1467 struct tcp_options_received tmp_opt; 1468 struct request_sock *req; 1469 struct inet_request_sock *ireq; 1470 struct tcp_sock *tp = tcp_sk(sk); 1471 struct dst_entry *dst = NULL; 1472 __be32 saddr = ip_hdr(skb)->saddr; 1473 __be32 daddr = ip_hdr(skb)->daddr; 1474 __u32 isn = TCP_SKB_CB(skb)->when; 1475 bool want_cookie = false; 1476 struct flowi4 fl4; 1477 struct tcp_fastopen_cookie foc = { .len = -1 }; 1478 struct tcp_fastopen_cookie valid_foc = { .len = -1 }; 1479 struct sk_buff *skb_synack; 1480 int do_fastopen; ... 1517 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);//解析TFO选项 ... 1585 do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);//检查TFO选项的合法性 ... 1598 skb_synack = tcp_make_synack(sk, dst, req, 1599 fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);//如果客户端发送的是TFO请求则发送TFO cookie,否则不发送 ... 1607 if (likely(!do_fastopen)) { ... 1622 } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))//创建子sock,将SYN中的数据放入socekt中的接收队列中 1623 goto drop_and_free; 1624 1625 return 0;tcp_fastopen_check函数用于检查SYN中TFO请求的合法性以及生成TFO cookie:
1288 static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, 1289 struct request_sock *req, 1290 struct tcp_fastopen_cookie *foc, 1291 struct tcp_fastopen_cookie *valid_foc) 1292 { 1293 bool skip_cookie = false; 1294 struct fastopen_queue *fastopenq; 1295 1296 if (likely(!fastopen_cookie_present(foc))) {//SYN中没有携带TFO选项 1297 /* See include/net/tcp.h for the meaning of these knobs */ 1298 if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) || 1299 ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) && 1300 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1))) 1301 skip_cookie = true; /* no cookie to validate */ //无需校验cookie,直接允许SYN中携带数据 1302 else 1303 return false; 1304 } 1305 fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; ... 1319 if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 || 1320 fastopenq == NULL || fastopenq->max_qlen == 0)//未开启Server端TFO功能 1321 return false; 1322 1323 if (fastopenq->qlen >= fastopenq->max_qlen) {//TFO队列已满 1324 struct request_sock *req1; 1325 spin_lock(&fastopenq->lock); 1326 req1 = fastopenq->rskq_rst_head; 1327 if ((req1 == NULL) || time_after(req1->expires, jiffies)) { 1328 spin_unlock(&fastopenq->lock); 1329 NET_INC_STATS_BH(sock_net(sk), 1330 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); 1331 /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/ 1332 foc->len = -1; 1333 return false; 1334 } 1335 fastopenq->rskq_rst_head = req1->dl_next;//替换队列中最老的一个 1336 fastopenq->qlen--; 1337 spin_unlock(&fastopenq->lock); 1338 reqsk_free(req1); 1339 } 1340 if (skip_cookie) {//不使用cookie,直接接收数据 1341 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 1342 return true; 1343 } 1344 if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {//SYN中携带了TFO cookie 1345 if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { 1346 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);//生成TFO cookie 1347 if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || //TFO初始化不成功 1348 memcmp(&foc->val[0], &valid_foc->val[0], //TFO cookie不合法 1349 TCP_FASTOPEN_COOKIE_SIZE) != 
0) 1350 return false; 1351 valid_foc->len = -1; 1352 } 1353 /* Acknowledge the data received from the peer. */ 1354 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 1355 return true; 1356 } else if (foc->len == 0) { /* Client requesting a cookie */ 1357 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);//生成一个TFO cookie保存在valid_foc中 1358 NET_INC_STATS_BH(sock_net(sk), 1359 LINUX_MIB_TCPFASTOPENCOOKIEREQD); 1360 } else { 1361 /* Client sent a cookie with wrong size. Treat it 1362 * the same as invalid and return a valid one. 1363 */ 1364 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); 1365 } 1366 return false; 1367 }1327:rskq_rst_head为NULL的场景为有很多带TFO的SYN到来但SYN|ACK发送后并没有收到RST包,这意味着之前收到的那些带数据的TFO SYN可能是合法的;如果不为NULL但队列中最老的一个仍然没有超时的话,也不能将其替换。
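1344-1351:如果client端SYN中的TFO选项不是请求,而是携带了cookie,并且server端没有被设置为不检查cookie的合法性(即未设置TFO_SERVER_COOKIE_NOT_CHKED标志),则server会生成一个cookie并与SYN中的TFO cookie进行比较:如果不一致则不使用TFO功能,此时valid_foc中保留着新生成的合法cookie;如果一致则将valid_foc->len重置为-1,然后接收SYN中的数据。如果server端被设置为不检查cookie,则跳过比较,直接接收SYN中的数据。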
tcp_make_synack函数会将tcp_fastopen_check中生成的TFO cookie写入TCP首部中,tcp_synack_options函数用来构建SYN|ACK报文的选项信息:
560 static unsigned int tcp_synack_options(struct sock *sk, 561 struct request_sock *req, 562 unsigned int mss, struct sk_buff *skb, 563 struct tcp_out_options *opts, 564 struct tcp_md5sig_key **md5, 565 struct tcp_fastopen_cookie *foc) 566 { ... 607 if (foc != NULL) { 608 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; 609 need = (need + 3) & ~3U; /* Align to 32 bits */ 610 if (remaining >= need) { 611 opts->options |= OPTION_FAST_OPEN_COOKIE; 612 opts->fastopen_cookie = foc; 613 remaining -= need; 614 } 615 } ...将选项信息写入SYN|ACK的方法与client发送SYN时一样,都是调用tcp_options_write函数。可以看出,TCP server端会返回给发送TFO请求的client端一个TFO cookie。client发送的下一个带数据的SYN必须携带这个cookie,而TCP server对这样的SYN回复的SYN|ACK中不会携带TFO选项。
在SYN携带TFO cookie的情况下TCP server会在收到SYN时就创建sock,这个功能由tcp_v4_conn_req_fastopen函数完成:
1369 static int tcp_v4_conn_req_fastopen(struct sock *sk, 1370 struct sk_buff *skb, 1371 struct sk_buff *skb_synack, 1372 struct request_sock *req) 1373 { 1374 struct tcp_sock *tp = tcp_sk(sk); 1375 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 1376 const struct inet_request_sock *ireq = inet_rsk(req); 1377 struct sock *child; ... 1383 1384 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);//生成子socket,其状态为TCP_SYN_RECV ... 1391 err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, 1392 ireq->rmt_addr, ireq->opt);//构建SYN|ACK的IP头并将其发送出去 1393 err = net_xmit_eval(err); 1394 if (!err) 1395 tcp_rsk(req)->snt_synack = tcp_time_stamp; 1396 /* XXX (TFO) - is it ok to ignore error and continue? */ 1397 1398 spin_lock(&queue->fastopenq->lock); 1399 queue->fastopenq->qlen++;//将这个连接计入TFO queue 1400 spin_unlock(&queue->fastopenq->lock); ... 1406 tp = tcp_sk(child); 1407 1408 tp->fastopen_rsk = req; 1409 /* Do a hold on the listner sk so that if the listener is being 1410 * closed, the child that has been accepted can live on and still 1411 * access listen_lock. 1412 */ 1413 sock_hold(sk); 1414 tcp_rsk(req)->listener = sk; 1415 1416 /* RFC1323: The window in SYN & SYN/ACK segments is never 1417 * scaled. So correct it appropriately. 1418 */ 1419 tp->snd_wnd = ntohs(tcp_hdr(skb)->window); 1420 1421 /* Activate the retrans timer so that SYNACK can be retransmitted. 1422 * The request socket is not added to the SYN table of the parent 1423 * because it's been added to the accept queue directly. 1424 */ 1425 inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, 1426 TCP_TIMEOUT_INIT, TCP_RTO_MAX); 1427 1428 /* Add the child socket directly into the accept queue */ 1429 inet_csk_reqsk_queue_add(sk, req, child); 1430 1431 /* Now finish processing the fastopen child socket. 
*/ 1432 inet_csk(child)->icsk_af_ops->rebuild_header(child); 1433 tcp_init_congestion_control(child); 1434 tcp_mtup_init(child); 1435 tcp_init_buffer_space(child); 1436 tcp_init_metrics(child); 1437 1438 /* Queue the data carried in the SYN packet. We need to first 1439 * bump skb's refcnt because the caller will attempt to free it. 1440 * 1441 * XXX (TFO) - we honor a zero-payload TFO request for now. 1442 * (Any reason not to?) 1443 */ 1444 if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {//SYN包中没有数据 1445 /* Don't queue the skb if there is no payload in SYN. 1446 * XXX (TFO) - How about SYN+FIN? 1447 */ 1448 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 1449 } else { 1450 skb = skb_get(skb); 1451 skb_dst_drop(skb); 1452 __skb_pull(skb, tcp_hdr(skb)->doff * 4); 1453 skb_set_owner_r(skb, child); 1454 __skb_queue_tail(&child->sk_receive_queue, skb);//将数据放入child的接收队列中 1455 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 1456 tp->syn_data_acked = 1; 1457 } 1458 sk->sk_data_ready(sk, 0);//通知持有listening socket的进程调用accept系统调用创建新连接 1459 bh_unlock_sock(child); 1460 sock_put(child); 1461 WARN_ON(req->sk == NULL); 1462 return 0; 1463 }应用进程收到listening socket的可读通告后,使用accept系统调用建立socket,就可以立即从这个新的socket中读到数据,并开始与客户端进行数据交互。
client端收到server端回复的SYN|ACK后,由tcp_rcv_synsent_state_process函数进行处理:
5373 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5374 const struct tcphdr *th, unsigned int len) 5375 { 5376 struct inet_connection_sock *icsk = inet_csk(sk); 5377 struct tcp_sock *tp = tcp_sk(sk); 5378 struct tcp_fastopen_cookie foc = { .len = -1 }; 5379 int saved_clamp = tp->rx_opt.mss_clamp; 5380 5381 tcp_parse_options(skb, &tp->rx_opt, 0, &foc);//解析TFO选项 ... 5482 if ((tp->syn_fastopen || tp->syn_data) && //如果发送过TFO选项或在SYN中发送过数据 5483 tcp_rcv_fastopen_synack(sk, skb, &foc))//记录SYN|ACK中的TFO cookie 5484 return -1;tcp_rcv_fastopen_synack函数检查并保存server端发送的TFO cookie:
5331 static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, 5332 struct tcp_fastopen_cookie *cookie) 5333 { 5334 struct tcp_sock *tp = tcp_sk(sk); 5335 struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL; 5336 u16 mss = tp->rx_opt.mss_clamp; 5337 bool syn_drop; 5338 5339 if (mss == tp->rx_opt.user_mss) { 5340 struct tcp_options_received opt; 5341 5342 /* Get original SYNACK MSS value if user MSS sets mss_clamp */ 5343 tcp_clear_options(&opt); 5344 opt.user_mss = opt.mss_clamp = 0; 5345 tcp_parse_options(synack, &opt, 0, NULL); 5346 mss = opt.mss_clamp; 5347 } 5348 5349 if (!tp->syn_fastopen) /* Ignore an unsolicited cookie */ 5350 cookie->len = -1;//如果客户端没有发送TFO请求但服务器给出了TFO cookie,忽略之 5351 5352 /* The SYN-ACK neither has cookie nor acknowledges the data. Presumably 5353 * the remote receives only the retransmitted (regular) SYNs: either 5354 * the original SYN-data or the corresponding SYN-ACK is lost. 5355 */ 5356 syn_drop = (cookie->len <= 0 && data && tp->total_retrans); //客户端认为发生了SYN丢失事件 5357 5358 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);//存储SYN|ACK包中的TFO cookie,并记录发现SYN丢失事件的时间 5359 5360 if (data) { /* Retransmit unacked data in SYN */ 5361 tcp_for_write_queue_from(data, sk) { 5362 if (data == tcp_send_head(sk) || 5363 __tcp_retransmit_skb(sk, data)) 5364 break; 5365 } 5366 tcp_rearm_rto(sk); 5367 return true; 5368 } 5369 tp->syn_data_acked = tp->syn_data; 5370 return false; 5371 }在保存了TFO cookie后,client在向相同IP地址的server发送SYN时都可以携带数据(这时必须发送TFO cookie)。client在收到SYN|ACK后需要回复ACK报文,服务器端在接收ACK时对TFO的处理如下:
5600 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, 5601 const struct tcphdr *th, unsigned int len) 5602 { 5603 struct tcp_sock *tp = tcp_sk(sk); 5604 struct inet_connection_sock *icsk = inet_csk(sk); 5605 struct request_sock *req; ... 5661 req = tp->fastopen_rsk;//找到在SYN请求到来后创建子socket时使用的request sock 5662 if (req != NULL) { 5663 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && 5664 sk->sk_state != TCP_FIN_WAIT1); 5665 5666 if (tcp_check_req(sk, skb, req, NULL, true) == NULL)//检查包的合法性 5667 goto discard; 5668 } ... 5681 switch (sk->sk_state) { 5682 case TCP_SYN_RECV: 5683 if (acceptable) { 5684 /* Once we leave TCP_SYN_RECV, we no longer 5685 * need req so release it. 5686 */ 5687 if (req) {//使用了TFO cookie 5688 tcp_synack_rtt_meas(sk, req); 5689 tp->total_retrans = req->num_retrans; 5690 5691 reqsk_fastopen_remove(sk, req, false);//将request sock从TFO queue中删除,TFO流程全部结束 5692 } else { ...综上, TFO 在收到SYN的时候就创建socket并将数据提交给应用进程,这样就比普通模式节省了SYN|ACK与ACK的交互时间,减小了通信延迟 。