8.6 TCP Fast Open(TFO)

  TFO(TCP Fast Open)是一种能够在TCP连接建立阶段传输数据的机制。使用这种机制可以将数据交互提前,降低应用层事务的延迟。其基本步骤如下:

1、客户端发送一个SYN包到服务器,这个包中携带了Fast Open Cookie请求的TCP选项;

2、服务器生成一个cookie,这个cookie是通过使用密钥加密客户端的IP地址生成的。服务器给客户端发送SYN|ACK响应,在响应包的选项中包含了这个cookie;

3、客户端存储这个cookie以便将来再次与这个服务器的IP建立TFO连接时使用;

  也就是说,第一次TCP连接只是交换cookie信息,无法在SYN包中携带数据。在完成上述步骤后,接下来的TCP连接就可以在SYN中携带数据了。流程如下:

1、客户端发送一个携带应用数据和以TCP选项方式存储的Fast Open cookie的SYN包;

2、服务器验证这个cookie,如果合法,服务器发送一个SYN|ACK确认SYN和数据,然后数据被传递到应用进程;如果不合法,服务器丢弃数据,发送一个SYN|ACK只确认SYN,接下来走三次握手的普通流程;

3、如果接收了SYN包中的数据,服务器在接收到客户端的第一个ACK前可以发送其它响应数据;

4、客户端发送ACK确认了服务器的SYN;如果客户端的数据没有被确认,数据会在ACK包中重传;

5、下面的流程与普通的TCP交互流程无异。

  客户端使用TFO的方法:内核功能选项sysctl -w net.ipv4.tcp_fastopen=1;客户端代码:

    int sockfd, n;
    char recvline[4096], sendline[4096];
    struct sockaddr_in servaddr;
    char buf[20] = {"aaabbbccc"};
    int ret = 0;

    if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        printf ("create socket error: %s(errno: %d)\n", strerror (errno),
                errno);
        exit (0);
    }

    memset (&servaddr, 0, sizeof (servaddr));
    servaddr.sin_family = AF_INET;
    servaddr.sin_port = htons (6666);
    servaddr.sin_addr.s_addr = inet_addr("127.0.0.1");

    ret = sendto(sockfd, buf, strlen(buf), MSG_FASTOPEN,
            (struct sockaddr *)&servaddr, sizeof(servaddr));
    if (ret < 0) {
        printf ("send msg error: %s(errno: %d)\n", strerror (errno), errno);
    }

    close (sockfd);
   即客户端在发送数据时,生成socket后直接使用sendto发送数据,不用connect系统调用。第一次交互时只是向服务器申请一个TFO cookie,数据并不在连接建立过程中送达;TFO cookie交互完成后,以后客户端每次用同样方式发送数据时都会在SYN包中携带数据。

  服务器端开启TFO功能的方法:内核功能选项sysctl -w net.ipv4.tcp_fastopen=2;服务器端代码:

    int listenfd, connfd, fd;
    struct sockaddr_in servaddr;
    char buff[MAXLINE];
    int n;

    if ((listenfd = socket (AF_INET, SOCK_STREAM, 0)) == -1) {
        printf ("create socket error: %s(errno: %d)\n", strerror (errno), errno);
        exit (0);
    }

    memset (&servaddr, 0, sizeof (servaddr));
    servaddr.sin_family = AF_INET;
    servaddr.sin_addr.s_addr = inet_addr("127.0.0.1");
    servaddr.sin_port = htons (6666);

    if (bind (listenfd, (struct sockaddr *) &servaddr, sizeof (servaddr)) == -1) {
        printf ("bind socket error: %s(errno: %d)\n", strerror (errno), errno);
        exit (0);
    }

    if (listen (listenfd, 10) == -1) {
        printf ("listen socket error: %s(errno: %d)\n", strerror (errno), errno);
        exit (0);
    }

    int qlen = 5;
    ret = setsockopt(listenfd, 6, TCP_FASTOPEN, &qlen, sizeof(qlen));
    if (ret < 0) {
        printf ("setsockopt error: %s(errno: %d)\n", strerror (errno), errno);
    }
    while (1)
    {
        printf("Before accpet!\n");
        if ((connfd = accept (listenfd, (struct sockaddr *) NULL, NULL)) == -1)
...
  如果想要同时开启客户端和服务端的TFO功能,可以用“sysctl -w net.ipv4.tcp_fastopen=3”。

  TFO的客户端功能在Linux 3.6内核中开始集成,服务器端功能在Linux 3.7内核中开始集成。

  下面通过分析内核代码来了解TFO的运行机制。开启server端TFO功能(TFO_SERVER_ENABLE)后,如果sysctl中还设置了TFO_SERVER_WO_SOCKOPT1或TFO_SERVER_WO_SOCKOPT2标志,server端进程在调用listen系统调用时就会初始化TFO队列:

 195 int inet_listen(struct socket *sock, int backlog)
 196 {
 197     struct sock *sk = sock->sk;    
 198     unsigned char old_state;
 199     int err;             
...
 214     if (old_state != TCP_LISTEN) { 
...
 222         if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
 223             inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) {
 224             if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
 225                 err = fastopen_init_queue(sk, backlog);
 226             else if ((sysctl_tcp_fastopen &
 227                   TFO_SERVER_WO_SOCKOPT2) != 0)
 228                 err = fastopen_init_queue(sk,
 229                     ((uint)sysctl_tcp_fastopen) >> 16);
 230             else
 231                 err = 0;
 232             if (err)
 233                 goto out;
 234         }
 235         err = inet_csk_listen_start(sk, backlog);
...
  fastopen_init_queue函数:

373 static inline int fastopen_init_queue(struct sock *sk, int backlog)
374 {
375     struct request_sock_queue *queue =
376         &inet_csk(sk)->icsk_accept_queue;
377 
378     if (queue->fastopenq == NULL) {
379         queue->fastopenq = kzalloc(
380             sizeof(struct fastopen_queue),
381             sk->sk_allocation);
382         if (queue->fastopenq == NULL)
383             return -ENOMEM;
384 
385         sk->sk_destruct = tcp_sock_destruct;
386         spin_lock_init(&queue->fastopenq->lock);
387     }
388     queue->fastopenq->max_qlen = backlog;
389     return 0;
390 }    
  如果 net.ipv4.tcp_fastopen & (TFO_SERVER_WO_SOCKOPT1|TFO_SERVER_WO_SOCKOPT2) 的结果为0(即这两个标志位都没有设置),则listen时TFO队列不会被初始化。但setsockopt函数也可以初始化TFO队列:

2371 static int do_tcp_setsockopt(struct sock *sk, int level,
2372         int optname, char __user *optval, unsigned int optlen)
2373 {
2374     struct tcp_sock *tp = tcp_sk(sk);
2375     struct inet_connection_sock *icsk = inet_csk(sk);
2376     int val;
2377     int err = 0;
...
2621     case TCP_FASTOPEN:
2622         if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
2623             TCPF_LISTEN)))
2624             err = fastopen_init_queue(sk, val);
2625         else
2626             err = -EINVAL;
2627         break;
...
  如果inet_csk(sk)->icsk_accept_queue.fastopenq为NULL的话意味着TFO功能未开启。

  轮到client端出场了!client端的sendto系统调用在内核中对应的TCP函数是tcp_sendmsg:

1016 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1017         size_t size)
1018 {
1019     struct iovec *iov;
1020     struct tcp_sock *tp = tcp_sk(sk);
1021     struct sk_buff *skb;
1022     int iovlen, flags, err, copied = 0;
1023     int mss_now = 0, size_goal, copied_syn = 0, offset = 0;
1024     bool sg;
1025     long timeo;
1026
1027     lock_sock(sk);
1028         
1029     flags = msg->msg_flags;
1030     if (flags & MSG_FASTOPEN) {//要使用TFO功能
1031         err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);//发送TFO数据
1032         if (err == -EINPROGRESS && copied_syn > 0)
1033             goto out;    
1034         else if (err)    
1035             goto out_err;
1036         offset = copied_syn;
1037     }
   tcp_sendmsg_fastopen函数用于发送带TFO请求的SYN或携带数据的SYN:

 992 static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
 993 {
 994     struct tcp_sock *tp = tcp_sk(sk);
 995     int err, flags;      
 996
 997     if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
 998         return -EOPNOTSUPP;
 999     if (tp->fastopen_req != NULL)  
1000         return -EALREADY; /* Another Fast Open is in progress */
1001
1002     tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request),
1003                    sk->sk_allocation);            
1004     if (unlikely(tp->fastopen_req == NULL))
1005         return -ENOBUFS;
1006     tp->fastopen_req->data = msg;  
1007
1008     flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
1009     err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
1010                     msg->msg_namelen, flags);      //发送连接请求
1011     *size = tp->fastopen_req->copied; //记录发送了多少数据,如果发送的是TFO请求则*size为0
1012     tcp_free_fastopen_req(tp);
1013     return err;
1014 }
  __inet_stream_connect函数会调用tcp_connect函数发送SYN:

2925 int tcp_connect(struct sock *sk)
2926 {   
2927     struct tcp_sock *tp = tcp_sk(sk);
2928     struct sk_buff *buff;
2929     int err;
...
2950     /* Send off SYN; include data in Fast Open. */
2951     err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
2952           tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); //如果使用TFO,则会调用tcp_send_syn_data发送SYN
   tcp_send_syn_data函数:
2842 static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
2843 {
2844     struct tcp_sock *tp = tcp_sk(sk);
2845     struct tcp_fastopen_request *fo = tp->fastopen_req;
2846     int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
2847     struct sk_buff *syn_data = NULL, *data;
2848     unsigned long last_syn_loss = 0;
2849
2850     tp->rx_opt.mss_clamp = tp->advmss;  /* If MSS is not cached */
2851     tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
2852                    &syn_loss, &last_syn_loss);//查询缓存的TFO cookie信息
2853     /* Recurring FO SYN losses: revert to regular handshake temporarily */
2854     if (syn_loss > 1 &&  
2855         time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
2856         fo->cookie.len = -1;
2857         goto fallback;   
2858     }
2859
2860     if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)//无论有没有cookie,都发送携带数据的SYN
2861         fo->cookie.len = -1;
2862     else if (fo->cookie.len <= 0)      //没有cookie,发送携带TFO请求选项的SYN
2863         goto fallback;   
2864
2865     /* MSS for SYN-data is based on cached MSS and bounded by PMTU and
2866      * user-MSS. Reserve maximum option space for middleboxes that add
2867      * private TCP options. The cost is reduced data space in SYN :(
2868      */
2869     if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
2870         tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2871     space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
2872         MAX_TCP_OPTION_SPACE;//计算SYN包中的能够携带的数据的最大大小
2873
2874     syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
2875                    sk->sk_allocation);//复制SYN包中的内容,并扩展SKB中的空间
2876     if (syn_data == NULL)
2877         goto fallback;
2878
2879     for (i = 0; i < iovlen && syn_data->len < space; ++i) {//将用户态中缓存的数据copy到内核
2880         struct iovec *iov = &fo->data->msg_iov[i];
2881         unsigned char __user *from = iov->iov_base;
2882         int len = iov->iov_len;
2883
2884         if (syn_data->len + len > space)//数据总长度大于SKB中空间的总大小
2885             len = space - syn_data->len;
2886         else if (i + 1 == iovlen)
2887             /* No more data pending in inet_wait_for_connect() */
2888             fo->data = NULL;//数据全部发送完毕,不需要在inet_wait_for_connect中等待时发送
2889
2890         if (skb_add_data(syn_data, from, len))//将用户数据copy到SKB中
2891             goto fallback;
2892     }
2893
2894     /* Queue a data-only packet after the regular SYN for retransmission */
2895     data = pskb_copy(syn_data, sk->sk_allocation);
2896     if (data == NULL)
2897         goto fallback;
2898     TCP_SKB_CB(data)->seq++;
2899     TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
2900     TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
2901     tcp_connect_queue_skb(sk, data);
2902     fo->copied = data->len;
2903
2904     if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {//发送携带数据的SYN
2905         tp->syn_data = (fo->copied > 0);
2906         NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
2907         goto done;
2908     }
2909     syn_data = NULL;
2910
2911 fallback:
2912     /* Send a regular SYN with Fast Open cookie request option */
2913     if (fo->cookie.len > 0)
2914         fo->cookie.len = 0;
2915     err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
2916     if (err)
2917         tp->syn_fastopen = 0;
2918     kfree_skb(syn_data);
2919 done:
2920     fo->cookie.len = -1;  /* Exclude Fast Open option for SYN retries */
2921     return err;
2922 }

  如果client是发送TFO请求,则tcp_send_syn_data函数会发送一个不带数据的SYN包,数据部分则会由tcp_sendmsg函数放入发送队列中,等待三次握手完成后再发送。

  tcp_transmit_skb函数会调用tcp_syn_options函数构建选项信息,tcp_options_write函数负责将选项写入TCP报头中:

498 static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 499                 struct tcp_out_options *opts,  
 500                 struct tcp_md5sig_key **md5)
 501 {
 502     struct tcp_sock *tp = tcp_sk(sk);
 503     unsigned int remaining = MAX_TCP_OPTION_SPACE;
 504     struct tcp_fastopen_request *fastopen = tp->fastopen_req;
...
 545     if (fastopen && fastopen->cookie.len >= 0) {
 546         u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
 547         need = (need + 3) & ~3U;  /* Align to 32 bits */
 548         if (remaining >= need) {
 549             opts->options |= OPTION_FAST_OPEN_COOKIE;
 550             opts->fastopen_cookie = &fastopen->cookie;
 551             remaining -= need;
 552             tp->syn_fastopen = 1;
 553         }
 554     }
 409 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 410                   struct tcp_out_options *opts)
 411 {
412     u16 options = opts->options;    /* mungable copy */
...
 479     if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
 480         struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
 481 
 482         *ptr++ = htonl((TCPOPT_EXP << 24) |
 483                    ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
 484                    TCPOPT_FASTOPEN_MAGIC);
 485 
 486         memcpy(ptr, foc->val, foc->len);  //如果找到了TFO cookie,则写入;没有RFO cookie则仅仅是一个TFO请求
 487         if ((foc->len & 3) == 2) {
 488             u8 *align = ((u8 *)ptr) + foc->len;
 489             align[0] = align[1] = TCPOPT_NOP;
 490         }
 491         ptr += (foc->len + 3) >> 2;
 492     }

  client端在每次使用TFO功能时都会在TCP的选项中添加一个TFO选项。与server端进行第一次TFO交互时,TFO选项只有4字节长(TCPOLEN_EXP_FASTOPEN_BASE),除选项类型和长度外只包含一个2字节的“MAGIC”而没有cookie,这种TFO选项被称为“TFO请求”;后续SYN中的TFO选项会在这4字节之后附加从服务器端获得的TFO cookie,选项长度相应增加cookie的长度,并且在这个SYN中会携带数据。

  server收到SYN后,会在tcp_v4_conn_request中进行处理:

 1465 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1466 {
1467     struct tcp_options_received tmp_opt;
1468     struct request_sock *req;
1469     struct inet_request_sock *ireq;
1470     struct tcp_sock *tp = tcp_sk(sk);
1471     struct dst_entry *dst = NULL;  
1472     __be32 saddr = ip_hdr(skb)->saddr;
1473     __be32 daddr = ip_hdr(skb)->daddr;
1474     __u32 isn = TCP_SKB_CB(skb)->when;
1475     bool want_cookie = false;
1476     struct flowi4 fl4;
1477     struct tcp_fastopen_cookie foc = { .len = -1 };
1478     struct tcp_fastopen_cookie valid_foc = { .len = -1 };
1479     struct sk_buff *skb_synack;    
1480     int do_fastopen;
...
1517     tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);//解析TFO选项
...
1585     do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);//检查TFO选项的合法性
...
1598     skb_synack = tcp_make_synack(sk, dst, req,
1599         fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);//如果客户端发送的是TFO请求则发送TFO cookie,否则不发送
...
1607     if (likely(!do_fastopen)) {
...
1622     } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))//创建子sock,将SYN中的数据放入socekt中的接收队列中
1623         goto drop_and_free;
1624
1625     return 0;
   tcp_fastopen_check函数用于检查SYN中TFO请求的合法性以及生成TFO cookie:

1288 static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
1289                    struct request_sock *req,      
1290                    struct tcp_fastopen_cookie *foc,
1291                    struct tcp_fastopen_cookie *valid_foc)
1292 {
1293     bool skip_cookie = false;
1294     struct fastopen_queue *fastopenq;
1295
1296     if (likely(!fastopen_cookie_present(foc))) {//SYN中没有携带TFO选项
1297         /* See include/net/tcp.h for the meaning of these knobs */
1298         if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
1299             ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
1300             (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
1301             skip_cookie = true; /* no cookie to validate */  //无需校验cookie,直接允许SYN中携带数据
1302         else
1303             return false;
1304     }
1305     fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
...
1319     if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
1320         fastopenq == NULL || fastopenq->max_qlen == 0)//未开启Server端TFO功能
1321         return false;    
1322
1323     if (fastopenq->qlen >= fastopenq->max_qlen) {//TFO队列已满
1324         struct request_sock *req1;     
1325         spin_lock(&fastopenq->lock);   
1326         req1 = fastopenq->rskq_rst_head;
1327         if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
1328             spin_unlock(&fastopenq->lock);
1329             NET_INC_STATS_BH(sock_net(sk),     
1330                 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
1331             /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
1332             foc->len = -1;
1333             return false;
1334         }
1335         fastopenq->rskq_rst_head = req1->dl_next;//替换队列中最老的一个
1336         fastopenq->qlen--;
1337         spin_unlock(&fastopenq->lock);
1338         reqsk_free(req1);
1339     }
1340     if (skip_cookie) {//不使用cookie,直接接收数据
1341         tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1342         return true;
1343     }
1344     if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {//SYN中携带了TFO cookie
1345         if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
1346             tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);//生成TFO cookie
1347             if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || //TFO初始化不成功
1348                 memcmp(&foc->val[0], &valid_foc->val[0], //TFO cookie不合法
1349                 TCP_FASTOPEN_COOKIE_SIZE) != 0)
1350                 return false;
1351             valid_foc->len = -1;
1352         }
1353         /* Acknowledge the data received from the peer. */
1354         tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1355         return true;
1356     } else if (foc->len == 0) { /* Client requesting a cookie */
1357         tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);//生成一个TFO cookie保存在valid_foc中
1358         NET_INC_STATS_BH(sock_net(sk),
1359             LINUX_MIB_TCPFASTOPENCOOKIEREQD);
1360     } else {
1361         /* Client sent a cookie with wrong size. Treat it
1362          * the same as invalid and return a valid one.
1363          */
1364         tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
1365     }
1366     return false;
1367 }
  1327:rskq_rst_head为NULL的场景为有很多带TFO的SYN到来但SYN|ACK发送后并没有收到RST包,这意味着之前收到的那些带数据的TFO SYN可能是合法的;如果不为NULL但队列中最老的一个仍然没有超时的话,也不能将其替换。这两种情况下函数都返回false,本次SYN不使用TFO功能。

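  1344-1355:如果client端SYN中的TFO选项携带的不是请求而是cookie:若server端没有设置TFO_SERVER_COOKIE_NOT_CHKED(即需要检查cookie的合法性),则先根据客户端IP生成一个cookie,再与SYN中的TFO cookie进行比较,如果不一致则不使用TFO功能;校验通过后将valid_foc->len重置为-1,这样SYN|ACK中就不会再携带TFO cookie。若server端设置了TFO_SERVER_COOKIE_NOT_CHKED,则跳过校验,valid_foc保持未设置状态,直接接收SYN中的数据。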

  tcp_make_synack函数会将tcp_fastopen_check中生成的TFO cookie写入TCP首部中,tcp_synack_options函数用来构建SYN|ACK报文的选项信息:

 560 static unsigned int tcp_synack_options(struct sock *sk,
 561                    struct request_sock *req,
 562                    unsigned int mss, struct sk_buff *skb,
 563                    struct tcp_out_options *opts,
 564                    struct tcp_md5sig_key **md5,
 565                    struct tcp_fastopen_cookie *foc)
 566 {
...
 607     if (foc != NULL) {
 608         u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
 609         need = (need + 3) & ~3U;  /* Align to 32 bits */
 610         if (remaining >= need) {
 611             opts->options |= OPTION_FAST_OPEN_COOKIE;
 612             opts->fastopen_cookie = foc;
 613             remaining -= need;
 614         }
 615     }
...
  将选项信息写入SYN|ACK的方法与client发送SYN时一样,都是调用tcp_options_write函数。可以看出,TCP server端会返回给发送TFO请求的client端一个TFO cookie。client发送的下一个带数据的SYN必须携带这个cookie,而TCP server对这样的SYN回复的SYN|ACK中不会携带TFO选项。

  在SYN携带TFO cookie的情况下TCP server会在收到SYN时就创建sock,这个功能由tcp_v4_conn_req_fastopen函数完成:

1369 static int tcp_v4_conn_req_fastopen(struct sock *sk,
1370                     struct sk_buff *skb,           
1371                     struct sk_buff *skb_synack,    
1372                     struct request_sock *req)
1373 {
1374     struct tcp_sock *tp = tcp_sk(sk);
1375     struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
1376     const struct inet_request_sock *ireq = inet_rsk(req);
1377     struct sock *child;  
...   
1383
1384     child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);//生成子socket,其状态为TCP_SYN_RECV
...
1391     err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
1392                     ireq->rmt_addr, ireq->opt);//构建SYN|ACK的IP头并将其发送出去
1393     err = net_xmit_eval(err);
1394     if (!err)
1395         tcp_rsk(req)->snt_synack = tcp_time_stamp;
1396     /* XXX (TFO) - is it ok to ignore error and continue? */
1397
1398     spin_lock(&queue->fastopenq->lock);
1399     queue->fastopenq->qlen++;//将这个连接计入TFO queue
1400     spin_unlock(&queue->fastopenq->lock);
...
1406     tp = tcp_sk(child);
1407
1408     tp->fastopen_rsk = req;
1409     /* Do a hold on the listner sk so that if the listener is being
1410      * closed, the child that has been accepted can live on and still
1411      * access listen_lock.
1412      */
1413     sock_hold(sk);
1414     tcp_rsk(req)->listener = sk;
1415
1416     /* RFC1323: The window in SYN & SYN/ACK segments is never
1417      * scaled. So correct it appropriately.
1418      */
1419     tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
1420
1421     /* Activate the retrans timer so that SYNACK can be retransmitted.
1422      * The request socket is not added to the SYN table of the parent
1423      * because it's been added to the accept queue directly.
1424      */
1425     inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
1426         TCP_TIMEOUT_INIT, TCP_RTO_MAX);
1427
1428     /* Add the child socket directly into the accept queue */
1429     inet_csk_reqsk_queue_add(sk, req, child);
1430
1431     /* Now finish processing the fastopen child socket. */
1432     inet_csk(child)->icsk_af_ops->rebuild_header(child);
1433     tcp_init_congestion_control(child);
1434     tcp_mtup_init(child);
1435     tcp_init_buffer_space(child);
1436     tcp_init_metrics(child);
1437
1438     /* Queue the data carried in the SYN packet. We need to first
1439      * bump skb's refcnt because the caller will attempt to free it.
1440      *
1441      * XXX (TFO) - we honor a zero-payload TFO request for now.
1442      * (Any reason not to?)
1443      */
1444     if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {//SYN包中没有数据
1445         /* Don't queue the skb if there is no payload in SYN.
1446          * XXX (TFO) - How about SYN+FIN?
1447          */
1448         tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1449     } else {
1450         skb = skb_get(skb);
1451         skb_dst_drop(skb);
1452         __skb_pull(skb, tcp_hdr(skb)->doff * 4);
1453         skb_set_owner_r(skb, child);
1454         __skb_queue_tail(&child->sk_receive_queue, skb);//将数据放入child的接收队列中
1455         tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1456         tp->syn_data_acked = 1;
1457     }
1458     sk->sk_data_ready(sk, 0);//通知持有listening socket的进程调用accept系统调用创建新连接
1459     bh_unlock_sock(child);
1460     sock_put(child);
1461     WARN_ON(req->sk == NULL);
1462     return 0;
1463 }
   应用进程收到listening socket的可读通告后,使用accept系统调用建立socket,就可以立即从这个新的socket中读到数据,并开始与客户端进行数据交互。
  如果client的TFO是cookie,则SYN|ACK的处理过程与不使用TFO的情况是一样的;如果client发送的TFO是请求,则在收到SYN|ACK时需要将包中的TFO cookie保存下来:

 5373 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5374                      const struct tcphdr *th, unsigned int len)
5375 {
5376     struct inet_connection_sock *icsk = inet_csk(sk);
5377     struct tcp_sock *tp = tcp_sk(sk);
5378     struct tcp_fastopen_cookie foc = { .len = -1 };
5379     int saved_clamp = tp->rx_opt.mss_clamp;
5380
5381     tcp_parse_options(skb, &tp->rx_opt, 0, &foc);//解析TFO选项
...
5482         if ((tp->syn_fastopen || tp->syn_data) && //如果发送过TFO选项或在SYN中发送过数据
5483             tcp_rcv_fastopen_synack(sk, skb, &foc))//记录SYN|ACK中的FTO cookie
5484             return -1;
  tcp_rcv_fastopen_synack函数检查并保存server端发送的TFO cookie:
5331 static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5332                     struct tcp_fastopen_cookie *cookie)
5333 {
5334     struct tcp_sock *tp = tcp_sk(sk);
5335     struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
5336     u16 mss = tp->rx_opt.mss_clamp;
5337     bool syn_drop;
5338
5339     if (mss == tp->rx_opt.user_mss) {
5340         struct tcp_options_received opt;
5341
5342         /* Get original SYNACK MSS value if user MSS sets mss_clamp */
5343         tcp_clear_options(&opt);       
5344         opt.user_mss = opt.mss_clamp = 0;
5345         tcp_parse_options(synack, &opt, 0, NULL);
5346         mss = opt.mss_clamp;
5347     }
5348
5349     if (!tp->syn_fastopen)  /* Ignore an unsolicited cookie */
5350         cookie->len = -1;//如果客户端没有发送TFO请求但服务器给出了TFO cookie,忽略之
5351
5352     /* The SYN-ACK neither has cookie nor acknowledges the data. Presumably
5353      * the remote receives only the retransmitted (regular) SYNs: either
5354      * the original SYN-data or the corresponding SYN-ACK is lost.
5355      */
5356     syn_drop = (cookie->len <= 0 && data && tp->total_retrans); //客户端认为发生了SYN丢失事件
5357
5358     tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);//存储SYN|ACK包中的TFO cookie,并记录发现SYN丢失事件的时间
5359
5360     if (data) { /* Retransmit unacked data in SYN */
5361         tcp_for_write_queue_from(data, sk) {
5362             if (data == tcp_send_head(sk) ||
5363                 __tcp_retransmit_skb(sk, data))
5364                 break;
5365         }
5366         tcp_rearm_rto(sk);
5367         return true;
5368     }
5369     tp->syn_data_acked = tp->syn_data;
5370     return false;
5371 }
  在保存了TFO cookie后,client在向相同IP地址的server发送SYN时都可以携带数据(这时必须发送TFO cookie)。client在收到SYN|ACK后需要回复ACK报文,服务器端在接收ACK时对TFO的处理如下:
 5600 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5601               const struct tcphdr *th, unsigned int len)
5602 {
5603     struct tcp_sock *tp = tcp_sk(sk);
5604     struct inet_connection_sock *icsk = inet_csk(sk);
5605     struct request_sock *req;
...
5661     req = tp->fastopen_rsk;//找到在SYN请求到来后创建子socket时使用的request sock
5662     if (req != NULL) {
5663         WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
5664             sk->sk_state != TCP_FIN_WAIT1);
5665
5666         if (tcp_check_req(sk, skb, req, NULL, true) == NULL)//检查包的合法性
5667             goto discard;
5668     }
...
5681         switch (sk->sk_state) {
5682         case TCP_SYN_RECV:
5683             if (acceptable) {
5684                 /* Once we leave TCP_SYN_RECV, we no longer
5685                  * need req so release it.
5686                  */
5687                 if (req) {//使用了TFO cookie
5688                     tcp_synack_rtt_meas(sk, req);
5689                     tp->total_retrans = req->num_retrans;
5690
5691                     reqsk_fastopen_remove(sk, req, false);//将request sock从TFO queue中删除,TFO流程全部结束
5692                 } else {
...
   综上,TFO在收到SYN的时候就创建socket并将数据提交给应用进程,这样就比普通模式节省了等待SYN|ACK与ACK交互完成的时间(约一个RTT),减小了通信延迟。

你可能感兴趣的:(tcp,linux内核)