SYN包TCP选项的设置

一个SYN包可能包括这些内容:初始序列号、初始窗口大小、MSS、窗口扩大因子、时间戳。
那么是怎么决定是否设置某个选项,怎样设置的呢?例如窗口扩大因子有关传输的性能,我可以怎么样改动这个值呢?
下面来看看内核是怎么做的?
设置SYN包的TCP选项的函数(tcp_output.c中):
/* Compute TCP options for SYN packets. This is not the final
 * network wire format yet.
 */
static unsigned int tcp_syn_options( struct sock *sk, struct sk_buff *skb,
                         struct tcp_out_options *opts,
                         struct tcp_md5sig_key **md5)
{
       struct tcp_sock *tp = tcp_sk(sk);
       //TCP选项的最大长度
       unsigned int remaining = MAX_TCP_OPTION_SPACE;
       struct tcp_fastopen_request *fastopen = tp->fastopen_req;

#ifdef CONFIG_TCP_MD5SIG
      *md5 = tp->af_specific->md5_lookup(sk, sk);
       if (*md5) {
            opts->options |= OPTION_MD5;
            remaining -= TCPOLEN_MD5SIG_ALIGNED;
      }
#else
      *md5 = NULL;
#endif

       /* We always get an MSS option.  The option bytes which will be seen in
       * normal data packets should timestamps be used, must be in the MSS
       * advertised.  But we subtract them from tp->mss_cache so that
       * calculations in tcp_sendmsg are simpler etc.  So account for this
       * fact here if necessary.  If we don't do this correctly, as a
       * receiver we won't recognize data packets as being full sized when we
       * should, and thus we won't abide by the delayed ACK rules correctly.
       * SACKs don't matter, we never delay an ACK when we have any of those
       * going out.  */
      opts->mss = tcp_advertise_mss(sk);
      remaining -= TCPOLEN_MSS_ALIGNED;

       if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
            opts->options |= OPTION_TS;
            opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset;
            opts->tsecr = tp->rx_opt.ts_recent;
            remaining -= TCPOLEN_TSTAMP_ALIGNED;
      }
       if (likely(sysctl_tcp_window_scaling)) {
            opts->ws = tp->rx_opt.rcv_wscale;
            opts->options |= OPTION_WSCALE;
            remaining -= TCPOLEN_WSCALE_ALIGNED;
      }
       if (likely(sysctl_tcp_sack)) {
            opts->options |= OPTION_SACK_ADVERTISE;
             if (unlikely(!(OPTION_TS & opts->options)))
                  remaining -= TCPOLEN_SACKPERM_ALIGNED;
      }

       if (fastopen && fastopen->cookie.len >= 0) {
            u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
            need = (need + 3) & ~3U;  /* Align to 32 bits */
             if (remaining >= need) {
                  opts->options |= OPTION_FAST_OPEN_COOKIE;
                  opts->fastopen_cookie = &fastopen->cookie;
                  remaining -= need;
                  tp->syn_fastopen = 1;
            }
      }

       return MAX_TCP_OPTION_SPACE - remaining;
}
是否设置窗口扩大选项是由 sysctl_tcp_window_scaling 的值决定的, sysctl_tcp_window_scaling 是一个sysctl变量
/* sysctl variables for tcp */                         
extern int sysctl_tcp_window_scaling;
如果sysctl_tcp_window_scaling的值为真, 则设置窗口扩大选项。它的具体的值是由 tp->rx_opt.rcv_wscale 决定的。
rx_opt的结构如下:
/* Per-connection record of TCP option state: timestamp bookkeeping,
 * flags for options seen on the SYN, the two window-scale shift counts,
 * and MSS limits.
 */
struct tcp_options_received {
       /*    PAWS/RTTM data    */
       long   ts_recent_stamp ; /* Time we stored ts_recent (for aging) */
       u32    ts_recent ;   /* Time stamp to echo next          */
       u32    rcv_tsval ;   /* Time stamp value                 */
       u32    rcv_tsecr ;   /* Time stamp echo reply            */
       /* The bit-fields below add up to 1+1+1+1+4+4+4 = 16 bits and are
        * declared together on a single u16.
        */
       u16   saw_tstamp : 1,    /* Saw TIMESTAMP on last packet           */
       tstamp_ok : 1,    /* TIMESTAMP seen on SYN packet           */
       dsack : 1,  /* D-SACK is scheduled              */
       wscale_ok : 1,    /* Wscale seen on SYN packet        */
       sack_ok : 4,       /* SACK seen on SYN packet          */
       snd_wscale : 4,   /* Window scaling received from sender    */
       rcv_wscale : 4;   /* Window scaling to send to receiver (4 bits: 0..15) */
      u8     num_sacks ;  /* Number of SACK blocks            */
       u16    user_mss ;    /* mss requested by user in ioctl   */
       u16    mss_clamp ;   /* Maximal mss, negotiated at connection setup */
};
可见 rcv_wscale 的值就是接收窗口扩大因子。这个值的计算是在函数 tcp_select_initial_window 中完成的:
/* Pick the window scale and the initial receive window to offer, assuming
 * that @__space bytes will be offered.  Results are written through
 * @rcv_wnd, @window_clamp and @rcv_wscale.
 *
 * @__space:      buffer space to offer; negative values are treated as 0
 * @mss:          maximum segment size; callers MUST guarantee mss >= 1
 * @rcv_wnd:      out, initial receive window
 * @window_clamp: in/out, upper bound on the advertised window (0 = unset)
 * @wscale_ok:    non-zero when window scaling may be used
 * @rcv_wscale:   out, window scale shift count (0..14)
 * @init_rcv_wnd: initial window in segments, or 0 to use the default
 *
 * NOTE: for smooth operation the initial offering should be a multiple of
 * mss whenever possible.
 */
void tcp_select_initial_window(int __space, __u32 mss,
			       __u32 *rcv_wnd, __u32 *window_clamp,
			       int wscale_ok, __u8 *rcv_wscale,
			       __u32 init_rcv_wnd)
{
	/* Receive space cannot be negative. */
	unsigned int avail = __space > 0 ? __space : 0;

	/* With no clamp configured, fall back to the largest scaled window. */
	if (!*window_clamp)
		*window_clamp = 65535 << 14;

	/* The offered space may not exceed the clamp. */
	avail = min(*window_clamp, avail);

	/* Round down to a whole number of segments when more than one fits. */
	if (avail > mss)
		avail -= avail % mss;

	/* NOTE: an initial window above 32767 breaks some buggy TCP stacks.
	 * When the admin indicates we may be talking to such a stack, the
	 * initial offering is truncated to 32K-1, unless the remote sent a
	 * window scaling option, which is taken as a sign that it does not
	 * misread the window field as a signed quantity.
	 */
	*rcv_wnd = sysctl_tcp_workaround_signed_windows ?
			min(avail, MAX_TCP_WINDOW) : avail;

	*rcv_wscale = 0;
	if (wscale_ok) {
		/* Scale for the largest window that could ever be used: the
		 * larger of sysctl_tcp_rmem[2] and sysctl_rmem_max, capped by
		 * the clamp.  See RFC1323 for why the shift stops at 14.
		 */
		unsigned int largest = max_t(u32, sysctl_tcp_rmem[2],
					     sysctl_rmem_max);

		largest = min_t(u32, largest, *window_clamp);
		for (; largest > 65535 && *rcv_wscale < 14; largest >>= 1)
			(*rcv_wscale)++;
	}

	if (mss > (1 << *rcv_wscale)) {
		/* Use the default unless the caller specified a value. */
		if (init_rcv_wnd == 0)
			init_rcv_wnd = tcp_default_init_rwnd(mss);
		*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
	}

	/* Keep the clamp within what the chosen scale can represent. */
	*window_clamp = min(65535U << *rcv_wscale, *window_clamp);
}
tcp_select_initial_window 确定窗口扩大因子和初始窗口大小。window_clamp是通告窗口上限
u32      window_clamp ;     /* Maximal window to advertise          */
初始窗口大小一般设置为 TCP_INIT_CWND   的2倍乘以mss
/* Default initial receive window, in segments, for a given @mss.
 *
 * The window is twice TCP_INIT_CWND so that new unsent data can still be
 * sent properly during fast recovery (RFC 3517, Section 4, NextSeg()
 * rule (2)).  For an mss above 1460 the segment count is scaled down in
 * proportion, but never below 2.
 */
u32 tcp_default_init_rwnd(u32 mss)
{
	u32 segments = TCP_INIT_CWND * 2;

	if (mss <= 1460)
		return segments;

	return max((1460 * segments) / mss, 2U);
}

问题:如何更改窗口扩大因子
窗口扩大选项允许使用大于64K 的TCP窗口,这有助于提升高延迟或高带宽网络的性能。为了使用窗口扩大选项,需要增加发送和接收缓冲的大小。全局设置可以通过/proc/sys/net/ipv4/tcp_wmem和/proc/sys/net/ipv4/tcp_rmem文件。针对单个socket设置可以通过使用SO_SNDBUF和SO_RCVBUF套接字选项,调用setsockopt实现。
不过要注意:通过SO_SNDBUF和SO_RCVBUF机制声明的socket缓冲的最大值被这两个文件/proc/sys/net/core/rmem_max和/proc/sys/net/core/wmem_max的值限定了,即
设置超过这两个文件中的值时,setsockopt不会报错,但实际生效的值会被截断为该上限。
所以通过SO_SNDBUF和SO_RCVBUF改变窗口大小是有限度的。更多选项的设置可以查看man tcp。

下面是全局设置的一个例子:
如前所述,我们可以看到,是否支持窗口扩大选项由sysctl变量 sysctl_tcp_window_scaling 决定,我们可以在/proc/sys/net/ipv4中找到它目前的值。
决定窗口扩大因子的主要是sysctl_tcp_rmem[2],这是/proc下tcp_rmem三元组中的最后一个值(准确地说,内核取的是 sysctl_tcp_rmem[2] 与 sysctl_rmem_max 中的较大者,并且不超过 window_clamp)。
可以在/etc/sysctl.conf文件中添加下面两行来更新
net.ipv4.tcp_window_scaling = 1  
net.ipv4.tcp_rmem = 4096     87380     2097120

sudo sysctl -p使设置立即生效
这样设置后(假设 /proc/sys/net/core/rmem_max 不大于 2097120),rcv_wscale 应该是 5,即窗口放大倍数为 2^5 = 32(2097120 = 65535 × 32)。
SYN包TCP选项的设置_第1张图片






你可能感兴趣的:(网络编程)