MSS选项用于通知对端本端能接受的每个TCP分段中的最大数据长度,发送端TCP将接收到的MSS值作为所发送分段大小的上限。MSS选项的总长度为4字节,MSS值为16bit,最大为65535。MSS选项只能在有SYN标记的包中携带。
在初始化socket的时候就需要设置MSS相关信息。客户端调用connect时的处理如下:
2752 void tcp_connect_init(struct sock *sk) 2753 { 2754 const struct dst_entry *dst = __sk_dst_get(sk); 2755 struct tcp_sock *tp = tcp_sk(sk); 2756 __u8 rcv_wscale; ... 2778 tp->advmss = dst_metric_advmss(dst); //dst_metric_advmss到路由表中查询MSS,会利用到路径MTU探测的结果 2779 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)//tp->rx_opt.user_mss是TCP_MAXSEG socket选项设置的值 2780 tp->advmss = tp->rx_opt.user_mss; //取二者中最小的 ...发送SYN时:
828 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, 829 gfp_t gfp_mask) 830 { 831 const struct inet_connection_sock *icsk = inet_csk(sk); 832 struct inet_sock *inet; 833 struct tcp_sock *tp; 834 struct tcp_skb_cb *tcb; 835 struct tcp_out_options opts; 836 unsigned int tcp_options_size, tcp_header_size; 837 struct tcp_md5sig_key *md5; 838 struct tcphdr *th; 839 int err; ... 868 memset(&opts, 0, sizeof(opts)); 869 870 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) 871 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);//将选项信息写入opts局部变量中 872 else 873 tcp_options_size = tcp_established_options(sk, skb, &opts, 874 &md5); ... 925 tcp_options_write((__be32 *)(th + 1), tp, &opts);//将opts中的选项信息写入报文的TCP头中 ...
构建SYN|ACK时:
2654 struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, 2655 struct request_sock *req, 2656 struct tcp_fastopen_cookie *foc) 2657 { 2658 struct tcp_out_options opts; 2659 struct inet_request_sock *ireq = inet_rsk(req); 2660 struct tcp_sock *tp = tcp_sk(sk); 2661 struct tcphdr *th; 2662 struct sk_buff *skb; 2663 struct tcp_md5sig_key *md5; 2664 int tcp_header_size; 2665 int mss; ... 2678 mss = dst_metric_advmss(dst); 2679 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) 2680 mss = tp->rx_opt.user_mss; ... 2703 memset(&opts, 0, sizeof(opts)); 2704 #ifdef CONFIG_SYN_COOKIES 2705 if (unlikely(req->cookie_ts)) 2706 TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); 2707 else 2708 #endif 2709 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2710 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, 2711 foc) + sizeof(*th); ... 2735 tcp_options_write((__be32 *)(th + 1), tp, &opts); ...SYN包的tcp_syn_options函数用于生成选项信息:
498 static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, 499 struct tcp_out_options *opts, 500 struct tcp_md5sig_key **md5) 501 { 502 struct tcp_sock *tp = tcp_sk(sk); 503 unsigned int remaining = MAX_TCP_OPTION_SPACE; 504 struct tcp_fastopen_request *fastopen = tp->fastopen_req; ... 525 opts->mss = tcp_advertise_mss(sk); ...用tcp_advertise_mss函数得到MSS的值:
118 static __u16 tcp_advertise_mss(struct sock *sk) 119 { 120 struct tcp_sock *tp = tcp_sk(sk); 121 const struct dst_entry *dst = __sk_dst_get(sk); 122 int mss = tp->advmss; 123 124 if (dst) { 125 unsigned int metric = dst_metric_advmss(dst); //再获取一次路由表中的MSS 126 127 if (metric < mss) { 128 mss = metric; 129 tp->advmss = mss; //刷新MSS 130 } 131 } 132 133 return (__u16)mss; 134 }
SYN|ACK报文的选项信息由tcp_synack_options构建:
560 static unsigned int tcp_synack_options(struct sock *sk, 561 struct request_sock *req, 562 unsigned int mss, struct sk_buff *skb, 563 struct tcp_out_options *opts, 564 struct tcp_md5sig_key **md5, 565 struct tcp_fastopen_cookie *foc) 566 { 567 struct inet_request_sock *ireq = inet_rsk(req); ... 588 opts->mss = mss; 589 remaining -= TCPOLEN_MSS_ALIGNED; ...
tcp_options_write用于将选项信息写入TCP报头:
409 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, 410 struct tcp_out_options *opts) 411 { 412 u16 options = opts->options; /* mungable copy */ ... 422 if (unlikely(opts->mss)) { 423 *ptr++ = htonl((TCPOPT_MSS << 24) | //MSS选项标识号:2 424 (TCPOLEN_MSS << 16) | //MSS选项长度:4 425 opts->mss); //MSS值 426 } ...收到SYN或SYN|ACK后,TCP会使用tcp_parse_options函数解析选项信息:
3481 void tcp_parse_options(const struct sk_buff *skb, 3482 struct tcp_options_received *opt_rx, int estab, 3483 struct tcp_fastopen_cookie *foc) 3484 { 3485 const unsigned char *ptr; 3486 const struct tcphdr *th = tcp_hdr(skb); 3487 int length = (th->doff * 4) - sizeof(struct tcphdr); 3488 3489 ptr = (const unsigned char *)(th + 1); 3490 opt_rx->saw_tstamp = 0; 3491 3492 while (length > 0) { 3493 int opcode = *ptr++; 3494 int opsize; 3495 3496 switch (opcode) { 3497 case TCPOPT_EOL: 3498 return; 3499 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ 3500 length--; 3501 continue; 3502 default: 3503 opsize = *ptr++; 3504 if (opsize < 2) /* "silly options" */ 3505 return; 3506 if (opsize > length) 3507 return; /* don't parse partial options */ 3508 switch (opcode) { 3509 case TCPOPT_MSS: 3510 if (opsize == TCPOLEN_MSS && th->syn && !estab) { 3511 u16 in_mss = get_unaligned_be16(ptr); //得到报文中MSS选项的值 3512 if (in_mss) { 3513 if (opt_rx->user_mss && 3514 opt_rx->user_mss < in_mss) 3515 in_mss = opt_rx->user_mss; //不能超过用户设定的值 3516 opt_rx->mss_clamp = in_mss; //记录MSS 3517 } 3518 } 3519 break; ...收到SYN时tcp_v4_conn_request函数会调用tcp_openreq_init函数将MSS信息记录到request_sock中:
1075 static inline void tcp_openreq_init(struct request_sock *req, 1076 struct tcp_options_received *rx_opt, 1077 struct sk_buff *skb) 1078 { 1079 struct inet_request_sock *ireq = inet_rsk(req); ... 1086 req->mss = rx_opt->mss_clamp; ...Server端在收到三次握手的ACK时TCP会由tcp_v4_syn_recv_sock调用tcp_create_openreq_child创建sock:
381 struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) 382 { 383 struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); ... 481 newtp->rx_opt.mss_clamp = req->mss; ...
tcp_v4_syn_recv_sock中接下来会利用对端MSS选项信息计算rcv_mss:
1642 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 1643 struct request_sock *req, 1644 struct dst_entry *dst) 1645 { ... 1658 newsk = tcp_create_openreq_child(sk, req, skb); //在这里会转存SYN包中的MSS信息 ... 1692 tcp_sync_mss(newsk, dst_mtu(dst)); //使用SYN包中的MSS信息计算当前MSS 1693 newtp->advmss = dst_metric_advmss(dst); 1694 if (tcp_sk(sk)->rx_opt.user_mss && 1695 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) 1696 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1697 1698 tcp_initialize_rcv_mss(newsk); //计算rcv_mss的值 ...rcv_mss是本端所估计的对端使用的MSS。之所以需要估计是因为对端的MSS信息也是在不断变化的。
Client端在收到SYN|ACK时会调用tcp_rcv_synsent_state_process函数处理MSS信息:
5373 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5374 const struct tcphdr *th, unsigned int len) 5375 { 5376 struct inet_connection_sock *icsk = inet_csk(sk); 5377 struct tcp_sock *tp = tcp_sk(sk); ... 5381 tcp_parse_options(skb, &tp->rx_opt, 0, &foc); ... 5470 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 5471 tcp_initialize_rcv_mss(sk); ...看来无论是收到SYN还是SYN|ACK,MSS信息都会保存在tp->rx_opt.mss_clamp中,且都会调用tcp_sync_mss函数。
tcp_sync_mss用来计算当前MSS信息:
1296 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) 1297 { 1298 struct tcp_sock *tp = tcp_sk(sk); 1299 struct inet_connection_sock *icsk = inet_csk(sk); 1300 int mss_now; 1301 1302 if (icsk->icsk_mtup.search_high > pmtu) 1303 icsk->icsk_mtup.search_high = pmtu; 1304 1305 mss_now = tcp_mtu_to_mss(sk, pmtu); 1306 mss_now = tcp_bound_to_half_wnd(tp, mss_now); //根据最大窗口大小计算mss_now 1307 1308 /* And store cached results */ 1309 icsk->icsk_pmtu_cookie = pmtu; 1310 if (icsk->icsk_mtup.enabled) 1311 mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low)); 1312 tp->mss_cache = mss_now; //将计算结果保存在mss_cache中,以便发送数据时使用 1313 1314 return mss_now; 1315 }tcp_mtu_to_mss用于将MTU值转换为MSS值:
1198 static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) 1199 { 1200 const struct tcp_sock *tp = tcp_sk(sk); 1201 const struct inet_connection_sock *icsk = inet_csk(sk); 1202 int mss_now; 1203 1204 /* Calculate base mss without TCP options: 1205 It is MMS_S - sizeof(tcphdr) of rfc1122 1206 */ 1207 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); 1208 1209 /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ 1210 if (icsk->icsk_af_ops->net_frag_header_len) { 1211 const struct dst_entry *dst = __sk_dst_get(sk); 1212 1213 if (dst && dst_allfrag(dst)) 1214 mss_now -= icsk->icsk_af_ops->net_frag_header_len; 1215 } 1216 1217 /* Clamp it (mss_clamp does not include tcp options) */ 1218 if (mss_now > tp->rx_opt.mss_clamp) //mss_clamp是从对端发送的带SYN标记的包的MSS选项中获得 1219 mss_now = tp->rx_opt.mss_clamp; //不能超过对端通告的MSS值 1220 1221 /* Now subtract optional transport overhead */ 1222 mss_now -= icsk->icsk_ext_hdr_len; 1223 1224 /* Then reserve room for full set of TCP options and 8 bytes of data */ 1225 if (mss_now < 48) 1226 mss_now = 48; 1227 return mss_now; 1228 } 1229 1230 /* Calculate MSS. Not accounting for SACKs here. */ 1231 int tcp_mtu_to_mss(struct sock *sk, int pmtu) 1232 { 1233 /* Subtract TCP options size, not including SACKs */ 1234 return __tcp_mtu_to_mss(sk, pmtu) - 1235 (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); 1236 }当前最新的MSS记录在tp->mss_cache中,tcp_sendmsg函数构建报文段时会用到:
1016 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 1017 size_t size) 1018 { ... 1067 mss_now = tcp_send_mss(sk, &size_goal, flags); ...tcp_send_mss会调用tcp_current_mss得到当前MSS:
1321 unsigned int tcp_current_mss(struct sock *sk) 1322 { 1323 const struct tcp_sock *tp = tcp_sk(sk); 1324 const struct dst_entry *dst = __sk_dst_get(sk); 1325 u32 mss_now; 1326 unsigned int header_len; 1327 struct tcp_out_options opts; 1328 struct tcp_md5sig_key *md5; 1329 1330 mss_now = tp->mss_cache; //得到MSS记录 1331 1332 if (dst) { //如果有路由表项,则根据路由表中的信息更新MSS 1333 u32 mtu = dst_mtu(dst); 1334 if (mtu != inet_csk(sk)->icsk_pmtu_cookie) 1335 mss_now = tcp_sync_mss(sk, mtu); 1336 } 1337 1338 header_len = tcp_established_options(sk, NULL, &opts, &md5) + 1339 sizeof(struct tcphdr); 1340 /* The mss_cache is sized based on tp->tcp_header_len, which assumes 1341 * some common options. If this is an odd packet (because we have SACK 1342 * blocks etc) then our calculated header_len will be different, and 1343 * we have to adjust mss_now correspondingly */ 1344 if (header_len != tp->tcp_header_len) { 1345 int delta = (int) header_len - tp->tcp_header_len; //TCP报头长度发生了变化 1346 mss_now -= delta; //修正MSS 1347 } 1348 1349 return mss_now; 1350 }以上便是MSS选项从MSS值获取、选项构建、选项解析、MSS值记录到MSS值的使用的全过程。而发送数据时最终使用的当前MSS(mss_now)则是由通告MSS(SYN中MSS选项的值)、最大窗口大小、报头长度及其变化、路径MTU等多种因素共同来决定的。