vxlan报文的csum处理逻辑由VXLAN_F_UDP_CSUM、VXLAN_F_REMCSUM_TX、VXLAN_F_REMCSUM_RX、VXLAN_F_REMCSUM_NOPARTIAL等等标记决定,本篇从发包流程来看实现逻辑:
vxlan_xmit_skb函数,还包括iptunnel_handle_offloads函数,实现功能如下图:
总结下:
1)如果报文是gso报文,那么设置csum或remcsum只是决定了skb的gso_type中添加的属性;
2)如果报文不是gso报文,那么skb的ip_summed最终会设置为CHECKSUM_NONE;
2.1 设置csum但是未设置remcsum,则报文的encapsulation会被置为0;
2.2 设置csum但是未设置remcsum,且报文的ip_summed为CHECKSUM_PARTIAL,则需要软件计算内层报文的csum值;
udp_set_csum函数(udp_tunnel_xmit_skb函数封装UDP头时,计算csum值)
/*
 * udp_set_csum - fill in the checksum field of an outgoing IPv4 UDP header.
 * @nocheck: when true, no checksum is sent and the field is written as 0
 * @skb:     buffer whose transport header is the UDP header to update
 * @saddr:   IPv4 source address used for the pseudo-header sum
 * @daddr:   IPv4 destination address used for the pseudo-header sum
 * @len:     length handed to udp_v4_check() and skb_checksum()
 *
 * Cases are tested in this order and exactly one applies:
 *   1. @nocheck set          -> check = 0
 *   2. GSO skb               -> check = ~pseudo-header sum only
 *   3. egress device has NETIF_F_V4_CSUM
 *                            -> CHECKSUM_PARTIAL, device finishes the sum
 *   4. otherwise             -> full software checksum, CHECKSUM_UNNECESSARY
 *
 * NOTE(review): for VXLAN the caller is expected to pass the negation of the
 * VXLAN_F_UDP_CSUM setting as @nocheck - confirm against the caller.
 */
void udp_set_csum(bool nocheck, struct sk_buff *skb,
		  __be32 saddr, __be32 daddr, int len)
{
	struct udphdr *udph = udp_hdr(skb);
	bool hw_csum;

	/* Checksum disabled by the caller. */
	if (nocheck) {
		udph->check = 0;
		return;
	}

	/* GSO: seed the field with the complemented pseudo-header sum only. */
	if (skb_is_gso(skb)) {
		udph->check = ~udp_v4_check(len, saddr, daddr, 0);
		return;
	}

	/* Can the egress device (taken from the skb's dst entry) checksum IPv4? */
	hw_csum = skb_dst(skb) && skb_dst(skb)->dev &&
		  (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM);

	/*
	 * Both remaining paths require that the skb is not already marked
	 * CHECKSUM_PARTIAL. Per the article's analysis of vxlan_xmit_skb, a
	 * non-GSO skb should never arrive here in that state.
	 */
	BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);

	if (hw_csum) {
		/*
		 * Hand the payload sum to the device: store the complemented
		 * pseudo-header sum and record where summing starts (the
		 * transport header) and where the result is written (the
		 * check field).
		 */
		udph->check = ~udp_v4_check(len, saddr, daddr, 0);
		skb->csum_offset = offsetof(struct udphdr, check);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->ip_summed = CHECKSUM_PARTIAL;
		return;
	}

	/*
	 * Software fallback: zero the field, sum @len bytes starting at offset 0
	 * of the skb data, then fold in the pseudo-header.
	 */
	{
		__wsum sum;

		udph->check = 0;
		sum = skb_checksum(skb, 0, len, 0);
		udph->check = udp_v4_check(len, saddr, daddr, sum);
		/* A computed value of 0 is sent as CSUM_MANGLED_0 instead. */
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;

		/* The checksum is complete; nothing further is requested. */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
}
总结:
1)nocheck为true时,UDP头的check直接置0,不做任何计算;否则若报文是gso报文,仅计算UDP伪首部csum值;
2)非gso报文
2.1 网卡支持csum计算,则设置ip_summed为CHECKSUM_PARTIAL,计算UDP伪首部,并设置csum_start和csum_offset值;
2.2 网卡不支持csum计算,则软件计算csum值,并设置ip_summed为CHECKSUM_UNNECESSARY,不需要硬件进行计算;
__skb_udp_tunnel_segment函数
/*
 * __skb_udp_tunnel_segment - GSO-segment a UDP-tunnelled skb.
 *
 * Pulls the tunnel header (tnl_hlen bytes) off @skb, lets @gso_inner_segment
 * split the inner packet, then for every resulting segment pushes the outer
 * headers back, rewrites the outer UDP length and, if the GSO type asks for
 * an outer UDP checksum, refreshes the outer UDP check field.
 *
 * @skb:               tunnelled GSO packet to segment
 * @features:          feature mask; ANDed with skb->dev->hw_enc_features
 *                     before being passed to @gso_inner_segment
 * @gso_inner_segment: callback that segments the inner packet
 * @new_protocol:      value stored in skb->protocol while the inner packet
 *                     is being segmented
 * @is_ipv6:           selects NETIF_F_V6_CSUM instead of NETIF_F_V4_CSUM
 *                     for the checksum-offload test
 *
 * Returns the segment list produced by @gso_inner_segment. Returns
 * ERR_PTR(-EINVAL) if the tunnel header cannot be pulled, or the callback's
 * error/NULL result if inner segmentation fails.
 */
static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
netdev_features_t features),
__be16 new_protocol, bool is_ipv6)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
int mac_len = skb->mac_len;
int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); // bytes from the outer transport (UDP) header to the inner MAC header, i.e. UDP header + tunnel (VXLAN) header
__be16 protocol = skb->protocol;
netdev_features_t enc_features;
int udp_offset, outer_hlen;
unsigned int oldlen;
bool need_csum = !!(skb_shinfo(skb)->gso_type & // true when the GSO type requests an outer UDP checksum
SKB_GSO_UDP_TUNNEL_CSUM);
bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); // true when the GSO type requests remote checksum offload
bool offload_csum = false, dont_encap = (need_csum || remcsum);
// Complement of the pre-segmentation length; combined with each segment's
// new UDP length below to adjust the check field incrementally.
oldlen = (u16)~skb->len;
if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
goto out;
skb->encapsulation = 0;
__skb_pull(skb, tnl_hlen); // advance the data pointer past the tunnel header to the inner MAC header
skb_reset_mac_header(skb); // MAC header now refers to the inner frame
skb_set_network_header(skb, skb_inner_network_offset(skb)); // network header now refers to the inner IP header
skb->mac_len = skb_inner_network_offset(skb); // inner MAC header length
skb->protocol = new_protocol; // skb now describes the inner packet, so inner GSO segmentation can proceed
skb->encap_hdr_csum = need_csum;
skb->remcsum_offload = remcsum;
/* Try to offload checksum if possible */
offload_csum = !!(need_csum &&
(skb->dev->features &
(is_ipv6 ? NETIF_F_V6_CSUM : NETIF_F_V4_CSUM))); // true when a checksum is needed and the device advertises the matching checksum feature
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & features;
segs = gso_inner_segment(skb, enc_features); // restart GSO segmentation on the inner packet (from the MAC layer, for VXLAN)
if (IS_ERR_OR_NULL(segs)) {
// Inner segmentation failed: restore the skb fields changed above.
skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
mac_len);
goto out;
}
outer_hlen = skb_tnl_header_len(skb); // presumably the total length of the outer headers up to the inner MAC header - TODO confirm
udp_offset = outer_hlen - tnl_hlen; // offset of the outer UDP header from the start of the outer headers
skb = segs; // walk the segment list; per the original note each segment's data points at the inner MAC header here
do {
struct udphdr *uh;
int len;
__be32 delta;
if (dont_encap) {
// csum or remcsum requested: clear the encapsulation mark and
// reset ip_summed to CHECKSUM_NONE before the outer fix-up.
skb->encapsulation = 0;
skb->ip_summed = CHECKSUM_NONE;
} else {
/* Only set up inner headers if we might be offloading
* inner checksum.
*/ // reached only when neither csum nor remcsum is set (dont_encap is false)
skb_reset_inner_headers(skb); // skb still describes the inner packet, so the inner header offsets can be recorded
skb->encapsulation = 1;
}
skb->mac_len = mac_len;
skb->protocol = protocol;
skb_push(skb, outer_hlen); // move the data pointer back to the outer MAC header
skb_reset_mac_header(skb); // MAC header = outer MAC header
skb_set_network_header(skb, mac_len); // outer network (IP) header follows the outer MAC header
skb_set_transport_header(skb, udp_offset); // outer transport (UDP) header
len = skb->len - udp_offset;
uh = udp_hdr(skb); // outer UDP header; its per-segment fields must be refreshed after segmentation
uh->len = htons(len);
if (!need_csum) // no outer UDP checksum requested: leave uh->check untouched
continue;
delta = htonl(oldlen + len);
uh->check = ~csum_fold((__force __wsum) // the UDP length changed after segmentation, so adjust the check field by the length delta
((__force u32)uh->check +
(__force u32)delta));
if (offload_csum) {
// Device completes the checksum: mark CHECKSUM_PARTIAL and point
// csum_start/csum_offset at this segment's outer UDP check field.
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head; // recomputed per segment, since header positions changed after segmentation
skb->csum_offset = offsetof(struct udphdr, check);
} else if (remcsum) {
/* Need to calculate checksum from scratch,
* inner checksums are never when doing
* remote_checksum_offload.
*/
skb->csum = skb_checksum(skb, udp_offset, // remcsum: software checksum over everything from the outer UDP header to the end
skb->len - udp_offset,
0);
uh->check = csum_fold(skb->csum);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
} else {
uh->check = gso_make_checksum(skb, ~uh->check); // software checksum; per the original note it builds on the per-segment csum computed in skb_segment
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
}
} while ((skb = skb->next));
out:
return segs;
}
总结:
1)如果不设置csum,那么gso分段后不需要刷新UDP头的check值;
2)否则
2.1 offload_csum,计算UDP伪首部,交给硬件进行csum计算;
2.2 设置remcsum,由软件计算csum值;
2.3 未设置remcsum,由软件计算csum值,基于csum值进行增量计算;
skb_segment函数
perform_csum_check:
if (!csum && !nskb->remcsum_offload) { //如果设置csum且未设置remcsum,软件计算内层报文的csum
nskb->csum = skb_checksum(nskb, doffset,
nskb->len - doffset, 0); //计算csum值
nskb->ip_summed = CHECKSUM_NONE;
SKB_GSO_CB(nskb)->csum_start =
skb_headroom(nskb) + doffset;
}
} while ((offset += len) < head_skb->len);
/* Some callers want to get the end of the list.
* Put it in segs->prev to avoid walking the list.
* (see validate_xmit_skb_list() for example)
*/
总结:
1)如果设置csum且未设置remcsum,软件计算内层报文的csum值;
2)其他情况,不计算csum值;