UDP 的 offload 对象为 udpv4_offload,定义如下:
/*
 * Offload callback table for UDP over IPv4: one handler each for GSO
 * segmentation, GRO receive and GRO completion.
 */
static const struct net_offload udpv4_offload = {
	.callbacks = {
		.gso_segment  = udp4_ufo_fragment,
		.gro_receive  = udp4_gro_receive,
		.gro_complete = udp4_gro_complete,
	},
};

/* udp4_gro_receive */
static struct sk_buff **udp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); //取到UDP头指针 if (unlikely(!uh)) goto flush; /* Don't bother verifying checksum if we're going to flush anyway. */ if (NAPI_GRO_CB(skb)->flush) //如果flush已经置1,则不进行csum计算 goto skip; if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, //如果csum check失败,则直接flush报文到协议栈 inet_gro_compute_pseudo)) goto flush; else if (uh->check) //如果csum_cnt为0,且csum_valid为false使用新计算的伪首部的csum值 skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, inet_gro_compute_pseudo); skip: NAPI_GRO_CB(skb)->is_ipv6 = 0; return udp_gro_receive(head, skb, uh); flush: NAPI_GRO_CB(skb)->flush = 1; //刷新当前报文的flush,调用vxlan等offload后,可能会刷新 return NULL; }udp_gro_receive函数
struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, struct udphdr *uh) { struct udp_offload_priv *uo_priv; struct sk_buff *p, **pp = NULL; struct udphdr *uh2; unsigned int off = skb_gro_offset(skb); int flush = 1; if (NAPI_GRO_CB(skb)->udp_mark || //如果udp_mark已经被标记 (skb->ip_summed != CHECKSUM_PARTIAL && //或者ip_summed不等于CHECKSUM_PARTIAL NAPI_GRO_CB(skb)->csum_cnt == 0 && //且csum_cnt等于0且csum_valid等于0 !NAPI_GRO_CB(skb)->csum_valid)) //直接flush该报文 goto out; /* mark that this skb passed once through the udp gro layer */ NAPI_GRO_CB(skb)->udp_mark = 1; //udp_mark置1,避免下次再进入 rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { if (uo_priv->offload->port == uh->dest && uo_priv->offload->callbacks.gro_receive) //根据UDP报文目的端口,找到udp_offload对象,例如vxlan报文 goto unflush; } goto out_unlock; unflush: flush = 0; for (p = *head; p; p = p->next) { //遍历gro_list中的报文 if (!NAPI_GRO_CB(p)->same_flow) continue; uh2 = (struct udphdr *)(p->data + off); //得到UDP头 /* Match ports and either checksums are either both zero * or nonzero. */ if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) || //UDP判断同一条流的条件是:源端口和目的端口一致 (!uh->check ^ !uh2->check)) { //csum要么同为0要么同不为零 NAPI_GRO_CB(p)->same_flow = 0; continue; } } skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ //报文移到vxlan头或其他的 skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); //刷新csum值 NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; pp = uo_priv->offload->callbacks.gro_receive(head, skb, //调用vxlan offload等 uo_priv->offload); out_unlock: rcu_read_unlock(); out: NAPI_GRO_CB(skb)->flush |= flush; //刷新当前报文的flush,调用四层offload后,可能会刷新 return pp; }
static int udp4_gro_complete(struct sk_buff *skb, int nhoff) { const struct iphdr *iph = ip_hdr(skb); //得到IP头 struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); //得到UDP头 if (uh->check) { skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; //如果check不为0,设置为tunnel csum uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, //刷新check值 iph->daddr, 0); } else { skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; //check为0,则设置非csum } return udp_gro_complete(skb, nhoff); }udp_gro_complete函数
int udp_gro_complete(struct sk_buff *skb, int nhoff) { struct udp_offload_priv *uo_priv; __be16 newlen = htons(skb->len - nhoff); //udp报文的新长度 struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); //得到UDP头 int err = -ENOSYS; uh->len = newlen; //设置UDP头中报文新的长度 rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { if (uo_priv->offload->port == uh->dest && uo_priv->offload->callbacks.gro_complete) //得到下一层的offload,例如vxlan break; } if (uo_priv) { NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; err = uo_priv->offload->callbacks.gro_complete(skb, //调用下一层的gro_complete函数,例如vxlan nhoff + sizeof(struct udphdr), uo_priv->offload); } rcu_read_unlock(); if (skb->remcsum_offload) skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; skb->encapsulation = 1; //设置encapsulation为1 skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr)); //设置inner mace header return err; }早期内核对于UDP报文是没有聚合能力的,对于vxlan报文是在udp收包阶段进行聚合的性能较差,新内核已经支持vxlan封装报文的聚合了。