IP层的offload定义是ip_packet_offload
/*
 * Packet offload descriptor for the IP layer.
 *
 * .type holds ETH_P_IP converted with cpu_to_be16(); .callbacks wires up the
 * GSO segmentation handler and the GRO receive/complete handlers for IPv4.
 */
static struct packet_offload ip_packet_offload __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),
	.callbacks = {
		.gro_receive = inet_gro_receive,
		.gro_complete = inet_gro_complete,
		.gso_segment = inet_gso_segment,
	},
};
static struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff **pp = NULL; struct sk_buff *p; const struct iphdr *iph; unsigned int hlen; unsigned int off; unsigned int id; int flush = 1; int proto; off = skb_gro_offset(skb); hlen = off + sizeof(*iph); iph = skb_gro_header_fast(skb, off); //得到IP头,内核支持两种skb,放在线性区和放在frag if (skb_gro_header_hard(skb, hlen)) { iph = skb_gro_header_slow(skb, hlen, off); if (unlikely(!iph)) goto out; } proto = iph->protocol; //得到传输层协议 rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); //得到传输层对应的offload if (!ops || !ops->callbacks.gro_receive) //如果未找到对应的offload,则报文将被提交给协议栈 goto out_unlock; if (*(u8 *)iph != 0x45) //IP报文的协议版本必须为4,且报文头长度为20(5*4),否则报文将被提交给协议栈 goto out_unlock; if (unlikely(ip_fast_csum((u8 *)iph, 5))) //IP头csum校验,如果通不过,则flush置1,报文将被提交给协议栈 goto out_unlock; id = ntohl(*(__be32 *)&iph->id); //得到16位的ID值,3位flag和13位分片偏移 flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); //IP报文数据长度不等于gro_len或者报文携带DF标记,flush置1 id >>= 16; for (p = *head; p; p = p->next) { //遍历gro_list中的报文 struct iphdr *iph2; if (!NAPI_GRO_CB(p)->same_flow) //same_flow为零说明MAC的流匹配未通过,不需要下一步处理 continue; iph2 = (struct iphdr *)(p->data + off); //得到报文的IP头,此时采用线性区的方式,从当前报文的IP头获取方式,此处也将会改变 /* The above works because, with the exception of the top * (inner most) layer, we only aggregate pkts with the same * hdr length so all the hdrs we'll need to verify will start * at the same offset. */ if ((iph->protocol ^ iph2->protocol) | //IP层判断同一个流,要求:4层协议要相同 ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | //源地址要相同 ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { //目标地址要相同 NAPI_GRO_CB(p)->same_flow = 0; continue; } /* All fields must match except length and checksum. 
*/ NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | //同一个流,但是ttl、tos、有一个报文包含DF标记,则需要flush当前该报文 (iph->tos ^ iph2->tos) | ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)); /* Save the IP ID check to be included later when we get to * the transport layer so only the inner most IP ID is checked. * This is because some GSO/TSO implementations do not * correctly increment the IP ID for the outer hdrs. */ NAPI_GRO_CB(p)->flush_id = ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; //刷新报文的flush } NAPI_GRO_CB(skb)->flush |= flush; //刷新当前报文的flush skb_set_network_header(skb, off); //设置network header,可以找到IP头 /* The above will be needed by the transport layer if there is one * immediately following this IP hdr. */ /* Note : No need to call skb_gro_postpull_rcsum() here, * as we already checked checksum over ipv4 header was 0 */ skb_gro_pull(skb, sizeof(*iph)); //报文移动到4层头 skb_set_transport_header(skb, skb_gro_offset(skb)); //设置传输层header值 pp = ops->callbacks.gro_receive(head, skb); //调用4层的offload out_unlock: rcu_read_unlock(); out: NAPI_GRO_CB(skb)->flush |= flush; //刷新当前报文的flush,调用四层offload后,可能会刷新 return pp; }
static int inet_gro_complete(struct sk_buff *skb, int nhoff) { __be16 newlen = htons(skb->len - nhoff); struct iphdr *iph = (struct iphdr *)(skb->data + nhoff); //找到IP头 const struct net_offload *ops; int proto = iph->protocol; int err = -ENOSYS; if (skb->encapsulation) skb_set_inner_network_header(skb, nhoff); //如果报文是封装报文,那么iph指向的就是内层报文 csum_replace2(&iph->check, iph->tot_len, newlen); //由于长度变化,刷新csum值 iph->tot_len = newlen; //指定IP头中的长度字段 rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); //找到传输层的offload if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; /* Only need to add sizeof(*iph) to get to the next hdr below * because any hdr with option will have been flushed in * inet_gro_receive(). */ err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph)); //调用传输层的gro_complete函数 out_unlock: rcu_read_unlock(); return err; }