【Linux4.1.12源码分析】协议栈gro收包之IP层处理

IP层的offload定义是ip_packet_offload

/* Offload descriptor for IPv4: matched on the big-endian EtherType
 * ETH_P_IP and wired to the inet GSO segmentation callback and the
 * inet GRO receive/complete callbacks.
 */
static struct packet_offload ip_packet_offload __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),
	.callbacks = {
		.gso_segment = inet_gso_segment,
		.gro_receive = inet_gro_receive,
		.gro_complete = inet_gro_complete,
	},
};


inet_gro_receive函数

/* IPv4-layer GRO receive handler.
 *
 * Validates the IPv4 header of @skb, compares it against every packet
 * held on the @head list to update their same_flow / flush / flush_id
 * state, then hands @skb to the gro_receive callback of the transport
 * protocol named in the IP header.
 *
 * Returns the value produced by the transport-layer gro_receive callback,
 * or NULL when the function bails out before reaching it. On every exit
 * path the local flush value is OR-ed into NAPI_GRO_CB(skb)->flush; it
 * stays 1 if any of the early header checks fails.
 */
static struct sk_buff **inet_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb)
{
	const struct net_offload *ops;
	struct sk_buff **pp = NULL;
	struct sk_buff *p;
	const struct iphdr *iph;
	unsigned int hlen;
	unsigned int off;
	unsigned int id;
	int flush = 1;
	int proto;

	off = skb_gro_offset(skb);
	hlen = off + sizeof(*iph);
	iph = skb_gro_header_fast(skb, off);		// locate the IP header; the header may live in the linear area or in a frag, hence a fast and a slow accessor
	if (skb_gro_header_hard(skb, hlen)) {
		iph = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!iph))
			goto out;
	}

	proto = iph->protocol;		// transport-layer protocol number

	rcu_read_lock();
	ops = rcu_dereference(inet_offloads[proto]);	// look up the offload registered for that transport protocol
	if (!ops || !ops->callbacks.gro_receive)	// no offload / no gro_receive: leave with flush == 1 so the packet is not aggregated
		goto out_unlock;

	if (*(u8 *)iph != 0x45)		// first header byte must be 0x45: version 4 and header length 5 words (20 bytes, no options); otherwise bail out
		goto out_unlock;

	if (unlikely(ip_fast_csum((u8 *)iph, 5)))	// IP header checksum over the 5 header words; on failure bail out with flush still 1
		goto out_unlock;

	id = ntohl(*(__be32 *)&iph->id);	// 32-bit word starting at the id field: high 16 bits = IP ID, low 16 bits = 3 flag bits + 13-bit fragment offset
	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); // non-zero if tot_len differs from the GRO length, or if any flag/offset bit other than DF is set (i.e. the packet is a fragment)
	id >>= 16;	// keep only the 16-bit IP ID

	for (p = *head; p; p = p->next) {	// walk the packets held on the GRO list
		struct iphdr *iph2;

		if (!NAPI_GRO_CB(p)->same_flow)	// already ruled out as a different flow by a lower layer; nothing more to check
			continue;

		iph2 = (struct iphdr *)(p->data + off);	 // IP header of the held packet, read directly from its data at the same offset
		/* The above works because, with the exception of the top
		 * (inner most) layer, we only aggregate pkts with the same
		 * hdr length so all the hdrs we'll need to verify will start
		 * at the same offset.
		 */
		if ((iph->protocol ^ iph2->protocol) |		// same flow at the IP layer requires: identical transport protocol,
		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |	// identical source address,
		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {	// and identical destination address
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		/* All fields must match except length and checksum. */
		NAPI_GRO_CB(p)->flush |=
			(iph->ttl ^ iph2->ttl) |	// same flow, but a difference in ttl, tos or the DF bit marks the held packet for flushing
			(iph->tos ^ iph2->tos) |
			((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));

		/* Save the IP ID check to be included later when we get to
		 * the transport layer so only the inner most IP ID is checked.
		 * This is because some GSO/TSO implementations do not
		 * correctly increment the IP ID for the outer hdrs.
		 */
		NAPI_GRO_CB(p)->flush_id =
			    ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
		NAPI_GRO_CB(p)->flush |= flush;		// fold the new packet's own flush condition into the held packet
	}

	NAPI_GRO_CB(skb)->flush |= flush;	// record the flush condition on the new packet
	skb_set_network_header(skb, off);	// network header offset now points at the IP header
	/* The above will be needed by the transport layer if there is one
	 * immediately following this IP hdr.
	 */

	/* Note : No need to call skb_gro_postpull_rcsum() here,
	 * as we already checked checksum over ipv4 header was 0
	 */
	skb_gro_pull(skb, sizeof(*iph));	// advance the GRO offset past the IP header to the transport header
	skb_set_transport_header(skb, skb_gro_offset(skb));	// record the transport header offset

	pp = ops->callbacks.gro_receive(head, skb);	// hand over to the transport-layer offload

out_unlock:
	rcu_read_unlock();

out:
	NAPI_GRO_CB(skb)->flush |= flush;	// also reached from the early exits above, where flush is still 1

	return pp;
}

inet_gro_complete函数

/* IPv4-layer GRO completion handler.
 *
 * Rewrites the total-length field (and patches the header checksum to
 * match) of the IPv4 header found at offset @nhoff in @skb, then calls the
 * gro_complete callback of the transport protocol named in that header.
 *
 * Returns the transport callback's result, or -ENOSYS when no offload with
 * a gro_complete callback is registered for the protocol.
 */
static int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
	struct iphdr *ip4h = (struct iphdr *)(skb->data + nhoff);
	__be16 merged_len = htons(skb->len - nhoff);
	const struct net_offload *l4_ops;
	int l4_proto = ip4h->protocol;
	int ret;

	/* For an encapsulated packet, record this header's offset as the
	 * inner network header.
	 */
	if (skb->encapsulation)
		skb_set_inner_network_header(skb, nhoff);

	/* The length field changed, so adjust the checksum incrementally
	 * before storing the new length.
	 */
	csum_replace2(&ip4h->check, ip4h->tot_len, merged_len);
	ip4h->tot_len = merged_len;

	rcu_read_lock();
	l4_ops = rcu_dereference(inet_offloads[l4_proto]);
	if (WARN_ON(!l4_ops || !l4_ops->callbacks.gro_complete)) {
		ret = -ENOSYS;
	} else {
		/* The next header sits exactly sizeof(*ip4h) further on:
		 * packets whose IP header carries options were already
		 * flushed in inet_gro_receive().
		 */
		ret = l4_ops->callbacks.gro_complete(skb,
						     nhoff + sizeof(*ip4h));
	}
	rcu_read_unlock();

	return ret;
}

inet_gro_receive函数完成IP层的same_flow和flush判断,并调用传输层的gro_receive;inet_gro_complete在报文合并完成后更新IP头中的tot_len和校验和,并调用传输层的gro_complete。

你可能感兴趣的:(源码,linux,GRO)