【Linux4.1.12源码分析】协议栈gro收包之VXLAN处理

VXLAN gro处理不是通过定义常量实现的,而是通过动态注册的实现的,在UDP层处理时我们知道是注册到udp_offload_base全局变量中,我们先看下注册过程

vxlan_socket_create函数

/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
					      vxlan_rcv_t *rcv, void *data,
					      u32 flags)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	struct vxlan_sock *vs;
	struct socket *sock;
	unsigned int h;
	bool ipv6 = !!(flags & VXLAN_F_IPV6);
	struct udp_tunnel_sock_cfg tunnel_cfg;

	vs = kzalloc(sizeof(*vs), GFP_KERNEL);
	if (!vs)
		return ERR_PTR(-ENOMEM);

	for (h = 0; h < VNI_HASH_SIZE; ++h)
		INIT_HLIST_HEAD(&vs->vni_list[h]);

	INIT_WORK(&vs->del_work, vxlan_del_work);

	sock = vxlan_create_sock(net, ipv6, port, flags);
	if (IS_ERR(sock)) {
		pr_info("Cannot bind port %d, err=%ld\n", ntohs(port),
			PTR_ERR(sock));
		kfree(vs);
		return ERR_CAST(sock);
	}

	vs->sock = sock;
	atomic_set(&vs->refcnt, 1);
	vs->rcv = rcv;
	vs->data = data;
	vs->flags = (flags & VXLAN_F_RCV_FLAGS);

	/* Initialize the vxlan udp offloads structure */
	vs->udp_offloads.port = port;
	vs->udp_offloads.callbacks.gro_receive  = vxlan_gro_receive;
	vs->udp_offloads.callbacks.gro_complete = vxlan_gro_complete;

	spin_lock(&vn->sock_lock);
	hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
	vxlan_notify_add_rx_port(vs);			//注册vxlan gro offload
	spin_unlock(&vn->sock_lock);

	/* Mark socket as an encapsulation socket. */
	tunnel_cfg.sk_user_data = vs;
	tunnel_cfg.encap_type = 1;
	tunnel_cfg.encap_rcv = vxlan_udp_encap_recv;
	tunnel_cfg.encap_destroy = NULL;

	setup_udp_tunnel_sock(net, sock, &tunnel_cfg);

	return vs;
}

vxlan_notify_add_rx_port函数

static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
{
	struct net_device *dev;
	struct sock *sk = vs->sock->sk;
	struct net *net = sock_net(sk);
	sa_family_t sa_family = sk->sk_family;
	__be16 port = inet_sk(sk)->inet_sport;
	int err;

	if (sa_family == AF_INET) {
		err = udp_add_offload(&vs->udp_offloads);	//注册vxlan gro offload对象
		if (err)
			pr_warn("vxlan: udp_add_offload failed with status %d\n", err);
	}

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		if (dev->netdev_ops->ndo_add_vxlan_port)
			dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
							    port);
	}
	rcu_read_unlock();
}
udp_add_offload函数

int udp_add_offload(struct udp_offload *uo)
{
	struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);

	if (!new_offload)
		return -ENOMEM;

	new_offload->offload = uo;

	spin_lock(&udp_offload_lock);
	new_offload->next = udp_offload_base;
	rcu_assign_pointer(udp_offload_base, new_offload);	//注册到udp_offload_base全局变量中
	spin_unlock(&udp_offload_lock);

	return 0;
}
接下来,我们看下vxlan gro offload的实现,包括vxlan_gro_receive和vxlan_gro_complete:

vxlan_gro_receive函数

static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
					  struct sk_buff *skb,
					  struct udp_offload *uoff)
{
	struct sk_buff *p, **pp = NULL;
	struct vxlanhdr *vh, *vh2;
	unsigned int hlen, off_vx;
	int flush = 1;
	struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock,	//通过udp_offload对象获得vxlan_sock对象
					     udp_offloads);
	u32 flags;
	struct gro_remcsum grc;

	skb_gro_remcsum_init(&grc);	//初始化grc

	off_vx = skb_gro_offset(skb);
	hlen = off_vx + sizeof(*vh);
	vh   = skb_gro_header_fast(skb, off_vx);	//得到vxlan头
	if (skb_gro_header_hard(skb, hlen)) {
		vh = skb_gro_header_slow(skb, hlen, off_vx);
		if (unlikely(!vh))
			goto out;
	}

	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */	//移动到数据区,实际是内层mac头
	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));	//csum值刷新

	flags = ntohl(vh->vx_flags);

	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {	//即发送方携带SKB_GSO_TUNNEL_REMCSUM标记
		vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),	//remcsum校验和刷新
				       ntohl(vh->vx_vni), &grc,
				       !!(vs->flags &
					  VXLAN_F_REMCSUM_NOPARTIAL));

		if (!vh)		//校验不通过,提交当前报文到协议栈
			goto out;
	}

	flush = 0;

	for (p = *head; p; p = p->next) {		//遍历gro_list中的报文
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		vh2 = (struct vxlanhdr *)(p->data + off_vx);
		if (vh->vx_flags != vh2->vx_flags ||	//flags和vni相同才是同一个流
		    vh->vx_vni != vh2->vx_vni) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	pp = eth_gro_receive(head, skb);	//调用mac层的gro_receive

out:
	skb_gro_remcsum_cleanup(skb, &grc);
	NAPI_GRO_CB(skb)->flush |= flush;

	return pp;
}
eth_gro_receive函数

struct sk_buff **eth_gro_receive(struct sk_buff **head,
				 struct sk_buff *skb)
{
	struct sk_buff *p, **pp = NULL;
	struct ethhdr *eh, *eh2;
	unsigned int hlen, off_eth;
	const struct packet_offload *ptype;
	__be16 type;
	int flush = 1;

	off_eth = skb_gro_offset(skb);
	hlen = off_eth + sizeof(*eh);
	eh = skb_gro_header_fast(skb, off_eth);		//得到mac头
	if (skb_gro_header_hard(skb, hlen)) {
		eh = skb_gro_header_slow(skb, hlen, off_eth);
		if (unlikely(!eh))
			goto out;
	}

	flush = 0;

	for (p = *head; p; p = p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		eh2 = (struct ethhdr *)(p->data + off_eth);
		if (compare_ether_header(eh, eh2)) {		//mac头相同则为同一个流
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	type = eh->h_proto;	//得到3层协议类型,例如IPV4协议

	rcu_read_lock();
	ptype = gro_find_receive_by_type(type);
	if (ptype == NULL) {				//未找到协议注册的gro offload等,则把报文提交给协议栈
		flush = 1;
		goto out_unlock;
	}

	skb_gro_pull(skb, sizeof(*eh));		//报文移到IP头
	skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));	//刷新csum值
	pp = ptype->callbacks.gro_receive(head, skb);	//调用ip层的gro_receive函数

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;

	return pp;
}

vxlan_gro_complete函数

static int vxlan_gro_complete(struct sk_buff *skb, int nhoff,
			      struct udp_offload *uoff)
{
	udp_tunnel_gro_complete(skb, nhoff);	//设置skb_shinfo(skb)->gso_type值

	return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
}
eth_gro_complete函数

int eth_gro_complete(struct sk_buff *skb, int nhoff)
{
	struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff);	//得到mac头
	__be16 type = eh->h_proto;
	struct packet_offload *ptype;
	int err = -ENOSYS;

	if (skb->encapsulation)
		skb_set_inner_mac_header(skb, nhoff);		//设置inner mac header

	rcu_read_lock();
	ptype = gro_find_complete_by_type(type);
	if (ptype != NULL)
		err = ptype->callbacks.gro_complete(skb, nhoff +	//调用ip层的gro_complete函数
						    sizeof(struct ethhdr));

	rcu_read_unlock();
	return err;
}

vxlan gro offload是实现了聚合内层报文为TCP的vxlan报文,传统协议在处理gro时,如果是UDP报文则提交给协议栈了,vxlan gro offload主要实现了vxlan csum相关的逻辑功能,以及继续内层的gro收包处理。

你可能感兴趣的:(Linux4.1.12源码分析)