【OVS2.5.0源码分析】datapath之action分析(1)

OVS dp支持的action都在do_execute_actions函数中分发处理,支持的action包括:OVS_ACTION_ATTR_OUTPUT、OVS_ACTION_ATTR_USERSPACE、OVS_ACTION_ATTR_HASH、OVS_ACTION_ATTR_PUSH_MPLS、OVS_ACTION_ATTR_POP_MPLS、OVS_ACTION_ATTR_PUSH_VLAN、OVS_ACTION_ATTR_POP_VLAN、OVS_ACTION_ATTR_RECIRC、OVS_ACTION_ATTR_SET、OVS_ACTION_ATTR_SET_MASKED、OVS_ACTION_ATTR_SET_TO_MASKED、OVS_ACTION_ATTR_SAMPLE、OVS_ACTION_ATTR_CT。本系列将逐一分析这些action;其中output已在之前介绍datapath主流程时分析过,这里不再赘述。

OVS_ACTION_ATTR_USERSPACE的处理函数为output_userspace函数,以此函数作为入口进行分析。

1、output_userspace函数

static int output_userspace(struct datapath *dp, struct sk_buff *skb,
			    struct sw_flow_key *key, const struct nlattr *attr,
			    const struct nlattr *actions, int actions_len)
{
	struct ip_tunnel_info info;
	struct dp_upcall_info upcall;
	const struct nlattr *a;
	int rem;

	memset(&upcall, 0, sizeof(upcall));
	upcall.cmd = OVS_PACKET_CMD_ACTION;         //封装upcall对象
	upcall.mru = OVS_CB(skb)->mru;

	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;	  //获取userspace action相关的信息
		 a = nla_next(a, &rem)) {
		switch (nla_type(a)) {
		case OVS_USERSPACE_ATTR_USERDATA:
			upcall.userdata = a;
			break;

		case OVS_USERSPACE_ATTR_PID:
			upcall.portid = nla_get_u32(a);
			break;

		case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
			/* Get out tunnel info. */
			struct vport *vport;

			vport = ovs_vport_rcu(dp, nla_get_u32(a));
			if (vport) {
				int err;

				upcall.egress_tun_info = &info;
				err = ovs_vport_get_egress_tun_info(vport, skb,
								    &upcall);
				if (err)
					upcall.egress_tun_info = NULL;
			}

			break;
		}

		case OVS_USERSPACE_ATTR_ACTIONS: {
			/* Include actions. */
			upcall.actions = actions;
			upcall.actions_len = actions_len;
			break;
		}

		} /* End of switch. */
	}

	return ovs_dp_upcall(dp, skb, key, &upcall);    //upcall
}
2、ovs_dp_upcall函数

/*
 * Send @skb to userspace as described by @upcall_info.
 *
 * Fails with -ENOTCONN when @upcall_info->portid is 0. Otherwise the packet
 * is queued via queue_gso_packets() when skb_is_gso() is true and via
 * queue_userspace_packet() when it is not. Every failure is counted in the
 * per-CPU n_lost statistic of @dp.
 *
 * Returns 0 on success or the non-zero error code of the failing step.
 */
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info)
{
	int ret;

	if (upcall_info->portid == 0)
		ret = -ENOTCONN;
	else if (skb_is_gso(skb))
		ret = queue_gso_packets(dp, skb, key, upcall_info);
	else
		ret = queue_userspace_packet(dp, skb, key, upcall_info);

	if (ret) {
		/* Account the packet as lost on this CPU. */
		struct dp_stats_percpu *pcpu = this_cpu_ptr(dp->stats_percpu);

		u64_stats_update_begin(&pcpu->syncp);
		pcpu->n_lost++;
		u64_stats_update_end(&pcpu->syncp);
	}

	return ret;
}
3、queue_userspace_packet函数

static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
	struct nlattr *nla;
	struct genl_info info = {
#ifdef HAVE_GENLMSG_NEW_UNICAST
		.dst_sk = ovs_dp_get_net(dp)->genl_sock,
#endif
		.snd_portid = upcall_info->portid,
	};
	size_t len;
	unsigned int hlen;
	int err, dp_ifindex;

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex)
		return -ENODEV;

	if (skb_vlan_tag_present(skb)) {
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		nskb = vlan_insert_tag_set_proto(nskb, nskb->vlan_proto, skb_vlan_tag_get(nskb));
		if (!nskb)
			return -ENOMEM;

		vlan_set_tci(nskb, 0);	//为什么要把tci置0?

		skb = nskb;
	}

	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	/* Complete checksum if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto out;

	/* Older versions of OVS user space enforce alignment of the last
	 * Netlink attribute to NLA_ALIGNTO which would require extensive
	 * padding logic. Only perform zerocopy if padding is not required.
	 */
	if (dp->user_features & OVS_DP_F_UNALIGNED)
		hlen = skb_zerocopy_headlen(skb);
	else
		hlen = skb->len;

	len = upcall_msg_size(upcall_info, hlen);
	user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);		//创建upcall消息对象
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,	//dp_packet_genl_family 和 upcall_info->cmd确定处理函数
			     0, upcall_info->cmd);
	upcall->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);  //upcall信息对象添加key
	BUG_ON(err);

	if (upcall_info->userdata)    
		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,   //upcall信息对象添加userdata
			  nla_len(upcall_info->userdata),
			  nla_data(upcall_info->userdata));


	if (upcall_info->egress_tun_info) {
		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);  //upcall信息对象添加egress_tun_info
		err = ovs_nla_put_egress_tunnel_key(user_skb,
						    upcall_info->egress_tun_info,
						    upcall_info->egress_tun_opts);
		BUG_ON(err);
		nla_nest_end(user_skb, nla);
	}

	if (upcall_info->actions_len) {
		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);  //upcall信息对象添加actions
		err = ovs_nla_put_actions(upcall_info->actions,
					  upcall_info->actions_len,
					  user_skb);
		if (!err)
			nla_nest_end(user_skb, nla);
		else
			nla_nest_cancel(user_skb, nla);
	}

	/* Add OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru) {
		if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
				upcall_info->mru)) {
			err = -ENOBUFS;
			goto out;
		}
		pad_packet(dp, user_skb);
	}

	/* Only reserve room for attribute header, packet data is added
	 * in skb_zerocopy()
	 */
	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
		err = -ENOBUFS;
		goto out;
	}
	nla->nla_len = nla_attr_size(skb->len);

	err = skb_zerocopy(user_skb, skb, skb->len, hlen);    //upcall信息对象添加报文
	if (err)
		goto out;

	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
	pad_packet(dp, user_skb);

	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);    //发送netlink报文
	user_skb = NULL;
out:
	if (err)
		skb_tx_error(skb);
	kfree_skb(user_skb);
	kfree_skb(nskb);
	return err;
}

到此可以看到,userspace action与流表未命中(miss)触发的upcall在处理流程上基本一致:两者都是通过调用ovs_dp_upcall函数把信息发送给用户态程序。用户态的upcall处理线程如何处理这些消息不在本篇的分析范围内,将在后续文章中分析。

通过userspace能够实现什么功能呢? 现在还想不出,等分析upcall处理后,再回过头来回答这个问题。

你可能感兴趣的:(OVS2.5.0源码分析,OVS,openvswitch,网络)