OVS datapath(dp)支持的action都在do_execute_actions函数中处理,包括:OVS_ACTION_ATTR_OUTPUT、OVS_ACTION_ATTR_USERSPACE、OVS_ACTION_ATTR_HASH、OVS_ACTION_ATTR_PUSH_MPLS、OVS_ACTION_ATTR_POP_MPLS、OVS_ACTION_ATTR_PUSH_VLAN、OVS_ACTION_ATTR_POP_VLAN、OVS_ACTION_ATTR_RECIRC、OVS_ACTION_ATTR_SET、OVS_ACTION_ATTR_SET_MASKED、OVS_ACTION_ATTR_SET_TO_MASKED、OVS_ACTION_ATTR_SAMPLE、OVS_ACTION_ATTR_CT。本系列将逐一分析这些action;其中output已在之前介绍datapath主流程时分析过,这里不再赘述。
OVS_ACTION_ATTR_USERSPACE的处理函数为output_userspace函数,以此函数作为入口进行分析。
1、output_userspace函数
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
const struct nlattr *actions, int actions_len)
{
struct ip_tunnel_info info;
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_ACTION; //封装upcall对象
upcall.mru = OVS_CB(skb)->mru;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0; //获取userspace action相关的信息
a = nla_next(a, &rem)) {
switch (nla_type(a)) {
case OVS_USERSPACE_ATTR_USERDATA:
upcall.userdata = a;
break;
case OVS_USERSPACE_ATTR_PID:
upcall.portid = nla_get_u32(a);
break;
case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
/* Get out tunnel info. */
struct vport *vport;
vport = ovs_vport_rcu(dp, nla_get_u32(a));
if (vport) {
int err;
upcall.egress_tun_info = &info;
err = ovs_vport_get_egress_tun_info(vport, skb,
&upcall);
if (err)
upcall.egress_tun_info = NULL;
}
break;
}
case OVS_USERSPACE_ATTR_ACTIONS: {
/* Include actions. */
upcall.actions = actions;
upcall.actions_len = actions_len;
break;
}
} /* End of switch. */
}
return ovs_dp_upcall(dp, skb, key, &upcall); //upcall
}
2、ovs_dp_upcall函数
/*
 * Send @skb to userspace as described by @upcall_info.
 *
 * Fails with -ENOTCONN when no Netlink port id is set. Otherwise GSO
 * packets go through queue_gso_packets() and all other packets through
 * queue_userspace_packet().
 *
 * Returns 0 on success. On any failure the per-CPU n_lost counter of @dp
 * is incremented and the negative error code is returned.
 */
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info)
{
	struct dp_stats_percpu *pcpu_stats;
	int err;

	if (upcall_info->portid == 0)
		err = -ENOTCONN;
	else if (skb_is_gso(skb))
		err = queue_gso_packets(dp, skb, key, upcall_info);
	else
		err = queue_userspace_packet(dp, skb, key, upcall_info);

	if (!err)
		return 0;

	/* Account the packet as lost; the update is bracketed by syncp. */
	pcpu_stats = this_cpu_ptr(dp->stats_percpu);
	u64_stats_update_begin(&pcpu_stats->syncp);
	pcpu_stats->n_lost++;
	u64_stats_update_end(&pcpu_stats->syncp);

	return err;
}
3、queue_userspace_packet函数
/*
 * Build one upcall Netlink message for @skb and unicast it to the
 * userspace port given by @upcall_info->portid.
 *
 * The message carries, in order: the flow key, the optional userdata, the
 * optional egress tunnel key, the optional action list, the optional MRU
 * and finally the packet data itself.
 *
 * @dp:          datapath the packet belongs to
 * @skb:         packet to deliver; it is not consumed by this function
 * @key:         flow key serialised as OVS_PACKET_ATTR_KEY
 * @upcall_info: command, destination port id and optional extras
 *
 * Returns 0 on success or a negative errno. Every failure after the VLAN
 * handling goes through the common cleanup path, which calls
 * skb_tx_error() and releases the temporary buffers; nothing on this path
 * is treated as fatal to the kernel.
 */
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
	struct nlattr *nla;
	struct genl_info info = {
#ifdef HAVE_GENLMSG_NEW_UNICAST
		.dst_sk = ovs_dp_get_net(dp)->genl_sock,
#endif
		.snd_portid = upcall_info->portid,
	};
	size_t len;
	unsigned int hlen;
	int err, dp_ifindex;

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex)
		return -ENODEV;

	if (skb_vlan_tag_present(skb)) {
		/* Work on a clone so the caller's skb is left untouched. */
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		/*
		 * NOTE(review): on failure the clone is presumably released
		 * by the helper, since nothing is freed here - confirm.
		 */
		nskb = vlan_insert_tag_set_proto(nskb, nskb->vlan_proto,
						 skb_vlan_tag_get(nskb));
		if (!nskb)
			return -ENOMEM;

		/*
		 * The tag has just been written into the packet data, so
		 * reset the TCI kept alongside the skb; presumably this
		 * prevents the same tag from being reported twice - confirm
		 * against vlan_set_tci().
		 */
		vlan_set_tci(nskb, 0);
		skb = nskb;
	}

	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	/* Complete checksum if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto out;

	/* Older versions of OVS user space enforce alignment of the last
	 * Netlink attribute to NLA_ALIGNTO which would require extensive
	 * padding logic. Only perform zerocopy if padding is not required.
	 */
	if (dp->user_features & OVS_DP_F_UNALIGNED)
		hlen = skb_zerocopy_headlen(skb);
	else
		hlen = skb->len;

	/* Allocate the upcall message. */
	len = upcall_msg_size(upcall_info, hlen);
	user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	/*
	 * The generic netlink family together with upcall_info->cmd tells
	 * the receiver which handler applies. The header pointer must be
	 * checked before it is written through.
	 */
	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
	if (!upcall) {
		err = -EINVAL;
		goto out;
	}
	upcall->dp_ifindex = dp_ifindex;

	/* Flow key. A serialisation failure drops this upcall only. */
	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
	if (err)
		goto out;

	/*
	 * Userdata. __nla_put() is the unchecked variant; this relies on
	 * upcall_msg_size() having reserved room for it (helper not shown
	 * here - TODO confirm).
	 */
	if (upcall_info->userdata)
		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
			  nla_len(upcall_info->userdata),
			  nla_data(upcall_info->userdata));

	/* Egress tunnel key, as a nested attribute. */
	if (upcall_info->egress_tun_info) {
		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_egress_tunnel_key(user_skb,
						    upcall_info->egress_tun_info,
						    upcall_info->egress_tun_opts);
		if (err)
			goto out;
		nla_nest_end(user_skb, nla);
	}

	/*
	 * Action list, as a nested attribute. If the actions cannot be
	 * serialised the nest is cancelled and the upcall is still sent
	 * without them.
	 */
	if (upcall_info->actions_len) {
		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_actions(upcall_info->actions,
					  upcall_info->actions_len,
					  user_skb);
		if (!err)
			nla_nest_end(user_skb, nla);
		else
			nla_nest_cancel(user_skb, nla);
	}

	/* Add OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru) {
		if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
				upcall_info->mru)) {
			err = -ENOBUFS;
			goto out;
		}
		pad_packet(dp, user_skb);
	}

	/* Only reserve room for attribute header, packet data is added
	 * in skb_zerocopy()
	 */
	nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0);
	if (!nla) {
		err = -ENOBUFS;
		goto out;
	}
	nla->nla_len = nla_attr_size(skb->len);

	/* Append the packet data to the message. */
	err = skb_zerocopy(user_skb, skb, skb->len, hlen);
	if (err)
		goto out;

	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
	pad_packet(dp, user_skb);

	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

	/*
	 * Send the Netlink message. user_skb is cleared afterwards so the
	 * cleanup below does not free it a second time.
	 */
	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
	user_skb = NULL;
out:
	if (err)
		skb_tx_error(skb);
	kfree_skb(user_skb);
	kfree_skb(nskb);
	return err;
}
由此可见,userspace action触发的upcall与精确流表未匹配触发的upcall,处理流程基本一致:两者都通过调用ovs_dp_upcall函数把信息发送给用户态程序,只是userspace action的upcall使用的cmd为OVS_PACKET_CMD_ACTION。用户态的upcall处理线程如何处理这些消息不在本篇的分析范围内,将在后续文章中分析。
通过userspace能够实现什么功能呢? 现在还想不出,等分析upcall处理后,再回过头来回答这个问题。