在 skb 进入 ovs_dp_process_packet(位于 ovs/datapath/datapath.c)后,若没有查询到匹配的流表项,则进行 upcall:
/*
 * ovs_dp_process_packet - flow-miss path of the datapath packet handler.
 * @skb: packet being processed; its OVS control block supplies the input
 *       vport and the MRU.
 * @key: flow key extracted for @skb, forwarded to the upcall.
 *
 * This excerpt shows only what happens on a flow miss: an OVS_PACKET_CMD_MISS
 * upcall is built and handed to ovs_dp_upcall().
 */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	/*
	 * Look up the input vport and its datapath first: the upcall setup
	 * below needs `p` to find the destination netlink port id.
	 */
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	struct dp_upcall_info upcall;

	/* Miss flow. */
	memset(&upcall, 0, sizeof(upcall));
	upcall.cmd = OVS_PACKET_CMD_MISS;
	upcall.portid = ovs_vport_find_upcall_portid(p, skb);
	upcall.mru = OVS_CB(skb)->mru;

	/* Upcall; cutlen is 0. The result is not inspected in this excerpt. */
	ovs_dp_upcall(dp, skb, key, &upcall, 0);
}
在 ovs_dp_upcall 中:
/*
 * ovs_dp_upcall - dispatch a packet to the userspace upcall queueing helpers.
 * @dp: datapath the packet belongs to.
 * @skb: packet to send up.
 * @key: flow key for @skb.
 * @upcall_info: upcall parameters (command, destination port id, ...).
 * @cutlen: passed through unchanged to the queueing helper.
 *
 * Non-GSO packets go to queue_userspace_packet(); GSO packets go to
 * queue_gso_packets().
 *
 * Returns whatever the selected queueing helper returns.
 */
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info,
		  uint32_t cutlen)
{
	int err;

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
	else
		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);

	/* Propagate the result instead of falling off the end of an int function. */
	return err;
}
这里只是简单地判断了 skb 是否为 GSO 包(即是否需要先分段),我们这里只看无需分段的一支。
在 queue_userspace_packet 中:
/*
 * queue_userspace_packet - build a netlink message for @skb and unicast it
 * to userspace.
 * @dp: datapath the packet belongs to (used to find the network namespace).
 * @skb: packet to send up.
 * @key: flow key for @skb, serialized as OVS_PACKET_ATTR_KEY.
 * @upcall_info: supplies the destination netlink port id and sizing input.
 * @cutlen: unused in this excerpt.
 *
 * Returns -ENOMEM if the message buffer cannot be allocated, the error from
 * ovs_nla_put_key() or skb_zerocopy() if either fails, otherwise the result
 * of genlmsg_unicast().
 */
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info,
				  uint32_t cutlen)
{
	struct sk_buff *user_skb;
	size_t len;
	int err;

	len = upcall_msg_size(upcall_info, skb->len, OVS_CB(skb)->acts_origlen);

	/* GFP_ATOMIC allocations can fail; never dereference a NULL buffer. */
	user_skb = genlmsg_new(len, GFP_ATOMIC);
	if (!user_skb)
		return -ENOMEM;

	/* Put the key (the parsed per-layer header fields) into user_skb. */
	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
	if (err)
		goto out_free;

	/* Append the packet data itself. */
	err = skb_zerocopy(user_skb, skb, skb->len, skb->len);
	if (err)
		goto out_free;

	/* Fix up the netlink header length now that the payload is in place. */
	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

	/*
	 * NOTE(review): genlmsg_unicast() is expected to consume user_skb on
	 * both success and failure, so it is not freed here - confirm against
	 * the target kernel.
	 */
	return genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);

out_free:
	kfree_skb(user_skb);
	return err;
}
此时,通过 genlmsg_unicast 发送到用户空间的信息 user_skb 中只有该 skb 对应的 key 中的信息(即各层网络协议的收发地址信息)。user_skb 管理的数据部分是一个 generic message,从 upcall_msg_size 中我们不难看出该 generic message 的内容是什么。
/*
 * upcall_msg_size - number of bytes to allocate for an upcall message.
 * @upcall_info: upcall parameters; only the size of its mru field is used.
 * @hdrlen: number of packet bytes carried in OVS_PACKET_ATTR_PACKET.
 * @actions_attrlen: unused in this excerpt.
 *
 * The total is the aligned struct ovs_header plus one netlink attribute each
 * for the packet data, the flow key, the packet length and the MRU.
 */
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
			      unsigned int hdrlen, int actions_attrlen)
{
	/*
	 * Keep this a single expression: a ';' after the OVS_PACKET_ATTR_LEN
	 * term would turn the MRU term into a no-op statement and the buffer
	 * would be undersized.
	 */
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
		+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
		+ nla_total_size(sizeof(upcall_info->mru)); /* OVS_PACKET_ATTR_MRU */

	return size;
}
ovs_header 是 ovs 使用的自定义 generic message 协议头。
/**
 * struct ovs_header - header for OVS Generic Netlink messages.
 * @dp_ifindex: ifindex of the local port for the datapath (0 to make a
 *              request that is not specific to a datapath).
 *
 * Attributes following the header are specific to a particular OVS Generic
 * Netlink family, but all of the OVS families use this header.
 */
struct ovs_header {
int dp_ifindex;
};
skb_zerocopy 将 skb 的负载部分拷贝到 user_skb 的 tailroom 部分中(这里有一个疑问:ovs 源码的注释中说的却是拷贝到 user_skb 的 headroom 中)
#define skb_zerocopy rpl_skb_zerocopy
#ifndef HAVE_SKB_ZEROCOPY
/**
 * skb_zerocopy - Zero copy skb to skb
 * @to: destination buffer
 * @from: source buffer
 * @len: number of bytes to copy from source buffer
 * @hlen: number of leading bytes of @from to copy into the linear data
 *        area of @to; they are appended at the tail with skb_put()
 *
 * Copies up to `len` bytes from `from` to `to` by creating references
 * to the frags in the source buffer.
 *
 * If `len` fits into the tailroom of `to`, everything is simply copied
 * and no frag references are created.
 *
 * The `hlen` as calculated by skb_zerocopy_headlen() specifies how many
 * bytes are placed in the linear part of the `to` buffer. When it is 0,
 * the linear data of `from` is attached to `to` as page fragment 0.
 *
 * Return value:
 * 0: everything is OK
 * -EFAULT: skb_copy_bits() found some problem with skb geometry
 *
 * NOTE(review): the original comment also listed -ENOMEM (failure to
 * orphan the frags of @from); no such step is present in this version.
 */
int rpl_skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
{
	int i, j = 0;
	int plen = 0; /* length of skb->head fragment */
	int ret;
	struct page *page;
	unsigned int offset;

	/* If `to` has at least `len` bytes of tailroom, copy everything. */
	if (len <= skb_tailroom(to))
		return skb_copy_bits(from, 0, skb_put(to, len), len);

	/* Otherwise only part of the data is copied; the rest is referenced. */
	if (hlen) {
		ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
		/* Do not attach frags to a destination whose header copy failed. */
		if (ret)
			return ret;
		len -= hlen;
	} else {
		plen = min_t(int, skb_headlen(from), len);
		if (plen) {
			/* Attach the linear data of `from` as page fragment 0. */
			page = virt_to_head_page(from->head);
			offset = from->data - (unsigned char *)page_address(page);
			__skb_fill_page_desc(to, 0, page, offset, plen);
			get_page(page);
			j = 1;
			len -= plen;
		}
	}

	/* Account for the bytes that will live in page fragments. */
	to->truesize += len + plen;
	to->len += len + plen;
	to->data_len += len + plen;

	/* Paged part: share the source frags, trimming the last one to `len`. */
	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
		if (!len)
			break;
		skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
		skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
		len -= skb_shinfo(to)->frags[j].size;
		skb_frag_ref(to, j);
		j++;
	}
	skb_shinfo(to)->nr_frags = j;

	return 0;
}
EXPORT_SYMBOL_GPL(rpl_skb_zerocopy);
#endif