OpenvSwitch sw_flow 流表项添加过程详解之一

在 skb 进入 ovs_dp_process_packet 后(位于 ovs/datapath/datapath.c),若未查找到匹配的流表项,则进行 upcall

/*
 * ovs_dp_process_packet - flow-miss path only (abridged excerpt).
 * @skb: packet being processed; its OVS control block supplies the input
 *       vport and the MRU.
 * @key: flow key for the packet, passed through to the upcall unchanged.
 *
 * Fills in a dp_upcall_info describing the miss and hands the packet to
 * ovs_dp_upcall() with a cut length of 0.
 */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
    /* Resolve the input vport first: it is needed below to choose the
     * upcall port id, and it also gives us the owning datapath. */
    const struct vport *p = OVS_CB(skb)->input_vport;
    struct datapath *dp = p->dp;
    struct dp_upcall_info upcall;

    /* Miss flow. */
    memset(&upcall, 0, sizeof(upcall));
    upcall.cmd = OVS_PACKET_CMD_MISS;
    upcall.portid = ovs_vport_find_upcall_portid(p, skb);
    upcall.mru = OVS_CB(skb)->mru;

    /* Upcall */
    ovs_dp_upcall(dp, skb, key, &upcall, 0);
}

在 ovs_dp_upcall 中:

/*
 * ovs_dp_upcall - queue a packet for delivery to userspace (abridged excerpt).
 * @dp: datapath the packet belongs to.
 * @skb: packet to send up.
 * @key: flow key for the packet.
 * @upcall_info: description of the upcall (command, port id, ...).
 * @cutlen: passed through to the queueing helper unchanged.
 *
 * Dispatches on skb_is_gso(): GSO packets go to queue_gso_packets(), all
 * others to queue_userspace_packet().
 *
 * Returns the status reported by whichever helper was called.
 * NOTE(review): assumes queue_gso_packets() returns an int status like
 * queue_userspace_packet() does - confirm against its definition.
 */
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
          const struct sw_flow_key *key,
          const struct dp_upcall_info *upcall_info,
          uint32_t cutlen)
{
    if (skb_is_gso(skb))
        return queue_gso_packets(dp, skb, key, upcall_info, cutlen);

    return queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
}

这里只是简单地判断了 skb 是否需要分片(即是否为 GSO 包),我们这里只看无需分片的一支。
在 queue_userspace_packet 中:

/*
 * queue_userspace_packet - build one upcall message and unicast it to
 * userspace (abridged excerpt).
 * @dp: datapath the packet belongs to; used to find the network namespace.
 * @skb: packet whose data is copied into the message.
 * @key: flow key serialized into the message as OVS_PACKET_ATTR_KEY.
 * @upcall_info: supplies the destination netlink port id and sizing input.
 * @cutlen: not used in this excerpt.
 *
 * Returns -ENOMEM if the message buffer cannot be allocated; otherwise the
 * first non-zero status from ovs_nla_put_key(), skb_zerocopy() or
 * genlmsg_unicast(), or that final call's status if all steps succeed.
 */
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info,
                  uint32_t cutlen)
{
    size_t len = upcall_msg_size(upcall_info, skb->len, OVS_CB(skb)->acts_origlen);
    struct sk_buff *user_skb;
    int err;

    user_skb = genlmsg_new(len, GFP_ATOMIC);
    if (!user_skb)
        return -ENOMEM;

    /* Put the information held in key (i.e. the protocol header fields of
     * each network layer) into user_skb. */
    err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
    if (err)
        goto out;

    err = skb_zerocopy(user_skb, skb, skb->len, skb->len);
    if (err)
        goto out;

    /* Patch the netlink header so its length covers everything appended. */
    ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

    err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
    /* The netlink layer consumes user_skb whether or not delivery
     * succeeded, so it must not be freed again below. */
    user_skb = NULL;
out:
    /* kfree_skb(NULL) is a no-op; this only frees on the early-error paths. */
    kfree_skb(user_skb);
    return err;
}

此时,通过 genlmsg_unicast 发送到用户空间的消息 user_skb 中包含该 skb 对应的 key 中的信息(即各层网络协议头中的收发地址等信息),以及由 skb_zerocopy 拷入的报文数据。user_skb 管理的数据部分是一个 generic netlink message,从 upcall_msg_size 中我们不难看出该 message 由哪些部分组成。

/*
 * upcall_msg_size - buffer size needed for one upcall message
 * (abridged excerpt).
 * @upcall_info: upcall description; only the size of its mru field is used.
 * @hdrlen: number of packet bytes carried in OVS_PACKET_ATTR_PACKET.
 * @actions_attrlen: not used in this excerpt.
 *
 * Returns the aligned struct ovs_header size plus the total size of the
 * PACKET, KEY, LEN and MRU attributes.
 */
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                  unsigned int hdrlen, int actions_attrlen)
{
    /* One expression: a ';' after any term but the last would silently
     * drop the remaining attributes from the total. */
    size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
        + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
        + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
        + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
        + nla_total_size(sizeof(upcall_info->mru)); /* OVS_PACKET_ATTR_MRU */

    return size;
}

ovs_header 是 ovs 使用的自定义 generic message 协议头。

/**
 * struct ovs_header - common header of every OVS Generic Netlink message.
 * @dp_ifindex: ifindex of the datapath's local port; 0 makes the request
 * not specific to any datapath.
 *
 * The attributes that follow this header depend on the particular OVS
 * Generic Netlink family, but the header itself is shared by all of the
 * OVS families.
 */
struct ovs_header {
    int dp_ifindex; /* 0 == request not tied to a datapath */
};

skb_zerocopy 将 skb 的负载部分拷贝到 user_skb 的 tailroom 部分中(这里有一个疑问:ovs 源码的注释中说的却是拷贝到 user_skb 的 headroom 中)

#define skb_zerocopy rpl_skb_zerocopy
#ifndef HAVE_SKB_ZEROCOPY
/**
 *    skb_zerocopy - Zero copy skb to skb
 *    @to: destination buffer
 *    @from: source buffer
 *    @len: number of bytes to copy from source buffer
 *    @hlen: number of bytes to copy into the linear data area of the
 *           destination buffer when the whole payload does not fit there
 *
 *    Copies up to `len` bytes from `from` to `to`. If `to` has at least
 *    `len` bytes of tailroom, everything is copied linearly. Otherwise
 *    the first `hlen` bytes are copied linearly (or, when `hlen` is 0, the
 *    head of `from` is attached as a page fragment) and the rest is
 *    shared by creating references to the frags in the source buffer.
 *
 *    Return value:
 *    0: everything is OK
 *    otherwise: the error reported by skb_copy_bits()
 *
 *    NOTE(review): the original header comment also listed -ENOMEM for a
 *    failure to orphan the frags of @from, but no orphaning step is
 *    present in this excerpt - confirm against the upstream source.
 */
int rpl_skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
{
    int i, j = 0;
    int plen = 0; /* length of skb->head fragment */
    int ret;
    struct page *page;
    unsigned int offset;

    /* If `to` has room for all `len` bytes in its tailroom, copy the
     * whole payload linearly and finish. */
    if (len <= skb_tailroom(to))
        return skb_copy_bits(from, 0, skb_put(to, len), len);

    /* Otherwise only part of the payload goes into the linear area. */
    if (hlen) {
        ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
        /* Do not report success for a header copy that failed. */
        if (ret)
            return ret;
        len -= hlen;
    } else {
        plen = min_t(int, skb_headlen(from), len);
        if (plen) {
            /* Attach the linear head of `from` as frag 0 of `to`
             * and take a reference on its page. */
            page = virt_to_head_page(from->head);
            offset = from->data - (unsigned char *)page_address(page);
            __skb_fill_page_desc(to, 0, page, offset, plen);
            get_page(page);
            j = 1;
            len -= plen;
        }
    }

    /* Account for the bytes that will live in page fragments. */
    to->truesize += len + plen;
    to->len += len + plen;
    to->data_len += len + plen;

    /* Paged part: share the source frags, trimming the last one so that
     * no more than `len` bytes are referenced in total. */
    for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
        if (!len)
            break;
        skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
        skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
        len -= skb_shinfo(to)->frags[j].size;
        skb_frag_ref(to, j);
        j++;
    }
    skb_shinfo(to)->nr_frags = j;
    return 0;
}
EXPORT_SYMBOL_GPL(rpl_skb_zerocopy);
#endif

你可能感兴趣的:(ovs,sdn)