ovs-vsctl add-port br0 eth1 实际做了什么?


      ovs-vsctl 这个应用程序的主要职责是根据用户的命令和ovsdb沟通,将配置信息更新到数据库中;而vswitchd会在需要重新配置的时候和ovsdb打交道,而后和内核datapath通信执行真正的动作(通过netlink传递)。ovs-vsctl 的源码中规定了命令的语法格式(vsctl_command_syntax)以及所支持的所有命令(all_commands),这里主要看add-port相关的部分。OVS_VPORT_CMD_NEW到达内核后的处理流程见图 1。
ovs-vsctl add-port br0 eth1 实际做了什么?_第1张图片
图 1
     
/* Describes one ovs-vsctl command: its name, how many arguments it accepts,
 * the callbacks that implement it, the options it understands, and whether
 * it modifies the database. */
struct  vsctl_command_syntax {
    const char *name;           /* e.g. "add-br" */
    int min_args;               /* Min number of arguments following name. */
    int max_args;               /* Max number of arguments following name. */

     void (*prerequisites)(struct vsctl_context *ctx);
      /* If nonnull, calls ovsdb_idl_add_column() or ovsdb_idl_add_table() for
       * the columns/tables the command is going to touch.
       * NOTE(review): the original author was unsure about the exact purpose
       * of this hook -- confirm against the ovs-vsctl sources. */

    void (*run)(struct vsctl_context *ctx);
     /* Performs the transaction for this command and stores the command's
      * output (if any) in ctx->output or ctx->table.  If a precondition of
      * the command is not yet satisfied and it has to be retried later, it
      * sets ctx->try_again = true (per the original note, only "wait-until"
      * does this). */
 
    void (*postprocess)(struct vsctl_context *ctx);
     /* If nonnull, called after the transaction has completed to do any
      * follow-up processing; ctx->output is the output produced by run().
      * Per the original note, only "create" has a postprocess step. */

    const char *options;   /* Comma-separated list of options, e.g. "--a,--b". */
    enum { RO, RW } mode;       /* Whether this command modifies OVSDB. */
};

/* Table of every command ovs-vsctl understands.
 *
 * Each row is a positional initializer for struct vsctl_command_syntax:
 *   { name, min_args, max_args, prerequisites, run, postprocess, options, mode }
 *
 * The table ends with a row whose name is NULL.  The "add-port" row routes
 * to cmd_add_port(), takes at least two arguments, and accepts the
 * "--may-exist" option. */
static const struct vsctl_command_syntax  all_commands[] = {
    /* Open vSwitch commands. */
    {"init", 0, 0, NULL, cmd_init, NULL, "", RW},
    {"show", 0, 0, pre_cmd_show, cmd_show, NULL, "", RO},

    /* Bridge commands. */
    {"add-br", 1, 3, pre_get_info, cmd_add_br, NULL, "--may-exist", RW},
    {"del-br", 1, 1, pre_get_info, cmd_del_br, NULL, "--if-exists", RW},
    {"list-br", 0, 0, pre_get_info, cmd_list_br, NULL, "--real,--fake", RO},
    {"br-exists", 1, 1, pre_get_info, cmd_br_exists, NULL, "", RO},
    {"br-to-vlan", 1, 1, pre_get_info, cmd_br_to_vlan, NULL, "", RO},
    {"br-to-parent", 1, 1, pre_get_info, cmd_br_to_parent, NULL, "", RO},
    {"br-set-external-id", 2, 3, pre_cmd_br_set_external_id, cmd_br_set_external_id, NULL, "", RW},
    {"br-get-external-id", 1, 2, pre_cmd_br_get_external_id, cmd_br_get_external_id, NULL, "", RO},

    /* Port commands. */
    {"list-ports", 1, 1, pre_get_info, cmd_list_ports, NULL, "", RO},
     {"add-port", 2, INT_MAX, pre_get_info, cmd_add_port, NULL, "--may-exist", RW},
    {"add-bond", 4, INT_MAX, pre_get_info, cmd_add_bond, NULL, "--may-exist,--fake-iface", RW},
    {"del-port", 1, 2, pre_get_info, cmd_del_port, NULL, "--if-exists,--with-iface", RW},
    {"port-to-br", 1, 1, pre_get_info, cmd_port_to_br, NULL, "", RO},

    /* Interface commands. */
    {"list-ifaces", 1, 1, pre_get_info, cmd_list_ifaces, NULL, "", RO},
    {"iface-to-br", 1, 1, pre_get_info, cmd_iface_to_br, NULL, "", RO},

    /* Controller commands. */
    {"get-controller", 1, 1, pre_controller, cmd_get_controller, NULL, "", RO},
    {"del-controller", 1, 1, pre_controller, cmd_del_controller, NULL, "", RW},
    {"set-controller", 1, INT_MAX, pre_controller, cmd_set_controller, NULL,
     "", RW},
    {"get-fail-mode", 1, 1, pre_get_info, cmd_get_fail_mode, NULL, "", RO},
    {"del-fail-mode", 1, 1, pre_get_info, cmd_del_fail_mode, NULL, "", RW},
    {"set-fail-mode", 2, 2, pre_get_info, cmd_set_fail_mode, NULL, "", RW},

    /* Manager commands. */
    {"get-manager", 0, 0, pre_manager, cmd_get_manager, NULL, "", RO},
    {"del-manager", 0, 0, pre_manager, cmd_del_manager, NULL, "", RW},
    {"set-manager", 1, INT_MAX, pre_manager, cmd_set_manager, NULL, "", RW},

    /* SSL commands. */
    {"get-ssl", 0, 0, pre_cmd_get_ssl, cmd_get_ssl, NULL, "", RO},
    {"del-ssl", 0, 0, pre_cmd_del_ssl, cmd_del_ssl, NULL, "", RW},
    {"set-ssl", 3, 3, pre_cmd_set_ssl, cmd_set_ssl, NULL, "--bootstrap", RW},

    /* Switch commands. */
    {"emer-reset", 0, 0, pre_cmd_emer_reset, cmd_emer_reset, NULL, "", RW},

    /* Database commands. */
    {"comment", 0, INT_MAX, NULL, NULL, NULL, "", RO},
    {"get", 2, INT_MAX, pre_cmd_get, cmd_get, NULL, "--if-exists,--id=", RO},
    {"list", 1, INT_MAX, pre_cmd_list, cmd_list, NULL, "--columns=", RO},
    {"find", 1, INT_MAX, pre_cmd_find, cmd_find, NULL, "--columns=", RO},
    {"set", 3, INT_MAX, pre_cmd_set, cmd_set, NULL, "", RW},
    {"add", 4, INT_MAX, pre_cmd_add, cmd_add, NULL, "", RW},
    {"remove", 4, INT_MAX, pre_cmd_remove, cmd_remove, NULL, "", RW},
    {"clear", 3, INT_MAX, pre_cmd_clear, cmd_clear, NULL, "", RW},
    {"create", 2, INT_MAX, pre_create, cmd_create, post_create, "--id=", RW},
    {"destroy", 1, INT_MAX, pre_cmd_destroy, cmd_destroy, NULL, "--if-exists,--all", RW},
    {"wait-until", 2, INT_MAX, pre_cmd_wait_until, cmd_wait_until, NULL, "", RO},
    {NULL, 0, 0, NULL, NULL, NULL, NULL, RO},
};

/* Handler for the "add-port" command.
 *
 * argv[1] is the bridge name and argv[2] the port name; the port's single
 * interface carries the same name as the port, and everything from argv[3]
 * onwards is forwarded to add_port() as column settings. */
static void   cmd_add_port(struct vsctl_context *ctx) {
    const char *br_name = ctx->argv[1];
    char **port_arg = &ctx->argv[2];
    /* True when the user passed --may-exist on the command line. */
    bool may_exist = (NULL != shash_find(&ctx->options, "--may-exist"));

    add_port(ctx, br_name, *port_arg, may_exist, false,
             port_arg, 1,
             &ctx->argv[3], ctx->argc - 3);
}

/* Adds a port named 'port_name', made up of the 'n_ifaces' interfaces named
 * in 'iface_names', to bridge 'br_name' within the transaction ctx->txn.
 *
 * 'may_exist':   if true and a port with that name already exists, succeed
 *                silently provided it is on the same bridge and has exactly
 *                the requested interfaces; otherwise report a fatal error.
 * 'fake_iface':  stored in the new port's bond_fake_iface column.
 * 'settings':    'n_settings' strings, each applied to the new Port row via
 *                set_column().
 *
 * Besides queuing the database inserts, the function mirrors the new port
 * and interfaces into the vsctl cache. */
static void   add_port(struct vsctl_context *ctx,   const char *br_name, const char *port_name, bool may_exist,
                               bool fake_iface,  char *iface_names[], int n_ifaces,  char *settings[], int n_settings)  {
    struct vsctl_port *vsctl_port;
    struct vsctl_bridge *bridge;
    struct ovsrec_interface **ifaces;
    struct ovsrec_port *port;
    size_t i;

    vsctl_context_populate_cache(ctx); 
    if (may_exist) {
        /* NOTE(review): this declaration shadows the function-level
         * 'vsctl_port' declared above. */
        struct vsctl_port *vsctl_port;

        vsctl_port = find_port(ctx, port_name, false);
        if (vsctl_port) {
            struct svec want_names, have_names;

            /* Sorted list of the interface names the caller asked for. */
            svec_init(&want_names);
            for (i = 0; i < n_ifaces; i++) {
                svec_add(&want_names, iface_names[i]);
            }
            svec_sort(&want_names);

            /* Sorted list of the interface names the existing port has. */
            svec_init(&have_names);
            for (i = 0; i < vsctl_port->port_cfg->n_interfaces; i++) {
                svec_add(&have_names, vsctl_port->port_cfg->interfaces[i]->name);
            }
            svec_sort(&have_names);

            /* The existing port must live on the requested bridge.
             * vsctl_fatal() presumably does not return -- confirm. */
            if (strcmp(vsctl_port->bridge->name, br_name)) {
                char *command = vsctl_context_to_string(ctx);
                vsctl_fatal("\"%s\" but %s is actually attached to bridge %s", command, port_name, vsctl_port->bridge->name);
            }

            /* ...and must have exactly the requested set of interfaces. */
            if (!svec_equal(&want_names, &have_names)) {
                char *have_names_string = svec_join(&have_names, ", ", "");
                char *command = vsctl_context_to_string(ctx);

                vsctl_fatal("\"%s\" but %s actually has interface(s) %s",command, port_name, have_names_string);
            }

            svec_destroy(&want_names);
            svec_destroy(&have_names);

            /* Port already exists in the requested shape: nothing to do. */
            return;
        }
    }
    /* Check the new port name and each interface name for conflicts.
     * presumably check_conflicts() takes ownership of the xasprintf()'d
     * message -- verify against its definition. */
    check_conflicts(ctx, port_name,  xasprintf("cannot create a port named %s", port_name));
    for (i = 0; i < n_ifaces; i++) {
        check_conflicts(ctx, iface_names[i], xasprintf("cannot create an interface named %s", iface_names[i]));
    }
     bridge = find_bridge(ctx, br_name, true);

    /* Insert one Interface row per requested interface name. */
    ifaces = xmalloc(n_ifaces * sizeof *ifaces);
    for (i = 0; i < n_ifaces; i++) {
        ifaces[i] = ovsrec_interface_insert(ctx->txn);
        ovsrec_interface_set_name(ifaces[i], iface_names[i]);
    }

    /* Insert the Port row and point it at the interfaces just created. */
     port = ovsrec_port_insert(ctx->txn);
    ovsrec_port_set_name(port, port_name);
    ovsrec_port_set_interfaces(port, ifaces, n_ifaces);
    ovsrec_port_set_bond_fake_iface(port, fake_iface);


    /* A bridge that has a parent contributes its VLAN as the port's tag. */
    if (bridge->parent) {
        int64_t tag = bridge->vlan;
        ovsrec_port_set_tag(port, &tag, 1);
    }

    /* Apply the user-supplied settings to the new Port row. */
    for (i = 0; i < n_settings; i++) {
        set_column(get_table("Port"), &port->header_, settings[i], ctx->symtab);
    }

    /* The port is attached to the parent's bridge record when the bridge
     * has a parent, otherwise to the bridge's own record. */
     bridge_insert_port((bridge->parent ? bridge->parent->br_cfg : bridge->br_cfg), port);

    /* Keep the in-memory cache in step with what was just queued. */
    vsctl_port = add_port_to_cache(ctx, bridge, port);
    for (i = 0; i < n_ifaces; i++) {
        add_iface_to_cache(ctx, vsctl_port, ifaces[i]);
    }
    free(ifaces);
}

void ovsrec_port_set_name(const struct ovsrec_port *row, const char *name) {
    struct ovsdb_datum datum;

    assert(inited);
    datum.n = 1;
    datum.keys = xmalloc(sizeof *datum.keys);
    datum.keys[0].string = xstrdup(name);
    datum.values = NULL;
    ovsdb_idl_txn_write(&row->header_, &ovsrec_port_columns[OVSREC_PORT_COL_NAME], &datum);
}
      上面都是db相关操作,当更新到ovsdb后这个应用程序的任务就完成了,接下来就是守护进程合适的时候响应这些新的配置。
     函数 ofproto_port_add(ofproto/ofproto.c line 1353)将netdev (struct netdev是一个open network device实体,lib/netdev-provider.h)增加为ofproto 的端口,如果成功的话 ofp_portp更新为新端口的openflow port number; 疑问:何时根据ovsdb来调用这里的ofproto_port_add 呢?

/* Attempts to add 'netdev' as a port on 'ofproto' through the provider's
 * port_add hook.
 *
 * On success, refreshes the port via update_port().  If 'ofp_portp' is
 * nonnull it receives the new OpenFlow port number on success, or OFPP_NONE
 * on failure.  Returns the provider's error code (0 on success). */
int  ofproto_port_add(struct ofproto *ofproto, struct netdev *netdev, uint16_t *ofp_portp) {
    uint16_t new_port;
    int rc = ofproto->ofproto_class->port_add(ofproto, netdev, &new_port);

    if (rc == 0) {
        update_port(ofproto, netdev_get_name(netdev));
    }

    if (ofp_portp != NULL) {
        if (rc != 0) {
            *ofp_portp = OFPP_NONE;
        } else {
            *ofp_portp = new_port;
        }
    }

    return rc;
}

     函数 port_add(ofproto/ofproto-dpif.c)调用 dpif_port_add(lib/dpif.c),后者将netdev增设为dpif(struct dpif 是 ovs datapath 接口,见 dpif-provider.h)的一个端口,接下来会执行具体的 struct dpif_class dpif_linux_class 的 port_add 函数。

/* ofproto-dpif implementation of port_add: adds 'netdev' to the underlying
 * dpif and, on success, stores the resulting datapath port number translated
 * to an OpenFlow port number in '*ofp_portp'.  Returns 0 or the error from
 * dpif_port_add(). */
static int   port_add(struct ofproto *ofproto_, struct netdev *netdev, uint16_t *ofp_portp)  {
    struct ofproto_dpif *self = ofproto_dpif_cast(ofproto_);
    /* UINT16_MAX asks the dpif layer to choose a port number. */
    uint16_t dp_port_no = UINT16_MAX;
    int rc = dpif_port_add(self->dpif, netdev, &dp_port_no);

    if (rc) {
        return rc;
    }

    *ofp_portp = odp_port_to_ofp_port(dp_port_no);
    return 0;
}

int   dpif_port_add(struct dpif *dpif, struct netdev *netdev, uint16_t *port_nop) {
    const char *netdev_name = netdev_get_name(netdev);
    uint16_t port_no = UINT16_MAX;
    int error;

    COVERAGE_INC(dpif_port_add);  //操作计数

    if (port_nop) {
        port_no = *port_nop;
    }

     error = dpif->dpif_class->port_add(dpif, netdev, &port_no);
    if (!error) {
        VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu16,dpif_name(dpif), netdev_name, port_no);
    } else {
        VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s",dpif_name(dpif), netdev_name, strerror(error));
        port_no = UINT16_MAX;
    }
    if (port_nop) {
        *port_nop = port_no;
    }
    return error;
}

    
     当打开一个已存在的datapath或者创建并打开一个datapath都会调用 lib/dpif.c:do_open()函数,其中会调用 dp_initialize() 来注册这两个dpif_class 到 registered_dpif_class 中,然后调用其open函数,这里就是dpif_linux_open,主要功能是查看那4个generic family是否存在,并且创建netlink 套接字等等netlink 用户空间的准备工作; 这里会调用 dpif_linux_class里面的port_add handler,构造相应的nlmsg发送到内核空间。

static int   do_open (const char *name, const char *type, bool create, struct dpif **dpifp)
{
    struct dpif *dpif = NULL;
    int error;
    struct registered_dpif_class *registered_class;

     dp_initialize();

    type = dpif_normalize_type(type);

    registered_class = shash_find_data(&dpif_classes, type);   //根据具体类型找到对应的 registered_dpif_class 
    if (!registered_class) {
        VLOG_WARN("could not create datapath %s of unknown type %s", name,type);
        error = EAFNOSUPPORT;
        goto exit;
    }

    error =  registered_class->dpif_class->open(registered_class->dpif_class,name, create, &dpif);
    if (!error) {
        assert(dpif->dpif_class == registered_class->dpif_class);
        registered_class->refcount++;
    }

exit:
    *dpifp = error ? NULL : dpif;
    return error;
}

/* Datapath providers that dp_initialize() registers on first use.  The
 * "linux" provider is only included when LINUX_DATAPATH is defined; the
 * netdev provider is always present. */
static const struct dpif_class * base_dpif_classes[] = {
#ifdef LINUX_DATAPATH
    &dpif_linux_class,
#endif
    &dpif_netdev_class,
};

/* Registers every provider in base_dpif_classes[] the first time it is
 * called; later calls are no-ops thanks to the function-local 'status'
 * flag. */
static void  dp_initialize(void)
{
    static int status = -1;
    size_t idx;

    if (status >= 0) {
        return;     /* Already initialized. */
    }

    status = 0;
    for (idx = 0; idx < ARRAY_SIZE(base_dpif_classes); idx++) {
        dp_register_provider(base_dpif_classes[idx]);
    }
}

/* Makes datapath provider 'new_class' available: wraps it in a freshly
 * allocated registration record with a zero reference count and files that
 * record in the 'dpif_classes' hash table under the provider's type name.
 * Once registered, datapaths of that type can be opened with dpif_open().
 * Always returns 0 in this version. */
int   dp_register_provider(const struct dpif_class *new_class) {
    struct registered_dpif_class *entry = xmalloc(sizeof *entry);

    entry->refcount = 0;
    entry->dpif_class = new_class;

    /* Keyed by type so do_open() can find the provider by name. */
    shash_add(&dpif_classes, new_class->type, entry);

    return 0;
}


------------lib/dpif-linux.c
/* The Linux kernel datapath provider, registered under the type name
 * "system".
 *
 * This is a positional initializer, so the entries must stay in the same
 * order as the members of struct dpif_class (declared outside this
 * excerpt).  dpif_linux_port_add is the hook reached from dpif_port_add()
 * when a port is added. */
const struct dpif_class  dpif_linux_class = {
    "system",
    dpif_linux_enumerate,
    dpif_linux_open,
    dpif_linux_close,
    dpif_linux_destroy,
    dpif_linux_run,
    dpif_linux_wait,
    dpif_linux_get_stats,
     dpif_linux_port_add,
    dpif_linux_port_del,
    dpif_linux_port_query_by_number,
    dpif_linux_port_query_by_name,
    dpif_linux_get_max_ports,
    dpif_linux_port_get_pid,
    dpif_linux_port_dump_start,
    dpif_linux_port_dump_next,
    dpif_linux_port_dump_done,
    dpif_linux_port_poll,
    dpif_linux_port_poll_wait,
    dpif_linux_flow_get,
    dpif_linux_flow_put,
    dpif_linux_flow_del,
    dpif_linux_flow_flush,
    dpif_linux_flow_dump_start,
    dpif_linux_flow_dump_next,
    dpif_linux_flow_dump_done,
    dpif_linux_execute,
    dpif_linux_operate,
    dpif_linux_recv_set,
    dpif_linux_queue_to_priority,
    dpif_linux_recv,
    dpif_linux_recv_wait,
    dpif_linux_recv_purge,
};

/* open hook of the Linux datapath provider.
 *
 * After making sure the netlink plumbing is initialized, sends either an
 * OVS_DP_CMD_NEW (when 'create' is true) or an OVS_DP_CMD_GET request for
 * datapath 'name' and, on success, builds the dpif object from the reply
 * and stores it in '*dpifp'.  Returns 0 or the error from initialization or
 * from the transaction. */
static int   dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name, bool create, struct dpif **dpifp)
{
    struct dpif_linux_dp query, answer;
    struct ofpbuf *reply_buf;
    uint32_t upcall_pid;
    int rc;

    rc = dpif_linux_init();
    if (rc) {
        return rc;
    }

    /* Reset the request (the original note says this zeroes it). */
    dpif_linux_dp_init(&query);
    query.name = name;
    if (!create) {
        query.cmd = OVS_DP_CMD_GET;
    } else {
        upcall_pid = 0;
        query.upcall_pid = &upcall_pid;
        query.cmd = OVS_DP_CMD_NEW;
    }

    /* Ship the create/get command to the kernel over generic netlink. */
    rc = dpif_linux_dp_transact(&query, &answer, &reply_buf);
    if (rc) {
        return rc;
    }

    open_dpif(&answer, dpifp);
    ofpbuf_delete(reply_buf);
    return 0;
}
---->
static int   dpif_linux_init(void) {
    static int error = -1;

    if (error < 0) {
        unsigned int ovs_vport_mcgroup;

        error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY,&ovs_datapath_family);
       
        if (!error) {
            error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family);
        }
        if (!error) {
            error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family);
        }
        if (!error) {
            error = nl_lookup_genl_family(OVS_PACKET_FAMILY,&ovs_packet_family);
        }
        if (!error) {
            error =  nl_sock_create(NETLINK_GENERIC, &genl_sock);   //创建一个netlink socket:genl_sock
        }
        if (!error) {
            error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP,&ovs_vport_mcgroup,OVS_VPORT_MCGROUP_FALLBACK_ID);
        }
        if (!error) {
            static struct dpif_linux_vport vport;
            nln = nln_create(NETLINK_GENERIC, ovs_vport_mcgroup, dpif_linux_nln_parse, &vport);  //创建一个netlink notifier
        }
    }
    return error;
}


static int  dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev,  uint16_t *port_nop) {
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    const char *name = netdev_get_name(netdev);
    const char *type = netdev_get_type(netdev);
    struct dpif_linux_vport request, reply;
    const struct ofpbuf *options;
    struct ofpbuf *buf;
    int error, i = 0, max_ports = MAX_PORTS;

    dpif_linux_vport_init(&request);   //为下面发送到内核空间做准备;
    request.cmd = OVS_VPORT_CMD_NEW;
    request.dp_ifindex = dpif->dp_ifindex;
    request.type = netdev_vport_get_vport_type(netdev);
   
    request.name = name;

    options = netdev_vport_get_options(netdev);
    if (options && options->size) {
        request.options = options->data;
        request.options_len = options->size;
    }

    if (request.type == OVS_VPORT_TYPE_NETDEV) {
        netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
    }

    /* Unless a specific port was requested, loop until we find a port
     * that isn't used. */
    do {
        uint32_t upcall_pid;

        request.port_no = *port_nop != UINT16_MAX ? *port_nop : ++dpif->alloc_port_no;
        upcall_pid = dpif_linux_port_get_pid(dpif_, request.port_no);
        request.upcall_pid = &upcall_pid;
        error =  dpif_linux_vport_transact(&request, &reply, &buf);

       ofpbuf_delete(buf);
    } while ((*port_nop == UINT16_MAX) && (i++ < max_ports)&& (error == EBUSY || error == EFBIG));

    return error;
}

       将 request 交给内核层的 datapath 处理:dpif_linux_vport_to_ofpbuf 通过 nl_msg_put_* 将 request 中描述的 netlink msg 写入 ofpbuf 中,而后通过 genl_sock 发送到 kernel 并等待回复;成功后 bufp 指向的是响应消息体,然后由 dpif_linux_vport_from_ofpbuf 从中解析到 reply 中。

int   dpif_linux_vport_transact(const struct dpif_linux_vport *request, struct dpif_linux_vport *reply, struct ofpbuf **bufp) {
    struct ofpbuf *request_buf;
    int error;

    assert((reply != NULL) == (bufp != NULL));

    error =  dpif_linux_init();
    if (error) {
        if (reply) {
            *bufp = NULL;
            dpif_linux_vport_init(reply);
        }
        return error;
    }

    request_buf = ofpbuf_new(1024);
     dpif_linux_vport_to_ofpbuf(request, request_buf);
    error =  nl_sock_transact(genl_sock, request_buf, bufp);
    ofpbuf_delete(request_buf);

    if (reply) {
        if (!error) {
            error =  dpif_linux_vport_from_ofpbuf(reply, *bufp);
        }
        if (error) {
            dpif_linux_vport_init(reply);
            ofpbuf_delete(*bufp);
            *bufp = NULL;
        }
    }
    return error;
}

---->     lib/netlink-socket.c:nl_sock_transact通过sock发送请求到内核空间等待回复,
int  nl_sock_transact(struct nl_sock *sock, const struct ofpbuf *request, struct ofpbuf **replyp) {
    struct nl_transaction *transactionp;
    struct nl_transaction transaction;

    transaction.request = CONST_CAST(struct ofpbuf *, request);
    transaction.reply = replyp ? ofpbuf_new(1024) : NULL;
    transactionp = &transaction;

     nl_sock_transact_multiple(sock, &transactionp, 1);

    if (replyp) {
        if (transaction.error) {
            ofpbuf_delete(transaction.reply);
            *replyp = NULL;
        } else {
            *replyp = transaction.reply;
        }
    }

    return transaction.error;
}


     当成功收到内核的回复之后,dpif_linux_vport_from_ofpbuf 将 ofpbuf 中的内容按 netlink attribute 的方式(消息结构是 nlmsghdr | genlmsghdr | ovs_header ,就是??)解析到 vport 中。dpif_linux_vport 中有指向 ofpbuf 内部数据的指针,所以在 vport 仍被使用期间,调用者必须保证不释放 buf。此时得到的 vport 后来如何被使用的呢?

/* Parses the netlink message in 'buf' into 'vport'.
 *
 * The message is expected to be laid out as
 *     nlmsghdr | genlmsghdr | ovs_header | attributes
 * and to belong to the vport family.  Returns 0 on success, or EINVAL if a
 * header is missing, the family does not match or the attributes do not
 * satisfy the policy.
 *
 * Pointer-valued fields stored in 'vport' (name, upcall_pid, stats, address,
 * options) are taken from the attribute data, so 'buf' needs to outlive any
 * use of 'vport'. */
static int   dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *vport, const struct ofpbuf *buf) {
    /* Expected type (and size limits) of each attribute; entries marked
     * .optional may be absent from the message. */
    static const struct nl_policy ovs_vport_policy[] = {
        [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
        [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
        [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_U32 },
        [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats), .optional = true },
        [OVS_VPORT_ATTR_ADDRESS] = { .type = NL_A_UNSPEC,  .min_len = ETH_ADDR_LEN,
                                     .max_len = ETH_ADDR_LEN, .optional = true },
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
    };

    struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)];
    struct ovs_header *ovs_header;
    struct nlmsghdr *nlmsg;
    struct genlmsghdr *genl;
    struct ofpbuf b;

    /* Start from a clean vport so a failed parse leaves nothing stale. */
    dpif_linux_vport_init(vport);

    /* Walk a read-only view of 'buf', peeling off the three headers in
     * order; each pull yields NULL if not enough bytes remain. */
    ofpbuf_use_const(&b, buf->data, buf->size);
     nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
    genl = ofpbuf_try_pull(&b, sizeof *genl);
    ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);

    if (!nlmsg || !genl || !ovs_header
        || nlmsg->nlmsg_type != ovs_vport_family
        || !nl_policy_parse(&b, 0, ovs_vport_policy, a,
                            ARRAY_SIZE(ovs_vport_policy))) {
        return EINVAL;
    }

    vport->cmd = genl->cmd;
    vport->dp_ifindex = ovs_header->dp_ifindex;
    vport->port_no = nl_attr_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
    vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]);
    vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]);
    /* The remaining attributes are copied only when present. */
    if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
        vport->upcall_pid = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]);
    }
    if (a[OVS_VPORT_ATTR_STATS]) {
        vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]);
    }
    if (a[OVS_VPORT_ATTR_ADDRESS]) {
        vport->address = nl_attr_get(a[OVS_VPORT_ATTR_ADDRESS]);
    }
    if (a[OVS_VPORT_ATTR_OPTIONS]) {
        vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]);
        vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]);
    }
    return 0;
}

     在datapath模块启动的时候会注册一些generic netlink family。解析并执行从用户空间传来的指令,这里是 OVS_VPORT_CMD_NEW,就会呼叫对应的operation。

/*
 * Registers every generic netlink family listed in dp_genl_families[],
 * together with its operations and, where one is given, its multicast
 * group.
 *
 * Returns 0 on success.  If any registration fails, the families that were
 * already registered are unregistered again and the error code of the
 * failing call is returned.
 */
static int  dp_register_genl(void) {
     int n_registered = 0;
     int err;
     int i;

     for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
          const struct genl_family_and_ops *f = &dp_genl_families[i];

          err =  genl_register_family_with_ops(f->family, f->ops,f->n_ops);
          if (err)
               goto error;
          n_registered++;

          if (f->group) {
               err = genl_register_mc_group(f->family, f->group);
               if (err)
                    goto error;
          }
     }

     return 0;

error:
     /*
      * Roll back.  n_registered counts the families whose registration
      * succeeded, including the current one when only its multicast group
      * failed to register.
      */
     for (i = 0; i < n_registered; i++)
          genl_unregister_family(dp_genl_families[i].family);
     return err;
}

/* The generic netlink families the datapath module registers.  Each row is
 * { family, ops array, number of ops, multicast group }; the packet family
 * has no multicast group (NULL). */
static const struct genl_family_and_ops  dp_genl_families[] = {
     { &dp_datapath_genl_family, dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops), &ovs_dp_datapath_multicast_group },
     { &dp_vport_genl_family, dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), &ovs_dp_vport_multicast_group },
     { &dp_flow_genl_family, dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops), &ovs_dp_flow_multicast_group },
     { &dp_packet_genl_family, dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),  NULL },
};

/* Generic netlink family for vport commands.  The family id is generated at
 * registration time (GENL_ID_GENERATE), and every message carries a struct
 * ovs_header as its family-specific header. */
static struct genl_family  dp_vport_genl_family = {
     .id = GENL_ID_GENERATE,
     .hdrsize = sizeof(struct ovs_header),
     .name = OVS_VPORT_FAMILY,
     .version = OVS_VPORT_VERSION,
     .maxattr = OVS_VPORT_ATTR_MAX,
     SET_NETNSOK
};

/* Multicast group of the vport family; ovs_vport_cmd_new() passes its id to
 * genl_notify() when a vport has been created. */
struct genl_multicast_group ovs_dp_vport_multicast_group = {
     .name = OVS_VPORT_MCGROUP
};

/* Operations of the vport generic netlink family.  Every command is
 * validated against vport_policy; NEW, DEL and SET are flagged
 * GENL_ADMIN_PERM, GET is not and additionally supports dumps.
 * OVS_VPORT_CMD_NEW is dispatched to ovs_vport_cmd_new(). */
static struct genl_ops  dp_vport_genl_ops[] = {
     { .cmd = OVS_VPORT_CMD_NEW,
       .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
       .policy = vport_policy,
       .doit = ovs_vport_cmd_new
     },
     { .cmd = OVS_VPORT_CMD_DEL,
       .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
       .policy = vport_policy,
       .doit = ovs_vport_cmd_del
     },
     { .cmd = OVS_VPORT_CMD_GET,
       .flags = 0,              /* OK for unprivileged users. */
       .policy = vport_policy,
       .doit = ovs_vport_cmd_get,
       .dumpit = ovs_vport_cmd_dump
     },
     { .cmd = OVS_VPORT_CMD_SET,
       .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
       .policy = vport_policy,
       .doit = ovs_vport_cmd_set,
     },
};

/*
 * Generic netlink handler for OVS_VPORT_CMD_NEW: creates a new vport on the
 * datapath identified by the request's ovs_header.
 *
 * The request must carry the NAME, TYPE and UPCALL_PID attributes.  PORT_NO
 * is optional: when given, that exact port number is used (it must be below
 * DP_MAX_PORTS and currently unused); when absent, port numbers are scanned
 * upwards from 1 and the first unused one is taken.
 *
 * Returns 0 on success or a negative errno value: -EINVAL for missing
 * attributes, -ENODEV when the datapath is not found, -EFBIG when no port
 * number below DP_MAX_PORTS is available or the requested one is too large,
 * -EBUSY when the requested port number is taken, or whatever validation,
 * creation, configuration or reply construction reports.
 */
static int  ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) 
{
     struct nlattr **a = info->attrs;
     struct ovs_header *ovs_header = info->userhdr;
     struct vport_parms parms;
     struct sk_buff *reply;
     struct vport *vport;
     struct datapath *dp;
     u32 port_no;
     int err;

     err = -EINVAL;
     if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || !a[OVS_VPORT_ATTR_UPCALL_PID])
          goto exit;

     err = ovs_vport_cmd_validate(a);
     if (err)
          goto exit;

     /* Everything from here to exit_unlock runs under the RTNL lock. */
     rtnl_lock();
     dp =  get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
     /* Per the original author's note: get_dp() finds the net_device with
      * ifindex dp_ifindex, then the vport bound to that device, and returns
      * vport->dp.  (get_dp() itself is not shown here -- confirm.) */
     err = -ENODEV;
     if (!dp)
          goto exit_unlock;

     if (a[OVS_VPORT_ATTR_PORT_NO]) {
           port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

          err = -EFBIG;
          if (port_no >= DP_MAX_PORTS)
               goto exit_unlock;

           vport = ovs_vport_rtnl(dp, port_no);
          /* Per the original note this ends up in ovs_lookup_vport(dp,
           * port_no), returning the vport with that port number if any. */
          err = -EBUSY;
          if (vport)
               goto exit_unlock;
     } else {     /* No port number requested: take the lowest unused one, starting at 1. */
          for (port_no = 1; ; port_no++) {
               if (port_no >= DP_MAX_PORTS) {
                    err = -EFBIG;
                    goto exit_unlock;
               }
               vport = ovs_vport_rtnl(dp, port_no);
               if (!vport)
                    break;
          }
     }
     /* Fill in the parameter block describing the vport to create. */
     parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
     parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
     parms.options = a[OVS_VPORT_ATTR_OPTIONS];
     parms.dp = dp;
     parms.port_no = port_no;
     parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);

      vport = new_vport(&parms);
     /* err is only meaningful here when vport is an error pointer; on
      * success it is overwritten by change_vport() below. */
     err = PTR_ERR(vport);
     if (IS_ERR(vport))
          goto exit_unlock;

     ovs_dp_sysfs_add_if(vport);

     err = change_vport(vport, a);
     if (!err) {
          reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
                              info->snd_seq,
                              OVS_VPORT_CMD_NEW);
          if (IS_ERR(reply))
               err = PTR_ERR(reply);
     }
     if (err) {
          /* Configuring the vport or building the reply failed: undo the
           * creation. */
          ovs_dp_detach_port(vport);
          goto exit_unlock;
     }
     /* Send the reply, addressed to the requester and to the vport
      * multicast group. */
     genl_notify(reply, genl_info_net(info), info->snd_portid,
              ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
     rtnl_unlock();
exit:
     return err;
}


/*
 * Creates a vport from 'parms' via ovs_vport_add().  On success the vport is
 * additionally linked into its datapath's port-number hash table and an
 * RTM_NEWLINK notification is issued for it.  Returns the vport, or the
 * error pointer produced by ovs_vport_add().
 */
static struct vport * new_vport(const struct vport_parms *parms)
{
     struct vport *created = ovs_vport_add(parms);
     struct hlist_head *bucket;

     if (IS_ERR(created))
          return created;

     bucket = vport_hash_bucket(parms->dp, created->port_no);
     hlist_add_head_rcu(&created->dp_hash_node, bucket);
     dp_ifinfo_notify(RTM_NEWLINK, created);

     return created;
}

     函数 ovs_vport_add() 是内核中真正增加vport设备的地方(datapath/vport.c),它根据要添加端口的参数从 vport_ops_list 中找到对应的类型;parms 是要新增的vport的信息。有了具体设备的类型之后,就调用相应的 create 方法(这里是 ovs_netdev_vport_ops 的 netdev_create)。

struct vport * ovs_vport_add(const struct vport_parms *parms)
{
     struct vport *vport;
     int err = 0;
     int i;

     ASSERT_RTNL();    ///和RTNL lock相关;

     for (i = 0; i < n_vport_types; i++) {
          if (vport_ops_list[i]->type == parms->type) {
               struct hlist_head *bucket;
                //在 ovs_vport_init() 的时候会将base_vport_ops_list[] 更新到 vport_ops_list中;
                 vport = vport_ops_list[i]->create(parms);
               if (IS_ERR(vport)) {
                    err = PTR_ERR(vport);
                    goto out;
               }

               bucket = hash_bucket(ovs_dp_get_net(vport->dp),  vport->ops->get_name(vport));
               hlist_add_head_rcu(&vport->hash_node, bucket);
               return vport;
          }
     }

     err = -EAFNOSUPPORT;

out:
     return ERR_PTR(err);


/* The built-in vport implementations.  The CAPWAP implementation is only
 * compiled in for kernels 2.6.26 and newer. */
static const struct vport_ops * base_vport_ops_list[] = {
     &ovs_netdev_vport_ops,
     &ovs_internal_vport_ops,
     &ovs_patch_vport_ops,
     &ovs_gre_vport_ops,
     &ovs_gre_ft_vport_ops,
     &ovs_gre64_vport_ops,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
     &ovs_capwap_vport_ops,
#endif
};

     
/* vport implementation for type OVS_VPORT_TYPE_NETDEV.  Its create hook,
 * netdev_create(), is what ovs_vport_add() ends up calling for a request of
 * that type. */
const struct vport_ops  ovs_netdev_vport_ops = {
     .type          = OVS_VPORT_TYPE_NETDEV,
     .flags          = VPORT_F_REQUIRED,
     .init          = netdev_init,
     .exit          = netdev_exit,
      .create          = netdev_create,
     .destroy     = netdev_destroy,
     .set_addr     = ovs_netdev_set_addr,
     .get_name     = ovs_netdev_get_name,
     .get_addr     = ovs_netdev_get_addr,
     .get_kobj     = ovs_netdev_get_kobj,
     .get_dev_flags     = ovs_netdev_get_dev_flags,
     .is_running     = ovs_netdev_is_running,
     .get_operstate     = ovs_netdev_get_operstate,
     .get_ifindex     = ovs_netdev_get_ifindex,
     .get_mtu     = ovs_netdev_get_mtu,
     .send          = netdev_send,
};

/*
 * create hook of the netdev vport type: wraps the existing network device
 * named parms->name in a new vport.
 *
 * Returns the vport on success or an ERR_PTR():
 *   - the error from ovs_vport_alloc() if allocation fails,
 *   - -ENODEV if no device of that name exists in the datapath's namespace,
 *   - -EINVAL if the device is a loopback device, is not of type
 *     ARPHRD_ETHER, or is an OVS internal device,
 *   - the error from netdev_rx_handler_register() if hooking the receive
 *     path fails.
 */
static struct vport * netdev_create(const struct vport_parms *parms)
{
     struct vport *vport;
     struct netdev_vport *netdev_vport;
     int err;

       vport = ovs_vport_alloc(sizeof(struct netdev_vport), &ovs_netdev_vport_ops, parms);
     if (IS_ERR(vport)) {
          err = PTR_ERR(vport);
          goto error;
     }

     netdev_vport = netdev_vport_priv(vport);

     /* Look the device up by name; the matching dev_put() is on the
      * error_put path below. */
     netdev_vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name);
     if (!netdev_vport->dev) {
          err = -ENODEV;
          goto error_free_vport;
     }

     if (netdev_vport->dev->flags & IFF_LOOPBACK ||  netdev_vport->dev->type != ARPHRD_ETHER ||
         ovs_is_internal_dev(netdev_vport->dev)) {
          err = -EINVAL;
          goto error_put;
     }

     /* Install netdev_frame_hook as the device's rx handler, with the vport
      * as its handler data. */
     err =   netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook, vport);
     if (err)
          goto error_put;

     /* Put the device into promiscuous mode and mark it as belonging to an
      * OVS datapath. */
     dev_set_promiscuity(netdev_vport->dev, 1);
     netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;

     return vport;

     /* Unwind in reverse order of acquisition. */
error_put:
     dev_put(netdev_vport->dev);
error_free_vport:
     ovs_vport_free(vport);
error:
     return ERR_PTR(err);
}

     这个时候我们就要处理来自网络接口的数据了,见 packet处理流程





你可能感兴趣的:(OVS源码阅读,OVS源码阅读)