ovs-vswitchd源码分析

ovs-vswitchd的主要处理流程在主循环while里,话不多说,直接看代码吧。

vswitchd/ovs-vswitchd.c
main(int argc, char *argv[])
    char *unixctl_path = NULL;
    struct unixctl_server *unixctl;
    char *remote;
    
    remote = parse_options(argc, argv, &unixctl_path);
    unixctl_server_create(unixctl_path, &unixctl);
    bridge_init(remote);
        /* Create connection to database. */
        //只是创建结构体,还没有真正连接数据库
        idl = ovsdb_idl_create(remote, &ovsrec_idl_class, true, true);
        lacp_init();
        bond_init();
        ...
    exiting = false;
    cleanup = false;
    while (!exiting) {
        1. memory_run();

        2. bridge_run();
            2.0
            a. 连接数据库
            b. 发送 monitor_cond 请求给 ovsdb-server,一旦数据库配置有改变(比如添加/删除网桥),则会通知 ovs-vswitchd
            ovsdb_idl_run(idl);
                jsonrpc_session_run(idl->session);
                    //当s->stream 不为空时,分配 rpc 结构,后续使用 rpc 和 ovsdb-server 通信,
                    //但底层还是使用 stream 收发数据
                    if (s->stream) {
                        stream_connect(s->stream);
                        s->rpc = jsonrpc_open(s->stream);
                            rpc = xzalloc(sizeof *rpc);
                            rpc->name = xstrdup(stream_get_name(stream));
                            rpc->stream = stream;
                            byteq_init(&rpc->input, rpc->input_buffer, sizeof rpc->input_buffer);
                            ovs_list_init(&rpc->output);
                        s->stream = NULL;
                    }
                    //主动连接 ovsdb-server,将连接后的数据流放在 s->stream
                    switch (reconnect_run(s->reconnect, time_msec())) {
                    case RECONNECT_CONNECT:
                        jsonrpc_session_connect(s);
                            jsonrpc_stream_open(name, &s->stream, s->dscp);
                    }
                //当 rpc 不为空后,就可以循环接收数据了
                for (i = 0; jsonrpc_session_is_connected(idl->session) && i < 50; i++) {
                    //首先发送请求获取 schema
                    seqno = jsonrpc_session_get_seqno(idl->session);
                    if (idl->state_seqno != seqno) {
                        idl->state_seqno = seqno;
                        json_destroy(idl->request_id);
                        idl->request_id = NULL;
                        ovsdb_idl_txn_abort_all(idl);
                        //发送 get-schema 请求
                        ovsdb_idl_send_schema_request(idl);
                        //将state设置为 IDL_S_SCHEMA_REQUESTED
                        idl->state = IDL_S_SCHEMA_REQUESTED;
                    }
                    msg = jsonrpc_session_recv(idl->session);
                    if (!msg) {
                        break;
                    }
                    //收到 ovsdb-server 发送的通知,表示有配置改变
                    if (msg->type == JSONRPC_NOTIFY
                        && !strcmp(msg->method, "update2")
                        && msg->params->type == JSON_ARRAY
                        && msg->params->u.array.n == 2
                        && msg->params->u.array.elems[0]->type == JSON_STRING) {
                        /* Database contents changed. */
                        ovsdb_idl_parse_update(idl, msg->params->u.array.elems[1], OVSDB_UPDATE2);
                    } else if (msg->type == JSONRPC_REPLY && idl->request_id && json_equal(idl->request_id, msg->id)) {
                        json_destroy(idl->request_id);
                        idl->request_id = NULL;
                        switch (idl->state) {
                        //收到 get-schema 的 reply
                        case IDL_S_SCHEMA_REQUESTED:
                            /* Reply to our "get_schema" request. */
                            idl->schema = json_clone(msg->result);
                            //发送 monitor-cond 请求
                            ovsdb_idl_send_monitor_cond_request(idl);
                            //设置 state 为 IDL_S_MONITOR_COND_REQUESTED
                            idl->state = IDL_S_MONITOR_COND_REQUESTED;
                            break;
                        case IDL_S_MONITOR_REQUESTED:
                        case IDL_S_MONITOR_COND_REQUESTED:
                            /* Reply to our "monitor" or "monitor_cond" request. */
                            idl->change_seqno++;
                            ovsdb_idl_clear(idl);
                            if (idl->state == IDL_S_MONITOR_REQUESTED) {
                                idl->state = IDL_S_MONITORING;
                                ovsdb_idl_parse_update(idl, msg->result, OVSDB_UPDATE);
                            } else { /* IDL_S_MONITOR_COND_REQUESTED. */
                                //收到 monitor-cond 响应
                                idl->state = IDL_S_MONITORING_COND;
                                ovsdb_idl_parse_update(idl, msg->result, OVSDB_UPDATE2);
                            }

                            /* Schema is not useful after monitor request is accepted
                             * by the server.  */
                            json_destroy(idl->schema);
                            idl->schema = NULL;
                            break;
                        case IDL_S_MONITORING_COND:
                            /* Conditional monitor clauses were updated. Send out
                             * the next condition changes, if any, immediately. */
                            ovsdb_idl_send_cond_change(idl);
                            idl->cond_seqno++;
                            break;
            2.1
            //获取配置
            const struct ovsrec_open_vswitch *cfg;
            cfg = ovsrec_open_vswitch_first(idl);

            //使能硬件offload
            netdev_set_flow_api_enabled(&cfg->other_config);
                if (smap_get_bool(ovs_other_config, "hw-offload", false)) {
                    netdev_flow_api_enabled = true;
                }

            2.2 dpdk初始化
            dpdk_init(&cfg->other_config);
                if (smap_get_bool(ovs_other_config, "dpdk-init", false)) {
                    dpdk_init__(ovs_other_config);
                        /* Make sure things are initialized ... */
                        rte_eal_init(argc, argv);
                        rte_pdump_init(ovs_rundir());
                        /* Finally, register the dpdk classes */
                        netdev_dpdk_register();
                            netdev_register_provider(&dpdk_class);
                            netdev_register_provider(&dpdk_ring_class);
                            netdev_register_provider(&dpdk_vhost_class);
                            netdev_register_provider(&dpdk_vhost_client_class);
                                new_class->init()
                                struct netdev_registered_class *rc;
                                rc = xmalloc(sizeof *rc);
                                cmap_insert(&netdev_classes, &rc->cmap_node, hash_string(new_class->type, 0));
                                rc->class = new_class;

            2.3 注册 ofproto_class
            /* Initialize the ofproto library.  This only needs to run once, but
             * it must be done after the configuration is set.  If the
             * initialization has already occurred, bridge_init_ofproto()
             * returns immediately. */
            bridge_init_ofproto(cfg);
                static bool initialized = false;
                if (initialized) {
                    return;
                }
                struct shash iface_hints;
                shash_init(&iface_hints);
                if (cfg) {
                    //将配置的所有interface插入 iface_hints
                    for (i = 0; i < cfg->n_bridges; i++) {
                        const struct ovsrec_bridge *br_cfg = cfg->bridges[i];
                        int j;

                        for (j = 0; j < br_cfg->n_ports; j++) {
                            struct ovsrec_port *port_cfg = br_cfg->ports[j];
                            int k;

                            for (k = 0; k < port_cfg->n_interfaces; k++) {
                                struct ovsrec_interface *if_cfg = port_cfg->interfaces[k];
                                struct iface_hint *iface_hint;

                                iface_hint = xmalloc(sizeof *iface_hint);
                                iface_hint->br_name = br_cfg->name;
                                iface_hint->br_type = br_cfg->datapath_type;
                                iface_hint->ofp_port = iface_pick_ofport(if_cfg);

                                shash_add(&iface_hints, if_cfg->name, iface_hint);
                            }
                        }
                    }
                }
                ofproto_init(&iface_hints);
                    //目前只有一种 ofproto_class,即 ofproto_dpif_class
                    ofproto_class_register(&ofproto_dpif_class);
                        static const struct ofproto_class **ofproto_classes;
                        ofproto_classes[n_ofproto_classes++] = new_class;

                    /* Make a local copy, since we don't own 'iface_hints' elements. */
                    //先复制到 ofproto.c 中的静态变量 init_ofp_ports 中
                    SHASH_FOR_EACH(node, iface_hints) {
                        const struct iface_hint *orig_hint = node->data;
                        struct iface_hint *new_hint = xmalloc(sizeof *new_hint);
                        const char *br_type = ofproto_normalize_type(orig_hint->br_type);
                        new_hint->br_name = xstrdup(orig_hint->br_name);
                        new_hint->br_type = xstrdup(br_type);
                        new_hint->ofp_port = orig_hint->ofp_port;
                        shash_add(&init_ofp_ports, node->name, new_hint);

                    for (i = 0; i < n_ofproto_classes; i++) {
                        //目前只支持一种 ofproto_classes,即 ofproto_dpif_class,调用其 init 函数
                        ofproto_classes[i]->init(&init_ofp_ports); //init(const struct shash *iface_hints)
                            /* Make a local copy, since we don't own 'iface_hints' elements. */
                            //再复制到 ofproto_dpif.c 中的静态变量 init_ofp_ports 中
                            SHASH_FOR_EACH(node, iface_hints) {
                                const struct iface_hint *orig_hint = node->data;
                                struct iface_hint *new_hint = xmalloc(sizeof *new_hint);
                                new_hint->br_name = xstrdup(orig_hint->br_name);
                                new_hint->br_type = xstrdup(orig_hint->br_type);
                                new_hint->ofp_port = orig_hint->ofp_port;
                                shash_add(&init_ofp_ports, node->name, new_hint);

                            ofproto_unixctl_init();
                                unixctl_command_register("fdb/flush", "[bridge]", 0, 1, ofproto_unixctl_fdb_flush, NULL);
                                unixctl_command_register("fdb/show", "bridge", 1, 1, ofproto_unixctl_fdb_show, NULL);
                                ...

                            ofproto_dpif_trace_init();
                                unixctl_command_register(
                                    "ofproto/trace",
                                    "{[dp_name] odp_flow | bridge br_flow} [OPTIONS...] "
                                    "[-generate|packet]", 1, INT_MAX, ofproto_unixctl_trace, NULL);
                                ...

                            udpif_init();
                                unixctl_command_register("upcall/show", "", 0, 0, upcall_unixctl_show, NULL);
                                unixctl_command_register("upcall/disable-megaflows", "", 0, 0, upcall_unixctl_disable_megaflows, NULL);
                                unixctl_command_register("upcall/enable-megaflows", "", 0, 0, upcall_unixctl_enable_megaflows, NULL);
                                ...
                initialized = true;

            2.4
            bridge_run__();
                2.4.1
                sset_init(&types);
                //获取所有type,目前就两种:system和netdev
                ofproto_enumerate_types(&types);
                    sset_clear(types);
                    //目前只支持一种 ofproto_classes,即 ofproto_dpif_class
                    for (i = 0; i < n_ofproto_classes; i++) {
                        ofproto_classes[i]->enumerate_types(types); //dp_enumerate_types
                            dp_initialize();
                                if (ovsthread_once_start(&once)) {
                                    tnl_conf_seq = seq_create();
                                    dpctl_unixctl_register();
                                    tnl_port_map_init();
                                    tnl_neigh_cache_init();
                                    route_table_init();
                                    
                                    static const struct dpif_class *base_dpif_classes[] = {
                                    #if defined(__linux__) || defined(_WIN32)
                                        &dpif_netlink_class,
                                    #endif
                                        &dpif_netdev_class,
                                    };
                                    for (i = 0; i < ARRAY_SIZE(base_dpif_classes); i++) {
                                        dp_register_provider(base_dpif_classes[i]);
                                            dp_register_provider__(new_class);
                                                new_class->init()
                                                struct registered_dpif_class * registered_class = xmalloc(sizeof *registered_class);
                                                registered_class->dpif_class = new_class;
                                                registered_class->refcount = 0;
                                                shash_add(&dpif_classes, new_class->type, registered_class);
                                    }
                                }
                            SHASH_FOR_EACH(node, &dpif_classes) {
                                const struct registered_dpif_class *registered_class = node->data;
                                sset_add(types, registered_class->dpif_class->type);
                            }
                    }

                2.4.2
                SSET_FOR_EACH (type, &types) {
                    ofproto_type_run(type);
                        datapath_type = ofproto_normalize_type(datapath_type); //return type && type[0] ? type : "system";
                        //根据 datapath_type 找到 ofproto_class,目前其实就一种 
                        struct ofproto_class * class = ofproto_class_find__(datapath_type);
                            for (i = 0; i < n_ofproto_classes; i++) {
                                const struct ofproto_class *class = ofproto_classes[i];
                                struct sset types;
                                bool found;

                                sset_init(&types);
                                class->enumerate_types(&types);
                                found = sset_contains(&types, type);
                                sset_destroy(&types);

                                if (found) {
                                    return class;
                                }
                            }
                        class->type_run(datapath_type) //type_run(const char *type)
                            //all_dpif_backers 是个全局变量。根据type找backer。
                            //system和netdev类型的数据通路会分别创建一个 backer
                            backer = shash_find_data(&all_dpif_backers, type);
                            dpif_run(backer->dpif)
                                //dpif_netlink_run 或者 dpif_netdev_run
                                dpif->dpif_class->run(dpif);

                            udpif_run(backer->udpif);
                                unixctl_command_reply(udpif->conns[i], NULL);

                            if (backer->recv_set_enable) {
                                udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
                                    if (!udpif->handlers && !udpif->revalidators) {
                                        dpif_handlers_set(udpif->dpif, n_handlers);
                                            //只有system类型的数据通路才有此函数 dpif_netlink_handlers_set
                                            dpif->dpif_class->handlers_set(dpif, n_handlers);

                                        udpif_start_threads(udpif, n_handlers, n_revalidators);
                                            udpif->n_handlers = n_handlers;
                                            udpif->n_revalidators = n_revalidators;
                                            //启动线程,这些线程貌似只在 system 数据通路时有用
                                            udpif->handlers = xzalloc(udpif->n_handlers * sizeof *udpif->handlers);
                                            for (i = 0; i < udpif->n_handlers; i++) {
                                                struct handler *handler = &udpif->handlers[i];

                                                handler->udpif = udpif;
                                                handler->handler_id = i;
                                                handler->thread = ovs_thread_create(
                                                    "handler", udpif_upcall_handler, handler);
                                            }
                                            udpif->revalidators = xzalloc(udpif->n_revalidators * sizeof *udpif->revalidators);
                                            for (i = 0; i < udpif->n_revalidators; i++) {
                                                struct revalidator *revalidator = &udpif->revalidators[i];

                                                revalidator->udpif = udpif;
                                                revalidator->thread = ovs_thread_create(
                                                    "revalidator", udpif_revalidator, revalidator);
                                            }
                                    }
                            }
                            
                            if (backer->need_revalidate) {
                                //分配 new_xcfg,并把当前(旧)配置中的 xbridge 复制到 new_xcfg
                                xlate_txn_start();
                                    struct xbridge *xbridge;
                                    struct xlate_cfg *xcfg;

                                    ovs_assert(!new_xcfg);

                                    new_xcfg = xmalloc(sizeof *new_xcfg);
                                    hmap_init(&new_xcfg->xbridges);
                                    hmap_init(&new_xcfg->xbundles);
                                    hmap_init(&new_xcfg->xports);
                                    hmap_init(&new_xcfg->xports_uuid);

                                    xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
                                    if (!xcfg) {
                                        return;
                                    }

                                    HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
                                        xlate_xbridge_copy(xbridge);
                                    }
                                //将最新的配置添加到 new_xcfg
                                HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
                                    struct ofport_dpif *ofport;
                                    struct ofbundle *bundle;

                                    if (ofproto->backer != backer) {
                                        continue;
                                    }
                                    //创建 xbridge,并插入 xcfg->xbridges
                                    xlate_ofproto_set(ofproto, ofproto->up.name,
                                                      ofproto->backer->dpif, ofproto->ml,
                                                      ofproto->stp, ofproto->rstp, ofproto->ms,
                                                      ofproto->mbridge, ofproto->sflow, ofproto->ipfix,
                                                      ofproto->netflow,
                                                      ofproto->up.forward_bpdu,
                                                      connmgr_has_in_band(ofproto->up.connmgr),
                                                      &ofproto->backer->rt_support);
                                    //创建 xbundle,并插入 xcfg->xbundles
                                    HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                                        xlate_bundle_set(ofproto, bundle, bundle->name,
                                                         bundle->vlan_mode, bundle->qinq_ethtype,
                                                         bundle->vlan, bundle->trunks, bundle->cvlans,
                                                         bundle->use_priority_tags,
                                                         bundle->bond, bundle->lacp,
                                                         bundle->floodable, bundle->protected);
                                    }
                                    //创建 xport,并插入 xcfg->xports,xbridge->xports 和 xbundle->xports
                                    HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
                                        int stp_port = ofport->stp_port ? stp_port_no(ofport->stp_port) : -1;
                                        xlate_ofport_set(ofproto, ofport->bundle, ofport,
                                                         ofport->up.ofp_port, ofport->odp_port,
                                                         ofport->up.netdev, ofport->cfm, ofport->bfd,
                                                         ofport->lldp, ofport->peer, stp_port,
                                                         ofport->rstp_port, ofport->qdscp,
                                                         ofport->n_qdscp, ofport->up.pp.config,
                                                         ofport->up.pp.state, ofport->is_tunnel,
                                                         ofport->may_enable);
                                    }
                                }
                                xlate_txn_commit();
                                    //xcfgp指针指向最新的配置 new_xcfg
                                    struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
                                    ovsrcu_set(&xcfgp, new_xcfg);
                                    ovsrcu_synchronize();
                                    xlate_xcfg_free(xcfg);
                                    new_xcfg = NULL;
                            }
                }

                2.4.3
                /* Let each bridge do the work that it needs to do. */
                HMAP_FOR_EACH (br, node, &all_bridges) {
                    ofproto_run(br->ofproto);
                        p->ofproto_class->run(p); //run(struct ofproto *ofproto_)
                            if (ofproto->netflow) {
                                netflow_run(ofproto->netflow);
                            }
                            if (ofproto->sflow) {
                                dpif_sflow_run(ofproto->sflow);
                            }
                            if (ofproto->ipfix) {
                                dpif_ipfix_run(ofproto->ipfix);
                            }
                            stp_run(ofproto);
                            rstp_run(ofproto);
                            mac_learning_run(ofproto->ml)
                            mcast_snooping_run(ofproto->ms)
                            /* Expire OpenFlow flows whose idle_timeout or hard_timeout
                             * has passed. */
                            LIST_FOR_EACH_SAFE (rule, next_rule, expirable,
                                                &ofproto->up.expirable) {
                                rule_expire(rule_dpif_cast(rule), now);
                            }

                        //处理 controller,snoop 等连接
                        connmgr_run(p->connmgr, handle_openflow);
                            LIST_FOR_EACH_SAFE (ofconn, next_ofconn, node, &mgr->all_conns) {
                                ofconn_run(ofconn, handle_openflow);
                                    rconn_run(ofconn->rconn);
                                        vconn_run(rc->vconn);
                                        for (i = 0; i < rc->n_monitors; ) {
                                            vconn_run(rc->monitors[i]);
                                            vconn_recv(rc->monitors[i], &msg);
                                        }
                                    /* Limit the number of iterations to avoid starving other tasks. */
                                    for (i = 0; i < 50 && ofconn_may_recv(ofconn); i++) {
                                        struct ofpbuf *of_msg = rconn_recv(ofconn->rconn);
                                            vconn_recv(rc->vconn, &buffer);
                                            //将从controller收到的消息进行复制,发送给所有的monitor,即 snoop
                                            //发送消息给controller时,也会复制,发送给所有monitor,可参考 rconn_send__
                                            copy_to_monitor(rc, buffer);
                                                for (i = 0; i < rc->n_monitors; ) {
                                                    struct vconn *vconn = rc->monitors[i];
                                                    clone = ofpbuf_clone(b);//buffer
                                                    vconn_send(vconn, clone);
                                                }
                                        //处理 openflow 消息
                                        handle_openflow(ofconn, of_msg);
                                            handle_openflow__(ofconn, ofp_msg);
                                                ofptype_decode(&type, oh);
                                                    switch (type) {
                                                        /* OpenFlow requests. */
                                                    case OFPTYPE_ECHO_REQUEST:
                                                        return handle_echo_request(ofconn, oh);

                                                    case OFPTYPE_FEATURES_REQUEST:
                                                        return handle_features_request(ofconn, oh);
                                                    }
                                        ofpbuf_delete(of_msg);
                                    }
                            }

                            ofmonitor_run(mgr);

                            //处理 service controller 连接
                            HMAP_FOR_EACH (ofservice, node, &mgr->services) {
                                struct vconn *vconn;
                                pvconn_accept(ofservice->pvconn, &vconn);
                                struct rconn *rconn;
                                rconn = rconn_create(ofservice->probe_interval, 0, ofservice->dscp, vconn_get_allowed_versions(vconn));
                                    struct rconn *rc = xzalloc(sizeof *rc);
                                    ovs_mutex_init(&rc->mutex);
                                    rc->state = S_VOID;
                                    rc->state_entered = time_now();
                                    rc->vconn = NULL;
                                    rc->n_monitors = 0;
                                rconn_connect_unreliably(rconn, vconn, name);
                                    rc->vconn = vconn;
                                struct ofconn *ofconn;
                                //创建 ofconn,将新连接添加到 mgr->all_conns
                                ofconn = ofconn_create(mgr, rconn, OFCONN_SERVICE, ofservice->enable_async_msgs);
                                    struct ofconn *ofconn;
                                    ofconn = xzalloc(sizeof *ofconn);
                                    ofconn->connmgr = mgr;
                                    ovs_list_push_back(&mgr->all_conns, &ofconn->node);
                                    ofconn->rconn = rconn;
                                    ofconn->type = type;
                            }

                            //处理 snoop
                            for (i = 0; i < mgr->n_snoops; i++) {
                                struct vconn *vconn;
                                //如果snoop有新连接,则 add_snooper
                                pvconn_accept(mgr->snoops[i], &vconn);
                                add_snooper(mgr, vconn);
                                    struct ofconn *ofconn, *best;
                                    /* Pick a controller for monitoring. */
                                    best = NULL;
                                    LIST_FOR_EACH (ofconn, node, &mgr->all_conns) {
                                        if (ofconn->type == OFCONN_PRIMARY
                                            && (!best || snoop_preference(ofconn) > snoop_preference(best))) {
                                            best = ofconn;
                                        }
                                    }
                                    if (best) {
                                        rconn_add_monitor(best->rconn, vconn);
                                            rc->monitors[rc->n_monitors++] = vconn;
                                    } else {
                                        VLOG_INFO_RL(&rl, "no controller connection to snoop");
                                        vconn_close(vconn);
                                    }
                            }
                }

            2.5
            bridge_reconfigure(cfg ? cfg : &null_cfg);
                2.5.0 解析配置
                //flow-limit 决定dpcls流表最大个数限制
                ofproto_set_flow_limit(smap_get_int(&ovs_cfg->other_config, "flow-limit",
                                                    OFPROTO_FLOW_LIMIT_DEFAULT));
                //max-idle 决定dpcls流表超时时间
                ofproto_set_max_idle(smap_get_int(&ovs_cfg->other_config, "max-idle",
                                                  OFPROTO_MAX_IDLE_DEFAULT));
                //vlan-limit 决定vlan头个数,单vlan还是双vlan
                ofproto_set_vlan_limit(smap_get_int(&ovs_cfg->other_config, "vlan-limit",
                                                   LEGACY_MAX_VLAN_HEADERS));

                //给 n_handlers 和 n_revalidators 赋值,用来在udpif_set_threads中决定起几个thread
                ofproto_set_threads(smap_get_int(&ovs_cfg->other_config, "n-handler-threads", 0),
                                    smap_get_int(&ovs_cfg->other_config, "n-revalidator-threads", 0));
                    int threads = MAX(count_cpu_cores(), 2);

                    n_revalidators = MAX(n_revalidators_, 0);
                    n_handlers = MAX(n_handlers_, 0);

                    if (!n_revalidators) {
                        n_revalidators = n_handlers
                            ? MAX(threads - (int) n_handlers, 1)
                            : threads / 4 + 1;
                    }

                    if (!n_handlers) {
                        n_handlers = MAX(threads - (int) n_revalidators, 1);
                    }
                2.5.1
                add_del_bridges(ovs_cfg);
                    //获取最新的 bridge 配置
                    /* Collect new bridges' names and types. */
                    shash_init(&new_br);
                    for (i = 0; i < cfg->n_bridges; i++) {
                        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
                        const struct ovsrec_bridge *br_cfg = cfg->bridges[i];
                        //过滤掉 bridge name 包含特殊字符的bridge
                        if (strchr(br_cfg->name, '/') || strchr(br_cfg->name, '\\')) {
                            /* Prevent remote ovsdb-server users from accessing arbitrary
                             * directories, e.g. consider a bridge named "../../../etc/".
                             *
                             * Prohibiting "\" is only necessary on Windows but it's no great
                             * loss elsewhere. */
                            VLOG_WARN_RL(&rl, "ignoring bridge with invalid name \"%s\"",
                                         br_cfg->name);
                        //将符合规则的 bridge 添加到 new_br
                        } else if (!shash_add_once(&new_br, br_cfg->name, br_cfg)) {
                            VLOG_WARN_RL(&rl, "bridge %s specified twice", br_cfg->name);
                        }
                    }
                    
                    //如果 all_bridges 中的bridge在最新bridge配置中不存在了或者type改变了,则需要删除此bridge
                    /* Get rid of deleted bridges or those whose types have changed.
                     * Update 'cfg' of bridges that still exist. */
                    HMAP_FOR_EACH_SAFE (br, next, node, &all_bridges) {
                        br->cfg = shash_find_data(&new_br, br->name);
                        if (!br->cfg || strcmp(br->type, ofproto_normalize_type(br->cfg->datapath_type))) {
                            bridge_destroy(br, true);
                        }
                    }

                    //将新添加的bridge插入 all_bridges
                    /* Add new bridges. */
                    SHASH_FOR_EACH(node, &new_br) {
                        const struct ovsrec_bridge *br_cfg = node->data;
                        //到 all_bridges 查找是否已经存在bridge,如果不存在则创建
                        if (!bridge_lookup(br_cfg->name)) {
                            bridge_create(br_cfg);
                                struct bridge *br;
                                br = xzalloc(sizeof *br);
                                br->name = xstrdup(br_cfg->name);
                                br->type = xstrdup(ofproto_normalize_type(br_cfg->datapath_type));
                                //bridge 的配置,包含多少端口等信息
                                br->cfg = br_cfg;
                                memcpy(&br->default_ea, &br_cfg->header_.uuid, ETH_ADDR_LEN);
                                eth_addr_mark_random(&br->default_ea);
                                hmap_init(&br->ports);
                                hmap_init(&br->ifaces);
                                hmap_init(&br->iface_by_name);
                                hmap_init(&br->mirrors);
                                hmap_init(&br->mappings);
                                //新创建的bridge,要插入 all_bridges
                                hmap_insert(&all_bridges, &br->node, hash_string(br->name, 0));
                        }
                2.5.2 将br上port插入 br->wanted_ports
                HMAP_FOR_EACH (br, node, &all_bridges) {
                    bridge_collect_wanted_ports(br, &br->wanted_ports);
                        //将bridge上最新配置的port插入hash表 wanted_ports
                        shash_init(wanted_ports);
                        for (i = 0; i < br->cfg->n_ports; i++) {
                            const char *name = br->cfg->ports[i]->name;
                            if (!shash_add_once(wanted_ports, name, br->cfg->ports[i])) {
                                VLOG_WARN("bridge %s: %s specified twice as bridge port", br->name, name);
                            }
                        }
                        //如果配置了 controller,且 wanted_ports 中没有和 bridge 同名的 port,
                        //则自动合成一个和 bridge 名字相同的 internal port
                        if (bridge_get_controllers(br, NULL)
                            && !shash_find(wanted_ports, br->name)) {
                            VLOG_WARN("bridge %s: no port named %s, synthesizing one",
                                      br->name, br->name);

                            ovsrec_interface_init(&br->synth_local_iface);
                            ovsrec_port_init(&br->synth_local_port);

                            br->synth_local_port.interfaces = &br->synth_local_ifacep;
                            br->synth_local_port.n_interfaces = 1;
                            br->synth_local_port.name = br->name;

                            br->synth_local_iface.name = br->name;
                            br->synth_local_iface.type = "internal";

                            br->synth_local_ifacep = &br->synth_local_iface;

                            shash_add(wanted_ports, br->name, &br->synth_local_port);
                        }
                        
                    bridge_del_ports(br, &br->wanted_ports);
                        struct shash_node *port_node;
                        struct port *port, *next;

                        //删除已经不在wanted_ports中的port
                        /* Get rid of deleted ports.
                         * Get rid of deleted interfaces on ports that still exist. */
                        HMAP_FOR_EACH_SAFE (port, next, hmap_node, &br->ports) {
                            port->cfg = shash_find_data(wanted_ports, port->name);
                            if (!port->cfg) {
                                port_destroy(port);
                            } else {
                                //删除 port 下的不需要的 interface
                                port_del_ifaces(port);
                                    /* Collect list of new interfaces. */
                                    sset_init(&new_ifaces);
                                    for (i = 0; i < port->cfg->n_interfaces; i++) {
                                        const char *name = port->cfg->interfaces[i]->name;
                                        const char *type = port->cfg->interfaces[i]->type;
                                        if (strcmp(type, "null")) {
                                            sset_add(&new_ifaces, name);
                                        }
                                    }

                                    /* Get rid of deleted interfaces. */
                                    LIST_FOR_EACH_SAFE (iface, next, port_elem, &port->ifaces) {
                                        if (!sset_contains(&new_ifaces, iface->name)) {
                                            iface_destroy(iface);
                                        }
                                    }

                                    sset_destroy(&new_ifaces);
                            }
                        }
                        //更新iface的 cfg 和 type
                        /* Update iface->cfg and iface->type in interfaces that still exist. */
                        SHASH_FOR_EACH (port_node, wanted_ports) {
                            const struct ovsrec_port *port_rec = port_node->data;
                            for (i = 0; i < port_rec->n_interfaces; i++) {
                                const struct ovsrec_interface *cfg = port_rec->interfaces[i];
                                struct iface *iface = iface_lookup(br, cfg->name);
                                const char *type = iface_get_type(cfg, br->cfg);
                                const char *dp_type = br->cfg->datapath_type;
                                const char *netdev_type = ofproto_port_open_type(dp_type, type);
                                if (iface) {
                                    iface->cfg = cfg;
                                    iface->type = type;
                                    iface->netdev_type = netdev_type;
                                }
                            }
                        }

                2.5.3 删除最新配置中不存在的或者type改变的 ofproto
                bridge_delete_ofprotos();
                    //types 为 system,netdev
                    ofproto_enumerate_types(&types);
                        sset_clear(types);
                        //只有一个ofproto_classes,即 ofproto_dpif_class
                        for (i = 0; i < n_ofproto_classes; i++) {
                            ofproto_classes[i]->enumerate_types(types); //enumerate_types
                                dp_enumerate_types(types);
                                    //获取所有 dpif_classes 的 type,目前有两个:system和netdev
                                    SHASH_FOR_EACH(node, &dpif_classes) {
                                        const struct registered_dpif_class *registered_class = node->data;
                                        sset_add(types, registered_class->dpif_class->type);
                                    }
                        }
                    SSET_FOR_EACH (type, &types) {
                        ofproto_enumerate_names(type, &names); //enumerate_names
                            //获取所有 ofproto 的名字
                            struct ofproto_dpif *ofproto;
                            sset_clear(names);
                            HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
                                if (strcmp(type, ofproto->up.type)) {
                                    continue;
                                }
                                sset_add(names, ofproto->up.name);
                            }
                        SSET_FOR_EACH (name, &names) {
                            //遍历names到 all_bridges 寻找,如果找不到或者桥type改变,则需要删除此 ofproto
                            br = bridge_lookup(name);
                            if (!br || strcmp(type, br->type)) {
                                ofproto_delete(name, type);
                            }
                        }
                    }

                2.5.4 删除不需要的 port
                HMAP_FOR_EACH (br, node, &all_bridges) {
                if (br->ofproto) {
                    /* Main task: Iterate over the ports in 'br->ofproto' and remove the ports
                     * that are not configured in the database.  (This commonly happens when
                     * ports have been deleted, e.g. with "ovs-vsctl del-port".)
                     *
                     * Side tasks: Reconfigure the ports that are still in 'br'.  Delete ports
                     * that have the wrong OpenFlow port number (and arrange to add them back
                     * with the correct OpenFlow port number). */
                    bridge_delete_or_reconfigure_ports(br);                 

                2.5.5 为每个桥创建 struct ofproto
                HMAP_FOR_EACH_SAFE (br, next, node, &all_bridges) {
                    if (!br->ofproto) {
                        ofproto_create(br->name, br->type, &br->ofproto); {
                            //datapath_type 为 system or netdev
                            datapath_type = ofproto_normalize_type(datapath_type);
                            //ofproto_class 为 ofproto_dpif_class
                            struct ofproto_class * class = ofproto_class_find__(datapath_type);
                            //分配内存
                            struct ofproto *ofproto;
                            ofproto = class->alloc(); //alloc(void)
                                struct ofproto_dpif *ofproto = xzalloc(sizeof *ofproto);
                                return &ofproto->up;
                            ofproto->ofproto_class = class;
                            ofproto->name = xstrdup(datapath_name);
                            ofproto->type = xstrdup(datapath_type);
                            //将 ofproto 插入全局静态变量 all_ofprotos
                            hmap_insert(&all_ofprotos, &ofproto->hmap_node, hash_string(ofproto->name, 0));

                            ofproto->ofproto_class->construct(ofproto); //construct(struct ofproto *ofproto_)
                                struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
                                ofproto_tunnel_init();
                                    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

                                    if (ovsthread_once_start(&once)) {
                                        fat_rwlock_init(&rwlock);
                                        ovsthread_once_done(&once);
                                    }
                                open_dpif_backer(ofproto->up.type, &ofproto->backer);
                                    struct dpif_backer *backer;
                                    //首先根据type查找,如果为kernel space datapath,则type为system,
                                    //如果userspace datapath,则type为netdev。
                                    /* All datapaths of a given type share a single dpif backer instance. */
                                    backer = shash_find_data(&all_dpif_backers, type);
                                    //如果已经创建了,则引用计数加1即可
                                    if (backer) {
                                        backer->refcount++;
                                        *backerp = backer;
                                        return 0;
                                    }
                                    //datapath 名字,ovs-netdev 或者 ovs-system
                                    backer_name = xasprintf("ovs-%s", type);
                                    backer = xmalloc(sizeof *backer);
                                    dpif_create_and_open(backer_name, type, &backer->dpif);
                                        dpif_create(name, type, dpifp);
                                            do_open(name, type, true, dpifp);
                                                type = dpif_normalize_type(type);
                                                registered_class = dp_class_lookup(type);
                                                //dpif_netlink_open 或者 dpif_netdev_open
                                                registered_class->dpif_class->open(registered_class->dpif_class, name, create, &dpif);

                                        dpif_open(name, type, dpifp);
                                            do_open(name, type, false, dpifp);
                                                type = dpif_normalize_type(type);
                                                registered_class = dp_class_lookup(type);
                                                registered_class->dpif_class->open(registered_class->dpif_class, name, create, &dpif);
                                                //如果为 dpif_netlink_open,则和 openvswitch kernel module 交互;
                                                //create 为 true 时使用 OVS_DP_CMD_NEW 创建 datapath
                                                    dp_request.cmd = OVS_DP_CMD_NEW;
                                                    upcall_pid = 0;
                                                    dp_request.upcall_pid = &upcall_pid;
                                                    dp_request.name = name;
                                                    dp_request.user_features |= OVS_DP_F_UNALIGNED;
                                                    dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
                                                    dpif_netlink_dp_transact(&dp_request, &dp, &buf);

                                    backer->udpif = udpif_create(backer, backer->dpif);
                                        struct udpif *udpif = xzalloc(sizeof *udpif);
                                        udpif->dpif = dpif;
                                        udpif->backer = backer;
                                        atomic_init(&udpif->flow_limit, MIN(ofproto_flow_limit, 10000));
                                        udpif->reval_seq = seq_create();
                                        udpif->dump_seq = seq_create();
                                        latch_init(&udpif->exit_latch);
                                        latch_init(&udpif->pause_latch);
                                        ovs_list_push_back(&all_udpifs, &udpif->list_node);
                                        atomic_init(&udpif->enable_ufid, false);
                                        atomic_init(&udpif->n_flows, 0);
                                        atomic_init(&udpif->n_flows_timestamp, LLONG_MIN);
                                        ovs_mutex_init(&udpif->n_flows_mutex);
                                        udpif->ukeys = xmalloc(N_UMAPS * sizeof *udpif->ukeys);
                                        for (int i = 0; i < N_UMAPS; i++) {
                                            cmap_init(&udpif->ukeys[i].cmap);
                                            ovs_mutex_init(&udpif->ukeys[i].mutex);
                                        }
                                        //只有用户空间netdev数据通路才会提供 register_upcall_cb
                                        dpif_register_upcall_cb(dpif, upcall_cb, udpif);
                                            if (dpif->dpif_class->register_upcall_cb) {
                                                dpif->dpif_class->register_upcall_cb(dpif, cb, aux);
                                            }
                                        dpif_register_dp_purge_cb(dpif, dp_purge_cb, udpif);
                                        
                                    backer->type = xstrdup(type);
                                    backer->refcount = 1;
                                    hmap_init(&backer->odp_to_ofport_map);
                                    ovs_rwlock_init(&backer->odp_to_ofport_lock);
                                    backer->need_revalidate = 0;
                                    simap_init(&backer->tnl_backers);
                                    backer->recv_set_enable = !ofproto_get_flow_restore_wait();
                                    *backerp = backer;

                                    /* Loop through the ports already on the datapath and remove any
                                     * that we don't need anymore. */
                                    ovs_list_init(&garbage_list);
                                    dpif_port_dump_start(&port_dump, backer->dpif);
                                    while (dpif_port_dump_next(&port_dump, &port)) {
                                        node = shash_find(&init_ofp_ports, port.name);
                                        if (!node && strcmp(port.name, dpif_base_name(backer->dpif))) {
                                            garbage = xmalloc(sizeof *garbage);
                                            garbage->odp_port = port.port_no;
                                            ovs_list_push_front(&garbage_list, &garbage->list_node);
                                        }
                                    }
                                    dpif_port_dump_done(&port_dump);

                                    LIST_FOR_EACH_POP (garbage, list_node, &garbage_list) {
                                        dpif_port_del(backer->dpif, garbage->odp_port, false);
                                        free(garbage);
                                    }

                                    //将 backer 添加到全局变量 all_dpif_backers
                                    shash_add(&all_dpif_backers, type, backer);

                                    dpif_recv_set(backer->dpif, backer->recv_set_enable);
                                        //只有system数据通路会提供 dpif_netlink_recv_set
                                        dpif->dpif_class->recv_set(dpif, enable);
                                    if (backer->recv_set_enable) {
                                        udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
                                    }
                                uuid_generate(&ofproto->uuid);
                                ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
                                
                                //如果 iface 所属桥的名字是 ofproto 的名字,则将 iface 从 init_ofp_ports 删除
                                SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) {
                                    struct iface_hint *iface_hint = node->data;
                                    if (!strcmp(iface_hint->br_name, ofproto->up.name)) {
                                        /* Check if the datapath already has this port. */
                                        if (dpif_port_exists(ofproto->backer->dpif, node->name)) {
                                            sset_add(&ofproto->ports, node->name);
                                        }

                                        free(iface_hint->br_name);
                                        free(iface_hint->br_type);
                                        free(iface_hint);
                                        shash_delete(&init_ofp_ports, node);
                                    }
                                }
                                //将 ofproto 插入 hash 表 all_ofproto_dpifs
                                hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node,
                                            hash_string(ofproto->up.name, 0));
                                memset(&ofproto->stats, 0, sizeof ofproto->stats);

                                //创建 255 个 oftable
                                enum { N_TABLES = 255 };
                                ofproto_init_tables(ofproto_, N_TABLES);
                                    ofproto->n_tables = n_tables;
                                    ofproto->tables = xmalloc(n_tables * sizeof *ofproto->tables);
                                    OFPROTO_FOR_EACH_TABLE (table, ofproto) {
                                        oftable_init(table);
                                            table->max_flows = UINT_MAX;
                                            table->n_flows = 0;
                                    }

                                //添加默认流表
                                add_internal_flows(ofproto);
                                    add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule);
                                    add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->no_packet_in_rule);
                                    add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->drop_frags_rule);

                            ofproto->datapath_id = pick_datapath_id(ofproto);

                            //创建 ofport,添加到 ofproto
                            init_ports(ofproto);//init_ports(struct ofproto *p) {
                                OFPROTO_PORT_FOR_EACH (&ofproto_port, &dump, p) {
                                    const char *name = ofproto_port.name;
                                    if (shash_find(&p->port_by_name, name)) {
                                        VLOG_WARN_RL(&rl, "%s: ignoring duplicate device %s in datapath", p->name, name);
                                    } else {
                                        struct netdev *netdev;
                                        /* Check if an OpenFlow port number had been requested. */
                                        node = shash_find(&init_ofp_ports, name);
                                        if (node) {
                                            const struct iface_hint *iface_hint = node->data;
                                            simap_put(&p->ofp_requests, name, ofp_to_u16(iface_hint->ofp_port));
                                        }
                                        netdev = ofport_open(p, &ofproto_port, &pp);
                                            struct netdev *netdev;
                                            netdev_open(ofproto_port->name, ofproto_port->type, &netdev);
                                                struct netdev_registered_class *rc;
                                                rc = netdev_lookup_class(type && type[0] ? type : "system");
                                                if (rc && ovs_refcount_try_ref_rcu(&rc->refcnt)) {
                                                    netdev = rc->class->alloc();
                                                    if (netdev) {
                                                        memset(netdev, 0, sizeof *netdev);
                                                        netdev->netdev_class = rc->class;
                                                        netdev->auto_classified = type && type[0] ? false : true;
                                                        rc->class->construct(netdev);
                                                    }
                                                }
                                            return netdev;
                                        if (netdev) {
                                            ofport_install(p, netdev, &pp);
                                                struct ofport *ofport;
                                                ofport = p->ofproto_class->port_alloc();
                                                    struct ofport_dpif *port = xzalloc(sizeof *port);
                                                    return &port->up;
                                                ofport->ofproto = p;
                                                ofport->netdev = netdev;
                                                ofport->change_seq = netdev_get_change_seq(netdev);
                                                ofport->pp = *pp;
                                                ofport->ofp_port = pp->port_no;
                                                ofport->created = time_msec();
                                                /* Add port to 'p'. */
                                                hmap_insert(&p->ports, &ofport->hmap_node, hash_ofp_port(ofport->ofp_port));
                                                shash_add(&p->port_by_name, netdev_name, ofport);
                                                update_mtu(p, ofport);
                                                p->ofproto_class->port_construct(ofport);
                                                //发送添加端口消息
                                                connmgr_send_port_status(p->connmgr, NULL, pp, OFPPR_ADD);
                                        }
                                    }
                                } //OFPROTO_PORT_FOR_EACH
                            } //init_ports
                        } //ofproto_create
                    } //if (!br->ofproto)
                } //HMAP_FOR_EACH_SAFE

                2.5.6
                //将 wanted_ports 中的 port 添加到 bridge 的 ports,ifaces 和 iface_by_name 表中
                //遍历 wanted_ports 所有port的所有iface,将iface(patch类型的不用下发)下发到datapath,
                //同时每个iface还会生成ofport,插入 ofproto->ports 中
                HMAP_FOR_EACH (br, node, &all_bridges) {
                    bridge_add_ports(br, &br->wanted_ports);
                        /* First add interfaces that request a particular port number. */
                        bridge_add_ports__(br, wanted_ports, true);

                        /* Then add interfaces that want automatic port number assignment.
                         * We add these afterward to avoid accidentally taking a specifically
                         * requested port number. */
                        //bridge_add_ports__(struct bridge *br, const struct shash *wanted_ports, bool with_requested_port)
                        bridge_add_ports__(br, wanted_ports, false);
                            struct shash_node *port_node;
                            SHASH_FOR_EACH (port_node, wanted_ports) {
                                const struct ovsrec_port *port_cfg = port_node->data;
                                for (i = 0; i < port_cfg->n_interfaces; i++) {
                                    const struct ovsrec_interface *iface_cfg = port_cfg->interfaces[i];
                                    requested_ofp_port = iface_get_requested_ofp_port(iface_cfg);
                                        iface_validate_ofport__(cfg->n_ofport_request, cfg->ofport_request);
                                    if ((requested_ofp_port != OFPP_NONE) == with_requested_port) {
                                        //到 br->iface_by_name 查看是否已经存在
                                        struct iface *iface = iface_lookup(br, iface_cfg->name);
                                        //不存在则创建
                                        if (!iface) {
                                            iface_create(br, iface_cfg, port_cfg); {
                                                struct netdev *netdev;
                                                struct iface *iface;
                                                ofp_port_t ofp_port;
                                                struct port *port;
                                                char *errp = NULL;
                                                int error;

                                                iface_do_create(br, iface_cfg, &ofp_port, &netdev, &errp); {
                                                    type = ofproto_port_open_type(br->cfg->datapath_type, iface_get_type(iface_cfg, br->cfg));
                                                        const struct ofproto_class *class;
                                                        datapath_type = ofproto_normalize_type(datapath_type);
                                                        class = ofproto_class_find__(datapath_type);
                                                        //port_open_type
                                                        class->port_open_type(datapath_type, port_type)
                                                            dpif_port_open_type(datapath_type, port_type);
                                                                rc = shash_find_data(&dpif_classes, datapath_type);
                                                                //对于netdev dp来说,dpif_netdev_port_open_type
                                                                //对于system dp来说,此函数为空,所以返回原始的 port_type
                                                                port_type = rc->dpif_class->port_open_type(rc->dpif_class, port_type);
                                                                    //如果配置的类型不是 internal(比如 tap 或者 system),则原样返回。
                                                                    //如果配置的类型为 internal,则自动转换成 tap(dummy 类型的 datapath 转换成 dummy-internal)。
                                                                    return strcmp(type, "internal") ? type : dpif_netdev_class_is_dummy(class) ? "dummy-internal" : "tap";
                                                    netdev_open(iface_cfg->name, type, &netdev);
                                                        //注册所有的 netdev_class
                                                        netdev_initialize();
                                                            if (ovsthread_once_start(&once)) {
                                                                netdev_vport_patch_register();
                                                                netdev_vport_tunnel_register();
                                                                netdev_register_provider(&netdev_linux_class);
                                                                netdev_register_provider(&netdev_internal_class);
                                                                netdev_register_provider(&netdev_tap_class);
                                                                    new_class->init()
                                                                    struct netdev_registered_class *rc;
                                                                    rc = xmalloc(sizeof *rc);
                                                                    cmap_insert(&netdev_classes, &rc->cmap_node, hash_string(new_class->type, 0));
                                                                    rc->class = new_class;
                                                            }
                                                        //type 为 system,internal,tap等
                                                        struct netdev_registered_class *rc;
                                                        rc = netdev_lookup_class(type && type[0] ? type : "system");
                                                        //如果type为system,则 alloc 为 netdev_linux_alloc
                                                        netdev = rc->class->alloc();
                                                            struct netdev_linux *netdev = xzalloc(sizeof *netdev);
                                                            return &netdev->up;
                                                        memset(netdev, 0, sizeof *netdev);
                                                        netdev->netdev_class = rc->class;
                                                        netdev->auto_classified = type && type[0] ? false : true;
                                                        netdev->name = xstrdup(name);
                                                        //如果type为system,则 construct 为 netdev_linux_construct
                                                        rc->class->construct(netdev);

                                                    iface_set_netdev_config(iface_cfg, netdev, errp);           
                                                        netdev_set_config(netdev, &iface_cfg->options, errp);
                                                            netdev->netdev_class->set_config(netdev, args ? args : &no_args, &verbose_error);

                                                    iface_set_netdev_mtu(iface_cfg, netdev);
                                                        netdev_set_mtu(netdev, *iface_cfg->mtu_request);
                                                            class->set_mtu(netdev, mtu)

                                                    //获取 portid
                                                    *ofp_portp = iface_pick_ofport(iface_cfg);
                                                    ofproto_port_add(br->ofproto, netdev, ofp_portp); {
                                                        //调用 ofproto_dpif_class 中的 port_add
                                                        //port_add(struct ofproto *ofproto_, struct netdev *netdev)
                                                        ofproto->ofproto_class->port_add(ofproto, netdev); {
                                                            struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
                                                            //如果是 patch 类型的port直接返回
                                                            //patch 端口不需要加到 datapath
                                                            if (netdev_vport_is_patch(netdev)) {
                                                                sset_add(&ofproto->ghost_ports, netdev_get_name(netdev));
                                                                return 0;
                                                            }
                                                            //获取端口名字
                                                            dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
                                                            //如果datapath还不存在此端口,则将端口加入到datapath,包括tunnel端口
                                                            if (!dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
                                                                //将端口加入 datapath
                                                                dpif_port_add(ofproto->backer->dpif, netdev, &port_no);
                                                                    //dpif_netlink_port_add 或者 dpif_netdev_port_add
                                                                    dpif->dpif_class->port_add(dpif, netdev, &port_no);
                                                                    //如果为 dpif_netlink_port_add
                                                                        struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
                                                                        int error = EOPNOTSUPP;
                                                                        if (!ovs_tunnels_out_of_tree) {
                                                                            //如果 openvswitch.ko 是 kernel 源码提供的(即 ovs_tunnels_out_of_tree 为 false),则调用此函数
                                                                            error = dpif_netlink_rtnl_port_create_and_add(dpif, netdev, port_nop);
                                                                                //创建vport,比如vxlan,会使用到kernel提供的vxlan模块
                                                                                dpif_netlink_rtnl_port_create(netdev);
                                                                                    type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
                                                                                    tnl_cfg = netdev_get_tunnel_config(netdev);
                                                                                    if (!tnl_cfg) {
                                                                                        return EOPNOTSUPP;
                                                                                    }

                                                                                    kind = vport_type_to_kind(type, tnl_cfg);
                                                                                    if (!kind) {
                                                                                        return EOPNOTSUPP;
                                                                                    }
                                                                                    name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
                                                                                    flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL;

                                                                                    err = dpif_netlink_rtnl_create(tnl_cfg, name, type, kind, flags);
                                                                                        /* tunnel unique info */
                                                                                        switch (type) {
                                                                                        case OVS_VPORT_TYPE_VXLAN:
                                                                                            nl_msg_put_u8(&request, IFLA_VXLAN_LEARNING, 0);
                                                                                            nl_msg_put_u8(&request, IFLA_VXLAN_COLLECT_METADATA, 1);
                                                                                            nl_msg_put_u8(&request, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, 1);
                                                                                            if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)) {
                                                                                                nl_msg_put_flag(&request, IFLA_VXLAN_GBP);
                                                                                            }
                                                                                            if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)) {
                                                                                                nl_msg_put_flag(&request, IFLA_VXLAN_GPE);
                                                                                            }
                                                                                            nl_msg_put_be16(&request, IFLA_VXLAN_PORT, tnl_cfg->dst_port);
                                                                                            break;
                                                                                        ...
                                                                                        }
                                                                                        nl_transact(NETLINK_ROUTE, &request, NULL);
                                                                                name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
                                                                                //因为上面已经创建vxlan端口,此处使用OVS_VPORT_TYPE_NETDEV即可,
                                                                                //表示不用openvswitch.ko创建vxlan端口
                                                                                dpif_netlink_port_add__(dpif, name, OVS_VPORT_TYPE_NETDEV, NULL, port_nop);
                                                                                    //和kernel module openvswitch.ko 交互,创建vport
                                                                                    dpif_netlink_port_add__(dpif, name, ovs_type, NULL, port_nop);
                                                                                        dpif_netlink_vport_init(&request);
                                                                                        request.cmd = OVS_VPORT_CMD_NEW;
                                                                                        request.dp_ifindex = dpif->dp_ifindex;
                                                                                        request.type = type;
                                                                                        request.name = name;
                                                                                        dpif_netlink_vport_transact(&request, &reply, &buf);
                                                                        } //if (!ovs_tunnels_out_of_tree)
                                                                        if (error) {
                                                                            //如果 openvswitch.ko 是 ovs 源码提供的(error 保持初始值 EOPNOTSUPP),则调用此函数
                                                                            //或者上面的创建返回了错误(比如创建的是非tunnel端口,返回 EOPNOTSUPP),也调用此函数
                                                                            dpif_netlink_port_add_compat(dpif, netdev, port_nop);
                                                                                name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);

                                                                                ovs_type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
                                                                                tnl_cfg = netdev_get_tunnel_config(netdev);
                                                                                if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
                                                                                    if (tnl_cfg->dst_port) {
                                                                                        nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
                                                                                                       ntohs(tnl_cfg->dst_port));
                                                                                    }
                                                                                    return dpif_netlink_port_add__(dpif, name, ovs_type, &options, port_nop);
                                                                                }else {
                                                                                    return dpif_netlink_port_add__(dpif, name, ovs_type, NULL, port_nop);
                                                                                }
                                                                        }
                                                            }
                                                            if (netdev_get_tunnel_config(netdev)) {
                                                                sset_add(&ofproto->ghost_ports, devname);
                                                            } else {
                                                                sset_add(&ofproto->ports, devname);
                                                            }
                                                        } //port_add

                                                        const char *netdev_name = netdev_get_name(netdev);
                                                        simap_put(&ofproto->ofp_requests, netdev_name, ofp_to_u16(ofp_port));
                                                        //创建/更新 ofport
                                                        update_port(ofproto, netdev_name);
                                                            struct ofport *port;
                                                            //先到 ofproto->ports 根据 ofp_port 查找是否已经存在
                                                            port = ofproto_get_port(ofproto, ofproto_port.ofp_port);
                                                            //如果已经存在,则更新配置
                                                            if (port && !strcmp(netdev_get_name(port->netdev), name)) {
                                                                update_mtu(ofproto, port);
                                                            }
                                                            else {
                                                                //如果不存在,则分配 ofport,并插入 ofproto->ports
                                                                ofport_install(ofproto, netdev, &pp);
                                                                    const char *netdev_name = netdev_get_name(netdev);
                                                                    struct ofport *ofport;
                                                                    /* Create ofport. */
                                                                    ofport = p->ofproto_class->port_alloc();
                                                                    ofport->ofproto = p;
                                                                    ofport->netdev = netdev;
                                                                    ofport->change_seq = netdev_get_change_seq(netdev);
                                                                    ofport->pp = *pp;
                                                                    ofport->ofp_port = pp->port_no;
                                                                    ofport->created = time_msec();

                                                                    /* Add port to 'p'. */
                                                                    hmap_insert(&p->ports, &ofport->hmap_node,
                                                                                hash_ofp_port(ofport->ofp_port));
                                                                    shash_add(&p->port_by_name, netdev_name, ofport);

                                                                    update_mtu(p, ofport);

                                                                    /* Let the ofproto_class initialize its private data. */
                                                                    p->ofproto_class->port_construct(ofport);
                                                            }
                                                    } //ofproto_port_add
                                                } //iface_do_create

                                                /* Get or create the port structure. */
                                                //到 br->ports 查找是否已经存在port
                                                struct port *port;
                                                port = port_lookup(br, port_cfg->name);
                                                if (!port) {
                                                    port = port_create(br, port_cfg);
                                                        struct port *port;
                                                        port = xzalloc(sizeof *port);
                                                        port->bridge = br;
                                                        port->name = xstrdup(cfg->name);
                                                        port->cfg = cfg;
                                                        ovs_list_init(&port->ifaces);
                                                        hmap_insert(&br->ports, &port->hmap_node, hash_string(port->name, 0));
                                                }

                                                /* Create the iface structure. */
                                                iface = xzalloc(sizeof *iface);
                                                //将 iface 插入port->ifaces链表
                                                ovs_list_push_back(&port->ifaces, &iface->port_elem);
                                                //将 iface 按名字hash后,插入 br->iface_by_name hashmap 表
                                                hmap_insert(&br->iface_by_name, &iface->name_node, hash_string(iface_cfg->name, 0));
                                                iface->port = port;
                                                iface->name = xstrdup(iface_cfg->name);
                                                iface->ofp_port = ofp_port;
                                                iface->netdev = netdev;
                                                iface->type = iface_get_type(iface_cfg, br->cfg);
                                                iface->netdev_type = ofproto_port_open_type(br->cfg->datapath_type, iface->type);
                                                iface->cfg = iface_cfg;
                                                //将 iface 按port号hash后,插入 br->ifaces 表
                                                hmap_insert(&br->ifaces, &iface->ofp_port_node, hash_ofp_port(ofp_port));
                                            } //iface_create
                                        } //if (!iface)
                                    } //if
                                } //for
                            }
                }
                2.5.7
                HMAP_FOR_EACH (br, node, &all_bridges) {
                    bridge_configure_datapath_id(br);
                    HMAP_FOR_EACH (port, hmap_node, &br->ports) {
                        port_configure(port);
                            const struct ovsrec_port *cfg = port->cfg;
                            struct ofproto_bundle_settings s;
                            s.name = port->name;
                            s.slaves = xmalloc(ovs_list_size(&port->ifaces) * sizeof *s.slaves);
                            LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
                                s.slaves[s.n_slaves++] = iface->ofp_port;
                            }
                            /* Get VLAN tag. */
                            s.vlan = -1;
                            if (cfg->tag && *cfg->tag >= 0 && *cfg->tag <= 4095) {
                                s.vlan = *cfg->tag;
                            }

                            /* Get VLAN trunks. */
                            s.trunks = NULL;
                            if (cfg->n_trunks) {
                                s.trunks = vlan_bitmap_from_array(cfg->trunks, cfg->n_trunks);
                            }

                            s.cvlans = NULL;
                            if (cfg->n_cvlans) {
                                s.cvlans = vlan_bitmap_from_array(cfg->cvlans, cfg->n_cvlans);
                            }

                            /* Get VLAN mode. */
                            if (cfg->vlan_mode) {
                                if (!strcmp(cfg->vlan_mode, "access")) {
                                    s.vlan_mode = PORT_VLAN_ACCESS;
                                } else if (!strcmp(cfg->vlan_mode, "trunk")) {
                                    s.vlan_mode = PORT_VLAN_TRUNK;
                                } else if (!strcmp(cfg->vlan_mode, "native-tagged")) {
                                    s.vlan_mode = PORT_VLAN_NATIVE_TAGGED;
                                } else if (!strcmp(cfg->vlan_mode, "native-untagged")) {
                                    s.vlan_mode = PORT_VLAN_NATIVE_UNTAGGED;
                                } else if (!strcmp(cfg->vlan_mode, "dot1q-tunnel")) {
                                    s.vlan_mode = PORT_VLAN_DOT1Q_TUNNEL;
                                } else {
                                    /* This "can't happen" because ovsdb-server should prevent it. */
                                    VLOG_WARN("port %s: unknown VLAN mode %s, falling "
                                              "back to trunk mode", port->name, cfg->vlan_mode);
                                    s.vlan_mode = PORT_VLAN_TRUNK;
                                }
                            } else {
                                if (s.vlan >= 0) {
                                    s.vlan_mode = PORT_VLAN_ACCESS;
                                    if (cfg->n_trunks || cfg->n_cvlans) {
                                        VLOG_WARN("port %s: ignoring trunks in favor of implicit vlan",
                                                  port->name);
                                    }
                                } else {
                                    s.vlan_mode = PORT_VLAN_TRUNK;
                                }
                            }
                            s.lacp = port_configure_lacp(port, &lacp_settings);
                            port_configure_bond(port, &bond_settings);
                            ofproto_bundle_register(port->bridge->ofproto, port, &s);
                                //bundle_set
                                ofproto->ofproto_class->bundle_set(ofproto, aux, s)
                                    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
                                    struct ofbundle *bundle;
                                    bundle = xmalloc(sizeof *bundle);
                                    bundle->ofproto = ofproto;
                                    //将bundle插入hash表 ofproto->bundles
                                    hmap_insert(&ofproto->bundles, &bundle->hmap_node, hash_pointer(aux, 0));
                                    bundle->aux = aux;
                                    bundle->name = NULL;

                                    ovs_list_init(&bundle->ports);
                                    bundle->vlan_mode = PORT_VLAN_TRUNK;
                                    bundle->qinq_ethtype = ETH_TYPE_VLAN_8021AD;
                                    bundle->vlan = -1;
                                    bundle->trunks = NULL;
                                    bundle->cvlans = NULL;
                                    bundle->use_priority_tags = s->use_priority_tags;
                                    bundle->lacp = NULL;
                                    bundle->bond = NULL;

                                    bundle->floodable = true;
                                    bundle->protected = false;

                        LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
                            iface_set_ofport(iface->cfg, iface->ofp_port);
                            /* Clear eventual previous errors */
                            ovsrec_interface_set_error(iface->cfg, NULL);
                            iface_configure_cfm(iface);
                            iface_configure_qos(iface, port->cfg->qos);
                            iface_set_mac(br, port, iface);
                            ofproto_port_set_bfd(br->ofproto, iface->ofp_port, &iface->cfg->bfd);
                            ofproto_port_set_lldp(br->ofproto, iface->ofp_port, &iface->cfg->lldp);
                            ofproto_port_set_config(br->ofproto, iface->ofp_port, &iface->cfg->other_config);
                        }
                    }
                    bridge_configure_mirrors(br);
                    bridge_configure_forward_bpdu(br);
                    bridge_configure_mac_table(br);
                    bridge_configure_mcast_snooping(br);
                    bridge_configure_remotes(br, managers, n_managers);
                        struct ovsrec_controller **controllers;
                        size_t n_controllers;
                        struct ofproto_controller *ocs;
                        //获取配置的 controller
                        n_controllers = bridge_get_controllers(br, &controllers);
                            struct ovsrec_controller **controllers;
                            size_t n_controllers;
                            controllers = br->cfg->controller;
                            n_controllers = br->cfg->n_controller;
                        ocs = xmalloc((n_controllers + 1) * sizeof *ocs);
                        n_ocs = 0;
                        //默认为每个网桥添加controller "/usr/local/var/run/openvswitch/br1.mgmt"
                        bridge_ofproto_controller_for_mgmt(br, &ocs[n_ocs++]);
                            oc->target = xasprintf("punix:%s/%s.mgmt", ovs_rundir(), br->name);
                            oc->max_backoff = 0;
                            oc->probe_interval = 60;
                            oc->band = OFPROTO_OUT_OF_BAND;
                        for (i = 0; i < n_controllers; i++) {
                            struct ovsrec_controller *c = controllers[i];
                            bridge_ofproto_controller_from_ovsrec(c, &ocs[n_ocs]);
                                oc->target = c->target;
                                oc->max_backoff = c->max_backoff ? *c->max_backoff / 1000 : 8;
                                oc->probe_interval = c->inactivity_probe ? *c->inactivity_probe / 1000 : 5;
                                oc->band = (!c->connection_mode || !strcmp(c->connection_mode, "in-band")
                                            ? OFPROTO_IN_BAND : OFPROTO_OUT_OF_BAND);
                            n_ocs++;
                        }
                        ofproto_set_controllers(br->ofproto, ocs, n_ocs, bridge_get_allowed_versions(br));
                            connmgr_set_controllers(p->connmgr, controllers, n_controllers, allowed_versions);
                                shash_init(&new_controllers);
                                for (i = 0; i < n_controllers; i++) {
                                    const struct ofproto_controller *c = &controllers[i];
                                    //如果是 active 连接(即主动连接),则调用 add_controller
                                    if (!vconn_verify_name(c->target)) {
                                        add_controller(mgr, c->target, c->dscp, allowed_versions);
                                            char *name = ofconn_make_name(mgr, target);
                                            struct ofconn *ofconn;
                                            ofconn = ofconn_create(mgr, rconn_create(5, 8, dscp, allowed_versions), OFCONN_PRIMARY, true);
                                                struct ofconn *ofconn;
                                                ofconn = xzalloc(sizeof *ofconn);
                                                ofconn->connmgr = mgr;
                                                //将 ofconn 插入 mgr->all_conns 链表,在 connmgr_run 中统一处理 mgr->all_conns 链表上的连接
                                                ovs_list_push_back(&mgr->all_conns, &ofconn->node);
                                                ofconn->rconn = rconn;
                                                ofconn->type = type;
                                                ofconn->enable_async_msgs = enable_async_msgs;
                                                hmap_init(&ofconn->monitors);
                                                ovs_list_init(&ofconn->updates);
                                                hmap_init(&ofconn->bundles);
                                                ofconn->next_bundle_expiry_check = time_msec() + BUNDLE_EXPIRY_INTERVAL;
                                                ofconn_flush(ofconn);
                                            rconn_connect(ofconn->rconn, target, name);
                                            hmap_insert(&mgr->controllers, &ofconn->hmap_node, hash_string(target, 0));
                                    } else if (!pvconn_verify_name(c->target)) {
                                        //如果是被动连接,则调用 ofservice_create
                                        ofservice_create(mgr, c->target, allowed_versions, c->dscp);
                                            struct ofservice *ofservice;
                                            struct pvconn *pvconn;
                                            pvconn_open(target, allowed_versions, dscp, &pvconn);
                                            ofservice = xzalloc(sizeof *ofservice);
                                            hmap_insert(&mgr->services, &ofservice->node, hash_string(target, 0));
                                            ofservice->pvconn = pvconn;
                                            ofservice->allowed_versions = allowed_versions;
                                    }
                                    shash_add_once(&new_controllers, c->target, &controllers[i]);
                                }
                                /* Delete controllers that are no longer configured.
                                 * Update configuration of all now-existing controllers. */
                                HMAP_FOR_EACH_SAFE (ofconn, next_ofconn, hmap_node, &mgr->controllers) {
                                    ...
                                }

                                /* Delete services that are no longer configured.
                                 * Update configuration of all now-existing services. */
                                HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, node, &mgr->services) {
                                    ...
                                }
                        /* Configure OpenFlow controller connection snooping. */
                        //默认为每个网桥添加 snoop "/usr/local/var/run/openvswitch/br1.snoop"
                        //snoop 是一个 pvconn,用于将和 controller 之间收发的每个 OpenFlow 消息镜像一份
                        if (!ofproto_has_snoops(br->ofproto)) {
                            struct sset snoops;

                            sset_init(&snoops);
                            sset_add_and_free(&snoops, xasprintf("punix:%s/%s.snoop", ovs_rundir(), br->name));
                            ofproto_set_snoops(br->ofproto, &snoops);
                            /* Sets the "snoops" for 'mgr' to the pvconn targets listed in 'snoops'.
                             *
                             * A "snoop" is a pvconn to which every OpenFlow message to or from the most
                             * important controller on 'mgr' is mirrored. */
                                connmgr_set_snoops(ofproto->connmgr, snoops);
                                    set_pvconns(&mgr->snoops, &mgr->n_snoops, snoops);
                                        pvconns = xmalloc(sset_count(sset) * sizeof *pvconns);
                                        n_pvconns = 0;
                                        SSET_FOR_EACH (name, sset) {
                                            struct pvconn *pvconn;
                                            pvconn_open(name, 0, 0, &pvconn);
                                            pvconns[n_pvconns++] = pvconn;
                                        }
                                        *pvconnsp = pvconns;
                                        *n_pvconnsp = n_pvconns;
                        }
                    bridge_configure_netflow(br);
                    bridge_configure_sflow(br, &sflow_bridge_number);
                    bridge_configure_ipfix(br);
                    bridge_configure_spanning_tree(br);
                    bridge_configure_tables(br);
                    bridge_configure_dp_desc(br);
                    bridge_configure_aa(br);
                }
                bridge_run__();
            } //bridge_reconfigure
            
            run_stats_update();
            run_status_update();
            run_system_stats();
        } //bridge_run

        3. netdev_run();
            struct netdev_registered_class *rc;
            CMAP_FOR_EACH (rc, cmap_node, &netdev_classes)
                //对于 system 类型,run 为 netdev_linux_run;dpdk 类型的 run 函数为空
                //netdev_linux_run 主要处理 link、mtu 改变等事件
                rc->class->run(rc->class);
                    struct nl_sock *sock;
                    /* Returns a NETLINK_ROUTE socket listening for RTNLGRP_LINK,
                     * RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV6_IFADDR and RTNLGRP_IPV6_IFINFO
                     * changes, or NULL if no such socket could be created. */
                    sock = netdev_linux_notify_sock();
                        static struct nl_sock *sock;
                        unsigned int mcgroups[] = {RTNLGRP_LINK, RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO};
                        if (ovsthread_once_start(&once)) {
                            nl_sock_create(NETLINK_ROUTE, &sock);
                            for (i = 0; i < ARRAY_SIZE(mcgroups); i++) {
                                nl_sock_join_mcgroup(sock, mcgroups[i]);
                                    setsockopt(sock->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &multicast_group, sizeof multicast_group)
                            }
                            ovsthread_once_done(&once);
                        }
                    do {
                        error = nl_sock_recv(sock, &buf, false);
                        if (!error) {
                            struct rtnetlink_change change;
                            if (rtnetlink_parse(&buf, &change)) {
                                if (!change.ifname) {
                                    change.ifname = if_indextoname(change.if_index, dev_name);
                                }

                                if (change.ifname) {
                                    netdev_ = netdev_from_name(change.ifname);
                                }
                                //is_netdev_linux_class: return netdev_class->run == netdev_linux_run;
                                if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) {
                                    struct netdev_linux *netdev = netdev_linux_cast(netdev_);

                                    ovs_mutex_lock(&netdev->mutex);
                                    netdev_linux_update(netdev, &change);
                                    ovs_mutex_unlock(&netdev->mutex);
                                }
                                netdev_close(netdev_);
                            }
                        }
                    } while (!error);
    }

也可参考:ovs-vswitchd源码分析 - 简书 (jianshu.com) 

你可能感兴趣的:(OVS,ovs,ovs-vswitchd,源码分析)