【OVS2.5.0源码分析】ovsd进程运行机制分析(1)

ovsd作为ovs的用户态管理主进程,负责处理ovs-vsctl命令接口、ovs-ofctl命令接口、与controller交互等等。 本篇分析它的整个处理机制。

1、main函数

    bridge_init(remote);
    free(remote);

    exiting = false;
    while (!exiting) {     //while循环,直到退出
        memory_run();
        if (memory_should_report()) {
            struct simap usage;

            simap_init(&usage);
            bridge_get_memory_usage(&usage);
            memory_report(&usage);
            simap_destroy(&usage);
        }
        bridge_run();                    //处理controller交互,ovs-ofctl命令
        unixctl_server_run(unixctl);     //处理ovs-vsctl命令
        netdev_run();

        memory_wait();
        bridge_wait();
        unixctl_server_wait(unixctl);
        netdev_wait();
        if (exiting) {
            poll_immediate_wake();
        }
        poll_block();                     //阻塞,当没有请求处理的时候,阻塞在此处
        if (should_service_stop()) {
            exiting = true;
        }
    }
    bridge_exit();
    unixctl_server_destroy(unixctl);
    service_stop();
2、poll_block函数

void
poll_block(void)
{
    struct poll_loop *loop = poll_loop();     //ovsd进程携带的poll信息,其中最重要的就是fd信息
    struct poll_node *node;
    struct pollfd *pollfds;
    HANDLE *wevents = NULL;
    int elapsed;
    int retval;
    int i;

    /* Register fatal signal events before actually doing any real work for
     * poll_block. */
    fatal_signal_wait();

    if (loop->timeout_when == LLONG_MIN) {
        COVERAGE_INC(poll_zero_timeout);
    }

    timewarp_run();
    pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);

#ifdef _WIN32
    wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
#endif

    /* Populate with all the fds and events. */
    i = 0;
    HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
        pollfds[i] = node->pollfd;
#ifdef _WIN32
        wevents[i] = node->wevent;
        if (node->pollfd.fd && node->wevent) {
            short int wsa_events = 0;
            if (node->pollfd.events & POLLIN) {
                wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
            }
            if (node->pollfd.events & POLLOUT) {
                wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
            }
            WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
        }
#endif
        i++;
    }

    retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,<span style="white-space:pre">		</span>//time_poll会最终调用linux的poll函数
                       loop->timeout_when, &elapsed);
    if (retval < 0) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));
    } else if (!retval) {
        log_wakeup(loop->timeout_where, NULL, elapsed);
    } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) {
        i = 0;
        HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
            if (pollfds[i].revents) {
                log_wakeup(node->where, &pollfds[i], 0);
            }
            i++;
        }
    }

    free_poll_nodes(loop);
    loop->timeout_when = LLONG_MAX;
    loop->timeout_where = NULL;
    free(pollfds);
    free(wevents);

    /* Handle any pending signals before doing anything else. */
    fatal_signal_run();

    seq_woke();
}
3、time_poll函数

int
time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE *handles OVS_UNUSED,
          long long int timeout_when, int *elapsed)
{
    long long int *last_wakeup = last_wakeup_get();
    long long int start;
    bool quiescent;
    int retval = 0;

    time_init();
    coverage_clear();
    coverage_run();
    if (*last_wakeup && !thread_is_pmd()) {
        log_poll_interval(*last_wakeup);
    }
    start = time_msec();

    timeout_when = MIN(timeout_when, deadline);
    quiescent = ovsrcu_is_quiescent();

    for (;;) {
        long long int now = time_msec();
        int time_left;

        if (now >= timeout_when) {
            time_left = 0;
        } else if ((unsigned long long int) timeout_when - now > INT_MAX) {
            time_left = INT_MAX;
        } else {
            time_left = timeout_when - now;
        }

        if (!quiescent) {
            if (!time_left) {
                ovsrcu_quiesce();
            } else {
                ovsrcu_quiesce_start();
            }
        }

#ifndef _WIN32
        retval = poll(pollfds, n_pollfds, time_left);    //调用linux的poll函数
        if (retval < 0) {
            retval = -errno;
        }
#else
        if (n_pollfds > MAXIMUM_WAIT_OBJECTS) {
            VLOG_ERR("Cannot handle more than maximum wait objects\n");
        } else if (n_pollfds != 0) {
            retval = WaitForMultipleObjects(n_pollfds, handles, FALSE,
                                            time_left);
        }
        if (retval < 0) {
            /* XXX This will be replace by a win error to errno
               conversion function */
            retval = -WSAGetLastError();
            retval = -EINVAL;
        }
#endif

        if (!quiescent && time_left) {
            ovsrcu_quiesce_end();
        }

        if (deadline <= time_msec()) {
#ifndef _WIN32
            fatal_signal_handler(SIGALRM);
#else
            VLOG_ERR("wake up from WaitForMultipleObjects after deadline");
            fatal_signal_handler(SIGTERM);
#endif
            if (retval < 0) {
                retval = 0;
            }
            break;
        }

        if (retval != -EINTR) {
            break;
        }
    }
    *last_wakeup = time_msec();
    refresh_rusage();
    *elapsed = *last_wakeup - start;
    return retval;
}

由此可以看到,poll_block会最终调用linux poll函数,那么会监听哪些fd呢,这些fd是合适被放到进程信息中的呢,以下以ofservice为例作为说明,其他的句柄也是类似,只是添加的函数略有不同而已。

bridge_wait函数

void
bridge_wait(void)
{
    struct sset types;
    const char *type;

    ovsdb_idl_wait(idl);
    if (daemonize_txn) {
        ovsdb_idl_txn_wait(daemonize_txn);
    }

    if_notifier_wait();
    if (ifaces_changed) {
        poll_immediate_wake();
    }

    sset_init(&types);
    ofproto_enumerate_types(&types);
    SSET_FOR_EACH (type, &types) {
        ofproto_type_wait(type);
    }
    sset_destroy(&types);

    if (!hmap_is_empty(&all_bridges)) {
        struct bridge *br;

        HMAP_FOR_EACH (br, node, &all_bridges) {
            ofproto_wait(br->ofproto);                //调用入口
        }
        stats_update_wait();
        status_update_wait();
    }

    system_stats_wait();
}
ofproto_wait函数

void
ofproto_wait(struct ofproto *p)
{
    p->ofproto_class->wait(p);
    if (p->ofproto_class->port_poll_wait) {
        p->ofproto_class->port_poll_wait(p);
    }
    seq_wait(connectivity_seq_get(), p->change_seq);
    connmgr_wait(p->connmgr);    //调用入口
}
connmgr_wait函数

void
connmgr_wait(struct connmgr *mgr)
{
    struct ofservice *ofservice;
    struct ofconn *ofconn;
    size_t i;

    LIST_FOR_EACH (ofconn, node, &mgr->all_conns) {
        ofconn_wait(ofconn);
    }
    ofmonitor_wait(mgr);
    if (mgr->in_band) {
        in_band_wait(mgr->in_band);
    }
    if (mgr->fail_open) {
        fail_open_wait(mgr->fail_open);
    }
    HMAP_FOR_EACH (ofservice, node, &mgr->services) {
        pvconn_wait(ofservice->pvconn);     //监听ofservice句柄
    }
    for (i = 0; i < mgr->n_snoops; i++) {
        pvconn_wait(mgr->snoops[i]);
    }
}
pvconn_wait函数

void
pvconn_wait(struct pvconn *pvconn)
{
    (pvconn->pvclass->wait)(pvconn);		//实际调用pvconn_pstream_wait函数
}
pvconn_pstream_wait函数

static void
pvconn_pstream_wait(struct pvconn *pvconn)
{
    struct pvconn_pstream *ps = pvconn_pstream_cast(pvconn);
    pstream_wait(ps->pstream);
}
pstream_wait函数

void
pstream_wait(struct pstream *pstream)
{
    (pstream->class->wait)(pstream);	//实际调用fd_pstream_class的pfd_wait函数
}
pfd_wait函数

static void
pfd_wait(struct pstream *pstream)
{
    struct fd_pstream *ps = fd_pstream_cast(pstream);
    poll_fd_wait(ps->fd, POLLIN);    //该fd就是介绍ofservice服务建立时打开的句柄
}
poll_fd_wait函数

#define poll_fd_wait(fd, events) poll_fd_wait_at(fd, events, OVS_SOURCE_LOCATOR)
poll_fd_wait_at函数

/* Registers 'fd' as waiting for the specified 'events' (which should be POLLIN
 * or POLLOUT or POLLIN | POLLOUT).  The following call to poll_block() will
 * wake up when 'fd' becomes ready for one or more of the requested events.
 *
 * On Windows, 'fd' must be a socket.
 *
 * The event registration is one-shot: only the following call to poll_block()
 * is affected.  The event will need to be re-registered after poll_block() is
 * called if it is to persist.
 *
 * ('where' is used in debug logging.  Commonly one would use poll_fd_wait() to
 * automatically provide the caller's source file and line number for
 * 'where'.) */
void
poll_fd_wait_at(int fd, short int events, const char *where)
{
    poll_create_node(fd, 0, events, where);
}
poll_create_node函数

static void
poll_create_node(int fd, HANDLE wevent, short int events, const char *where)
{
    struct poll_loop *loop = poll_loop();	//进程poll_loop信息
    struct poll_node *node;

    COVERAGE_INC(poll_create_node);

    /* Both 'fd' and 'wevent' cannot be set. */
    ovs_assert(!fd != !wevent);

    /* Check for duplicate.  If found, "or" the events. */
    node = find_poll_node(loop, fd, wevent);
    if (node) {
        node->pollfd.events |= events;		//如果已监听该fd,则增加监听的event
    } else {
        node = xzalloc(sizeof *node);		//新建poll_node,并插入到poll_loop中
        hmap_insert(&loop->poll_nodes, &node->hmap_node,
                    hash_2words(fd, (uint32_t)wevent));
        node->pollfd.fd = fd;
        node->pollfd.events = events;
#ifdef _WIN32
        if (!wevent) {
            wevent = CreateEvent(NULL, FALSE, FALSE, NULL);
        }
#endif
        node->wevent = wevent;
        node->where = where;
    }
}
至此,了解了句柄是如何添加到poll_loop中,并通过poll_block进行监听,其他句柄的监听也是大同小异,不再赘述。









你可能感兴趣的:(网络,openvswitch,ovs)