【OVS2.5.0源码分析】normal action精确流表生成和刷新过程分析

一、首包精确流表生成

首包到达OVS交换机时,尚未建立基于目的mac的流表规则,需要upcall到用户态进行学习,此时生成的规则是把报文flood到其他端口。

1、upcall_xlate函数

    upcall->dump_seq = seq_read(udpif->dump_seq);
    upcall->reval_seq = seq_read(udpif->reval_seq);
    xlate_actions(&xin, &upcall->xout); 
    upcall->xout_initialized = true;

2、xlate_actions函数

    /* The bridge is now known so obtain its table version. */
    ctx.tables_version = ofproto_dpif_get_tables_version(ctx.xbridge->ofproto);

    if (!xin->ofpacts && !ctx.rule) {
        ctx.rule = rule_dpif_lookup_from_table(       //检索流表
            ctx.xbridge->ofproto, ctx.tables_version, flow, xin->wc,
            ctx.xin->resubmit_stats, &ctx.table_id,
            flow->in_port.ofp_port, true, true);
        if (ctx.xin->resubmit_stats) {
            rule_dpif_credit_stats(ctx.rule, ctx.xin->resubmit_stats);
        }
        if (ctx.xin->xcache) {
            struct xc_entry *entry;

            entry = xlate_cache_add_entry(ctx.xin->xcache, XC_RULE);
            entry->u.rule = ctx.rule;
            rule_dpif_ref(ctx.rule);
        }

        if (OVS_UNLIKELY(ctx.xin->resubmit_hook)) {
            ctx.xin->resubmit_hook(ctx.xin, ctx.rule, 0);
        }
    }
    xout->fail_open = ctx.rule && rule_dpif_is_fail_open(ctx.rule);

    /* Get the proximate input port of the packet.  (If xin->recirc,
     * flow->in_port is the ultimate input port of the packet.) */
    struct xport *in_port = get_ofp_port(xbridge,
                                         ctx.base_flow.in_port.ofp_port);

    /* Tunnel stats only for non-recirculated packets. */
    if (!xin->recirc && in_port && in_port->is_tunnel) {
        if (ctx.xin->resubmit_stats) {
            netdev_vport_inc_rx(in_port->netdev, ctx.xin->resubmit_stats);
            if (in_port->bfd) {
                bfd_account_rx(in_port->bfd, ctx.xin->resubmit_stats);
            }
        }
        if (ctx.xin->xcache) {
            struct xc_entry *entry;

            entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETDEV);
            entry->u.dev.rx = netdev_ref(in_port->netdev);
            entry->u.dev.bfd = bfd_ref(in_port->bfd);
        }
    }

    if (!xin->recirc && process_special(&ctx, in_port)) {
        /* process_special() did all the processing for this packet.
         *
         * We do not perform special processing on recirculated packets, as
         * recirculated packets are not really received by the bridge.*/
    } else if (in_port && in_port->xbundle
               && xbundle_mirror_out(xbridge, in_port->xbundle)) {
        if (ctx.xin->packet != NULL) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
            VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
                         "%s, which is reserved exclusively for mirroring",
                         ctx.xbridge->name, in_port->xbundle->name);
        }
    } else {
        /* Sampling is done only for packets really received by the bridge. */
        unsigned int user_cookie_offset = 0;
        if (!xin->recirc) {
            user_cookie_offset = compose_sflow_action(&ctx);
            compose_ipfix_action(&ctx, ODPP_NONE);
        }
        size_t sample_actions_len = ctx.odp_actions->size;

        if (tnl_process_ecn(flow)
            && (!in_port || may_receive(in_port, &ctx))) {
            const struct ofpact *ofpacts;
            size_t ofpacts_len;

            if (xin->ofpacts) {
                ofpacts = xin->ofpacts;
                ofpacts_len = xin->ofpacts_len;
            } else if (ctx.rule) {
                const struct rule_actions *actions
                    = rule_dpif_get_actions(ctx.rule);
                ofpacts = actions->ofpacts;
                ofpacts_len = actions->ofpacts_len;
                ctx.rule_cookie = rule_dpif_get_flow_cookie(ctx.rule);
            } else {
                OVS_NOT_REACHED();
            }

            mirror_ingress_packet(&ctx);
            do_xlate_actions(ofpacts, ofpacts_len, &ctx);      //把openflow流表转化为精确流表
            if (ctx.error) {
                goto exit;
            }

            /* We've let OFPP_NORMAL and the learning action look at the
             * packet, so drop it now if forwarding is disabled. */
            if (in_port && (!xport_stp_forward_state(in_port) ||
                            !xport_rstp_forward_state(in_port))) {
                /* Drop all actions added by do_xlate_actions() above. */
                ctx.odp_actions->size = sample_actions_len;

                /* Undo changes that may have been done for recirculation. */
                if (exit_recirculates(&ctx)) {
                    ctx.action_set.size = ctx.recirc_action_offset;
                    ctx.recirc_action_offset = -1;
                    ctx.last_unroll_offset = -1;
                }
            } else if (ctx.action_set.size) {
                /* Translate action set only if not dropping the packet and
                 * not recirculating. */
                if (!exit_recirculates(&ctx)) {
                    xlate_action_set(&ctx);
                }
            }
            /* Check if need to recirculate. */
            if (exit_recirculates(&ctx)) {
                compose_recirculate_action(&ctx);
            }
        }

 3、do_xlate_actions函数 
 

        switch (a->type) {
        case OFPACT_OUTPUT:
            xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
                                ofpact_get_OUTPUT(a)->max_len, true);
            break;

4、xlate_output_action函数

    switch (port) {
    case OFPP_IN_PORT:
        compose_output_action(ctx, ctx->xin->flow.in_port.ofp_port, NULL);
        break;
    case OFPP_TABLE:
        xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
                           0, may_packet_in, true);
        break;
    case OFPP_NORMAL:
        xlate_normal(ctx);      //normal规则
        break;
    case OFPP_FLOOD:
        flood_packets(ctx,  false);
        break;

5、xlate_normal函数

        ovs_rwlock_unlock(&ms->rwlock);
    } else {
        ovs_rwlock_rdlock(&ctx->xbridge->ml->rwlock);
        mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan);
        mac_port = mac ? mac_entry_get_port(ctx->xbridge->ml, mac) : NULL;
        ovs_rwlock_unlock(&ctx->xbridge->ml->rwlock);

        if (mac_port) {
            struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
            struct xbundle *mac_xbundle = xbundle_lookup(xcfg, mac_port);
            if (mac_xbundle && mac_xbundle != in_xbundle) {
                xlate_report(ctx, "forwarding to learned port");
                output_normal(ctx, mac_xbundle, vlan);                 //能够匹配到mac,确定出端口
            } else if (!mac_xbundle) {
                xlate_report(ctx, "learned port is unknown, dropping");
            } else {
                xlate_report(ctx, "learned port is input port, dropping");
            }
        } else {
            xlate_report(ctx, "no learned MAC for destination, flooding");
            xlate_normal_flood(ctx, in_xbundle, vlan);      //初始,会flood到其他端口;
        }

二、后续报文规则刷新

当首包的响应报文到达交换机后,源报文对应的流表就可以刷新了(因为此时已经学习到目的mac所在的端口)。OVS是通过revalidate线程来刷新datapath流表的。

1、revalidate函数

/* Revalidator thread main work loop: dumps every flow currently installed
 * in the datapath in batches and re-checks each one against the current
 * OpenFlow tables.  Flows whose translated actions changed are modified in
 * place, and idle or excess flows are deleted (batched through 'ops' and
 * pushed via push_ukey_ops__()). */
static void
revalidate(struct revalidator *revalidator)
{
    uint64_t odp_actions_stub[1024 / 8];
    struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);

    struct udpif *udpif = revalidator->udpif;
    struct dpif_flow_dump_thread *dump_thread;
    uint64_t dump_seq, reval_seq;
    unsigned int flow_limit;

    /* Snapshot the dump/revalidation sequence numbers once per pass; each
     * ukey records the sequence it was last handled at, which lets us skip
     * duplicates and detect stale translations below. */
    dump_seq = seq_read(udpif->dump_seq);
    reval_seq = seq_read(udpif->reval_seq);
    atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
    dump_thread = dpif_flow_dump_thread_create(udpif->dump);
    for (;;) {
        struct ukey_op ops[REVALIDATE_MAX_BATCH];
        int n_ops = 0;

        struct dpif_flow flows[REVALIDATE_MAX_BATCH];
        const struct dpif_flow *f;
        int n_dumped;

        long long int max_idle;
        long long int now;
        size_t n_dp_flows;
        bool kill_them_all;

        /* Fetch the next batch of flows from the datapath dump. */
        n_dumped = dpif_flow_dump_next(dump_thread, flows, ARRAY_SIZE(flows));
        if (!n_dumped) {
            break;
        }

        now = time_msec();

        /* In normal operation we want to keep flows around until they have
         * been idle for 'ofproto_max_idle' milliseconds.  However:
         *
         *     - If the number of datapath flows climbs above 'flow_limit',
         *       drop that down to 100 ms to try to bring the flows down to
         *       the limit.
         *
         *     - If the number of datapath flows climbs above twice
         *       'flow_limit', delete all the datapath flows as an emergency
         *       measure.  (We reassess this condition for the next batch of
         *       datapath flows, so we will recover before all the flows are
         *       gone.) */
        n_dp_flows = udpif_get_n_flows(udpif);
        kill_them_all = n_dp_flows > flow_limit * 2;
        max_idle = n_dp_flows > flow_limit ? 100 : ofproto_max_idle;

        for (f = flows; f < &flows[n_dumped]; f++) {
            long long int used = f->stats.used;
            struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
            enum reval_result result;
            struct udpif_key *ukey;
            bool already_dumped;
            int error;

            /* ukey_acquire() looks up (or creates) the userspace key for
             * this datapath flow and takes ukey->mutex on success. */
            if (ukey_acquire(udpif, f, &ukey, &error)) {
                if (error == EBUSY) {
                    /* Another thread is processing this flow, so don't bother
                     * processing it.*/
                    COVERAGE_INC(upcall_ukey_contention);
                } else {
                    log_unexpected_flow(f, error);
                    if (error != ENOENT) {
                        /* A flow we cannot track cannot be revalidated;
                         * queue it for deletion. */
                        delete_op_init__(udpif, &ops[n_ops++], f);
                    }
                }
                continue;
            }

            already_dumped = ukey->dump_seq == dump_seq;
            if (already_dumped) {
                /* The flow has already been handled during this flow dump
                 * operation. Skip it. */
                if (ukey->xcache) {
                    COVERAGE_INC(dumped_duplicate_flow);
                } else {
                    COVERAGE_INC(dumped_new_flow);
                }
                ovs_mutex_unlock(&ukey->mutex);
                continue;
            }

            /* A never-used flow falls back to its creation time for the
             * idle-timeout check. */
            if (!used) {
                used = ukey->created;
            }
            if (kill_them_all || (used && used < now - max_idle)) {
                result = UKEY_DELETE;
            } else {
                /* Re-translate the flow and compare against what is
                 * installed in the datapath. */
                result = revalidate_ukey(udpif, ukey, &f->stats, &odp_actions,
                                         reval_seq, &recircs);
            }
            ukey->dump_seq = dump_seq;
            ukey->flow_exists = result != UKEY_DELETE;

            if (result != UKEY_KEEP) {
                /* Takes ownership of 'recircs'. */
                reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
                              &odp_actions);
            }
            ovs_mutex_unlock(&ukey->mutex);
        }

        if (n_ops) {
            /* Apply the batched flow modifications/deletions to the
             * datapath. */
            push_ukey_ops__(udpif, ops, n_ops);
        }
        ovsrcu_quiesce();
    }
    dpif_flow_dump_thread_destroy(dump_thread);
    ofpbuf_uninit(&odp_actions);
}

2、revalidate_ukey函数

/* Decides the fate of one datapath flow identified by 'ukey':
 *
 *   - UKEY_KEEP:   the installed flow is still valid; keep it.
 *   - UKEY_MODIFY: the re-translated actions differ from the installed
 *                  ones; the new actions are left in 'odp_actions' and
 *                  recirculation ID references are transferred to
 *                  'recircs' for the caller.
 *   - UKEY_DELETE: the flow could not be validated (translation failure,
 *                  mask now too generic, etc.); delete it.
 *
 * 'stats' are the latest datapath statistics for the flow; the delta since
 * the last pass is pushed into the OpenFlow layer as a side effect.
 * Caller must hold ukey->mutex (see OVS_REQUIRES annotation). */
static enum reval_result
revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
                const struct dpif_flow_stats *stats,
                struct ofpbuf *odp_actions, uint64_t reval_seq,
                struct recirc_refs *recircs)
    OVS_REQUIRES(ukey->mutex)
{
    struct xlate_out xout, *xoutp;
    struct netflow *netflow;
    struct ofproto_dpif *ofproto;
    struct dpif_flow_stats push;
    struct flow flow;
    struct flow_wildcards dp_mask, wc;
    enum reval_result result;
    ofp_port_t ofp_in_port;
    struct xlate_in xin;
    long long int last_used;
    int error;
    bool need_revalidate;

    /* Default to deletion: any early 'goto exit' before a positive
     * verdict removes the flow. */
    result = UKEY_DELETE;
    xoutp = NULL;
    netflow = NULL;

    ofpbuf_clear(odp_actions);
    /* A reval_seq bump means the OpenFlow tables changed since this ukey
     * was last translated, so a full re-translation is required. */
    need_revalidate = (ukey->reval_seq != reval_seq);
    last_used = ukey->stats.used;
    push.used = stats->used;
    push.tcp_flags = stats->tcp_flags;
    /* Compute the stats delta since the last pass; guard against the
     * datapath counters having been reset (new value smaller). */
    push.n_packets = (stats->n_packets > ukey->stats.n_packets
                      ? stats->n_packets - ukey->stats.n_packets
                      : 0);
    push.n_bytes = (stats->n_bytes > ukey->stats.n_bytes
                    ? stats->n_bytes - ukey->stats.n_bytes
                    : 0);

    /* Rate heuristic: a busy flow may be deleted instead of revalidated
     * when revalidation cannot keep up (see should_revalidate()). */
    if (need_revalidate && last_used
        && !should_revalidate(udpif, push.n_packets, last_used)) {
        goto exit;
    }

    /* We will push the stats, so update the ukey stats cache. */
    ukey->stats = *stats;
    if (!push.n_packets && !need_revalidate) {
        result = UKEY_KEEP;
        goto exit;
    }

    /* Nothing changed: just attribute the stats via the cached
     * translation and keep the flow. */
    if (ukey->xcache && !need_revalidate) {
        xlate_push_stats(ukey->xcache, &push);
        result = UKEY_KEEP;
        goto exit;
    }

    /* Reconstruct the struct flow from the datapath key. */
    if (odp_flow_key_to_flow(ukey->key, ukey->key_len, &flow)
        == ODP_FIT_ERROR) {
        goto exit;
    }

    error = xlate_lookup(udpif->backer, &flow, &ofproto, NULL, NULL, &netflow,
                         &ofp_in_port);
    if (error) {
        goto exit;
    }

    if (need_revalidate) {
        xlate_cache_clear(ukey->xcache);
    }
    if (!ukey->xcache) {
        ukey->xcache = xlate_cache_new();
    }

    /* Re-translate the flow.  Because the MAC table may have learned new
     * entries since the original translation (e.g. the destination MAC's
     * port is now known), the resulting actions can differ from the
     * installed ones. */
    xlate_in_init(&xin, ofproto, &flow, ofp_in_port, NULL, push.tcp_flags,
                  NULL, need_revalidate ? &wc : NULL, odp_actions);
    if (push.n_packets) {
        xin.resubmit_stats = &push;
        xin.may_learn = true;
    }
    xin.xcache = ukey->xcache;
    xlate_actions(&xin, &xout);
    xoutp = &xout;

    if (!need_revalidate) {
        result = UKEY_KEEP;
        goto exit;
    }

    if (xout.slow) {
        /* Slow-path flows are sent to userspace instead of using the
         * translated actions directly. */
        ofpbuf_clear(odp_actions);
        compose_slow_path(udpif, &xout, &flow, flow.in_port.odp_port,
                          odp_actions);
    }

    if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, ukey->key,
                             ukey->key_len, &dp_mask, &flow)
        == ODP_FIT_ERROR) {
        goto exit;
    }

    /* Do not modify if any bit is wildcarded by the installed datapath flow,
     * but not the newly revalidated wildcard mask (wc), i.e., if revalidation
     * tells that the datapath flow is now too generic and must be narrowed
     * down.  Note that we do not know if the datapath has ignored any of the
     * wildcarded bits, so we may be overtly conservative here. */
    if (flow_wildcards_has_extra(&dp_mask, &wc)) {
        goto exit;
    }

    /* If the newly translated actions differ from what is installed in
     * the datapath, the flow must be updated in place. */
    if (!ofpbuf_equal(odp_actions,
                      ovsrcu_get(struct ofpbuf *, &ukey->actions))) {
        /* The datapath mask was OK, but the actions seem to have changed.
         * Let's modify it in place. */
        result = UKEY_MODIFY;
        /* Transfer recirc action ID references to the caller. */
        recirc_refs_swap(recircs, &xoutp->recircs);
        goto exit;
    }

    result = UKEY_KEEP;

exit:
    if (result != UKEY_DELETE) {
        /* Record that this ukey is valid for the current revalidation
         * generation. */
        ukey->reval_seq = reval_seq;
    }
    if (netflow && result == UKEY_DELETE) {
        netflow_flow_clear(netflow, &flow);
    }
    xlate_out_uninit(xoutp);
    return result;
}

你可能感兴趣的:(网络,openvswitch,ovs)