一、首包精确流表生成
首包到达OVS交换机时,由于尚未建立基于目的MAC地址的流表规则,报文需要upcall到用户态进行处理和学习,此时生成的规则是把报文flood到其他端口。
1、upcall_xlate函数
/* upcall_xlate() excerpt (surrounding function not shown):
 * Record the current dump/reval sequence numbers on the upcall, then
 * translate the flow into datapath actions via xlate_actions(). */
upcall->dump_seq = seq_read(udpif->dump_seq);
upcall->reval_seq = seq_read(udpif->reval_seq);
xlate_actions(&xin, &upcall->xout);
upcall->xout_initialized = true;
<pre name="code" class="cpp">
/* xlate_actions() excerpt (surrounding function not shown):
 * Look up the OpenFlow rule for this flow and translate its actions into
 * datapath actions. */

    /* The bridge is now known so obtain its table version. */
    ctx.tables_version = ofproto_dpif_get_tables_version(ctx.xbridge->ofproto);

    if (!xin->ofpacts && !ctx.rule) {
        /* Look up the matching rule in the OpenFlow flow tables. */
        ctx.rule = rule_dpif_lookup_from_table(
            ctx.xbridge->ofproto, ctx.tables_version, flow, xin->wc,
            ctx.xin->resubmit_stats, &ctx.table_id,
            flow->in_port.ofp_port, true, true);
        if (ctx.xin->resubmit_stats) {
            rule_dpif_credit_stats(ctx.rule, ctx.xin->resubmit_stats);
        }
        if (ctx.xin->xcache) {
            struct xc_entry *entry;
            entry = xlate_cache_add_entry(ctx.xin->xcache, XC_RULE);
            entry->u.rule = ctx.rule;
            rule_dpif_ref(ctx.rule);
        }
        if (OVS_UNLIKELY(ctx.xin->resubmit_hook)) {
            ctx.xin->resubmit_hook(ctx.xin, ctx.rule, 0);
        }
    }
    xout->fail_open = ctx.rule && rule_dpif_is_fail_open(ctx.rule);

    /* Get the proximate input port of the packet. (If xin->recirc,
     * flow->in_port is the ultimate input port of the packet.) */
    struct xport *in_port = get_ofp_port(xbridge,
                                         ctx.base_flow.in_port.ofp_port);

    /* Tunnel stats only for non-recirculated packets. */
    if (!xin->recirc && in_port && in_port->is_tunnel) {
        if (ctx.xin->resubmit_stats) {
            netdev_vport_inc_rx(in_port->netdev, ctx.xin->resubmit_stats);
            if (in_port->bfd) {
                bfd_account_rx(in_port->bfd, ctx.xin->resubmit_stats);
            }
        }
        if (ctx.xin->xcache) {
            struct xc_entry *entry;
            entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETDEV);
            entry->u.dev.rx = netdev_ref(in_port->netdev);
            entry->u.dev.bfd = bfd_ref(in_port->bfd);
        }
    }

    if (!xin->recirc && process_special(&ctx, in_port)) {
        /* process_special() did all the processing for this packet.
         *
         * We do not perform special processing on recirculated packets, as
         * recirculated packets are not really received by the bridge.*/
    } else if (in_port && in_port->xbundle
               && xbundle_mirror_out(xbridge, in_port->xbundle)) {
        if (ctx.xin->packet != NULL) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
            VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
                         "%s, which is reserved exclusively for mirroring",
                         ctx.xbridge->name, in_port->xbundle->name);
        }
    } else {
        /* Sampling is done only for packets really received by the bridge. */
        unsigned int user_cookie_offset = 0;
        if (!xin->recirc) {
            user_cookie_offset = compose_sflow_action(&ctx);
            compose_ipfix_action(&ctx, ODPP_NONE);
        }
        size_t sample_actions_len = ctx.odp_actions->size;

        if (tnl_process_ecn(flow)
            && (!in_port || may_receive(in_port, &ctx))) {
            const struct ofpact *ofpacts;
            size_t ofpacts_len;

            if (xin->ofpacts) {
                ofpacts = xin->ofpacts;
                ofpacts_len = xin->ofpacts_len;
            } else if (ctx.rule) {
                const struct rule_actions *actions
                    = rule_dpif_get_actions(ctx.rule);
                ofpacts = actions->ofpacts;
                ofpacts_len = actions->ofpacts_len;
                ctx.rule_cookie = rule_dpif_get_flow_cookie(ctx.rule);
            } else {
                OVS_NOT_REACHED();
            }

            mirror_ingress_packet(&ctx);
            /* Translate the rule's OpenFlow actions into exact-match
             * datapath actions. */
            do_xlate_actions(ofpacts, ofpacts_len, &ctx);
            if (ctx.error) {
                goto exit;
            }

            /* We've let OFPP_NORMAL and the learning action look at the
             * packet, so drop it now if forwarding is disabled. */
            if (in_port && (!xport_stp_forward_state(in_port) ||
                            !xport_rstp_forward_state(in_port))) {
                /* Drop all actions added by do_xlate_actions() above. */
                ctx.odp_actions->size = sample_actions_len;

                /* Undo changes that may have been done for recirculation. */
                if (exit_recirculates(&ctx)) {
                    ctx.action_set.size = ctx.recirc_action_offset;
                    ctx.recirc_action_offset = -1;
                    ctx.last_unroll_offset = -1;
                }
            } else if (ctx.action_set.size) {
                /* Translate action set only if not dropping the packet and
                 * not recirculating. */
                if (!exit_recirculates(&ctx)) {
                    xlate_action_set(&ctx);
                }
            }

            /* Check if need to recirculate. */
            if (exit_recirculates(&ctx)) {
                compose_recirculate_action(&ctx);
            }
        }

3、do_xlate_actions函数
/* do_xlate_actions() excerpt (remainder of the switch not shown):
 * Dispatch one OpenFlow action; OFPACT_OUTPUT is translated by
 * xlate_output_action(). */
switch (a->type) {
case OFPACT_OUTPUT:
    xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
                        ofpact_get_OUTPUT(a)->max_len, true);
    break;
/* xlate_output_action() excerpt (remainder of the switch not shown):
 * Dispatch on the OpenFlow output port number. */
switch (port) {
case OFPP_IN_PORT:
    compose_output_action(ctx, ctx->xin->flow.in_port.ofp_port, NULL);
    break;
case OFPP_TABLE:
    xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
                       0, may_packet_in, true);
    break;
case OFPP_NORMAL:
    /* NORMAL action: L2 learning-switch processing. */
    xlate_normal(ctx);
    break;
case OFPP_FLOOD:
    flood_packets(ctx, false);
    break;
/* xlate_normal() excerpt (surrounding function not shown):
 * Look up the destination MAC in the MAC-learning table; output to the
 * learned port if found, otherwise flood. */
        ovs_rwlock_unlock(&ms->rwlock);
    } else {
        ovs_rwlock_rdlock(&ctx->xbridge->ml->rwlock);
        mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan);
        mac_port = mac ? mac_entry_get_port(ctx->xbridge->ml, mac) : NULL;
        ovs_rwlock_unlock(&ctx->xbridge->ml->rwlock);

        if (mac_port) {
            struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
            struct xbundle *mac_xbundle = xbundle_lookup(xcfg, mac_port);
            if (mac_xbundle && mac_xbundle != in_xbundle) {
                xlate_report(ctx, "forwarding to learned port");
                /* Destination MAC matched, so the output port is known. */
                output_normal(ctx, mac_xbundle, vlan);
            } else if (!mac_xbundle) {
                xlate_report(ctx, "learned port is unknown, dropping");
            } else {
                xlate_report(ctx, "learned port is input port, dropping");
            }
        } else {
            xlate_report(ctx, "no learned MAC for destination, flooding");
            /* First packet: the MAC has not been learned yet, so flood to
             * the other ports. */
            xlate_normal_flood(ctx, in_xbundle, vlan);
        }
当首包的响应报文到达交换机后,源报文对应的流表就可以刷新了(因为此时已经学习到目的MAC所在的端口),OVS是通过revalidate线程来刷新流表的。
1、revalidate函数
/* Revalidator main loop: dumps installed datapath flows in batches, expires
 * flows that have been idle too long, re-validates the remainder against the
 * current OpenFlow tables, and pushes any resulting delete/modify operations
 * back to the datapath. */
static void
revalidate(struct revalidator *revalidator)
{
    uint64_t odp_actions_stub[1024 / 8];
    struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);
    struct udpif *udpif = revalidator->udpif;
    struct dpif_flow_dump_thread *dump_thread;
    uint64_t dump_seq, reval_seq;
    unsigned int flow_limit;

    dump_seq = seq_read(udpif->dump_seq);
    reval_seq = seq_read(udpif->reval_seq);
    atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
    dump_thread = dpif_flow_dump_thread_create(udpif->dump);
    for (;;) {
        struct ukey_op ops[REVALIDATE_MAX_BATCH];
        int n_ops = 0;

        struct dpif_flow flows[REVALIDATE_MAX_BATCH];
        const struct dpif_flow *f;
        int n_dumped;

        long long int max_idle;
        long long int now;
        size_t n_dp_flows;
        bool kill_them_all;

        /* Fetch the next batch of installed flows from the datapath. */
        n_dumped = dpif_flow_dump_next(dump_thread, flows, ARRAY_SIZE(flows));
        if (!n_dumped) {
            break;
        }

        now = time_msec();

        /* In normal operation we want to keep flows around until they have
         * been idle for 'ofproto_max_idle' milliseconds.  However:
         *
         *     - If the number of datapath flows climbs above 'flow_limit',
         *       drop that down to 100 ms to try to bring the flows down to
         *       the limit.
         *
         *     - If the number of datapath flows climbs above twice
         *       'flow_limit', delete all the datapath flows as an emergency
         *       measure.  (We reassess this condition for the next batch of
         *       datapath flows, so we will recover before all the flows are
         *       gone.) */
        n_dp_flows = udpif_get_n_flows(udpif);
        kill_them_all = n_dp_flows > flow_limit * 2;
        max_idle = n_dp_flows > flow_limit ? 100 : ofproto_max_idle;

        for (f = flows; f < &flows[n_dumped]; f++) {
            long long int used = f->stats.used;
            struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
            enum reval_result result;
            struct udpif_key *ukey;
            bool already_dumped;
            int error;

            if (ukey_acquire(udpif, f, &ukey, &error)) {
                if (error == EBUSY) {
                    /* Another thread is processing this flow, so don't bother
                     * processing it.*/
                    COVERAGE_INC(upcall_ukey_contention);
                } else {
                    log_unexpected_flow(f, error);
                    if (error != ENOENT) {
                        delete_op_init__(udpif, &ops[n_ops++], f);
                    }
                }
                continue;
            }

            already_dumped = ukey->dump_seq == dump_seq;
            if (already_dumped) {
                /* The flow has already been handled during this flow dump
                 * operation. Skip it. */
                if (ukey->xcache) {
                    COVERAGE_INC(dumped_duplicate_flow);
                } else {
                    COVERAGE_INC(dumped_new_flow);
                }
                ovs_mutex_unlock(&ukey->mutex);
                continue;
            }

            if (!used) {
                used = ukey->created;
            }
            if (kill_them_all || (used && used < now - max_idle)) {
                result = UKEY_DELETE;
            } else {
                /* Re-translate and compare against the installed flow to
                 * decide whether it changed. */
                result = revalidate_ukey(udpif, ukey, &f->stats, &odp_actions,
                                         reval_seq, &recircs);
            }
            ukey->dump_seq = dump_seq;
            ukey->flow_exists = result != UKEY_DELETE;

            if (result != UKEY_KEEP) {
                /* Takes ownership of 'recircs'. */
                reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
                              &odp_actions);
            }
            ovs_mutex_unlock(&ukey->mutex);
        }

        if (n_ops) {
            /* Push the accumulated delete/modify operations, refreshing the
             * datapath flow table. */
            push_ukey_ops__(udpif, ops, n_ops);
        }
        ovsrcu_quiesce();
    }
    dpif_flow_dump_thread_destroy(dump_thread);
    ofpbuf_uninit(&odp_actions);
}
/* Re-validates a single datapath flow against the current OpenFlow tables.
 *
 * Pushes the stats delta since the last dump, re-runs translation for the
 * flow when the reval sequence number changed, and compares the newly
 * generated mask and actions against the installed ones.
 *
 * Returns UKEY_KEEP to leave the flow as-is, UKEY_MODIFY to rewrite its
 * actions in place (recirc references are transferred to 'recircs'), or
 * UKEY_DELETE to remove it.  Caller must hold ukey->mutex. */
static enum reval_result
revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
                const struct dpif_flow_stats *stats,
                struct ofpbuf *odp_actions, uint64_t reval_seq,
                struct recirc_refs *recircs)
    OVS_REQUIRES(ukey->mutex)
{
    struct xlate_out xout, *xoutp;
    struct netflow *netflow;
    struct ofproto_dpif *ofproto;
    struct dpif_flow_stats push;
    struct flow flow;
    struct flow_wildcards dp_mask, wc;
    enum reval_result result;
    ofp_port_t ofp_in_port;
    struct xlate_in xin;
    long long int last_used;
    int error;
    bool need_revalidate;

    /* Default to deletion; any early exit below keeps this result. */
    result = UKEY_DELETE;
    xoutp = NULL;
    netflow = NULL;

    ofpbuf_clear(odp_actions);
    need_revalidate = (ukey->reval_seq != reval_seq);
    last_used = ukey->stats.used;
    push.used = stats->used;
    push.tcp_flags = stats->tcp_flags;
    /* Stats deltas since the previous dump; clamp at zero in case the
     * datapath counters went backwards. */
    push.n_packets = (stats->n_packets > ukey->stats.n_packets
                      ? stats->n_packets - ukey->stats.n_packets
                      : 0);
    push.n_bytes = (stats->n_bytes > ukey->stats.n_bytes
                    ? stats->n_bytes - ukey->stats.n_bytes
                    : 0);

    if (need_revalidate && last_used
        && !should_revalidate(udpif, push.n_packets, last_used)) {
        goto exit;
    }

    /* We will push the stats, so update the ukey stats cache. */
    ukey->stats = *stats;
    if (!push.n_packets && !need_revalidate) {
        result = UKEY_KEEP;
        goto exit;
    }

    if (ukey->xcache && !need_revalidate) {
        xlate_push_stats(ukey->xcache, &push);
        result = UKEY_KEEP;
        goto exit;
    }

    if (odp_flow_key_to_flow(ukey->key, ukey->key_len, &flow)
        == ODP_FIT_ERROR) {
        goto exit;
    }

    error = xlate_lookup(udpif->backer, &flow, &ofproto, NULL, NULL, &netflow,
                         &ofp_in_port);
    if (error) {
        goto exit;
    }

    if (need_revalidate) {
        xlate_cache_clear(ukey->xcache);
    }
    if (!ukey->xcache) {
        ukey->xcache = xlate_cache_new();
    }

    xlate_in_init(&xin, ofproto, &flow, ofp_in_port, NULL, push.tcp_flags,
                  NULL, need_revalidate ? &wc : NULL, odp_actions);
    if (push.n_packets) {
        xin.resubmit_stats = &push;
        xin.may_learn = true;
    }
    xin.xcache = ukey->xcache;
    /* Re-translate the flow.  By now the destination MAC's port may have
     * been learned, so the generated actions can differ from the ones
     * currently installed in the datapath. */
    xlate_actions(&xin, &xout);
    xoutp = &xout;

    if (!need_revalidate) {
        result = UKEY_KEEP;
        goto exit;
    }

    if (xout.slow) {
        ofpbuf_clear(odp_actions);
        compose_slow_path(udpif, &xout, &flow, flow.in_port.odp_port,
                          odp_actions);
    }

    if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, ukey->key,
                             ukey->key_len, &dp_mask, &flow)
        == ODP_FIT_ERROR) {
        goto exit;
    }

    /* Do not modify if any bit is wildcarded by the installed datapath flow,
     * but not the newly revalidated wildcard mask (wc), i.e., if revalidation
     * tells that the datapath flow is now too generic and must be narrowed
     * down.  Note that we do not know if the datapath has ignored any of the
     * wildcarded bits, so we may be overtly conservative here. */
    if (flow_wildcards_has_extra(&dp_mask, &wc)) {
        goto exit;
    }

    /* Compare the newly translated actions against the installed ones;
     * a difference means the datapath flow must be refreshed. */
    if (!ofpbuf_equal(odp_actions,
                      ovsrcu_get(struct ofpbuf *, &ukey->actions))) {
        /* The datapath mask was OK, but the actions seem to have changed.
         * Let's modify it in place. */
        result = UKEY_MODIFY;
        /* Transfer recirc action ID references to the caller. */
        recirc_refs_swap(recircs, &xoutp->recircs);
        goto exit;
    }

    result = UKEY_KEEP;

exit:
    if (result != UKEY_DELETE) {
        ukey->reval_seq = reval_seq;
    }
    if (netflow && result == UKEY_DELETE) {
        netflow_flow_clear(netflow, &flow);
    }
    xlate_out_uninit(xoutp);
    return result;
}