分类器用途
classifier在ovs中非常重要,用于流分类,执行相应的动作。属于match+action中的match。
在ovs中有三个地方会用到分类器:
- miss报文处理(用户态慢路径)
/* The returned rule (if any) is valid at least until the next RCU quiescent
* period. If the rule needs to stay around longer, the caller should take
* a reference.
*
* 'flow' is non-const to allow for temporary modifications during the lookup.
* Any changes are restored before returning.
* 允许用户临时改变flow中的内容
*/
static struct rule_dpif *
rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, ovs_version_t version,
uint8_t table_id, struct flow *flow,
struct flow_wildcards *wc)
{
struct classifier *cls = &ofproto->up.tables[table_id].cls;
return rule_dpif_cast(rule_from_cls_rule(classifier_lookup(cls, version,
flow, wc)));
}
- underlay路由表查找
/* ovs路由查找,得到网关的IP地址和出接口的源IP地址 */
bool
ovs_router_lookup(const struct in6_addr *ip6_dst, char output_bridge[],
struct in6_addr *src, struct in6_addr *gw)
{
const struct cls_rule *cr;
struct flow flow = {.ipv6_dst = *ip6_dst};
cr = classifier_lookup(&cls, OVS_VERSION_MAX, &flow, NULL);
if (cr) {
struct ovs_router_entry *p = ovs_router_entry_cast(cr);
ovs_strlcpy(output_bridge, p->output_bridge, IFNAMSIZ);
*gw = p->gw;
if (src) {
*src = p->src_addr;
}
return true;
}
return ovs_router_lookup_fallback(ip6_dst, output_bridge, src, gw);
}
- tunnel terminate表查找
/* 'flow' is non-const to allow for temporary modifications during the lookup.
* Any changes are restored before returning.
* flow参数允许临时修改一些值,但是在返回之前需要被恢复原来的值。
*/
odp_port_t
tnl_port_map_lookup(struct flow *flow, struct flow_wildcards *wc)
{
//进行分类器规则查找
const struct cls_rule *cr = classifier_lookup(&cls, OVS_VERSION_MAX, flow,
wc);
return (cr) ? tnl_port_cast(cr)->portno : ODPP_NONE;
}
分类器的核心函数为classifier_lookup函数,它有四个参数:
- cls: table的分类器
- version:当前符合要求的规则版本,即规则的添加版本需要小于该version,并且其删除版本要大于该version,即该规则在该版本是可见的。
- flow:需要匹配的报文的提取值,中途可以邻居修改,但是最终是保持报文原样的,将来用来作为megaflow的value。
- wc:通配符,报文起始查找时是通配所有的,在整个报文匹配中使用同一个wc,命中一个规则就将该规则对应的掩码unwildcard,表示报文需要匹配这些域,当报文通过goto和resubmit动作转移到另外一个table时,依然会使用原来的上个table处理后的wc继续处理,多个规则之间的mask采用或的关系处理wc。wc与flow共同构成快转表中规则的value与mask。wc非常重要,它最大限度的用来减少误匹配。对于那些不用生成megaflow的使用场景,该值可以为NULL,所以ovs_router_lookup中该值为NULL。
数据结构关系图
数据结构
/* A flow classifier. */
/* flow分类器 */
struct classifier {
int n_rules; /* Total number of rules. 总共的规则数目 */
uint8_t n_flow_segments; /* 段匹配器的段的个数, 一般是四个段: metadata,l2,l3,l4,但是这个值只能是3,最后一个段单独处理 */
//存储每个段的起始地址的64字节偏移
uint8_t flow_segments[CLS_MAX_INDICES]; /* Flow segment boundaries to use
* for staged lookup. 段匹配器的结束边界,总共有四个段,三个边界
* |---段1----|-----段2----|----段三----|----段4-----|
* 边界1 边界2 边界3
*/
struct cmap subtables_map; /* Contains "struct cls_subtable"s. 子表hash表,分类器下,按照flow的掩码不同
* 划分成不同的子表,每个子表的规则的掩码是一样的,一个流表有多个子表
*/
struct pvector subtables; /* 子表数组,安装表格的优先级进行排列,这样的话,可以跳过低优先级的表格 */
struct cmap partitions; /* Contains "struct cls_partition"s. */
struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. 前缀树,用于过滤一些规则 */
unsigned int n_tries; /* 前缀树的个数 */
bool publish; /* Make changes visible to lookups? 表示是否可以查询了 */
};
段边界
/* U64 indices for segmented flow classification. */
/* 64bit段flow分类器 */
const uint8_t flow_segment_u64s[4] = {
FLOW_SEGMENT_1_ENDS_AT / sizeof(uint64_t),/* 元数据匹配结束地址 */
FLOW_SEGMENT_2_ENDS_AT / sizeof(uint64_t),/* 链路层匹配结束 地址 */
FLOW_SEGMENT_3_ENDS_AT / sizeof(uint64_t),/* 网络层匹配结束地址 */
FLOW_U64S /* 传输层匹配结束地址 */
};
classifier流程
- 从cls->subtables第一个子表开始循环遍历每一个符合要求的子表,对子表调用函数find_match_wc进行处理。
函数find_match_wc进行如下处理:
对flow在子表的前三个段中进行trie过滤,不符合命中条件的话,开始下一个子表。
计算flow在子表在前三个段中的hash值,在subtable->indices[i]中查找是否有对应的hash值,前三个段就是通过hash值进行
过滤。
前面两条都满足要求后,对最后一个段进行trie过滤,不符合命中条件的话,开始下一个子表。
对最后一个段进行hash,使用hash值在子表中调用函数find_match进行精确匹配。
- 所有子表遍历完毕后,开始对匹配中命中的规则进行关联匹配处理,找出最高优先级的规则。
相关函数
classifier_init
初始化一个分类器。
/* Initializes 'cls' as a classifier that initially contains no classification
* rules. 初始化一个分类器
* 参数flow_segments为段边界数组,即flow_segment_u64s */
void
classifier_init(struct classifier *cls, const uint8_t *flow_segments)
{
cls->n_rules = 0;/* 规则个数为0 */
cmap_init(&cls->subtables_map);/* 初始化子表map */
pvector_init(&cls->subtables);//初始化子表数组
cls->n_flow_segments = 0;/* 初始化段为0 */
if (flow_segments) {/* 如果存在段匹配器,那么初始化段匹配器的结束地址,这里只初始化了三个段 */
while (cls->n_flow_segments < CLS_MAX_INDICES
&& *flow_segments < FLOW_U64S) {
cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
}
}
cls->n_tries = 0;/* 默认前缀树匹配器的个数为0 */
for (int i = 0; i < CLS_MAX_TRIES; i++) {/* 初始化前缀树 */
trie_init(cls, i, NULL);
}
cls->publish = true;
}
classifier_insert
插入一个规则到分类器。
/* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller
* must not modify or free it.
*
* 'cls' must not contain an identical rule (including wildcards, values of
* fixed fields, and priority). Use classifier_find_rule_exactly() to find
* such a rule.
* 插入规则到分类器,直到规则从分类器中删除,调用者不能修改和释放该规则*/
void
classifier_insert(struct classifier *cls,/* 分类器 */
const struct cls_rule *rule,/* 规则 */
ovs_version_t version, /* 规则版本 */
const struct cls_conjunction conj[],
size_t n_conj)
{
const struct cls_rule *displaced_rule
= classifier_replace(cls, rule, version, conj, n_conj);
ovs_assert(!displaced_rule);
}
/* Inserts 'rule' into 'cls' in 'version'. Until 'rule' is removed from 'cls',
* the caller must not modify or free it.
*
* If 'cls' already contains an identical rule (including wildcards, values of
* fixed fields, and priority) that is visible in 'version', replaces the old
* rule by 'rule' and returns the rule that was replaced. The caller takes
* ownership of the returned rule and is thus responsible for destroying it
* with cls_rule_destroy(), after RCU grace period has passed (see
* ovsrcu_postpone()).
*
* Returns NULL if 'cls' does not contain a rule with an identical key, after
* inserting the new rule. In this case, no rules are displaced by the new
* rule, even rules that cannot have any effect because the new rule matches a
* superset of their flows and has higher priority.
*
* 如果不包含一个一样的规则,则返回空。
*/
const struct cls_rule *
classifier_replace(struct classifier *cls, const struct cls_rule *rule,
ovs_version_t version,
const struct cls_conjunction *conjs, size_t n_conjs)
{
struct cls_match *new;
struct cls_subtable *subtable;
uint32_t ihash[CLS_MAX_INDICES];
struct cls_match *head;
unsigned int mask_offset;
size_t n_rules = 0;
uint32_t basis;
uint32_t hash;
unsigned int i;
/* 'new' is initially invisible to lookups. */
new = cls_match_alloc(rule, version, conjs, n_conjs);/* 分配一个新的匹配器 */
ovsrcu_set(&CONST_CAST(struct cls_rule *, rule)->cls_match, new);/* 设置规则的匹配器 */
/* 根据规则的掩码进行子表的查找 */
subtable = find_subtable(cls, rule->match.mask);/* 根据掩码查找子表 */
if (!subtable) {/* 没有找到子表,新建一个子表,将规则插入新的子表 */
subtable = insert_subtable(cls, rule->match.mask);
}
/* Compute hashes in segments. */
/* 计算在段里面的hash值 */
basis = 0;
mask_offset = 0;
for (i = 0; i < subtable->n_indices; i++) {/*遍历每一个段,计算每一个段的hash值,这些hash值可以用来过滤 */
/* 对规则的每一个段进行hash */
ihash[i] = minimatch_hash_range(&rule->match, subtable->index_maps[i],
&mask_offset, &basis);
}
/* 对最后一个段进行hash */
hash = minimatch_hash_range(&rule->match, subtable->index_maps[i],
&mask_offset, &basis);
/* 根据hash值对子表中的规则进行查找,最后一个段的hash值作为hash桶的key */
head = find_equal(subtable, rule->match.flow, hash);
if (!head) {/* 没有找到,添加规则 */
/* Add rule to tries.
*
* Concurrent readers might miss seeing the rule until this update,
* which might require being fixed up by revalidation later. */
for (i = 0; i < cls->n_tries; i++) {/* 遍历分类器的每一个前缀树 */
if (subtable->trie_plen[i]) {/* 如果该子表的对应的掩码长度存在,插入该前缀树 */
trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);/* 这个是前缀长度 */
}
}
/* Add rule to ports trie. */
/* 添加规则到端口树中,只对传输层源端口进行树插入 */
if (subtable->ports_mask_len) {
/* We mask the value to be inserted to always have the wildcarded
* bits in known (zero) state, so we can include them in comparison
* and they will always match (== their original value does not
* matter). */
ovs_be32 masked_ports = minimatch_get_ports(&rule->match);
trie_insert_prefix(&subtable->ports_trie, &masked_ports,
subtable->ports_mask_len);
}
/* Add new node to segment indices. 将hash值插入子表段hash过滤表 */
for (i = 0; i < subtable->n_indices; i++) {
ccmap_inc(&subtable->indices[i], ihash[i]);
}
n_rules = cmap_insert(&subtable->rules, &new->cmap_node, hash);
} else { /* Equal rules exist in the classifier already. */
struct cls_match *prev, *iter;
/* Scan the list for the insertion point that will keep the list in
* order of decreasing priority. Insert after rules marked invisible
* in any version of the same priority. */
FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
if (rule->priority > iter->priority
|| (rule->priority == iter->priority
&& !cls_match_is_eventually_invisible(iter))) {
break;
}
}
/* Replace 'iter' with 'new' or insert 'new' between 'prev' and
* 'iter'. */
if (iter) {
struct cls_rule *old;
if (rule->priority == iter->priority) {
cls_match_replace(prev, iter, new);
old = CONST_CAST(struct cls_rule *, iter->cls_rule);
} else {
cls_match_insert(prev, iter, new);
old = NULL;
}
/* Replace the existing head in data structures, if rule is the new
* head. */
if (iter == head) {
cmap_replace(&subtable->rules, &head->cmap_node,
&new->cmap_node, hash);
}
if (old) {
struct cls_conjunction_set *conj_set;
conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
&iter->conj_set);
if (conj_set) {
ovsrcu_postpone(free, conj_set);
}
ovsrcu_set(&old->cls_match, NULL); /* Marks old rule as removed
* from the classifier. */
ovsrcu_postpone(cls_match_free_cb, iter);
/* No change in subtable's max priority or max count. */
/* Make 'new' visible to lookups in the appropriate version. */
cls_match_set_remove_version(new, OVS_VERSION_NOT_REMOVED);
/* Make rule visible to iterators (immediately). */
rculist_replace(CONST_CAST(struct rculist *, &rule->node),
&old->node);
/* Return displaced rule. Caller is responsible for keeping it
* around until all threads quiesce. */
return old;
}
} else {
/* 'new' is new node after 'prev' */
cls_match_insert(prev, iter, new);
}
}
/* Make 'new' visible to lookups in the appropriate version. */
cls_match_set_remove_version(new, OVS_VERSION_NOT_REMOVED);
/* Make rule visible to iterators (immediately). */
rculist_push_back(&subtable->rules_list,
CONST_CAST(struct rculist *, &rule->node));
/* Rule was added, not replaced. Update 'subtable's 'max_priority' and
* 'max_count', if necessary.
*
* The rule was already inserted, but concurrent readers may not see the
* rule yet as the subtables vector is not updated yet. This will have to
* be fixed by revalidation later. */
if (n_rules == 1) {
subtable->max_priority = rule->priority;
subtable->max_count = 1;
pvector_insert(&cls->subtables, subtable, rule->priority);
} else if (rule->priority == subtable->max_priority) {
++subtable->max_count;
} else if (rule->priority > subtable->max_priority) {
subtable->max_priority = rule->priority;
subtable->max_count = 1;
pvector_change_priority(&cls->subtables, subtable, rule->priority);
}
/* Nothing was replaced. */
cls->n_rules++;
if (cls->publish) {/* 更新子表 */
pvector_publish(&cls->subtables);
}
return NULL;
}
classifier_lookup
/* Finds and returns the highest-priority rule in 'cls' that matches 'flow' and
* that is visible in 'version'. Returns a null pointer if no rules in 'cls'
* match 'flow'. If multiple rules of equal priority match 'flow', returns one
* arbitrarily.
*
* If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the
* set of bits that were significant in the lookup. At some point
* earlier, 'wc' should have been initialized (e.g., by
* flow_wildcards_init_catchall()).
*
* 'flow' is non-const to allow for temporary modifications during the lookup.
* Any changes are restored before returning.
* 从cls分类器中找到一个能够匹配flow的最高优先级的规则。
* 如果在分类器cls没有规则匹配这个flow,则返回空,如果多个规则匹配该流,则返回一个最高
* 优先级的。
* 可以临时修改flow
*/
const struct cls_rule *
classifier_lookup(const struct classifier *cls, /* 分类器 */ovs_version_t version,/* 版本 */
struct flow *flow,/* 需要查找的flow */ struct flow_wildcards *wc/* 通配符 */)
{
return classifier_lookup__(cls, version, flow, wc, true);
}
/* Like classifier_lookup(), except that support for conjunctive matches can be
* configured with 'allow_conjunctive_matches'. That feature is not exposed
* externally because turning off conjunctive matches is only useful to avoid
* recursion within this function itself.
*
* 'flow' is non-const to allow for temporary modifications during the lookup.
* Any changes are restored before returning. */
static const struct cls_rule *
classifier_lookup__(const struct classifier *cls, ovs_version_t version,/* 分类器及其版本号 */
struct flow *flow, struct flow_wildcards *wc,/* 匹配流和通配符 */
bool allow_conjunctive_matches)/* 是否允许conjunctive匹配 */
{
struct trie_ctx trie_ctx[CLS_MAX_TRIES];
const struct cls_match *match;
/* Highest-priority flow in 'cls' that certainly matches 'flow'. */
/* 指向最高优先级的flow在cls中 */
const struct cls_match *hard = NULL;
int hard_pri = INT_MIN; /* hard ? hard->priority : INT_MIN. */
/* Highest-priority conjunctive flows in 'cls' matching 'flow'. Since
* these are (components of) conjunctive flows, we can only know whether
* the full conjunctive flow matches after seeing multiple of them. Thus,
* we refer to these as "soft matches". 我们只有在查看了所有的关联匹配后
* 我们才能知道是全匹配了,因此我们将其称之为软匹配
* 最高优先级的关联匹配,在分类器中,使用最高优先级的关联flows匹配flow
* */
struct cls_conjunction_set *soft_stub[64];/* 关联匹配集合,我们默认64个集合 */
struct cls_conjunction_set **soft = soft_stub;/* 软匹配集合 */
size_t n_soft = 0, allocated_soft = ARRAY_SIZE(soft_stub);/* 64个元素 */
int soft_pri = INT_MIN; /* n_soft ? MAX(soft[*]->priority) : INT_MIN. */
/* Synchronize for cls->n_tries and subtable->trie_plen. They can change
* when table configuration changes, which happens typically only on
* startup. */
atomic_thread_fence(memory_order_acquire);
/* Initialize trie contexts for find_match_wc(). */
/* 初始化trie上下文用于查找wc */
for (int i = 0; i < cls->n_tries; i++) {
trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
}
/* Main loop. */
/* 主循环,遍历所有的子表,从这里的循环可以看出,每一次循环处理一个子表,由于子表掩码相同,所以一次返回
** 与掩码区域相同的多个规则的公用match,该match是一个单链表,按照优先级高低进行排列。
*/
struct cls_subtable *subtable;
PVECTOR_FOR_EACH_PRIORITY (subtable, hard_pri + 1, 2, sizeof *subtable,
&cls->subtables) {
struct cls_conjunction_set *conj_set;/* 关联器集合 */
/* Skip subtables with no match, or where the match is lower-priority
* than some certain match we've already found.
* 跳过没有相同的match的子表,或者匹配的优先级比先前匹配的较低
*/
match = find_match_wc(subtable, version, flow, trie_ctx, cls->n_tries,
wc);
if (!match || match->priority <= hard_pri) {/* 过滤掉有先级比已经匹配的规则低或者相等的 */
continue;
}
/* 获取规则的关联匹配 */
conj_set = ovsrcu_get(struct cls_conjunction_set *, &match->conj_set);
if (!conj_set) {/* 该匹配项没有关联匹配项,直接作为硬匹配 */
/* 'match' isn't part of a conjunctive match. It's the best
* certain match we've got so far, since we know that it's
* higher-priority than hard_pri.
*
* (There might be a higher-priority conjunctive match. We can't
* tell yet.) */
hard = match;
hard_pri = hard->priority;/* 当前命中的最高优先级的硬匹配 */
} else if (allow_conjunctive_matches) {/* 存在关联匹配,并且允许关联匹配 */
/* 'match' is part of a conjunctive match. Add it to the list. */
/* 该match是关联匹配的一部分,将其添加到链表中 */
if (OVS_UNLIKELY(n_soft >= allocated_soft)) {/* 超过了我们能够容纳的关联个数,需要进行重新分配 */
struct cls_conjunction_set **old_soft = soft;/* 先保存老的匹配 */
allocated_soft *= 2;/* 进行指数扩展 */
soft = xmalloc(allocated_soft * sizeof *soft);/* 分配软匹配数组 */
memcpy(soft, old_soft, n_soft * sizeof *soft);/* 将原来的内容拷贝进去 */
if (old_soft != soft_stub) {/* 如果更换了软匹配,释放以前的内容 */
free(old_soft);
}
}
/* 将新增加的软匹配加入 */
soft[n_soft++] = conj_set;
/* Keep track of the highest-priority soft match. */
/* 如果原来的有先级小于现在的这个规则,更新优先级 */
if (soft_pri < match->priority) {
soft_pri = match->priority;
}
}
}
/* In the common case, at this point we have no soft matches and we can
* return immediately. (We do the same thing if we have potential soft
* matches but none of them are higher-priority than our hard match.)
* 在通常情况下:到这里的时候,我们没有软匹配,我们将会立即返回。
* 如果我们有潜在的软匹配,我们会做同样的事情*/
if (hard_pri >= soft_pri) {/* 如果硬匹配的优先级大于等于软匹配,说明我们将会选择硬匹配规则,软匹配如果分配了内存则需要释放 */
if (soft != soft_stub) {
free(soft);/* 释放我们分配的内存 */
}
return hard ? hard->cls_rule : NULL;/* 返回命中的规则 */
}
/* At this point, we have some soft matches. We might also have a hard
* match; if so, its priority is lower than the highest-priority soft
* match.
* 能够走到这里,说明我们有多个软匹配,我们需要逐个将软匹配的关联匹配变成新的
* 全关联匹配 */
/* Soft match loop.
* 外层循环,遍历每一个优先级,通过第一个for语句选出本次循环的最高优先级。
* Check whether soft matches are real matches. 检查软匹配是否是一个真实的全匹配,即关联规则的每一个维度都命中了,则软匹配升级为硬匹配 */
for (;;) {
/* Delete soft matches that are null. This only happens in second and
* subsequent iterations of the soft match loop, when we drop back from
* a high-priority soft match to a lower-priority one.
*
* Also, delete soft matches whose priority is less than or equal to
* the hard match's priority. In the first iteration of the soft
* match, these can be in 'soft' because the earlier main loop found
* the soft match before the hard match. In second and later iteration
* of the soft match loop, these can be in 'soft' because we dropped
* back from a high-priority soft match to a lower-priority soft match.
*
* It is tempting to delete soft matches that cannot be satisfied
* because there are fewer soft matches than required to satisfy any of
* their conjunctions, but we cannot do that because there might be
* lower priority soft or hard matches with otherwise identical
* matches. (We could special case those here, but there's no
* need--we'll do so at the bottom of the soft match loop anyway and
* this duplicates less code.)
*
* It's also tempting to break out of the soft match loop if 'n_soft ==
* 1' but that would also miss lower-priority hard matches. We could
* special case that also but again there's no need. */
/* 过滤掉所有的优先级比硬匹配低的,空的软匹配。 */
for (int i = 0; i < n_soft; ) {
if (!soft[i] || soft[i]->priority <= hard_pri) {
soft[i] = soft[--n_soft];
} else {
i++;
}
}
/* 如果所有的软匹配都不合格的话,直接跳出 */
if (!n_soft) {
break;
}
/* Find the highest priority among the soft matches. (We know this
* must be higher than the hard match's priority; otherwise we would
* have deleted all of the soft matches in the previous loop.) Count
* the number of soft matches that have that priority.
* 从所有的软匹配中找到最高优先级的软匹配,这个软匹配必须高于硬匹配。同时计算出这个优先级的软匹配的个数
*/
soft_pri = INT_MIN;
int n_soft_pri = 0;/* 获取一个最高的软匹配优先级及其个数 */
for (int i = 0; i < n_soft; i++) {/* 遍历剩下的合规的软匹配 */
if (soft[i]->priority > soft_pri) {
soft_pri = soft[i]->priority;/* 更新软匹配的有先级 */
n_soft_pri = 1;
} else if (soft[i]->priority == soft_pri) {
n_soft_pri++;/* 计算同一最高优先级的软匹配的个数 */
}
}
ovs_assert(soft_pri > hard_pri);
/* Look for a real match among the highest-priority soft matches.
*
* It's unusual to have many conjunctive matches, so we use stubs to
* avoid calling malloc() in the common case. An hmap has a built-in
* stub for up to 2 hmap_nodes; possibly, we would benefit a variant
* with a bigger stub. */
struct conjunctive_match cm_stubs[16];
struct hmap matches;
hmap_init(&matches);/* 初始暂存hash,同一个优先级的所有软匹配都使用该hash表,每次循环清零。这样可以组合
* 多个子表中的软匹配形成硬匹配。
*/
for (int i = 0; i < n_soft; i++) {/* 遍历每一个软匹配,只处理我们需要的优先级,即本次选出的最高优先级
* 一次循环处理一个该优先级的软匹配。设置一个维度bit。
*/
uint32_t id;
if (soft[i]->priority == soft_pri/* 如果是我们需要的优先级 */
&& find_conjunctive_match(soft[i], n_soft_pri, &matches,/* 根据关联id,查找关联匹配 */
cm_stubs, ARRAY_SIZE(cm_stubs),
&id)) {/* 将该软匹配的每一个关联匹配进行过滤,放到matches杂凑表中 */
uint32_t saved_conj_id = flow->conj_id;/* 记录流的关联动作id */
const struct cls_rule *rule;
flow->conj_id = id; /* 最终的关联id,根据关联id进行第二步查找 */
rule = classifier_lookup__(cls, version, flow, wc, false);/* 进行流表查找,不允许处理关联动作,因为前面已经处理过了 */
flow->conj_id = saved_conj_id;
if (rule) {/* 找到规则 */
free_conjunctive_matches(&matches,
cm_stubs, ARRAY_SIZE(cm_stubs));
if (soft != soft_stub) {
free(soft);
}
return rule;
}
}
}
free_conjunctive_matches(&matches, cm_stubs, ARRAY_SIZE(cm_stubs));
/* There's no real match among the highest-priority soft matches.
* However, if any of those soft matches has a lower-priority but
* otherwise identical flow match, then we need to consider those for
* soft or hard matches.
*
* The next iteration of the soft match loop will delete any null
* pointers we put into 'soft' (and some others too).
* 走到这里说明,本优先级没有形成硬匹配
*/
for (int i = 0; i < n_soft; i++) {
if (soft[i]->priority != soft_pri) {
continue;
}
/* Find next-lower-priority flow with identical flow match. */
/* 从同样的匹配条件中找到下一个优先级的匹配,如果该匹配是关联匹配,则
** 准备下一轮。否则作为硬匹配。
*/
match = next_visible_rule_in_list(soft[i]->match, version);
if (match) {
soft[i] = ovsrcu_get(struct cls_conjunction_set *,
&match->conj_set);
if (!soft[i]) {//该规则不是关联匹配,作为硬匹配与原来的硬匹配比较优先级。
/* The flow is a hard match; don't treat as a soft
* match. */
if (match->priority > hard_pri) {
hard = match;
hard_pri = hard->priority;
}
}
} else {
/* No such lower-priority flow (probably the common case). */
/* 为空,需要清除,在下一次循环中会进行计数修改 */
soft[i] = NULL;
}
}
}
/* 如果是新分配的软匹配保存控制块,则释放 */
if (soft != soft_stub) {
free(soft);
}
return hard ? hard->cls_rule : NULL;
}
总结
经过长时间的分析,大概理清了ovs的分类器的实现机制,还有一些比较重要的细节没有想明白:
- 一般来说缓存都是精确匹配的,掩码匹配很容易导致误匹配。ovs选择megaflow作为掩码匹配器,是否会存在误匹配的可能了?
- classifier_lookup函数的wc参数保存了所有命中规则的掩码或的结果。不仅包含了每一个table中最终的最高优先级的规则的掩码,也包括了每一个table中各个子表中命中的其它优先级的掩码。也就是说,wc不仅仅是最终执行了的动作的规则的掩码的或,还包含了一些命中了但是低优先级不回被执行动作的规则的掩码。这样做的意义是啥?
- 函数check_tries中如下代码片段的作用是啥?
/* Possible to skip the rest of the subtable if subtable's
* prefix on the field is not included in the lookup result.
* 查看该子表是否有命中,有命中则匹配下一个过滤树。否则
*/
if (!be_get_bit_at(&ctx->match_plens.be32, field_plen[j] - 1)) {
/* We want the trie lookup to never result in unwildcarding
* any bits that would not be unwildcarded otherwise.
* Since the trie is shared by the whole classifier, it is
* possible that the 'maskbits' contain bits that are
* irrelevant for the partition relevant for the current
* packet. Hence the checks below.
* Trie是整个分类器共享的。它是可能的maskbits可能包含一些与这个报文不相关的部分。
* 因此需要进行下面的检查。
*/
/* Check that the trie result will not unwildcard more bits
* than this subtable would otherwise.
* 这棵树最长*/
if (ctx->maskbits <= field_plen[j]) {//没有全匹配
/* Unwildcard the bits and skip the rest. */
/* 设置非通配bit,用于跳过其子表其他 */
mask_set_prefix_bits(wc, ctx->be32ofs, ctx->maskbits);
/* Note: Prerequisite already unwildcarded, as the only
* prerequisite of the supported trie lookup fields is
* the ethertype, which is always unwildcarded. */
return true;
}
/* Can skip if the field is already unwildcarded. */
if (mask_prefix_bits_set(wc, ctx->be32ofs, ctx->maskbits)) {
return true;
}
}
- 函数find_match_wc中的如下代码片段,没能想明白其作用是啥?
if (!rule && subtable->ports_mask_len) {/* 没有找到,并且存在tdports引擎树 */
/* The final stage had ports, but there was no match. Instead of
* unwildcarding all the ports bits, use the ports trie to figure out a
* smaller set of bits to unwildcard.
* 最后一个stage为传输层信息,但是这个子表没有规则命中。代替规则需要匹配整个
* 端口域,通过trie计算一个最小的掩码域,可以最大限度减少误匹配。
*/
unsigned int mbits;
ovs_be32 value, plens, mask;
/* 获取四层端口的掩码 */
mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src);/* 获取四层源端口掩码 */
/* 两者相与 */
value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;/* 与流进行掩码与操作,得到我们需要查找的值 */
/* 对端口树进行查找,返回的mbits表示value能够在trie中命中的最长的位置,如果是没有命中的话,表示参与了匹配
** 的bit数,即mbits掩码下得到的值一定不会命中。用于减少误匹配。
*/
mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32);
((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
mask & be32_prefix_mask(mbits);
goto no_match;
}