早期的VPP本身的node框架比较固定,各个node之间逻辑连接已经固化。为此新版本增加了feature机制,
每个feature是一个node,用户可以启用/停止某个或某些feature。
用户也可以自己写插件,把自定义node(自己的业务逻辑)加入到指定位置。
vnet_feature_arc_registration_t
vpp将feature分成不同的组,每组feature称为一个arc。arc中的feature按照代码指定的顺序串接起来。arc结构将记录这组feature的起始node和结束node。系统初始化时完成每个feature的连接。
VNET_FEATURE_ARC_INIT宏用来注册arc。
在arc中指定的起始node中,必须调用vnet_feature_arc_start函数,才能正式进入feature机制业务流程,该函数会将下一跳强行指定为arc中的下一个feature。
vnet_feature_registration_t
一个feature等价于一个node,用户通过VNET_FEATURE_INIT宏定义自己的feature,指定需要加入哪个arc,以及在arc中的哪个相对位置。每个feature都可以通过外部命令行启用/停止。
VNET_FEATURE_ARC_INIT和VNET_FEATURE_INIT宏分别将arc和feature保存到全局变量feature_main中。此时arc和feature还未被“加工”,只是基本的原料。
clib_error_t *vnet_feature_init
该函数负责初始化arc和feature,并把它们组织成最终的数据结构,填充到vnet_feature_main_t feature_main中。代码比较简单,这里只对vnet_feature_main_t进行说明。
typedef struct
{
/** feature arc configuration list */
//VNET_FEATURE_ARC_INIT宏中注册的arc保存在这里,顺序不定,也没有意义。
vnet_feature_arc_registration_t *next_arc;
/*vnet_feature_init()中来初始化该成员,把arc按名字组织成hash表,为何要用双重指针???
博主认为是bug,后面明明是当hash表用么。又不是hash数组*/
uword **arc_index_by_name;
/** feature path configuration lists */
VNET_FEATURE_ARC_INIT宏中注册的feature保存在这里,顺序不定,也没有意义。
vnet_feature_registration_t *next_feature;
/*vnet_feature_init()中来初始化该成员,把feature分配到不同arc中,并且根据
before和after变量来决定feature顺序。这里看作是一个数组,用arc的index做索引*/
vnet_feature_registration_t **next_feature_by_arc;
/*vnet_feature_init()中来初始化该成员,把feature按名字组织成hash表数组,arc的index索引到
对应hash表,node name来做hash表的key*/
uword **next_feature_by_name;
/** feature config main objects */
vnet_feature_config_main_t *feature_config_mains;
/** Save partial order results for show command */
char ***feature_nodes;
/** bitmap of interfaces which have driver rx features configured */
/*bitmap数组,arc的index索引对应bitmap表,决定该网卡是否启用了该arc中的feature*/
uword **sw_if_index_has_features;
/** feature reference counts by interface */
i16 **feature_count_by_sw_if_index;
/** Feature arc index for device-input */
/*从收包开始就使用feature机制了,这个变量给收包驱动用。追踪了下代码,这个变量属于device_input_node这个node,但是这个node业务函数为空,而且状态是VLIB_NODE_STATE_DISABLED。那这是干什么用的呢?层层追踪,原来这个node纯粹是个dump node,但是它是一个arc起点。这个arc的index保存在了下面变量中。然后真正的网卡收包node中会借用这个index当成自己的,调用vnet_feature_start_device_input_x1这类函数,开始了feature之旅。真TMD恶心,代码还能写的更难懂点么。*/
u8 device_input_feature_arc_index;
/** convenience */
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
} vnet_feature_main_t;
vnet_feature_arc_init 把属于某arc的无序的feature排序成有序状态。feature注册时可以指定在某些node前面,和某些node后面。
/**
 * Sort the (unordered) features registered on one arc into a valid order
 * and initialise the arc's config infrastructure with that order.
 *
 * Each registration may list nodes it must run before (runs_before) and
 * nodes it must run after (runs_after).  The constraints are turned into
 * "A,B" tuples ("A runs before B"), loaded into an n x n matrix, closed
 * transitively, and a partial order is then extracted from the closure.
 *
 * @param vm                       vlib main, forwarded to vnet_config_init
 * @param vcm                      config main of this arc, forwarded to
 *                                 vnet_config_init
 * @param feature_start_nodes      start node names, forwarded to
 *                                 vnet_config_init
 * @param num_feature_start_nodes  number of entries in feature_start_nodes
 * @param first_reg                head of the arc's registration list
 * @param in_feature_nodes         out: vector of node names in final order
 *                                 (kept for the show command)
 * @return 0 on success, otherwise a clib error.
 *
 * On success every registration has feature_index (and *feature_index_ptr,
 * when that pointer is set) assigned to its position in the order.
 *
 * NOTE(review): the error returns below leave the temporary vectors and
 * hash tables allocated; that is unchanged from the original code.
 */
clib_error_t *
vnet_feature_arc_init (vlib_main_t * vm,
		       vnet_config_main_t * vcm,
		       char **feature_start_nodes,
		       int num_feature_start_nodes,
		       vnet_feature_registration_t * first_reg,
		       char ***in_feature_nodes)
{
  uword *index_by_name;
  uword *reg_by_index;
  u8 **node_names = 0;
  u8 *node_name;
  char **these_constraints;
  char *this_constraint_c;
  u8 **constraints = 0;
  u8 *constraint_tuple;
  u8 *this_constraint;
  u8 **orig, **closure;
  uword *p;
  int i, j, k;
  u8 *a_name, *b_name;
  int a_index, b_index;
  int n_features;
  u32 *result = 0;
  vnet_feature_registration_t *this_reg = 0;
  char **feature_nodes = 0;
  hash_pair_t *hp;
  u8 **keys_to_delete = 0;

  index_by_name = hash_create_string (0, sizeof (uword));
  reg_by_index = hash_create (0, sizeof (uword));

  this_reg = first_reg;

  /* pass 1, collect feature node names, construct a before b pairs */
  while (this_reg)
    {
      node_name = format (0, "%s%c", this_reg->node_name, 0);
      /* vec_len (node_names) is the number of names collected so far, so
         it doubles as the index of the feature being added. */
      hash_set (reg_by_index, vec_len (node_names), (uword) this_reg);

      /* From here on the feature is identified by that index. */
      hash_set_mem (index_by_name, node_name, vec_len (node_names));

      vec_add1 (node_names, node_name);

      /* runs_before is a NULL-terminated array; it may name several nodes. */
      these_constraints = this_reg->runs_before;
      while (these_constraints && these_constraints[0])
	{
	  this_constraint_c = these_constraints[0];

	  constraint_tuple = format (0, "%s,%s%c", node_name,
				     this_constraint_c, 0);
	  vec_add1 (constraints, constraint_tuple);
	  these_constraints++;
	}

      /* runs_after is a NULL-terminated array; it may name several nodes. */
      these_constraints = this_reg->runs_after;
      while (these_constraints && these_constraints[0])
	{
	  this_constraint_c = these_constraints[0];

	  constraint_tuple = format (0, "%s,%s%c",
				     this_constraint_c, node_name, 0);
	  vec_add1 (constraints, constraint_tuple);
	  these_constraints++;
	}

      this_reg = this_reg->next;
    }

  /* All ordering relations are now expressed as "A,B" strings, meaning
     "A runs before B". */

  /* Number of features on this arc. */
  n_features = vec_len (node_names);
  /* Can be viewed as an (n_features x n_features) two-dimensional array. */
  orig = clib_ptclosure_alloc (n_features);

  for (i = 0; i < vec_len (constraints); i++)
    {
      this_constraint = constraints[i];

      if (comma_split (this_constraint, &a_name, &b_name))
	return clib_error_return (0, "comma_split failed!");

      p = hash_get_mem (index_by_name, a_name);
      /*
       * Note: the next two errors mean that something is
       * b0rked. As in: if you code "A depends on B," and you forget
       * to define a FEATURE_INIT macro for B, you lose.
       * Nonexistent graph nodes are tolerated.
       */
      if (p == 0)
	return clib_error_return (0, "feature node '%s' not found", a_name);
      a_index = p[0];

      p = hash_get_mem (index_by_name, b_name);
      if (p == 0)
	return clib_error_return (0, "feature node '%s' not found", b_name);
      b_index = p[0];

      /* add a before b to the original set of constraints */
      orig[a_index][b_index] = 1;
      vec_free (this_constraint);
    }

  /* Compute the positive transitive closure of the original constraints */
  closure = clib_ptclosure (orig);

  /* Compute a partial order across feature nodes, if one exists. */
again:
  for (i = 0; i < n_features; i++)
    {
      for (j = 0; j < n_features; j++)
	{
	  if (closure[i][j])
	    goto item_constrained;
	}
      /* Item i can be output */
      vec_add1 (result, i);
      {
	for (k = 0; k < n_features; k++)
	  closure[k][i] = 0;
	/*
	 * Add a "Magic" a before a constraint.
	 * This means we'll never output it again
	 */
	closure[i][i] = 1;
	goto again;
      }
    item_constrained:
      ;
    }

  /* see if we got a partial order... */
  if (vec_len (result) != n_features)
    /* The original message carried a "%d" with no matching argument, which
       made the formatter read a nonexistent vararg; the specifier is gone. */
    return clib_error_return (0, "feature_init_cast no partial order!");

  /* The order has been computed: result holds the feature indices. */

  /*
   * We win.
   * Bind the index variables, and output the feature node name vector
   * using the partial order we just computed. Result is in stack
   * order, because the entry with the fewest constraints (e.g. none)
   * is output first, etc.
   */
  /* Walk result backwards, map every index back to its registration,
     assign the final feature index and collect the node names in order. */
  for (i = n_features - 1; i >= 0; i--)
    {
      p = hash_get (reg_by_index, result[i]);
      ASSERT (p != 0);
      this_reg = (vnet_feature_registration_t *) p[0];
      if (this_reg->feature_index_ptr)
	*this_reg->feature_index_ptr = n_features - (i + 1);
      this_reg->feature_index = n_features - (i + 1);
      vec_add1 (feature_nodes, this_reg->node_name);
    }

  /* Set up the config infrastructure */
  /* feature_nodes now holds the final order; hand it to the arc's own
     vnet_config_main_t. */
  vnet_config_init (vm, vcm,
		    feature_start_nodes,
		    num_feature_start_nodes,
		    feature_nodes, vec_len (feature_nodes));

  /* Save a copy for show command */
  *in_feature_nodes = feature_nodes;

  /* Finally, clean up everything we allocated */
  /* *INDENT-OFF* */
  hash_foreach_pair (hp, index_by_name,
  ({
    vec_add1 (keys_to_delete, (u8 *)hp->key);
  }));
  /* *INDENT-ON* */
  hash_free (index_by_name);
  for (i = 0; i < vec_len (keys_to_delete); i++)
    vec_free (keys_to_delete[i]);
  vec_free (keys_to_delete);
  /* The name strings were released through keys_to_delete and the tuples
     inside the constraint loop; only the outer vectors remain to free. */
  vec_free (node_names);
  vec_free (constraints);
  hash_free (reg_by_index);
  vec_free (result);
  clib_ptclosure_free (orig);
  clib_ptclosure_free (closure);
  return 0;
}
至此,feature已经计算好了顺序,但是还是没有真正把各个feature连接起来。连接各个feature的真正工作,最终交由vnet_config_add_feature()和vnet_config_del_feature()完成。
/**
 * Add one feature to an existing feature configuration (or create the first
 * configuration when config_string_heap_index is ~0).
 *
 * @param vm                        vlib main, forwarded to
 *                                  find_config_with_features
 * @param cm                        config main of the arc
 * @param config_string_heap_index  user index of the current config, or ~0
 *                                  when there is none yet
 * @param feature_index             index of the feature to add
 * @param feature_config            feature-private config data (may be empty)
 * @param n_feature_config_bytes    size of feature_config in bytes
 * @return the user index of the resulting config (heap index + 1), or the
 *         unchanged config_string_heap_index when the feature has no node.
 */
u32
vnet_config_add_feature (vlib_main_t * vm,
			 vnet_config_main_t * cm,
			 u32 config_string_heap_index,
			 u32 feature_index,
			 void *feature_config, u32 n_feature_config_bytes)
{
  vnet_config_t *old, *new;
  vnet_config_feature_t *new_features, *f;
  u32 n_feature_config_u32s;
  u32 node_index = vec_elt (cm->node_index_by_feature_index, feature_index);

  if (node_index == ~0)		// feature node does not exist
    return config_string_heap_index;	// return original config index

  if (config_string_heap_index == ~0)
    {
      old = 0;
      new_features = 0;
    }
  else
    {
      u32 *p = vnet_get_config_heap (cm, config_string_heap_index);
      old = pool_elt_at_index (cm->config_pool, p[-1]);
      new_features = old->features;
      if (new_features)
	new_features = duplicate_feature_vector (new_features);
    }

  /* Append the new feature to (a copy of) the existing ones.  Features are
     added one call at a time this way. */
  vec_add2 (new_features, f, 1);
  f->feature_index = feature_index;
  f->node_index = node_index;

  /* A feature may carry private config data; feature_config and
     n_feature_config_bytes describe it, and it is stored in
     f->feature_config.  With no private data the word count is 0. */
  n_feature_config_u32s =
    round_pow2 (n_feature_config_bytes,
		sizeof (f->feature_config[0])) /
    sizeof (f->feature_config[0]);
  vec_add (f->feature_config, feature_config, n_feature_config_u32s);

  /* Sort (prioritize) features. */
  /* The sort goes through feature_cmp; per the accompanying notes a
     feature's index is also its position within the arc. */
  if (vec_len (new_features) > 1)
    vec_sort_with_function (new_features, feature_cmp);

  if (old)
    remove_reference (cm, old);

  /* Connect the features and obtain the matching (possibly shared) config. */
  new = find_config_with_features (vm, cm, new_features);
  new->reference_count += 1;

  /*
   * User gets pointer to config string first element
   * (which defines the pool index
   * this config string comes from).
   */
  vec_validate (cm->config_pool_index_by_user_index,
		new->config_string_heap_index + 1);
  cm->config_pool_index_by_user_index[new->config_string_heap_index + 1]
    = new - cm->config_pool;
  return new->config_string_heap_index + 1;
}
/**
 * Remove one feature from an existing feature configuration.
 *
 * The feature to remove is identified by feature_index together with its
 * private config data: both the word count and (when non-empty) the bytes
 * must match.
 *
 * @param vm                        vlib main, forwarded to
 *                                  find_config_with_features
 * @param cm                        config main of the arc
 * @param config_string_heap_index  user index of the current config
 * @param feature_index             index of the feature to remove
 * @param feature_config            private config data of that feature
 * @param n_feature_config_bytes    size of feature_config in bytes
 * @return the user index of the resulting config (heap index + 1), or the
 *         unchanged config_string_heap_index when no feature matched.
 */
u32
vnet_config_del_feature (vlib_main_t * vm,
			 vnet_config_main_t * cm,
			 u32 config_string_heap_index,
			 u32 feature_index,
			 void *feature_config, u32 n_feature_config_bytes)
{
  vnet_config_t *prev_cfg, *next_cfg;
  vnet_config_feature_t *remaining, *victim;
  u32 *heap_ptr;
  u32 cfg_words;
  u32 user_index;
  uword pos, n_old;

  /* The word just in front of the user's config data holds the pool index
     of the config it belongs to. */
  heap_ptr = vnet_get_config_heap (cm, config_string_heap_index);
  prev_cfg = pool_elt_at_index (cm->config_pool, heap_ptr[-1]);

  /* Private config size, rounded up to whole config words. */
  cfg_words =
    round_pow2 (n_feature_config_bytes,
		sizeof (victim->feature_config[0])) /
    sizeof (victim->feature_config[0]);

  /* Locate the feature with the same index and opaque data. */
  n_old = vec_len (prev_cfg->features);
  for (pos = 0; pos < n_old; pos++)
    {
      victim = prev_cfg->features + pos;
      if (victim->feature_index != feature_index)
	continue;
      if (vec_len (victim->feature_config) != cfg_words)
	continue;
      if (cfg_words == 0
	  || !memcmp (victim->feature_config, feature_config,
		      n_feature_config_bytes))
	break;
    }

  /* Nothing matched: hand back the caller's index untouched. */
  if (pos >= n_old)
    return config_string_heap_index;

  /* Build a copy of the feature vector without the matched entry. */
  remaining = duplicate_feature_vector (prev_cfg->features);
  victim = remaining + pos;
  vnet_config_feature_free (victim);
  vec_delete (remaining, 1, pos);

  /* The old config must be released before find_config_with_features():
     that call may grow config_pool (when no existing config matches and a
     new one is added), which can move the pool in memory. */
  remove_reference (cm, prev_cfg);

  /* Reconnect the remaining features and fetch the matching config. */
  next_cfg = find_config_with_features (vm, cm, remaining);
  next_cfg->reference_count += 1;

  user_index = next_cfg->config_string_heap_index + 1;
  vec_validate (cm->config_pool_index_by_user_index, user_index);
  cm->config_pool_index_by_user_index[user_index]
    = next_cfg - cm->config_pool;
  return user_index;
}
/**
 * Connect the nodes of a feature vector in order, build the corresponding
 * config string, and return the config that owns that string.
 *
 * The config string has one entry per feature -- the next-slot index
 * returned by add_next, followed by the feature's private config words --
 * and, unless the last feature already is the end node, a final next-slot
 * index leading to cm->end_node_index.
 *
 * If an identical config string is already registered in
 * cm->config_string_hash (compared by content, not by address), the existing
 * config is returned and feature_vector is freed.  Otherwise a new config is
 * allocated that takes ownership of feature_vector and of the string.
 *
 * @return the matching config; its reference_count is NOT incremented here
 *         (a new config starts at 0), the caller does that.
 */
static vnet_config_t *
find_config_with_features (vlib_main_t * vm,
			   vnet_config_main_t * cm,
			   vnet_config_feature_t * feature_vector)
{
  vnet_config_feature_t *feat;
  vnet_config_t *cfg;
  uword *hit;
  uword idx;
  u32 prev_node = ~0;
  u32 *cfg_string;
  u32 *heap_copy;

  /* Take over the scratch vector kept in cm, emptied but not freed. */
  cfg_string = cm->config_string_temp;
  cm->config_string_temp = 0;
  if (cfg_string)
    _vec_len (cfg_string) = 0;

  for (idx = 0; idx < vec_len (feature_vector); idx++)
    {
      feat = feature_vector + idx;

      /* Connect node graph: link the previous node to this one. */
      feat->next_index = add_next (vm, cm, prev_node, feat->node_index);
      prev_node = feat->node_index;

      /* Record the next-slot index ... */
      vec_add1 (cfg_string, feat->next_index);

      /* ... followed by the feature's private config (may be empty). */
      vec_add (cfg_string, feat->feature_config,
	       vec_len (feat->feature_config));
    }

  /* Terminate the string with the slot that leads to the end node. */
  if (prev_node == ~0 || prev_node != cm->end_node_index)
    {
      u32 end_slot = add_next (vm, cm, prev_node, cm->end_node_index);
      vec_add1 (cfg_string, end_slot);
    }

  /* Is an identical config string already known? */
  hit = hash_get_mem (cm->config_string_hash, cfg_string);
  if (hit)
    {
      /* Not unique: share the existing config, keep the string as scratch
         space for the next call and drop the caller's feature vector. */
      cm->config_string_temp = cfg_string;
      free_feature_vector (feature_vector);
      cfg = pool_elt_at_index (cm->config_pool, hit[0]);
      return cfg;
    }

  /* Unique: allocate a fresh config that owns vector and string. */
  pool_get (cm->config_pool, cfg);
  cfg->index = cfg - cm->config_pool;
  cfg->features = feature_vector;
  cfg->config_string_vector = cfg_string;

  /* Allocate a copy of the config string in the heap.
     VLIB buffers will maintain pointers to heap as they read out
     configuration data. */
  cfg->config_string_heap_index
    = heap_alloc (cm->config_string_heap, vec_len (cfg_string) + 1,
		  cfg->config_string_heap_handle);

  /* The first heap word points back to the pool index; the string itself
     (slot, private config, slot, private config, ...) follows it. */
  heap_copy =
    vec_elt_at_index (cm->config_string_heap, cfg->config_string_heap_index);
  heap_copy[0] = cfg->index;
  clib_memcpy (heap_copy + 1, cfg_string, vec_bytes (cfg_string));

  hash_set_mem (cm->config_string_hash, cfg_string, cfg->index);
  cfg->reference_count = 0;	/* will be incremented by caller. */

  return cfg;
}
现在各个feature node已经连接好了,连接信息也保存到了config中,接下来就是在业务node中使用了。
在需要开始使用feature机制的业务node中调用如下函数即可,注意该业务node必须是arc中的起始node
/**
 * Enter a feature arc from the arc's start node.
 *
 * Thin wrapper: forwards all arguments to vnet_feature_arc_start_with_data
 * with 0 as the final argument.  Per the accompanying notes, the call
 * rewrites *next0 to the next feature on the arc.
 * NOTE(review): the callee is not visible here; the trailing 0 presumably
 * means "no per-feature data requested" -- confirm.
 *
 * @param arc          feature arc index
 * @param sw_if_index  software interface index, forwarded unchanged
 * @param next0        in/out next index, forwarded to the callee
 * @param b0           buffer being processed, forwarded to the callee
 */
static_always_inline void
vnet_feature_arc_start (u8 arc, u32 sw_if_index, u32 * next0,
vlib_buffer_t * b0)
{
vnet_feature_arc_start_with_data (arc, sw_if_index, next0, b0, 0);
}
此函数会将next0修改为下一跳feature。feature的顺序在上文中已经确定
获取下一跳feature的逻辑很简单,见下面的vnet_get_config_data函数:
/**
 * Read the current feature's entry from the config string heap and advance
 * to the next one.
 *
 * The entry at *config_index consists of the feature's private config data
 * (n_data_bytes, rounded up to whole 32-bit words) followed by one word
 * holding the next-slot index.
 *
 * @param cm            config main owning the config string heap
 * @param config_index  in/out: heap index of the current entry; advanced
 *                      past this entry on return.  Per the accompanying
 *                      notes this is the buffer's current_config_index --
 *                      TODO confirm with the callers.
 * @param next_index    out: next-slot index stored after the private data
 * @param n_data_bytes  size of this feature's private config data in bytes
 * @return pointer to the feature's private config data inside the heap
 */
always_inline void *
vnet_get_config_data (vnet_config_main_t * cm,
		      u32 * config_index, u32 * next_index, u32 n_data_bytes)
{
  u32 cur = *config_index;
  u32 *entry = heap_elt_at_index (cm->config_string_heap, cur);
  /* Private data length expressed in heap words. */
  u32 n_words = round_pow2 (n_data_bytes, sizeof (entry[0])) /
    sizeof (entry[0]);

  /* The word right after the private data is the next-slot index. */
  *next_index = entry[n_words];

  /* Step over the private data and the slot word to the next entry. */
  *config_index = cur + n_words + 1;

  /* The private data starts at the beginning of the entry. */
  return (void *) entry;
}