/*
 * Excerpt of dispatch_node: run one node and record statistics for the run.
 *
 * Only the statistics-related statements are reproduced here; the function
 * braces and the declarations of n, t and v are not part of this excerpt.
 */
static_always_inline u64
dispatch_node (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_node_type_t type, vlib_node_state_t dispatch_state,
vlib_frame_t * frame, u64 last_time_stamp)
/* No dispatch wrapper installed: call the node function directly. */
if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0))
n = node->function (vm, node, frame);
/* NOTE(review): as excerpted, this call is unconditional and overwrites n;
 * an "else" in front of it appears to have been dropped - confirm against
 * the upstream source. */
n = vm->dispatch_wrapper_fn (vm, node, frame);
/* Timestamp taken after the node has run. */
t = clib_cpu_time_now ();
/* Account one call, n vectors, and the clocks elapsed since last_time_stamp. */
v = vlib_node_runtime_update_stats (vm, node,
/* n_calls */ 1,
/* n_vectors */ n,
/* n_clocks */ t - last_time_stamp);
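三种统计维度:调用次数、报文数量和时间(时钟周期数),分别对应以上的参数n_calls=1、n_vectors=n和n_clocks=t - last_time_stamp。函数vlib_node_runtime_update_stats将node中记录统计信息的三个成员(calls_since_last_overflow、vectors_since_last_overflow和clocks_since_last_overflow)分别累加上对应的值;这三个成员是32位计数器,一旦累加发生溢出,就调用vlib_node_runtime_sync_stats把数值同步到节点的64位统计中。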
/*
 * Add the statistics of one dispatch to a node runtime.
 *
 * n_calls, n_vectors and n_clocks are added to the runtime's
 * calls/vectors/clocks "_since_last_overflow" counters. The sums are
 * computed in u32 locals; when any of them wraps around, the previous
 * counter values are restored and vlib_node_runtime_sync_stats is called
 * with the increments instead.
 *
 * Returns the value produced by
 * vlib_node_runtime_update_main_loop_vector_stats.
 *
 * NOTE(review): the function braces are missing from this excerpt, and the
 * brace opened on the overflow check is never closed here.
 */
always_inline u32
vlib_node_runtime_update_stats (vlib_main_t * vm, vlib_node_runtime_t * node,
uword n_calls, uword n_vectors, uword n_clocks)
u32 ca0, ca1, v0, v1, cl0, cl1, r;
/* Snapshot the current counters; the *0 copies keep the pre-update values. */
cl0 = cl1 = node->clocks_since_last_overflow;
ca0 = ca1 = node->calls_since_last_overflow;
v0 = v1 = node->vectors_since_last_overflow;
/* Apply the increments in 32-bit arithmetic. */
ca1 = ca0 + n_calls;
v1 = v0 + n_vectors;
cl1 = cl0 + n_clocks;
/* Store the updated counters back into the runtime. */
node->calls_since_last_overflow = ca1;
node->clocks_since_last_overflow = cl1;
node->vectors_since_last_overflow = v1;
/* Track the largest n_clocks seen so far. max_clock_n takes this dispatch's
 * n_vectors unless the stored max_clock is strictly greater than n_clocks;
 * it must be updated before max_clock itself is overwritten. */
node->max_clock_n = node->max_clock > n_clocks ? node->max_clock_n : n_vectors;
node->max_clock = node->max_clock > n_clocks ? node->max_clock : n_clocks;
r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);
/* A new value smaller than the old one means the u32 addition wrapped. */
if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0)) {
/* Undo the wrapped stores, then hand the increments to the sync routine. */
node->calls_since_last_overflow = ca0;
node->clocks_since_last_overflow = cl0;
node->vectors_since_last_overflow = v0;
vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks);
return r;
/*
 * Resolve the vlib_node_t that owns runtime r (via r->node_index) and pass
 * it, together with the given increments, to
 * vlib_node_runtime_sync_stats_node.
 *
 * NOTE(review): the return-type line and the function braces are not part
 * of this excerpt.
 */
vlib_node_runtime_sync_stats (vlib_main_t *vm, vlib_node_runtime_t *r,
uword n_calls, uword n_vectors, uword n_clocks)
vlib_node_t *n = vlib_get_node (vm, r->node_index);
vlib_node_runtime_sync_stats_node (n, r, n_calls, n_vectors, n_clocks);
/*
 * Sync up runtime (32 bit counters) and main node stats (64 bit counters).
 *
 * Adds the given increments plus the runtime's since-last-overflow counters
 * to n->stats_total, copies the max-clock fields across, and then zeroes the
 * runtime counters.
 *
 * NOTE(review): the return-type line and the function braces are not part
 * of this excerpt.
 */
vlib_node_runtime_sync_stats_node (vlib_node_t *n, vlib_node_runtime_t *r,
uword n_calls, uword n_vectors, uword n_clocks)
/* Accumulate: caller-supplied increment + whatever the runtime has pending. */
n->stats_total.calls += n_calls + r->calls_since_last_overflow;
n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow;
n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow;
/* The max-clock fields are overwritten, not accumulated. */
n->stats_total.max_clock = r->max_clock;
n->stats_total.max_clock_n = r->max_clock_n;
/* Pending runtime counters have been folded in; restart them from zero. */
r->calls_since_last_overflow = 0;
r->vectors_since_last_overflow = 0;
r->clocks_since_last_overflow = 0;
/*
 * Signature only: the body of vlib_increment_main_loop_counter is not part
 * of this excerpt. It takes the vlib_main_t of the calling loop.
 */
always_inline void
vlib_increment_main_loop_counter (vlib_main_t * vm)
/*
 * Excerpt of vlib_main_or_worker_loop: only the call that bumps the
 * main-loop counter is shown; the rest of the body is not reproduced here.
 */
static_always_inline void
vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
vlib_increment_main_loop_counter (vm);
/*
 * Record statistics for a process node by forwarding the increments to
 * vlib_node_runtime_update_stats on the node runtime embedded in p.
 * The value returned by that call is not used.
 *
 * NOTE(review): the function braces are not part of this excerpt.
 */
always_inline void
vlib_process_update_stats (vlib_main_t * vm,
vlib_process_t * p,
uword n_calls, uword n_vectors, uword n_clocks)
vlib_node_runtime_update_stats (vm, &p->node_runtime,
n_calls, n_vectors, n_clocks);
注册显示节点信息命令:show runtime,可以显示指定节点的统计信息,或者显示所有节点的统计信息。
/*
 * Registers the "show runtime" CLI command and binds it to the
 * show_node_runtime handler.
 *
 * NOTE(review): the closing of this initializer is not part of the excerpt.
 */
VLIB_CLI_COMMAND (show_node_runtime_command, static) = {
.path = "show runtime",
.short_help = "Show packet processing runtime",
.function = show_node_runtime,
/* NOTE(review): presumably lets the handler run without the CLI taking the
 * worker barrier itself (the handler takes it explicitly) - confirm. */
.is_mp_safe = 1,
/*
 * Flush the pending runtime counters of node n into the node's stats_total
 * and per-next-node vector counts.
 *
 * NOTE(review): the return-type line, the function braces, the declaration
 * of i and the closing brace of the for loop are not part of this excerpt.
 */
vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
vlib_node_runtime_t *rt;
if (n->type == VLIB_NODE_TYPE_PROCESS) {
/* Nothing to do for PROCESS nodes except in main thread */
if (vm != vlib_get_first_main ()) return;
vlib_process_t *p = vlib_get_process_from_node (vm, n);
/* Move the process's suspend count into the node totals and reset it. */
n->stats_total.suspends += p->n_suspends;
p->n_suspends = 0;
/* A process carries its own node runtime. */
rt = &p->node_runtime;
} else
/* Other node types: look the runtime up by node type and runtime index. */
rt = vec_elt_at_index (vm->node_main.nodes_by_type[n->type], n->runtime_index);
/* Zero increments: only flush what the runtime has already accumulated. */
vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0);
/* Sync up runtime next frame vector counters with main node structure. */
vlib_next_frame_t *nf;
for (i = 0; i < rt->n_next_nodes; i++) {
nf = vlib_node_runtime_get_next_frame (vm, rt, i);
/* Add the pending per-next-node vector count to the node, then reset it. */
vec_elt (n->n_vectors_by_next_node, i) += nf->vectors_since_last_overflow;
nf->vectors_since_last_overflow = 0;
/*
 * Format callback that produces one row of the node statistics table.
 *
 * Arguments read from va, in order: vlib_main_t *vm, vlib_node_t *n, int max.
 * When n is NULL a column-header row is appended instead of node data.
 * A non-zero max selects the "max clocks" column layout.
 * Returns the (possibly reallocated) output vector s.
 *
 * NOTE(review): the function braces and the declarations of indent and maxn
 * are not part of this excerpt. Several if statements below are followed by
 * an unconditional statement that overrides them; each looks like a dropped
 * "else" - confirm against the upstream source.
 */
static u8 *
format_vlib_node_stats (u8 * s, va_list * va)
vlib_main_t *vm = va_arg (*va, vlib_main_t *);
vlib_node_t *n = va_arg (*va, vlib_node_t *);
int max = va_arg (*va, int);
f64 v, x, maxc, maxcn;
u8 *ns, *misc_info = 0;
u64 c, p, l, d;
/* No node given: emit the header row. */
if (!n) {
if (max)
s = format (s, "%=30s%=17s%=16s%=16s%=16s%=16s",
"Name", "Max Node Clocks", "Vectors at Max",
"Max Clocks", "Avg Clocks", "Avg Vectors/Call");
/* NOTE(review): as excerpted this second header is appended even when max
 * is set - likely a dropped "else". */
s = format (s, "%=30s%=12s%=16s%=16s%=16s%=16s%=16s",
"Name", "State", "Calls", "Vectors", "Suspends",
"Clocks", "Vectors/Call");
return s;
indent = format_get_indent (s);
/* Deltas since the last clear: l = clocks, c = calls, p = vectors,
 * d = suspends. */
l = n->stats_total.clocks - n->stats_last_clear.clocks;
c = n->stats_total.calls - n->stats_last_clear.calls;
p = n->stats_total.vectors - n->stats_last_clear.vectors;
d = n->stats_total.suspends - n->stats_last_clear.suspends;
/* Largest single-dispatch clock count and the vector count of that dispatch. */
maxc = (f64) n->stats_total.max_clock;
maxn = n->stats_total.max_clock_n;
/* Clocks per vector for the max dispatch; guarded against division by zero. */
if (n->stats_total.max_clock_n)
maxcn = (f64) n->stats_total.max_clock / (f64) maxn;
/* NOTE(review): as excerpted this always resets maxcn to 0 - likely a
 * dropped "else". */
maxcn = 0.0;
/* Clocks per packet, per call or per suspend. */
x = 0;
if (p > 0)
x = (f64) l / (f64) p;
else if (c > 0)
x = (f64) l / (f64) c;
else if (d > 0)
x = (f64) l / (f64) d;
/* Average vectors per call. */
if (c > 0)
v = (double) p / (double) c;
/* NOTE(review): as excerpted this always resets v to 0 - likely a dropped
 * "else". */
v = 0;
ns = n->name;
if (max)
s = format (s, "%-30v%=17.2e%=16d%=16.2e%=16.2e%=16.2e",
ns, maxc, maxn, maxcn, x, v);
/* NOTE(review): as excerpted the regular row is appended even when max is
 * set - likely a dropped "else". */
s = format (s, "%-30v%=12U%16Ld%16Ld%16Ld%16.2e%16.2f", ns,
format_vlib_node_state, vm, n, c, p, d, x, v);
/* Free ns only if it is a separate copy; in the code shown here ns is always
 * n->name, so this branch is not taken. */
if (ns != n->name)
vec_free (ns);
return s;
函数show_node_runtime处理命令show runtime。
如果在命令行指定了节点名称,如:show runtime ip4-input,调用以上介绍的统计信息收集函数和显示函数处理。
/*
 * CLI handler for "show runtime" (first part of the excerpt).
 *
 * When the input names a node, that node's statistics are synced and printed
 * as a header row followed by one data row. Otherwise the else branch, shown
 * in the next part of the excerpt, reports on all nodes.
 *
 * NOTE(review): the function's opening brace is not part of this excerpt.
 */
static clib_error_t *
show_node_runtime (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
vlib_node_main_t *nm = &vm->node_main;
vlib_node_t *n, ***node_dups = 0;
f64 time_now, *internal_node_vector_rates = 0;
u32 node_index;
time_now = vlib_time_now (vm);
/* A node name on the command line selects the single-node report. */
if (unformat (input, "%U", unformat_vlib_node, vm, &node_index)) {
n = vlib_get_node (vm, node_index);
/* Flush pending runtime counters into the node before printing. */
vlib_node_sync_stats (vm, n);
/* A NULL node argument makes the formatter emit the header row. */
vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, 0, 0);
vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, n, 0);
} else {
vpp# show runtime ip4-input
Name State Calls Vectors Suspends Clocks Vectors/Call
ip4-input active 0 0 0 0.00e0 0.00
/*
 * Else branch of show_node_runtime: report on all nodes of all threads.
 *
 * Steps, in the order the code performs them:
 *  1. Parse the optional keywords brief/verbose/max/summary.
 *  2. Collect the vlib_main_t of every thread into stat_vms.
 *  3. Between barrier sync and release: sync the statistics of every node of
 *     every collected thread, duplicate each thread's nodes vector into
 *     node_dups, and record its internal node vector rate.
 *  4. After the barrier is released, for each thread: sort the duplicated
 *     nodes with node_cmp, sum the vectors since last clear into the
 *     input/output/drop/punt totals, print a thread banner when there is
 *     more than one thread, and print the time and rate line.
 *  5. Unless summary was requested, print the header row and one row per
 *     node; nodes with zero calls and zero suspends are skipped when brief
 *     is set.
 *
 * NOTE(review): this excerpt lost several tokens - confirm against upstream:
 *  - the block comment that starts with "Barrier sync across stats scraping"
 *    has no closing terminator here;
 *  - the switch on n->type shows only its default label, so the case labels
 *    and breaks that select the output/drop/punt and input accounting are
 *    missing;
 *  - the second "Thread" banner call is unconditional as shown, which looks
 *    like a dropped "else";
 *  - closing braces and the declarations of i and j are not shown.
 * NOTE(review): dt is computed from nm, which by then was last assigned in
 * the barrier loop and so refers to the node_main of the last collected
 * thread, not necessarily the thread being printed.
 */
vlib_node_t **nodes;
f64 dt;
u64 n_input, n_output, n_drop, n_punt;
u64 n_clocks, l, v, c, d;
/* brief defaults to on, so the "brief" keyword does not change anything. */
int brief = 1, summary = 0, max = 0;
vlib_main_t **stat_vms = 0, *stat_vm;
/* Suppress nodes with zero calls since last clear */
if (unformat (input, "brief") || unformat (input, "b")) brief = 1;
if (unformat (input, "verbose") || unformat (input, "v")) brief = 0;
if (unformat (input, "max") || unformat (input, "m")) max = 1;
if (unformat (input, "summary") || unformat (input, "sum") || unformat (input, "su"))
summary = 1;
/* Gather the main structure of every thread that has one. */
for (i = 0; i < vlib_get_n_threads (); i++) {
stat_vm = vlib_get_main_by_index (i);
if (stat_vm)
vec_add1 (stat_vms, stat_vm);
/* Barrier sync across stats scraping.
* Otherwise, the counts will be grossly inaccurate.
vlib_worker_thread_barrier_sync (vm);
for (j = 0; j < vec_len (stat_vms); j++) {
stat_vm = stat_vms[j];
nm = &stat_vm->node_main;
for (i = 0; i < vec_len (nm->nodes); i++) {
n = nm->nodes[i];
vlib_node_sync_stats (stat_vm, n);
nodes = vec_dup (nm->nodes);
vec_add1 (node_dups, nodes);
vec_add1 (internal_node_vector_rates, vlib_internal_node_vector_rate (stat_vm));
vlib_worker_thread_barrier_release (vm);
for (j = 0; j < vec_len (stat_vms); j++) {
stat_vm = stat_vms[j];
nodes = node_dups[j];
vec_sort_with_function (nodes, node_cmp);
n_input = n_output = n_drop = n_punt = n_clocks = 0;
for (i = 0; i < vec_len (nodes); i++) {
n = nodes[i];
v = n->stats_total.vectors - n->stats_last_clear.vectors;
switch (n->type) {
default: continue;
n_output += (n->flags & VLIB_NODE_FLAG_IS_OUTPUT) ? v : 0;
n_drop += (n->flags & VLIB_NODE_FLAG_IS_DROP) ? v : 0;
n_punt += (n->flags & VLIB_NODE_FLAG_IS_PUNT) ? v : 0;
if (n->flags & VLIB_NODE_FLAG_IS_HANDOFF) n_input += v;
n_input += v;
if (vlib_get_n_threads () > 1) {
vlib_worker_thread_t *w = vlib_worker_threads + j;
if (j > 0) vlib_cli_output (vm, "---------------");
if (w->cpu_id > -1)
vlib_cli_output (vm, "Thread %d %s (lcore %u)", j, w->name, w->cpu_id);
vlib_cli_output (vm, "Thread %d %s", j, w->name);
dt = time_now - nm->time_last_runtime_stats_clear;
vlib_cli_output (vm, "Time %.1f, %f sec internal node vector rate %.2f loops/sec %.2f\n"
" vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",
dt, vlib_stats_get_segment_update_rate (), internal_node_vector_rates[j], stat_vm->loops_per_second,
(f64) n_input / dt, (f64) n_output / dt, (f64) n_drop / dt, (f64) n_punt / dt);
if (summary == 0) {
vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm, 0, max);
for (i = 0; i < vec_len (nodes); i++) {
c = nodes[i]->stats_total.calls - nodes[i]->stats_last_clear.calls;
d = nodes[i]->stats_total.suspends - nodes[i]->stats_last_clear.suspends;
if (c || d || !brief) {
vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm, nodes[i], max);
Thread 2 vpp_wk_1 (lcore 3)
Time 13238.4, 10 sec internal node vector rate 0.00 loops/sec 14040836.44
vector rates in 5.0524e5, out 5.0524e5, drop 0.0000e0, punt 0.0000e0
Name State Calls Vectors Suspends Clocks Vectors/Call
dpdk-input polling 180198867720 6688500986 0 4.59e3 .04
ethernet-input active 544505661 6688500986 0 4.32e1 12.28