VPP节点统计信息

节点的统计位于dispatch_node函数中:节点处理函数(node->function)运行之后,其返回值为处理的报文数量n(n_vectors),随后通过函数vlib_node_runtime_update_stats来更新节点的相关统计信息。

static_always_inline u64
dispatch_node (vlib_main_t * vm, vlib_node_runtime_t * node,
           vlib_node_type_t type, vlib_node_state_t dispatch_state,
           vlib_frame_t * frame, u64 last_time_stamp)
{

      if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0))
    n = node->function (vm, node, frame);
      else
    n = vm->dispatch_wrapper_fn (vm, node, frame);

  t = clib_cpu_time_now ();

  v = vlib_node_runtime_update_stats (vm, node,
                      /* n_calls */ 1,
                      /* n_vectors */ n,
                      /* n_clocks */ t - last_time_stamp);

三种统计维度:调用次数、报文数量和时间,对应以上的参数n_calls=1,n_vectors=n和n_clocks=t - last_time_stamp。将node中的三个记录统计信息的成员分别累加上对应的值。

always_inline u32
vlib_node_runtime_update_stats (vlib_main_t * vm, vlib_node_runtime_t * node,
                uword n_calls, uword n_vectors, uword n_clocks)
{
  u32 ca0, ca1, v0, v1, cl0, cl1, r;

  cl0 = cl1 = node->clocks_since_last_overflow;
  ca0 = ca1 = node->calls_since_last_overflow;
  v0 = v1 = node->vectors_since_last_overflow;

  ca1 = ca0 + n_calls;
  v1 = v0 + n_vectors;
  cl1 = cl0 + n_clocks;

  node->calls_since_last_overflow = ca1;
  node->clocks_since_last_overflow = cl1;
  node->vectors_since_last_overflow = v1;

max_clock和max_clock_n分别记录最大的时钟周期,和在此周期中处理的报文数量。

  node->max_clock_n = node->max_clock > n_clocks ? node->max_clock_n : n_vectors;
  node->max_clock = node->max_clock > n_clocks ? node->max_clock : n_clocks;

如果累加统计值之后,三者之中有某一个数值出现回绕(32bit溢出overflow),则先将三个计数恢复为累加之前的值,再调用同步函数vlib_node_runtime_sync_stats,将统计信息(连同本次的增量)由节点结构vlib_node_runtime_t同步到对应的vlib_node_t中,即由运行时节点结构同步到主节点结构中。

  r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);

  if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0)) {
      node->calls_since_last_overflow = ca0;
      node->clocks_since_last_overflow = cl0;
      node->vectors_since_last_overflow = v0;

      vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks);
  }
  return r;

统计信息同步

根据节点索引,取得主节点结构。

/* Flush the pending statistics of runtime node r into its main node.
 *
 * The main node is looked up from r->node_index; it is passed on, together
 * with r and the extra n_calls / n_vectors / n_clocks amounts, to
 * vlib_node_runtime_sync_stats_node, which performs the actual update. */
void
vlib_node_runtime_sync_stats (vlib_main_t *vm, vlib_node_runtime_t *r,
                              uword n_calls, uword n_vectors, uword n_clocks)
{
  vlib_node_runtime_sync_stats_node (vlib_get_node (vm, r->node_index), r,
                                     n_calls, n_vectors, n_clocks);
}

将统计信息由运行节点结构中32位的*_since_last_overflow计数累加到主节点的统计结构中,并且,清零运行节点结构中的这些*_since_last_overflow统计值。

/* Sync runtime stats (32 bit counters) into the main node stats (64 bit
 * counters).
 *
 * For each of calls, vectors and clocks, n->stats_total grows by the
 * caller-supplied amount plus whatever r has accumulated since its last
 * overflow, and the matching counter in r is then reset to zero.
 * max_clock and max_clock_n are copied from r rather than accumulated. */
void
vlib_node_runtime_sync_stats_node (vlib_node_t *n, vlib_node_runtime_t *r,
                                   uword n_calls, uword n_vectors,
                                   uword n_clocks)
{
  /* Peak values: overwrite with the runtime's current view. */
  n->stats_total.max_clock_n = r->max_clock_n;
  n->stats_total.max_clock = r->max_clock;

  /* Calls: accumulate, then restart the runtime counter. */
  n->stats_total.calls += r->calls_since_last_overflow + n_calls;
  r->calls_since_last_overflow = 0;

  /* Vectors: accumulate, then restart the runtime counter. */
  n->stats_total.vectors += r->vectors_since_last_overflow + n_vectors;
  r->vectors_since_last_overflow = 0;

  /* Clocks: accumulate, then restart the runtime counter. */
  n->stats_total.clocks += r->clocks_since_last_overflow + n_clocks;
  r->clocks_since_last_overflow = 0;
}

主循环统计更新

每次主循环执行,都会将计数main_loop_count递增一次。

/* Increment vm->main_loop_count by one.  The remainder of the body is
 * elided ("...") in this excerpt. */
always_inline void
vlib_increment_main_loop_counter (vlib_main_t * vm)
{      
  vm->main_loop_count++;
  ...
}
	
static_always_inline void
vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
{
      vlib_increment_main_loop_counter (vm);

PROCESS节点统计

以上dispatch_node处理的是VLIB_NODE_TYPE_PRE_INPUT/INPUT/INTERNAL三种类型的节点,对于VLIB_NODE_TYPE_PROCESS类型节点,由函数vlib_process_update_stats进行统计更新。其核心为以上介绍的函数vlib_node_runtime_update_stats。

/* Update statistics for a process node.
 *
 * Forwards n_calls / n_vectors / n_clocks to vlib_node_runtime_update_stats,
 * using the node runtime embedded in the process (p->node_runtime).  The
 * value returned by that call is not used. */
always_inline void
vlib_process_update_stats (vlib_main_t *vm, vlib_process_t *p, uword n_calls,
                           uword n_vectors, uword n_clocks)
{
  (void) vlib_node_runtime_update_stats (vm, &(p->node_runtime), n_calls,
                                         n_vectors, n_clocks);
}

显示节点统计信息

注册显示节点信息命令:show runtime,可以显示指定节点的统计信息,或者显示所有节点的统计信息。

/* Register the "show runtime" CLI command, handled by show_node_runtime.
 * NOTE(review): is_mp_safe = 1 presumably means the CLI framework does not
 * take the worker barrier around the handler -- confirm against the VLIB
 * CLI documentation. */
VLIB_CLI_COMMAND (show_node_runtime_command, static) = {
  .path = "show runtime",
  .short_help = "Show packet processing runtime",
  .function = show_node_runtime,
  .is_mp_safe = 1,
};

在显示节点统计信息之前,需要先收集节点的统计信息,如下函数vlib_node_sync_stats。首先,根据节点类型获取到节点对应的运行节点结构rt:PROCESS类型节点取其vlib_process_t中的node_runtime(仅在主线程中处理),其它类型节点由nodes_by_type向量中按runtime_index取得。其次由函数vlib_node_runtime_sync_stats将未同步的统计信息同步到节点主结构中。

void
vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
{
  vlib_node_runtime_t *rt;

  if (n->type == VLIB_NODE_TYPE_PROCESS) {
      /* Nothing to do for PROCESS nodes except in main thread */
      if (vm != vlib_get_first_main ()) return;

      vlib_process_t *p = vlib_get_process_from_node (vm, n);
      n->stats_total.suspends += p->n_suspends;
      p->n_suspends = 0;
      rt = &p->node_runtime;
  } else
    rt = vec_elt_at_index (vm->node_main.nodes_by_type[n->type], n->runtime_index);

  vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0);

最后,将运行时下一个frame结构中统计的vectors值,同步到主节点结构中。

  /* Sync up runtime next frame vector counters with main node structure. */
  {
    vlib_next_frame_t *nf;
    for (i = 0; i < rt->n_next_nodes; i++) {
      nf = vlib_node_runtime_get_next_frame (vm, rt, i);
      vec_elt (n->n_vectors_by_next_node, i) += nf->vectors_since_last_overflow;
      nf->vectors_since_last_overflow = 0;

格式化统计信息函数,如下format_vlib_node_stats。如果节点结构为空,仅显示标题信息。

static u8 *
format_vlib_node_stats (u8 * s, va_list * va)
{
  vlib_main_t *vm = va_arg (*va, vlib_main_t *);
  vlib_node_t *n = va_arg (*va, vlib_node_t *);
  int max = va_arg (*va, int);
  f64 v, x, maxc, maxcn;
  u8 *ns, *misc_info = 0;
  u64 c, p, l, d;

  if (!n) {
      if (max)
    s = format (s, "%=30s%=17s%=16s%=16s%=16s%=16s",
            "Name", "Max Node Clocks", "Vectors at Max",
            "Max Clocks", "Avg Clocks", "Avg Vectors/Call");
      else
    s = format (s, "%=30s%=12s%=16s%=16s%=16s%=16s%=16s",
            "Name", "State", "Calls", "Vectors", "Suspends",
            "Clocks", "Vectors/Call");
      return s;
  }
  indent = format_get_indent (s);

由节点中获得时钟周期、调用数量、报文数量和suspends数量的值(均为自上次清除统计以来的增量)。计算在时钟周期最大的那次调用中,平均处理一个报文使用的时钟周期maxcn;如果max_clock_n为0,则maxcn为0。

  l = n->stats_total.clocks - n->stats_last_clear.clocks;
  c = n->stats_total.calls - n->stats_last_clear.calls;
  p = n->stats_total.vectors - n->stats_last_clear.vectors;
  d = n->stats_total.suspends - n->stats_last_clear.suspends;
  maxc = (f64) n->stats_total.max_clock;
  maxn = n->stats_total.max_clock_n;
  if (n->stats_total.max_clock_n)
    maxcn = (f64) n->stats_total.max_clock / (f64) maxn;
  else
    maxcn = 0.0;

计算每个报文、每次调用、每个suspend所花费的时钟周期x,三者优先级依次降低,只取第一个分母大于0的结果。然后计算平均每次调用(calls)处理的报文数量(vectors),结果为v。

  /* Clocks per packet, per call or per suspend. */
  x = 0;
  if (p > 0)
    x = (f64) l / (f64) p;
  else if (c > 0)
    x = (f64) l / (f64) c;
  else if (d > 0)
    x = (f64) l / (f64) d;

  if (c > 0)
    v = (double) p / (double) c;
  else
    v = 0;

输出节点的统计信息,以及以上的计算值。

  ns = n->name;

  if (max)
    s = format (s, "%-30v%=17.2e%=16d%=16.2e%=16.2e%=16.2e",
        ns, maxc, maxn, maxcn, x, v);
  else
    s = format (s, "%-30v%=12U%16Ld%16Ld%16Ld%16.2e%16.2f", ns,
        format_vlib_node_state, vm, n, c, p, d, x, v);

  if (ns != n->name)
    vec_free (ns);

  return s;

函数show_node_runtime处理命令show runtime。
如果在命令行指定了节点名称,如:show runtime ip4-input,调用以上介绍的统计信息收集函数和显示函数处理。

static clib_error_t *
show_node_runtime (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
{
  vlib_node_main_t *nm = &vm->node_main;
  vlib_node_t *n, ***node_dups = 0;
  f64 time_now, *internal_node_vector_rates = 0;
  u32 node_index;

  time_now = vlib_time_now (vm);

  if (unformat (input, "%U", unformat_vlib_node, vm, &node_index)) {
      n = vlib_get_node (vm, node_index);
      vlib_node_sync_stats (vm, n);
      vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, 0, 0);
      vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, n, 0);
  } else {

如下显示信息:

vpp# show runtime ip4-input
             Name                 State         Calls          Vectors        Suspends         Clocks       Vectors/Call
ip4-input                         active          0               0               0            0.00e0            0.00

否则,对于未指定节点名称的情况,有以下处理流程。默认按照brief简洁方式显示,还可指定verbose、max或者summary方式。

      vlib_node_t **nodes;
      f64 dt;
      u64 n_input, n_output, n_drop, n_punt;
      u64 n_clocks, l, v, c, d;
      int brief = 1, summary = 0, max = 0;
      vlib_main_t **stat_vms = 0, *stat_vm;

      /* Suppress nodes with zero calls since last clear */
      if (unformat (input, "brief") || unformat (input, "b"))    brief = 1;
      if (unformat (input, "verbose") || unformat (input, "v"))  brief = 0;
      if (unformat (input, "max") || unformat (input, "m"))      max = 1;
      if (unformat (input, "summary") || unformat (input, "sum") || unformat (input, "su"))
         summary = 1;

遍历所有的线程,生成线程vlib_main_t结构向量。

    for (i = 0; i < vlib_get_n_threads (); i++) {
      stat_vm = vlib_get_main_by_index (i);
      if (stat_vm)
        vec_add1 (stat_vms, stat_vm);
    }

      /* Barrier sync across stats scraping.
       * Otherwise, the counts will be grossly inaccurate.
       */
      vlib_worker_thread_barrier_sync (vm);

遍历每个线程的vlib_main结构,并遍历其中的每个节点,同步每个节点的统计信息。完成之后,将节点克隆一份,添加到node_dups向量。

    for (j = 0; j < vec_len (stat_vms); j++) {
      stat_vm = stat_vms[j];
      nm = &stat_vm->node_main;

      for (i = 0; i < vec_len (nm->nodes); i++) {
          n = nm->nodes[i];
          vlib_node_sync_stats (stat_vm, n);
      }

      nodes = vec_dup (nm->nodes);

      vec_add1 (node_dups, nodes);
      vec_add1 (internal_node_vector_rates, vlib_internal_node_vector_rate (stat_vm));
    }
      vlib_worker_thread_barrier_release (vm);

遍历每个线程的vlib_main结构,以及其中的每个主节点结构,计算全部节点的n_output/n_drop/n_punt/n_input的统计总量。

    for (j = 0; j < vec_len (stat_vms); j++) {
      stat_vm = stat_vms[j];
      nodes = node_dups[j];

      vec_sort_with_function (nodes, node_cmp);

      n_input = n_output = n_drop = n_punt = n_clocks = 0;
      for (i = 0; i < vec_len (nodes); i++) {
        n = nodes[i];
        v = n->stats_total.vectors - n->stats_last_clear.vectors;

        switch (n->type) {
        default: continue;

        case VLIB_NODE_TYPE_INTERNAL:
          n_output += (n->flags & VLIB_NODE_FLAG_IS_OUTPUT) ? v : 0;
          n_drop += (n->flags & VLIB_NODE_FLAG_IS_DROP) ? v : 0;
          n_punt += (n->flags & VLIB_NODE_FLAG_IS_PUNT) ? v : 0;
          if (n->flags & VLIB_NODE_FLAG_IS_HANDOFF)  n_input += v;
          break;
        case VLIB_NODE_TYPE_INPUT:
          n_input += v;
          break;
        }
      }

输出当前线程的id,线程名称和cpu索引等信息。输出线程处理的全部节点的总量信息。最后,如果没有指定summary参数,输出每个节点的统计信息。

      if (vlib_get_n_threads () > 1) {
          vlib_worker_thread_t *w = vlib_worker_threads + j;
          if (j > 0) vlib_cli_output (vm, "---------------");

          if (w->cpu_id > -1)
            vlib_cli_output (vm, "Thread %d %s (lcore %u)", j, w->name, w->cpu_id);
          else
            vlib_cli_output (vm, "Thread %d %s", j, w->name);
      }

      dt = time_now - nm->time_last_runtime_stats_clear;
      vlib_cli_output (vm, "Time %.1f, %f sec internal node vector rate %.2f loops/sec %.2f\n"
        "  vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",
        dt, vlib_stats_get_segment_update_rate (), internal_node_vector_rates[j], stat_vm->loops_per_second,
        (f64) n_input / dt, (f64) n_output / dt, (f64) n_drop / dt, (f64) n_punt / dt);

      if (summary == 0) {
        vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm, 0, max);
        for (i = 0; i < vec_len (nodes); i++) {
          c = nodes[i]->stats_total.calls - nodes[i]->stats_last_clear.calls;
          d = nodes[i]->stats_total.suspends - nodes[i]->stats_last_clear.suspends;
          if (c || d || !brief) {
              vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm, nodes[i], max);

如下,输出线程2的统计信息:

---------------
Thread 2 vpp_wk_1 (lcore 3)
Time 13238.4, 10 sec internal node vector rate 0.00 loops/sec 14040836.44
  vector rates in 5.0524e5, out 5.0524e5, drop 0.0000e0, punt 0.0000e0
             Name                 State         Calls          Vectors        Suspends         Clocks       Vectors/Call
dpdk-input                       polling      180198867720      6688500986               0          4.59e3             .04
ethernet-input                   active          544505661      6688500986               0          4.32e1           12.28
...

你可能感兴趣的:(VPP,vpp)