首先新建四个虚拟接口(dummy interface)进行后续的报文转发和计数
modprobe dummy numdummies=4
ifconfig dummy0 up
ifconfig dummy1 up
ifconfig dummy2 up
ifconfig dummy3 up
注:若想卸载这四个虚拟接口,请运行
rmmod dummy
进行流量转发测试时,我们需要运行形如:zbalance_ipc -i zc:dna1 -c 21 -n 4 -m 1 -r 0:dummy0 -r 1:dummy1 -r 2:dummy2 -r 3:dummy3 -p
这样的命令。看起来有些复杂?没关系,后文将结合帮助文档和源代码详细讲述。
上述命令说明:输入设备为 zc:dna1,cluster id 为 21,输出报文队列的数量为 4,hash 模式为按 IP 进行 hash(-m 1),并将四个输出报文队列分别绑定到 dummy0~dummy3 四个虚拟接口上(以供其它程序使用);-p 表示打印每个接口和每个队列的累计统计信息。
运行 zbalance_ipc -h,可以看到如下帮助信息:
zbalance_ipc - (C) 2014 ntop.org
Using PFRING_ZC v.6.3.0.160303
A master process balancing packets to multiple consumer processes.
Usage: zbalance_ipc -i <device> -c <cluster id> -n <num inst>
[-h] [-m <hash mode>] [-S <core id>] [-g <core_id>]
[-N <num>] [-a] [-q <len>] [-Q <sock list>] [-d]
[-D <username>] [-P <pid file>]
-h Print this help
-i Device (comma-separated list) Note: use 'Q' as device name to create ingress sw queues
-c Cluster id
-n <num inst> Number of application instances
In case of '-m 1' or '-m 4' it is possible to spread packets across multiple
instances of multiple applications, using a comma-separated list
-m Hashing modes:
0 - No hash: Round-Robin (default)
1 - IP hash, or TID (thread id) in case of '-i sysdig'
2 - Fan-out
3 - Fan-out (1st) + Round-Robin (2nd, 3rd, ..)
4 - GTP hash (Inner IP/Port or Seq-Num)
-r <queue>:<dev> Replace egress queue <queue> with device <dev> (multiple -r can be specified)
-S Enable Time Pulse thread and bind it to a core
-R Time resolution (nsec) when using Time Pulse thread
Note: in non-time-sensitive applications use >= 100usec to reduce cpu load
-g Bind this app to a core
-q Number of slots in each consumer queue (default: 8192)
-b Number of buffers in each consumer pool (default: 16)
-N <num> Producer for n2disk multi-thread (<num> threads)
-a Active packet wait
-Q Enable VM support (comma-separated list of QEMU monitor sockets)
-p Print per-interface and per-queue absolute stats
-d Daemon mode
-D Drop privileges
-P <pid file> Write pid to the specified file (daemon mode only)
-u Hugepages mount point for packet memory allocation
相关说明:
-h 打印此帮助文档
-i 输入源设备名,注意使用"Q"作为设备名可创建输入的sw队列
-c Cluster id
-n 应用实例的数量(输出报文队列的数量)
-m 哈希模式:
0 - No hash: 轮询调度 (default)
1 - IP进行hash, 当使用'-i sysdig'时用线程ID进行hash
2 - Fan-out(流量复制到每一个输出队列)
3 - Fan-out (1st) + Round-Robin (2nd, 3rd, ..)
第一个输出队列收到一份完整流量,其余输出队列之间按轮询(Round-Robin)方式分摊流量
4 - GTP hash (Inner IP/Port or Seq-Num)
-r <queue>:<dev> 将编号为 <queue> 的输出报文队列绑定到设备接口 <dev>(可以指定多个 -r 来绑定多个报文队列)
E.g. `-r 0:dummy0 -r 1:dummy0 -r 2:dummy0 -r 3:dummy0`
`-r 0:dummy0 -r 1:dummy1 -r 2:dummy2 -r 3:dummy3`
-p 打印每个接口和每个报文队列的累计(absolute)统计信息
-d 守护程序模式
393 int hash_mode = 0;
// 读取hash模式
432 case 'm':
433 hash_mode = atoi(optarg);
434 break;
//根据hash模式选择对应的分发函数(即为 pfring_zc_distribution_func 类型的函数指针 func 赋值)
705 if (hash_mode == 0 || ((hash_mode == 1 || hash_mode == 4) && num_apps == 1)) { /* balancer */
706 pfring_zc_distribution_func func = NULL;
707
708 switch (hash_mode) {
709 case 0: func = rr_distribution_func;
710 break;
711 case 1: if (strcmp(device, "sysdig") == 0) func = sysdig_distribution_func; else if (time_pulse) func = ip_distribution_func; /* else built-in IP-based */
712 break;
713 case 4: if (strcmp(device, "sysdig") == 0) func = sysdig_distribution_func; else func = gtp_distribution_func;
714 break;
715 }
716
/**
* 从pfring_zc.h 中我们可以找到pfring_zc_run_balancer的说明
* Run a balancer worker.
* @param in_queues The ingress queues handles array.
* @param out_queues The egress queues handles array.
* @param num_in_queues The number of ingress queues.
* @param num_out_queues The number of egress queues.
* @param working_set_pool The pool handle for working set buffers allocation. The worker uses 8 buffers in burst mode, 1 otherwise.
* @param recv_policy The receive policy.
* @param callback The function called when there is no incoming packet.
* @param func The distribution function, or NULL for the default IP-based distribution function.
* @param user_data The user data passed to distribution function.
* @param active_wait The flag indicating whether the worker should use active or passive wait for incoming packets.
* @param core_id_affinity The core affinity for the worker thread.
* @return The worker handle on success, NULL otherwise (errno is set appropriately).
*/
717 zw = pfring_zc_run_balancer(
718 inzqs,
719 outzqs,
720 num_devices,
721 num_consumer_queues,
722 wsp,
723 round_robin_bursts_policy,
724 NULL,
725 func, //负载均衡模式
726 (void *) ((long) num_consumer_queues),
727 !wait_for_packet,
728 bind_worker_core
729 );
731 } else { /* fanout */
732 pfring_zc_distribution_func func = NULL;
733
734 outzmq = pfring_zc_create_multi_queue(outzqs, num_consumer_queues);
735
736 if (outzmq == NULL) {
737 trace(TRACE_ERROR, "pfring_zc_create_multi_queue error [%s]\n", strerror(errno));
738 return -1;
739 }
740
741 switch (hash_mode) {
742 case 1: func = fo_multiapp_ip_distribution_func;
743 break;
744 case 2: if (time_pulse) func = fo_distribution_func; /* else built-in send-to-all */
745 break;
746 case 3: func = fo_rr_distribution_func;
747 break;
748 case 4: func = fo_multiapp_gtp_distribution_func;
749 break;
750 }
751 /* 与 pfring_zc_run_balancer 的参数说明类似
* @param func The distribution function, or NULL to send all the packets to all the egress queues.
* 当func参数为NULL时,流量默认复制到所有输出报文队列
*/
752 zw = pfring_zc_run_fanout(
753 inzqs,
754 outzmq,
755 num_devices,
756 wsp,
757 round_robin_bursts_policy,
758 NULL /* idle callback */,
759 func,
760 (void *) ((long) num_consumer_queues),
761 !wait_for_packet,
762 bind_worker_core
763 );
764
这里我们主要关注fan-out流量转发模式(-m 2),即流量会在不同的应用实例间复制。
case 2: if (time_pulse) func = fo_distribution_func; /* else built-in send-to-all */
各网络接口当前的收发速率可以使用 sar 命令查看: sar -n DEV 1
运行命令./zbalance_ipc -i zc:dna1 -c 21 -n 4 -m 2 -r 0:dummy0 -r 1:dummy1 -r 2:dummy2 -r 3:dummy3 -p
15/May/2016 12:00:40 [zbalance_ipc.c:537] Mapping egress queue 0 to device dummy0
15/May/2016 12:00:40 [zbalance_ipc.c:537] Mapping egress queue 1 to device dummy1
15/May/2016 12:00:40 [zbalance_ipc.c:537] Mapping egress queue 2 to device dummy2
15/May/2016 12:00:40 [zbalance_ipc.c:537] Mapping egress queue 3 to device dummy3
15/May/2016 12:00:43 [zbalance_ipc.c:683] Starting balancer with 4 consumer queues..
15/May/2016 12:00:43 [zbalance_ipc.c:693] Run your application instances as follows:
15/May/2016 12:00:43 [zbalance_ipc.c:700] dummy0
15/May/2016 12:00:43 [zbalance_ipc.c:700] dummy1
15/May/2016 12:00:43 [zbalance_ipc.c:700] dummy2
15/May/2016 12:00:43 [zbalance_ipc.c:700] dummy3
15/May/2016 12:00:44 [zbalance_ipc.c:151] =========================
15/May/2016 12:00:44 [zbalance_ipc.c:152] Absolute Stats: Recv 4 pkts (0 drops) - Forwarded 16 pkts (0 drops)
15/May/2016 12:00:44 [zbalance_ipc.c:191] zc:dna1 RX 4 pkts Dropped 0 pkts (0.0 %)
15/May/2016 12:00:44 [zbalance_ipc.c:205] Q 0 RX 0 pkts Dropped 0 pkts (0.0 %)
15/May/2016 12:00:44 [zbalance_ipc.c:205] Q 1 RX 0 pkts Dropped 0 pkts (0.0 %)
15/May/2016 12:00:44 [zbalance_ipc.c:205] Q 2 RX 0 pkts Dropped 0 pkts (0.0 %)
15/May/2016 12:00:44 [zbalance_ipc.c:205] Q 3 RX 0 pkts Dropped 0 pkts (0.0 %)
15/May/2016 12:00:44 [zbalance_ipc.c:234] =========================
15/May/2016 12:00:45 [zbalance_ipc.c:151] =========================
15/May/2016 12:00:45 [zbalance_ipc.c:152] Absolute Stats: Recv 7 pkts (0 drops) - Forwarded 28 pkts (0 drops)
15/May/2016 12:00:45 [zbalance_ipc.c:191] zc:dna1 RX 7 pkts Dropped 0 pkts (0.0 %)
15/May/2016 12:00:45 [zbalance_ipc.c:205] Q 0 RX 0 pkts Dropped 0 pkts (0.0 %)
15/May/2016 12:00:45 [zbalance_ipc.c:205] Q 1 RX 0 pkts Dropped 0 pkts (0.0 %)
15/May/2016 12:00:45 [zbalance_ipc.c:205] Q 2 RX 0 pkts Dropped 0 pkts (0.0 %)
15/May/2016 12:00:45 [zbalance_ipc.c:205] Q 3 RX 0 pkts Dropped 0 pkts (0.0 %)
15/May/2016 12:00:45 [zbalance_ipc.c:224] Actual Stats: Recv 3.00 pps (0.00 drops) - Forwarded 12.00 pps (0.00 drops)
15/May/2016 12:00:45 [zbalance_ipc.c:234] =========================
使用sar命令得到的结果:
[monster@monster ~]$ sar -n DEV 1
Linux 2.6.32-573.7.1.el6.x86_64 (monster) 05/15/2016 _x86_64_ (32 CPU)
12:00:51 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s
12:00:52 PM lo 32.32 32.32 3.48 3.48 0.00 0.00 0.00
12:00:52 PM em1 25.25 19.19 2.48 2.57 0.00 0.00 1.01
12:00:52 PM em2 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:00:52 PM em3 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:00:52 PM em4 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:00:52 PM dna0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:00:52 PM dna1 0.00 0.00 0.00 0.00 0.00 0.00 2.02
12:00:52 PM dummy0 0.00 3.03 0.00 0.22 0.00 0.00 0.00
12:00:52 PM dummy1 0.00 3.03 0.00 0.22 0.00 0.00 0.00
12:00:52 PM dummy2 0.00 3.03 0.00 0.22 0.00 0.00 0.00
12:00:52 PM dummy3 0.00 3.03 0.00 0.22 0.00 0.00 0.00
12:00:52 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s
12:00:53 PM lo 32.00 32.00 3.45 3.45 0.00 0.00 0.00
12:00:53 PM em1 13.00 10.00 1.23 2.79 0.00 0.00 0.00
12:00:53 PM em2 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:00:53 PM em3 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:00:53 PM em4 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:00:53 PM dna0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:00:53 PM dna1 6.00 0.00 0.49 0.00 0.00 0.00 2.00
12:00:53 PM dummy0 0.00 3.00 0.00 0.24 0.00 0.00 0.00
12:00:53 PM dummy1 0.00 3.00 0.00 0.24 0.00 0.00 0.00
12:00:53 PM dummy2 0.00 3.00 0.00 0.24 0.00 0.00 0.00
12:00:53 PM dummy3 0.00 3.00 0.00 0.24 0.00 0.00 0.00
12:00:53 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s
12:00:54 PM lo 32.32 32.32 3.48 3.48 0.00 0.00 0.00
12:00:54 PM em1 23.23 16.16 2.44 3.33 0.00 0.00 3.03
12:00:54 PM em2 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:00:54 PM em3 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:00:54 PM em4 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:00:54 PM dna0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:00:54 PM dna1 0.00 0.00 0.00 0.00 0.00 0.00 3.03
12:00:54 PM dummy0 0.00 4.04 0.00 0.41 0.00 0.00 0.00
12:00:54 PM dummy1 0.00 4.04 0.00 0.41 0.00 0.00 0.00
12:00:54 PM dummy2 0.00 4.04 0.00 0.41 0.00 0.00 0.00
12:00:54 PM dummy3 0.00 4.04 0.00 0.41 0.00 0.00 0.00
TODO: