dpdk-packet_ordering例程分析

dpdk-packet_ordering例程分析

  • `packet_ordering`功能
  • `packet_ordering`编译运行及结果
  • main.c源码分析
    • 1. 头文件引用及宏定义
    • 2. 全局变量或结构体定义
    • 3. get_last_core_id()函数
    • 4.get_previous_lcore_id(id) 函数
    • 5.pktmbuf_free_bulk()函数
    • 6.print_usage()函数
    • 7.语法分析函数
    • 8.configure_eth_port()函数
    • 9.输出状态
    • 10.线程控制相关int_handler
    • 11.接收数据包线程
    • 12.工作线程
    • 13.清空单个网口函数
    • 14.发送线程
    • 14.传输线程
    • 15.主函数main()

packet_ordering功能

调用DPDK的reorder库,将乱序接收到的报文变成顺序再发送出去,保证和进入负载均衡前的数据一致。
该例程需要三个线程(至少三个逻辑核心);网卡数量应为偶数,或者只使用单个网卡(main()中只有在网卡数不为1且为奇数时才会报错退出)。
三个线程为rx_thread,worker_thread,tx_thread/send_thread
用到了两个环形队列rx_to_workers,workers_to_tx

线程 功能
rx_thread 遍历所有的port,从portmask选中的port接收数据包,按接收顺序为每个数据包写入递增的序列号seqn,然后放入rx_to_workers环形队列。
worker_thread 将rx_to_workers环形队列中的数据包出队,在网卡数大于1时对数据包执行port^=1(即改由配对的另一个网卡发送),再放入workers_to_tx中。
send_thread 调用reorder库,将workers_to_tx出队的数据包按seqn重新排列成顺序,再由数据包port字段指定的网卡(也就是配对的另外一个网卡)发送出去。
tx_thread 与send_thread不同的是,它不对数据包进行重新排列,直接发送。

packet_ordering编译运行及结果

待分析完再补充。

main.c源码分析

1. 头文件引用及宏定义

#include 
#include 

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

#define RX_DESC_PER_QUEUE 1024 // descriptor count passed to rte_eth_rx_queue_setup() for each RX queue
#define TX_DESC_PER_QUEUE 1024 // descriptor count passed to rte_eth_tx_queue_setup() for each TX queue

#define MAX_PKTS_BURST 32 // size of the per-thread mbuf pointer arrays; upper bound of one RX/dequeue burst
#define REORDER_BUFFER_SIZE 8192 // size argument given to rte_reorder_create()
#define MBUF_PER_POOL 65535 // 2nd argument of rte_pktmbuf_pool_create(); presumably the number of mbufs in the pool - TODO confirm
#define MBUF_POOL_CACHE_SIZE 250 // 3rd argument of rte_pktmbuf_pool_create(); presumably the per-lcore cache size - TODO confirm

#define RING_SIZE 16384 // size given to rte_ring_create() for both rx_to_workers and workers_to_tx
/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_REORDERAPP          RTE_LOGTYPE_USER1

2. 全局变量或结构体定义

/* Bitmask of ports to use; written by parse_args() from the -p option (bit N selects port N). */
unsigned int portmask;
/* Set to 1 by parse_args() when --disable-reorder is given; main() then runs tx_thread() instead of send_thread(). */
unsigned int disable_reorder;
/* Not referenced anywhere in the code shown here. */
unsigned int insight_worker;
/* Stop flag: set to 1 by int_handler() and polled by every thread loop. */
volatile uint8_t quit_signal;
/* volatile marks the flag as changeable at any time, so the compiler must not optimize accesses to it away */
/* Packet buffer pool created in main() and handed to every RX queue. */
static struct rte_mempool *mbuf_pool;
/* Port configuration passed to rte_eth_dev_configure(); never modified in the code shown, so it keeps its zero-initialized static value. */
static struct rte_eth_conf port_conf_default;
/* Arguments handed to worker_thread() through rte_eal_remote_launch(). */
struct worker_thread_args {

        struct rte_ring *ring_in;       /* ring the worker dequeues from (rx_to_workers in main()) */
        struct rte_ring *ring_out;      /* ring the worker enqueues to (workers_to_tx in main()) */
};

/* Arguments handed to send_thread() through rte_eal_remote_launch(). */
struct send_thread_args {

        struct rte_ring *ring_in;               /* ring the sender dequeues from (workers_to_tx in main()) */
        struct rte_reorder_buffer *buffer;      /* reorder buffer created by rte_reorder_create() in main() */
};
/*
 * Application-wide packet counters, printed by print_stats() on exit.
 * __rte_cache_aligned requests cache-line alignment for each counter group.
 */
volatile struct app_stats {

        /* Counters updated by rx_thread(). */
        struct {

                uint64_t rx_pkts;               /* packets returned by rte_eth_rx_burst() */
                uint64_t enqueue_pkts;          /* packets placed on the rx_to_workers ring */
                uint64_t enqueue_failed_pkts;   /* packets dropped because the ring enqueue was partial */
        } rx __rte_cache_aligned;

        /* Counters updated by worker_thread() with __sync_fetch_and_add(). */
        struct {

                uint64_t dequeue_pkts;          /* packets taken from the input ring */
                uint64_t enqueue_pkts;          /* packets placed on the output ring */
                uint64_t enqueue_failed_pkts;   /* packets dropped because the ring enqueue was partial */
        } wkr __rte_cache_aligned;

        /* Counters updated by send_thread(), tx_thread() and flush_one_port(). */
        struct {

                uint64_t dequeue_pkts;          /* packets taken from the workers_to_tx ring */
                /* Too early pkts transmitted directly w/o reordering */
                uint64_t early_pkts_txtd_woro;
                /* Too early pkts failed from direct transmit */
                uint64_t early_pkts_tx_failed_woro;
                uint64_t ro_tx_pkts;            /* packets accepted by rte_eth_tx_burst() in flush_one_port() */
                uint64_t ro_tx_failed_pkts;     /* packets freed by flush_one_port() after a short transmit */
        } tx __rte_cache_aligned;

} app_stats;
/* per worker lcore stats: dequeued, enqueued and failed-enqueue packet counters */
struct wkr_stats_per {

                uint64_t deq_pkts;
                uint64_t enq_pkts;
                uint64_t enq_failed_pkts;
} __rte_cache_aligned;

/* One zero-initialized counter slot per possible lcore; not referenced by the code shown here. */
static struct wkr_stats_per wkr_stats[RTE_MAX_LCORE] = {
      {
     0} };

3. get_last_core_id()函数

获取最大的可用核心标识符。

/**
 * Find the highest-numbered lcore that is enabled.
 *
 * @return
 *   ID of the last enabled lcore, or 0 when the scan finds no enabled lcore.
 */
static unsigned int
get_last_lcore_id(void)
{
        int candidate = RTE_MAX_LCORE;

        /* Walk downwards from RTE_MAX_LCORE - 1 and stop at the first hit. */
        while (--candidate >= 0) {
                if (rte_lcore_is_enabled(candidate))
                        return candidate;
        }

        return 0;
}

4.get_previous_lcore_id(id) 函数

获取核心标识符id之前的一个最近的核心标识符。

/**
 * Find the closest enabled lcore below a given lcore.
 *
 * @param id
 *   lcore ID to start from (not considered itself).
 * @return
 *   The nearest enabled lcore ID smaller than @p id, or @p id unchanged
 *   when no enabled lcore precedes it.
 */
static unsigned int
get_previous_lcore_id(unsigned int id)
{
        int probe = id - 1;

        /* Scan downwards; the first enabled lcore found is the answer. */
        while (probe >= 0) {
                if (rte_lcore_is_enabled(probe))
                        return probe;
                probe--;
        }

        return id;
}

5.pktmbuf_free_bulk()函数

用于释放mbuf_table数组中前n个mbuf所占用的内存空间。

/*
 * Release the first n mbufs of mbuf_table, one rte_pktmbuf_free() call per
 * entry, in ascending index order.
 */
static inline void
pktmbuf_free_bulk(struct rte_mbuf *mbuf_table[], unsigned n)
{
        unsigned int idx = 0;

        while (idx != n) {
                rte_pktmbuf_free(mbuf_table[idx]);
                idx++;
        }
}

6.print_usage()函数

用于打印各个参数的用途。

/*
 * Print the command-line synopsis to stdout.
 * prgname is the program name shown at the start of the synopsis line.
 */
static void
print_usage(const char *prgname)
{
        /* Synopsis line first, then the description of the only short option. */
        printf("%s [EAL options] -- -p PORTMASK\n", prgname);
        fputs("  -p PORTMASK: hexadecimal bitmask of ports to configure\n", stdout);
}

7.语法分析函数

对传入的portmask字符串(端口掩码)进行语法分析。其中调用了strtoul函数。

strtoul()会将参数nptr字符串根据参数base来转换成无符号的长整型数。参数base范围从2至36,或0。参数base代表采用的进制方式,如base值为10则采用10进制,若base值为16则采用16进制数等。当base值为0时会根据情况选择用哪种进制:如果第一个字符是'0',就判断第二字符如果是'x'则用16进制,否则用8进制;第一个字符不是'0',则用10进制。一开始strtoul()会扫描参数nptr字符串,跳过前面的空白字符,直到遇上数字或正负符号才开始做转换,再遇到非数字或字符串结束符('\0')时结束转换,并将结果返回。若参数endptr不为NULL,则会将遇到不合条件而终止的nptr中的字符指针由endptr返回。
unsigned long int strtoul(const char *str, char **endptr, int base)
参数:

  • str – 要转换为无符号长整数的字符串。
  • endptr – 对类型为 char* 的对象的引用,其值由函数设置为 str 中数值后的下一个字符。
  • base – 基数,必须介于 2 和 36(包含)之间,或者是特殊值 0。
/**
 * Parse the hexadecimal port mask given on the command line.
 *
 * @param portmask
 *   NUL-terminated string holding the mask in hexadecimal.
 * @return
 *   The parsed mask, or 0 when the string is empty, contains a character
 *   that is not part of a hexadecimal number, or evaluates to zero.
 */
static int
parse_portmask(const char *portmask)
{
        unsigned long pm;
        char *end = NULL;

        /* parse hexadecimal string */
        pm = strtoul(portmask, &end, 16);

        /*
         * Report failure as 0, not -1: the caller stores the result in an
         * unsigned variable and compares it with 0, so -1 would be taken
         * for a valid all-ones mask and the error would go unnoticed.
         */
        if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
                return 0;

        /* A zero mask selects no port and is rejected by the caller as well. */
        return pm;
}

下面是对传入的命令行参数进行语法处理。其中调用了linux解析命令行选项的函数:
int getopt_long(int argc, char * const argv[],const char *optstring, const struct option *longopts,int *longindex);
其中optarg是指向参数的指针,在DPDK中省略了extern,其定义为:

extern char *optarg;  //选项的参数指针
/*
 * Parse the application arguments (everything after the EAL options).
 *
 * Recognized options:
 *   -p PORTMASK        stored in the global portmask; a result of 0 is rejected
 *   --disable-reorder  sets the global disable_reorder to 1
 *
 * Returns 0 on success. Returns -1 after printing the usage text when an
 * option is unknown, the port mask is rejected, or no option was consumed.
 */
static int
parse_args(int argc, char **argv)
{
        static struct option lgopts[] = {
                {"disable-reorder", 0, 0, 0},
                {NULL, 0, 0, 0}
        };
        char *prgname = argv[0];
        char **argvopt = argv;
        int option_index;
        int opt;

        for (;;) {
                opt = getopt_long(argc, argvopt, "p:", lgopts, &option_index);
                if (opt == EOF)
                        break;

                if (opt == 'p') {
                        /* short option -p: the port mask */
                        portmask = parse_portmask(optarg);
                        if (portmask == 0) {
                                printf("invalid portmask\n");
                                print_usage(prgname);
                                return -1;
                        }
                } else if (opt == 0) {
                        /* getopt_long() returns 0 for a matched long option */
                        if (strcmp(lgopts[option_index].name, "disable-reorder") == 0) {
                                printf("reorder disabled\n");
                                disable_reorder = 1;
                        }
                } else {
                        /* anything else is an unrecognized option */
                        print_usage(prgname);
                        return -1;
                }
        }

        if (optind <= 1) {
                print_usage(prgname);
                return -1;
        }

        argv[optind - 1] = prgname;
        optind = 0; /* reset getopt lib */

        return 0;
}

8.configure_eth_port()函数

其中调用了一些DPDK的库函数,对应的功能如下表。

函数 Description 参数 返回值
rte_eth_dev_configure() Configure an Ethernet device. This function must be invoked first before any other function in the Ethernet API. This function can also be re-invoked when a device is in the stopped state. port_id The port identifier of the Ethernet device to configure.
nb_rx_queue The number of receive queues to set up for the Ethernet device.
nb_tx_queue The number of transmit queues to set up for the Ethernet device.
eth_conf The pointer to the configuration data to be used for the Ethernet device.
0:success
<0:Error code
rte_eth_rx_queue_setup() Allocate and set up a receive queue for an Ethernet device. uint8_t port_id
uint16_t rx_queue_id
uint16_t nb_rx_desc
unsigned int socket_id
const struct rte_eth_rxconf * rx_conf
struct rte_mempool * mb_pool
0: Success, receive queue correctly set up.
-EINVAL: The size of network buffers which can be allocated from the memory pool does not fit the various buffer sizes allowed by the device controller.-ENOMEM: Unable to allocate the receive ring descriptors or to allocate network memory buffers from the memory pool when initializing receive descriptors.
rte_eth_tx_queue_setup() Allocate and set up a transmit queue for an Ethernet device. the same as above (without the mb_pool parameter) the same as above
rte_eth_dev_start() Start an Ethernet device. uint8_t port_id 0:success
<0:error code
rte_eth_macaddr_get() 获取对应port的mac地址 uint8_t port_id
struct ether_addr addr
void

查看该配置以太网卡的函数即可知,其顺序为

  1. 配置以太网设备
  2. 配置接收&传输队列
  3. 启动设备
  4. 设置数据混杂模式
/*
 * Configure and start one Ethernet port.
 *
 * The port gets one RX queue (fed from mbuf_pool) and one TX queue, is
 * started, has its MAC address printed, and is switched to promiscuous mode.
 *
 * port_id: port to set up; valid IDs are 0 .. rte_eth_dev_count() - 1.
 *
 * Returns 0 on success, -1 when port_id is out of range, otherwise the error
 * code returned by the rte_eth_* call that failed.
 */
static inline int
configure_eth_port(uint8_t port_id)
{
        struct ether_addr addr;
        const uint16_t rxRings = 1, txRings = 1;
        const uint8_t nb_ports = rte_eth_dev_count();
        int ret;
        uint16_t q;

        /*
         * Port IDs are zero-based, so nb_ports itself is already out of
         * range (the original test used '>' and let it through).
         */
        if (port_id >= nb_ports)
                return -1;

        /* Device-level configuration must come before any queue setup. */
        ret = rte_eth_dev_configure(port_id, rxRings, txRings, &port_conf_default);
        if (ret != 0)
                return ret;

        /* RX queues draw their buffers from the shared mbuf pool. */
        for (q = 0; q < rxRings; q++) {
                ret = rte_eth_rx_queue_setup(port_id, q, RX_DESC_PER_QUEUE,
                                rte_eth_dev_socket_id(port_id), NULL,
                                mbuf_pool);
                if (ret < 0)
                        return ret;
        }

        for (q = 0; q < txRings; q++) {
                ret = rte_eth_tx_queue_setup(port_id, q, TX_DESC_PER_QUEUE,
                                rte_eth_dev_socket_id(port_id), NULL);
                if (ret < 0)
                        return ret;
        }

        ret = rte_eth_dev_start(port_id);
        if (ret < 0)
                return ret;

        rte_eth_macaddr_get(port_id, &addr);
        printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
                        " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
                        (unsigned)port_id,
                        addr.addr_bytes[0], addr.addr_bytes[1],
                        addr.addr_bytes[2], addr.addr_bytes[3],
                        addr.addr_bytes[4], addr.addr_bytes[5]);

        rte_eth_promiscuous_enable(port_id);

        return 0;
}

9.输出状态

/*
 * Dump the application counters kept in app_stats, followed by the
 * driver-level statistics of every port, to stdout.
 */
static void
print_stats(void)
{
        const uint8_t nb_ports = rte_eth_dev_count();
        struct rte_eth_stats eth_stats;
        unsigned i;

        /* counters maintained by rx_thread() */
        printf("\nRX thread stats:\n");
        printf(" - Pkts rxd:                            %"PRIu64"\n", app_stats.rx.rx_pkts);
        printf(" - Pkts enqd to workers ring:           %"PRIu64"\n", app_stats.rx.enqueue_pkts);

        /* counters maintained by worker_thread() */
        printf("\nWorker thread stats:\n");
        printf(" - Pkts deqd from workers ring:         %"PRIu64"\n", app_stats.wkr.dequeue_pkts);
        printf(" - Pkts enqd to tx ring:                %"PRIu64"\n", app_stats.wkr.enqueue_pkts);
        printf(" - Pkts enq to tx failed:               %"PRIu64"\n", app_stats.wkr.enqueue_failed_pkts);

        /* counters maintained on the transmit side */
        printf("\nTX stats:\n");
        printf(" - Pkts deqd from tx ring:              %"PRIu64"\n", app_stats.tx.dequeue_pkts);
        printf(" - Ro Pkts transmitted:                 %"PRIu64"\n", app_stats.tx.ro_tx_pkts);
        printf(" - Ro Pkts tx failed:                   %"PRIu64"\n", app_stats.tx.ro_tx_failed_pkts);
        printf(" - Pkts transmitted w/o reorder:        %"PRIu64"\n", app_stats.tx.early_pkts_txtd_woro);
        printf(" - Pkts tx failed w/o reorder:          %"PRIu64"\n", app_stats.tx.early_pkts_tx_failed_woro);

        /* per-port statistics as reported by rte_eth_stats_get() */
        i = 0;
        while (i < nb_ports) {
                rte_eth_stats_get(i, &eth_stats);

                printf("\nPort %u stats:\n", i);
                printf(" - Pkts in:   %"PRIu64"\n", eth_stats.ipackets);
                printf(" - Pkts out:  %"PRIu64"\n", eth_stats.opackets);
                printf(" - In Errs:   %"PRIu64"\n", eth_stats.ierrors);
                printf(" - Out Errs:  %"PRIu64"\n", eth_stats.oerrors);
                printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf);

                i++;
        }
}

10.线程控制相关int_handler

/*
 * Signal handler installed for SIGINT by main(): reports the signal number
 * and raises quit_signal so that every thread loop terminates.
 */
static void
int_handler(int sig_num)
{

        /* NOTE(review): printf() is not on the POSIX async-signal-safe list - confirm this is acceptable here. */
        printf("Exiting on signal %d\n", sig_num);
        quit_signal = 1;
}

11.接收数据包线程

注意该函数的参数为struct rte_ring *ring_out,在main函数中传输的实参为rx_to_workers
根据portmask来确定接收哪个网卡的数据包,收取这些数据包并将其顺序规定为动态增加的seqn(如下)。

/* mark sequence number */
for (i = 0; i < nb_rx_pkts; )
		pkts[i++]->seqn = seqn++;
		/* each received packet is stamped with the current seqn, which then advances by one */

利用rte_eth_rx_burst函数获取对应port_id捕捉到的数据包,其数量是该函数的返回值nb_rx_pkts。如果rte_ring_enqueue_burst未能把全部数据包放入环形队列,则释放未能入队的那部分数据包(mbuf),并计入enqueue_failed_pkts,以此完成错误处理。

/*
 * Receive loop, run on the calling lcore until quit_signal is set.
 *
 * For every port selected by portmask it reads a burst from RX queue 0,
 * stamps each packet with the next sequence number and enqueues the burst
 * on ring_out. Packets that do not fit on the ring are freed and counted as
 * enqueue failures.
 *
 * Always returns 0.
 */
static int
rx_thread(struct rte_ring *ring_out)
{
        const uint8_t nb_ports = rte_eth_dev_count();
        struct rte_mbuf *pkts[MAX_PKTS_BURST];
        uint32_t seqn = 0;
        uint16_t enqueued = 0;
        uint16_t nb_rx_pkts;
        uint16_t idx;
        uint8_t port_id;

        RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
                                                        rte_lcore_id());

        while (!quit_signal) {
                for (port_id = 0; port_id < nb_ports; port_id++) {
                        /* ports not selected by the user are never polled */
                        if ((portmask & (1 << port_id)) == 0)
                                continue;

                        /* receive packets */
                        nb_rx_pkts = rte_eth_rx_burst(port_id, 0,
                                                        pkts, MAX_PKTS_BURST);
                        if (nb_rx_pkts == 0) {
                                LOG_DEBUG(REORDERAPP,
                                "%s():Received zero packets\n", __func__);
                                continue;
                        }
                        app_stats.rx.rx_pkts += nb_rx_pkts;

                        /* stamp the burst with consecutive sequence numbers */
                        for (idx = 0; idx < nb_rx_pkts; idx++) {
                                pkts[idx]->seqn = seqn;
                                seqn++;
                        }

                        /* hand the burst over to the workers */
                        enqueued = rte_ring_enqueue_burst(ring_out, (void *) pkts,
                                                        nb_rx_pkts);
                        app_stats.rx.enqueue_pkts += enqueued;

                        /* whatever did not fit on the ring is dropped */
                        if (unlikely(enqueued < nb_rx_pkts)) {
                                app_stats.rx.enqueue_failed_pkts +=
                                                        (nb_rx_pkts - enqueued);
                                pktmbuf_free_bulk(&pkts[enqueued],
                                                        nb_rx_pkts - enqueued);
                        }
                }
        }

        return 0;
}

12.工作线程

有些函数的阐释如下:

函数 描述
__sync_fetch_and_add 提供多线程下变量的加减和逻辑运算的原子操作
rte_ring_dequeue_burst 从队列中取出数据包
rte_ring_enqueue_burst 将数据包进队

该线程的功能为:将ring_in (rx_to_workers) 队列中的数据包取出,放到ring_out (workers_to_tx) 中。

/*
 * Worker loop, run until quit_signal is set.
 *
 * args_ptr points to a struct worker_thread_args. Each iteration moves one
 * burst from args->ring_in to args->ring_out and, when more than one port
 * exists, flips the lowest bit of every packet's port field so that ports
 * are swapped in pairs (0 <-> 1, 2 <-> 3, ...). With a single port the port
 * field is left unchanged. Packets that cannot be enqueued are freed.
 *
 * Always returns 0.
 */
static int
worker_thread(void *args_ptr)
{
        const uint8_t nb_ports = rte_eth_dev_count();
        const unsigned xor_val = (nb_ports > 1);
        struct worker_thread_args *args = (struct worker_thread_args *) args_ptr;
        struct rte_ring *ring_in = args->ring_in;
        struct rte_ring *ring_out = args->ring_out;
        struct rte_mbuf *burst_buffer[MAX_PKTS_BURST] = { NULL };
        uint16_t burst_size = 0;
        uint16_t enqueued = 0;
        uint16_t idx;

        RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
                                                        rte_lcore_id());

        while (!quit_signal) {
                /* dequeue the mbufs from rx_to_workers ring */
                burst_size = rte_ring_dequeue_burst(ring_in,
                                (void *)burst_buffer, MAX_PKTS_BURST);
                if (unlikely(burst_size == 0))
                        continue;

                /* several workers update these counters, hence the atomic add */
                __sync_fetch_and_add(&app_stats.wkr.dequeue_pkts, burst_size);

                /* the only per-packet work: redirect to the paired port */
                for (idx = 0; idx < burst_size; idx++)
                        burst_buffer[idx]->port ^= xor_val;

                /* enqueue the modified mbufs to workers_to_tx ring */
                enqueued = rte_ring_enqueue_burst(ring_out, (void *)burst_buffer,
                                burst_size);
                __sync_fetch_and_add(&app_stats.wkr.enqueue_pkts, enqueued);

                if (unlikely(enqueued < burst_size)) {
                        /* Return the mbufs to their respective pool, dropping packets */
                        __sync_fetch_and_add(&app_stats.wkr.enqueue_failed_pkts,
                                        (int)burst_size - enqueued);
                        pktmbuf_free_bulk(&burst_buffer[enqueued],
                                        burst_size - enqueued);
                }
        }

        return 0;
}

13.清空单个网口函数

该函数的功能是:调用rte_eth_tx_burst,把outbuf中缓存的数据包通过网口outp一次性发送出去;未能发送的数据包会被释放,最后将outbuf的计数清零。

/*
 * Transmit everything buffered in outbuf on TX queue 0 of port outp.
 *
 * Packets the driver did not accept are freed and counted as failed.
 * The buffer is empty when the function returns.
 */
static inline void
flush_one_port(struct output_buffer *outbuf, uint8_t outp)
{
        unsigned sent;

        sent = rte_eth_tx_burst(outp, 0, outbuf->mbufs, outbuf->count);
        app_stats.tx.ro_tx_pkts += sent;

        if (unlikely(sent < outbuf->count)) {
                /* the tail of the buffer was not taken by the driver: drop it */
                app_stats.tx.ro_tx_failed_pkts += (outbuf->count - sent);
                LOG_DEBUG(REORDERAPP, "%s:Packet loss with tx_burst\n", __func__);
                pktmbuf_free_bulk(&outbuf->mbufs[sent], outbuf->count - sent);
        }

        outbuf->count = 0;
}

14.发送线程

相关函数在说明文档中的功能介绍如下:

函数 描述 返回值
int rte_reorder_insert(
struct rte_reorder_buffer * b,
struct rte_mbuf * mbuf)
向已经排好序的b数组中插入mbuf,该mbuf中必须包含一个seqn序列号,用于将该mbuf放置在正确的位置 0:Success
-1:Error and reset rte_errno
unsigned int rte_reorder_drain(
struct rte_reorder_buffer * b,
struct rte_mbuf ** mbufs,
unsigned max_mbufs)
获取重新排序的缓冲区
从重排序结构b中返回一组有序缓冲区mbufs,如果数据包在到达重新排序之前已经被延迟很久,或者先前已经被丢弃,则mbuf的序列号可能不连续。
number of mbuf pointers written to mbufs.
0 <= N< max_mbufs
/*
 * Reordering transmit loop, run until quit_signal is set.
 *
 * Each iteration dequeues a burst from args->ring_in and inserts every
 * packet into the reorder buffer args->buffer:
 *   - ERANGE ("too early" packet): the packet bypasses reordering and is
 *     transmitted directly on its port, or freed if that port is not in
 *     portmask or the transmit fails;
 *   - ENOSPC: the packet is freed.
 * Afterwards up to MAX_PKTS_BURST in-order packets are drained from the
 * reorder buffer and appended to a per-port output buffer, which is flushed
 * only once it holds MAX_PKTS_BURST packets.
 *
 * Always returns 0.
 */
static int
send_thread(struct send_thread_args *args)
{
        int ret;
        unsigned int i, dret;
        uint16_t nb_dq_mbufs;
        uint8_t outp;
        static struct output_buffer tx_buffers[RTE_MAX_ETHPORTS];
        struct rte_mbuf *mbufs[MAX_PKTS_BURST];
        struct rte_mbuf *rombufs[MAX_PKTS_BURST] = {NULL}; /* reordered mbufs */

        RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__, rte_lcore_id());

        while (!quit_signal) {
                /* deque the mbufs from workers_to_tx ring */
                nb_dq_mbufs = rte_ring_dequeue_burst(args->ring_in,
                                (void *)mbufs, MAX_PKTS_BURST);
                if (unlikely(nb_dq_mbufs == 0))
                        continue;
                app_stats.tx.dequeue_pkts += nb_dq_mbufs;

                for (i = 0; i < nb_dq_mbufs; i++) {
                        /* send dequeued mbufs for reordering */
                        ret = rte_reorder_insert(args->buffer, mbufs[i]);
                        if (ret == -1 && rte_errno == ERANGE) {
                                /* Too early pkts should be transmitted out directly */
                                LOG_DEBUG(REORDERAPP, "%s():Cannot reorder early packet "
                                                "direct enqueuing to TX\n", __func__);
                                outp = mbufs[i]->port;
                                /* packets for a port the user did not select are dropped */
                                if ((portmask & (1 << outp)) == 0) {
                                        rte_pktmbuf_free(mbufs[i]);
                                        continue;
                                }
                                /*
                                 * rte_eth_tx_burst() takes an array of mbuf
                                 * pointers, so pass the address of this slot;
                                 * the original passed the mbuf pointer itself.
                                 */
                                if (rte_eth_tx_burst(outp, 0, &mbufs[i], 1) != 1) {
                                        /* transmit failed: drop and count the packet */
                                        rte_pktmbuf_free(mbufs[i]);
                                        app_stats.tx.early_pkts_tx_failed_woro++;
                                } else
                                        app_stats.tx.early_pkts_txtd_woro++;
                        } else if (ret == -1 && rte_errno == ENOSPC) {
                                /* the reorder buffer rejected the packet: drop it */
                                rte_pktmbuf_free(mbufs[i]);
                        }
                }

                /*
                 * drain MAX_PKTS_BURST of reordered
                 * mbufs for transmit
                 */
                dret = rte_reorder_drain(args->buffer, rombufs, MAX_PKTS_BURST);
                for (i = 0; i < dret; i++) {
                        struct output_buffer *outbuf;
                        uint8_t outp1;

                        outp1 = rombufs[i]->port;
                        /* skip ports that are not enabled */
                        if ((portmask & (1 << outp1)) == 0) {
                                rte_pktmbuf_free(rombufs[i]);
                                continue;
                        }

                        /* queue the in-order packet on its port's output buffer */
                        outbuf = &tx_buffers[outp1];
                        outbuf->mbufs[outbuf->count++] = rombufs[i];
                        /* transmit only once a full burst has accumulated */
                        if (outbuf->count == MAX_PKTS_BURST)
                                flush_one_port(outbuf, outp1);
                }
        }
        return 0;
}

14.传输线程

在不需要重新排序的情况下(即指定了--disable-reorder),该函数仅将workers_to_tx环形队列中的数据包出队并由各个网卡发送。与上一节的发送线程send_thread相比,只是少了重新排序的功能。

static int
tx_thread(struct rte_ring *ring_in)
{
     
        uint32_t i, dqnum;
        uint8_t outp;
        static struct output_buffer tx_buffers[RTE_MAX_ETHPORTS];
        struct rte_mbuf *mbufs[MAX_PKTS_BURST];
        struct output_buffer *outbuf;
        RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
                                                        rte_lcore_id());
        while (!quit_signal) {
     
                /* deque the mbufs from workers_to_tx ring */
                dqnum = rte_ring_dequeue_burst(ring_in,
                                (void *)mbufs, MAX_PKTS_BURST);
                if (unlikely(dqnum == 0))
                        continue;
                app_stats.tx.dequeue_pkts += dqnum;
                for (i = 0; i < dqnum; i++) {
     
                        outp = mbufs[i]->port;
                        /* skip ports that are not enabled */
                        if ((portmask & (1 << outp)) == 0) {
     
                                rte_pktmbuf_free(mbufs[i]);
                                continue;
                        }
                        outbuf = &tx_buffers[outp];
                        outbuf->mbufs[outbuf->count++] = mbufs[i];
                        if (outbuf->count == MAX_PKTS_BURST)
                                flush_one_port(outbuf, outp);
                }
        }
        return 0;
}

15.主函数main()

/*
 * Program entry point.
 *
 * Initializes the EAL, parses the application arguments, checks the lcore
 * and port counts, creates the mbuf pool, configures every port selected by
 * portmask, creates the two inter-core rings (plus the reorder buffer unless
 * reordering is disabled), then starts the threads: worker_thread() on the
 * slave lcores up to the one before the last enabled lcore, the transmit
 * thread on the last enabled lcore and rx_thread() on the calling lcore.
 * When the threads have finished it prints the statistics.
 *
 * Returns 0 on normal termination, -1 when EAL initialization or argument
 * parsing fails or a slave lcore reports a negative result. Other setup
 * failures end the program through rte_exit().
 */
int
main(int argc, char **argv)
{

        int ret;
        unsigned nb_ports;
        unsigned int lcore_id, last_lcore_id, master_lcore_id;
        uint8_t port_id;
        uint8_t nb_ports_available;
        struct worker_thread_args worker_args = {
     NULL, NULL};
        struct send_thread_args send_args = {
     NULL, NULL};
        struct rte_ring *rx_to_workers;
        struct rte_ring *workers_to_tx;
        /* catch ctrl-c so we can print on exit */
        signal(SIGINT, int_handler);
        /* Initialize EAL */
        ret = rte_eal_init(argc, argv);
        if (ret < 0)
                return -1;
        /* skip the arguments already consumed by the EAL */
        argc -= ret;
        argv += ret;
        /* Parse the application specific arguments */
        ret = parse_args(argc, argv);
        if (ret < 0)
                return -1;
        /* Check if we have enough cores */
        if (rte_lcore_count() < 3)
                rte_exit(EXIT_FAILURE, "Error, This application needs at "
                                "least 3 logical cores to run:\n"
                                "1 lcore for packet RX\n"
                                "1 lcore for packet TX\n"
                                "and at least 1 lcore for worker threads\n");
        nb_ports = rte_eth_dev_count();
        if (nb_ports == 0)
                rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n");
        if (nb_ports != 1 && (nb_ports & 1))
                rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except "
                                "when using a single port\n");
                                /* an odd number of ports (other than 1) cannot be paired, so the program refuses to run */
        mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_PER_POOL,
                        MBUF_POOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
                        rte_socket_id());
                        /* create the packet buffer pool used by every RX queue */
        if (mbuf_pool == NULL)
                rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
        nb_ports_available = nb_ports;
        /* initialize all ports */
        for (port_id = 0; port_id < nb_ports; port_id++) {

                /* skip ports that are not enabled */
                if ((portmask & (1 << port_id)) == 0) {

                        printf("\nSkipping disabled port %d\n", port_id);
                        nb_ports_available--;
                        continue;
                }
                /* init port */
                /* note: the "done" message below is printed before configure_eth_port() runs */
                printf("Initializing port %u... done\n", (unsigned) port_id);
                if (configure_eth_port(port_id) != 0)
                        rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n",
                                        port_id);
        }
        if (!nb_ports_available) {

                rte_exit(EXIT_FAILURE,
                        "All available ports are disabled. Please set portmask.\n");
        }
        /* Create rings for inter core communication */
        rx_to_workers = rte_ring_create("rx_to_workers", RING_SIZE, rte_socket_id(),
                        RING_F_SP_ENQ);
                        /* RING_F_SP_ENQ selects single-producer enqueue: only rx_thread() enqueues to this ring */
        if (rx_to_workers == NULL)
                rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
        workers_to_tx = rte_ring_create("workers_to_tx", RING_SIZE, rte_socket_id(),
                        RING_F_SC_DEQ);
                        /* RING_F_SC_DEQ selects single-consumer dequeue: only the one transmit thread dequeues from this ring */
        if (workers_to_tx == NULL)
                rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
        /* the reorder buffer is only needed when send_thread() will run */
        if (!disable_reorder) {

                send_args.buffer = rte_reorder_create("PKT_RO", rte_socket_id(),
                                REORDER_BUFFER_SIZE);
                if (send_args.buffer == NULL)
                        rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
        }
        last_lcore_id   = get_last_lcore_id();
        master_lcore_id = rte_get_master_lcore();
        worker_args.ring_in  = rx_to_workers;
        worker_args.ring_out = workers_to_tx;
        /* Start worker_thread() on all the available slave cores but the last 1 */
        for (lcore_id = 0; lcore_id <= get_previous_lcore_id(last_lcore_id); lcore_id++)
                if (rte_lcore_is_enabled(lcore_id) && lcore_id != master_lcore_id)
                        rte_eal_remote_launch(worker_thread, (void *)&worker_args,
                                        lcore_id);
        if (disable_reorder) {

                /* Start tx_thread() on the last slave core */
                rte_eal_remote_launch((lcore_function_t *)tx_thread, workers_to_tx,
                                last_lcore_id);
        } else {

                send_args.ring_in = workers_to_tx;
                /* Start send_thread() on the last slave core */
                rte_eal_remote_launch((lcore_function_t *)send_thread,
                                (void *)&send_args, last_lcore_id);
        }
        /* Start rx_thread() on the master core */
        rx_thread(rx_to_workers);
        /* rx_thread() returns once quit_signal is set; wait for every slave lcore to finish too */
        RTE_LCORE_FOREACH_SLAVE(lcore_id) {

                if (rte_eal_wait_lcore(lcore_id) < 0)
                        return -1;
        }
        print_stats();
        return 0;
}

你可能感兴趣的:(dpdk,编程)