DPDK总结之常用API

文章目录

    • 一、库函数
      • 1.1 rte_pktmbuf_pool_create
      • 1.2 rte_eth_dev_count_avail
      • 1.3 rte_eth_macaddr_get
      • 1.4 rte_eth_dev_get_mtu与rte_eth_dev_set_mtu
      • 1.5 rte_lcore_id与rte_socket_id
      • 1.6 rte_eth_dev_socket_id
      • 1.7 RTE_LCORE_FOREACH_SLAVE
      • 1.8 rte_eal_remote_launch
      • 1.7 rte_sys_gettid
      • 1.8 rte_eal_mp_remote_launch
      • 1.9 rte_eal_wait_lcore
      • 1.10 rte_eal_mp_wait_lcore
      • 1.11 rte_eth_tx_queue_setup与rte_eth_rx_queue_setup
      • 1.12 rte_lcore_id

一、库函数

1.1 rte_pktmbuf_pool_create

struct rte_mempool * rte_pktmbuf_pool_create(const char *name, unsigned int n,
unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size, int socket_id)

①在socket_id对应的CPU上创建内存池,一个CPU最多能申请10G,超过此限制会申请失败
②n表示内存池中可以容纳的mbuf的个数
③内存池为每个lcore分配mbuf;先对mbuf结构体中对应的内容进行修改,再执行对mbuf的操作(发送/接收)
④cache_size必须<= CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE(512),并且<= n/1.5;设置为0表示禁用缓存
⑤priv_size通常设置为0
⑥data_room_size:mbuf数据区大小,包括headroom大小和数据大小;默认设置为(2048+128),大包情况下要设置为(8192+128)

函数原型

/*
 * Create a packet mbuf pool using the default mempool ops.
 *
 * Thin wrapper: forwards every argument unchanged to
 * rte_pktmbuf_pool_create_by_ops() and passes NULL as the ops name.
 * Returns whatever that function returns.
 */
struct rte_mempool *
rte_pktmbuf_pool_create(const char *name, unsigned int n,
	unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
	int socket_id)
{
	struct rte_mempool *pool;

	pool = rte_pktmbuf_pool_create_by_ops(name, n, cache_size, priv_size,
			data_room_size, socket_id, NULL);
	return pool;
}
/* Helper to create a mbuf pool with given mempool ops name*/
struct rte_mempool *
rte_pktmbuf_pool_create_by_ops(const char *name, unsigned int n,
	unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
	int socket_id, const char *ops_name)
{
	struct rte_mempool *mp;
	struct rte_pktmbuf_pool_private mbp_priv;
	const char *mp_ops_name = ops_name;
	unsigned elt_size;
	int ret;

	if (RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) != priv_size) {
		RTE_LOG(ERR, MBUF, "mbuf priv_size=%u is not aligned\n",
			priv_size);
		rte_errno = EINVAL;
		return NULL;
	}
	elt_size = sizeof(struct rte_mbuf) + (unsigned)priv_size +
		(unsigned)data_room_size;
	mbp_priv.mbuf_data_room_size = data_room_size;
	mbp_priv.mbuf_priv_size = priv_size;
	
	mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
		 sizeof(struct rte_pktmbuf_pool_private), socket_id, 0);
	if (mp == NULL)
		return NULL;
	
	if (mp_ops_name == NULL)
		mp_ops_name = rte_mbuf_best_mempool_ops();
	ret = rte_mempool_set_ops_byname(mp, mp_ops_name, NULL);
	if (ret != 0) {
		RTE_LOG(ERR, MBUF, "error setting mempool handler\n");
		rte_mempool_free(mp);
		rte_errno = -ret;
		return NULL;
	}
	rte_pktmbuf_pool_init(mp, &mbp_priv);
	
	ret = rte_mempool_populate_default(mp);
	if (ret < 0) {
		rte_mempool_free(mp);
		rte_errno = -ret;
		return NULL;
	}
	
	rte_mempool_obj_iter(mp, rte_pktmbuf_init, NULL);
	
	return mp;

}

1.2 rte_eth_dev_count_avail

①返回设备中可用端口的数量

函数原型

uint16_t
rte_eth_dev_count_avail(void)
{
	uint16_t p;
	uint16_t count;
	count = 0;	
	RTE_ETH_FOREACH_DEV(p)
		count++;	
	return count;
}

1.3 rte_eth_macaddr_get

void rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr)

①获取设备端口对应的MAC地址(当I40E或IGB被DPDK托管后看不到MAC地址,可用此接口查看MAC地址)。

函数原型

void
rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr)
{
	struct rte_eth_dev *dev;

	RTE_ETH_VALID_PORTID_OR_RET(port_id);
	dev = &rte_eth_devices[port_id];
	ether_addr_copy(&dev->data->mac_addrs[0], mac_addr);

}

1.4 rte_eth_dev_get_mtu与rte_eth_dev_set_mtu

①获取设备对应端口的MTU和设置设备对应端口的MTU数值

函数原型

int
rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu)
{
	struct rte_eth_dev *dev;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
	
	dev = &rte_eth_devices[port_id];
	*mtu = dev->data->mtu;
	return 0;

}

/*
 * Ask the driver to change the port's MTU and, when the driver reports
 * success (0), record the new value in the device data.
 *
 * The port id is checked with -ENODEV and the presence of the mtu_set op
 * with -ENOTSUP as error values; otherwise the driver's result is passed
 * through eth_err() and returned.
 */
int
rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu)
{
	struct rte_eth_dev *dev;
	int rc;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
	dev = &rte_eth_devices[port_id];
	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mtu_set, -ENOTSUP);

	rc = (*dev->dev_ops->mtu_set)(dev, mtu);
	if (rc == 0)
		dev->data->mtu = mtu;

	return eth_err(port_id, rc);
}

1.5 rte_lcore_id与rte_socket_id

①获取当前运行代码所在核lcoreID号

②获取当前运行代码所在核对应的CPU插槽(NUMA socket)号(是物理CPU的序号,而非CPU的逻辑核号)

函数原型

/* Map a per-lcore variable name to its per_lcore_-prefixed identifier. */
#define RTE_PER_LCORE(name) (per_lcore_##name)

/*
 * Return the value of the per-lcore variable _lcore_id for the caller.
 */
static inline unsigned
rte_lcore_id(void)
{
	const unsigned id = RTE_PER_LCORE(_lcore_id);

	return id;
}

/*
 * Return the value of the per-lcore variable _socket_id for the caller.
 */
unsigned rte_socket_id(void)
{
	const unsigned socket = RTE_PER_LCORE(_socket_id);

	return socket;
}

1.6 rte_eth_dev_socket_id

①获取参数port端口所在的NUMA节点(CPU插槽)号(是物理CPU的序号,而非CPU的逻辑核号);端口号无效时返回-1

函数原型

/* Table of ethernet devices, indexed by port id. */
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];

/*
 * Return the numa_node value stored in the device data of port_id.
 *
 * The port id is checked with RTE_ETH_VALID_PORTID_OR_ERR_RET using -1 as
 * the error value.
 */
int
rte_eth_dev_socket_id(uint16_t port_id)
{
	int node;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1);

	node = rte_eth_devices[port_id].data->numa_node;
	return node;
}

1.7 RTE_LCORE_FOREACH_SLAVE

①遍历所有已启用的从属lcore,即除主核(master lcore,通常为0核)以外的所有核

函数原型

#define RTE_MAX_LCORE 128
/**
 * Loop header that visits every lcore id produced by
 * rte_get_next_lcore() with skip_master = 1 and wrap = 0.
 *
 * Iteration starts from the id that follows -1 and stops as soon as the
 * returned id reaches RTE_MAX_LCORE. `i` must be an assignable integer
 * variable; it is evaluated several times, so pass a plain variable.
 */
#define RTE_LCORE_FOREACH_SLAVE(i)					\
	for (i = rte_get_next_lcore(-1, 1, 0);				\
	     i<RTE_MAX_LCORE;						\
	     i = rte_get_next_lcore(i, 1, 0))

/*
 * Return the first acceptable lcore id after i.
 *
 * An id is acceptable when rte_lcore_is_enabled() is true for it and,
 * if skip_master is non-zero, it differs from rte_get_master_lcore().
 * With wrap non-zero every candidate is reduced modulo RTE_MAX_LCORE;
 * without it the search stops and returns the candidate once it reaches
 * RTE_MAX_LCORE.
 */
static inline unsigned
rte_get_next_lcore(unsigned i, int skip_master, int wrap)
{
	for (;;) {
		/* Advance to the next candidate, wrapping if requested. */
		i++;
		if (wrap)
			i %= RTE_MAX_LCORE;

		if (i >= RTE_MAX_LCORE)
			break;

		if (rte_lcore_is_enabled(i) &&
		    !(skip_master && (i == rte_get_master_lcore())))
			break;
	}

	return i;
}

1.8 rte_eal_remote_launch

①DPDK在slave_lcore上启动函数,该函数只能在主线程中调用。

②向处于等待状态的从属lcore(由slave_id标识)发送消息(在第一次调用rte_eal_init()后线程状态被设为WAIT)。可以通过调用rte_eal_wait_lcore(slave_id)来检查。当远程lcore收到消息时,它会切换到运行状态,然后使用参数arg调用函数f。执行完成后,远程lcore将切换到finished状态,并将f的返回值存储在本地变量中,以便使用rte_eal_wait_lcore()读取。

③实现方式:主lcore发送消息并收到从属lcore的确认(ack)后即返回,并不等待函数f执行完成。主线程与从属线程之间通过管道通信,从属线程一直处于WAIT状态,主线程设置从属线程的回调函数和参数后,通过管道向从属线程发送消息唤醒从属线程。

函数原型

/*
 * Ask the slave lcore identified by slave_id to call f(arg).
 *
 * The request is refused with -EBUSY unless the slave is in WAIT state.
 * Otherwise f and arg are stored in the slave's lcore_config entry, one
 * byte is written to the master-to-slave pipe, and one byte is read back
 * from the slave-to-master pipe as acknowledgement. Both transfers are
 * retried when interrupted (EINTR); any other pipe failure ends in
 * rte_panic(). Returns 0 once the acknowledgement has been read.
 *
 * Per the original note: once the execution is done, the remote lcore
 * switches to FINISHED state.
 */
int
rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
{
	int nbytes;
	char msg = 0;
	const int to_slave = lcore_config[slave_id].pipe_master2slave[1];
	const int from_slave = lcore_config[slave_id].pipe_slave2master[0];

	if (lcore_config[slave_id].state != WAIT)
		return -EBUSY;

	lcore_config[slave_id].f = f;
	lcore_config[slave_id].arg = arg;

	/* send message: retry on a zero-length write or on EINTR */
	do {
		nbytes = write(to_slave, &msg, 1);
	} while (nbytes == 0 || (nbytes < 0 && errno == EINTR));
	if (nbytes < 0)
		rte_panic("cannot write on configuration pipe\n");

	/* wait ack: retry only when the read was interrupted */
	for (;;) {
		nbytes = read(from_slave, &msg, 1);
		if (nbytes >= 0 || errno != EINTR)
			break;
	}
	if (nbytes <= 0)
		rte_panic("cannot read on configuration pipe\n");

	return 0;
}
/**
 * Per-lcore internal configuration record.
 */
struct lcore_config {
	unsigned detected;         /**< non-zero when the lcore was detected */
	pthread_t thread_id;       /**< pthread identifier of the lcore thread */
	int pipe_master2slave[2];  /**< pipe carrying messages from master to slave */
	int pipe_slave2master[2];  /**< pipe carrying acknowledgements from slave to master */
	lcore_function_t * volatile f;         /**< function the lcore has to call */
	void * volatile arg;       /**< argument handed to that function */
	volatile int ret;          /**< value returned by the function */
	volatile enum rte_lcore_state_t state; /**< current lcore state */
	unsigned socket_id;        /**< physical socket id of this lcore */
	unsigned core_id;          /**< core number of this lcore on its socket */
	int core_index;            /**< relative index, counted from 0 */
	rte_cpuset_t cpuset;       /**< cpu set the lcore has affinity to */
	uint8_t core_role;         /**< role of the core, e.g. OFF, RTE, SERVICE */
};

1.7 rte_sys_gettid

①获取线程ID(下面给出的是FreeBSD平台上的实现,通过thr_self()获取)

函数原型

/*
 * Return the calling thread's id as reported by thr_self(), narrowed
 * from long to int.
 */
int rte_sys_gettid(void)
{
	long tid;

	thr_self(&tid);

	return (int)tid;
}

1.8 rte_eal_mp_remote_launch

①在每个lcore中部署函数。检查每个从属lcore(SLAVE lcore)是否处于WAIT状态,然后为每个lcore调用rte_eal_remote_launch

函数原型

int
rte_eal_mp_remote_launch(int (*f)(void *), void *arg,
			 enum rte_rmt_call_master_t call_master)
{
	int lcore_id;
	int master = rte_get_master_lcore();

	/* check state of lcores */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (lcore_config[lcore_id].state != WAIT)
			return -EBUSY;
	}
	
	/* send messages to cores */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		rte_eal_remote_launch(f, arg, lcore_id);
	}
	
	if (call_master == CALL_MASTER) {
		lcore_config[master].ret = f(arg);
		lcore_config[master].state = FINISHED;
	}
	
	return 0;

}

1.9 rte_eal_wait_lcore

①等待slave_id核完成任务,该函数只能在主核心上运行。

函数原型

/*
 * Wait for the lcore slave_id to finish its job and return the job's
 * result.
 *
 * Returns 0 immediately when the lcore is already in WAIT state.
 * Otherwise spins with rte_pause() until the state becomes WAIT or
 * FINISHED, issues a read barrier, resets the state to WAIT and returns
 * the stored return value.
 */
int
rte_eal_wait_lcore(unsigned slave_id)
{
	if (lcore_config[slave_id].state == WAIT)
		return 0;

	/* Busy-wait until the lcore is no longer running. */
	for (;;) {
		if (lcore_config[slave_id].state == WAIT)
			break;
		if (lcore_config[slave_id].state == FINISHED)
			break;
		rte_pause();
	}

	rte_rmb();

	/* Put the lcore back into WAIT state before handing out its result. */
	lcore_config[slave_id].state = WAIT;
	return lcore_config[slave_id].ret;
}

1.10 rte_eal_mp_wait_lcore

①等待所有核心完成任务。只能在主核心上运行,为每个核心执行rte_eal_wait_lcore函数,忽略返回值。

函数原型

/*
 * Call rte_eal_wait_lcore() for every slave lcore in turn, discarding
 * each return value.
 */
void
rte_eal_mp_wait_lcore(void)
{
	unsigned slave;

	RTE_LCORE_FOREACH_SLAVE(slave) {
		(void)rte_eal_wait_lcore(slave);
	}
}

1.11 rte_eth_tx_queue_setup与rte_eth_rx_queue_setup

①rte_eth_tx_queue_setup用于配置指定端口port_id上指定队列queue_id的发送队列;配置完成后即可在该队列上发包(发包由rte_eth_tx_burst完成)。每次发包过程如下:

  • 在发送环中选取一个可用的描述符。
  • 释放先前使用该描述符发送的网络缓冲区(如果有)。
  • 使用 rte_mbuf 数据结构中提供的信息初始化发送描述符。

②rte_eth_rx_queue_setup用于配置指定端口port_id上指定队列queue_id的接收队列;配置完成后即可从该队列上收包(收包由rte_eth_rx_burst完成)。每次收包过程如下:

  • 根据NIC提供给该接收描述符的信息,初始化与接收描述符相关联的 rte_mbuf 数据结构。
  • 将 rte_mbuf 数据结构存储到 rx_pkts 数组的下一个条目中。
  • 用一个新的 rte_mbuf 缓冲区补充接收描述符,该缓冲区从初始化时与接收队列关联的内存池中分配。

函数原型

/*
 * Validate the request and set up RX queue rx_queue_id of port port_id
 * through the driver's rx_queue_setup op.
 *
 * nb_rx_desc == 0 selects the driver's default ring size, or
 * RTE_ETH_DEV_FALLBACK_RX_RINGSIZE when that default is also 0.
 * rx_conf == NULL selects dev_info.default_rxconf. mp is the mempool
 * whose private data supplies the mbuf data room size.
 *
 * The checks run in the order written below, so the first failing one
 * decides the result:
 *  - invalid port id or rx_queue_id out of range: -EINVAL
 *  - driver lacks dev_infos_get / rx_queue_setup: -ENOTSUP
 *  - mempool private data smaller than rte_pktmbuf_pool_private: -ENOSPC
 *  - data room (minus headroom) below the device minimum: -EINVAL
 *  - descriptor count outside the device limits or misaligned: -EINVAL
 *  - device started without runtime RX queue setup capability, or
 *    device started and the queue is not stopped: -EBUSY
 *  - an existing queue must be released but the driver lacks
 *    rx_queue_release: -ENOTSUP
 *  - newly requested offloads outside the per-queue capabilities: -EINVAL
 * Otherwise the driver's return code is passed through eth_err().
 *
 * NOTE(review): mp is dereferenced without a NULL check, and the return
 * value of rte_eth_dev_info_get() is not examined.
 */
int
rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
		       uint16_t nb_rx_desc, unsigned int socket_id,
		       const struct rte_eth_rxconf *rx_conf,
		       struct rte_mempool *mp)
{
	int ret;
	uint32_t mbp_buf_size;
	struct rte_eth_dev *dev;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_rxconf local_conf;
	void **rxq;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
	
	dev = &rte_eth_devices[port_id];
	if (rx_queue_id >= dev->data->nb_rx_queues) {
		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
		return -EINVAL;
	}
	
	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_setup, -ENOTSUP);
	
	/*
	 * Check the size of the mbuf data buffer.
	 * This value must be provided in the private data of the memory pool.
	 * First check that the memory pool has a valid private data.
	 */
	rte_eth_dev_info_get(port_id, &dev_info);
	if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) {
		RTE_ETHDEV_LOG(ERR, "%s private_data_size %d < %d\n",
			mp->name, (int)mp->private_data_size,
			(int)sizeof(struct rte_pktmbuf_pool_private));
		return -ENOSPC;
	}
	mbp_buf_size = rte_pktmbuf_data_room_size(mp);
	
	/* The room left after the headroom must hold the device's minimum RX buffer. */
	if ((mbp_buf_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) {
		RTE_ETHDEV_LOG(ERR,
			"%s mbuf_data_room_size %d < %d (RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)=%d)\n",
			mp->name, (int)mbp_buf_size,
			(int)(RTE_PKTMBUF_HEADROOM + dev_info.min_rx_bufsize),
			(int)RTE_PKTMBUF_HEADROOM,
			(int)dev_info.min_rx_bufsize);
		return -EINVAL;
	}
	
	/* Use default specified by driver, if nb_rx_desc is zero */
	if (nb_rx_desc == 0) {
		nb_rx_desc = dev_info.default_rxportconf.ring_size;
		/* If driver default is also zero, fall back on EAL default */
		if (nb_rx_desc == 0)
			nb_rx_desc = RTE_ETH_DEV_FALLBACK_RX_RINGSIZE;
	}
	
	/* Descriptor count must lie within [nb_min, nb_max] and be a multiple of nb_align. */
	if (nb_rx_desc > dev_info.rx_desc_lim.nb_max ||
			nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
			nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
	
		RTE_ETHDEV_LOG(ERR,
			"Invalid value for nb_rx_desc(=%hu), should be: <= %hu, >= %hu, and a product of %hu\n",
			nb_rx_desc, dev_info.rx_desc_lim.nb_max,
			dev_info.rx_desc_lim.nb_min,
			dev_info.rx_desc_lim.nb_align);
		return -EINVAL;
	}
	
	/* On a started device, setup is only allowed if the device advertises runtime RX queue setup... */
	if (dev->data->dev_started &&
		!(dev_info.dev_capa &
			RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
		return -EBUSY;
	
	/* ...and the queue being reconfigured is currently stopped. */
	if (dev->data->dev_started &&
		(dev->data->rx_queue_state[rx_queue_id] !=
			RTE_ETH_QUEUE_STATE_STOPPED))
		return -EBUSY;
	
	/* Release a queue previously set up in this slot before creating the new one. */
	rxq = dev->data->rx_queues;
	if (rxq[rx_queue_id]) {
		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release,
					-ENOTSUP);
		(*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
		rxq[rx_queue_id] = NULL;
	}
	
	if (rx_conf == NULL)
		rx_conf = &dev_info.default_rxconf;
	
	/* Work on a copy so the caller's configuration is never modified. */
	local_conf = *rx_conf;
	
	/*
	 * If an offloading has already been enabled in
	 * rte_eth_dev_configure(), it has been enabled on all queues,
	 * so there is no need to enable it in this queue again.
	 * The local_conf.offloads input to underlying PMD only carries
	 * those offloadings which are only enabled on this queue and
	 * not enabled on all queues.
	 */
	local_conf.offloads &= ~dev->data->dev_conf.rxmode.offloads;
	
	/*
	 * New added offloadings for this queue are those not enabled in
	 * rte_eth_dev_configure() and they must be per-queue type.
	 * A pure per-port offloading can't be enabled on a queue while
	 * disabled on another queue. A pure per-port offloading can't
	 * be enabled for any queue as new added one if it hasn't been
	 * enabled in rte_eth_dev_configure().
	 */
	if ((local_conf.offloads & dev_info.rx_queue_offload_capa) !=
	     local_conf.offloads) {
		RTE_ETHDEV_LOG(ERR,
			"Ethdev port_id=%d rx_queue_id=%d, new added offloads 0x%"PRIx64" must be "
			"within per-queue offload capabilities 0x%"PRIx64" in %s()\n",
			port_id, rx_queue_id, local_conf.offloads,
			dev_info.rx_queue_offload_capa,
			__func__);
		return -EINVAL;
	}
	
	ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc,
					      socket_id, &local_conf, mp);
	/* On success, keep min_rx_buf_size as the smallest data room size seen so far. */
	if (!ret) {
		if (!dev->data->min_rx_buf_size ||
		    dev->data->min_rx_buf_size > mbp_buf_size)
			dev->data->min_rx_buf_size = mbp_buf_size;
	}
	
	return eth_err(port_id, ret);

}

/*
 * Validate the request and set up TX queue tx_queue_id of port port_id
 * through the driver's tx_queue_setup op.
 *
 * nb_tx_desc == 0 selects the driver's default ring size, or
 * RTE_ETH_DEV_FALLBACK_TX_RINGSIZE when that default is also 0.
 * tx_conf == NULL selects dev_info.default_txconf.
 *
 * The checks run in the order written below, so the first failing one
 * decides the result:
 *  - invalid port id or tx_queue_id out of range: -EINVAL
 *  - driver lacks dev_infos_get / tx_queue_setup: -ENOTSUP
 *  - descriptor count outside the device limits or misaligned: -EINVAL
 *  - device started without runtime TX queue setup capability, or
 *    device started and the queue is not stopped: -EBUSY
 *  - an existing queue must be released but the driver lacks
 *    tx_queue_release: -ENOTSUP
 *  - newly requested offloads outside the per-queue capabilities: -EINVAL
 * Otherwise the driver's return code is passed through eth_err().
 *
 * NOTE(review): the return value of rte_eth_dev_info_get() is not
 * examined.
 */
int
rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
		       uint16_t nb_tx_desc, unsigned int socket_id,
		       const struct rte_eth_txconf *tx_conf)
{
	struct rte_eth_dev *dev;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf local_conf;
	void **txq;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
	
	dev = &rte_eth_devices[port_id];
	if (tx_queue_id >= dev->data->nb_tx_queues) {
		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
		return -EINVAL;
	}
	
	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_setup, -ENOTSUP);
	
	rte_eth_dev_info_get(port_id, &dev_info);
	
	/* Use default specified by driver, if nb_tx_desc is zero */
	if (nb_tx_desc == 0) {
		nb_tx_desc = dev_info.default_txportconf.ring_size;
		/* If driver default is zero, fall back on EAL default */
		if (nb_tx_desc == 0)
			nb_tx_desc = RTE_ETH_DEV_FALLBACK_TX_RINGSIZE;
	}
	/* Descriptor count must lie within [nb_min, nb_max] and be a multiple of nb_align. */
	if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
	    nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
	    nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
		RTE_ETHDEV_LOG(ERR,
			"Invalid value for nb_tx_desc(=%hu), should be: <= %hu, >= %hu, and a product of %hu\n",
			nb_tx_desc, dev_info.tx_desc_lim.nb_max,
			dev_info.tx_desc_lim.nb_min,
			dev_info.tx_desc_lim.nb_align);
		return -EINVAL;
	}
	
	/* On a started device, setup is only allowed if the device advertises runtime TX queue setup... */
	if (dev->data->dev_started &&
		!(dev_info.dev_capa &
			RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
		return -EBUSY;
	
	/* ...and the queue being reconfigured is currently stopped. */
	if (dev->data->dev_started &&
		(dev->data->tx_queue_state[tx_queue_id] !=
			RTE_ETH_QUEUE_STATE_STOPPED))
		return -EBUSY;
	
	/* Release a queue previously set up in this slot before creating the new one. */
	txq = dev->data->tx_queues;
	if (txq[tx_queue_id]) {
		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release,
					-ENOTSUP);
		(*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
		txq[tx_queue_id] = NULL;
	}
	
	if (tx_conf == NULL)
		tx_conf = &dev_info.default_txconf;
	
	/* Work on a copy so the caller's configuration is never modified. */
	local_conf = *tx_conf;
	
	/*
	 * If an offloading has already been enabled in
	 * rte_eth_dev_configure(), it has been enabled on all queues,
	 * so there is no need to enable it in this queue again.
	 * The local_conf.offloads input to underlying PMD only carries
	 * those offloadings which are only enabled on this queue and
	 * not enabled on all queues.
	 */
	local_conf.offloads &= ~dev->data->dev_conf.txmode.offloads;
	
	/*
	 * New added offloadings for this queue are those not enabled in
	 * rte_eth_dev_configure() and they must be per-queue type.
	 * A pure per-port offloading can't be enabled on a queue while
	 * disabled on another queue. A pure per-port offloading can't
	 * be enabled for any queue as new added one if it hasn't been
	 * enabled in rte_eth_dev_configure().
	 */
	if ((local_conf.offloads & dev_info.tx_queue_offload_capa) !=
	     local_conf.offloads) {
		RTE_ETHDEV_LOG(ERR,
			"Ethdev port_id=%d tx_queue_id=%d, new added offloads 0x%"PRIx64" must be "
			"within per-queue offload capabilities 0x%"PRIx64" in %s()\n",
			port_id, tx_queue_id, local_conf.offloads,
			dev_info.tx_queue_offload_capa,
			__func__);
		return -EINVAL;
	}
	
	/* Hand the validated request to the driver and normalize its return code. */
	return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
		       tx_queue_id, nb_tx_desc, socket_id, &local_conf));

}

1.12 rte_lcore_id

你可能感兴趣的:(DPDK)