struct rte_mempool * rte_pktmbuf_pool_create(const char *name, unsigned int n,
unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size, int socket_id)
① Creates the mempool on the CPU socket given by socket_id. A single socket can allocate at most 10 GB; exceeding this limit makes the allocation fail.
② n is the number of mbufs the mempool holds.
③ The mempool hands out mbufs to each lcore; the relevant fields of the mbuf structure are filled in, and then the mbuf operations (transmit/receive) are performed.
④ cache_size must be <= CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE (512) and <= n / 1.5; setting it to 0 disables the per-lcore cache.
⑤ priv_size is normally set to 0.
⑥ data_room_size is the size of the mbuf data area, headroom plus payload. The default is 2048 + 128 (headroom); for jumbo frames it should be set to 8192 + 128.
Function prototype
/* helper to create a mbuf pool */
struct rte_mempool *
rte_pktmbuf_pool_create(const char *name, unsigned int n,
    unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
    int socket_id)
{
    return rte_pktmbuf_pool_create_by_ops(name, n, cache_size, priv_size,
        data_room_size, socket_id, NULL);
}
/* Helper to create a mbuf pool with given mempool ops name */
struct rte_mempool *
rte_pktmbuf_pool_create_by_ops(const char *name, unsigned int n,
    unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
    int socket_id, const char *ops_name)
{
    struct rte_mempool *mp;
    struct rte_pktmbuf_pool_private mbp_priv;
    const char *mp_ops_name = ops_name;
    unsigned elt_size;
    int ret;

    if (RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) != priv_size) {
        RTE_LOG(ERR, MBUF, "mbuf priv_size=%u is not aligned\n",
            priv_size);
        rte_errno = EINVAL;
        return NULL;
    }
    elt_size = sizeof(struct rte_mbuf) + (unsigned)priv_size +
        (unsigned)data_room_size;
    mbp_priv.mbuf_data_room_size = data_room_size;
    mbp_priv.mbuf_priv_size = priv_size;

    mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
        sizeof(struct rte_pktmbuf_pool_private), socket_id, 0);
    if (mp == NULL)
        return NULL;

    if (mp_ops_name == NULL)
        mp_ops_name = rte_mbuf_best_mempool_ops();
    ret = rte_mempool_set_ops_byname(mp, mp_ops_name, NULL);
    if (ret != 0) {
        RTE_LOG(ERR, MBUF, "error setting mempool handler\n");
        rte_mempool_free(mp);
        rte_errno = -ret;
        return NULL;
    }
    rte_pktmbuf_pool_init(mp, &mbp_priv);

    ret = rte_mempool_populate_default(mp);
    if (ret < 0) {
        rte_mempool_free(mp);
        rte_errno = -ret;
        return NULL;
    }

    rte_mempool_obj_iter(mp, rte_pktmbuf_init, NULL);

    return mp;
}
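A minimal usage sketch. The pool name, the count of 8191 mbufs, and the cache size of 250 are illustrative values, not from the original; note that 250 satisfies both limits from ④ (250 <= 512 and 250 <= 8191 / 1.5):

struct rte_mempool *mbuf_pool;

/* One pool on the local socket: no private area, default data room
 * (2048 bytes + 128 bytes headroom, i.e. RTE_MBUF_DEFAULT_BUF_SIZE). */
mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", 8191, 250, 0,
    RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
if (mbuf_pool == NULL)
    rte_exit(EXIT_FAILURE, "Cannot create mbuf pool: %s\n",
        rte_strerror(rte_errno));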
① Returns the number of available Ethernet device ports.
Function prototype
uint16_t
rte_eth_dev_count_avail(void)
{
    uint16_t p;
    uint16_t count;

    count = 0;
    RTE_ETH_FOREACH_DEV(p)
        count++;

    return count;
}
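A minimal sketch of the usual startup check (the rte_exit error handling is illustrative):

uint16_t nb_ports = rte_eth_dev_count_avail();

if (nb_ports == 0)
    rte_exit(EXIT_FAILURE, "No available Ethernet ports\n");
printf("Found %u available port(s)\n", nb_ports);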
rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr)
① Returns the MAC address of the given port (after an I40E or IGB NIC has been taken over by DPDK, its MAC address is no longer visible to normal kernel tools, so this API can be used to look it up).
Function prototype
void
rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr)
{
    struct rte_eth_dev *dev;

    RTE_ETH_VALID_PORTID_OR_RET(port_id);
    dev = &rte_eth_devices[port_id];
    ether_addr_copy(&dev->data->mac_addrs[0], mac_addr);
}
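A quick sketch that reads and prints a port's MAC address (the variable names and format string are illustrative):

struct ether_addr mac;

rte_eth_macaddr_get(port_id, &mac);
printf("Port %u MAC: %02x:%02x:%02x:%02x:%02x:%02x\n", port_id,
    mac.addr_bytes[0], mac.addr_bytes[1], mac.addr_bytes[2],
    mac.addr_bytes[3], mac.addr_bytes[4], mac.addr_bytes[5]);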
① rte_eth_dev_get_mtu() reads the MTU of the given port; rte_eth_dev_set_mtu() sets it.
Function prototype
int
rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu)
{
    struct rte_eth_dev *dev;

    RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
    dev = &rte_eth_devices[port_id];
    *mtu = dev->data->mtu;
    return 0;
}

int
rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu)
{
    int ret;
    struct rte_eth_dev *dev;

    RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
    dev = &rte_eth_devices[port_id];
    RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mtu_set, -ENOTSUP);

    ret = (*dev->dev_ops->mtu_set)(dev, mtu);
    if (!ret)
        dev->data->mtu = mtu;

    return eth_err(port_id, ret);
}
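A sketch that reads the current MTU and then raises it for jumbo frames; the 9000-byte value is an illustrative assumption, and the set call fails with -ENOTSUP if the PMD does not implement mtu_set:

uint16_t mtu;

if (rte_eth_dev_get_mtu(port_id, &mtu) == 0)
    printf("Port %u current MTU: %u\n", port_id, mtu);

if (rte_eth_dev_set_mtu(port_id, 9000) != 0)
    printf("Port %u: failed to set MTU\n", port_id);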
① rte_lcore_id() returns the lcore ID of the core the calling code is running on.
② rte_socket_id() returns the CPU socket number of that core (the physical CPU index, not the logical core number).
Function prototype
#define RTE_PER_LCORE(name) (per_lcore_##name)

static inline unsigned
rte_lcore_id(void)
{
    return RTE_PER_LCORE(_lcore_id);
}

unsigned rte_socket_id(void)
{
    return RTE_PER_LCORE(_socket_id);
}
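A tiny sketch that could sit at the top of a worker loop (the printf is illustrative):

printf("worker running on lcore %u, socket %u\n",
    rte_lcore_id(), rte_socket_id());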
① Returns the CPU socket number the given port is attached to (the physical CPU index, not the logical core number).
Function prototype
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];

int
rte_eth_dev_socket_id(uint16_t port_id)
{
    RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1);
    return rte_eth_devices[port_id].data->numa_node;
}
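A common pattern built on this call, in the spirit of the DPDK skeleton example, is to warn when a port sits on a different NUMA node from the polling lcore, since cross-socket memory access hurts performance (a sketch):

if (rte_eth_dev_socket_id(port_id) >= 0 &&
    rte_eth_dev_socket_id(port_id) != (int)rte_socket_id())
    printf("WARNING: port %u is on a remote NUMA node; "
        "performance will suffer\n", port_id);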
① Iterates over all slave lcores, i.e. every enabled lcore except the master lcore (normally core 0).
Function prototype
#define RTE_MAX_LCORE 128

#define RTE_LCORE_FOREACH_SLAVE(i) \
    for (i = rte_get_next_lcore(-1, 1, 0); \
         i < RTE_MAX_LCORE; \
         i = rte_get_next_lcore(i, 1, 0))
static inline unsigned
rte_get_next_lcore(unsigned i, int skip_master, int wrap)
{
    i++;
    if (wrap)
        i %= RTE_MAX_LCORE;

    while (i < RTE_MAX_LCORE) {
        if (!rte_lcore_is_enabled(i) ||
            (skip_master && (i == rte_get_master_lcore()))) {
            i++;
            if (wrap)
                i %= RTE_MAX_LCORE;
            continue;
        }
        break;
    }
    return i;
}
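A usage sketch of the macro; the loop body is illustrative:

unsigned lcore_id;

RTE_LCORE_FOREACH_SLAVE(lcore_id) {
    printf("slave lcore %u is enabled\n", lcore_id);
}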
① Launches a function on a slave lcore; this function may only be called from the master thread.
② Sends a message to the slave lcore identified by slave_id, which must be in the WAIT state (the thread state is set to WAIT after the first call to rte_eal_init(); it can be checked by calling rte_eal_wait_lcore(slave_id)). When the remote lcore receives the message, it switches to the RUNNING state and calls the function f with argument arg. Once execution completes, the remote lcore switches to the FINISHED state and stores the return value of f in a local variable, where it can be read with rte_eal_wait_lcore().
③ Implementation: the master lcore returns immediately after the message is sent and does not track the execution. The master thread and slave threads communicate through pipes; a slave thread sits in the WAIT state, and after the master sets the slave's callback function and argument, it wakes the slave by writing a message to the pipe.
Function prototype
/*
 * Send a message to a slave lcore identified by slave_id to call a
 * function f with argument arg. Once the execution is done, the remote
 * lcore switches to the FINISHED state.
 */
int
rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
{
    int n;
    char c = 0;
    int m2s = lcore_config[slave_id].pipe_master2slave[1];
    int s2m = lcore_config[slave_id].pipe_slave2master[0];

    if (lcore_config[slave_id].state != WAIT)
        return -EBUSY;

    lcore_config[slave_id].f = f;
    lcore_config[slave_id].arg = arg;

    /* send message */
    n = 0;
    while (n == 0 || (n < 0 && errno == EINTR))
        n = write(m2s, &c, 1);
    if (n < 0)
        rte_panic("cannot write on configuration pipe\n");

    /* wait ack */
    do {
        n = read(s2m, &c, 1);
    } while (n < 0 && errno == EINTR);
    if (n <= 0)
        rte_panic("cannot read on configuration pipe\n");

    return 0;
}
/**
 * Structure storing internal configuration (per-lcore)
 */
struct lcore_config {
    unsigned detected;                      /**< true if lcore was detected */
    pthread_t thread_id;                    /**< pthread identifier */
    int pipe_master2slave[2];               /**< communication pipe with master */
    int pipe_slave2master[2];               /**< communication pipe with master */
    lcore_function_t * volatile f;          /**< function to call */
    void * volatile arg;                    /**< argument of function */
    volatile int ret;                       /**< return value of function */
    volatile enum rte_lcore_state_t state;  /**< lcore state */
    unsigned socket_id;                     /**< physical socket id for this lcore */
    unsigned core_id;                       /**< core number on socket for this lcore */
    int core_index;                         /**< relative index, starting from 0 */
    rte_cpuset_t cpuset;                    /**< cpu set which the lcore affinity to */
    uint8_t core_role;                      /**< role of core eg: OFF, RTE, SERVICE */
};
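A minimal sketch that launches a worker on the first slave lcore and collects its return value; lcore_hello is an illustrative function name, not from the original:

static int
lcore_hello(void *arg)
{
    (void)arg;
    printf("hello from lcore %u\n", rte_lcore_id());
    return 0;
}

/* In main(), after rte_eal_init(): pick the first slave lcore */
unsigned slave_id = rte_get_next_lcore(-1, 1, 0);

if (rte_eal_remote_launch(lcore_hello, NULL, slave_id) == 0)
    rte_eal_wait_lcore(slave_id); /* blocks until FINISHED, returns f's result */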
① Returns the system thread ID (TID) of the calling thread. (The prototype below is the FreeBSD implementation, which uses thr_self(); on Linux, DPDK obtains the TID via the gettid system call.)
Function prototype
/* require calling thread tid by gettid() */
int rte_sys_gettid(void)
{
    long lwpid;

    thr_self(&lwpid);
    return (int)lwpid;
}
① Launches a function on every lcore: first checks that each slave lcore is in the WAIT state, then calls rte_eal_remote_launch() for each of them; with CALL_MASTER the function is additionally run on the master lcore itself.
Function prototype
int
rte_eal_mp_remote_launch(int (*f)(void *), void *arg,
    enum rte_rmt_call_master_t call_master)
{
    int lcore_id;
    int master = rte_get_master_lcore();

    /* check state of lcores */
    RTE_LCORE_FOREACH_SLAVE(lcore_id) {
        if (lcore_config[lcore_id].state != WAIT)
            return -EBUSY;
    }

    /* send messages to cores */
    RTE_LCORE_FOREACH_SLAVE(lcore_id) {
        rte_eal_remote_launch(f, arg, lcore_id);
    }

    if (call_master == CALL_MASTER) {
        lcore_config[master].ret = f(arg);
        lcore_config[master].state = FINISHED;
    }

    return 0;
}
① Waits for the lcore identified by slave_id to finish its task; this function may only be called from the master lcore.
Function prototype
int
rte_eal_wait_lcore(unsigned slave_id)
{
    if (lcore_config[slave_id].state == WAIT)
        return 0;

    while (lcore_config[slave_id].state != WAIT &&
           lcore_config[slave_id].state != FINISHED)
        rte_pause();

    rte_rmb();

    /* we are in finished state, go to wait state */
    lcore_config[slave_id].state = WAIT;
    return lcore_config[slave_id].ret;
}
① Waits for all lcores to finish their tasks. May only be run on the master lcore; it executes rte_eal_wait_lcore() for every slave lcore and ignores the return values. A combined launch/wait sketch follows the prototype below.
Function prototype
void
rte_eal_mp_wait_lcore(void)
{
    unsigned lcore_id;

    RTE_LCORE_FOREACH_SLAVE(lcore_id) {
        rte_eal_wait_lcore(lcore_id);
    }
}
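The two mp_ calls are normally paired; lcore_main here is an illustrative worker with the int (*)(void *) signature:

/* Run lcore_main on every lcore, including the master */
rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);

/* Block until every slave lcore is back in the WAIT state */
rte_eal_mp_wait_lcore();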
① rte_eth_tx_queue_setup() configures queue tx_queue_id of port port_id for transmitting packets.
② rte_eth_rx_queue_setup() configures queue rx_queue_id of port port_id for receiving packets; the RX queue is additionally bound to the mempool mp, from which the mbufs it fills are allocated. (A bring-up sketch follows the two prototypes.)
Function prototypes (RX setup first, then TX setup)
int
rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
    uint16_t nb_rx_desc, unsigned int socket_id,
    const struct rte_eth_rxconf *rx_conf,
    struct rte_mempool *mp)
{
    int ret;
    uint32_t mbp_buf_size;
    struct rte_eth_dev *dev;
    struct rte_eth_dev_info dev_info;
    struct rte_eth_rxconf local_conf;
    void **rxq;

    RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

    dev = &rte_eth_devices[port_id];
    if (rx_queue_id >= dev->data->nb_rx_queues) {
        RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
        return -EINVAL;
    }

    RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
    RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_setup, -ENOTSUP);

    /*
     * Check the size of the mbuf data buffer.
     * This value must be provided in the private data of the memory pool.
     * First check that the memory pool has a valid private data.
     */
    rte_eth_dev_info_get(port_id, &dev_info);
    if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) {
        RTE_ETHDEV_LOG(ERR, "%s private_data_size %d < %d\n",
            mp->name, (int)mp->private_data_size,
            (int)sizeof(struct rte_pktmbuf_pool_private));
        return -ENOSPC;
    }

    mbp_buf_size = rte_pktmbuf_data_room_size(mp);
    if ((mbp_buf_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) {
        RTE_ETHDEV_LOG(ERR,
            "%s mbuf_data_room_size %d < %d (RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)=%d)\n",
            mp->name, (int)mbp_buf_size,
            (int)(RTE_PKTMBUF_HEADROOM + dev_info.min_rx_bufsize),
            (int)RTE_PKTMBUF_HEADROOM,
            (int)dev_info.min_rx_bufsize);
        return -EINVAL;
    }

    /* Use default specified by driver, if nb_rx_desc is zero */
    if (nb_rx_desc == 0) {
        nb_rx_desc = dev_info.default_rxportconf.ring_size;
        /* If driver default is also zero, fall back on EAL default */
        if (nb_rx_desc == 0)
            nb_rx_desc = RTE_ETH_DEV_FALLBACK_RX_RINGSIZE;
    }

    if (nb_rx_desc > dev_info.rx_desc_lim.nb_max ||
        nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
        nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
        RTE_ETHDEV_LOG(ERR,
            "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, >= %hu, and a product of %hu\n",
            nb_rx_desc, dev_info.rx_desc_lim.nb_max,
            dev_info.rx_desc_lim.nb_min,
            dev_info.rx_desc_lim.nb_align);
        return -EINVAL;
    }

    if (dev->data->dev_started &&
        !(dev_info.dev_capa & RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
        return -EBUSY;

    if (dev->data->dev_started &&
        (dev->data->rx_queue_state[rx_queue_id] !=
            RTE_ETH_QUEUE_STATE_STOPPED))
        return -EBUSY;

    rxq = dev->data->rx_queues;
    if (rxq[rx_queue_id]) {
        RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release,
            -ENOTSUP);
        (*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
        rxq[rx_queue_id] = NULL;
    }

    if (rx_conf == NULL)
        rx_conf = &dev_info.default_rxconf;

    local_conf = *rx_conf;

    /*
     * If an offloading has already been enabled in
     * rte_eth_dev_configure(), it has been enabled on all queues,
     * so there is no need to enable it in this queue again.
     * The local_conf.offloads input to underlying PMD only carries
     * those offloadings which are only enabled on this queue and
     * not enabled on all queues.
     */
    local_conf.offloads &= ~dev->data->dev_conf.rxmode.offloads;

    /*
     * New added offloadings for this queue are those not enabled in
     * rte_eth_dev_configure() and they must be per-queue type.
     * A pure per-port offloading can't be enabled on a queue while
     * disabled on another queue. A pure per-port offloading can't
     * be enabled for any queue as new added one if it hasn't been
     * enabled in rte_eth_dev_configure().
     */
    if ((local_conf.offloads & dev_info.rx_queue_offload_capa) !=
        local_conf.offloads) {
        RTE_ETHDEV_LOG(ERR,
            "Ethdev port_id=%d rx_queue_id=%d, new added offloads 0x%"PRIx64" must be "
            "within per-queue offload capabilities 0x%"PRIx64" in %s()\n",
            port_id, rx_queue_id, local_conf.offloads,
            dev_info.rx_queue_offload_capa,
            __func__);
        return -EINVAL;
    }

    ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc,
        socket_id, &local_conf, mp);
    if (!ret) {
        if (!dev->data->min_rx_buf_size ||
            dev->data->min_rx_buf_size > mbp_buf_size)
            dev->data->min_rx_buf_size = mbp_buf_size;
    }

    return eth_err(port_id, ret);
}
int
rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
    uint16_t nb_tx_desc, unsigned int socket_id,
    const struct rte_eth_txconf *tx_conf)
{
    struct rte_eth_dev *dev;
    struct rte_eth_dev_info dev_info;
    struct rte_eth_txconf local_conf;
    void **txq;

    RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

    dev = &rte_eth_devices[port_id];
    if (tx_queue_id >= dev->data->nb_tx_queues) {
        RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
        return -EINVAL;
    }

    RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
    RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_setup, -ENOTSUP);

    rte_eth_dev_info_get(port_id, &dev_info);

    /* Use default specified by driver, if nb_tx_desc is zero */
    if (nb_tx_desc == 0) {
        nb_tx_desc = dev_info.default_txportconf.ring_size;
        /* If driver default is zero, fall back on EAL default */
        if (nb_tx_desc == 0)
            nb_tx_desc = RTE_ETH_DEV_FALLBACK_TX_RINGSIZE;
    }

    if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
        nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
        nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
        RTE_ETHDEV_LOG(ERR,
            "Invalid value for nb_tx_desc(=%hu), should be: <= %hu, >= %hu, and a product of %hu\n",
            nb_tx_desc, dev_info.tx_desc_lim.nb_max,
            dev_info.tx_desc_lim.nb_min,
            dev_info.tx_desc_lim.nb_align);
        return -EINVAL;
    }

    if (dev->data->dev_started &&
        !(dev_info.dev_capa & RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
        return -EBUSY;

    if (dev->data->dev_started &&
        (dev->data->tx_queue_state[tx_queue_id] !=
            RTE_ETH_QUEUE_STATE_STOPPED))
        return -EBUSY;

    txq = dev->data->tx_queues;
    if (txq[tx_queue_id]) {
        RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release,
            -ENOTSUP);
        (*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
        txq[tx_queue_id] = NULL;
    }

    if (tx_conf == NULL)
        tx_conf = &dev_info.default_txconf;

    local_conf = *tx_conf;

    /*
     * If an offloading has already been enabled in
     * rte_eth_dev_configure(), it has been enabled on all queues,
     * so there is no need to enable it in this queue again.
     * The local_conf.offloads input to underlying PMD only carries
     * those offloadings which are only enabled on this queue and
     * not enabled on all queues.
     */
    local_conf.offloads &= ~dev->data->dev_conf.txmode.offloads;

    /*
     * New added offloadings for this queue are those not enabled in
     * rte_eth_dev_configure() and they must be per-queue type.
     * A pure per-port offloading can't be enabled on a queue while
     * disabled on another queue. A pure per-port offloading can't
     * be enabled for any queue as new added one if it hasn't been
     * enabled in rte_eth_dev_configure().
     */
    if ((local_conf.offloads & dev_info.tx_queue_offload_capa) !=
        local_conf.offloads) {
        RTE_ETHDEV_LOG(ERR,
            "Ethdev port_id=%d tx_queue_id=%d, new added offloads 0x%"PRIx64" must be "
            "within per-queue offload capabilities 0x%"PRIx64" in %s()\n",
            port_id, tx_queue_id, local_conf.offloads,
            dev_info.tx_queue_offload_capa,
            __func__);
        return -EINVAL;
    }

    return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
        tx_queue_id, nb_tx_desc, socket_id, &local_conf));
}