使用DPDK进行报文收发时,网卡被DPDK PMD驱动接管,所以以前可以通过ethtool工具配置的网卡属性,现在都需要通过DPDK提供的API函数来进行配置。
《DPDK官方文档说明》中我们对DPDK官方提供的文档做了整体说明,今天来具体看一下DPDK提供的针对网卡各属性进行操作的API函数。
int rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info);
根据port_id获取对应网卡支持的速率/双工能力:调用rte_eth_dev_info_get后,从struct rte_eth_dev_info的speed_capa字段读取。speed_capa是一个位图,各比特位的取值可能为:
/*
 * Link-speed bitmap values. Every speed owns one bit; the AUTONEG value is
 * the all-zero bitmap rather than a bit of its own.
 */
#define RTE_ETH_LINK_SPEED_AUTONEG  0               /* autonegotiate across all speeds */
#define RTE_ETH_LINK_SPEED_FIXED    RTE_BIT32(0)    /* autonegotiation off, fixed speed */
#define RTE_ETH_LINK_SPEED_10M_HD   RTE_BIT32(1)    /* 10 Mbps, half duplex */
#define RTE_ETH_LINK_SPEED_10M      RTE_BIT32(2)    /* 10 Mbps, full duplex */
#define RTE_ETH_LINK_SPEED_100M_HD  RTE_BIT32(3)    /* 100 Mbps, half duplex */
#define RTE_ETH_LINK_SPEED_100M     RTE_BIT32(4)    /* 100 Mbps, full duplex */
#define RTE_ETH_LINK_SPEED_1G       RTE_BIT32(5)    /* 1 Gbps */
#define RTE_ETH_LINK_SPEED_2_5G     RTE_BIT32(6)    /* 2.5 Gbps */
#define RTE_ETH_LINK_SPEED_5G       RTE_BIT32(7)    /* 5 Gbps */
#define RTE_ETH_LINK_SPEED_10G      RTE_BIT32(8)    /* 10 Gbps */
#define RTE_ETH_LINK_SPEED_20G      RTE_BIT32(9)    /* 20 Gbps */
#define RTE_ETH_LINK_SPEED_25G      RTE_BIT32(10)   /* 25 Gbps */
#define RTE_ETH_LINK_SPEED_40G      RTE_BIT32(11)   /* 40 Gbps */
#define RTE_ETH_LINK_SPEED_50G      RTE_BIT32(12)   /* 50 Gbps */
#define RTE_ETH_LINK_SPEED_56G      RTE_BIT32(13)   /* 56 Gbps */
#define RTE_ETH_LINK_SPEED_100G     RTE_BIT32(14)   /* 100 Gbps */
#define RTE_ETH_LINK_SPEED_200G     RTE_BIT32(15)   /* 200 Gbps */

/* Un-prefixed spellings: each one simply expands to its RTE_ETH_ counterpart. */
#define ETH_LINK_SPEED_AUTONEG  RTE_ETH_LINK_SPEED_AUTONEG
#define ETH_LINK_SPEED_FIXED    RTE_ETH_LINK_SPEED_FIXED
#define ETH_LINK_SPEED_10M_HD   RTE_ETH_LINK_SPEED_10M_HD
#define ETH_LINK_SPEED_10M      RTE_ETH_LINK_SPEED_10M
#define ETH_LINK_SPEED_100M_HD  RTE_ETH_LINK_SPEED_100M_HD
#define ETH_LINK_SPEED_100M     RTE_ETH_LINK_SPEED_100M
#define ETH_LINK_SPEED_1G       RTE_ETH_LINK_SPEED_1G
#define ETH_LINK_SPEED_2_5G     RTE_ETH_LINK_SPEED_2_5G
#define ETH_LINK_SPEED_5G       RTE_ETH_LINK_SPEED_5G
#define ETH_LINK_SPEED_10G      RTE_ETH_LINK_SPEED_10G
#define ETH_LINK_SPEED_20G      RTE_ETH_LINK_SPEED_20G
#define ETH_LINK_SPEED_25G      RTE_ETH_LINK_SPEED_25G
#define ETH_LINK_SPEED_40G      RTE_ETH_LINK_SPEED_40G
#define ETH_LINK_SPEED_50G      RTE_ETH_LINK_SPEED_50G
#define ETH_LINK_SPEED_56G      RTE_ETH_LINK_SPEED_56G
#define ETH_LINK_SPEED_100G     RTE_ETH_LINK_SPEED_100G
#define ETH_LINK_SPEED_200G     RTE_ETH_LINK_SPEED_200G
2、获取网卡链路状态
根据port_id获取网卡的链路状态,包括网卡速率、双工状态、网卡协商状态和up/down状态。
int rte_eth_link_get(uint16_t port_id, struct rte_eth_link *link); //设定的时间内(9s) 内不断的轮询接口状态,当获取到一次 up 就立刻返回,或者当时间耗尽时仍旧为 down 则返回 down 的状态
int rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *link); //直接读取当前状态,不需要等待
/**
 * Link state of an Ethernet port, written through the `link` argument of
 * rte_eth_link_get() / rte_eth_link_get_nowait().
 */
struct rte_eth_link {
    /** Current link speed: one of the RTE_ETH_SPEED_NUM_ values. */
    uint32_t link_speed;
    /** Half or full duplex: RTE_ETH_LINK_[HALF/FULL]_DUPLEX. */
    uint16_t link_duplex : 1;
    /** Autonegotiated or fixed speed: RTE_ETH_LINK_[AUTONEG/FIXED]. */
    uint16_t link_autoneg : 1;
    /** Link down or up: RTE_ETH_LINK_[DOWN/UP]. */
    uint16_t link_status : 1;
} __rte_aligned(8); /**< 8-byte alignment allows atomic64 read/write */
int rte_eth_dev_callback_register(uint16_t port_id,
enum rte_eth_event_type event,
rte_eth_dev_cb_fn cb_fn, void *cb_arg);
rte_eth_dev_callback_register函数注册网卡的链路状态事件,当rte_eth_event_type对应事件发生时,会触发调用cb_fn回调函数。
/**
 * Event kinds that can be passed as the `event` argument of
 * rte_eth_dev_callback_register().
 */
enum rte_eth_event_type {
    /** Unknown event type. */
    RTE_ETH_EVENT_UNKNOWN = 0,
    /** Link status change (up/down) interrupt. */
    RTE_ETH_EVENT_INTR_LSC,
    /** Queue state changed (enabled/disabled). */
    RTE_ETH_EVENT_QUEUE_STATE,
    /** Reset interrupt, delivered to a VF when the PF resets. */
    RTE_ETH_EVENT_INTR_RESET,
    /** PF received a message from a VF. */
    RTE_ETH_EVENT_VF_MBOX,
    /** MACsec offload related event. */
    RTE_ETH_EVENT_MACSEC,
    /** Device removal. */
    RTE_ETH_EVENT_INTR_RMV,
    /** Port has been probed. */
    RTE_ETH_EVENT_NEW,
    /** Port has been released. */
    RTE_ETH_EVENT_DESTROY,
    /** IPsec offload related event. */
    RTE_ETH_EVENT_IPSEC,
    /** Newly aged-out flows were detected. */
    RTE_ETH_EVENT_FLOW_AGED,
    /** Upper bound of this enum; not an event itself. */
    RTE_ETH_EVENT_MAX
};
当Rx队列长时间没有数据包到达时,可以让负责该Rx队列的lcore进入休眠以节省电力:休眠前先启用该队列的Rx中断,报文到达时由中断唤醒lcore,随后关闭Rx中断并回到轮询模式。具体示例可以参考l3fwd-power程序。
int rte_eth_dev_rx_intr_enable(uint16_t port_id, uint16_t queue_id);
int rte_eth_dev_rx_intr_disable(uint16_t port_id, uint16_t queue_id);
如果网卡支持RTE_ETH_TX_OFFLOAD_MT_LOCKFREE属性,则多个线程可以在没有锁的情况下,在同一个Tx队列上并发地调用rte_eth_tx_burst()发送报文。
使用方法:先通过rte_eth_dev_info_get函数获取struct rte_eth_dev_info的tx_offload_capa和tx_queue_offload_capa字段,看是否支持RTE_ETH_TX_OFFLOAD_MT_LOCKFREE,如果支持,在rte_eth_dev_configure函数的struct rte_eth_conf参数的struct rte_eth_txmode结构体的offloads字段设置此属性,以及rte_eth_tx_queue_setup函数的struct rte_eth_txconf参数的offloads字段设置此属性
int rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_queue,
uint16_t nb_tx_queue, const struct rte_eth_conf *eth_conf);
int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
uint16_t nb_tx_desc, unsigned int socket_id,
const struct rte_eth_txconf *tx_conf);
/**
 * Port-level configuration handed to rte_eth_dev_configure() through its
 * `eth_conf` argument.
 */
struct rte_eth_conf {
    /**
     * Bitmap of RTE_ETH_LINK_SPEED_XXX values selecting the speeds to use.
     * With RTE_ETH_LINK_SPEED_FIXED set, autonegotiation is disabled and
     * exactly one speed shall be given. Without it, the bitmap is the set of
     * speeds to advertise. The special value RTE_ETH_LINK_SPEED_AUTONEG (0)
     * advertises every supported speed.
     */
    uint32_t link_speeds;

    /** Port Rx configuration. */
    struct rte_eth_rxmode rxmode;

    /** Port Tx configuration. */
    struct rte_eth_txmode txmode;

    /**
     * Loopback operation mode. The default, 0, means loopback is disabled.
     * The accepted values are defined by each driver's implementation; see
     * the datasheet of the Ethernet controller for details.
     */
    uint32_t lpbk_mode;

    /** Port Rx filtering configuration. */
    struct {
        /** Port RSS configuration. */
        struct rte_eth_rss_conf rss_conf;
        /** Port VMDq+DCB configuration. */
        struct rte_eth_vmdq_dcb_conf vmdq_dcb_conf;
        /** Port DCB Rx configuration. */
        struct rte_eth_dcb_rx_conf dcb_rx_conf;
        /** Port VMDq Rx configuration. */
        struct rte_eth_vmdq_rx_conf vmdq_rx_conf;
    } rx_adv_conf;

    /** Port Tx DCB configuration; a union, so only one member is in use. */
    union {
        /** Port VMDq+DCB Tx configuration. */
        struct rte_eth_vmdq_dcb_tx_conf vmdq_dcb_tx_conf;
        /** Port DCB Tx configuration. */
        struct rte_eth_dcb_tx_conf dcb_tx_conf;
        /** Port VMDq Tx configuration. */
        struct rte_eth_vmdq_tx_conf vmdq_tx_conf;
    } tx_adv_conf;

    /**
     * Priority Flow Control (PFC) is currently supported. When DCB with PFC
     * is needed, this field must be set to RTE_ETH_DCB_PFC_SUPPORT.
     */
    uint32_t dcb_capability_en;

    /** FDIR configuration. DEPRECATED. */
    struct rte_eth_fdir_conf fdir_conf;

    /** Interrupt mode configuration. */
    struct rte_eth_intr_conf intr_conf;
};
6、mbuf快速释放
支持在Tx成功后快速释放mbufs的优化。要求每个队列的所有mbufs都来自同一个内存池,并且refcnt = 1。
使用方法:同无锁Tx发送队列属性。先通过rte_eth_dev_info_get函数获取struct rte_eth_dev_info的tx_offload_capa和tx_queue_offload_capa字段,看是否支持RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE,如果支持,在rte_eth_dev_configure函数的struct rte_eth_conf参数的struct rte_eth_txmode结构体的offloads字段设置此属性,以及rte_eth_tx_queue_setup函数的struct rte_eth_txconf参数的offloads字段设置此属性。
请求驱动程序释放不再使用的mbufs,而不管tx_rs_thresh值是否已超过。
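有两种情况会使得应用程序可能想要立即释放mbuf:一是同一个报文需要发往多个目的接口(如二层泛洪或三层组播),应用可以在发送后轮询rte_eth_tx_done_cleanup(),待该报文的引用计数减少后再发往下一个接口;二是应用需要多轮运行(如发包器),希望在两轮之间把所有mbuf归还内存池、恢复到初始状态。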
int rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt);
支持启动/停止端口的特定Rx/Tx队列。
int rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id);
int rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id);
int rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id);
int rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id);
获取和设置网卡的MTU。
int rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu); int rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu);
LRO(Large Receive Offload)是一种网卡Offload技术,它是将网卡接收到的多个数据包合并成一个大的数据包,然后再传递给网络协议栈处理的技术。
使用方法: 同无锁Tx发送队列属性。先通过rte_eth_dev_info_get函数获取struct rte_eth_dev_info的rx_offload_capa和rx_queue_offload_capa字段,看是否支持RTE_ETH_RX_OFFLOAD_TCP_LRO,如果支持,在rte_eth_dev_configure函数的struct rte_eth_conf参数的struct rte_eth_rxmode结构体的offloads字段设置此属性,以及rte_eth_rx_queue_setup函数的struct rte_eth_rxconf参数的offloads字段设置此属性。
/*
 * Prototype of the Rx queue setup call. Per-queue Rx offloads such as
 * RTE_ETH_RX_OFFLOAD_TCP_LRO are requested through rx_conf->offloads.
 */
int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
                           uint16_t nb_rx_desc, unsigned int socket_id,
                           const struct rte_eth_rxconf *rx_conf,
                           struct rte_mempool *mp);
TSO(TCP Segmentation Offload) 是一种网卡Offload技术,它是一种利用网卡对大数据包进行分片,从而减小 CPU 负荷的一种技术。
使用方法: 同无锁Tx发送队列属性。先通过rte_eth_dev_info_get函数获取struct rte_eth_dev_info的tx_offload_capa和tx_queue_offload_capa字段,看是否支持RTE_ETH_TX_OFFLOAD_TCP_TSO,如果支持,在rte_eth_dev_configure函数的struct rte_eth_conf参数的struct rte_eth_txmode结构体的offloads字段设置此属性,以及rte_eth_tx_queue_setup函数的struct rte_eth_txconf参数的offloads字段设置此属性。
支持启用/禁用端口混杂模式。混杂模式(promiscuous mode)是指一台机器的网卡能够接收所有经过它的数据流,而不论其目的MAC地址是否是它。
int rte_eth_promiscuous_get(uint16_t port_id); //查询混杂模式状态
int rte_eth_promiscuous_enable(uint16_t port_id); //开启混杂模式
int rte_eth_promiscuous_disable(uint16_t port_id); //关闭混杂模式
支持启用/禁用接收组播帧。
int rte_eth_allmulticast_get(uint16_t port_id);
int rte_eth_allmulticast_enable(uint16_t port_id);
int rte_eth_allmulticast_disable(uint16_t port_id);
支持添加MAC地址,对报文进行入方向过滤。
int rte_eth_macaddr_get(uint16_t port_id, struct rte_ether_addr *mac_addr);
int rte_eth_dev_mac_addr_add(uint16_t port_id, struct rte_ether_addr *mac_addr,
uint32_t pool);
int rte_eth_dev_mac_addr_remove(uint16_t port_id,
struct rte_ether_addr *mac_addr);
int rte_eth_dev_default_mac_addr_set(uint16_t port_id,
struct rte_ether_addr *mac_addr);
支持设置组播地址过滤。
int rte_eth_dev_set_mc_addr_list(uint16_t port_id,
struct rte_ether_addr *mc_addr_set,
uint32_t nb_mc_addr); //设置以太网设备上要过滤的组播地址列表。
收包报文支持RSS(receive side scaling)哈希到不同队列。
使用方法:调用rte_eth_dev_configure函数时,在struct rte_eth_conf参数中指定rxmode.mq_mode = RTE_ETH_MQ_RX_RSS(该枚举值等于RTE_ETH_MQ_RX_RSS_FLAG),并通过rx_adv_conf.rss_conf指定哈希key以及参与哈希的报文类型(rss_hf)。例如:
static struct rte_eth_conf dev_rss_eth_conf =
{
.rxmode = {
.mq_mode = RTE_ETH_MQ_RX_RSS_FLAG,
.mtu = 1500,
.offloads = DEV_RX_OFFLOAD_CHECKSUM,
},
.rx_adv_conf = {
.rss_conf = {
.rss_key = seed,
.rss_key_len = sizeof(seed),
.rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP,
},
}
}
支持RSS哈希计算的配置,更新RSS哈希key。
int
rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
struct rte_eth_rss_conf *rss_conf); //查询RSS配置
int rte_eth_dev_rss_hash_update(uint16_t port_id,
struct rte_eth_rss_conf *rss_conf); //更新RSS配置
支持VLAN Tag标识符过滤。
用法: rte_eth_dev_configure函数配置网卡时,指定rte_eth_rxconf,rte_eth_rxmode: offloads:RTE_ETH_RX_OFFLOAD_VLAN_FILTER.
int rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on);
//启动/禁用对指定的vlan过滤
支持VLAN卸载到硬件。
rte_eth_dev_info:
rx_offload_capa,rx_queue_offload_capa:RTE_ETH_RX_OFFLOAD_VLAN_STRIP,
tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_VLAN_INSERT.
rte_eth_rxconf,rte_eth_rxmode: offloads:RTE_ETH_RX_OFFLOAD_VLAN_STRIP,RTE_ETH_RX_OFFLOAD_VLAN_FILTER,RTE_ETH_RX_OFFLOAD_VLAN_EXTEND.
rte_eth_txconf,rte_eth_txmode: offloads:RTE_ETH_TX_OFFLOAD_VLAN_INSERT.
int rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask); //配置网卡VLAN卸载
int rte_eth_dev_get_vlan_offload(uint16_t port_id); //读取网卡vlan卸载配置
支持QinQ卸载。
rte_eth_dev_info:
rx_offload_capa,rx_queue_offload_capa:RTE_ETH_RX_OFFLOAD_QINQ_STRIP
tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_QINQ_INSERT
rte_eth_rxconf,rte_eth_rxmode: offloads:RTE_ETH_RX_OFFLOAD_QINQ_STRIP.
rte_eth_txconf,rte_eth_txmode: offloads:RTE_ETH_TX_OFFLOAD_QINQ_INSERT.
支持L3 checksum校验和卸载。
rte_eth_dev_info:
rx_offload_capa,rx_queue_offload_capa:RTE_ETH_RX_OFFLOAD_IPV4_CKSUM,
tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_IPV4_CKSUM
rte_eth_rxconf,rte_eth_rxmode: offloads:RTE_ETH_RX_OFFLOAD_IPV4_CKSUM
rte_eth_txconf,rte_eth_txmode: offloads:RTE_ETH_TX_OFFLOAD_IPV4_CKSUM
支持L4 checksum校验和卸载。
rte_eth_dev_info:
rx_offload_capa,rx_queue_offload_capa:RTE_ETH_RX_OFFLOAD_UDP_CKSUM,RTE_ETH_RX_OFFLOAD_TCP_CKSUM,RTE_ETH_RX_OFFLOAD_SCTP_CKSUM
tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_UDP_CKSUM,RTE_ETH_TX_OFFLOAD_TCP_CKSUM,RTE_ETH_TX_OFFLOAD_SCTP_CKSUM.
rte_eth_rxconf,rte_eth_rxmode: offloads:RTE_ETH_RX_OFFLOAD_UDP_CKSUM,RTE_ETH_RX_OFFLOAD_TCP_CKSUM,RTE_ETH_RX_OFFLOAD_SCTP_CKSUM
rte_eth_txconf,rte_eth_txmode: offloads:RTE_ETH_TX_OFFLOAD_UDP_CKSUM,RTE_ETH_TX_OFFLOAD_TCP_CKSUM,RTE_ETH_TX_OFFLOAD_SCTP_CKSUM
支持时间戳卸载。
rte_eth_dev_info:
rx_offload_capa,rx_queue_offload_capa: RTE_ETH_RX_OFFLOAD_TIMESTAMP
rte_eth_rxconf,rte_eth_rxmode: offloads:RTE_ETH_RX_OFFLOAD_TIMESTAMP
支持检查Rx描述符的状态。调用rte_eth_rx_descriptor_status函数时,返回的状态值为Available(RTE_ETH_RX_DESC_AVAIL)、Done(RTE_ETH_RX_DESC_DONE)或Unavailable(RTE_ETH_RX_DESC_UNAVAIL)。
static inline int rte_eth_rx_descriptor_status(uint16_t port_id, uint16_t queue_id, uint16_t offset)
支持检查Tx描述符的状态。状态可以是“已满”、“完成”或“不可用”。
static inline int rte_eth_tx_descriptor_status(uint16_t port_id, uint16_t queue_id, uint16_t offset)
支持基本统计,如: ipackets、opackets、ibytes、obytes、imissed、ierrors、oerrors、rx_nombuf。
每个队列统计: q_ipackets, q_opackets, q_ibytes, q_obytes, q_errors。
这些适用于所有网卡驱动。
int rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats); //获取网卡I/O统计信息
int rte_eth_stats_reset(uint16_t port_id); //清空网卡I/O统计信息
/**
 * Basic I/O counters of an Ethernet port, written through the `stats`
 * argument of rte_eth_stats_get().
 */
struct rte_eth_stats {
    /** Packets received successfully, in total. */
    uint64_t ipackets;
    /** Packets transmitted successfully, in total. */
    uint64_t opackets;
    /** Bytes received successfully, in total. */
    uint64_t ibytes;
    /** Bytes transmitted successfully, in total. */
    uint64_t obytes;
    /**
     * Rx packets dropped by the hardware because no buffer was available
     * (i.e. the Rx queues were full).
     */
    uint64_t imissed;
    /** Erroneous packets received, in total. */
    uint64_t ierrors;
    /** Packets whose transmission failed, in total. */
    uint64_t oerrors;
    /** Rx mbuf allocation failures, in total. */
    uint64_t rx_nombuf;

    /* Per-queue counters follow; queue stats are limited to max 256 queues. */

    /** Packets received, per queue. */
    uint64_t q_ipackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
    /** Packets transmitted, per queue. */
    uint64_t q_opackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
    /** Bytes received successfully, per queue. */
    uint64_t q_ibytes[RTE_ETHDEV_QUEUE_STAT_CNTRS];
    /** Bytes transmitted successfully, per queue. */
    uint64_t q_obytes[RTE_ETHDEV_QUEUE_STAT_CNTRS];
    /** Received packets that were dropped, per queue. */
    uint64_t q_errors[RTE_ETHDEV_QUEUE_STAT_CNTRS];
};
支持I/O扩展统计。扩展的统计API允许每个独立的网卡导出一组唯一的统计信息。每个统计信息都有三个属性:name(可读的统计项名称字符串)、id(唯一标识该统计项的整数)和value(64位无符号整数的统计值)。
请注意,扩展统计信息标识符是驱动程序特定的,因此对于不同的端口可能不一样。API由各种rte_eth_xstats_*()函数组成,允许应用程序灵活地检索统计信息。
//获取I/O扩展统计信息
int rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats,
unsigned int n);
//清空I/O扩展统计信息
int rte_eth_xstats_reset(uint16_t port_id);
int rte_eth_xstats_get_names(uint16_t port_id,
struct rte_eth_xstat_name *xstats_names,
unsigned int size);
int rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids,
uint64_t *values, unsigned int size);
int
rte_eth_xstats_get_names_by_id(uint16_t port_id,
struct rte_eth_xstat_name *xstats_names, unsigned int size,
uint64_t *ids);
int rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name,
uint64_t *id);
/**
 * One extended statistic, as stored in the `xstats` array passed to
 * rte_eth_xstats_get().
 */
struct rte_eth_xstat {
    /** Index of this statistic in the xstats name array. */
    uint64_t id;
    /** Counter value of the statistic. */
    uint64_t value;
};
支持获取设备硬件固件信息。
int rte_eth_dev_fw_version_get(uint16_t port_id, char *fw_version, size_t fw_size);
支持获取/设置设备eeprom数据。
int rte_eth_dev_get_eeprom_length(uint16_t port_id); //获取网卡eeprom大小
int rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info); //查询eeprom信息
int rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info);
//设置eeprom信息
支持获取模块eeprom的信息和数据。
int
rte_eth_dev_get_module_info(uint16_t port_id,
struct rte_eth_dev_module_info *modinfo);
int
rte_eth_dev_get_module_eeprom(uint16_t port_id,
struct rte_dev_eeprom_info *info);
支持查询设备寄存器及寄存器属性(寄存器个数和寄存器宽度)。
int rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info);
支持打开/关闭设备上的软件可控LED灯。
int rte_eth_led_on(uint16_t port_id); int rte_eth_led_off(uint16_t port_id);