目录
1 dpdk 配置
2 抓包原理
3 源码解析
在使用dpdk-pdump之前,需要先修改编译配置:在config/common_base文件中分别设置CONFIG_RTE_LIBRTE_PMD_PCAP=y和CONFIG_RTE_LIBRTE_PDUMP=y,然后重新编译DPDK。
配置环境变量:在 ~/.bashrc中加入如下两行:
export RTE_SDK=dpdk_dir
export RTE_TARGET=x86_64-native-linuxapp-gcc
dpdk-pdump使用时,作为secondary进程依附于primary进程。primary进程中启动server端,初始化pdump抓包框架;dpdk-pdump进程作为client端向primary进程发送开始/停止抓包的请求,然后primary进程拷贝一份数据包到ring中,secondary进程再从ring中读取出来,并保存为pcap文件。因此,可以看出在primary进程中需要先初始化pdump server(调用rte_pdump_init()),否则dpdk-pdump无法抓包。
下面重点讲解源码部分:
其中主函数的主要代码如下:
/* parse app arguments */
if (argc > 1) {
ret = launch_args_parse(argc, argv, argp[0]);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Invalid argument\n");
}
/* create mempool, ring and vdevs info */
create_mp_ring_vdev();
enable_pdump();
dump_packets();
cleanup_pdump_resources();
/* dump debug stats */
print_pdump_stats();
其中 launch_args_parse()函数主要是解析参数,如果用法为:
./dpdk-pdump -- --pdump 'port=0,queue=*,rx-dev=/tmp/capture.pcap'
则将其中的port,queue,rx-dev 分别传给函数 parse_uint_value(),parse_queue(),parse_rxtxdev() 进行解析,最终将所有的参数分别设置到 struct pdump_tuples 中。
然后使用create_mp_ring_vdev()创建mempool, rte_ring,vdev:
mbuf_pool = rte_mempool_lookup(mempool_name);
if (mbuf_pool == NULL) {
/* create mempool */
mbuf_pool = rte_pktmbuf_pool_create(mempool_name,
pt->total_num_mbufs,
MBUF_POOL_CACHE_SIZE, 0,
pt->mbuf_data_size,
rte_socket_id());
if (mbuf_pool == NULL) {
cleanup_rings();
rte_exit(EXIT_FAILURE,
"Mempool creation failed: %s\n",
rte_strerror(rte_errno));
}
}
后面分别根据传入的参数,创建rx_ring,tx_ring。其中比较重要的函数是rte_eth_dev_attach()
if (rte_eth_dev_attach(vdev_args, &portid) < 0) {
cleanup_rings();
rte_exit(EXIT_FAILURE,
"vdev creation failed:%s:%d\n",
__func__, __LINE__);
}
其中传入的参数 vdev_args:net_pcap_rx_0, tx_iface=/tmp/capture.pcap
其中rte_eth_dev_attach()主要内容如下:
/* parse devargs, then retrieve device name and args */
if (rte_eal_parse_devargs_str(devargs, &name, &args))
goto err;
ret = rte_eal_dev_attach(name, args);
if (ret < 0)
goto err;
/* no point looking at the port count if no port exists */
if (!rte_eth_dev_count()) {
RTE_LOG(ERR, EAL, "No port found for device (%s)\n", name);
ret = -1;
goto err;
}
/* if nothing happened, there is a bug here, since some driver told us
* it did attach a device, but did not create a port.
*/
if (current == rte_eth_dev_count()) {
ret = -1;
goto err;
}
*port_id = eth_dev_last_created_port;
ret = 0;
上面代码中,rte_eal_parse_devargs_str()函数将devargs字符串拆分为设备名(name)和设备参数(args)两部分,其中args即后面的部分tx_iface=/tmp/capture.pcap,然后将二者一并传入rte_eal_dev_attach():
int rte_eal_dev_attach(const char *name, const char *devargs)
{
struct rte_pci_addr addr;
if (name == NULL || devargs == NULL) {
RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n");
return -EINVAL;
}
if (eal_parse_pci_DomBDF(name, &addr) == 0) {
if (rte_eal_pci_probe_one(&addr) < 0)
goto err;
} else {
if (rte_eal_vdev_init(name, devargs))
goto err;
}
return 0;
由于传入的name不是PCI地址,eal_parse_pci_DomBDF()解析失败,因此走else分支调用rte_eal_vdev_init(),这主要是创建虚拟设备。
下面看下主函数中另一个函数enable_pdump()
/*
 * Apply the optional socket-directory overrides, then issue an enable
 * request for every configured pdump tuple.
 *
 * On any failure the pdump resources are released and rte_exit() is called
 * with EXIT_FAILURE.
 */
static void
enable_pdump(void)
{
	int idx;
	struct pdump_tuples *tuple;
	/*
	 * rc holds the RX (or single-direction) result and rc_tx the TX result
	 * of an RX+TX request. Neither is reset between tuples.
	 */
	int rc = 0, rc_tx = 0;

	/* A server socket directory may be given here; otherwise the default is used. */
	if (server_socket_path[0] != 0)
		rc = rte_pdump_set_socket_dir(server_socket_path,
				RTE_PDUMP_SOCKET_SERVER);

	/* The client directory is only applied when the server step did not fail. */
	if (rc == 0 && client_socket_path[0] != 0)
		rc = rte_pdump_set_socket_dir(client_socket_path,
				RTE_PDUMP_SOCKET_CLIENT);

	if (rc < 0) {
		cleanup_pdump_resources();
		rte_exit(EXIT_FAILURE,
			"failed to set socket paths of server:%s, "
			"client:%s\n",
			server_socket_path,
			client_socket_path);
	}

	for (idx = 0; idx < num_tuples; idx++) {
		tuple = &pdump_t[idx];

		if (tuple->dump_by_type == DEVICE_ID) {
			/* Capture target is identified by device id. */
			if (tuple->dir == RTE_PDUMP_FLAG_RXTX) {
				/* Both directions: one request per direction, RX first. */
				rc = rte_pdump_enable_by_deviceid(
						tuple->device_id,
						tuple->queue,
						RTE_PDUMP_FLAG_RX,
						tuple->rx_ring,
						tuple->mp, NULL);
				rc_tx = rte_pdump_enable_by_deviceid(
						tuple->device_id,
						tuple->queue,
						RTE_PDUMP_FLAG_TX,
						tuple->tx_ring,
						tuple->mp, NULL);
			} else if (tuple->dir == RTE_PDUMP_FLAG_RX) {
				rc = rte_pdump_enable_by_deviceid(
						tuple->device_id,
						tuple->queue,
						tuple->dir, tuple->rx_ring,
						tuple->mp, NULL);
			} else if (tuple->dir == RTE_PDUMP_FLAG_TX) {
				rc = rte_pdump_enable_by_deviceid(
						tuple->device_id,
						tuple->queue,
						tuple->dir,
						tuple->tx_ring, tuple->mp, NULL);
			}
		} else if (tuple->dump_by_type == PORT_ID) {
			/* Capture target is identified by port id. */
			if (tuple->dir == RTE_PDUMP_FLAG_RXTX) {
				/* Both directions: one request per direction, RX first. */
				rc = rte_pdump_enable(tuple->port, tuple->queue,
						RTE_PDUMP_FLAG_RX,
						tuple->rx_ring, tuple->mp, NULL);
				rc_tx = rte_pdump_enable(tuple->port, tuple->queue,
						RTE_PDUMP_FLAG_TX,
						tuple->tx_ring, tuple->mp, NULL);
			} else if (tuple->dir == RTE_PDUMP_FLAG_RX) {
				rc = rte_pdump_enable(tuple->port, tuple->queue,
						tuple->dir,
						tuple->rx_ring, tuple->mp, NULL);
			} else if (tuple->dir == RTE_PDUMP_FLAG_TX) {
				rc = rte_pdump_enable(tuple->port, tuple->queue,
						tuple->dir,
						tuple->tx_ring, tuple->mp, NULL);
			}
		}

		/* Either direction failing aborts the whole tool. */
		if (rc < 0 || rc_tx < 0) {
			cleanup_pdump_resources();
			rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
		}
	}
}
其中比较重要的函数是rte_pdump_enable_by_deviceid(),里面有个创建套接字发送消息的函数:pdump_create_client_socket(struct pdump_request *p)
/*
 * Send one pdump request to the server over a UNIX datagram socket and
 * wait for its response.
 *
 * @param p
 *   Request to send; sizeof(struct pdump_request) bytes are transmitted.
 * @return
 *   The err_value field of the server response on a completed exchange,
 *   the non-zero result of pdump_get_socket_path()/bind() if one of those
 *   fails, or -1 if socket(), sendto() or recvfrom() fails. On local
 *   failures rte_errno is set from errno.
 */
static int
pdump_create_client_socket(struct pdump_request *p)
{
	int ret, socket_fd;
	int pid;
	int n;
	struct pdump_response server_resp;
	struct sockaddr_un addr, serv_addr, from;
	socklen_t addr_len, serv_len;

	pid = getpid();

	socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (socket_fd < 0) {
		RTE_LOG(ERR, PDUMP,
			"client socket(): %s:pid(%d):tid(%u), %s:%d\n",
			strerror(errno), pid, rte_sys_gettid(),
			__func__, __LINE__);
		rte_errno = errno;
		return -1;
	}

	/*
	 * Zero the client address up front: the cleanup at "exit" always
	 * calls unlink(addr.sun_path), including when the path lookup below
	 * fails. Without this the unlink would read indeterminate stack
	 * bytes and could remove an arbitrary file.
	 */
	memset(&addr, 0, sizeof(addr));

	ret = pdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path),
				RTE_PDUMP_SOCKET_CLIENT);
	if (ret != 0) {
		RTE_LOG(ERR, PDUMP,
			"Failed to get client socket path: %s:%d\n",
			__func__, __LINE__);
		rte_errno = errno;
		goto exit;
	}
	addr.sun_family = AF_UNIX;
	addr_len = sizeof(struct sockaddr_un);

	/* Single-pass block: "break" skips the remaining steps and falls into the cleanup. */
	do {
		/* Bind to the client path so the socket has an address of its own. */
		ret = bind(socket_fd, (struct sockaddr *) &addr, addr_len);
		if (ret) {
			RTE_LOG(ERR, PDUMP,
				"client bind(): %s, %s:%d\n",
				strerror(errno), __func__, __LINE__);
			rte_errno = errno;
			break;
		}

		serv_len = sizeof(struct sockaddr_un);
		memset(&serv_addr, 0, sizeof(serv_addr));
		ret = pdump_get_socket_path(serv_addr.sun_path,
					sizeof(serv_addr.sun_path),
					RTE_PDUMP_SOCKET_SERVER);
		if (ret != 0) {
			RTE_LOG(ERR, PDUMP,
				"Failed to get server socket path: %s:%d\n",
				__func__, __LINE__);
			rte_errno = errno;
			break;
		}
		serv_addr.sun_family = AF_UNIX;

		n = sendto(socket_fd, p, sizeof(struct pdump_request), 0,
				(struct sockaddr *)&serv_addr, serv_len);
		if (n < 0) {
			RTE_LOG(ERR, PDUMP,
				"failed to send to server:%s, %s:%d\n",
				strerror(errno), __func__, __LINE__);
			rte_errno = errno;
			ret = -1;
			break;
		}

		/* Blocks until a datagram arrives; the sender address lands in "from". */
		n = recvfrom(socket_fd, &server_resp,
				sizeof(struct pdump_response), 0,
				(struct sockaddr *)&from, &serv_len);
		if (n < 0) {
			RTE_LOG(ERR, PDUMP,
				"failed to recv from server:%s, %s:%d\n",
				strerror(errno), __func__, __LINE__);
			rte_errno = errno;
			ret = -1;
			break;
		}
		ret = server_resp.err_value;
	} while (0);

exit:
	close(socket_fd);
	/* Remove the client socket file; with a zeroed addr this is a harmless failed unlink(""). */
	unlink(addr.sun_path);
	return ret;
}
上面主要是创建套接字,发送接收信息。
然后是dump_packets()
static inline void
dump_packets(void)
{
int i;
struct pdump_tuples *pt;
while (!quit_signal) {
for (i = 0; i < num_tuples; i++) {
pt = &pdump_t[i];
if (pt->dir & RTE_PDUMP_FLAG_RX)
pdump_rxtx(pt->rx_ring, pt->rx_vdev_id,
&pt->stats);
if (pt->dir & RTE_PDUMP_FLAG_TX)
pdump_rxtx(pt->tx_ring, pt->tx_vdev_id,
&pt->stats);
}
}
}
pdump_rxtx()函数主要是从ring队列中取出mbuf,再通过rte_eth_tx_burst()发送到vdev上,未能发送成功的mbuf则被释放:
/*
 * Move one burst of captured packets from a ring to a vdev.
 *
 * Dequeues up to BURST_SIZE mbufs from ring, transmits them on queue 0 of
 * vdev_id, and frees whatever the vdev did not accept.
 *
 * @param ring
 *   Ring to dequeue mbufs from.
 * @param vdev_id
 *   Port id passed to rte_eth_tx_burst().
 * @param stats
 *   Counters updated in place: dequeue_pkts, tx_pkts and freed_pkts.
 */
static inline void
pdump_rxtx(struct rte_ring *ring, uint8_t vdev_id, struct pdump_stats *stats)
{
	/* write input packets of port to vdev for pdump */
	struct rte_mbuf *rxtx_bufs[BURST_SIZE];

	/* first dequeue packets from ring of primary process */
	const uint16_t nb_in_deq = rte_ring_dequeue_burst(ring,
			(void *)rxtx_bufs, BURST_SIZE);
	stats->dequeue_pkts += nb_in_deq;

	if (nb_in_deq) {
		/* then sent on vdev */
		uint16_t nb_in_txd = rte_eth_tx_burst(
				vdev_id,
				0, rxtx_bufs, nb_in_deq);
		stats->tx_pkts += nb_in_txd;

		/* Free the tail of the burst that was not transmitted. */
		if (unlikely(nb_in_txd < nb_in_deq)) {
			do {
				rte_pktmbuf_free(rxtx_bufs[nb_in_txd]);
				stats->freed_pkts++;
			} while (++nb_in_txd < nb_in_deq);
		}
	}
}
至此,dpdk-pdump抓包程序的源码分析完成。