DPDK初始化分析(四)

目录

一、概述

二、流程

2.1 rte_eal_tailqs_init

2.2 rte_eal_alarm_init

2.3 rte_eal_timer_init

2.4 eal_check_mem_on_local_socket

2.5 eal_thread_init_master

2.6 创建slave thread

2.7 子线程启动

2.8 rte_service_init

2.9 rte_bus_probe

2.10 vfio_mp_sync_setup

2.11 rte_service_start_with_defaults

2.12 rte_eal_mcfg_complete

2.13 rte_option_init


一、概述

初始化第四篇。有些边角的内容还没写,先写了标题占位。

二、流程

2.1 rte_eal_tailqs_init

2.2 rte_eal_alarm_init

2.3 rte_eal_timer_init

2.4 eal_check_mem_on_local_socket

遍历memseg list,检测master lcore所在的socket上是否分配了内存

2.5 eal_thread_init_master

将master lcore id保存到thread local变量中

RTE_PER_LCORE(_lcore_id) = lcore_id;

/**
 * Macro to define a per lcore variable "var" of type "type", don't
 * use keywords like "static" or "volatile" in type, just prefix the
 * whole macro.
 *
 * Expands to a `__thread` variable named per_lcore_<name>, so every
 * thread gets its own copy of the variable.
 */
#define RTE_DEFINE_PER_LCORE(type, name)			\
	__thread __typeof__(type) per_lcore_##name

/**
 * Access a variable defined with RTE_DEFINE_PER_LCORE(type, name);
 * expands to the per_lcore_<name> identifier of the calling thread.
 */
#define RTE_PER_LCORE(name) (per_lcore_##name)

在eal_thread.c中有定义:

/* Per-thread lcore id; holds LCORE_ID_ANY until the thread stores its own id. */
RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
/* Per-thread socket id; holds SOCKET_ID_ANY until it is overwritten. */
RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
/* Per-thread copy of the thread's CPU set; no explicit initializer. */
RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);

下一步是设置当前thread的亲和性

/*
 * Apply the CPU set configured for the calling lcore to the calling thread.
 *
 * Returns whatever rte_thread_set_affinity() returns.
 */
static int eal_thread_set_affinity(void)
{
	unsigned self;

	/* call for its side effect only: fetch (and cache) the system thread id */
	rte_gettid();

	/* look up our own lcore slot and hand its cpuset to the affinity helper */
	self = rte_lcore_id();
	return rte_thread_set_affinity(&lcore_config[self].cpuset);
}

 

/*
 * Return the system thread id of the calling thread.
 *
 * The id is kept in a per-thread variable that starts at -1; the first
 * call on each thread fills it from rte_sys_gettid(), later calls return
 * the stored value.
 */
static inline int rte_gettid(void)
{
	static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;

	/* fast path: already looked up on this thread */
	if (RTE_PER_LCORE(_thread_id) != -1)
		return RTE_PER_LCORE(_thread_id);

	RTE_PER_LCORE(_thread_id) = rte_sys_gettid();
	return RTE_PER_LCORE(_thread_id);
}

存储thread local thread_id

/*
 * Bind the calling thread to the CPUs in *cpusetp and record the result.
 *
 * @param cpusetp
 *   CPU set to apply to the calling thread.
 * @return
 *   0 on success, -1 if pthread_setaffinity_np() rejected the CPU set.
 *   On failure nothing is recorded, so the per-thread variables and
 *   lcore_config[] keep describing the affinity that is really in effect.
 */
int rte_thread_set_affinity(rte_cpuset_t *cpusetp)
{
	int s;
	unsigned lcore_id;
	pthread_t tid;

	tid = pthread_self();

	s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
	if (s != 0) {
		/* pthread_setaffinity_np() reports failure as a non-zero code */
		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
		return -1;
	}

	/* store socket_id in TLS for quick access */
	RTE_PER_LCORE(_socket_id) =
		eal_cpuset_socket_id(cpusetp);

	/* store cpuset in TLS for quick access */
	memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
		sizeof(rte_cpuset_t));

	lcore_id = rte_lcore_id();
	if (lcore_id != (unsigned)LCORE_ID_ANY) {
		/* EAL thread will update lcore_config */
		lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
		memmove(&lcore_config[lcore_id].cpuset, cpusetp,
			sizeof(rte_cpuset_t));
	}

	return 0;
}
  • 根据前面的rte_cpuset_t ,设置tid的绑定关系
  • 存储thread local socket_id
  • 存储thread local rte_cpuset_t

2.6 创建slave thread

	/*
	 * Excerpt from the EAL init path: for every slave lcore, create the
	 * two pipes used to talk to the master, mark the lcore as WAIT, start
	 * a thread running eal_thread_loop() and give that thread a name.
	 */
	RTE_LCORE_FOREACH_SLAVE(i) {

		/*
		 * create communication pipes between master thread
		 * and children
		 */
		if (pipe(lcore_config[i].pipe_master2slave) < 0)
			rte_panic("Cannot create pipe\n");
		if (pipe(lcore_config[i].pipe_slave2master) < 0)
			rte_panic("Cannot create pipe\n");

		lcore_config[i].state = WAIT;

		/* create a thread for each lcore */
		ret = pthread_create(&lcore_config[i].thread_id, NULL,
				     eal_thread_loop, NULL);
		if (ret != 0)
			rte_panic("Cannot create thread\n");

		/* Set thread_name for aid in debugging. */
		snprintf(thread_name, sizeof(thread_name),
			"lcore-slave-%d", i);
		ret = rte_thread_setname(lcore_config[i].thread_id,
						thread_name);
		/* a naming failure is not fatal: it is only logged at DEBUG level */
		if (ret != 0)
			RTE_LOG(DEBUG, EAL,
				"Cannot set name for lcore thread\n");
	}
  • 对每个已使能的slave lcore,创建与master通信的pipe——pipe_master2slave, pipe_slave2master
  • 创建子线程,对应eal_thread_loop
  • 子线程的名称 lcore-slave-x (x是lcore id)

接下来看下子线程

__attribute__((noreturn)) void * eal_thread_loop(__attribute__((unused)) void *arg)
{
	char c;
	int n, ret;
	unsigned lcore_id;
	pthread_t thread_id;
	int m2s, s2m;
	char cpuset[RTE_CPU_AFFINITY_STR_LEN];

	m2s = lcore_config[lcore_id].pipe_master2slave[0];
	s2m = lcore_config[lcore_id].pipe_slave2master[1];

	/* set the lcore ID in per-lcore memory area */
	RTE_PER_LCORE(_lcore_id) = lcore_id;

	/* set CPU affinity */
	if (eal_thread_set_affinity() < 0)
		rte_panic("cannot set affinity\n");

	/* read on our pipe to get commands */
	while (1) {
		void *fct_arg;

		/* wait command */
		do {
			n = read(m2s, &c, 1);
		} while (n < 0 && errno == EINTR);

		if (n <= 0)
			rte_panic("cannot read on configuration pipe\n");

		lcore_config[lcore_id].state = RUNNING;

		/* send ack */
		n = 0;
		while (n == 0 || (n < 0 && errno == EINTR))
			n = write(s2m, &c, 1);
		if (n < 0)
			rte_panic("cannot write on configuration pipe\n");

		if (lcore_config[lcore_id].f == NULL)
			rte_panic("NULL function pointer\n");

		/* call the function and store the return value */
		fct_arg = lcore_config[lcore_id].arg;
		ret = lcore_config[lcore_id].f(fct_arg);
		lcore_config[lcore_id].ret = ret;
		rte_wmb();

		/* when a service core returns, it should go directly to WAIT
		 * state, because the application will not lcore_wait() for it.
		 */
		if (lcore_config[lcore_id].core_role == ROLE_SERVICE)
			lcore_config[lcore_id].state = WAIT;
		else
			lcore_config[lcore_id].state = FINISHED;
	}

	/* never reached */
	/* pthread_exit(NULL); */
	/* return NULL; */
}
  • 存储thread local lcore_id
  • 设置thread_id 亲和性
  • slave在m2s的读端读到启动消息,将state置为RUNNING,同时在s2m的写端回复。
  • 启动master填写的f函数,在后面分析
  • 对于非service core,执行完成后state变成FINISHED;service core执行完成后则直接回到WAIT

2.7 子线程启动

在2.6我们看到子线程启动后,会等待master lcore通过pipe发送的启动通知:

rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);

int rte_eal_mp_remote_launch(int (*f)(void *), void *arg,
			 enum rte_rmt_call_master_t call_master)
{
	int lcore_id;
	int master = rte_get_master_lcore();

	/* check state of lcores */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (lcore_config[lcore_id].state != WAIT)
			return -EBUSY;
	}

	/* send messages to cores */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		rte_eal_remote_launch(f, arg, lcore_id);
	}

	if (call_master == CALL_MASTER) {
		lcore_config[master].ret = f(arg);
		lcore_config[master].state = FINISHED;
	}

	return 0;
}
  • 在master发送启动通知之前,所有slave的状态必须是WAIT,否则直接返回-EBUSY
  • rte_eal_remote_launch(f, arg, lcore_id); 其中f是sync_func
/*
 * Ask one slave lcore to run f(arg).
 *
 * The function and its argument are stored in the slave's lcore_config
 * entry, one byte is written to the master->slave pipe to wake the slave,
 * and the call then blocks until the slave's one-byte acknowledgement is
 * read back from the slave->master pipe.
 *
 * @param f
 *   Function the slave should execute.
 * @param arg
 *   Argument for f.
 * @param slave_id
 *   Index of the target lcore in lcore_config[].
 * @return
 *   0 once the acknowledgement has been read, -EBUSY if the slave is not
 *   in WAIT state. Pipe failures end in rte_panic().
 */
int rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
{
	char token = 0;
	int rc;
	const int to_slave = lcore_config[slave_id].pipe_master2slave[1];
	const int from_slave = lcore_config[slave_id].pipe_slave2master[0];

	if (lcore_config[slave_id].state != WAIT)
		return -EBUSY;

	lcore_config[slave_id].f = f;
	lcore_config[slave_id].arg = arg;

	/* wake the slave; retry on a zero-length or interrupted write */
	do {
		rc = write(to_slave, &token, 1);
	} while (rc == 0 || (rc < 0 && errno == EINTR));
	if (rc < 0)
		rte_panic("cannot write on configuration pipe\n");

	/* block for the acknowledgement; only EINTR is retried */
	for (;;) {
		rc = read(from_slave, &token, 1);
		if (rc >= 0 || errno != EINTR)
			break;
	}
	if (rc <= 0)
		rte_panic("cannot read on configuration pipe\n");

	return 0;
}
  • master将slave对应的f和arg填充好,在m2s的写端写入一个0,同时在s2m的读端等待slave的回复

这里的sync_func是一个空函数,根据注释,它的作用只是让master确认所有slave都已正常启动

    /*
     * Launch a dummy function on all slave lcores, so that master lcore
     * knows they are all ready when this function returns.
     */

void rte_eal_mp_wait_lcore(void)
{
	unsigned lcore_id;

	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		rte_eal_wait_lcore(lcore_id);
	}
}
  • 就是等待slave执行完成,最终将slave状态恢复为WAIT

2.8 rte_service_init

2.9 rte_bus_probe

bus probe的接口。以pci设备为例,对应的函数为rte_pci_probe。该函数先检测是否应用了黑白名单策略,只有设备匹配了白名单,才允许遍历所有驱动去匹配该设备。核心函数pci_probe_all_drivers会扫描pci总线下所有的driver,这里重点分析rte_pci_probe_one_driver函数,看一下pci device是如何与pci driver匹配的。

我们知道,每一个pci device都会尝试与所有的pci driver逐一匹配。

/*
 * Abridged excerpt: try to bind PCI driver `dr` to PCI device `dev`.
 *
 * NOTE: this is a shortened listing, not compilable code. The declarations
 * of `loc`, `ret` and `already_probed`, the handling of the rte_pci_match()
 * result and the failure branch after probe() (shown as "...") are left out.
 *
 * Returns the value produced by rte_pci_map_device() when mapping fails,
 * otherwise the value returned by the driver's probe() callback.
 */
static int rte_pci_probe_one_driver(struct rte_pci_driver *dr,
			 struct rte_pci_device *dev)
{

	loc = &dev->addr;

    /* check whether this driver matches the device (result handling omitted) */
    rte_pci_match(dr, dev)
    /* remember whether the device was already probed before this call */
    already_probed = rte_dev_is_probed(&dev->device);
	RTE_LOG(INFO, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
		dev->id.device_id, dr->driver.name);

    /* first probe of this device: tentatively attach the driver */
    if (!already_probed)
		dev->driver = dr;

    if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) {
		/* map resources for devices that use igb_uio */
		ret = rte_pci_map_device(dev);
		if (ret != 0) {
			/* mapping failed: undo the tentative attach */
			dev->driver = NULL;
			return ret;
		}
	}

	/* call the driver probe() function */
    ret = dr->probe(dr, dev);
    if (already_probed)
    	return ret; /* no rollback if already succeeded earlier */
    if (ret) {
        ...
	} else {
		/* success: record the generic driver on the generic device */
		dev->device.driver = &dr->driver;
	}

	return ret;
}
  • rte_pci_match:pci driver和pci device通过ID Table进行匹配
  • 通过rte_dev_is_probed检测device是否已经probe(条件是dev->device.driver != NULL);如果还没有probe过,则设置dev->driver = dr,表示该设备与此driver匹配上了
  • 如果需要,调用rte_pci_map_device映射一下设备空间,函数放到下一篇文章分析
  • 调用driver的probe 进行driver的初始化,也在下一篇文章分析

2.10 vfio_mp_sync_setup

2.11 rte_service_start_with_defaults

2.12 rte_eal_mcfg_complete

2.13 rte_option_init

 

 

 

 

 

 

 

 

 

 

 

 

你可能感兴趣的:(DPDK)