目录
一、概述
二、流程
2.1 rte_eal_tailqs_init
2.2 rte_eal_alarm_init
2.3 rte_eal_timer_init
2.4 eal_check_mem_on_local_socket
2.5 eal_thread_init_master
2.6 创建slave thread
2.7 子线程启动
2.8 rte_service_init
2.9 rte_bus_probe
2.10 vfio_mp_sync_setup
2.11 rte_service_start_with_defaults
2.12 rte_eal_mcfg_complete
2.13 rte_option_init
这是初始化系列的第四篇;有些边角的内容没有展开,只写了标题占位。
遍历memseg list,检测master lcore所在的socket上是否分配了内存
将master lcore id保存到thread local变量中
/* Store lcore_id in the calling thread's per-lcore (thread-local) _lcore_id variable. */
RTE_PER_LCORE(_lcore_id) = lcore_id;
/**
* Macro to define a per lcore variable "var" of type "type", don't
* use keywords like "static" or "volatile" in type, just prefix the
* whole macro.
*
* Expands to a __thread variable named per_lcore_<name>, so each
* thread has its own copy.
*/
#define RTE_DEFINE_PER_LCORE(type, name) \
__thread __typeof__(type) per_lcore_##name
/**
* Access the per lcore variable "name"; expands to the identifier
* per_lcore_<name>, so it can be both read and assigned.
*/
#define RTE_PER_LCORE(name) (per_lcore_##name)
在eal_thread.c中有定义:
/* Per-thread lcore id, initialised to LCORE_ID_ANY. */
RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
/* Per-thread socket id, initialised to SOCKET_ID_ANY. */
RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
/* Per-thread CPU set; defined without an explicit initialiser. */
RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
下一步是设置当前thread的亲和性
/*
 * Apply the CPU set stored in lcore_config for the calling lcore to the
 * calling thread.
 *
 * Returns the value reported by rte_thread_set_affinity().
 */
static int eal_thread_set_affinity(void)
{
	const unsigned self = rte_lcore_id();

	/* acquire system unique id; the returned value itself is not used here */
	(void)rte_gettid();

	/* update EAL thread core affinity */
	return rte_thread_set_affinity(&lcore_config[self].cpuset);
}
/*
 * Return the calling thread's system thread id.
 *
 * The id is obtained from rte_sys_gettid() the first time a thread calls
 * this function and is cached in a thread-local variable; while the cached
 * value is not -1, later calls return it without calling rte_sys_gettid().
 */
static inline int rte_gettid(void)
{
	static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
	int cached = RTE_PER_LCORE(_thread_id);

	if (cached != -1)
		return cached;

	/* first call on this thread: fetch and remember the id */
	cached = rte_sys_gettid();
	RTE_PER_LCORE(_thread_id) = cached;
	return cached;
}
存储thread local thread_id
/*
 * Pin the calling thread to the CPUs in *cpusetp and cache the result.
 *
 * On success the socket id derived from the set and a copy of the set are
 * stored in the thread-local _socket_id / _cpuset variables.  If the caller
 * has an lcore id other than LCORE_ID_ANY, its lcore_config entry is
 * updated with the same values.
 *
 * Returns 0 on success, or -1 if pthread_setaffinity_np() fails; in the
 * failure case none of the cached state is modified.
 */
int rte_thread_set_affinity(rte_cpuset_t *cpusetp)
{
	int s;
	unsigned lcore_id;
	pthread_t tid;

	tid = pthread_self();

	s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
	if (s != 0) {
		/* do not cache a cpuset the thread is not actually bound to */
		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
		return -1;
	}

	/* store socket_id in TLS for quick access */
	RTE_PER_LCORE(_socket_id) =
		eal_cpuset_socket_id(cpusetp);

	/* store cpuset in TLS for quick access */
	memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
		sizeof(rte_cpuset_t));

	lcore_id = rte_lcore_id();
	if (lcore_id != (unsigned)LCORE_ID_ANY) {
		/* EAL thread will update lcore_config */
		lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
		memmove(&lcore_config[lcore_id].cpuset, cpusetp,
			sizeof(rte_cpuset_t));
	}

	return 0;
}
/* For every slave lcore: create its two control pipes, then its thread. */
RTE_LCORE_FOREACH_SLAVE(i) {
	/* one pipe per direction between the master thread and this child */
	if (pipe(lcore_config[i].pipe_master2slave) < 0 ||
	    pipe(lcore_config[i].pipe_slave2master) < 0)
		rte_panic("Cannot create pipe\n");

	lcore_config[i].state = WAIT;

	/* the new thread runs eal_thread_loop with a NULL argument */
	ret = pthread_create(&lcore_config[i].thread_id, NULL,
			     eal_thread_loop, NULL);
	if (ret != 0)
		rte_panic("Cannot create thread\n");

	/* name the thread to help debugging; a failure here is only logged */
	snprintf(thread_name, sizeof(thread_name), "lcore-slave-%d", i);
	ret = rte_thread_setname(lcore_config[i].thread_id, thread_name);
	if (ret != 0)
		RTE_LOG(DEBUG, EAL, "Cannot set name for lcore thread\n");
}
接下来看下子线程
__attribute__((noreturn)) void * eal_thread_loop(__attribute__((unused)) void *arg)
{
char c;
int n, ret;
unsigned lcore_id;
pthread_t thread_id;
int m2s, s2m;
char cpuset[RTE_CPU_AFFINITY_STR_LEN];
m2s = lcore_config[lcore_id].pipe_master2slave[0];
s2m = lcore_config[lcore_id].pipe_slave2master[1];
/* set the lcore ID in per-lcore memory area */
RTE_PER_LCORE(_lcore_id) = lcore_id;
/* set CPU affinity */
if (eal_thread_set_affinity() < 0)
rte_panic("cannot set affinity\n");
/* read on our pipe to get commands */
while (1) {
void *fct_arg;
/* wait command */
do {
n = read(m2s, &c, 1);
} while (n < 0 && errno == EINTR);
if (n <= 0)
rte_panic("cannot read on configuration pipe\n");
lcore_config[lcore_id].state = RUNNING;
/* send ack */
n = 0;
while (n == 0 || (n < 0 && errno == EINTR))
n = write(s2m, &c, 1);
if (n < 0)
rte_panic("cannot write on configuration pipe\n");
if (lcore_config[lcore_id].f == NULL)
rte_panic("NULL function pointer\n");
/* call the function and store the return value */
fct_arg = lcore_config[lcore_id].arg;
ret = lcore_config[lcore_id].f(fct_arg);
lcore_config[lcore_id].ret = ret;
rte_wmb();
/* when a service core returns, it should go directly to WAIT
* state, because the application will not lcore_wait() for it.
*/
if (lcore_config[lcore_id].core_role == ROLE_SERVICE)
lcore_config[lcore_id].state = WAIT;
else
lcore_config[lcore_id].state = FINISHED;
}
/* never reached */
/* pthread_exit(NULL); */
/* return NULL; */
}
在2.6我们看到子线程启动后,会等待master lcore通过pipe发送的启动通知:
/* Launch sync_func with a NULL argument on the slave lcores; SKIP_MASTER is passed, so it is not run on the master. */
rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
int rte_eal_mp_remote_launch(int (*f)(void *), void *arg,
enum rte_rmt_call_master_t call_master)
{
int lcore_id;
int master = rte_get_master_lcore();
/* check state of lcores */
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
if (lcore_config[lcore_id].state != WAIT)
return -EBUSY;
}
/* send messages to cores */
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
rte_eal_remote_launch(f, arg, lcore_id);
}
if (call_master == CALL_MASTER) {
lcore_config[master].ret = f(arg);
lcore_config[master].state = FINISHED;
}
return 0;
}
/*
 * Ask slave lcore slave_id to run f(arg).
 *
 * The function pointer and argument are stored in the slave's lcore_config
 * entry, one byte is written to the master->slave pipe, and the call then
 * blocks until one byte is read back from the slave->master pipe.
 *
 * Returns -EBUSY if the slave is not in the WAIT state, 0 once the
 * acknowledgement has been read.  Pipe failures end in rte_panic().
 */
int rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
{
	char msg = 0;
	int rc;
	const int to_slave = lcore_config[slave_id].pipe_master2slave[1];
	const int from_slave = lcore_config[slave_id].pipe_slave2master[0];

	if (lcore_config[slave_id].state != WAIT)
		return -EBUSY;

	lcore_config[slave_id].f = f;
	lcore_config[slave_id].arg = arg;

	/* send message: retry while nothing was written or on EINTR */
	do {
		rc = write(to_slave, &msg, 1);
	} while (rc == 0 || (rc < 0 && errno == EINTR));
	if (rc < 0)
		rte_panic("cannot write on configuration pipe\n");

	/* wait ack: only an interrupted read is retried */
	for (;;) {
		rc = read(from_slave, &msg, 1);
		if (rc >= 0 || errno != EINTR)
			break;
	}
	if (rc <= 0)
		rte_panic("cannot read on configuration pipe\n");

	return 0;
}
这里的sync_func是一个空函数;根据下面的注释,它的作用是检查各个slave是否已经正常启动:
/*
* Launch a dummy function on all slave lcores, so that master lcore
* knows they are all ready when this function returns.
*/
void rte_eal_mp_wait_lcore(void)
{
unsigned lcore_id;
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
rte_eal_wait_lcore(lcore_id);
}
}
bus probe的接口,以pci设备为例,对应的函数为rte_pci_probe。该函数先检测是否应用了黑白名单策略,如果设备匹配了白名单,才允许为它扫描驱动进行匹配。核心函数pci_probe_all_drivers就是扫描pci总线下所有的driver。这里重点分析一下rte_pci_probe_one_driver函数,看一下pci device是如何与pci driver匹配的。
我们知道,每一个pci device都会依次尝试与所有的pci driver进行匹配。
/*
 * Abridged excerpt: try to bind PCI driver dr to PCI device dev.
 * Local declarations and parts of the control flow are omitted, so this
 * listing is illustrative and does not compile as shown.
 */
static int rte_pci_probe_one_driver(struct rte_pci_driver *dr,
struct rte_pci_device *dev)
{
loc = &dev->addr;
/* NOTE(review): the handling of rte_pci_match()'s result is elided here; presumably a non-matching driver is skipped - confirm against the full source */
rte_pci_match(dr, dev)
/* remember whether the device had already been probed before this call */
already_probed = rte_dev_is_probed(&dev->device);
RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id,
dev->id.device_id, dr->driver.name);
/* first probe only: attach the PCI driver to the device */
if (!already_probed)
dev->driver = dr;
if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) {
/* map resources for devices that use igb_uio */
ret = rte_pci_map_device(dev);
if (ret != 0) {
/* mapping failed: undo the driver assignment made above */
dev->driver = NULL;
return ret;
}
}
/* call the driver probe() function */
ret = dr->probe(dr, dev);
if (already_probed)
return ret; /* no rollback if already succeeded earlier */
if (ret) {
/* failure handling for a first probe is elided in this excerpt */
...
} else {
/* probe succeeded: record the generic driver on the generic device */
dev->device.driver = &dr->driver;
}
return ret;
}