DPDK 内存的初始化主要在rte_eal_init()函数中进行:
/* 获取系统中hugepage种类以及数量信息到internal_config.hugepage_info,用于后续内存初始化 */
if (internal_config.no_hugetlbfs == 0 &&
internal_config.process_type != RTE_PROC_SECONDARY &&
internal_config.xen_dom0_support == 0 &&
eal_hugepage_info_init() < 0)
rte_panic("Cannot get hugepage information\n");
/* 获取系统中所有hugepage内存大小,计算方法:hugepage_sz*num_pages */
if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
if (internal_config.no_hugetlbfs)
internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
}
这个函数比较简单,主要是从 /sys/kernel/mm/hugepages 目录下面读取目录名和文件名,从而获取系统的hugetlbfs文件系统数,
以及每个 hugetlbfs 的大页面数目和每个页面大小,并保存在一个文件里,这个函数,只有主进程会调用。存放在internal_config结构里
/* create memory configuration in shared/mmap memory. Take out
* a write lock on the memsegs, so we can auto-detect primary/secondary.
* This means we never close the file while running (auto-close on exit).
* We also don't lock the whole file, so that in future we can use read-locks
* on other parts, e.g. memzones, to detect if there are running secondary
* processes. */
static void
rte_eal_config_create(void)
{
void *rte_mem_cfg_addr;
int retval;
const char *pathname = eal_runtime_config_path(); /* /var/run/.rte_config */
if (internal_config.no_shconf)
return;
/* map the config before hugepage address so that we don't waste a page */
if (internal_config.base_virtaddr != 0)
rte_mem_cfg_addr = (void *)
RTE_ALIGN_FLOOR(internal_config.base_virtaddr -
sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE));
else
rte_mem_cfg_addr = NULL;
if (mem_cfg_fd < 0){
mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
if (mem_cfg_fd < 0)
rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
}
retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
if (retval < 0){
close(mem_cfg_fd);
rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
}
retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
if (retval < 0){
close(mem_cfg_fd);
rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
"process running?\n", pathname);
}
/* 将文件/var/run/.rte_config以共享方式mmap到主进程 即struct rte_mem_config*/
rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config),
PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
if (rte_mem_cfg_addr == MAP_FAILED){
rte_panic("Cannot mmap memory for rte_config\n");
}
memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
/* store address of the config in the config itself so that secondary
* processes could later map the config into this exact location */
rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
}
rte_eal_hugepage_init():primary进程中进行大页的映射
主要是在文件系统hugetlbfs挂载的 目录(有些是/mnt/huge,但是172.16.6.139这台机器是挂载在/dev/hugepages下面)下创建hugetlbfs配置的内存页数(139设置为1024)的rtemap_xx文件,并为每个rtemap_xx文件做mmap映射,保证mmap后的虚拟地址与实际的物理地址是一样的。
rte_eal_memzone_init()函数
memzone是内存分配器,上一步中,我们已经把大页内存分段放好了,但是在使用的时候,怎么来分配呢?自然需要内存分配器,就是memzone。而rte_eal_memzone_init主要就是把内存放到空闲链表中,等需要的时候,能够分配出去
malloc_elem结构体表示一个内存对象
struct malloc_elem {
struct malloc_heap *heap;
struct malloc_elem *volatile prev; /* points to prev elem in memseg */
LIST_ENTRY(malloc_elem) free_list; /* list of free elements in heap */
const struct rte_memseg *ms;
volatile enum elem_state state;
uint32_t pad;
size_t size;
#ifdef RTE_LIBRTE_MALLOC_DEBUG
uint64_t header_cookie; /* Cookie marking start of data */
/* trailer cookie at start + size */
#endif
} __rte_cache_aligned;
int
rte_eal_malloc_heap_init(void)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
unsigned ms_cnt;
struct rte_memseg *ms;
if (mcfg == NULL)
return -1;
/* 依次把每一段都添加到heap中,段属于哪个socket,
就添加到哪个socket的heap中,分配就从这里拿 */
for (ms = &mcfg->memseg[0], ms_cnt = 0;
(ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0);
ms_cnt++, ms++) {
malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms);
}
return 0;
}
/*
* Expand the heap with a memseg.
* This reserves the zone and sets a dummy malloc_elem header at the end
* to prevent overflow. The rest of the zone is added to free list as a single
* large free block
*/
static void
malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
{
/* allocate the memory block headers, one at end, one at start */
struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr;
struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr,
ms->len - MALLOC_ELEM_OVERHEAD);
end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);
const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
/* 把每一段做初始化,并挂在空闲链表中 */
malloc_elem_init(start_elem, heap, ms, elem_size);
malloc_elem_mkend(end_elem, start_elem);
malloc_elem_free_list_insert(start_elem);
heap->total_size += elem_size;
}