DPDK内存管理二:初始化

DPDK 内存的初始化主要在rte_eal_init()函数中进行:

  • eal_hugepage_info_init()
/* Collect the kinds and page counts of the hugepages present on the system
 * into internal_config.hugepage_info, for use by the later memory
 * initialisation. Because && short-circuits, eal_hugepage_info_init() is only
 * called when no_hugetlbfs is 0, process_type is not RTE_PROC_SECONDARY and
 * xen_dom0_support is 0; a negative return is treated as fatal. */
    if (internal_config.no_hugetlbfs == 0 &&
            internal_config.process_type != RTE_PROC_SECONDARY &&
            internal_config.xen_dom0_support == 0 &&
            eal_hugepage_info_init() < 0)
        rte_panic("Cannot get hugepage information\n");

    /* Determine the total amount of hugepage memory on the system, computed
     * as hugepage_sz * num_pages.
     * NOTE(review): this excerpt only shows the no_hugetlbfs case, which falls
     * back to MEMSIZE_IF_NO_HUGE_PAGE; the hugepage_sz * num_pages computation
     * itself is not visible here (presumably an elided else branch) - confirm
     * against the full rte_eal_init(). */
    if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
        if (internal_config.no_hugetlbfs)
            internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
    }

eal_hugepage_info_init() 这个函数比较简单:它主要是从 /sys/kernel/mm/hugepages 目录下面读取目录名和文件名,从而获取系统的 hugetlbfs 文件系统数,
以及每个 hugetlbfs 的大页面数目和每个页面的大小。这些信息存放在 internal_config 结构里(internal_config.hugepage_info),并保存在一个文件里。这个函数只有主进程会调用。

  • rte_eal_config_create()
    主要是初始化rte_config.mem_config。如果是以root用户运行dpdk程序的话,rte_config.mem_config指向/var/run/.rte_config文件mmap的一段sizeof(struct rte_mem_config)大小的内存。
    rte_config.mem_config = /var/run/.rte_config文件mmap的首地址;

/*
 * Set up the memory configuration in a shared, file-backed mapping.
 *
 * A write lock is taken on the memsegs part of the file, which is what makes
 * primary/secondary auto-detection possible. As a consequence the file is
 * never closed while the process runs (it is closed automatically on exit).
 * The lock deliberately does not cover the whole file, so that read-locks on
 * other regions (e.g. memzones) can be used in future to detect running
 * secondary processes.
 *
 * Does nothing when internal_config.no_shconf is set. Any failure to open,
 * resize, lock or map the file terminates the process via rte_panic() or
 * rte_exit().
 */
static void
rte_eal_config_create(void)
{
    const char *cfg_path = eal_runtime_config_path();  /* /var/run/.rte_config */
    void *cfg_map = NULL;   /* mmap hint first, then the resulting mapping */

    if (internal_config.no_shconf)
        return;

    /* Ask for the config to sit just below the hugepage base address so
     * that no page is wasted; without a base address let the kernel choose. */
    if (internal_config.base_virtaddr != 0) {
        cfg_map = (void *)
            RTE_ALIGN_FLOOR(internal_config.base_virtaddr -
            sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE));
    }

    /* Open (creating if necessary) the backing file unless a descriptor
     * is already available. */
    if (mem_cfg_fd < 0) {
        mem_cfg_fd = open(cfg_path, O_RDWR | O_CREAT, 0660);
        if (mem_cfg_fd < 0)
            rte_panic("Cannot open '%s' for rte_mem_config\n", cfg_path);
    }

    /* Size the file to hold exactly one struct rte_mem_config. */
    if (ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)) < 0) {
        close(mem_cfg_fd);
        rte_panic("Cannot resize '%s' for rte_mem_config\n", cfg_path);
    }

    /* Non-blocking lock attempt: failure is reported as a possible second
     * primary process. */
    if (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0) {
        close(mem_cfg_fd);
        rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
                "process running?\n", cfg_path);
    }

    /* Map the file MAP_SHARED into this process; the mapping is what
     * rte_config.mem_config (a struct rte_mem_config) will point at. */
    cfg_map = mmap(cfg_map, sizeof(*rte_config.mem_config),
            PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
    if (cfg_map == MAP_FAILED)
        rte_panic("Cannot mmap memory for rte_config\n");

    /* Seed the shared area from the early (pre-mapping) configuration. */
    memcpy(cfg_map, &early_mem_config, sizeof(early_mem_config));
    rte_config.mem_config = (struct rte_mem_config *) cfg_map;

    /* Record the mapping address inside the config itself so that secondary
     * processes can later map the config at this exact location. */
    rte_config.mem_config->mem_cfg_addr = (uintptr_t) cfg_map;
}
  • rte_eal_hugepage_init():primary进程中进行大页的映射

    主要是在 hugetlbfs 文件系统挂载的目录下(有些机器是 /mnt/huge,但是 172.16.6.139 这台机器是挂载在 /dev/hugepages 下面),按照 hugetlbfs 配置的内存页数(172.16.6.139 上设置为 1024)创建相应数量的 rtemap_xx 文件,并为每个 rtemap_xx 文件做 mmap 映射,保证物理地址连续的大页在 mmap 之后虚拟地址也是连续的。
    DPDK内存管理二:初始化_第1张图片

  • rte_eal_memzone_init()函数
    memzone是内存分配器。上一步中,我们已经把大页内存分段放好了,但是在使用的时候,怎么来分配呢?自然需要内存分配器,也就是memzone。rte_eal_memzone_init()主要就是(通过下面列出的rte_eal_malloc_heap_init())把各个内存段放到空闲链表中,等需要的时候,能够分配出去。
    malloc_elem结构体表示一个内存对象

/*
 * Header describing one element handled by the malloc heap code.
 * The structure is cache-line aligned (__rte_cache_aligned).
 */
struct malloc_elem {
    struct malloc_heap *heap;               /* heap this element is associated with
                                             * (presumably its owner - confirm) */
    struct malloc_elem *volatile prev;      /* points to prev elem in memseg */
    LIST_ENTRY(malloc_elem) free_list;      /* list of free elements in heap */
    const struct rte_memseg *ms;            /* memory segment associated with this
                                             * element (presumably the one it lives
                                             * in - confirm) */
    volatile enum elem_state state;         /* current state of the element; the
                                             * enum values are not shown here */
    uint32_t pad;                           /* NOTE(review): looks like padding;
                                             * exact use not visible here */
    size_t size;                            /* size of the element; NOTE(review):
                                             * whether this includes the header is
                                             * not shown here - confirm */
#ifdef RTE_LIBRTE_MALLOC_DEBUG
    uint64_t header_cookie;         /* Cookie marking start of data */
                                    /* trailer cookie at start + size */
#endif
} __rte_cache_aligned;
/*
 * Hand every populated memory segment to a malloc heap.
 *
 * Walks mem_config->memseg[] from index 0 and stops at the first segment
 * whose len is not greater than 0, or after RTE_MAX_MEMSEG entries. Each
 * segment is added to the heap selected by its own socket_id.
 *
 * Returns 0 on success, -1 when the memory configuration is not available.
 */
int
rte_eal_malloc_heap_init(void)
{
    struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
    unsigned int idx = 0;

    if (mcfg == NULL)
        return -1;

    /* Each segment goes to the heap of the socket it belongs to; later
     * allocations are served from those heaps. */
    while (idx < RTE_MAX_MEMSEG && mcfg->memseg[idx].len > 0) {
        struct rte_memseg *seg = &mcfg->memseg[idx];

        malloc_heap_add_memseg(&mcfg->malloc_heaps[seg->socket_id], seg);
        idx++;
    }

    return 0;
}
/*
 * Grow a heap by one memory segment.
 *
 * The segment is reserved as a whole: a dummy malloc_elem header is placed
 * at its end to guard against overflow, and everything before that header
 * is put on the free list as one large free block. The heap's total_size
 * grows by the size of that free block.
 */
static void
malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
{
    /* two block headers are needed: one at the start, one at the end */
    struct malloc_elem *first = (struct malloc_elem *)ms->addr;
    struct malloc_elem *last;
    size_t free_len;

    /* the end header sits MALLOC_ELEM_OVERHEAD bytes before the end of the
     * segment, rounded down to a cache-line boundary */
    last = RTE_PTR_ADD(ms->addr, ms->len - MALLOC_ELEM_OVERHEAD);
    last = RTE_PTR_ALIGN_FLOOR(last, RTE_CACHE_LINE_SIZE);
    free_len = (uintptr_t)last - (uintptr_t)first;

    /* initialise the segment's elements and hang the free one on the
     * free list */
    malloc_elem_init(first, heap, ms, free_len);
    malloc_elem_mkend(last, first);
    malloc_elem_free_list_insert(first);

    heap->total_size += free_len;
}

你可能感兴趣的:(DPDK,内存,DPDK)