Linux Memory Management --- Paging System: Physical Page Management (5)

Linux manages physical memory in a three-level hierarchy:

Node: introduced to support the NUMA model. Nodes are carved out mainly by access speed: all memory within one node is equally fast for a given CPU. On a NUMA machine each CPU has local memory that it can reach quickly, while the other CPUs reach that memory more slowly. Each node is described by a pg_data_t.

Zone: each node is subdivided into several zones according to use; a zone represents a range of memory that the kernel can map in a particular way. Each zone is described by a struct zone.

Page frame: memory is further subdivided into page frames, the basic unit of page allocation. Each page frame is described by a struct page.
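
To make the hierarchy concrete, here is a minimal sketch of walking it top-down. The debugging helper dump_node_zones() is invented for illustration, but the iterators and fields are the kernel's own:

#include <linux/mmzone.h>
#include <linux/nodemask.h>
#include <linux/kernel.h>

static void dump_node_zones(void)
{
    int nid;
    enum zone_type j;

    for_each_online_node(nid) {                 /* one pg_data_t per node */
        pg_data_t *pgdat = NODE_DATA(nid);

        for (j = 0; j < MAX_NR_ZONES; j++) {    /* one struct zone per type */
            struct zone *zone = &pgdat->node_zones[j];

            if (!zone->spanned_pages)           /* zone empty on this node */
                continue;
            printk(KERN_DEBUG "node %d %-8s: spanned %lu present %lu\n",
                   nid, zone->name,
                   zone->spanned_pages, zone->present_pages);
        }
    }
}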

Initializing the node data structures
void __init free_area_init_nodes(unsigned long *max_zone_pfn)
{
    unsigned long start_pfn, end_pfn;
    int i, nid;

    /* Record where the zone boundaries are */
    memset(arch_zone_lowest_possible_pfn, 0,
                sizeof(arch_zone_lowest_possible_pfn));
    memset(arch_zone_highest_possible_pfn, 0,
                sizeof(arch_zone_highest_possible_pfn));
    arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();        //determine the physical page-frame (PFN) range of each zone
    arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
    for (i = 1; i < MAX_NR_ZONES; i++) {
        if (i == ZONE_MOVABLE)
            continue;
        arch_zone_lowest_possible_pfn[i] =
            arch_zone_highest_possible_pfn[i-1];
        arch_zone_highest_possible_pfn[i] =
            max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
    }
    arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
    arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;

    /* Find the PFNs that ZONE_MOVABLE begins at in each node */
    memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
    find_zone_movable_pfns_for_nodes(zone_movable_pfn);

    /* Print out the zone ranges */
    printk("Zone PFN ranges:\n");
    for (i = 0; i < MAX_NR_ZONES; i++) {
        if (i == ZONE_MOVABLE)
            continue;
        printk("  %-8s ", zone_names[i]);
        if (arch_zone_lowest_possible_pfn[i] ==
                arch_zone_highest_possible_pfn[i])
            printk("empty\n");
        else
            printk("%0#10lx -> %0#10lx\n",
                arch_zone_lowest_possible_pfn[i],
                arch_zone_highest_possible_pfn[i]);
    }

    /* Print out the PFNs ZONE_MOVABLE begins at in each node */
    printk("Movable zone start PFN for each node\n");
    for (i = 0; i < MAX_NUMNODES; i++) {
        if (zone_movable_pfn[i])
            printk("  Node %d: %lu\n", i, zone_movable_pfn[i]);
    }

    /* Print out the early_node_map[] */
    printk("Early memory PFN ranges\n");
    for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
        printk("  %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn);

    /* Initialise every node */
    mminit_verify_pageflags_layout();
    setup_nr_node_ids();
    for_each_online_node(nid) {           //initialize each node
        pg_data_t *pgdat = NODE_DATA(nid);
        free_area_init_node(nid, NULL,              //nid is the node id
                find_min_pfn_for_node(nid), NULL);

        /* Any memory on that node */
        if (pgdat->node_present_pages)
            node_set_state(nid, N_HIGH_MEMORY);
        check_for_regular_memory(pgdat);
    }
}

The distribution of physical page frames and the holes within each node is described through CONFIG_HAVE_MEMBLOCK_NODE_MAP (memblock's per-node memory map); we will not cover it in detail here.
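
For orientation only, here is a hedged sketch of how architecture setup code populates that map early in boot. The helper name and the address ranges are invented, and the memblock_set_node() signature shown is the one from this kernel generation (later kernels added a memblock_type argument):

#include <linux/memblock.h>

static void __init register_node_ranges(void)
{
    /* tag each physical range with its node; memblock later answers
     * for_each_mem_pfn_range() queries from this information */
    memblock_set_node(0x00000000, 0x40000000, 0);   /* 0 - 1 GB -> node 0 */
    memblock_set_node(0x40000000, 0x40000000, 1);   /* 1 - 2 GB -> node 1 */
}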

void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
        unsigned long node_start_pfn, unsigned long *zholes_size)
{
    pg_data_t *pgdat = NODE_DATA(nid);

    pgdat->node_id = nid;             //nid is the node id
    pgdat->node_start_pfn = node_start_pfn;                //first page frame of this node
    calculate_node_totalpages(pgdat, zones_size, zholes_size);

    alloc_node_mem_map(pgdat);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
    printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
        nid, (unsigned long)pgdat,
        (unsigned long)pgdat->node_mem_map);
#endif

    free_area_init_core(pgdat, zones_size, zholes_size);
}

static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
        unsigned long *zones_size, unsigned long *zholes_size)
{
    unsigned long realtotalpages, totalpages = 0;
    enum zone_type i;

    for (i = 0; i < MAX_NR_ZONES; i++)
        totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
                                zones_size);
    pgdat->node_spanned_pages = totalpages;            //page frames spanned by this node, holes included

    realtotalpages = totalpages;
    for (i = 0; i < MAX_NR_ZONES; i++)
        realtotalpages -=
            zone_absent_pages_in_node(pgdat->node_id, i,
                                zholes_size);
    pgdat->node_present_pages = realtotalpages;          //page frames actually present in this node, holes excluded
    printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
                            realtotalpages);
}
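
A concrete example of the two counters: if a node spans PFN 0x1000 through 0x9000, node_spanned_pages is 0x8000; if 0x800 of those frames fall into holes, node_present_pages is 0x8000 - 0x800 = 0x7800.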

static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)            //allocate the node's array of page descriptors
{
    /* Skip empty nodes */
    if (!pgdat->node_spanned_pages)
        return;

#ifdef CONFIG_FLAT_NODE_MEM_MAP              //flat memory model: the node's page descriptors live in one contiguous array covering its whole PFN span
    /* ia64 gets its own node_mem_map, before this, without bootmem */
    if (!pgdat->node_mem_map) {
        unsigned long size, start, end;
        struct page *map;

        /*
         * The zone's endpoints aren't required to be MAX_ORDER
         * aligned but the node_mem_map endpoints must be in order
         * for the buddy allocator to function correctly.
         */
        start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
        end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
        end = ALIGN(end, MAX_ORDER_NR_PAGES);
        size = (end - start) * sizeof(struct page);          //covers every frame the node spans, holes included
        map = alloc_remap(pgdat->node_id, size);      //normally returns NULL
        if (!map)
            map = alloc_bootmem_node_nopanic(pgdat, size);     //allocate the page descriptor array from bootmem
        pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);    //points at the descriptors for this node's own page frames
    }
#ifndef CONFIG_NEED_MULTIPLE_NODES      //the system has only one node, i.e. UMA
    /*
     * With no DISCONTIG, the global mem_map is just set as node 0's
     */
    if (pgdat == NODE_DATA(0)) {
        mem_map = NODE_DATA(0)->node_mem_map;         //struct page *mem_map; on UMA, mem_map holds the pointer to the page descriptor array
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
            mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
    }
#endif
#endif /* CONFIG_FLAT_NODE_MEM_MAP */
}
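
A worked example of the rounding above, with invented numbers and assuming MAX_ORDER = 11, so that MAX_ORDER_NR_PAGES = 1 << (11 - 1) = 0x400:

    /*
     * node_start_pfn     = 0x1234, node_spanned_pages = 0x8000
     *
     * start = 0x1234 & ~(0x400 - 1)          = 0x1000
     * end   = ALIGN(0x1234 + 0x8000, 0x400)  = 0x9400
     * size  = (0x9400 - 0x1000) * sizeof(struct page)
     * node_mem_map = map + (0x1234 - 0x1000) = map + 0x234
     *
     * so node_mem_map skips the 0x234 descriptors that exist only
     * to keep the array MAX_ORDER-aligned for the buddy allocator.
     */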


static void __paginginit free_area_init_core(struct pglist_data *pgdat,             //initialize the zones within a node
        unsigned long *zones_size, unsigned long *zholes_size)
{
    enum zone_type j;
    int nid = pgdat->node_id;
    unsigned long zone_start_pfn = pgdat->node_start_pfn;
    int ret;

    pgdat_resize_init(pgdat);
    pgdat->nr_zones = 0;
    init_waitqueue_head(&pgdat->kswapd_wait);
    pgdat->kswapd_max_order = 0;
    pgdat_page_cgroup_init(pgdat);
    
    for (j = 0; j < MAX_NR_ZONES; j++) {      //initialize each zone
        struct zone *zone = pgdat->node_zones + j;
        unsigned long size, realsize, memmap_pages;
        enum lru_list lru;

        size = zone_spanned_pages_in_node(nid, j, zones_size);
        realsize = size - zone_absent_pages_in_node(nid, j,
                                zholes_size);

        /*
         * Adjust realsize so that it accounts for how much memory
         * is used by this zone for memmap. This affects the watermark
         * and per-cpu initialisations
         */
        memmap_pages =
            PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
        if (realsize >= memmap_pages) {
            realsize -= memmap_pages;
            if (memmap_pages)
                printk(KERN_DEBUG
                       "  %s zone: %lu pages used for memmap\n",
                       zone_names[j], memmap_pages);
        } else
            printk(KERN_WARNING
                "  %s zone: %lu pages exceeds realsize %lu\n",
                zone_names[j], memmap_pages, realsize);

        /* Account for reserved pages */
        if (j == 0 && realsize > dma_reserve) {
            realsize -= dma_reserve;
            printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
                    zone_names[0], dma_reserve);
        }

        if (!is_highmem_idx(j))
            nr_kernel_pages += realsize;
        nr_all_pages += realsize;

        zone->spanned_pages = size;            //page frames spanned by the zone, holes included
        zone->present_pages = realsize;        //page frames present in the zone, holes excluded
#ifdef CONFIG_NUMA
        zone->node = nid;
        zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
                        / 100;
        zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
#endif
        zone->name = zone_names[j];
        spin_lock_init(&zone->lock);
        spin_lock_init(&zone->lru_lock);
        zone_seqlock_init(zone);
        zone->zone_pgdat = pgdat;

        zone_pcp_init(zone);
        for_each_lru(lru)
            INIT_LIST_HEAD(&zone->lruvec.lists[lru]);
        zone->reclaim_stat.recent_rotated[0] = 0;
        zone->reclaim_stat.recent_rotated[1] = 0;
        zone->reclaim_stat.recent_scanned[0] = 0;
        zone->reclaim_stat.recent_scanned[1] = 0;
        zap_zone_vm_stats(zone);
        zone->flags = 0;
        if (!size)
            continue;

        set_pageblock_order(pageblock_default_order());
        setup_usemap(pgdat, zone, size);
        ret = init_currently_empty_zone(zone, zone_start_pfn,
                        size, MEMMAP_EARLY);
        BUG_ON(ret);
        memmap_init(size, nid, j, zone_start_pfn);
        zone_start_pfn += size;
    }
}
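
To get a feel for the memmap adjustment above: with 4 KB pages and assuming sizeof(struct page) == 32 (a typical 32-bit figure; 64-bit kernels use more), a zone spanning 256 MB has size = 65536 frames, so memmap_pages = PAGE_ALIGN(65536 * 32) >> PAGE_SHIFT = 512, and realsize is reduced by those 512 frames before the watermarks and per-cpu values are computed.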

void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
        unsigned long start_pfn, enum memmap_context context)
{
    struct page *page;
    unsigned long end_pfn = start_pfn + size;
    unsigned long pfn;
    struct zone *z;

    if (highest_memmap_pfn < end_pfn - 1)
        highest_memmap_pfn = end_pfn - 1;

    z = &NODE_DATA(nid)->node_zones[zone];
    for (pfn = start_pfn; pfn < end_pfn; pfn++) {
        /*
         * There can be holes in boot-time mem_map[]s
         * handed to this function.  They do not
         * exist on hotplugged memory.
         */
        if (context == MEMMAP_EARLY) {
            if (!early_pfn_valid(pfn))
                continue;
            if (!early_pfn_in_nid(pfn, nid))
                continue;
        }
        page = pfn_to_page(pfn);
        set_page_links(page, zone, nid, pfn);    //record which node and zone this page descriptor belongs to
        mminit_verify_page_links(page, zone, nid, pfn);
        init_page_count(page);               //initial _count is set to 1
        reset_page_mapcount(page);
        SetPageReserved(page);
        /*
         * Mark the block movable so that blocks are reserved for
         * movable at startup. This will force kernel allocations
         * to reserve their blocks rather than leaking throughout
         * the address space during boot when many long-lived
         * kernel allocations are made. Later some blocks near
         * the start are marked MIGRATE_RESERVE by
         * setup_zone_migrate_reserve()
         *
         * bitmap is created for zone's valid pfn range. but memmap
         * can be created for invalid pages (for alignment)
         * check here not to call set_pageblock_migratetype() against
         * pfn out of zone.
         */
        if ((z->zone_start_pfn <= pfn)
            && (pfn < z->zone_start_pfn + z->spanned_pages)
            && !(pfn & (pageblock_nr_pages - 1)))
            set_pageblock_migratetype(page, MIGRATE_MOVABLE);

        INIT_LIST_HEAD(&page->lru);
#ifdef WANT_PAGE_VIRTUAL
        /* The shift won't overflow because ZONE_NORMAL is below 4G. */
        if (!is_highmem_idx(zone))
            set_page_address(page, __va(pfn << PAGE_SHIFT));
#endif
    }
}
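
Note that (pfn & (pageblock_nr_pages - 1)) is zero only on pageblock boundaries, so set_pageblock_migratetype() runs once per block rather than once per page: in the common configuration pageblock_order is MAX_ORDER - 1 = 10, i.e. the MIGRATE_MOVABLE tag is written once every 1024 frames.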

#if defined(CONFIG_FLATMEM)
#define __pfn_to_page(pfn)    (mem_map + ((pfn) - ARCH_PFN_OFFSET))
#endif
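
Under FLATMEM the reverse direction is the same pointer arithmetic inverted; its companion macro in include/asm-generic/memory_model.h is:

#if defined(CONFIG_FLATMEM)
#define __page_to_pfn(page)    ((unsigned long)((page) - mem_map) + ARCH_PFN_OFFSET)
#endif

So pfn_to_page()/page_to_pfn() reduce to an index into, and an offset from, the single global mem_map array.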

static inline void set_page_links(struct page *page, enum zone_type zone,
    unsigned long node, unsigned long pfn)
{
    set_page_zone(page, zone);
    set_page_node(page, node);
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
    set_page_section(page, pfn_to_section_nr(pfn));
#endif
}
static inline void set_page_zone(struct page *page, enum zone_type zone)
{
    page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
    page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
}

static inline void set_page_node(struct page *page, unsigned long node)
{
    page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
    page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
}
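
These setters work because page->flags packs several fields above the flag bits proper; the layout comment in include/linux/mm.h sketches it (field widths depend on the configuration) as:

/*
 * page->flags layout:
 *
 *  | [SECTION] | [NODE] | ZONE | ... unused ... | FLAGS |
 *    high bits                                   low bits
 */

Reading a field back is the mirror image of the setters; for example the kernel's page_zonenum():

static inline enum zone_type page_zonenum(const struct page *page)
{
    return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}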
static inline void init_page_count(struct page *page)
{
    atomic_set(&page->_count, 1);
}
static inline void reset_page_mapcount(struct page *page)
{
    atomic_set(&(page)->_mapcount, -1);
}

SetPageReserved is defined via:
#define SETPAGEFLAG(uname, lname)                    \
static inline void SetPage##uname(struct page *page)            \
            { set_bit(PG_##lname, &page->flags); }
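
Expanding it with uname = Reserved and lname = reserved yields the function that memmap_init_zone() calls above:

static inline void SetPageReserved(struct page *page)
            { set_bit(PG_reserved, &page->flags); }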
