Linux伙伴系统(二)--伙伴系统的初始化

水平有限,描述不当之处还请指出,转载请注明出处http://blog.csdn.net/vanbreaker/article/details/7611585

伙伴系统的初始化主要是初始化之前介绍的伙伴系统涉及到的数据结构,并且把系统初始化时由bootmem allocator管理的低端内存以及系统的高端内存释放到伙伴系统中去。其中有些和zone相关的域在前面<>中已经有所介绍。

在start_kernel()-->paging_init()-->zone_sizes_init()-->free_area_init_nodes()-->free_area_init_node()-->free_area_init_core()-->init_currently_empty_zone()-->zone_init_free_lists()中,free_area的相关域都被初始化

static void __meminit zone_init_free_lists(struct zone *zone)
{
	int order, t;
	for_each_migratetype_order(order, t) {
		/*链表初始化为空链表*/
		INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
		/*内存块数量初始化为0*/
		zone->free_area[order].nr_free = 0;
	}
}


for_each_migratetype_order(order, t)就是两个嵌套的for循环

#define for_each_migratetype_order(order, type) \
	for (order = 0; order < MAX_ORDER; order++) \
		for (type = 0; type < MIGRATE_TYPES; type++)


start_kernel()-->mm_init()-->mem_init(),负责统计所有可用的低端内存和高端内存,并释放到伙伴系统中

void __init mem_init(void)
{
	int codesize, reservedpages, datasize, initsize;
	int tmp;

	pci_iommu_alloc();

#ifdef CONFIG_FLATMEM
	BUG_ON(!mem_map);
#endif
	/* this will put all low memory onto the freelists */
     /*销毁bootmem分配器,释放bootmem分配器管理的空闲页框和bootmem的位图所占用的页框,
         并计入totalram_pages*/
	totalram_pages += free_all_bootmem();

	reservedpages = 0;
	for (tmp = 0; tmp < max_low_pfn; tmp++)
		/*
		 * Only count reserved RAM pages:
		 */
		if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
			reservedpages++;

	/*处理高端内存页框*/
	set_highmem_pages_init();

	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
			"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
		nr_free_pages() << (PAGE_SHIFT-10),
		num_physpages << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10,
		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
	       );

	printk(KERN_INFO "virtual kernel memory layout:\n"
		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#endif
		"    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
		"    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
		"      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
		"      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
		"      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
		FIXADDR_START, FIXADDR_TOP,
		(FIXADDR_TOP - FIXADDR_START) >> 10,

#ifdef CONFIG_HIGHMEM
		PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
		(LAST_PKMAP*PAGE_SIZE) >> 10,
#endif

		VMALLOC_START, VMALLOC_END,
		(VMALLOC_END - VMALLOC_START) >> 20,

		(unsigned long)__va(0), (unsigned long)high_memory,
		((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,

		(unsigned long)&__init_begin, (unsigned long)&__init_end,
		((unsigned long)&__init_end -
		 (unsigned long)&__init_begin) >> 10,

		(unsigned long)&_etext, (unsigned long)&_edata,
		((unsigned long)&_edata - (unsigned long)&_etext) >> 10,

		(unsigned long)&_text, (unsigned long)&_etext,
		((unsigned long)&_etext - (unsigned long)&_text) >> 10);

	/*
	 * Check boundaries twice: Some fundamental inconsistencies can
	 * be detected at build time already.
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
#ifdef CONFIG_HIGHMEM
	BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE	> FIXADDR_START);
	BUILD_BUG_ON(VMALLOC_END			> PKMAP_BASE);
#endif
#define high_memory (-128UL << 20)
	BUILD_BUG_ON(VMALLOC_START			>= VMALLOC_END);
#undef high_memory
#undef __FIXADDR_TOP

#ifdef CONFIG_HIGHMEM
	BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE	> FIXADDR_START);
	BUG_ON(VMALLOC_END				> PKMAP_BASE);
#endif
	BUG_ON(VMALLOC_START				>= VMALLOC_END);
	BUG_ON((unsigned long)high_memory		> VMALLOC_START);

	if (boot_cpu_data.wp_works_ok < 0)
		test_wp_bit();

	save_pg_dir();
	/*将前3GB线性地址(即用户地址空间)对应的pgd全局目录项清空*/
	zap_low_mappings(true);
}


free_all_bootmem()的核心函数在<>中已有介绍,这里不再列出

void __init set_highmem_pages_init(void)
{
	struct zone *zone;
	int nid;

	for_each_zone(zone) {/*遍历每个管理区*/
		unsigned long zone_start_pfn, zone_end_pfn;

		if (!is_highmem(zone))/*判断是否是高端内存管理区*/
			continue;

		/*记录高端内存管理区的起始页框号和结束页框号*/
		zone_start_pfn = zone->zone_start_pfn;
		zone_end_pfn = zone_start_pfn + zone->spanned_pages;

		nid = zone_to_nid(zone);
		printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n",
				zone->name, nid, zone_start_pfn, zone_end_pfn);

		/*将高端内存的页框添加到伙伴系统*/
		add_highpages_with_active_regions(nid, zone_start_pfn,
				 zone_end_pfn);
	}
	/*将释放到伙伴系统的高端内存页框数计入totalram_pages*/
	totalram_pages += totalhigh_pages;
}


void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
					      unsigned long end_pfn)
{
	struct add_highpages_data data;

	data.start_pfn = start_pfn;
	data.end_pfn = end_pfn;

	/*遍历所有的活动区,调用add_highpages_word_fn()来处理每个活动区*/
	work_with_active_regions(nid, add_highpages_work_fn, &data);
}


void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
{
	int i;
	int ret;

	for_each_active_range_index_in_nid(i, nid) {
		ret = work_fn(early_node_map[i].start_pfn,
			      early_node_map[i].end_pfn, data);
		if (ret)
			break;
	}
}

static int __init add_highpages_work_fn(unsigned long start_pfn,
					 unsigned long end_pfn, void *datax)
{
	int node_pfn;
	struct page *page;
	unsigned long final_start_pfn, final_end_pfn;
	struct add_highpages_data *data;

	data = (struct add_highpages_data *)datax;

	/*得到合理的起始结束页框号*/
	final_start_pfn = max(start_pfn, data->start_pfn);
	final_end_pfn = min(end_pfn, data->end_pfn);
	if (final_start_pfn >= final_end_pfn)
		return 0;

	/*遍历活动区的页框*/
	for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
	     node_pfn++) {
		if (!pfn_valid(node_pfn))
			continue;
		page = pfn_to_page(node_pfn);
		add_one_highpage_init(page, node_pfn);/*将该页框添加到伙伴系统*/
	}

	return 0;

}


static void __init add_one_highpage_init(struct page *page, int pfn)
{
	ClearPageReserved(page);
	init_page_count(page);/*count初始化为1*/
	__free_page(page);     /*释放page到伙伴系统*/
	totalhigh_pages++;      /*高端内存的页数加1*/
}


__free_page()中涉及到的具体操作在后面介绍伙伴系统释放页面时再做分析

在free_area_init_core()-->memmap_init()(-->memmap_init_zone() )-->set_pageblock_migratetype(),将每个pageblock的起始页框对应的struct zone当中的pageblock_flags代表的位图相关区域标记为可移动的,表示该pageblock为可移动的,也就是说内核初始化伙伴系统时,所有的页都被标记为可移动的

void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
		unsigned long start_pfn, enum memmap_context context)
{
	struct page *page;
	unsigned long end_pfn = start_pfn + size;
	unsigned long pfn;
	struct zone *z;

	if (highest_memmap_pfn < end_pfn - 1)
		highest_memmap_pfn = end_pfn - 1;

	z = &NODE_DATA(nid)->node_zones[zone];
	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		/*
		 * There can be holes in boot-time mem_map[]s
		 * handed to this function.  They do not
		 * exist on hotplugged memory.
		 */
		if (context == MEMMAP_EARLY) {
			if (!early_pfn_valid(pfn))
				continue;
			if (!early_pfn_in_nid(pfn, nid))
				continue;
		}
		page = pfn_to_page(pfn);
		set_page_links(page, zone, nid, pfn);
		mminit_verify_page_links(page, zone, nid, pfn);
		init_page_count(page);
		reset_page_mapcount(page);
		SetPageReserved(page);
		/*
		 * Mark the block movable so that blocks are reserved for
		 * movable at startup. This will force kernel allocations
		 * to reserve their blocks rather than leaking throughout
		 * the address space during boot when many long-lived
		 * kernel allocations are made. Later some blocks near
		 * the start are marked MIGRATE_RESERVE by
		 * setup_zone_migrate_reserve()
		 *
		 * bitmap is created for zone's valid pfn range. but memmap
		 * can be created for invalid pages (for alignment)
		 * check here not to call set_pageblock_migratetype() against
		 * pfn out of zone.
		 */
		 /*如果pfn处在合理的范围,并且该pfn是一个pageblock的起始页框号*/
		if ((z->zone_start_pfn <= pfn)
		    && (pfn < z->zone_start_pfn + z->spanned_pages)
		    && !(pfn & (pageblock_nr_pages - 1)))
			set_pageblock_migratetype(page, MIGRATE_MOVABLE);/*将页框对应的位图域标记为可移动的*/

		INIT_LIST_HEAD(&page->lru);
#ifdef WANT_PAGE_VIRTUAL
		/* The shift won't overflow because ZONE_NORMAL is below 4G. */
		if (!is_highmem_idx(zone))
			set_page_address(page, __va(pfn << PAGE_SHIFT));
#endif
	}
}


static void set_pageblock_migratetype(struct page *page, int migratetype)
{

	/*如果没有开启移动性分类,则所有页都要标记为不可移动的*/
	if (unlikely(page_group_by_mobility_disabled))
		migratetype = MIGRATE_UNMOVABLE;
    
	set_pageblock_flags_group(page, (unsigned long)migratetype,
					PB_migrate, PB_migrate_end);
}


void set_pageblock_flags_group(struct page *page, unsigned long flags,
					int start_bitidx, int end_bitidx)
{
	struct zone *zone;
	unsigned long *bitmap;
	unsigned long pfn, bitidx;
	unsigned long value = 1;

	zone = page_zone(page);
	pfn = page_to_pfn(page);
	/*得到位图的起始地址,即zone->pageblock_flags*/
	bitmap = get_pageblock_bitmap(zone, pfn);
	/*得到pfn对应的位图区域的偏移量*/
	bitidx = pfn_to_bitidx(zone, pfn);
	VM_BUG_ON(pfn < zone->zone_start_pfn);
	VM_BUG_ON(pfn >= zone->zone_start_pfn + zone->spanned_pages);

	/*每个页都需要3个bit来表征其移动性,也就是从start_bitidx到end_bitidx共3位*/
	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
		if (flags & value)/*通过flags和value相与确定相关的位是否为1*/
			__set_bit(bitidx + start_bitidx, bitmap);
		else
			__clear_bit(bitidx + start_bitidx, bitmap);
}


你可能感兴趣的:(Linux伙伴系统(二)--伙伴系统的初始化)