bootmem分配器是系统初始化的时候用的内存分配器,用到的结构为:
/*
 * Per-node state of the boot-time (bootmem) allocator, as used by the
 * functions quoted in this document.
 * NOTE(review): some code quoted later accesses bdata->node_min_pfn, which is
 * not a member of the struct as listed here - presumably those snippets were
 * taken from a different kernel version; confirm.
 */
30 typedef struct bootmem_data {
31 unsigned long node_boot_start; /* physical start address of the node, in bytes (PFN_DOWN() is applied to it in __alloc_bootmem_core()) */
32 unsigned long node_low_pfn; /* page frame number used as the upper bound when scanning/freeing */
33 void *node_bootmem_map; /* bitmap, one bit per page; a set bit marks a reserved/allocated page */
34 unsigned long last_offset; /* byte offset within the last page at which the previous allocation ended */
35 unsigned long last_pos; /* bitmap index of the last page used by the previous allocation */
36 unsigned long last_success; /* Previous allocation point. To speed
37 * up searching */
38 struct list_head list; /* list linkage; presumably chains all bootmem_data instances - TODO confirm */
39 } bootmem_data_t;
上面各个字段的作用在下面的代码中会体现得很清楚。下面是初始化bootmem分配器的代码,关于内存大小的相关信息从e820中获取(注意:下面的初始化和释放代码使用了node_min_pfn字段,而上面列出的结构体中并没有这个字段,只有node_boot_start,这些代码片段应当取自不同的内核版本):
/*
 * Set up the bootmem descriptor @bdata for the page frame range [start, end).
 *
 * @bdata:    descriptor to initialise
 * @mapstart: page frame number at which the allocation bitmap is placed
 * @start:    first page frame number covered by the bitmap
 * @end:      page frame number stored as the upper bound (node_low_pfn)
 *
 * Returns the size of the bitmap in bytes.
 */
1 static unsigned long __init init_bootmem_core(bootmem_data_t *bdata, unsigned long mapstart, unsigned long start, unsigned long end)
2 {
3 unsigned long mapsize;
4
5 mminit_validate_memmodel_limits(&start, &end); /* may adjust start/end (passed by address); helper not shown here */
6 bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart)); /* pfn -> physical address -> kernel virtual address */
7 bdata->node_min_pfn = start;
8 bdata->node_low_pfn = end;
9 link_bootmem(bdata); /* presumably adds bdata to the list of bootmem descriptors - TODO confirm */
10
11 /*
12 * Initially all pages are reserved - setup_arch() has to
13 * register free RAM areas explicitly.
14 */
15 mapsize = bootmap_bytes(end - start);
16 memset(bdata->node_bootmem_map, 0xff, mapsize); /* every bit set: every page starts out reserved */
17
18 bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
19 bdata - bootmem_node_data, start, mapstart, end, mapsize);
20
21 return mapsize;
22 }
知道怎么初始化bootmem了,那怎么释放呢?毕竟这只是在内核初始化的时候使用一下:
1 /**
2 * free_all_bootmem_node - release a node's free pages to the buddy allocator
3 * @pgdat: node to be released
4 *
5 * Returns the number of pages actually released.
6 */
7 unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
8 {
9 register_page_bootmem_info_node(pgdat); /* NOTE(review): helper not shown here; its effect cannot be confirmed from this listing */
10 return free_all_bootmem_core(pgdat->bdata); /* the real work is done on the node's bootmem descriptor */
11 }
/*
 * Walk the bootmem bitmap of @bdata one machine word at a time and hand every
 * page whose bit is clear to __free_pages_bootmem(); afterwards free the pages
 * that hold the bitmap itself.
 *
 * Returns the number of pages released (including the bitmap pages), or 0 if
 * the descriptor has no bitmap.
 */
12 static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
13 {
14 int aligned;
15 struct page *page;
16 unsigned long start, end, pages, count = 0;
17
18 if (!bdata->node_bootmem_map)
19 return 0;
20 start = bdata->node_min_pfn;
21 end = bdata->node_low_pfn;
22
23 /*
24 * If the start is aligned to the machines wordsize, we might
25 * be able to free pages in bulks of that order.
26 */
27 aligned = !(start & (BITS_PER_LONG - 1));
28
29 bdebug("nid=%td start=%lx end=%lx aligned=%d\n",
30 bdata - bootmem_node_data, start, end, aligned);
31
32 while (start < end) {
33 unsigned long *map, idx, vec;
34
35 map = bdata->node_bootmem_map;
36 idx = start - bdata->node_min_pfn; /* bit index of 'start' within the bitmap */
37 vec = ~map[idx / BITS_PER_LONG]; /* inverted word: a set bit in vec now denotes a free page */
38
39 if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) { /* whole word free: release BITS_PER_LONG pages as one high-order block */
40 int order = ilog2(BITS_PER_LONG);
41
42 __free_pages_bootmem(pfn_to_page(start), order);
43 count += BITS_PER_LONG;
44 } else { /* otherwise release the free pages of this word one by one */
45 unsigned long off = 0;
46
47 while (vec && off < BITS_PER_LONG) { /* stops early once no set bits remain in vec */
48 if (vec & 1) {
49 page = pfn_to_page(start + off);
50 __free_pages_bootmem(page, 0);
51 count++;
52 }
53 vec >>= 1;
54 off++;
55 }
56 }
57 start += BITS_PER_LONG;
58 }
59
60 page = virt_to_page(bdata->node_bootmem_map); /* now release the pages backing the bitmap itself */
61 pages = bdata->node_low_pfn - bdata->node_min_pfn;
62 pages = bootmem_bootmap_pages(pages); /* converts the covered page count into the number of bitmap pages freed below */
63 count += pages;
64 while (pages--)
65 __free_pages_bootmem(page++, 0);
66
67 bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
68
69 return count;
70 }
在这个过程中不断地调用__free_pages_bootmem()函数,那这个函数是干嘛的?
/*
 * Release @page (order 0) or the block starting at @page (order > 0): clear
 * the reserved flag and reset the count on each page, give the first page a
 * reference, then free it through __free_page()/__free_pages().
 * NOTE(review): the high-order branch always walks BITS_PER_LONG pages,
 * independent of @order; the only high-order caller shown in this document
 * passes ilog2(BITS_PER_LONG).
 */
1 /*
2 * permit the bootmem allocator to evade page validation on high-order frees
3 */
4 void __init __free_pages_bootmem(struct page *page, unsigned int order)
5 {
6 if (order == 0) {
7 __ClearPageReserved(page);
8 set_page_count(page, 0);
9 set_page_refcounted(page);
10 __free_page(page);
11 } else {
12 int loop;
13
14 prefetchw(page);
15 for (loop = 0; loop < BITS_PER_LONG; loop++) {
16 struct page *p = &page[loop];
17
18 if (loop + 1 < BITS_PER_LONG)
19 prefetchw(p + 1); /* prefetch the next struct page before it is written */
20 __ClearPageReserved(p);
21 set_page_count(p, 0);
22 }
23
24 set_page_refcounted(page); /* only the first page of the block gets a reference */
25 __free_pages(page, order);
26 }
27 }
/*
 * Free the block of 2^order pages starting at @page, but only when
 * put_page_testzero() returns true (presumably: the reference count dropped
 * to zero - confirm against its definition). Order-0 pages go through
 * free_hot_page(), larger blocks through __free_pages_ok().
 */
28 void __free_pages(struct page *page, unsigned int order)
29 {
30 if (put_page_testzero(page)) {
31 if (order == 0)
32 free_hot_page(page);
33 else
34 __free_pages_ok(page, order);
35 }
36 }
初始化完成了,bootmem怎么分配空间呢?分配器为了省空间和提高速度做了一些优化,不过实质还是位图:
/*
 * Allocate @size bytes from the bootmem bitmap of @bdata.
 *
 * @bdata: bootmem descriptor to allocate from
 * @size:  number of bytes requested; a zero size triggers BUG()
 * @align: required alignment; must be a power of two (checked with BUG_ON)
 * @goal:  preferred address; the scan starts there when it lies inside the
 *         node, otherwise at the beginning
 * @limit: upper address bound; 0 means no limit
 *
 * Returns the virtual address of the zero-filled area, or NULL when the node
 * has no bitmap, starts at or above @limit, or no suitable run of free pages
 * is found.
 */
192 /*
193 * We 'merge' subsequent allocations to save space. We might 'lose'
194 * some fraction of a page if allocations cannot be satisfied due to
195 * size constraints on boxes where there is physical RAM space
196 * fragmentation - in these cases (mostly large memory boxes) this
197 * is not a problem.
198 *
199 * On low memory boxes we get it right in 100% of the cases.
200 *
201 * alignment has to be a power of 2 value.
202 *
203 * NOTE: This function is _not_ reentrant.
204 */
205 void * __init
206 __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
207 unsigned long align, unsigned long goal, unsigned long limit)
208 {
209 unsigned long offset, remaining_size, areasize, preferred;
210 unsigned long i, start = 0, incr, eidx, end_pfn;
211 void *ret;
212
213 if (!size) {
214 printk("__alloc_bootmem_core(): zero-sized request\n");
215 BUG();
216 }
217 BUG_ON(align & (align-1));
218
219 if (limit && bdata->node_boot_start >= limit) /* node starts at or above the limit: nothing usable here */
220 return NULL;
221
222 /* on nodes without memory - bootmem_map is NULL */
223 if (!bdata->node_bootmem_map)
224 return NULL;
225
226 end_pfn = bdata->node_low_pfn;
227 limit = PFN_DOWN(limit); /* from here on 'limit' is a page frame number */
228 if (limit && end_pfn > limit)
229 end_pfn = limit;
230
231 eidx = end_pfn - PFN_DOWN(bdata->node_boot_start); /* number of bitmap bits (pages) eligible for the scan */
232 offset = 0;
233 if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
234 offset = align - (bdata->node_boot_start & (align - 1UL)); /* bytes from the node start up to the next align boundary */
235 offset = PFN_DOWN(offset); /* ... converted to whole pages */
236
237 /*
238 * We try to allocate bootmem pages above 'goal'
239 * first, then we try to allocate lower pages.
240 */
241 if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) {
242 preferred = goal - bdata->node_boot_start;
243
244 if (bdata->last_success >= preferred) /* previous success lies at or beyond the goal: resume from there */
245 if (!limit || (limit && limit > bdata->last_success))
246 preferred = bdata->last_success;
247 } else
248 preferred = 0;
249
250 preferred = PFN_DOWN(ALIGN(preferred, align)) + offset; /* starting bit index of the scan */
251 areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; /* pages needed, rounded up */
252 incr = align >> PAGE_SHIFT ? : 1; /* scan step in pages, at least 1 (GNU "?:" extension) */
253
254 restart_scan:
255 for (i = preferred; i < eidx; i += incr) {
256 unsigned long j;
257 i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
258 i = ALIGN(i, incr);
259 if (i >= eidx)
260 break;
261 if (test_bit(i, bdata->node_bootmem_map)) /* aligning may have landed on a reserved page */
262 continue;
263 for (j = i + 1; j < i + areasize; ++j) { /* check that the following areasize-1 pages are free too */
264 if (j >= eidx)
265 goto fail_block;
266 if (test_bit(j, bdata->node_bootmem_map))
267 goto fail_block;
268 }
269 start = i;
270 goto found;
271 fail_block:
272 i = ALIGN(j, incr); /* continue the search after the page that blocked this candidate */
273 }
274
275 if (preferred > offset) { /* nothing found from 'preferred' on: rescan once from the lowest position */
276 preferred = offset;
277 goto restart_scan;
278 }
279 return NULL;
280
281 found:
282 bdata->last_success = PFN_PHYS(start);
283 BUG_ON(start >= eidx);
284
285 /*
286 * Is the next page of the previous allocation-end the start
287 * of this allocation's buffer? If yes then we can 'merge'
288 * the previous partial page with this allocation.
289 */
290 if (align < PAGE_SIZE &&
291 bdata->last_offset && bdata->last_pos+1 == start) {
292 offset = ALIGN(bdata->last_offset, align);
293 BUG_ON(offset > PAGE_SIZE);
294 remaining_size = PAGE_SIZE - offset; /* bytes still unused in the previous allocation's last page */
295 if (size < remaining_size) { /* request fits entirely into that partial page: no new page needed */
296 areasize = 0;
297 /* last_pos unchanged */
298 bdata->last_offset = offset + size;
299 ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
300 offset +
301 bdata->node_boot_start);
302 } else { /* use up the partial page and continue into newly reserved pages */
303 remaining_size = size - remaining_size;
304 areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
305 ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
306 offset +
307 bdata->node_boot_start);
308 bdata->last_pos = start + areasize - 1;
309 bdata->last_offset = remaining_size;
310 }
311 bdata->last_offset &= ~PAGE_MASK;
312 } else { /* no merge: the allocation begins at the start of page 'start' */
313 bdata->last_pos = start + areasize - 1;
314 bdata->last_offset = size & ~PAGE_MASK;
315 ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
316 }
317
318 /*
319 * Reserve the area now:
320 */
321 for (i = start; i < start + areasize; i++)
322 if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map))) /* a bit that is already set here means the scan was wrong */
323 BUG();
324 memset(ret, 0, size); /* the returned memory is always zero-filled */
325 return ret;
326 }
如果节点的起始地址已经超过了limit就直接返回NULL,如果连位图(node_bootmem_map)都没有也返回NULL。如果要求对齐,那么就计算offset的值,还要计算“incr”,也就是扫描过程中每次增加的步长。如果传入了参数“goal”,就首先尝试从该位置开始分配内存,这个过程综合“last_success”和“goal”来确定开始搜索的位置,即“preferred”的值。好了,现在从preferred开始遍历,找到一个为“0”的位就判断从这个位置开始有没有足够大的连续空间。如果扫描一次没有找到,就把“preferred”调整为offset重新扫描一次,再没找到就返回NULL。
如果找到了(跳转到“found”标签),那么就更新“last_success”的值。这里为了省空间,可以把这次分配的空间和上次分配剩下的部分页合并(前提是对齐要求小于页大小,并且本次分配的起始页紧跟在上次分配的最后一页之后),这个处理在“last_success”发挥强大威力的时候应该也会起很大作用吧。
最后把分配的地址返回。
---------------------------
个人理解,欢迎拍砖。