有时候虽然buddy system已经尽力去找了,但是仍旧找不到一大块连续的物理内存,这时就要像用户进程一样使用处理器的分页机制了(虽然慢一点)。内核中用“vmalloc”来分配在虚拟内存中连续、但是在物理内存中不一定连续的内存。管理这部分内存的结构为:
/*
 * Every sub-area handed out by vmalloc is described by one vm_struct
 * instance in kernel memory. Instances are chained through ->next into
 * the global vmlist (see __get_vm_area_node).
 */
struct vm_struct {
struct vm_struct *next; /* next entry in the singly linked vmlist */
void *addr; /* start address of the area in virtual address space */
unsigned long size; /* length in bytes; __get_vm_area_node stores it including the extra guard page */
unsigned long flags; /* set of flags associated with this area (VM_ALLOC, VM_IOREMAP, VM_VPAGES are used in this file) */
struct page **pages; /* array of page pointers: the physical pages mapped into this virtual range */
unsigned int nr_pages; /* number of entries in pages */
unsigned long phys_addr; /* per the original note: only used when ioremap maps a region given by a physical address; set to 0 otherwise */
};
其实看到这个结构就可以猜到该怎么执行了,下面看代码:
/*
 * vmalloc - allocate memory that is contiguous in kernel virtual space.
 * @size: requested length in bytes
 *
 * Thin front end: forwards to __vmalloc() with the allocation flags
 * GFP_KERNEL | __GFP_HIGHMEM and the PAGE_KERNEL protection, and hands
 * back whatever __vmalloc() returns.
 */
void *vmalloc(unsigned long size)
{
	void *mem;

	mem = __vmalloc(size,
			GFP_KERNEL | __GFP_HIGHMEM,
			PAGE_KERNEL);
	return mem;
}
/*
 * __vmalloc - vmalloc variant with caller-supplied flags and protection.
 * @size:     requested length in bytes
 * @gfp_mask: allocation flags passed down unchanged
 * @prot:     page protection passed down unchanged
 *
 * Delegates to __vmalloc_node() with a node argument of -1; the page
 * allocation loop in __vmalloc_area_node() treats a negative node as
 * "no specific node requested".
 */
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
	const int no_specific_node = -1;

	return __vmalloc_node(size, gfp_mask, prot, no_specific_node);
}
上面的这个只是一个引子:
/*
 * __vmalloc_node - reserve a virtual area and back it with pages.
 * @size:     requested length in bytes (rounded up to whole pages here)
 * @gfp_mask: allocation flags
 * @prot:     page protection for the mapping
 * @node:     node to allocate on, or a negative value for no preference
 *
 * Returns the start address of the mapped area, or NULL when the size is
 * zero after page alignment, exceeds num_physpages pages, or any of the
 * later steps fails.
 */
static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, int node)
{
	struct vm_struct *vm;
	unsigned long wanted_pages;

	size = PAGE_ALIGN(size);
	wanted_pages = size >> PAGE_SHIFT;

	/* Reject empty requests and requests larger than physical memory. */
	if (size == 0 || wanted_pages > num_physpages)
		return NULL;

	/* Step 1: find and reserve a slot in the vmalloc address range. */
	vm = get_vm_area_node(size, VM_ALLOC, node, gfp_mask);
	if (vm == NULL)
		return NULL;

	/* Step 2: allocate the physical pages and map them into the slot. */
	return __vmalloc_area_node(vm, gfp_mask, prot, node);
}
根据子区间的长度信息,在虚拟的vmalloc空间中找到一个适当的位置:
/*
 * get_vm_area_node - reserve a virtual area inside the vmalloc window.
 * @size:     requested length in bytes
 * @flags:    flags to record in the resulting vm_struct
 * @node:     node argument passed through unchanged
 * @gfp_mask: allocation flags passed through unchanged
 *
 * Calls __get_vm_area_node() with the search window fixed to
 * VMALLOC_START..VMALLOC_END and returns its result.
 */
struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, int node, gfp_t gfp_mask)
{
	struct vm_struct *area;

	area = __get_vm_area_node(size, flags,
				  VMALLOC_START, VMALLOC_END,
				  node, gfp_mask);
	return area;
}
下面是找到合适的位置并插入一个vm_struct结构的过程:
/*
 * Reserve a range of kernel virtual addresses inside the window given by
 * start/end and insert a new vm_struct describing it into the global vmlist.
 *
 * size:      requested length in bytes; rounded up to a page multiple, then
 *            one extra page is added before the search.
 * flags:     stored in area->flags; VM_IOREMAP additionally selects a larger
 *            alignment derived from size.
 * start/end: bounds of the address window that is searched.
 * node:      passed to kmalloc_node() for the vm_struct itself.
 * gfp_mask:  allocation flags, masked with GFP_RECLAIM_MASK for the vm_struct.
 *
 * Returns the new vm_struct, or NULL if the size is zero after alignment,
 * the descriptor cannot be allocated, or no suitable gap is found (in the
 * last case a rate-limited warning is printed).
 */
static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, int node, gfp_t gfp_mask)
{
struct vm_struct **p, *tmp, *area;
unsigned long align = 1;
unsigned long addr;
/* Calling this from interrupt context is treated as a bug. */
BUG_ON(in_interrupt());
/*
 * For ioremap areas, align to a power of two derived from fls(size),
 * clamped to the range [PAGE_SHIFT, IOREMAP_MAX_ORDER] bits.
 */
if (flags & VM_IOREMAP) {
int bit = fls(size);
if (bit > IOREMAP_MAX_ORDER)
bit = IOREMAP_MAX_ORDER;
else if (bit < PAGE_SHIFT)
bit = PAGE_SHIFT;
align = 1ul << bit;
}
/* First candidate address: the aligned start of the window. */
addr = ALIGN(start, align);
size = PAGE_ALIGN(size);
if (unlikely(!size))
return NULL;
/* NOTE(review): kmalloc_node() is presumably served by the slab allocator - confirm. */
area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
if (unlikely(!area))
return NULL;
/* Always reserve one extra page as a guard page. */
size += PAGE_SIZE;
write_lock(&vmlist_lock);
/*
 * Walk every entry of vmlist until a suitable gap is found.
 * NOTE(review): the walk appears to rely on vmlist being ordered by
 * ascending address; inserting at *p below keeps that order - confirm.
 */
for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
/* Entry starts below the candidate: skip it, moving the candidate past its end if they touch or overlap. */
if ((unsigned long)tmp->addr < addr) {
if((unsigned long)tmp->addr + tmp->size >= addr)
addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
continue;
}
/* Address arithmetic wrapped around? */
if ((size + addr) < addr)
goto out;
/* The gap in front of this entry is large enough: use it. */
if (size + addr <= (unsigned long)tmp->addr)
goto found;
/* Otherwise retry just past this entry, unless that would exceed the window. */
addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
if (addr > end - size)
goto out;
}
/* Reached the end of the list: same wrap-around and window checks for the tail position. */
if ((size + addr) < addr)
goto out;
if (addr > end - size)
goto out;
found:
/* Link the new descriptor in front of *p and fill it in. */
area->next = *p;
*p = area;
area->flags = flags;
area->addr = (void *)addr;
area->size = size; /* includes the guard page added above */
area->pages = NULL;
area->nr_pages = 0;
area->phys_addr = 0;
write_unlock(&vmlist_lock);
return area;
out:
/* No room: drop the lock, free the unused descriptor and warn (rate limited). */
write_unlock(&vmlist_lock);
kfree(area);
if (printk_ratelimit())
printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
return NULL;
}
下面是分配页面的过程:
/*
 * __vmalloc_area_node - allocate the pages for a reserved area and map them.
 * @area:     descriptor of the already reserved virtual range
 * @gfp_mask: allocation flags for the page array and the pages
 * @prot:     page protection used for the mapping
 * @node:     node to allocate from; a negative value means alloc_page()
 *            is used without a node
 *
 * Returns area->addr on success. On failure returns NULL after releasing
 * the area: via remove_vm_area() + kfree() if the page array could not be
 * allocated, via vfree() if a page allocation or the mapping failed.
 */
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot, int node)
{
	struct page **page_vec;
	unsigned int count, vec_bytes, idx;

	/* area->size includes one guard page that gets no backing page. */
	count = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
	vec_bytes = (count * sizeof(struct page *));
	area->nr_pages = count;

	/*
	 * A page array that fits in one page comes from kmalloc_node();
	 * a larger one is itself obtained through __vmalloc_node() and the
	 * area is tagged VM_VPAGES so the array is later released with vfree().
	 */
	if (vec_bytes <= PAGE_SIZE) {
		page_vec = kmalloc_node(vec_bytes, (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO, node);
	} else {
		page_vec = __vmalloc_node(vec_bytes, gfp_mask | __GFP_ZERO, PAGE_KERNEL, node);
		area->flags |= VM_VPAGES;
	}

	area->pages = page_vec;
	if (page_vec == NULL) {
		remove_vm_area(area->addr);
		kfree(area);
		return NULL;
	}

	/* Allocate the backing pages one at a time. */
	for (idx = 0; idx < area->nr_pages; idx++) {
		struct page *pg;

		if (node >= 0)
			pg = alloc_pages_node(node, gfp_mask, 0);
		else
			pg = alloc_page(gfp_mask);

		if (unlikely(!pg)) {
			/* Record how many pages exist so cleanup frees only those. */
			area->nr_pages = idx;
			goto fail;
		}
		area->pages[idx] = pg;
	}

	if (map_vm_area(area, prot, &page_vec))
		goto fail;

	return area->addr;

fail:
	vfree(area->addr);
	return NULL;
}
虽然已经分配了页,但是还得建立页表映射,这样才能通过分页机制访问这些页:
/*
 * map_vm_area - map the pages of an area into its virtual address range.
 * @area:  descriptor whose ->addr and ->size define the range
 * @prot:  page protection passed down to vmap_pud_range()
 * @pages: pointer to the page-array cursor, passed down to vmap_pud_range()
 *
 * The range covered is [area->addr, area->addr + area->size - PAGE_SIZE),
 * i.e. the trailing guard page is left out. Returns 0 when every chunk was
 * processed, otherwise the first non-zero value from vmap_pud_range().
 * flush_cache_vmap() is called in both cases.
 */
int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
{
	unsigned long cur = (unsigned long) area->addr;
	unsigned long end = cur + area->size - PAGE_SIZE;
	unsigned long chunk_end;
	pgd_t *pgd;
	int err;

	BUG_ON(cur >= end);

	/* Start at the top-level (pgd) entry covering the first address. */
	pgd = pgd_offset_k(cur);

	for (;;) {
		/* Handle one pgd entry's worth of addresses per iteration. */
		chunk_end = pgd_addr_end(cur, end);
		err = vmap_pud_range(pgd, cur, chunk_end, prot, pages);
		if (err)
			break;

		pgd++;
		cur = chunk_end;
		if (cur == end)
			break;
	}

	flush_cache_vmap((unsigned long) area->addr, end);
	return err;
}
如果vmalloc分配的空间不想用了,释放的过程和上面申请的过程类似:遍历vmlist找到对应的项并将其删去,然后释放掉所分配的页:
static void __vunmap(const void *addr, int deallocate_pages)
{
struct vm_struct *area;
if (!addr)
return;
if ((PAGE_SIZE-1) & (unsigned long)addr) {
printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
WARN_ON(1);
return;
}
area = remove_vm_area(addr);
if (unlikely(!area)) {
printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
addr);
WARN_ON(1);
return;
}
debug_check_no_locks_freed(addr, area->size);
if (deallocate_pages) {
int i;
/* 循环释放所占用的页 */
for (i = 0; i < area->nr_pages; i++) {
struct page *page = area->pages[i];
BUG_ON(!page);
__free_page(page);
}
/* 释放数组占用的空间 */
if (area->flags & VM_VPAGES)
vfree(area->pages);
else
kfree(area->pages);
}
/* 释放vm_struct */
kfree(area);
return;
}
vmalloc大概就是上面的这些吧,代码看起来还是没什么困难的。
---------------------------------
个人理解,欢迎拍砖。