/*
* malloc.c --- a general purpose kernel memory allocator for Linux.
*
* Written by Theodore Ts'o (tytso@mit.edu), 11/29/91
*
* This routine is written to be as fast as possible, so that it
* can be called from the interrupt level.
*
* Limitations: maximum size of memory we can allocate using this routine
*	is 4k, the size of a page in Linux.
*
* The general game plan is that each page (called a bucket) will only hold
* objects of a given size. When all of the objects on a page are released,
* the page can be returned to the general free pool. When malloc() is
* called, it looks for the smallest bucket size which will fulfill its
* request, and allocate a piece of memory from that bucket pool.
*
* Each bucket has as its control block a bucket descriptor which keeps
* track of how many objects are in use on that page, and the free list
* for that page. Like the buckets themselves, bucket descriptors are
* stored on pages requested from get_free_page(). However, unlike buckets,
* pages devoted to bucket descriptor pages are never released back to the
* system. Fortunately, a system should probably only need 1 or 2 bucket
* descriptor pages, since a page can hold 256 bucket descriptors (which
* corresponds to 1 megabyte worth of bucket pages.) If the kernel is using
* that much allocated memory, it's probably doing something wrong. :-)
*
* Note: malloc() and free() both call get_free_page() and free_page()
*	in sections of code where interrupts are turned off, to allow
*	malloc() and free() to be safely called from an interrupt routine.
*	(We will probably need this functionality when networking code,
*	particularly things like NFS, is added to Linux.)  However, this
*	presumes that get_free_page() and free_page() are interrupt-level
*	safe, which they may not be once paging is added.  If this is the
*	case, we will need to modify malloc() to keep a few unused pages
*	"pre-allocated" so that it can safely draw upon those pages if
*	it is called from an interrupt routine.
*
*	Another concern is that get_free_page() should not sleep; if it
*	does, the code is carefully ordered so as to avoid any race
*	conditions.  The catch is that if malloc() is called re-entrantly,
*	there is a chance that unnecessary pages will be grabbed from the
*	system.  Except for the pages for the bucket descriptor page, the
*	extra pages will eventually get released back to the system, though,
*	so it isn't all that bad.
*/
#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/system.h>
/*
 * Bucket descriptor: the control block for one page ("bucket") that has
 * been divided into objects of a single size.
 */
struct bucket_desc {	/* 16 bytes with 32-bit pointers */
	void			*page;		/* start address of the page backing this bucket */
	struct bucket_desc	*next;		/* next descriptor on whichever list this one is linked into */
	void			*freeptr;	/* first free object in the page; 0 when none is left */
	unsigned short		refcnt;		/* objects currently handed out (at most PAGE_SIZE/bucket_size) */
	unsigned short		bucket_size;	/* size in bytes of every object in this bucket */
};
/* Directory entry: one per supported object size. */
struct _bucket_dir {	/* 8 bytes with 32-bit pointers */
	int			size;	/* object size served by this entry */
	struct bucket_desc	*chain;	/* first bucket descriptor holding objects of this size */
};
/*
* The following is where we store a pointer to the first bucket
* descriptor for a given size.
*
* If it turns out that the Linux kernel allocates a lot of objects of a
* specific size, then we may want to add that specific size to this list,
* since that will allow the memory to be allocated more efficiently.
* However, since an entire page must be dedicated to each specific size
* on this list, some amount of temperance must be exercised here.
*
* Note that this list *must* be kept in order.
*/
/*
 * Global directory of size classes, smallest first.  Every chain starts
 * out empty; the entry with size 0 terminates the table.
 */
struct _bucket_dir bucket_dir[] = {
	{   16, (struct bucket_desc *) 0 },
	{   32, (struct bucket_desc *) 0 },
	{   64, (struct bucket_desc *) 0 },
	{  128, (struct bucket_desc *) 0 },
	{  256, (struct bucket_desc *) 0 },
	{  512, (struct bucket_desc *) 0 },
	{ 1024, (struct bucket_desc *) 0 },
	{ 2048, (struct bucket_desc *) 0 },
	{ 4096, (struct bucket_desc *) 0 },
	{    0, (struct bucket_desc *) 0 }	/* End of list marker */
};
/*
 * Head of the linked list of unused bucket descriptors.  It starts out
 * empty (0).
 */
struct bucket_desc *free_bucket_desc = 0;
/*
 * Turn one freshly obtained page into bucket descriptors and push all of
 * them onto the front of the free_bucket_desc list.  Calls panic() if
 * get_free_page() returns 0.
 */
static inline void init_bucket_desc()
{
	struct bucket_desc *head, *cur, *tail;

	/* One whole page is dedicated to descriptors. */
	head = (struct bucket_desc *) get_free_page();
	if (!head)
		panic("Out of memory in init_bucket_desc()");

	/* Link every descriptor in the page to the one that follows it. */
	tail = head + (PAGE_SIZE / sizeof(struct bucket_desc) - 1);
	for (cur = head; cur != tail; cur++)
		cur->next = cur + 1;

	/*
	 * Splicing into the global list is deliberately the final step, to
	 * avoid race conditions in case get_free_page() sleeps and this
	 * routine gets called again.
	 */
	tail->next = free_bucket_desc;
	free_bucket_desc = head;
}
/*
 * Allocate len bytes of kernel memory.
 *
 * The request is served from the first entry of bucket_dir whose size is
 * at least len.  One object is taken from a bucket on that entry's chain;
 * when no bucket on the chain has a free object, a new page is obtained
 * and divided into objects of that size.  The chain and free lists are
 * manipulated between cli() and sti(), so interrupts are enabled on
 * return.
 *
 * len:     number of bytes requested.  If it is larger than every size in
 *          bucket_dir, a message is printed and panic() is called.
 * returns: pointer to the allocated object.  An out-of-pages condition
 *          ends in panic() rather than a 0 return (assuming panic() does
 *          not return).
 */
void *malloc(unsigned int len)
{
	struct _bucket_dir *bdir;
	struct bucket_desc *bdesc;
	void *retval;

	/*
	 * First we search the bucket_dir to find the right bucket chain
	 * for this request.  The table is sorted, so the first entry that
	 * is big enough is the best fit; the size-0 entry ends the table.
	 */
	for (bdir = bucket_dir; bdir->size; bdir++)
		if ((unsigned int) bdir->size >= len)
			break;
	if (!bdir->size) {
		printk("malloc called with impossibly large argument (%d)\n",
			len);
		panic("malloc: bad arg");
	}

	/*
	 * Now we search for a bucket descriptor which has free space
	 */
	cli();	/* Avoid race conditions */
	for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next)
		if (bdesc->freeptr)
			break;

	/*
	 * If we didn't find a bucket with free space, then we'll
	 * allocate a new one.
	 */
	if (!bdesc) {
		char *cp;
		int i;

		/* Take a descriptor off the free list, refilling the list if empty. */
		if (!free_bucket_desc)
			init_bucket_desc();
		bdesc = free_bucket_desc;
		free_bucket_desc = bdesc->next;

		bdesc->refcnt = 0;
		bdesc->bucket_size = bdir->size;

		/*
		 * cp gets its own assignment: a cast expression is not an
		 * lvalue in standard C, so "(void *) cp = ..." cannot be used
		 * inside the chained assignment.
		 */
		cp = (char *) get_free_page();
		bdesc->page = bdesc->freeptr = (void *) cp;
		if (!cp)
			panic("Out of memory in kernel malloc()");

		/*
		 * Set up the chain of free objects: the first word of each
		 * object holds the address of the next one, and the last
		 * object holds 0.
		 */
		for (i = PAGE_SIZE / bdir->size; i > 1; i--) {
			*((char **) cp) = cp + bdir->size;
			cp += bdir->size;
		}
		*((char **) cp) = 0;

		/* OK, link it in at the head of this size's chain! */
		bdesc->next = bdir->chain;
		bdir->chain = bdesc;
	}

	/* Pop the first free object off the bucket's free list. */
	retval = (void *) bdesc->freeptr;
	bdesc->freeptr = *((void **) retval);
	bdesc->refcnt++;
	sti();	/* OK, we're safe again */
	return(retval);
}
/*
* Here is the free routine. If you know the size of the object that you
* are freeing, then free_s() will use that information to speed up the
* search for the bucket descriptor.
*
* We will #define a macro so that "free(x)" is becomes "free_s(x, 0)"
*/
void free_s(void *obj, int size)
{
void *page;
struct _bucket_dir *bdir;
struct bucket_desc *bdesc, *prev;
/* Calculate what page this object lives in */
// 得到obj所在的页面起始地址
page = (void *) ((unsigned long) obj & 0xfffff000);
/* Now search the buckets looking for that page */
for (bdir = bucket_dir; bdir->size; bdir++) {
prev = 0;
/* If size is zero then this conditional is always false */
if (bdir->size < size)
continue;
// 找到了obj的大小所在桶索引
for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
// 根据页面地址确定落在了哪个桶里
if (bdesc->page == page)
goto found;
prev = bdesc;
}
}
panic("Bad address passed to kernel free_s()");
found:
cli(); /* To avoid race conditions */
// 此时已经找到了obj所在的桶描述符,归还obj到桶的空闲obj链表中
// 把obj挂载在空闲链表的头部
*((void **)obj) = bdesc->freeptr;
bdesc->freeptr = obj;
bdesc->refcnt--;
// 如果当前桶中的page已经全部空闲. 没有分配出去的obj了. 那么释放page吧
if (bdesc->refcnt == 0) {
/*
* We need to make sure that prev is still accurate. It
* may not be, if someone rudely interrupted us....
*/
// 保险起见,这里有做了一次prev的检查, 如果有问题, 则重新计算prev
// 因为prev是在关中断cli()之前就做的,所以,有可能被改变.
// 这里在做一次检查,如果对的,那么就简单了,直接跳过.
if ((prev && (prev->next != bdesc)) ||
(!prev && (bdir->chain != bdesc)))
for (prev = bdir->chain; prev; prev = prev->next)
if (prev->next == bdesc)
break;
// 把当前桶描述符从中桶链表中去掉
if (prev)
prev->next = bdesc->next;
else {
if (bdir->chain != bdesc)
panic("malloc bucket chains corrupted");
bdir->chain = bdesc->next;
}
// 释放桶申请的buf
free_page((unsigned long) bdesc->page);
// 把桶描述符加入到空闲桶描述符链表中.
bdesc->next = free_bucket_desc;
free_bucket_desc = bdesc;
}
sti();
return;
}