/*
 * malloc.c --- a general purpose kernel memory allocator for Linux.
 *
 * Written by Theodore Ts'o (tytso@mit.edu), 11/29/91
 *
 * This routine is written to be as fast as possible, so that it
 * can be called from the interrupt level.
 *
 * Limitations: maximum size of memory we can allocate using this routine
 *	is 4k, the size of a page in Linux.
 *
 * The general game plan is that each page (called a bucket) will only hold
 * objects of a given size.  When all of the objects on a page are released,
 * the page can be returned to the general free pool.  When malloc() is
 * called, it looks for the smallest bucket size which will fulfill its
 * request, and allocates a piece of memory from that bucket pool.
 *
 * Each bucket has as its control block a bucket descriptor which keeps
 * track of how many objects are in use on that page, and the free list
 * for that page.  Like the buckets themselves, bucket descriptors are
 * stored on pages requested from get_free_page().  However, unlike buckets,
 * pages devoted to bucket descriptor pages are never released back to the
 * system.  Fortunately, a system should probably only need 1 or 2 bucket
 * descriptor pages, since a page can hold 256 bucket descriptors (which
 * corresponds to 1 megabyte worth of bucket pages.)  If the kernel is using
 * that much allocated memory, it's probably doing something wrong.  :-)
 *
 * Note: malloc() and free() both call get_free_page() and free_page()
 *	in sections of code where interrupts are turned off, to allow
 *	malloc() and free() to be safely called from an interrupt routine.
 *	(We will probably need this functionality when networking code,
 *	particularly things like NFS, is added to Linux.)  However, this
 *	presumes that get_free_page() and free_page() are interrupt-level
 *	safe, which they may not be once paging is added.  If this is the
 *	case, we will need to modify malloc() to keep a few unused pages
 *	"pre-allocated" so that it can safely draw upon those pages if
 *	it is called from an interrupt routine.
 *
 *	Another concern is that get_free_page() should not sleep; if it
 *	does, the code is carefully ordered so as to avoid any race
 *	conditions.  The catch is that if malloc() is called re-entrantly,
 *	there is a chance that unnecessary pages will be grabbed from the
 *	system.  Except for the pages for the bucket descriptor page, the
 *	extra pages will eventually get released back to the system, though,
 *	so it isn't all that bad.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/system.h>
/*
 * Bucket descriptor: the control block for one page ("bucket") that is
 * carved into objects of a single size.
 */
struct bucket_desc {	/* 16 bytes with 32-bit pointers */
	/* Start address of the page that backs this bucket. */
	void			*page;
	/* Next descriptor in the chain (per-size chain or free list). */
	struct bucket_desc	*next;
	/* First free object inside the page; 0 when the page is full. */
	void			*freeptr;
	/*
	 * Number of objects currently handed out from this bucket.
	 * A bucket holds at most PAGE_SIZE / bucket_size objects.
	 */
	unsigned short		refcnt;
	/* Size of every object allocated from this bucket. */
	unsigned short		bucket_size;
};
/*
 * Directory entry: one per supported object size. It heads the chain of
 * bucket descriptors whose bucket_size equals `size`.
 */
struct _bucket_dir {	/* 8 bytes with 32-bit pointers */
	/* Object size served by the buckets on this chain. */
	int			size;
	/* First bucket descriptor for this size, 0 if none yet. */
	struct bucket_desc	*chain;
};
* The following is the where we store a pointer to the first bucket
* descriptor for a given size.
* If it turns out that the Linux kernel allocates a lot of objects of a
* specific size, then we may want to add that specific size to this list,
* since that will allow the memory to be allocated more efficiently.
* However, since an entire page must be dedicated to each specific size
* on this list, some amount of temperance must be exercised here.
* Note that this list *must* be kept in order.
// 不同类型的桶索引数组, 全局变量.
struct _bucket_dir bucket_dir[] = {
{ 16,
(struct bucket_desc *) 0},
{ 32,
(struct bucket_desc *) 0},
{ 64,
(struct bucket_desc *) 0},
{ 128,
(struct bucket_desc *) 0},
{ 256,
(struct bucket_desc *) 0},
{ 512,
(struct bucket_desc *) 0},
{ 1024,
(struct bucket_desc *) 0},
{ 2048, (struct bucket_desc *) 0},
{ 4096, (struct bucket_desc *) 0},
{ 0, (struct bucket_desc *) 0}}; /* End of list marker */
/*
 * This contains a linked list of free bucket descriptor blocks.
 * Global; 0 while no free descriptor is available.
 */
struct bucket_desc *free_bucket_desc = (struct bucket_desc *) 0;
* This routine initializes a bucket description page.
static inline void init_bucket_desc()
struct bucket_desc *bdesc, *first;
int i;
// 申请一页内存来用作桶描述符
first = bdesc = (struct bucket_desc *) get_free_page();
if (!bdesc)
panic("Out of memory in init_bucket_desc()");
// 将空闲描述符首尾相连
for (i = PAGE_SIZE/sizeof(struct bucket_desc); i > 1; i--) {
bdesc->next = bdesc+1;
* This is done last, to avoid race conditions in case
* get_free_page() sleeps and this routine gets called again....
// 把新申请的桶描述符挂在空闲桶描述符链表上
bdesc->next = free_bucket_desc;
free_bucket_desc = first;
void *malloc(unsigned int len)
struct _bucket_dir *bdir;
struct bucket_desc *bdesc;
void *retval;
* First we search the bucket_dir to find the right bucket change
* for this request.
// 根据len大小,找到一个best-fit最佳大小的桶索引
for (bdir = bucket_dir; bdir->size; bdir++)
if (bdir->size >= len)
if (!bdir->size) {
printk("malloc called with impossibly large argument (%d)\n",
panic("malloc: bad arg");
* Now we search for a bucket descriptor which has free space
cli(); /* Avoid race conditions */
// 找到桶索引了, 继续找该索引下挂载的一桶链表,看哪个桶中有空闲的obj
for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next)
if (bdesc->freeptr)
* If we didn't find a bucket with free space, then we'll
* allocate a new one.
if (!bdesc) {
char *cp;
int i;
// 从空闲桶描述符链表中摘下一个
if (!free_bucket_desc)
bdesc = free_bucket_desc;
free_bucket_desc = bdesc->next;
// 初始化桶描述符
bdesc->refcnt = 0;
bdesc->bucket_size = bdir->size; // 该桶中的buf的obj大小固定为bdir->size
// 新分配一页
// freeptr 指向第一个obj
bdesc->page = bdesc->freeptr = (void *) cp = get_free_page();
if (!cp)
panic("Out of memory in kernel malloc()");
/* Set up the chain of free objects */
// 这里又是非常具有技巧性的代码
for (i=PAGE_SIZE/bdir->size; i > 1; i--) {
*((char **) cp) = cp + bdir->size; // 将当前obj的开头(cp指向的内存)指向下一个obj的地址(cp+bdir->size)
cp += bdir->size; // cp指向下一个obj
// 最后一个obj的的开头指向0. 后面没obj了.
*((char **) cp) = 0;
// 把该桶描述符挂载具有相同size的桶描述符链表里.
bdesc->next = bdir->chain; /* OK, link it in! */
bdir->chain = bdesc;
// 找到了. 把freeptr指向的obj分配出去,
retval = (void *) bdesc->freeptr;
// 从空闲obj链表上取下,freeptr指向obj指向的下一个空闲obj
bdesc->freeptr = *((void **) retval);
sti(); /* OK, we're safe again */
* Here is the free routine. If you know the size of the object that you
* are freeing, then free_s() will use that information to speed up the
* search for the bucket descriptor.
* We will #define a macro so that "free(x)" is becomes "free_s(x, 0)"
void free_s(void *obj, int size)
void *page;
struct _bucket_dir *bdir;
struct bucket_desc *bdesc, *prev;
/* Calculate what page this object lives in */
// 得到obj所在的页面起始地址
page = (void *) ((unsigned long) obj & 0xfffff000);
/* Now search the buckets looking for that page */
for (bdir = bucket_dir; bdir->size; bdir++) {
prev = 0;
/* If size is zero then this conditional is always false */
if (bdir->size < size)
// 找到了obj的大小所在桶索引
for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
// 根据页面地址确定落在了哪个桶里
if (bdesc->page == page)
goto found;
prev = bdesc;
panic("Bad address passed to kernel free_s()");
cli(); /* To avoid race conditions */
// 此时已经找到了obj所在的桶描述符,归还obj到桶的空闲obj链表中
// 把obj挂载在空闲链表的头部
*((void **)obj) = bdesc->freeptr;
bdesc->freeptr = obj;
// 如果当前桶中的page已经全部空闲. 没有分配出去的obj了. 那么释放page吧
if (bdesc->refcnt == 0) {
* We need to make sure that prev is still accurate. It
* may not be, if someone rudely interrupted us....
// 保险起见,这里有做了一次prev的检查, 如果有问题, 则重新计算prev
// 因为prev是在关中断cli()之前就做的,所以,有可能被改变.
// 这里在做一次检查,如果对的,那么就简单了,直接跳过.
if ((prev && (prev->next != bdesc)) ||
(!prev && (bdir->chain != bdesc)))
for (prev = bdir->chain; prev; prev = prev->next)
if (prev->next == bdesc)
// 把当前桶描述符从中桶链表中去掉
if (prev)
prev->next = bdesc->next;
else {
if (bdir->chain != bdesc)
panic("malloc bucket chains corrupted");
bdir->chain = bdesc->next;
// 释放桶申请的buf
free_page((unsigned long) bdesc->page);
// 把桶描述符加入到空闲桶描述符链表中.
bdesc->next = free_bucket_desc;
free_bucket_desc = bdesc;