水平有限,描述不当之处还请指出,转载请注明出处http://blog.csdn.net/vanbreaker/article/details/7700338
Slub分配器创建缓存的过程和Slab差不多,主要的区别在于Slub分配器并不是直接创建一个新的缓存,而是先试图在已有的缓存中找到一个各方面属性和待创建缓存差不多的缓存,如果能找到的话则不会去创建新缓存,而是复用这个已有的缓存,以提高缓存的利用率。
/*
 * kmem_cache_create - create a new slab cache, or reuse a compatible one.
 * @name:  human-readable cache name (must be non-NULL; also used in sysfs)
 * @size:  object size in bytes
 * @align: requested object alignment
 * @flags: SLAB_* behaviour flags
 * @ctor:  optional object constructor
 *
 * Unlike SLAB, SLUB first tries to merge the request into an existing
 * cache with compatible size/align/flags (find_mergeable()); only when no
 * match exists is a fresh kmem_cache allocated and initialised via
 * kmem_cache_open().
 *
 * Returns the (new or reused) cache, or NULL on failure; panics instead
 * of returning NULL if SLAB_PANIC was requested.
 */
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *))
{
struct kmem_cache *s;
if (WARN_ON(!name))
return NULL;
down_write(&slub_lock);
/* First try to reuse an existing cache that satisfies the request,
 * so we avoid creating a brand-new cache. */
s = find_mergeable(size, align, flags, name, ctor);
if (s) {//a mergeable cache was found — reuse it
int cpu;
/* Bump the refcount: one more object type now lives in this cache. */
s->refcount++;
/*
 * Adjust the object sizes so that we clear
 * the complete object on kzalloc.
 */
/* Grow objsize to the larger of the existing and requested sizes. */
s->objsize = max(s->objsize, (int)size);
/*
 * And then we need to update the object size in the
 * per cpu structures
 */
for_each_online_cpu(cpu)//propagate the new objsize to each per-CPU structure
get_cpu_slab(s, cpu)->objsize = s->objsize;
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
up_write(&slub_lock);
if (sysfs_slab_alias(s, name)) {
down_write(&slub_lock);
s->refcount--;
up_write(&slub_lock);
goto err;
}
return s;
}
/* No mergeable cache found — create a new one below. */
/* Allocate kmem_size bytes to hold the kmem_cache itself. */
s = kmalloc(kmem_size, GFP_KERNEL);
if (s) {
/* Initialise every field of s from the caller's parameters. */
if (kmem_cache_open(s, GFP_KERNEL, name,
size, align, flags, ctor)) {
list_add(&s->list, &slab_caches);//link the cache into the global slab_caches list
up_write(&slub_lock);
if (sysfs_slab_add(s)) {//register the cache in sysfs; unwind on failure
down_write(&slub_lock);
list_del(&s->list);
up_write(&slub_lock);
kfree(s);
goto err;
}
return s;
}
kfree(s);
}
up_write(&slub_lock);
err:
if (flags & SLAB_PANIC)
panic("Cannot create slabcache %s\n", name);
else
s = NULL;
return s;
}
find_mergeable()用来寻找一个可以复用的缓存,它会考察已有缓存和待创建的缓存在各方面是否匹配
/*
 * find_mergeable - look for an existing cache this request can share.
 * @size:  requested object size
 * @align: requested alignment
 * @flags: requested SLAB_* flags
 * @name:  cache name (used only for flag fixups here)
 * @ctor:  constructor, if any
 *
 * Walks slab_caches and returns the first cache whose size, flags and
 * alignment are compatible with the request, or NULL if merging is
 * disabled or no candidate matches.
 */
static struct kmem_cache *find_mergeable(size_t size,
size_t align, unsigned long flags, const char *name,
void (*ctor)(void *))
{
struct kmem_cache *s;
/* Merging disabled globally (slub_nomerge) or by the request's flags. */
if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
return NULL;
/* A constructor makes the cache unique — never merge in that case. */
if (ctor)
return NULL;
/* Normalise the requested attributes before comparing. */
size = ALIGN(size, sizeof(void *));
align = calculate_alignment(flags, align, size);
size = ALIGN(size, align);
flags = kmem_cache_flags(size, flags, name, NULL);
/* Scan every cache currently registered in slab_caches. */
list_for_each_entry(s, &slab_caches, list) {
/* Skip caches that are themselves marked unmergeable. */
if (slab_unmergeable(s))
continue;
/* Candidate's per-object size is too small for this request. */
if (size > s->size)
continue;
/* The merge-relevant flag bits must match exactly. */
if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
continue;
/*
 * Check if alignment is compatible.
 * Courtesy of Adrian Drzewiecki
 */
/* Candidate's object size is not a multiple of the required alignment. */
if ((s->size & ~(align - 1)) != s->size)
continue;
/* Candidate wastes a full word or more per object — too loose a fit. */
if (s->size - size >= sizeof(void *))
continue;
return s;//all checks passed: this cache can be reused
}
return NULL;
}
再来看一个kmem_cache_create()中比较关键的函数--kmem_cache_open()
/*
 * kmem_cache_open - initialise a freshly allocated kmem_cache.
 * @s:        zeroed-out cache descriptor to fill in
 * @gfpflags: allocation flags used for the node/cpu bookkeeping structures
 * @name, @size, @align, @flags, @ctor: as passed to kmem_cache_create()
 *
 * Computes all derived sizes (calculate_sizes()), sets the minimum
 * partial-list length, then allocates and initialises the per-node and
 * per-CPU structures.  Returns 1 on success, 0 on failure (panicking
 * instead if SLAB_PANIC was requested).
 */
static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
		const char *name, size_t size, size_t align,
		unsigned long flags, void (*ctor)(void *))
{
	memset(s, 0, kmem_size);

	/* Record the caller-supplied attributes. */
	s->name = name;
	s->ctor = ctor;
	s->objsize = size;
	s->align = align;
	s->flags = kmem_cache_flags(size, flags, name, ctor);

	/* Derive s->size, s->inuse, s->offset, s->oo etc. */
	if (!calculate_sizes(s, -1))
		goto error;

	if (disable_higher_order_debug) {
		/*
		 * Disable debugging flags that store metadata if the min slab
		 * order increased.
		 */
		if (get_order(s->size) > get_order(s->objsize)) {
			s->flags &= ~DEBUG_METADATA_FLAGS;
			s->offset = 0;
			if (!calculate_sizes(s, -1))
				goto error;
		}
	}

	/*
	 * The larger the object size is, the more pages we want on the partial
	 * list to avoid pounding the page allocator excessively.
	 */
	set_min_partial(s, ilog2(s->size));

	/* One object type lives in this cache so far. */
	s->refcount = 1;
#ifdef CONFIG_NUMA
	s->remote_node_defrag_ratio = 1000;
#endif
	/* Set up the kmem_cache_node structures (allocated first on NUMA). */
	if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
		goto error;

	/* Set up the kmem_cache_cpu structures (allocated first on SMP). */
	if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
		return 1;

	/* cpu setup failed: release the node structures and fall through. */
	free_kmem_cache_nodes(s);
error:
	if (flags & SLAB_PANIC)
		panic("Cannot create slab %s size=%lu realsize=%u "
			"order=%u offset=%u flags=%lx\n",
			s->name, (unsigned long)size, s->size,
			oo_order(s->oo), s->offset, flags);
	return 0;
}
static int calculate_sizes(struct kmem_cache *s, int forced_order) { unsigned long flags = s->flags; unsigned long size = s->objsize; unsigned long align = s->align; int order; /* * Round up object size to the next word boundary. We can only * place the free pointer at word boundaries and this determines * the possible location of the free pointer. */ //先将size按照字长进行对齐,以便访问空闲指针 size = ALIGN(size, sizeof(void *)); #ifdef CONFIG_SLUB_DEBUG /* * Determine if we can poison the object itself. If the user of * the slab may touch the object after free or before allocation * then we should never poison the object itself. */ if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) && !s->ctor) s->flags |= __OBJECT_POISON; else s->flags &= ~__OBJECT_POISON; /* * If we are Redzoning then check if there is some space between the * end of the object and the free pointer. If not then add an * additional word to have some bytes to store Redzone information. */ if ((flags & SLAB_RED_ZONE) && size == s->objsize) size += sizeof(void *); #endif /* * With that we have determined the number of bytes in actual use * by the object. This is the potential offset to the free pointer. */ s->inuse = size;//将inuse设置为size if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || s->ctor)) { /* * Relocate free pointer after the object if it is not * permitted to overwrite the first word of the object on * kmem_cache_free. * * This is the case if we do RCU, have a constructor or * destructor or are poisoning the objects. */ /*将offset设置为size,这样对象的后面就用来存放下一个空闲对象的指针*/ s->offset = size; size += sizeof(void *); } #ifdef CONFIG_SLUB_DEBUG if (flags & SLAB_STORE_USER) /* * Need to store information about allocs and frees after * the object. 
*/ size += 2 * sizeof(struct track); if (flags & SLAB_RED_ZONE) /* * Add some empty padding so that we can catch * overwrites from earlier objects rather than let * tracking information or the free pointer be * corrupted if a user writes before the start * of the object. */ size += sizeof(void *); #endif /* * Determine the alignment based on various parameters that the * user specified and the dynamic determination of cache line size * on bootup. */ /*计算对齐值*/ align = calculate_alignment(flags, align, s->objsize); s->align = align; /* * SLUB stores one object immediately after another beginning from * offset 0. In order to align the objects we have to simply size * each object to conform to the alignment. */ /*根据计算出的对齐值重新进行对齐*/ size = ALIGN(size, align); s->size = size; /*如果指定了分配页面的阶数的话则将其作为缓存的页面分配阶数*/ if (forced_order >= 0) order = forced_order; else/*否则的话要根据size进行计算*/ order = calculate_order(size); if (order < 0) return 0; s->allocflags = 0; if (order) s->allocflags |= __GFP_COMP; if (s->flags & SLAB_CACHE_DMA) s->allocflags |= SLUB_DMA; if (s->flags & SLAB_RECLAIM_ACCOUNT) s->allocflags |= __GFP_RECLAIMABLE; /* * Determine the number of objects per slab */ /*用oo保存slab的页框阶数和对象数*/ s->oo = oo_make(order, size); /*min保存了slab只有一个对象时对应的order和此时可以存放的对象数*/ s->min = oo_make(get_order(size), size); if (oo_objects(s->oo) > oo_objects(s->max)) s->max = s->oo; return !!oo_objects(s->oo); }
static inline int calculate_order(int size) { int order; int min_objects; int fraction; int max_objects; /* * Attempt to find best configuration for a slab. This * works by first attempting to generate a layout with * the best configuration and backing off gradually. * * First we reduce the acceptable waste in a slab. Then * we reduce the minimum objects required in a slab. */ /*slub_min_objects全局变量指定了slab中的最小对象数*/ min_objects = slub_min_objects; /*没有指定slab中的最小对象数则根据系统的CPU数进行计算*/ if (!min_objects) min_objects = 4 * (fls(nr_cpu_ids) + 1); /*slub_max_order全局变量指定了一个slab可以占用的最大页框阶数,根据该值 计算出slab中的最大对象数*/ max_objects = (PAGE_SIZE << slub_max_order)/size; min_objects = min(min_objects, max_objects); /*两个while循环嵌套用来计算slab的阶数,当无法找到满足条件的order时, 内循环用来减小fraction的值以放宽对碎片的要求,外循环用来减小min_objects以放宽对slab的 最小对象数的要求*/ while (min_objects > 1) { fraction = 16;/*fraction用来衡量对碎片的要求标准,该值越大,则允许的碎片越少!*/ while (fraction >= 4) {//fraction不能小于4 /*计算slab的页框阶数*/ order = slab_order(size, min_objects, slub_max_order, fraction); /*阶数不大于slub_max_order,则符合要求*/ if (order <= slub_max_order) return order; /*否则降低fraction,重新计算*/ fraction /= 2; } min_objects--; } /* * We were unable to place multiple objects in a slab. Now * lets see if we can place a single object there. */ /*经过前面的步骤无法找到满足要求的order,那么只能选择存放1个对象,并且忽略碎片*/ order = slab_order(size, 1, slub_max_order, 1); if (order <= slub_max_order) return order; /* * Doh this slab cannot be placed using slub_max_order. */ /*还不行的话,则将slab的order上限提升至MAX_ORDER,对应伙伴系统的最高分配阶*/ order = slab_order(size, 1, MAX_ORDER, 1); if (order < MAX_ORDER) return order; return -ENOSYS; }
/*
 * slab_order - smallest page order satisfying the given constraints.
 * @size:           per-object size in bytes
 * @min_objects:    minimum number of objects the slab must hold
 * @max_order:      highest page order we may return
 * @fract_leftover: waste tolerance; leftover must be <= slab_size / this,
 *                  so a SMALLER value permits MORE waste
 *
 * Returns the first order in [min_order, max_order] meeting both the
 * object-count and waste constraints (or max_order + 1 if none does,
 * since the loop then runs off the end).
 */
static inline int slab_order(int size, int min_objects,
				int max_order, int fract_leftover)
{
	int order;
	int rem;
	int min_order = slub_min_order;

	/*
	 * A single page can describe at most MAX_OBJS_PER_PAGE (65535)
	 * objects; if the minimum-order slab would exceed that, size the
	 * slab for exactly MAX_OBJS_PER_PAGE objects instead.
	 */
	if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE)
		return get_order(size * MAX_OBJS_PER_PAGE) - 1;

	/* Walk upward from the smallest order that could possibly fit. */
	for (order = max(min_order,
				fls(min_objects * size - 1) - PAGE_SHIFT);
			order <= max_order; order++) {

		unsigned long slab_size = PAGE_SIZE << order;

		/* Too small to hold the required number of objects. */
		if (slab_size < min_objects * size)
			continue;

		/* Bytes left over after packing whole objects. */
		rem = slab_size % size;

		/* Accept once the waste is within the requested fraction. */
		if (rem <= slab_size / fract_leftover)
			break;
	}

	return order;
}
至此,关于新缓存各项属性的计算工作已完成,接下来只需在kmem_cache_open()中调用init_kmem_cache_nodes()和alloc_kmem_cache_cpus()来初始化struct kmem_cache_node和struct kmem_cache_cpu结构即可,对于UMA系统,这两种结构都是直接在struct kmem_cache中定义的,因此只需直接初始化即可,而对于NUMA系统,还需为这些对象在普通缓存中申请空间,过程较为繁琐,在此不做介绍了!