伙伴系统是常用的内存分配算法,linux内核的底层页分配算法就是伙伴系统,伙伴系统的优点就是分配和回收速度快,减少外部碎片。算法描述:
https://en.wikipedia.org/wiki/Buddy_memory_allocation http://dysphoria.net/OperatingSystems1/4_allocation_buddy_system.html
网上也搜了大牛的实现。 云风:https://github.com/cloudwu/buddy
对云风的精简版 https://github.com/wuwenbin/buddy2
这两个版本思想一样都是维护一棵满二叉树,进行分配和回收,云风版的通过标记内存节点状态进行分配,第二个版本是保存当前内存最大的连续可用数,在某些情况下避免了无效的遍历,第二个版本也可以修改为保存最大连续内存数目的阶,内存消耗就会变小。这两个算法分配和回收复杂度都是logn,并且空闲内存必须是2^n个基本分配单位。
然后又看了一下linux4.8的buddy system实现,linux的buddy system主要进行page分配也是linux最底层的分配,其他的分配算法都是以这个分配为基础,在x86架构下一个page是4KB。linux对内存进行了分区包括低端内存区,高端内存区,dma区,而且还对numa架构做了很多处理,对页面也进行了分类,这些不是讨论的重点,现在主要是提取linux的buddy算法,只提取核心部分,可以在控制台下运行。buddy system的数据结构就是下图所示,看着像哈希表中的拉链法,每个链表保存相同大小的内存块。最大的是10,也就是1024个基本单位,所以linux在x86下一次最多可分配4MB内存。
顺便学习下linux内核对链表的各种操作,代码实现
#include
#include
#include
#include
//双向链表
struct list_head {
struct list_head *prev, *next;
};
//拉链法存储空闲内存
typedef struct free_area {
struct list_head free_list; //把空闲内存块链接到空闲链表
unsigned long nr_free; //每个链表空闲块的个数
} free_area_t;
struct page {
struct list_head page_link;
int order; //当前页面所处的阶
unsigned int size; //如过当前页面是起始页面则size表示连续的页面数,如过不是起始页面则是0
};
typedef unsigned char uint8_t ;
#define MAX_ORDER 11 //最大阶是10,也就是最大的分配单位是2^10,在此程序中一次最多分配4MB内存
#define PAGE_SIZE 4096 //基本分配单位
#define PAGE_NUMS 4096 //页面个数
uint8_t mem_size[PAGE_SIZE * PAGE_NUMS]; //需要管理的内存
struct page mem_map[PAGE_NUMS];
free_area_t free_area[MAX_ORDER];
#define PAGE_TO_PFN(page) ((unsigned int)(page - mem_map))
#define ADDR_TO_PAGE(addr) (((unsigned int)((uint8_t*)addr - mem_size)) / PAGE_SIZE)
#define PAGE_TO_ADDR(page) (unsigned int)(PAGE_TO_PFN(page) * PAGE_SIZE + mem_size)
/**
*获取成员在结构体中的相对偏移大小
*/
#define offsetof(TYPE, MEMBER) ((unsigned int)&((TYPE *)0)->MEMBER)
/**
*获取结构体地址
*/
#define container_of(ptr, type, member) ({ \
const typeof( ((type *)0)->member ) *__mptr = (ptr); \
(type *)( (char *)__mptr - offsetof(type,member) );})
/**
*初始化双向链表
*/
#define LIST_HEAD_INIT(name) { &(name), &(name) }
/**
*用宏快速定义双向链表
*/
#define LIST_HEAD(name) \
struct list_head name = LIST_HEAD_INIT(name)
static inline void INIT_LIST_HEAD(struct list_head *list)
{
list->next = list;
list->prev = list;
}
/**
*在任意两个元素之间插入一个entry
*/
static inline void __list_add(struct list_head *new,
struct list_head *prev,
struct list_head *next)
{
new->next = next;
prev->next = new;
next->prev = new;
new->prev = prev;
}
/**
*在链表头插入一个新元素,相当于头插法
*/
static inline void list_add(struct list_head *new, struct list_head *head)
{
__list_add(new, head, head->next);
}
/**
*在链表尾部插入一个新元素,相当于尾插法
*/
static inline void list_add_tail(struct list_head *new, struct list_head *head)
{
__list_add(new, head->prev, head);
}
/**
*删除节点操作,删除两个节点之间的操作
*/
static inline void __list_del(struct list_head *prev, struct list_head *next)
{
prev->next = next;
next->prev = prev;
}
/**
*删除节点
*/
static inline void __list_del_entry(struct list_head *entry)
{
__list_del(entry->prev, entry->next);
}
/**
*删除节点,并且将next和prev置空,linux内核中是指向一个指定的地址
*/
static inline void list_del_entry(struct list_head *entry)
{
__list_del_entry(entry);
entry->next = NULL;
entry->prev = NULL;
}
/**
*替换某个节点
*/
static inline void list_replace(struct list_head *old, struct list_head *new)
{
new->next = old->next;
new->prev = old->prev;
new->prev->next = new;
old->next->prev = new;
}
/**
*把其中一个链表的节点移动到另一个链表头
*/
static inline void list_move(struct list_head *list, struct list_head *head)
{
__list_del_entry(list);
list_add(list, head);
}
/**
*把其中一个链表的节点移动到另一个链表末端
*/
static inline void list_move_tail(struct list_head *list, struct list_head *head)
{
__list_del_entry(list);
list_add_tail(list, head);
}
/**
*判断链表是否是最后一个节点
*/
static inline int list_is_last(const struct list_head *list, const struct list_head *head)
{
return list->next == head;
}
/**
*判断链表是否为空
*/
static inline int list_empty(const struct list_head *head)
{
struct list_head *next = head->next;
return (next == head) && (head->prev == next);
}
/**
* 获取包含链表节点的结构体
* ptr 链表节点地址
* type 结构体类型
* member 结构体成员
*/
#define list_entry(ptr, type, member) \
container_of(ptr, type, member)
/**
* 获取链表第一个元素的结构体
* ptr = head
*/
#define list_first_entry(ptr, type, member) \
list_entry((ptr)->next, type, member)
#define list_first_entry_or_null(ptr, type, member) \
(ptr)->next == ptr ? NULL : list_first_entry(ptr, type, member)
/**
* 获取链表最后一个元素的结构体
*/
#define list_last_entry(ptr, type, member) \
list_entry((ptr)->prev, type, member)
/**
* 获取下一个元素结构体
*/
#define list_next_entry(pos, member) \
list_entry((pos)->member.next, typeof(*(pos)), member)
/**
* 获取上一个元素结构体
*/
#define list_prev_entry(pos, member) \
list_entry((pos)->member.prev, typeof(*(pos)), member)
/**
* 遍历链表
*/
#define list_for_each(pos, head) \
for(pos = (head)->next; pos != (head); pos = pos->next)
/**
* 遍历链表结构体
*/
#define list_for_each_entry(pos, head, member) \
for(pos = list_first_entry(head, typeof(*(pos)), member); \
&(pos->member) != (head); \
pos = list_next_entry(pos, member))
static inline int page_to_pfn(struct page *page)
{
return PAGE_TO_PFN(page);
}
static inline int fix_size(unsigned int size)
{
size -= 1;
size |= size >> 1;
size |= size >> 2;
size |= size >> 4;
size |= size >> 8;
size |= size >> 16;
return size + 1;
}
static inline int get_order(unsigned int size)
{
int i = 0;
while(i < 32) {
if(size & (1 << i))
return i;
i++;
}
return -1;
}
static inline unsigned int
__find_buddy_index(unsigned int page_idx, unsigned int order)
{
return page_idx ^ (1 << order);
}
static inline int page_is_buddy(struct page *buddy, int order)
{
return buddy->order == order;
}
static inline void expand(unsigned int low, unsigned int high,
struct page *page, struct free_area *area)
{
unsigned int size;
while(low < high) {
high--;
area--;
size = 1 << high;
list_add(&page[size].page_link, &area->free_list);
area->nr_free++;
page[size].order = high;
page[size].size = size;
}
}
static inline struct page *__alloc_pages(unsigned int order)
{
unsigned int current_order;
struct free_area *area;
struct page *page;
for(current_order = order; current_order < MAX_ORDER; current_order++) {
area = &(free_area[current_order]);
page = list_first_entry_or_null(&area->free_list, struct page, page_link);
if(!page)
continue;
list_del_entry(&(page->page_link));
page->order = -1;
page->size = 1 << order;
area->nr_free--;
expand(order, current_order, page, area);
return page;
}
return NULL;
}
static struct page *alloc_pages(unsigned int size)
{
struct page *page;
unsigned int alloc_size = fix_size(size);
unsigned int order = get_order(alloc_size);
if(order < 0)
assert(0);
if(order >= MAX_ORDER)
assert(0);
page = __alloc_pages(order);
if(page)
return page;
return NULL;
}
static void __free_pages(struct page *page, int order)
{
unsigned int page_idx;
unsigned int combined_idx;
unsigned int buddy_idx;
unsigned int pfn;
struct page *buddy;
pfn = page_to_pfn(page);
page->size = 0;
page_idx = pfn & ((1 << MAX_ORDER) - 1);
while(order < MAX_ORDER - 1) {
buddy_idx = __find_buddy_index(page_idx, order);
buddy = page + (buddy_idx - page_idx);
//buddy可能会越界如果有4097个页面,则idx为4096的页面的buddy越界
if(buddy > (mem_map + PAGE_NUMS - 1))
break;
if(!page_is_buddy(buddy, order))
break;
list_del_entry(&buddy->page_link); //buddy从当前链表删除
free_area[order].nr_free--; //当前链表空闲数减一
buddy->order = -1;
buddy->size = 0;
combined_idx = buddy_idx & page_idx;
page = page + (combined_idx - page_idx);
page_idx = combined_idx;
order++;
}
page->order = order;
page->size = 1 << order;
list_add(&(page->page_link), &(free_area[order].free_list));
free_area[order].nr_free++;
}
static void free_pages(struct page *page)
{
int order = get_order(page->size);
__free_pages(page, order);
}
static void buddy_free(void *addr)
{
struct page *page = &mem_map[ADDR_TO_PAGE(addr)];
free_pages(page);
}
static void *buddy_malloc(unsigned int size)
{
struct page *page = alloc_pages(size);
if(page)
return mem_size + (PAGE_TO_PFN(page) * PAGE_SIZE);
return NULL;
}
static void init_free_area()
{
int cur_order = 0;
for(; cur_order < MAX_ORDER; cur_order++)
INIT_LIST_HEAD(&(free_area[cur_order].free_list));
}
static void init_pages()
{
int i;
for(i = 0; i < PAGE_NUMS; i++) {
mem_map[i].order = -1;
mem_map[i].size = 0;
}
}
static void free_mem_to_buddy()
{
int i;
for(i = 0; i < PAGE_NUMS; i++)
__free_pages(&mem_map[i], 0);
}
static void init_mm()
{
init_free_area();
init_pages();
free_mem_to_buddy();
}
/*
* 打印空闲链表的状态
*/
static void __dump()
{
int order;
for(order = 0; order < MAX_ORDER; order++) {
struct page *page;
if(list_empty(&free_area[order].free_list)) {
printf("order: %d is NULL\n\n", order);
} else {
printf("order: %d, nr_free: %d :\n", order, free_area[order].nr_free);
list_for_each_entry(page, &free_area[order].free_list, page_link)
printf("pfn: %d, page_addr: 0x%p, mem_addr: 0x%p, size: %d\n\n",PAGE_TO_PFN(page), page, PAGE_TO_ADDR(page), page->size);
}
}
}
static void buddy_test()
{
uint8_t *p1 = buddy_malloc(1);
assert(free_area[MAX_ORDER - 1].nr_free == 3 && free_area[0].nr_free == 1);
__dump();
buddy_free(p1);
__dump();
assert(free_area[MAX_ORDER - 1].nr_free == 4 && free_area[0].nr_free == 0);
uint8_t *p2 = buddy_malloc(1024);
uint8_t *p3 = buddy_malloc(1024);
uint8_t *p4 = buddy_malloc(2);
assert(p2 == p1);
assert(free_area[MAX_ORDER - 1].nr_free == 1);
assert(free_area[0].nr_free == 0);
buddy_free(p2);
buddy_free(p4);
uint8_t *p5 = buddy_malloc(1024);
uint8_t *p6 = buddy_malloc(1);
assert(p5 == p4);
assert(p6 == p2);
buddy_free(p5);
buddy_free(p6);
assert(free_area[MAX_ORDER - 1].nr_free == 3);
buddy_free(p3);
assert(free_area[MAX_ORDER - 1].nr_free == 4 && free_area[0].nr_free == 0);
p1 = buddy_malloc(3);
p2 = buddy_malloc(4);
assert(p1 + PAGE_SIZE * 4 == p2);
p3 = buddy_malloc(16);
assert(p2 + PAGE_SIZE * 12 == p3);
p4 = buddy_malloc(16);
buddy_free(p3);
buddy_free(p4);
assert(free_area[4].nr_free == 1);
p3 = buddy_malloc(512);
p4 = buddy_malloc(1);
buddy_free(p1);
p1 = buddy_malloc(1);
assert(p1 == p4 + PAGE_SIZE);
buddy_free(p1);
buddy_free(p3);
buddy_free(p4);
buddy_free(p2);
assert(free_area[MAX_ORDER - 1].nr_free == 4);
}
int main()
{
init_mm();
buddy_test();
return 0;
}