/* * linux/mm/memory.c * * (C) 1991 Linus Torvalds */ /* * demand-loading started 01.12.91 - seems it is high on the list of * things wanted, and it should be easy to implement. - Linus */ /* * Ok, demand-loading was easy, shared pages a little bit tricker. Shared * pages started 02.12.91, seems to work. - Linus. * * Tested sharing by executing about 30 /bin/sh: under the old kernel it * would have taken more than the 6M I have free, but it worked well as * far as I could see. * * Also corrected some "invalidate()"s - I wasn't doing enough of them. */ #include <signal.h> #include <asm/system.h> #include <linux/sched.h> #include <linux/head.h> #include <linux/kernel.h> volatile void do_exit(long code); //内存溢出函数 static inline volatile void oom(void) { printk("out of memory\n\r"); do_exit(SIGSEGV); } #define invalidate() \ __asm__("movl %%eax,%%cr3"::"a" (0)) /* these are not to be changed without changing head.s etc */ #define LOW_MEM 0x100000 #define PAGING_MEMORY (15*1024*1024) //16M-1M 那个1M为内核使用 #define PAGING_PAGES (PAGING_MEMORY>>12) #define MAP_NR(addr) (((addr)-LOW_MEM)>>12) #define USED 100 #define CODE_SPACE(addr) ((((addr)+4095)&~4095) < \ current->start_code + current->end_code) static long HIGH_MEMORY = 0; #define copy_page(from,to) \ __asm__("cld ; rep ; movsl"::"S" (from),"D" (to),"c" (1024):"cx","di","si") static unsigned char mem_map [ PAGING_PAGES ] = {0,}; //mem-map是内存映射字节图 /* * Get physical address of first (actually last :-) free page, and mark it * used. If no free pages left, return 0. */ //获取一个空页面 并返回 actually last :-)这句话的意思是从后往前找 unsigned long get_free_page(void) { register unsigned long __res asm("ax"); //scas 功能: 把AL(字节串)或AX(字串)的内容与由DI寄存器寻址的目的串中的数据相减,结果置 //标志位,但不改变任一操作数本身.地址指针DI自动调整. //repne CX<>0 且ZF=0重复执行字符串指令 //格式: SAL OPRD1,COUNT //功能: 其中OPRD1,COUNT与指令SHL相同.本指令与SHL的功能也完全相同,这是因为逻辑左 //---- 移指令与算术左移指令所要完成的操作是一样的. 如果SAL将OPRD1的最高位移至 //CF,改变了原来的CF值,则溢出标志位OF=1,表示移位的前后的操作数不同具有倍增 //的关系.因而SAL可用于带符号数的倍增运算,SHL只能用于无符号数的倍增运 //有效地址传送指令 LEA //格式: LEA OPRD1,OPRD2 //功能: 将源操作数给出的有效地址传送到指定的的寄存器中. __asm__("std ; repne ; scasb\n\t" "jne 1f\n\t" //没有满足的?? 跳转 执行结束 "movb $1,1(%%edi)\n\t" //mov [edi+1],1; 将对应页面的内存印象比特位置一 "sall $12,%%ecx\n\t" //PAGING_PAGES*4 = 对应页面起始位置 "addl %2,%%ecx\n\t" //加上内存低端地址LOW_MEM(1M) 得到实际的物理地址 "movl %%ecx,%%edx\n\t" "movl $1024,%%ecx\n\t" "leal 4092(%%edx),%%edi\n\t" "rep ; stosl\n\t" "movl %%edx,%%eax\n" "1:" :"=a" (__res) :"0" (0),"i" (LOW_MEM),"c" (PAGING_PAGES), "D" (mem_map+PAGING_PAGES-1) /* %edi被指向最后一页内存 */ :"di","cx","dx"); return __res; } /* * Free a page of memory at physical address 'addr'. Used by * 'free_page_tables()' */ //addr对应的是物理地址 void free_page(unsigned long addr) { if (addr < LOW_MEM) return; if (addr >= HIGH_MEMORY) panic("trying to free nonexistent page"); addr -= LOW_MEM; addr >>= 12; //如果对应的映射不为零 减一返回 if (mem_map[addr]--) return; mem_map[addr]=0; panic("trying to free free page"); } /* * This function frees a continuos block of page tables, as needed * by 'exit()'. As does copy_page_tables(), this handles only 4Mb blocks. */ //释放页面连续的内存块 //from 开始的线性地址 size释放的长度 int free_page_tables(unsigned long from,unsigned long size) { unsigned long *pg_table; unsigned long * dir, nr; //位于4MB之内?? if (from & 0x3fffff) panic("free_page_tables called with wrong alignment"); if (!from) panic("Trying to free up swapper memory space"); size = (size + 0x3fffff) >> 22; //得到页目录项 dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */ for ( ; size-->0 ; dir++) { //二级页表不存在?? 跳出 继续 if (!(1 & *dir)) continue; //取二级页表 并释放之 pg_table = (unsigned long *) (0xfffff000 & *dir); for (nr=0 ; nr<1024 ; nr++) { if (1 & *pg_table) free_page(0xfffff000 & *pg_table); *pg_table = 0; pg_table++; } //释放页目录表 free_page(0xfffff000 & *dir); *dir = 0; } invalidate(); return 0; } /* * Well, here is one of the most complicated functions in mm. It * copies a range of linerar addresses by copying only the pages. * Let's hope this is bug-free, 'cause this one I don't want to debug :-) * * Note! We don't copy just any chunks of memory - addresses have to * be divisible by 4Mb (one page-directory entry), as this makes the * function easier. It's used only by fork anyway. * * NOTE 2!! When from==0 we are copying kernel space for the first * fork(). Then we DONT want to copy a full page-directory entry, as * that would lead to some serious memory waste - we just copy the * first 160 pages - 640kB. Even that is more than we need, but it * doesn't take any more memory - we don't copy-on-write in the low * 1 Mb-range, so the pages can be shared with the kernel. Thus the * special case for nr=xxxx. */ //参数 from to 是线性地址 size是需要复制的内存长度 int copy_page_tables(unsigned long from,unsigned long to,long size) { unsigned long * from_page_table; unsigned long * to_page_table; unsigned long this_page; unsigned long * from_dir, * to_dir; unsigned long nr; if ((from&0x3fffff) || (to&0x3fffff)) panic("copy_page_tables called with wrong alignment"); //页表项地址 from_dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */ to_dir = (unsigned long *) ((to>>20) & 0xffc); size = ((unsigned) (size+0x3fffff)) >> 22; //这里遍历的是一级页表 for( ; size-->0 ; from_dir++,to_dir++) { //目的的页表已经存在?? 停机 if (1 & *to_dir) panic("copy_page_tables: already exist"); //源页表项不存在?? 无意义 跳出 继续循环 if (!(1 & *from_dir)) continue; from_page_table = (unsigned long *) (0xfffff000 & *from_dir); if (!(to_page_table = (unsigned long *) get_free_page())) return -1; /* Out of memory, see freeing */ *to_dir = ((unsigned long) to_page_table) | 7; //from如果等于0的话说明对应的是内核段内存 内核只占0xA0长度 nr = (from==0)?0xA0:1024; //这里开始遍历二级页表 for ( ; nr-- > 0 ; from_page_table++,to_page_table++) { this_page = *from_page_table; if (!(1 & this_page)) continue; this_page &= ~2; *to_page_table = this_page; if (this_page > LOW_MEM) { *from_page_table = this_page; this_page -= LOW_MEM; this_page >>= 12; //将新加映射的页表mem_map对应的加一 mem_map[this_page]++; } } } //刷新缓冲区 invalidate(); return 0; } /* * This function puts a page in memory at the wanted address. * It returns the physical address of the page gotten, 0 if * out of memory (either when trying to access page-table or * page.) */ //do_no_page中调用这个函数 page是奉陪的主存区的某一页面的指针 //address是线性地址 //此函数容易懂 不必多说 unsigned long put_page(unsigned long page,unsigned long address) { unsigned long tmp, *page_table; /* NOTE !!! This uses the fact that _pg_dir=0 */ if (page < LOW_MEM || page >= HIGH_MEMORY) printk("Trying to put page %p at %p\n",page,address); //相应位置是否已经置位?? if (mem_map[(page-LOW_MEM)>>12] != 1) printk("mem_map disagrees with %p at %p\n",page,address); page_table = (unsigned long *) ((address>>20) & 0xffc); //该页面是否存在?? if ( (*page_table)&1) page_table = (unsigned long *) (0xfffff000 & *page_table); else { //否则重新申请一页新的内存空间 if (!(tmp=get_free_page())) return 0; *page_table = tmp|7; page_table = (unsigned long *) tmp; } page_table[(address>>12) & 0x3ff] = page | 7; /* no need for invalidate */ //为什么不要刷新缓冲区 //由于任何缺页缘故对页表做修改时 并不需要刷新cpu的变页缓冲 return page; } //取消写保护页面 //un_write protect page //输入参数 页表指针 void un_wp_page(unsigned long * table_entry) { unsigned long old_page,new_page; old_page = 0xfffff000 & *table_entry; //指定的页表只存在一个映射位?? if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) { *table_entry |= 2; //刷新缓冲区 invalidate(); return; } if (!(new_page=get_free_page())) oom(); if (old_page >= LOW_MEM) mem_map[MAP_NR(old_page)]--; *table_entry = new_page | 7; invalidate(); copy_page(old_page,new_page); } /* * This routine handles present pages, when users try to write * to a shared page. It is done by copying the page to a new address * and decrementing the shared-page counter for the old page. * * If it's in code space we exit with a segment error. */ //该函数被page.s调用 当发生写保护异常时 //该函数做到的主要是 写时复制 void do_wp_page(unsigned long error_code,unsigned long address) { #if 0 /* we cannot do this yet: the estdio library writes to code space */ /* stupid, stupid. I really want the libc.a from GNU */ if (CODE_SPACE(address)) do_exit(SIGSEGV); #endif //要想搞懂这么个长串的内存地址相关的关系 关键是要搞懂参数到底传的是什么地址类型 //是线性地址 还是物理地址 页表项或页表中的地址? // 31---22 21---12 11---0 //线性地址:页目录项(10) 页表项(10) 页内偏移值(12) //address是线性地址 对照表就好理解多了 //((address>>12)&0x3ff)<<2=(address>>10) & 0xffc) // 这样理解就可以轻松理解这个式子了 un_wp_page((unsigned long *) (((address>>10) & 0xffc) + (0xfffff000 & *((unsigned long *) ((address>>20) &0xffc))))); } //写页面认证函数 void write_verify(unsigned long address) { unsigned long page; //p位为0? if (!( (page = *((unsigned long *) ((address>>20) & 0xffc)) )&1)) //此时page为页目录项基地址 return; page &= 0xfffff000; page += ((address>>10) & 0xffc); //页表基地址加上页表项 如果用*取值那么就为页框基地址 if ((3 & *(unsigned long *) page) == 1) /* non-writeable, present */ un_wp_page((unsigned long *) page); //un_wp_page中的参数为页表项指针 为物理地址 return; } //取得一页空闲内存页 并映射到指定额线性地址处 void get_empty_page(unsigned long address) { unsigned long tmp; //如果能得到新的内存释放页并且能将线性地址映射到指定的页面内存那么 //就不会进入到free_page中 并且抛出out of memroy的操作 if (!(tmp=get_free_page()) || !put_page(tmp,address)) { free_page(tmp); /* 0 is ok - ignored */ oom(); } } /* * try_to_share() checks the page at address "address" in the task "p", * to see if it exists, and if it is clean. If so, share it with the current * task. * * NOTE! This assumes we have checked that p != current, and that they * share the same executable. */ //参数说明:address是进程中的逻辑地址 //尝试将进程address中对应的页面地址与p进行共享 //可以理解为由进程p fork出来的进程 static int try_to_share(unsigned long address, struct task_struct * p) { unsigned long from; unsigned long to; unsigned long from_page; unsigned long to_page; unsigned long phys_addr; //这里from_page为实际的页表项地址 from_page = to_page = ((address>>20) & 0xffc); //即实际地址为虚拟地址加上真实偏移值((p->start_code)>>22) from_page += ((p->start_code>>20) & 0xffc); to_page += ((current->start_code>>20) & 0xffc); /* is there a page-directory at from? */ //取得页表基地址 from = *(unsigned long *) from_page; if (!(from & 1)) return 0; from &= 0xfffff000; from_page = from + ((address>>10) & 0xffc); phys_addr = *(unsigned long *) from_page; //页表项内容 /* is the page clean and present? */ //0x41分别对应第六位(D位)第0位(p位) if ((phys_addr & 0x41) != 0x01) return 0; phys_addr &= 0xfffff000; if (phys_addr >= HIGH_MEMORY || phys_addr < LOW_MEM) return 0; to = *(unsigned long *) to_page; //如果当前进程页目录项p=0 那么将获取新的页面 if (!(to & 1)) if (to = get_free_page()) *(unsigned long *) to_page = to | 7; else oom(); //获取新页面的后的相关设置 to &= 0xfffff000; to_page = to + ((address>>10) & 0xffc); if (1 & *(unsigned long *) to_page) panic("try_to_share: to_page already exists"); /* share them: write-protect */ *(unsigned long *) from_page &= ~2; //本程序的核心就是这一句 //使当前的页表项值等于p进程的页表项的值 *(unsigned long *) to_page = *(unsigned long *) from_page; invalidate(); //对引用加一的设置 phys_addr -= LOW_MEM; phys_addr >>= 12; mem_map[phys_addr]++; return 1; } /* * share_page() tries to find a process that could share a page with * the current one. Address is the address of the wanted page relative * to the current data space. * * We first check if it is at all feasible by checking executable->i_count. * It should be >1 if there are other tasks sharing this inode. */ //共享页面处理函数 //参数说明 address是逻辑地址 //当页面发生异常时 首先看看是否有p进程与当前进程有同一样的执行文件 //若有则尝试共享 这种linux的内存节省操作是值得学习的 写的真好 static int share_page(unsigned long address) { struct task_struct ** p; //executable是一个结构体指针 对应的结构体如下 //fs.h //这是在内存中的i节点结构 /*struct m_inode { ... unsigned short i_count; //此项对应的意思是 i节点被使用过的次数 ... };*/ if (!current->executable) return 0; //没有被共享的内存返回0 //我感觉这个想的有点意思 值得学习一下 哈哈 if (current->executable->i_count < 2) return 0; //LAST_TASK的定义#define LAST_TASK task[NR_TASKS-1] //NR_TASKS表示当前linux版本中最大的进程数 为64 //开始遍历当前的进程 for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) { if (!*p) continue; //当前进程就是本进程 无意义 跳出 继续循环 if (current == *p) continue; if ((*p)->executable != current->executable) continue; //尝试共享页面 if (try_to_share(address,*p)) return 1; } return 0; } //该函数为缺页调用函数 在page.s中被调用 //address是产生异常的线性地址 //此函数bug较多 不必深究(在linux0.11中) void do_no_page(unsigned long error_code,unsigned long address) { int nr[4]; unsigned long tmp; unsigned long page; int block,i; address &= 0xfffff000; tmp = address - current->start_code; //获取对应的逻辑地址 //是由执行文件i节点缺失引起的?? if (!current->executable || tmp >= current->end_data) { get_empty_page(address); return; } //开始尝试共享页面 if (share_page(tmp)) return; //否则去申请新的页面 if (!(page = get_free_page())) oom(); /* remember that 1 block is used for header */ //四个逻辑块读入到刚刚申请的页面中 并且设置他们的映射关系 block = 1 + tmp/BLOCK_SIZE; for (i=0 ; i<4 ; block++,i++) nr[i] = bmap(current->executable,block); bread_page(page,current->executable->i_dev,nr); //在读设备的逻辑块的时候 可能会出现 在执行文件中读取的页面位置可能 //不到一个页面的信息 就会读入一些无用的信息 因此需要将其置零 //此段代码在linux0.11中bug有点多 不必过分追究 i = tmp + 4096 - current->end_data; tmp = page + 4096; while (i-- > 0) { tmp--; *(char *)tmp = 0; } //将申请的page页面的地址转换为address if (put_page(page,address)) return; free_page(page); oom(); } //内存初始化函数 在main.c中调用 void mem_init(long start_mem, long end_mem) { int i; //#define PAGING_MEMORY (15*1024*1024) //#define PAGING_PAGES (PAGING_MEMORY>>12) //linus的意思是将1M以内的内存作为内核内存区 //将1M以上的15M作为普通的内存区 并且内存的页面数为15MB/4KB=3840 HIGH_MEMORY = end_mem; //先将1M-16M的地方设置为USED for (i=0 ; i<PAGING_PAGES ; i++) mem_map[i] = USED; //将内存的映射关系设置为USED=100 i = MAP_NR(start_mem); //MAP_NR=>(start_mem-1MB)>>12 end_mem -= start_mem; end_mem >>= 12; //4M-16M的地方设置内存为0 表示都没有使用 while (end_mem-->0) mem_map[i++]=0; } //内存计算函数 void calc_mem(void) { int i,j,k,free=0; long * pg_tbl; for(i=0 ; i<PAGING_PAGES ; i++) if (!mem_map[i]) free++; printk("%d pages free (of %d)\n\r",free,PAGING_PAGES); //i=0,1作为内核的使用 在这里不计算 for(i=2 ; i<1024 ; i++) { if (1&pg_dir[i]) { pg_tbl=(long *) (0xfffff000 & pg_dir[i]); //二级页表?? for(j=k=0 ; j<1024 ; j++) if (pg_tbl[j]&1) k++; printk("Pg-dir[%d] uses %d pages\n",i,k); } } }