给xv6添加mmap和munmap功能,将文件映射到进程的地址空间。
在Makefile中添加$U/_mmaptest\
在kernel/syscall.h中添加
// System call numbers for the two new calls (added to kernel/syscall.h).
#define SYS_mmap 22
#define SYS_munmap 23
在kernel/syscall.c中添加函数声明:
// Kernel-side handlers for mmap/munmap; declared here so the syscall table can reference them.
extern uint64 sys_mmap(void);
extern uint64 sys_munmap(void);
系统调用函数指针数组中添加
// Table entries that bind each new system call number to its handler.
[SYS_mmap] sys_mmap,
[SYS_munmap] sys_munmap,
在user/user.h中添加函数接口
// User-space entry points for the new system calls.
// mmap maps `length` bytes of the open file `fd` with protection `prot` and mapping `flags`.
void *mmap(void *addr, int length, int prot, int flags, int fd, int offset);
// munmap removes `length` bytes of a mapping starting at `addr`.
int munmap(void* addr, int length);
// One memory-mapped region (virtual memory area) of a process.
struct vma {
  uint64 addr;       // first virtual address of the mapping
  int length;        // size in bytes, as requested by mmap
  int permissions;   // PROT_* bits requested by mmap
  int flags;         // MAP_* bits requested by mmap
  int fd;            // descriptor the mapping was created from
  int valid;         // nonzero while this slot describes a live mapping
  struct file *f;    // backing file; the mapping holds its own reference
};
// Per-process state (abridged excerpt: only the fields relevant to mmap are shown).
struct proc {
struct spinlock lock;
struct file *ofile[NOFILE]; // Open files
struct inode *cwd; // Current directory
char name[16]; // Process name (debugging)
struct vma VMA[16]; // mmap bookkeeping: one slot per mapping; a slot is live while its valid field is nonzero
};
首先读取函数参数,然后判断一下文件的权限和mmap的传入的权限之间的关系是否合法,提高文件的引用计数,分配对应的进程虚拟地址空间,但不分配实际的物理地址空间,并记录在进程的VMA字段中,用于后续usertrap的使用。
uint64 sys_mmap(void)
{
// read paremater
//void *mmap(void *addr, int length, int prot, int flags, int fd, int offset);
uint64 addr;
int length, prot, flags, fd, offset;
if(argaddr(0, &addr) || argint(1, &length) < 0 || argint(2, &prot) < 0 || argint(3, &flags) < 0 || argint(4, &fd) < 0 || argint(5, &offset) < 0)
return -1;
struct proc * p = myproc();
// printf("p->id = %d\n", p->pid);
struct file* f;
if(fd < 0 || fd >= NOFILE || (f = p->ofile[fd]) == 0)
return -1;
// printf("f->readable = %d\n", f->readable);
if(!f->readable) {
if((prot & PROT_READ)) return -1;
}
if(!f->writable) {
if((prot & PROT_WRITE) && (flags & MAP_SHARED)) return -1;
}
filedup(f); // 提升文件的引用计数
int found = 0;
// 分配一块区域,然后放入对应的VMA
addr = p->sz;
if(p->sz + PGROUNDDOWN(length) >= MAXVA) return -1;
// printf("debug\n");
p->sz += PGROUNDUP(length);
// 在进程的VMA表格中插入,后续用于Usertrap时读取参数,用来判断对应是哪个mmap的区间
for(int i = 0; i < 16; i++) {
if(p->VMA[i].valid == 0) // 说明找到了
{
p->VMA[i].addr = addr;
p->VMA[i].length = length;
p->VMA[i].permissions = prot;
p->VMA[i].flags = flags;
p->VMA[i].fd = fd;
p->VMA[i].f = f;
found = 1;
p->VMA[i].valid = 1;
break;
}
}
if(!found) return -1;
return addr;
}
读取发生page fault对应的虚拟地址va,根据这个va去查找进程的VMA数组,找到对应的mmap参数,申请一页物理内存,并从对应文件中读取一页的内容写入物理空间,并根据对应的读写权限,加入页表当中(添加映射)。
// kernel/fs.c
// Read len bytes starting at file offset `offset` from the file backing
// vma_cur into the buffer at address pa, holding the inode lock for the
// duration of the read.
// NOTE(review): the second readi argument (0) presumably selects a kernel
// destination address rather than a user one -- confirm against readi.
void
my_readi(struct vma vma_cur, uint64 pa, uint64 offset, uint64 len)
{
  struct inode *ip = vma_cur.f->ip;

  ilock(ip);
  readi(ip, 0, pa, offset, len);
  iunlock(ip);
}
// kernel/trap.c
void
usertrap(void)
{
int which_dev = 0;
if((r_sstatus() & SSTATUS_SPP) != 0)
panic("usertrap: not from user mode");
// send interrupts and exceptions to kerneltrap(),
// since we're now in the kernel.
w_stvec((uint64)kernelvec);
struct proc *p = myproc();
// save user program counter.
p->trapframe->epc = r_sepc();
if(r_scause() == 8){
// system call
if(p->killed)
exit(-1);
// sepc points to the ecall instruction,
// but we want to return to the next instruction.
p->trapframe->epc += 4;
// an interrupt will change sstatus &c registers,
// so don't enable until done with those registers.
intr_on();
syscall();
} else if((which_dev = devintr()) != 0){
// ok
}
else if (r_scause() == 13 || r_scause() == 15)
{
uint64 va = r_stval(); // get the virtual address that caused the page fault.
// printf("page fault, va = %p\n", va);
// 首先要根据va去判断,属于哪个VMA,
struct proc* p = myproc();
struct vma vma_cur;
int found = 0;
for(int i = 0; i < 16; i++) {
if(va >= p->VMA[i].addr && va < p->VMA[i].addr + p->VMA[i].length){
vma_cur = p->VMA[i];
found = 1;
break;
}
}
if(found) {
// 分配对应的物理内存
char * pa = kalloc(); // alloc physial memory ,分配一页物理内存
if(pa == 0){ // 申请失败
p->killed = 1; // 杀死进程
}
memset(pa, 0, PGSIZE); //清空物理内存
my_readi(vma_cur, (uint64)pa, va - vma_cur.addr, PGSIZE); // 把数据读到pa中
// 物理页映射的时候要注意flag
uint64 flag = PTE_U;
if(vma_cur.permissions & (PROT_READ)) flag |= PTE_R;
if(vma_cur.permissions & (PROT_WRITE)) flag |= PTE_W;
// 注意flag
if(mappages(p->pagetable, PGROUNDDOWN(va), PGSIZE, (uint64)pa, flag) != 0){ //建立从va下取整开始一页的映射
kfree(pa); // 分配失败,释放物理内存
p->killed = 1;
}
}
else {
p->killed = 1;
}
...
如果flags中带有MAP_SHARED,那么对于内存的修改要写回文件当中,然后调用uvmunmap,取消所有映射,并释放物理内存,如果释放的是之前mmap的全部空间,要减少文件的引用计数。
uint64 sys_munmap(void)
{
// read paremater
// int munmap(void* addr, int length);
uint64 addr;
int length;
if(argaddr(0, &addr) || argint(1, &length) < 0)
return -1;
struct proc * p = myproc();
int found = 0;
for(int i = 0; i < 16; i++) {
if(addr >= p->VMA[i].addr && addr < p->VMA[i].addr + p->VMA[i].length) {
found = 1;
// 如果有MAP_SHARE标志,那就要写回到文件
if(p->VMA[i].flags & MAP_SHARED) {
filewrite(p->VMA[i].f, p->VMA[i].addr, p->VMA[i].length);
}
// uvmunmap
uvmunmap(p->pagetable, addr, PGROUNDDOWN(length) / PGSIZE, 1);
// 如果munmap之前的整个mmap的空间,则要减少文件的引用计数
if(addr == p->VMA[i].addr && length == p->VMA[i].length) {
fileclose(p->VMA[i].f);
p->VMA[i].valid = 0;
}
break;
}
}
if(!found) return -1;
return 0;
}
要注意修改uvmunmap函数,将panic("uvmunmap: not mapped")改成continue,因为mmap的区域是延迟分配的,此时可能存在尚未映射物理页的虚拟页。
// Remove npages of mappings starting from va. va must be page-aligned.
// Pages in the range that are not mapped are skipped rather than treated
// as an error, because mmap regions are populated lazily and may contain
// pages (or whole leaf page tables) that were never touched.
// If do_free is nonzero, the physical memory of each removed page is freed.
void
uvmunmap(pagetable_t pagetable, uint64 va, uint64 npages, int do_free)
{
  uint64 a;
  pte_t *pte;

  if((va % PGSIZE) != 0)
    panic("uvmunmap: not aligned");

  for(a = va; a < va + npages*PGSIZE; a += PGSIZE){
    // No page-table page for this address: nothing was ever mapped here.
    if((pte = walk(pagetable, a, 0)) == 0)
      continue;
    // Page reserved by mmap but never faulted in.
    if((*pte & PTE_V) == 0)
      continue;
    if(PTE_FLAGS(*pte) == PTE_V)
      panic("uvmunmap: not a leaf");
    if(do_free){
      uint64 pa = PTE2PA(*pte);
      kfree((void*)pa);
    }
    *pte = 0;
  }
}
在进程退出的时候,释放之前mmap对应的空间
void
exit(int status)
{
struct proc *p = myproc();
if(p == initproc)
panic("init exiting");
// munmap add here
for(int i = 0; i < 16; i++) {
if(p->VMA[i].valid) {
// 如果有MAKE_SHARE标志,那就要写回到文件
if(p->VMA[i].flags & (MAP_SHARED)) {
filewrite(p->VMA[i].f, p->VMA[i].addr, p->VMA[i].length);
}
// uvmunmap
uvmunmap(p->pagetable, p->VMA[i].addr, PGROUNDDOWN(p->VMA[i].length) / PGSIZE, 1);
// 减少文件的引用计数
if(p->VMA[i].f)
fileclose(p->VMA[i].f);
}
}
// Close all open files.
for(int fd = 0; fd < NOFILE; fd++){
if(p->ofile[fd]){
struct file *f = p->ofile[fd];
fileclose(f);
p->ofile[fd] = 0;
}
}
...
}
拷贝父进程的VMA数组到子进程的VMA数组。
// Create a new process, copying the parent.
// The child also inherits the parent's mmap regions: each live VMA is
// copied and takes its own reference on the backing file. Pages already
// faulted in by the parent are duplicated by uvmcopy(); the rest are
// loaded on demand in the child.
int
fork(void)
{
  int i, pid;
  struct proc *np;
  struct proc *p = myproc();

  // Allocate process.
  if((np = allocproc()) == 0){
    return -1;
  }

  // Copy user memory from parent to child.
  if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
    freeproc(np);
    release(&np->lock);
    return -1;
  }
  np->sz = p->sz;

  np->parent = p;

  // Copy the VMA table from parent to child.
  for(int slot = 0; slot < 16; slot++){
    // Start from a free slot: nothing visible here shows allocproc()
    // clearing the table, so a recycled proc could otherwise keep a stale
    // entry that the child does not actually own.
    np->VMA[slot].valid = 0;
    if(p->VMA[slot].valid && p->VMA[slot].f){
      np->VMA[slot] = p->VMA[slot];
      filedup(np->VMA[slot].f);  // the child's mapping holds its own reference
    }
  }
  ...
}
注意要修改uvmcopy函数,将panic("uvmcopy: page not present")改成continue,因为mmap的区域是延迟分配的,此时可能存在尚未映射物理页的虚拟页。
// Given a parent process's page table, copy its memory into a child's
// page table. Copies both the page table entries and the physical memory.
// Addresses below sz that have no mapping are skipped: mmap regions are
// populated lazily, so such holes are expected and the child will fault
// them in on demand.
// Returns 0 on success, -1 on failure; on failure every page already
// allocated for the child is freed.
int
uvmcopy(pagetable_t old, pagetable_t new, uint64 sz)
{
  pte_t *pte;
  uint64 pa, i;
  uint flags;
  char *mem;

  for(i = 0; i < sz; i += PGSIZE){
    // No page-table page for this address: nothing was ever mapped here.
    if((pte = walk(old, i, 0)) == 0)
      continue;
    // Page reserved by mmap but never faulted in.
    if((*pte & PTE_V) == 0)
      continue;
    pa = PTE2PA(*pte);
    flags = PTE_FLAGS(*pte);
    if((mem = kalloc()) == 0)
      goto err;
    memmove(mem, (char*)pa, PGSIZE);
    if(mappages(new, i, PGSIZE, (uint64)mem, flags) != 0){
      kfree(mem);
      goto err;
    }
  }
  return 0;

 err:
  uvmunmap(new, 0, i / PGSIZE, 1);
  return -1;
}
$ make qemu-gdb
(3.8s)
== Test mmaptest: mmap f ==
mmaptest: mmap f: OK
== Test mmaptest: mmap private ==
mmaptest: mmap private: OK
== Test mmaptest: mmap read-only ==
mmaptest: mmap read-only: OK
== Test mmaptest: mmap read/write ==
mmaptest: mmap read/write: OK
== Test mmaptest: mmap dirty ==
mmaptest: mmap dirty: OK
== Test mmaptest: not-mapped unmap ==
mmaptest: not-mapped unmap: OK
== Test mmaptest: two files ==
mmaptest: two files: OK
== Test mmaptest: fork_test ==
mmaptest: fork_test: OK
== Test usertests ==
$ make qemu-gdb
usertests: OK (94.5s)
(Old xv6.out.usertests failure log removed)
== Test time ==
time: OK
Score: 140/140
[mit6.s081] 笔记 Lab10: Mmap | 文件内存映射 | Miigon’s blog
阅读了大佬的博客,发现其实自己的代码有一些小bug和改进优化的地方。
1、首先是内存分配的位置有问题,应该从高地址向下,本文使用的是从低地址开始,这样可能会和进程使用的地址空间发生冲突。(但测试过程没有这个bug,应该是因为本文修改了uvmunmap和uvmcopy,其实这导致了进程的空间(p->sz)一直在增大,而没有在munmap后减小,换成从高地址向下分配才能解决这个问题)
2、没有特判在mmap的内存中使用munmap时,传入的参数在内存中挖洞的这种错误操作(正确操作应该是内存的前一部分或后一部分),这种错误操作应该直接返回-1
3、对于munmap之前mmap空间的部分内存的情况,这种情况应该及时修改VMA数组的一些参数(start和sz)
4、对于MAP_SHARED的映射,应该在uvmunmap函数中,对PTE有D标志位的页进行判断(代表内存被修改过),只把这些脏页写回磁盘,而本文是粗暴的将全部空间写回磁盘(效率低)