http://blog.csdn.net/zhuriyuxiao/article/details/8968781
http://www.tuicool.com/articles/NjY3uu
2. KVM API
简单的API例子
http://smilejay.com/2013/03/use-kvm-api/
hejie 同学的《使用KVM API实现Emulator Demo》
http://soulxu.github.io/blog/2014/08/11/use-kvm-api-write-emulator/
3. wenyi 同学的 《KVM 内存虚拟化及其实现》
http://www.ibm.com/developerworks/cn/linux/l-cn-kvm-mem/
4. KVM 官方文档
$ git clone http://git.kernel.org/pub/scm/virt/kvm/kvm.git
$ vim Documentation/virtual/kvm/api.txt
该实例由Mark Wu同学提供。
• The kvm API is centered around file descriptors.
• An initial open("/dev/kvm") obtains a handle to the kvm subsystem; this handle can be used to issue system ioctls.
• A KVM_CREATE_VM ioctl on this handle will create a VM file descriptor which can be used to issue VM ioctls.
• A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu and return a file descriptor pointing to it.
• Finally, ioctls on a vcpu fd can be used to control the vcpu, including the important task of actually running guest code.
• KVM-related file descriptors in QEMU:
1 (gdb) p kvm_state->fd 2 $1 = 3 3 (gdb) p kvm_state->vmfd 4 $2 = 4 5 (gdb) info threads 6 4 Thread 0x7f86a60f0700 (LWP 13455) 0x00007f86ad0803dc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 7 3 Thread 0x7f86a56ef700 (LWP 13456) 0x00007f86ad0803dc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 8 2 Thread 0x7f86a6af1700 (LWP 13960) 0x00007f86ad08075b in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 9 * 1 Thread 0x7f86ae478940 (LWP 13453) 0x00007f86a97772f3 in select () from /lib64/libc.so.6 10 (gdb) t 3 11 [Switching to thread 3 (Thread 0x7f86a56ef700 (LWP 13456))]#0 0x00007f86ad0803dc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 12 (gdb) bt 13 #0 0x00007f86ad0803dc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 14 #1 0x00007f86ae60a2e9 in qemu_cond_wait (cond=<value optimized out>, mutex=<value optimized out>) at qemu-thread-posix.c:113 15 #2 0x00007f86ae67772f in qemu_kvm_wait_io_event (arg=0x7f86b10a0930) at /home/mark/Work/qemu/qemu/cpus.c:710 16 #3 qemu_kvm_cpu_thread_fn (arg=0x7f86b10a0930) at /home/mark/Work/qemu/qemu/cpus.c:745 17 #4 0x00007f86ad07c7f1 in start_thread () from /lib64/libpthread.so.0 18 #5 0x00007f86a977e70d in clone () from /lib64/libc.so.6 19 (gdb) p ((CPUX86State *)0x7f86b10a0930)->kvm_fd 20 $3 = 12 21 (gdb) t 4 22 [Switching to thread 4 (Thread 0x7f86a60f0700 (LWP 13455))]#0 0x00007f86ad0803dc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 23 (gdb) bt 24 #0 0x00007f86ad0803dc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 25 #1 0x00007f86ae60a2e9 in qemu_cond_wait (cond=<value optimized out>, mutex=<value optimized out>) at qemu-thread-posix.c:113 26 #2 0x00007f86ae67772f in qemu_kvm_wait_io_event (arg=0x7f86b1088a00) at /home/mark/Work/qemu/qemu/cpus.c:710 27 #3 qemu_kvm_cpu_thread_fn (arg=0x7f86b1088a00) at /home/mark/Work/qemu/qemu/cpus.c:745 28 #4 0x00007f86ad07c7f1 in start_thread () from 
/lib64/libpthread.so.0 29 #5 0x00007f86a977e70d in clone () from /lib64/libc.so.6 30 (gdb) p ((CPUX86State *)0x7f86b1088a00)->kvm_fd 31 $4 = 11
1 crash> files 15011 2 PID: 15011 TASK: ffff880053ea0100 CPU: 0 COMMAND: "qemu-system-x86" 3 ROOT: / CWD: /home/mark/Work/qemu/qemu 4 FD FILE DENTRY INODE TYPE PATH 5 0 ffff880050b8c8c0 ffff88000ad77a80 ffff880134d13318 CHR /dev/pts/4 6 1 ffff880050b8c8c0 ffff88000ad77a80 ffff880134d13318 CHR /dev/pts/4 7 2 ffff880050b8c8c0 ffff88000ad77a80 ffff880134d13318 CHR /dev/pts/4 8 3 ffff88008491fa80 ffff880134c9b0c0 ffff88013b372a78 CHR /dev/kvm 9 4 ffff88012eb52140 ffff8800ae376e40 ffff88013b71e2d8 REG anon_inode:/kvm-vm 10 5 ffff8801357e7180 ffff8800ae3760c0 ffff88013b71e2d8 REG anon_inode:/[signalfd] 11 6 ffff880014255a80 ffff8800ae376180 ffff88013b71e2d8 REG anon_inode:/[eventfd] 12 7 ffff880014255a80 ffff8800ae376180 ffff88013b71e2d8 REG anon_inode:/[eventfd] 13 8 ffff880136751bc0 ffff880089da2c80 ffff88003f6490c0 REG /home/mark/Work/qemu/images/fedora.img 14 9 ffff8800a3c4d480 ffff8800ae376300 ffff880134cb1358 FIFO 15 10 ffff88008adc6980 ffff8800ae376300 ffff880134cb1358 FIFO 16 11 ffff88008ae865c0 ffff88012256f440 ffff88013b71e2d8 REG anon_inode:/kvm-vcpu 17 12 ffff88007bb11ec0 ffff88012256f2c0 ffff88013b71e2d8 REG anon_inode:/kvm-vcpu 18 crash> p ((struct file *)0xffff88008491fa80)->f_op 19 $5 = (const struct file_operations *) 0xffffffffa04f0e40 20 crash> sym 0xffffffffa04f0e40 21 ffffffffa04f0e40 (d) kvm_chardev_ops [kvm] 22 crash> px *((struct file*)0xffff88007bb11ec0)->f_op 23 $7 = { 24 owner = 0xffffffffa05249a0, 25 llseek = 0, 26 read = 0, 27 write = 0, 28 : 29 ioctl = 0, 30 unlocked_ioctl = 0xffffffffa04bae00, 31 compat_ioctl = 0xffffffffa04bae00, 32 mmap = 0xffffffffa04b9220, 33 open = 0, 34 flush = 0, 35 release = 0xffffffffa04bd830, 36 fsync = 0, 37 aio_fsync = 0, 38 : 39 setlease = 0 40 } 41 crash> sym 0xffffffffa04bae00 42 ffffffffa04bae00 (t) kvm_vcpu_ioctl [kvm] 43 crash> sym 0xffffffffa04b9220 44 ffffffffa04b9220 (t) kvm_vcpu_mmap [kvm] 45 crash> px ((struct file *)0xffff88012eb52140)->private_data 46 $15 = (void *) 0xffff880137c6c000 47 crash> px 
vm_list 48 vm_list = $16 = { 49 next = 0xffff880137c6c280, 50 prev = 0xffff880137c6c280 51 } 52 crash> sym vm_list 53 ffffffffa04f0aa0 (D) vm_list [kvm] 54 crash> px ((struct kvm*)0xffff880137c6c000)->vm_list 55 $17 = { 56 next = 0xffffffffa04f0aa0, 57 prev = 0xffffffffa04f0aa0 58 }
• qemu-kvm backtrace of vCPU initialization
1 (gdb) bt 2 #0 qemu_init_vcpu (_env=0x7ffff8b18a00) at /home/mark/Work/qemu/qemu/cpus.c:936 3 #1 0x00007ffff7e9f869 in cpu_x86_init (cpu_model=0x7ffff7f8fca9 "qemu64") at /home/mark/Work/qemu/qemu/target-i386/helper.c:1263 4 #2 0x00007ffff7ee1de0 in pc_new_cpu (cpu_model=0x7ffff7f8fca9 "qemu64") at /home/mark/Work/qemu/qemu/hw/pc.c:936 5 #3 pc_cpus_init (cpu_model=0x7ffff7f8fca9 "qemu64") at /home/mark/Work/qemu/qemu/hw/pc.c:963 6 #4 0x00007ffff7ee297c in pc_init1 (system_memory=0x7ffff8b113f0, system_io=0x7ffff8b114f0, ram_size=536870912, boot_device=0x7fffffffdf10 "cad", 7 kernel_filename=0x0, kernel_cmdline=0x7ffff7f668eb "", initrd_filename=0x0, cpu_model=0x0, pci_enabled=1, kvmclock_enabled=1) 8 at /home/mark/Work/qemu/qemu/hw/pc_piix.c:103 9 #5 0x00007ffff7ee30d8 in pc_init_pci (ram_size=536870912, boot_device=0x7fffffffdf10 "cad", kernel_filename=0x0, kernel_cmdline=0x7ffff7f668eb "", 10 initrd_filename=0x0, cpu_model=<value optimized out>) at /home/mark/Work/qemu/qemu/hw/pc_piix.c:245 11 #6 0x00007ffff7de57a9 in main (argc=<value optimized out>, argv=<value optimized out>, envp=<value optimized out>) at /home/mark/Work/qemu/qemu/vl.c:3351
1 qemu_init_vcpu 2 qemu_kvm_start_vcpu 3 qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env); /* One qemu thread per vCPU */ 4 qemu_kvm_cpu_thread_fn 5 kvm_init_vcpu 6 +-->kvm_cpu_exec---+ 7 | | 8 -------------------+ 9 10 int kvm_init_vcpu(CPUState *env) 11 { 12 KVMState *s = kvm_state; 13 long mmap_size; 14 int ret; 15 DPRINTF("kvm_init_vcpu\n"); 16 ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index); 17 if (ret < 0) { 18 DPRINTF("kvm_create_vcpu failed\n"); 19 goto err; 20 } 21 env->kvm_fd = ret; 22 env->kvm_state = s; 23 env->kvm_vcpu_dirty = 1; 24 mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); 25 if (mmap_size < 0) { 26 ret = mmap_size; 27 DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n"); 28 goto err; 29 } 30 env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, 31 env->kvm_fd, 0); 32 : 33 }
• qemu function kvm_cpu_exec
1 int kvm_cpu_exec(CPUState *env) 2 { 3 struct kvm_run *run = env->kvm_run; 4 int ret, run_ret; 5 DPRINTF("kvm_cpu_exec()\n"); 6 if (kvm_arch_process_async_events(env)) { 7 env->exit_request = 0; 8 return EXCP_HLT; 9 } 10 cpu_single_env = env; 11 do { 12 if (env->kvm_vcpu_dirty) { 13 kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE); 14 env->kvm_vcpu_dirty = 0; 15 } 16 kvm_arch_pre_run(env, run); 17 if (env->exit_request) { 18 DPRINTF("interrupt exit requested\n"); 19 /* 20 * KVM requires us to reenter the kernel after IO exits to complete 21 * instruction emulation. This self-signal will ensure that we 22 * leave ASAP again. 23 */ 24 qemu_cpu_kick_self(); 25 } 26 cpu_single_env = NULL; 27 qemu_mutex_unlock_iothread(); 28 run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0); 29 qemu_mutex_lock_iothread(); 30 cpu_single_env = env; 31 kvm_arch_post_run(env, run); 32 kvm_flush_coalesced_mmio_buffer(); 33 if (run_ret < 0) { 34 if (run_ret == -EINTR || run_ret == -EAGAIN) { 35 DPRINTF("io window exit\n"); 36 ret = EXCP_INTERRUPT; 37 break; 38 } 39 DPRINTF("kvm run failed %s\n", strerror(-run_ret)); 40 abort(); 41 } 42 switch (run->exit_reason) { 43 case KVM_EXIT_IO: 44 DPRINTF("handle_io\n"); 45 kvm_handle_io(run->io.port, 46 (uint8_t *)run + run->io.data_offset, 47 run->io.direction, 48 run->io.size, 49 run->io.count); 50 ret = 0; 51 break; 52 case KVM_EXIT_MMIO: 53 DPRINTF("handle_mmio\n"); 54 cpu_physical_memory_rw(run->mmio.phys_addr, 55 run->mmio.data, 56 run->mmio.len, 57 run->mmio.is_write); 58 ret = 0; 59 break; 60 : 61 } 62 } while (ret == 0); 63 : 64 return ret; 65 }
• kernel code path
1 sys_ioctl 2 do_vfs_ioctl 3 vfs_ioctl 4 kvm_vcpu_ioctl /* kvm_vcpu_fops.unlocked_ioctl */ 5 kvm_arch_vcpu_ioctl_run 6 __vcpu_run 7 vcpu_enter_guest 8 vmx_vcpu_run /* kvm_x86_ops->run */ 9 | 10 v vm entry 11 +-----------------+ 12 | guest code | 13 | on this cpu | 14 +-------------------+ 15 | vm exit 16 v 17 vmx_handle_exit /* kvm_x86_ops->handle_exit */ 18 return kvm_vmx_exit_handlers[exit_reason](vcpu)
• kernel exit handlers
1 /* 2 * The exit handlers return 1 if the exit was handled fully and guest execution 3 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 4 * to be done to userspace and return 0. 5 */ 6 static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { 7 [EXIT_REASON_EXCEPTION_NMI] = handle_exception, 8 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 9 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, 10 [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, 11 [EXIT_REASON_IO_INSTRUCTION] = handle_io, 12 : 13 :
• guest runtime information (struct kvm_run) shared between the KVM kernel module and qemu-kvm
1 env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, 2 env->kvm_fd, 0); 3 (gdb) p ((struct CPUX86State*)0x7fcdbe63f930)->kvm_run 4 $2 = (struct kvm_run *) 0x7fcdbcfa2000 5 (gdb) p *((struct CPUX86State*)0x7fcdbe63f930)->kvm_run 6 $3 = {request_interrupt_window = 0 '\000', padding1 = "\000\000\000\000\000\000", exit_reason = 10, ready_for_interrupt_injection = 0 '\000', if_flag = 7 0 '\000', padding2 = "\000", cr8 = 0, apic_base = 4276094976, {hw = {hardware_exit_reason = 4276093104}, fail_entry = {hardware_entry_failure_reason = 8 4276093104}, ex = {exception = 4276093104, error_code = 0}, io = {direction = 176 '\260', size = 0 '\000', port = 65248, count = 0, data_offset = 9 513418191540584448}, debug = {arch = {exception = 4276093104, pad = 0, pc = 513418191540584448, dr6 = 4294967300, dr7 = 0}}, mmio = {phys_addr = 10 4276093104, data = "\000\000\000\000 \a \a", len = 4, is_write = 1 '\001'}, hypercall = {nr = 4276093104, args = {513418191540584448, 4294967300, 0, 0, 11 0, 0}, ret = 0, longmode = 0, pad = 0}, tpr_access = {rip = 4276093104, is_write = 0, pad = 119539488}, s390_sieic = {icptcode = 176 '\260', ipa = 12 65248, ipb = 0}, s390_reset_flags = 4276093104, dcr = {dcrn = 4276093104, data = 0, is_write = 0 '\000'}, internal = {suberror = 4276093104, ndata = 0, 13 data = {513418191540584448, 4294967300, 0 <repeats 14 times>}}, osi = {gprs = {4276093104, 513418191540584448, 4294967300, 0 <repeats 29 times>}}, 14 papr_hcall = {nr = 4276093104, ret = 513418191540584448, args = {4294967300, 0, 0, 0, 0, 0, 0, 0, 0}}, padding = 15 "\260\000\340\376\000\000\000\000\000\000\000\000 \a \a\004\000\000\000\001", '\000' <repeats 234 times>}}
1 crash> vtop 7fcdbcfa2000 2 VIRTUAL PHYSICAL 3 7fcdbcfa2000 12eb3c000 4 PML: 137dfd7f8 => 136ff7067 5 PUD: 136ff79b0 => 134069067 6 PMD: 134069f38 => 13671c067 7 PTE: 13671cd10 => 800000012eb3c067 8 PAGE: 12eb3c000 9 10 PTE PHYSICAL FLAGS 11 800000012eb3c067 12eb3c000 12 (PRESENT|RW|USER|ACCESSED|DIRTY|NX) 13 14 VMA START END FLAGS FILE 15 ffff8800aac39b70 7fcdbcfa2000 7fcdbcfa5000 fb anon_inode:/kvm-vcpu 16 PAGE PHYSICAL MAPPING INDEX CNT FLAGS 17 ffffea0004237520 12eb3c000 0 ffff8800b72c9980 2 40000000000014 18 crash> px ((struct kvm*)0xffff880137c6c000)->vcpus[1]->run 19 $23 = (struct kvm_run *) 0xffff88012eb3c000 20 crash> vtop 0xffff88012eb3c000 21 VIRTUAL PHYSICAL 22 ffff88012eb3c000 12eb3c000 23 PML4 DIRECTORY: ffffffff81a85000 24 PAGE DIRECTORY: 1a86063 25 PUD: 1a86020 => a067 26 PMD: aba8 => 800000012ea001e3 27 PAGE: 12ea00000 (2MB) 28 29 30 PTE PHYSICAL FLAGS 31 800000012ea001e3 12ea00000 (PRESENT|RW|ACCESSED|DIRTY|PSE|GLOBAL|NX) 32 33 PAGE PHYSICAL MAPPING INDEX CNT FLAGS 34 ffffea0004237520 12eb3c000 0 ffff8800b72c9980 2 40000000000014 35 crash> px ((struct file*)0xffff88007bb11ec0)->private_data 36 $30 = (void *) 0xffff88013860c2b8 37 crash> px ((struct kvm_vcpu *)0xffff88013860c2b8)->run 38 $31 = (struct kvm_run *) 0xffff88012eb3c000
• QEMU backtrace of guest memory registration (kvm_set_user_memory_region)
1 (gdb) bt 2 #0 kvm_set_user_memory_region (s=0x7ffff8b100a0, slot=0x7ffff8b100a0) at /home/mark/Work/qemu/qemu/kvm-all.c:168 3 #1 0x00007ffff7ea3fae in kvm_set_phys_mem (client=<value optimized out>, start_addr=<value optimized out>, size=<value optimized out>, 4 phys_offset=<value optimized out>, log_dirty=false) at /home/mark/Work/qemu/qemu/kvm-all.c:650 5 #2 kvm_client_set_memory (client=<value optimized out>, start_addr=<value optimized out>, size=<value optimized out>, phys_offset=<value optimized out>, 6 log_dirty=false) at /home/mark/Work/qemu/qemu/kvm-all.c:663 7 #3 0x00007ffff7e8405a in cpu_notify_set_memory (start_addr=0, size=134217728, phys_offset=0, region_offset=0, log_dirty=false) 8 at /home/mark/Work/qemu/qemu/exec.c:1742 9 #4 cpu_register_physical_memory_log (start_addr=0, size=134217728, phys_offset=0, region_offset=0, log_dirty=false) 10 at /home/mark/Work/qemu/qemu/exec.c:2675 11 #5 0x00007ffff7eaac70 in address_space_update_topology_pass (as=0x7ffff82f31e0, old_view=..., new_view=..., adding=true) 12 at /home/mark/Work/qemu/qemu/memory.c:731 13 #6 0x00007ffff7eacf31 in address_space_update_topology (as=0x7ffff82f31e0) at /home/mark/Work/qemu/qemu/memory.c:746 14 #7 0x00007ffff7ead514 in memory_region_update_topology () at /home/mark/Work/qemu/qemu/memory.c:760 15 #8 0x00007ffff7ee1787 in pc_memory_init (system_memory=0x7ffff8b11430, kernel_filename=<value optimized out>, kernel_cmdline=0x7ffff7f668eb "", 16 initrd_filename=0x0, below_4g_mem_size=134217728, above_4g_mem_size=0, rom_memory=0x7ffff8b32240, ram_memory=0x7fffffffe188) 17 at /home/mark/Work/qemu/qemu/hw/pc.c:996 18 #9 0x00007ffff7ee2d96 in pc_init1 (system_memory=0x7ffff8b11430, system_io=0x7ffff8b11530, ram_size=134217728, boot_device=0x7fffffffe500 "cad", 19 kernel_filename=0x0, kernel_cmdline=0x7ffff7f668eb "", initrd_filename=0x0, cpu_model=0x0, pci_enabled=1, kvmclock_enabled=1) 20 at /home/mark/Work/qemu/qemu/hw/pc_piix.c:128 21 #10 0x00007ffff7ee30d8 in pc_init_pci 
(ram_size=134217728, boot_device=0x7fffffffe500 "cad", kernel_filename=0x0, kernel_cmdline=0x7ffff7f668eb "", 22 initrd_filename=0x0, cpu_model=<value optimized out>) at /home/mark/Work/qemu/qemu/hw/pc_piix.c:245 23 #11 0x00007ffff7de57a9 in main (argc=<value optimized out>, argv=<value optimized out>, envp=<value optimized out>) at /home/mark/Work/qemu/qemu/vl.c:3351 24 -------------------------------------------- 25 kvm_set_user_memory_region 26 kvm_vm_ioctl 27 ioctl(kvm_context->vm_fd, KVM_SET_USER_MEMORY_REGION, ...)
• dump gpa <-> hva <-> hpa mapping via crash
1 crash> px vm_list 2 vm_list = $7 = { 3 next = 0xffff880080cb4280, 4 prev = 0xffff880080cb4280 5 } 6 crash> struct kvm.vm_list 7 struct kvm { 8 [640] struct list_head vm_list; 9 } 10 crash> px 0xffff880080cb4280-640 11 $8 = 0xffff880080cb4000 12 crash> pd ((struct kvm *)0xffff880080cb4000)->memslots 13 $9 = (struct kvm_memslots *) 0xffff880139326000 14 crash> px *((struct kvm *)0xffff880080cb4000)->memslots 15 $6 = { 16 nmemslots = 0x23, 17 memslots = {{ 18 base_gfn = 0x0, 19 npages = 0xa0, 20 flags = 0x0, 21 rmap = 0xffffc90016aac000, 22 dirty_bitmap = 0x0, 23 lpage_info = {0xffffc900175d6000, 0xffffc900175d9000}, 24 userspace_addr = 0x7f30dbe00000, 25 user_alloc = 0x1, 26 id = 0x0 27 }, { 28 base_gfn = 0xfffe0, 29 npages = 0x20, 30 flags = 0x0, 31 rmap = 0xffffc90016a82000, 32 dirty_bitmap = 0x0, 33 lpage_info = {0xffffc90016a85000, 0xffffc90016a88000}, 34 userspace_addr = 0x7f310b1f0000, 35 user_alloc = 0x1, 36 id = 0x1 37 }, { 38 base_gfn = 0xc0, 39 npages = 0xc, 40 flags = 0x0, 41 rmap = 0xffffc9001787f000, 42 dirty_bitmap = 0x0, 43 lpage_info = {0xffffc90017882000, 0xffffc90017885000}, 44 userspace_addr = 0x7f30dbec0000, 45 user_alloc = 0x1, 46 id = 0x2 47 }, { 48 base_gfn = 0xfc000, 49 npages = 0x800, 50 flags = 0x1, 51 rmap = 0xffffc90017b39000, 52 dirty_bitmap = 0xffffc90017b45000, 53 lpage_info = {0xffffc90017b3f000, 0xffffc90017b42000}, 54 userspace_addr = 0x7f3101c00000, 55 user_alloc = 0x1, 56 id = 0x3 57 }, { 58 base_gfn = 0xcc, 59 npages = 0x24, 60 flags = 0x0, 61 rmap = 0xffffc90017990000, 62 dirty_bitmap = 0x0, 63 lpage_info = {0xffffc90017993000, 0xffffc90017996000}, 64 userspace_addr = 0x7f30dbecc000, 65 user_alloc = 0x1, 66 id = 0x4 67 }, { 68 base_gfn = 0xf0, 69 npages = 0x10, 70 flags = 0x0, 71 rmap = 0xffffc90017999000, 72 dirty_bitmap = 0x0, 73 lpage_info = {0xffffc9001799c000, 0xffffc9001799f000}, 74 userspace_addr = 0x7f30dbef0000, 75 user_alloc = 0x1, 76 id = 0x5 77 }, { 78 base_gfn = 0x100, 79 npages = 0x1ff00, 80 flags = 0x0, 81 rmap = 
0xffffc900179a2000, 82 dirty_bitmap = 0x0, 83 lpage_info = {0xffffc90017aa4000, 0xffffc90017aa7000}, 84 userspace_addr = 0x7f30dbf00000, 85 user_alloc = 0x1, 86 id = 0x6 87 }, { 88 base_gfn = 0x0, 89 npages = 0x0, 90 flags = 0x0, 91 rmap = 0x0, 92 dirty_bitmap = 0x0, 93 lpage_info = {0x0, 0x0}, 94 userspace_addr = 0x0, 95 user_alloc = 0x0, 96 id = 0x0 97 }, 98 99 On Guest: 100 [root@localhost ~]# ./hello 101 [0x400638]: Hello, world 102 crash> ps \|grep hello 103 2203 2112 0 ffff88001d68ae60 IN 0.1 4124 356 hello 104 crash> set 2203 105 PID: 2203 106 COMMAND: "hello" 107 TASK: ffff88001d68ae60 [THREAD_INFO: ffff88001da6c000] 108 CPU: 0 109 STATE: TASK_INTERRUPTIBLE 110 crash> rd 0x400638 2 111 400638: 77202c6f6c6c6548 255b000a646c726f Hello, world..[% 112 crash> vtop 0x400638 113 VIRTUAL PHYSICAL 114 400638 30b2638 115 116 PML: 1d669000 => 1dbfa067 117 PUD: 1dbfa000 => 1c82d067 118 PMD: 1c82d010 => 1ab49067 119 PTE: 1ab49000 => 30b2025 120 PAGE: 30b2000 121 122 On Host: 123 crash> px 0x7f30dbf00000+0x30b2638-0x100000 124 $7 = 0x7f30deeb2638 125 crash> rd 0x7f30deeb2638 2 126 7f30deeb2638: 77202c6f6c6c6548 255b000a646c726f Hello, world..[%
• Overview: address translation with EPT (GVA -> GPA -> HPA)
1 Guest CR3 EPT Base Pointer 2 | | 3 +-->+-------------------+ +---->+---------------------+ 4 GVA--->| Guest Page Table |---> GPA ---> | Extended Page Table | ---> HPA 5 +-------------------+ +---------------------+
• EPT walkthrough
1 crash> px ((struct kvm_vcpu *)0xffff88007768c078)->arch.mmu 2 $18 = { 3 new_cr3 = 0xffffffffa04dca40 <nonpaging_new_cr3>, 4 page_fault = 0xffffffffa04e4410 <tdp_page_fault>, 5 free = 0xffffffffa04e0870 <nonpaging_free>, 6 gva_to_gpa = 0xffffffffa04e4b70 <paging64_gva_to_gpa>, 7 prefetch_page = 0xffffffffa04dc7a0 <nonpaging_prefetch_page>, 8 sync_page = 0xffffffffa04dc7d0 <nonpaging_sync_page>, 9 invlpg = 0xffffffffa04dc7e0 <nonpaging_invlpg>, 10 root_hpa = 0x138457000, 11 root_level = 0x4, 12 shadow_root_level = 0x4, 13 base_role = { 14 word = 0x0, 15 { 16 glevels = 0x0, 17 level = 0x0, 18 quadrant = 0x0, 19 pad_for_nice_hex_output = 0x0, 20 direct = 0x0, 21 access = 0x0, 22 invalid = 0x0, 23 cr4_pge = 0x0, 24 nxe = 0x0, 25 cr0_wp = 0x0, 26 smep_andnot_wp = 0x0 27 } 28 }, 29 pae_root = 0xffff88000d2c2000, 30 rsvd_bits_mask = {{0xfff0000000000, 0xfff0000000000, 0xfff0000000180, 0xfff0000000180}, {0x0, 0xfff00001fe000, 0xfff003fffe000, 0xfff0000000180}} 31 } 32 33 crash> px (0x30b2638>>39)&0x1ff 34 $19 = 0x0 35 crash> rd -p 0x138457000 36 138457000: 0000000043138007 ...C.... 37 crash> px (0x30b2638>>30)&0x1ff 38 $20 = 0x0 39 crash> rd -p 0x43138000 40 43138000: 0000000108c3c007 ........ 41 crash> px (0x30b2638>>21)&0x1ff 42 $21 = 0x18 43 crash> px (0x108c3c007 & ~0xfff)+ (8*0x18) 44 $22 = 0x108c3c0c0 45 crash> rd -p 0x108c3c0c0 46 108c3c0c0: 0000000125713007 .0q%.... 47 crash> px (0x30b2638>>12)&0x1ff 48 $23 = 0xb2 49 crash> px (0x125713007 & ~0xfff) + (8*0xb2) 50 $24 = 0x125713590 51 crash> rd -p 0x125713590 52 125713590: 000000011289a277 w.......
53 crash> vtop 7f30deeb2638 54 VIRTUAL PHYSICAL 55 7f30deeb2638 11289a638 56 PML: 575e07f0 => 43236067 57 PUD: 43236618 => 7ef3c067 58 PMD: 7ef3c7b8 => 139495067 59 PTE: 139495590 => 800000011289a067 60 PAGE: 11289a000 61 PTE PHYSICAL FLAGS 62 800000011289a067 11289a000 (PRESENT|RW|USER|ACCESSED|DIRTY|NX) 63 VMA START END FLAGS FILE 64 ffff88005d100788 7f30dbe00000 7f30fbe00000 80100073 65 PAGE PHYSICAL MAPPING INDEX CNT FLAGS 66 ffffea0003c0e1b0 11289a000 ffff88000d307f61 7f30deeb2 1 4000000010006c
• Overview: address translation with shadow page tables (GVA -> HPA)
1 Guest CR3 2 | 3 +-->+-------------------+ 4 GVA--->| Guest Page Table | ---> GPA 5 +-------------------+ 6 7 Host CR3 8 | 9 +-->+-------------------+ 10 GVA--->| Shadow Page Table | ---> HPA 11 +-------------------+ 12 ~
• Shadow page table walkthrough (with option ept=no for kernel module kvm_intel)
1 crash> px ((struct kvm_vcpu *)0xffff88007768c078)->arch.mmu 2 mmu = { 3 new_cr3 = 0xffffffffa0914890 <paging_new_cr3>, 4 page_fault = 0xffffffffa091a1b0 <paging64_page_fault>, 5 free = 0xffffffffa0914880 <paging_free>, 6 gva_to_gpa = 0xffffffffa0918b70 <paging64_gva_to_gpa>, 7 prefetch_page = 0xffffffffa0915920 <paging64_prefetch_page>, 8 sync_page = 0xffffffffa09177e0 <paging64_sync_page>, 9 invlpg = 0xffffffffa0913b20 <paging64_invlpg>, 10 root_hpa = 0x8886d000, 11 root_level = 0x4, 12 shadow_root_level = 0x4, 13 base_role = { 14 word = 0xe00004, 15 { 16 glevels = 0x4, 17 level = 0x0, 18 quadrant = 0x0, 19 pad_for_nice_hex_output = 0x0, 20 direct = 0x0, 21 access = 0x0, 22 invalid = 0x0, 23 cr4_pge = 0x1, 24 nxe = 0x1, 25 cr0_wp = 0x1, 26 smep_andnot_wp = 0x0 27 } 28 }, 29 pae_root = 0xffff88008893e000, 30 rsvd_bits_mask = {{0xfff0000000000, 0xfff0000000000, 0xfff0000000180, 0xfff0000000180}, {0x0, 0xfff00001fe000, 0xfff003fffe000, 0xfff0000000180}} 31 }, 32 crash> px (0x400608 >> 39) & 0x1ff 33 $17 = 0x0 34 crash> rd -p 0x8886d000 35 8886d000: 0000000081517027 'pQ..... 36 crash> px (0x400608 >> 30) & 0x1ff 37 $18 = 0x0 38 crash> px (0x81517027 & ~0xfff) 39 $19 = 0x81517000 40 crash> rd -p 0x81517000 41 81517000: 000000008159f027 '.Y..... 42 crash> px (0x400608 >> 21) & 0x1ff 43 $20 = 0x2 44 crash> px (0x8159f027 & ~0xfff)+(8*0x2) 45 $21 = 0x8159f010 46 crash> rd -p 0x8159f010 47 8159f010: 0000000069fd7027 'p.i.... 48 crash> px (0x400608 >> 12) & 0x1ff 49 $22 = 0x0 50 crash> rd -p 0x69fd7000 51 69fd7000: 0000000055b99265 e..U.... 52 crash> rd -p 55b99608 2 53 55b99608: 77202c6f6c6c6548 255b000a646c726f Hello, world..[%