3.4.1影子页表与EPT虚拟化对比
(1) 与ept基本区别点
init_kvm_mmu==>init_kvm_softmmu
/*
 * Set up the vCPU's MMU context for shadow paging (the non-EPT path).
 *
 * After kvm_init_shadow_mmu() picks the per-paging-mode callbacks
 * (page_fault, gva_to_gpa, ...), wire up the CR3 accessors, the PDPTR
 * reader (needed for PAE walks) and the callback used to inject a #PF
 * back into the guest when a software walk of its page tables fails.
 */
static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
{
	kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
	vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3;
	vcpu->arch.walk_mmu->get_cr3 = get_cr3;
	vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read;
	vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
}
kvm_init_shadow_mmu根据分页类别(不分页, page32,pae,ia32-e)设置 struct kvm_mmu;
32bit paging模式设置如下paging32_init_context:
context->nx = false;
context->root_level = PT32_ROOT_LEVEL;
context->page_fault = paging32_page_fault;
context->gva_to_gpa = paging32_gva_to_gpa;
context->sync_page = paging32_sync_page;
context->invlpg = paging32_invlpg;
context->update_pte = paging32_update_pte;
context->direct_map = false;
mmu_alloc_roots==> mmu_alloc_shadow_roots;而ept调用的是mmu_alloc_direct_roots
它们在调用kvm_mmu_get_page==》kvm_mmu_alloc_page时采用的参数direct 一个为0一个为1。
if (!direct)
sp->gfns =mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
gfns[index]用于存储页框的index索引对应的gfn号,用于gva到gpa的转换。影子页表方法需要使用软件来维护gva到gpa的转换;而EPT当出现ept导致的vm-exit时由硬件提供gpa.
(2) GVA 到GPA
下面以32bit paging为例,来分析gva到gpa
#define FNAME(name) paging##32_##name (paging_tmpl.h)
static gpa_tFNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
structx86_exception *exception)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
int r;
r = FNAME(walk_addr)(&walker, vcpu, vaddr, access); //GVA toGFN 结果存放在walker.gfn
if (r) {
gpa = gfn_to_gpa(walker.gfn); //返回gpa
gpa |= vaddr & ~PAGE_MASK;
} else if (exception)
*exception = walker.fault;//如果转换失败
return gpa;
}
static inline gpa_tgfn_to_gpa(gfn_t gfn)
{
return (gpa_t)gfn << PAGE_SHIFT;
}
int walk_addr_generic(struct guest_walker *walker, struct kvm_vcpu *vcpu,
                      struct kvm_mmu *mmu, gva_t addr, u32 access)
a. 从pte = mmu->get_cr3(vcpu);根据虚拟地址addr,计算物理地址gfn
b. 一层层的分析页目录项
index= PT_INDEX(addr, walker->level); //根据页表层次计算在页框中的索引
table_gfn = gpte_to_gfn(pte); //当前层页表所在页框的gfn号
offset = index *sizeof(pt_element_t);
pte_gpa =gfn_to_gpa(table_gfn) + offset; //取出当前层的数据地址
将遍历结果按层次序号存储,这样通过遍历能得到一个页每层的页框信息
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
&walker->pte_writable[walker->level - 1]);
ptep_user = (pt_element_t __user *)((void *)host_addr +offset); //hva才是能直接被vmm访问的地址
if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
goto error;
walker->ptes[walker->level - 1] = pte;
(对于非嵌套的虚拟机case mmu-> translate_gpa == translate_gpa;)
对页的属性进行判断,如果属性错误则记录errcode到:
walker->fault.vector = PF_VECTOR;
walker->fault.error_code_valid = true;
walker->fault.error_code = errcode;
如果已是最后一层则计算出walker->gfn = real_gpa >> PAGE_SHIFT;
3.4.2 缺页异常处理
static intFNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
bool prefault)
a. 对于mmio引起的异常直接调用handle_mmio_page_fault后返回
b. r =FNAME(walk_addr)(&walker, vcpu, addr, error_code); 计算gfn
c. 若计算gfn失败则表明guest os 未分配该页,调用inject_page_fault注入缺页异常给guest os
/*
 * Inject a page fault into the guest: bump the per-vCPU guest-#PF
 * counter, record the faulting address in the virtual CR2 (what the
 * guest will read as CR2), and queue a #PF exception carrying the
 * error code for delivery on the next guest entry.
 */
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	++vcpu->stat.pf_guest;
	vcpu->arch.cr2 = fault->address;
	kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
}
d. try_async_pf gfn 到pfn
e. 处理invalid页handle_abnormal_pfn
f. FNAME(fetch)(vcpu, addr, &walker, write_fault,
level, pfn, map_writable, prefault);
关联pfn到影子页表
static intFNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker*gw,
int write_fault,int hlevel,
pfn_t pfn, boolmap_writable, bool prefault)
a. 遍历影子页表
for (shadow_walk_init(&it, vcpu, addr);
shadow_walk_okay(&it) && it.level > gw->level;
shadow_walk_next(&it))
b. 若影子页表对应的kvm_mmu_page未分配则:
if(!is_shadow_present_pte(*it.sptep)) {
table_gfn = gw->table_gfn[it.level - 2];
sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
false,access, it.sptep);
c. 调用mmu_set_spte设置页表项
3.4.3 CR3寄存器操作
(1) 设置cr3
handle_cr ==> kvm_set_cr3
若cr3未变化则 kvm_mmu_sync_roots(vcpu);
kvm_mmu_flush_tlb(vcpu);
否则 a. kvm_mmu_new_cr3(vcpu);==》mmu_free_roots
b. vcpu->arch.cr3 = cr3;
读取cr3 kvm_read_cr3(vcpu);//return vcpu->arch.cr3 ; 客户机读到的值
(2) vcpu_enter_guest ==> kvm_mmu_reload(vcpu) ==> kvm_mmu_load
{
kvm_mmu_sync_roots(vcpu);
vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa) //影子页表,真正起作用的cr3
==> vmcs_writel(GUEST_CR3, guest_cr3);
}