3.4Shadowpage页表虚拟化

               

3.4.1影子页表与EPT虚拟化对比

(1) 与ept基本区别点

init_kvm_mmu==>init_kvm_softmmu

static voidinit_kvm_softmmu(struct kvm_vcpu *vcpu)

{

    kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);

    vcpu->arch.walk_mmu->set_cr3           = kvm_x86_ops->set_cr3;

    vcpu->arch.walk_mmu->get_cr3           = get_cr3;

    vcpu->arch.walk_mmu->get_pdptr         = kvm_pdptr_read;

    vcpu->arch.walk_mmu->inject_page_fault =kvm_inject_page_fault;

}

kvm_init_shadow_mmu根据分页类别(不分页, page32,pae,ia32-e)设置 struct kvm_mmu;

32bit paging模式设置如下paging32_init_context:

    context->nx = false;

    context->root_level = PT32_ROOT_LEVEL;

    context->page_fault = paging32_page_fault;

    context->gva_to_gpa = paging32_gva_to_gpa;

    context->sync_page = paging32_sync_page;

    context->invlpg = paging32_invlpg;

    context->update_pte = paging32_update_pte;

    context->direct_map = false;

 

mmu_alloc_roots==> mmu_alloc_shadow_roots;而ept调用的是mmu_alloc_direct_roots

它们在调用kvm_mmu_get_page==》kvm_mmu_alloc_page时采用的参数direct 一个为0一个为1。

    if (!direct)

       sp->gfns =mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);

gfns[index]用于存储页框的index索引对应的gfn号,用于gva到gpa的转换。影子页表方法需要使用软件来维护gva到gpa的转换;而EPT当出现ept导致的vm-exit时由硬件提供gpa。

 

(2) GVA 到GPA

下面以32bit paging为例,来分析gva到gpa

#define FNAME(name)paging##32_##name (paging_tmpl.h)

static gpa_tFNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,

                  structx86_exception *exception)

{

    struct guest_walker walker;

    gpa_t gpa = UNMAPPED_GVA;

    int r;

 

    r = FNAME(walk_addr)(&walker, vcpu, vaddr, access); //GVA toGFN 结果存放在walker.gfn

 

    if (r) {

       gpa = gfn_to_gpa(walker.gfn); //返回gpa

       gpa |= vaddr & ~PAGE_MASK;

    } else if (exception)

       *exception = walker.fault;//如果转换失败

 

    return gpa;

}

static inline gpa_tgfn_to_gpa(gfn_t gfn)

{

    return (gpa_t)gfn << PAGE_SHIFT;

}

 

 

int walk_addr_generic(struct guest_walker *walker, struct kvm_vcpu *vcpu,

      struct kvm_mmu *mmu, gva_t addr, u32 access)

a. 从 pte = mmu->get_cr3(vcpu) 开始,根据虚拟地址addr逐级遍历客户机页表,计算出对应的客户机物理页帧号gfn

b. 一层层的分析页目录项

       index= PT_INDEX(addr, walker->level); //根据页表层次计算在页框中的索引

       table_gfn = gpte_to_gfn(pte); //当前pte指向的页表页的gfn(客户机物理页帧号,不是虚拟地址)

       offset    = index *sizeof(pt_element_t);

       pte_gpa   =gfn_to_gpa(table_gfn) + offset; //取出当前层的数据地址

将遍历结果按层次序号存储,这样通过遍历能得到一个页每层的页框信息

       walker->table_gfn[walker->level - 1] = table_gfn;  

       walker->pte_gpa[walker->level - 1] = pte_gpa;

       host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,

                     &walker->pte_writable[walker->level - 1]);

       ptep_user = (pt_element_t __user *)((void *)host_addr +offset); //hva才是能直接被vmm访问的地址

    if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))

           goto error;

       walker->ptes[walker->level - 1] = pte;

  (对于非嵌套的虚拟机case mmu-> translate_gpa ==  translate_gpa;)

       对页的属性进行判断,如果属性错误则记录errcode到:

    walker->fault.vector = PF_VECTOR;

    walker->fault.error_code_valid = true;

    walker->fault.error_code = errcode;

如果已是最后一层则计算出walker->gfn = real_gpa >> PAGE_SHIFT;

 

3.4.2 缺页异常处理

static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,

                bool prefault)

 

a. 对于mmio引起的异常直接调用handle_mmio_page_fault后返回

b. r =FNAME(walk_addr)(&walker, vcpu, addr, error_code); 计算gfn

c. 若计算gfn失败则表明guest os 未分配该页,调用inject_page_fault注入缺页异常给guest os

 void kvm_inject_page_fault(struct kvm_vcpu*vcpu, struct x86_exception *fault)

{

    ++vcpu->stat.pf_guest;

    vcpu->arch.cr2 = fault->address;

    kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);

}

 

d. try_async_pf gfn 到pfn

e. 处理invalid页handle_abnormal_pfn

g. FNAME(fetch)(vcpu, addr,&walker, write_fault,

            level, pfn, map_writable, prefault);

 关联pfn到影子页表

 

static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,

            struct guest_walker *gw,

            int write_fault, int hlevel,

            pfn_t pfn, bool map_writable, bool prefault)

 

a. 遍历影子页表

    for (shadow_walk_init(&it, vcpu, addr);

        shadow_walk_okay(&it) && it.level > gw->level;

        shadow_walk_next(&it))

b. 若影子页表对应的kvm_mmu_page未分配则:

if(!is_shadow_present_pte(*it.sptep)) {

           table_gfn = gw->table_gfn[it.level - 2];

           sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,

                        false,access, it.sptep);

 

c. 调用mmu_set_spte设置页表项

 

3.4.3 CR3寄存器操作

(1) 设置cr3

 handle_cr ==> kvm_set_cr3

若cr3未变化则       kvm_mmu_sync_roots(vcpu);

          kvm_mmu_flush_tlb(vcpu);

否则 a. kvm_mmu_new_cr3(vcpu);==》mmu_free_roots

     b. vcpu->arch.cr3 = cr3;

 

读取cr3 kvm_read_cr3(vcpu);//return vcpu->arch.cr3 ; 客户机读到的值

 

(2) vcpu_enter_guest ==>    kvm_mmu_reload(vcpu) ==> kvm_mmu_load

{

    kvm_mmu_sync_roots(vcpu);

    vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa) //影子页表,真正起作用的cr3

            ==>   vmcs_writel(GUEST_CR3, guest_cr3);
}

你可能感兴趣的:(虚拟化)