R0 VMM初始化入口在HMR0.cpp里的HMR0Init()
/*
 * Ring-0 HM initialization entry point (abbreviated excerpt).
 *
 * Queries the hardware-virtualization capabilities through SUPR0GetVTSupport()
 * and, when that succeeds, runs the Intel (VT-x) or the AMD (AMD-V) specific
 * initialization. Afterwards it registers the multiprocessor and the power
 * notification callbacks.
 *
 * Returns the failure status of the vendor-specific initialization when it
 * fails.
 * NOTE(review): no final return statement is visible in this excerpt even
 * though the function is declared to return int - presumably elided.
 */
VMMR0_INT_DECL(int) HMR0Init(void)
{
uint32_t fCaps = 0;
int rc = SUPR0GetVTSupport(&fCaps);
/* If the capability query fails, neither vendor-specific init path is run. */
if (RT_SUCCESS(rc))
{
if (fCaps & SUPVTCAPS_VT_X)
{
/* VT-x reported: run the Intel initialization and bail out on failure. */
rc = hmR0InitIntel();
if (RT_FAILURE(rc))
return rc;
}
else
{
/* Not VT-x, so the AMD-V capability bit is expected to be set here. */
Assert(fCaps & SUPVTCAPS_AMD_V);
rc = hmR0InitAmd();
if (RT_FAILURE(rc))
return rc;
}
}
/* Register the CPU (MP) event callback; a failure is only asserted on. */
rc = RTMpNotificationRegister(hmR0MpEventCallback, NULL);
AssertRC(rc);
/* Register the power event callback; a failure is only asserted on. */
rc = RTPowerNotificationRegister(hmR0PowerCallback, NULL);
AssertRC(rc);
}
这个函数首先会去检查CPU是否支持VT,如果支持VT,检查是VMX还是SVM,并且分别调用这两种CPU的初始化代码
然后会去注册一些系统回调,比如电源回调:当系统进入睡眠前,需要退出VMX,在唤醒的时候再开启VMX,因为如果在睡眠之前不退出VMX状态,会导致唤醒的时候进入不可预知状态。具体说明可以参考intel手册24.11.1
/*
 * Intel (VT-x) specific ring-0 initialization (abbreviated excerpt).
 *
 * Calls VMXEnable() on the scratch page and then fills the g_HmR0 method
 * table with the VT-x (VMXR0*) implementations.
 *
 * NOTE(review): the declaration of rc, any check of the VMXEnable() result
 * and the return statement are not visible in this excerpt.
 */
static int hmR0InitIntel(void)
{
/* HCPhysScratchPage is not declared in this excerpt. */
rc = VMXEnable(HCPhysScratchPage);
/*
 * Install the VT-x methods.
 */
g_HmR0.pfnEnterSession = VMXR0Enter;
g_HmR0.pfnThreadCtxCallback = VMXR0ThreadCtxCallback;
g_HmR0.pfnCallRing3Callback = VMXR0CallRing3Callback;
g_HmR0.pfnExportHostState = VMXR0ExportHostState;
g_HmR0.pfnRunGuestCode = VMXR0RunGuestCode;
g_HmR0.pfnEnableCpu = VMXR0EnableCpu;
g_HmR0.pfnDisableCpu = VMXR0DisableCpu;
g_HmR0.pfnInitVM = VMXR0InitVM;
g_HmR0.pfnTermVM = VMXR0TermVM;
g_HmR0.pfnSetupVM = VMXR0SetupVM;
}
这个函数调用VMXEnable进入VMX模式,然后把对应的处理函数赋值到g_HmR0里。
下面对每个函数分别解析他们的代码:
VMXR0SetupVM 设置了VMCS
/*
 * Sets up the VMCS of every virtual CPU of the VM (abbreviated excerpt).
 *
 * For each VCPU the VMCS is cleared, activated, configured by four setup
 * helpers and finally cleared again.
 *
 * NOTE(review): the declarations of rc and pVCpu, all error checks and the
 * return statement are not visible in this excerpt.
 */
VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
{
每个VCPU都需要设置VMCS
for (VMCPUID i = 0; i < pVM->cCpus; i++)
{
/* Clear the VMCS before it is activated. */
rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
// Execute VMPTRLD to make this VMCS the CPU's current VMCS pointer.
rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
// The following four functions each configure a group of control settings.
rc = hmR0VmxSetupPinCtls(pVCpu);
rc = hmR0VmxSetupProcCtls(pVCpu);
rc = hmR0VmxSetupMiscCtls(pVCpu);
rc = hmR0VmxInitXcptBitmap(pVCpu);
// Execute VMCLEAR so that the VMCS contents configured above are saved.
rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
}
}
VMXR0Enter active vmcs
/*
 * Session-entry method for VT-x (abbreviated excerpt; installed as
 * g_HmR0.pfnEnterSession by hmR0InitIntel()).
 *
 * The only step shown is the activation of the VCPU's VMCS; the code before
 * and after it is elided ("...").
 */
VMMR0DECL(int) VMXR0Enter(PVMCPU pVCpu)
{
...
/* Activate this VCPU's VMCS, addressed by its host-physical address. */
int rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
...
}
VMXR0EnableCpu 最终调用VMXON指令进入VT-X
VMXEnable是实现在HMR0A.asm文件里的汇编函数,调用vmxon指令在CPU上开启VT-x
/*
 * Enables VT-x on a host CPU (abbreviated excerpt; installed as
 * g_HmR0.pfnEnableCpu by hmR0InitIntel()).
 *
 * The only step shown is the delegation to hmR0VmxEnterRootMode() with the
 * per-CPU page (virtual and host-physical address).
 *
 * NOTE(review): pHostCpu, fEnabledByHost and pHwvirtMsrs are unused in this
 * excerpt, and the result of hmR0VmxEnterRootMode() is neither checked nor
 * returned here - presumably elided.
 */
VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
PCSUPHWVIRTMSRS pHwvirtMsrs)
{
hmR0VmxEnterRootMode(pVM, HCPhysCpuPage, pvCpuPage);
}
/*
 * Enters VMX root mode on the current CPU (abbreviated excerpt).
 *
 * Per the accompanying notes, VMXEnable() is an assembly routine in HMR0A.asm
 * that executes the VMXON instruction.
 *
 * NOTE(review): pVM and pvCpuPage are unused in this excerpt, and rc is not
 * returned - presumably elided.
 */
static int hmR0VmxEnterRootMode(PVM pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
{
int rc = VMXEnable(HCPhysCpuPage);
}
VMXR0DisableCpu和VMXR0EnableCpu相反,最终调用VMXOFF指令在当前CPU上关闭VT-X
/*
 * Disables VT-x on a host CPU (abbreviated excerpt; installed as
 * g_HmR0.pfnDisableCpu by hmR0InitIntel()). Counterpart of VMXR0EnableCpu().
 *
 * Returns whatever hmR0VmxLeaveRootMode() returns. Neither parameter is used
 * in this excerpt.
 */
VMMR0DECL(int) VMXR0DisableCpu(void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
{
return hmR0VmxLeaveRootMode();
}
/*
 * Leaves VMX root mode on the current CPU by calling VMXDisable()
 * (abbreviated excerpt).
 *
 * NOTE(review): no return statement is visible although the function is
 * declared to return int - presumably elided.
 */
static int hmR0VmxLeaveRootMode(void)
{
VMXDisable();
}
VMXR0InitVM初始化一些结构体数据,主要用于VMM管理
/*
 * Per-VM initialization for VT-x (abbreviated excerpt; installed as
 * g_HmR0.pfnInitVM by hmR0InitIntel()).
 *
 * The only step shown is the allocation of the VT-x structures through
 * hmR0VmxStructsAlloc().
 *
 * NOTE(review): rc is not returned in this excerpt - presumably elided.
 */
VMMR0DECL(int) VMXR0InitVM(PVM pVM)
{
int rc = hmR0VmxStructsAlloc(pVM);
}
/*
 * Allocates the per-VCPU VT-x memory areas (abbreviated excerpt).
 *
 * For every VCPU four areas are allocated with hmR0VmxPageAllocZ(): the VMCS,
 * the MSR bitmap, the guest MSR area and the host MSR area. Each call fills
 * in a memory object handle, a virtual address and a host-physical address.
 *
 * NOTE(review): the declarations of rc and pVCpu, all error checks and the
 * return statement are not visible in this excerpt.
 */
static int hmR0VmxStructsAlloc(PVM pVM)
{
for (VMCPUID i = 0; i < pVM->cCpus; i++)
{
// Allocate the VMCS memory for each VCPU.
rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs);
// Allocate the memory that holds the MSR bitmap.
rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap,
&pVCpu->hm.s.vmx.HCPhysMsrBitmap);
// Allocate the memory that holds the guest MSRs.
rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr);
// Allocate the memory that holds the host MSRs.
rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr);
}
}
VMXR0TermVM 是VMXR0InitVM对应的释放代码
/*
 * Per-VM termination for VT-x (abbreviated excerpt; installed as
 * g_HmR0.pfnTermVM by hmR0InitIntel()). Counterpart of VMXR0InitVM(): frees
 * the VT-x structures through hmR0VmxStructsFree().
 *
 * NOTE(review): no return statement is visible although the function is
 * declared to return int - presumably elided.
 */
VMMR0DECL(int) VMXR0TermVM(PVM pVM)
{
hmR0VmxStructsFree(pVM);
}
VMXR0RunGuestCode是执行GuestOS代码
/*
 * Runs guest code on the given VCPU (abbreviated excerpt; installed as
 * g_HmR0.pfnRunGuestCode by hmR0InitIntel()).
 *
 * Runs the guest through hmR0VmxRunGuestCodeNormal() and then hands the
 * resulting status to hmR0VmxExitToRing3().
 *
 * NOTE(review): the declaration of rcStrict, the handling of rc2 and the
 * return statement are not visible in this excerpt.
 */
VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPU pVCpu)
{
// Enter guest code. When the guest takes a VM-exit that the host has to handle, the VM-exit information is saved to a global variable and the call returns.
rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu);
// This function goes back to ring-3, where the corresponding handler on the host is eventually called.
int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
}
/*
 * Inner guest execution loop (abbreviated excerpt).
 *
 * Each iteration runs the guest once, post-processes the run and dispatches
 * the VM-exit to its handler. The loop continues while the handler returns
 * VINF_SUCCESS and the iteration count is within cMaxResumeLoops. Otherwise
 * it ends, with the status forced to VINF_EM_RAW_INTERRUPT when the loop cap
 * was exceeded.
 *
 * NOTE(review): the declarations of cLoops, VmxTransient and rcStrict and
 * the return statement are not visible in this excerpt.
 */
static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPU pVCpu)
{
for (;; cLoops++)
{
hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
int rcRun = hmR0VmxRunGuest(pVCpu);
hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
// Call the VM-exit handler. If it returns VINF_SUCCESS, the VM-exit has been fully handled and guest execution can continue.
// If the handler needs the host to deal with the VM-exit it returns something other than VINF_SUCCESS. The loop is then left and control goes back to the host, which handles the VM-exit before VMX is entered again.
rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient, VmxTransient.uExitReason);
if (rcStrict == VINF_SUCCESS)
{
// The host does not need to handle this VM-exit, so keep looping and running guest code.
// The guest cannot be allowed to run forever, though: guest code would monopolize the CPU and host processes would get no execution time. Hence the cMaxResumeLoops cap: once the loop count exceeds it, the loop is forcibly left.
// cMaxResumeLoops defaults to 1024 and can also be set in the configuration.
if (cLoops <= pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops)
continue; /* likely */
STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
rcStrict = VINF_EM_RAW_INTERRUPT;
}
// The host has to handle this VM-exit (or the loop cap was hit): leave the loop and return to ring-3.
break;
}
}
/*
 * Thread-context callback for VT-x (installed as g_HmR0.pfnThreadCtxCallback
 * by hmR0InitIntel()).
 *
 * The body is empty in this excerpt, so nothing can be said here about how
 * enmEvent, pVCpu or fGlobalInit are used.
 */
VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit)
{
}
VMXR0ExportHostState 保存一些host的状态信息
TODO: 研究一下什么时候会用到这个
/*
 * Exports host state (abbreviated excerpt; installed as
 * g_HmR0.pfnExportHostState by hmR0InitIntel()).
 *
 * Calls, in order, the helpers for the host control registers, the host
 * segment registers and the host MSRs.
 *
 * NOTE(review): the declaration of rc, any error checks between the calls
 * and the return statement are not visible in this excerpt.
 */
VMMR0DECL(int) VMXR0ExportHostState(PVMCPU pVCpu)
{
rc = hmR0VmxExportHostControlRegs();
rc = hmR0VmxExportHostSegmentRegs(pVCpu);
rc = hmR0VmxExportHostMsrs(pVCpu);
}
VMXR0SetupVM 里设置完成了需要handle的VMExit事件之后,当这些事件在GuestOS里被触发的时候,就会产生一个vmexit中断,VMM需要根据中断id,调用对应的中断处理函数。
VMExit中断发生之后,hmR0VmxRunGuestCodeNormal里调用的hmR0VmxRunGuest函数返回。
先调用hmR0VmxPostRunGuest获取异常相关的信息
/*
 * Post-processing after a guest run (abbreviated excerpt).
 *
 * The step shown reads the 32-bit exit-reason field from the VMCS and stores
 * its basic exit reason in pVmxTransient->uExitReason. The code before and
 * after it is elided.
 */
static void hmR0VmxPostRunGuest(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
{
....
// Fetch the exit reason.
uint32_t uExitReason;
int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
/* Keep only the basic exit reason part of the raw field. */
pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
......
}
hmR0VmxRunGuestCodeNormal里根据上面获取的VMExit reason调用相关的exit handle
/*
 * Handle the VM-exit.
 *
 * With HMVMX_USE_FUNCTION_TABLE defined, the handler is looked up in
 * g_apfnVMExitHandlers using the exit reason as the index. Otherwise the exit
 * reason is passed to hmR0VmxHandleExit().
 */
#ifdef HMVMX_USE_FUNCTION_TABLE
rcStrict = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, &VmxTransient);
#else
rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient, VmxTransient.uExitReason);
#endif
两个分支实现差不多,一个是switch/case 跳转,一个是function table模式跳转
下面是function table的定义:
可以看到,VMX_EXIT_XCPT_OR_NMI 异常对应的处理函数是hmR0VmxExitXcptOrNmi
VMX_EXIT_CPUID 异常对应的处理函数是hmR0VmxExitCpuid
/**
 * VMX_EXIT dispatch table.
 *
 * Holds VMX_EXIT_MAX + 1 handler pointers; the number in each entry's comment
 * is its index and the name next to it is the corresponding VMX_EXIT_*
 * reason. The entries after VMX_EXIT_INVD are elided in this excerpt.
 */
static const PFNVMXEXITHANDLER g_apfnVMExitHandlers[VMX_EXIT_MAX + 1] =
{
/* 00 VMX_EXIT_XCPT_OR_NMI */ hmR0VmxExitXcptOrNmi,
/* 01 VMX_EXIT_EXT_INT */ hmR0VmxExitExtInt,
/* 02 VMX_EXIT_TRIPLE_FAULT */ hmR0VmxExitTripleFault,
/* 03 VMX_EXIT_INIT_SIGNAL */ hmR0VmxExitInitSignal,
/* 04 VMX_EXIT_SIPI */ hmR0VmxExitSipi,
/* 05 VMX_EXIT_IO_SMI */ hmR0VmxExitIoSmi,
/* 06 VMX_EXIT_SMI */ hmR0VmxExitSmi,
/* 07 VMX_EXIT_INT_WINDOW */ hmR0VmxExitIntWindow,
/* 08 VMX_EXIT_NMI_WINDOW */ hmR0VmxExitNmiWindow,
/* 09 VMX_EXIT_TASK_SWITCH */ hmR0VmxExitTaskSwitch,
/* 10 VMX_EXIT_CPUID */ hmR0VmxExitCpuid,
/* 11 VMX_EXIT_GETSEC */ hmR0VmxExitGetsec,
/* 12 VMX_EXIT_HLT */ hmR0VmxExitHlt,
/* 13 VMX_EXIT_INVD */ hmR0VmxExitInvd,
....
};
以最简单的Cpuid的vmexit处理函数为例
因为虚拟机的CPU是虚拟CPU,不能返回真实CPU的Cpuid信息,所以当GuestOS执行CPUID这条指令的时候,一定触发VMExit异常
当GuestOS执行到cpuid这条指令的时候,根据uExitReason跳转到hmR0VmxExitCpuid里
/*
 * VM-exit handler for CPUID (abbreviated excerpt; entry 10 of
 * g_apfnVMExitHandlers).
 *
 * Emulates the instruction through IEMExecDecodedCpuid() and marks the guest
 * RIP and RFLAGS as changed.
 *
 * NOTE(review): in this excerpt rcStrict is overwritten with VINF_SUCCESS
 * unconditionally, and neither its declaration nor the return statement is
 * visible - presumably the surrounding status checks are elided.
 */
HMVMX_EXIT_DECL hmR0VmxExitCpuid(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
{
rcStrict = IEMExecDecodedCpuid(pVCpu, pVmxTransient->cbInstr);
// Once the emulation is done, HM_CHANGED_GUEST_RIP must be set so that RIP skips over the CPUID instruction (it has been emulated).
// Before the next guest entry, hmR0VmxPreRunGuestCommitted() eventually reaches hmR0VmxExportGuestRip(), which sets the new guest RIP to the instruction following CPUID.
ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
// CPUID was emulated successfully: return success so the guest continues with the instruction after CPUID.
rcStrict = VINF_SUCCESS;
}
VMM\VMMAll\IEMAll.cpp
/*
 * Emulates an already decoded CPUID instruction through IEM (abbreviated
 * excerpt).
 *
 * NOTE(review): cbInstr is unused in this excerpt, and rcStrict is not
 * returned - presumably elided.
 */
VMM_INT_DECL(VBOXSTRICTRC) IEMExecDecodedCpuid(PVMCPU pVCpu, uint8_t cbInstr)
{
// Initialize the IEM emulator.
iemInitExec(pVCpu, false /*fBypassHandlers*/);
// Call iemCImpl_cpuid to emulate the execution of one CPUID instruction.
VBOXSTRICTRC rcStrict = IEM_CIMPL_CALL_0(iemCImpl_cpuid);
}
VMM\VMMAll\IEMAllCImpl.cpp.h
/*
 * IEM C implementation of the CPUID instruction (abbreviated excerpt).
 *
 * Uses the guest EAX and ECX values as inputs to CPUMGetGuestCpuId(), writes
 * the four results back into the guest EAX/EBX/ECX/EDX and then masks
 * RAX/RBX/RCX/RDX down to their low 32 bits. The code before and after these
 * steps is elided.
 */
IEM_CIMPL_DEF_0(iemCImpl_cpuid)
{
....
// Call CPUMGetGuestCpuId to obtain the emulated guest CPU information and store it in the corresponding registers of GstCtx.
CPUMGetGuestCpuId(pVCpu, pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ecx,
&pVCpu->cpum.GstCtx.eax, &pVCpu->cpum.GstCtx.ebx, &pVCpu->cpum.GstCtx.ecx, &pVCpu->cpum.GstCtx.edx);
/* Clear the upper 32 bits of the four 64-bit result registers. */
pVCpu->cpum.GstCtx.rax &= UINT32_C(0xffffffff);
pVCpu->cpum.GstCtx.rbx &= UINT32_C(0xffffffff);
pVCpu->cpum.GstCtx.rcx &= UINT32_C(0xffffffff);
pVCpu->cpum.GstCtx.rdx &= UINT32_C(0xffffffff);
....
}
后面会专门写一章重点介绍部分重要的VMExit handle的处理过程