代码版本qemu1.5,只看linux下使用kvm加速的,guest为x86的。
vl.c,注册atexit函数,遍历exit_notifiers,执行每一个node中的notify函数
static void qemu_run_exit_notifiers(void)
{
notifier_list_notify(&exit_notifiers, NULL);
}
void notifier_list_notify(NotifierList *list, void *data)
{
Notifier *notifier, *next;
QLIST_FOREACH_SAFE(notifier, &list->notifiers, node, next) {
notifier->notify(notifier, data);
}
}
typedef struct NotifierList
{
QLIST_HEAD(, Notifier) notifiers;
} NotifierList;
struct Notifier
{
void (*notify)(Notifier *notifier, void *data);
QLIST_ENTRY(Notifier) node;
};
exit_notifiers 哪里来的呢:
static NotifierList exit_notifiers =
NOTIFIER_LIST_INITIALIZER(exit_notifiers);
#define NOTIFIER_LIST_INITIALIZER(head) \
{ QLIST_HEAD_INITIALIZER((head).notifiers) }
void qemu_add_exit_notifier(Notifier *notify)
{
notifier_list_add(&exit_notifiers, notify);
}
Qemu-error.c,设置progname为basename:
/*
* Set the program name for error_print_loc().
*/
void error_set_progname(const char *argv0)
{
const char *p = strrchr(argv0, '/');
progname = p ? p + 1 : argv0;
}
g_mem_set_vtable(&mem_trace);
if (!g_thread_supported()) {
#if !GLIB_CHECK_VERSION(2, 31, 0)
g_thread_init(NULL);
#else
fprintf(stderr, "glib threading failed to initialize.\n");
exit(1);
#endif
}
g_mem_set_vtable用于hook malloc和free,已废弃:https://developer.gnome.org/glib/stable/glib-Memory-Allocation.html
g_thread_init也不需要了:The GLib threading system used to be initialized with g_thread_init(). This is no longer necessary. Since version 2.32, the GLib threading system is automatically initialized at the start of your program, and all thread-creation functions and synchronization primitives are available right away.
https://developer.gnome.org/glib/stable/glib-Threads.html#g-mutex-init
参看《qemu QOM(qemu object model)和设备模拟》
从一个运行状态是否可以切换到另一个运行状态
static void runstate_init(void)
{
const RunStateTransition *p;
memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions));
for (p = &runstate_transitions_def[0]; p->from != RUN_STATE_MAX; p++) {
runstate_valid_transitions[p->from][p->to] = true;
}
}
static bool runstate_valid_transitions[RUN_STATE_MAX][RUN_STATE_MAX];
static const RunStateTransition runstate_transitions_def[] = {
/* from -> to */
{ RUN_STATE_DEBUG, RUN_STATE_RUNNING },
{ RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_INMIGRATE, RUN_STATE_PAUSED },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_IO_ERROR, RUN_STATE_RUNNING },
{ RUN_STATE_IO_ERROR, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_PAUSED, RUN_STATE_RUNNING },
{ RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING },
{ RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
{ RUN_STATE_RUNNING, RUN_STATE_DEBUG },
{ RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR },
{ RUN_STATE_RUNNING, RUN_STATE_IO_ERROR },
{ RUN_STATE_RUNNING, RUN_STATE_PAUSED },
{ RUN_STATE_RUNNING, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_RUNNING, RUN_STATE_RESTORE_VM },
{ RUN_STATE_RUNNING, RUN_STATE_SAVE_VM },
{ RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN },
{ RUN_STATE_RUNNING, RUN_STATE_WATCHDOG },
{ RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
{ RUN_STATE_SHUTDOWN, RUN_STATE_PAUSED },
{ RUN_STATE_SHUTDOWN, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_DEBUG, RUN_STATE_SUSPENDED },
{ RUN_STATE_RUNNING, RUN_STATE_SUSPENDED },
{ RUN_STATE_SUSPENDED, RUN_STATE_RUNNING },
{ RUN_STATE_SUSPENDED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_WATCHDOG, RUN_STATE_RUNNING },
{ RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_PAUSED },
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_MAX, RUN_STATE_MAX },
};
void init_clocks(void)
{
if (!rt_clock) {
rt_clock = qemu_new_clock(QEMU_CLOCK_REALTIME);
vm_clock = qemu_new_clock(QEMU_CLOCK_VIRTUAL);
host_clock = qemu_new_clock(QEMU_CLOCK_HOST);
}
}
static QEMUClock *qemu_new_clock(int type)
{
QEMUClock *clock;
clock = g_malloc0(sizeof(QEMUClock));
clock->type = type;
clock->enabled = true;
clock->last = INT64_MIN;
notifier_list_init(&clock->reset_notifiers);
return clock;
}
void os_setup_early_signal_handling(void)
{
struct sigaction act;
sigfillset(&act.sa_mask);
act.sa_flags = 0;
act.sa_handler = SIG_IGN;
sigaction(SIGPIPE, &act, NULL);
}
其中pc_piix.c中有代码注册了一些Machine的模型到first_machine链表中:
static void pc_machine_init(void)
{
qemu_register_machine(&pc_i440fx_machine_v1_5);
qemu_register_machine(&pc_i440fx_machine_v1_4);
qemu_register_machine(&pc_machine_v1_3);
qemu_register_machine(&pc_machine_v1_2);
qemu_register_machine(&pc_machine_v1_1);
qemu_register_machine(&pc_machine_v1_0);
qemu_register_machine(&pc_machine_v0_15);
qemu_register_machine(&pc_machine_v0_14);
qemu_register_machine(&pc_machine_v0_13);
qemu_register_machine(&pc_machine_v0_12);
qemu_register_machine(&pc_machine_v0_11);
qemu_register_machine(&pc_machine_v0_10);
qemu_register_machine(&isapc_machine);
#ifdef CONFIG_XEN
qemu_register_machine(&xenfv_machine);
#endif
}
machine_init(pc_machine_init);
添加到链表最后
static QEMUMachine *first_machine = NULL;
QEMUMachine *current_machine = NULL;
int qemu_register_machine(QEMUMachine *m)
{
QEMUMachine **pm;
pm = &first_machine;
while (*pm != NULL)
pm = &(*pm)->next;
m->next = NULL;
*pm = m;
return 0;
}
static QEMUMachine pc_i440fx_machine_v1_5 = {
.name = "pc-i440fx-1.5",
.alias = "pc",
.desc = "Standard PC (i440FX + PIIX, 1996)",
.init = pc_init_pci,
.hot_add_cpu = pc_hot_add_cpu,
.max_cpus = 255,
.is_default = 1,
DEFAULT_MACHINE_OPTIONS,
};
pc-i440fx-1.5在链表的前面,而且is_default = 1,所以被选为默认Machine
QEMUMachine *find_default_machine(void)
{
QEMUMachine *m;
for(m = first_machine; m != NULL; m = m->next) {
if (m->is_default) {
return m;
}
}
return NULL;
}
有两遍,第一遍确认是使用默认配置文件,还是用户指定的配置文件;第二遍是完整的参数解析
《qemu参数解析》
初始化了clock,timer,signal和aio
static int configure_accelerator(void)
{
const char *p = NULL;
char buf[10];
int i, ret;
bool accel_initialised = false;
bool init_failed = false;
QemuOptsList *list = qemu_find_opts("machine");
if (!QTAILQ_EMPTY(&list->head)) {
p = qemu_opt_get(QTAILQ_FIRST(&list->head), "accel");
}
if (p == NULL) {
/* Use the default "accelerator", tcg */
p = "tcg";
}
while (!accel_initialised && *p != '\0') {
if (*p == ':') {
p++;
}
p = get_opt_name(buf, sizeof (buf), p, ':');
for (i = 0; i < ARRAY_SIZE(accel_list); i++) {
if (strcmp(accel_list[i].opt_name, buf) == 0) {
if (!accel_list[i].available()) {
printf("%s not supported for this target\n",
accel_list[i].name);
continue;
}
*(accel_list[i].allowed) = true;
ret = accel_list[i].init();
if (ret < 0) {
init_failed = true;
fprintf(stderr, "failed to initialize %s: %s\n",
accel_list[i].name,
strerror(-ret));
*(accel_list[i].allowed) = false;
} else {
accel_initialised = true;
}
break;
}
}
if (i == ARRAY_SIZE(accel_list)) {
fprintf(stderr, "\"%s\" accelerator does not exist.\n", buf);
}
}
if (!accel_initialised) {
if (!init_failed) {
fprintf(stderr, "No accelerator found!\n");
}
exit(1);
}
if (init_failed) {
fprintf(stderr, "Back to %s accelerator.\n", accel_list[i].name);
}
return !accel_initialised;
}
static struct {
const char *opt_name;
const char *name;
int (*available)(void);
int (*init)(void);
bool *allowed;
} accel_list[] = {
{ "tcg", "tcg", tcg_available, tcg_init, &tcg_allowed },
{ "xen", "Xen", xen_available, xen_init, &xen_allowed },
{ "kvm", "KVM", kvm_available, kvm_init, &kvm_allowed },
{ "qtest", "QTest", qtest_available, qtest_init, &qtest_allowed },
};
int kvm_init(void)
{
static const char upgrade_note[] =
"Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
"(see http://sourceforge.net/projects/kvm).\n";
KVMState *s;
const KVMCapabilityInfo *missing_cap;
int ret;
int i;
int max_vcpus;
s = g_malloc0(sizeof(KVMState));
/*
* On systems where the kernel can support different base page
* sizes, host page size may be different from TARGET_PAGE_SIZE,
* even with KVM. TARGET_PAGE_SIZE is assumed to be the minimum
* page size for the system though.
*/
assert(TARGET_PAGE_SIZE <= getpagesize());
#ifdef KVM_CAP_SET_GUEST_DEBUG
QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
s->slots[i].slot = i;
}
s->vmfd = -1;
s->fd = qemu_open("/dev/kvm", O_RDWR);
if (s->fd == -1) {
fprintf(stderr, "Could not access KVM kernel module: %m\n");
ret = -errno;
goto err;
}
ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
if (ret < KVM_API_VERSION) {
if (ret > 0) {
ret = -EINVAL;
}
fprintf(stderr, "kvm version too old\n");
goto err;
}
if (ret > KVM_API_VERSION) {
ret = -EINVAL;
fprintf(stderr, "kvm version not supported\n");
goto err;
}
max_vcpus = kvm_max_vcpus(s);
if (smp_cpus > max_vcpus) {
ret = -EINVAL;
fprintf(stderr, "Number of SMP cpus requested (%d) exceeds max cpus "
"supported by KVM (%d)\n", smp_cpus, max_vcpus);
goto err;
}
s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
if (s->vmfd < 0) {
#ifdef TARGET_S390X
fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
"your host kernel command line\n");
#endif
ret = s->vmfd;
goto err;
}
missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
if (!missing_cap) {
missing_cap =
kvm_check_extension_list(s, kvm_arch_required_capabilities);
}
if (missing_cap) {
ret = -EINVAL;
fprintf(stderr, "kvm does not support %s\n%s",
missing_cap->name, upgrade_note);
goto err;
}
s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
s->broken_set_mem_region = 1;
ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
if (ret > 0) {
s->broken_set_mem_region = 0;
}
#ifdef KVM_CAP_VCPU_EVENTS
s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif
s->robust_singlestep =
kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
#ifdef KVM_CAP_DEBUGREGS
s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif
#ifdef KVM_CAP_XSAVE
s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
#endif
#ifdef KVM_CAP_XCRS
s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
#endif
#ifdef KVM_CAP_PIT_STATE2
s->pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
#endif
#ifdef KVM_CAP_IRQ_ROUTING
s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
#endif
s->intx_set_mask = kvm_check_extension(s, KVM_CAP_PCI_2_3);
s->irq_set_ioctl = KVM_IRQ_LINE;
if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) {
s->irq_set_ioctl = KVM_IRQ_LINE_STATUS;
}
ret = kvm_arch_init(s);
if (ret < 0) {
goto err;
}
ret = kvm_irqchip_create(s);
if (ret < 0) {
goto err;
}
kvm_state = s;
memory_listener_register(&kvm_memory_listener, &address_space_memory);
memory_listener_register(&kvm_io_listener, &address_space_io);
s->many_ioeventfds = kvm_check_many_ioeventfds();
cpu_interrupt_handler = kvm_handle_interrupt;
return 0;
err:
if (s->vmfd >= 0) {
close(s->vmfd);
}
if (s->fd != -1) {
close(s->fd);
}
g_free(s);
return ret;
}
设置stdout为行缓冲,也就是每一行刷新一下
void os_set_line_buffering(void)
{
setvbuf(stdout, NULL, _IOLBF, 0);
}
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
qemu_mutex_init(&ram_list.mutex);
memory_map_init();
io_mem_init();
#endif
}
QEMUMachineInitArgs args = { .ram_size = ram_size,
.boot_device = (boot_devices[0] == '\0') ?
machine->boot_order :
boot_devices,
.kernel_filename = kernel_filename,
.kernel_cmdline = kernel_cmdline,
.initrd_filename = initrd_filename,
.cpu_model = cpu_model };
machine->init(&args);
执行pc_init_pci,pc_init1 inside
static void pc_init_pci(QEMUMachineInitArgs *args)
{
ram_addr_t ram_size = args->ram_size;
const char *cpu_model = args->cpu_model;
const char *kernel_filename = args->kernel_filename;
const char *kernel_cmdline = args->kernel_cmdline;
const char *initrd_filename = args->initrd_filename;
const char *boot_device = args->boot_device;
pc_init1(get_system_memory(),
get_system_io(),
ram_size, boot_device,
kernel_filename, kernel_cmdline,
initrd_filename, cpu_model, 1, 1);
}
static void pc_init1(MemoryRegion *system_memory,
MemoryRegion *system_io,
ram_addr_t ram_size,
const char *boot_device,
const char *kernel_filename,
const char *kernel_cmdline,
const char *initrd_filename,
const char *cpu_model,
int pci_enabled,
int kvmclock_enabled)
{
int i;
ram_addr_t below_4g_mem_size, above_4g_mem_size;
PCIBus *pci_bus;
ISABus *isa_bus;
PCII440FXState *i440fx_state;
int piix3_devfn = -1;
qemu_irq *cpu_irq;
qemu_irq *gsi;
qemu_irq *i8259;
qemu_irq *smi_irq;
GSIState *gsi_state;
DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
BusState *idebus[MAX_IDE_BUS];
ISADevice *rtc_state;
ISADevice *floppy;
MemoryRegion *ram_memory;
MemoryRegion *pci_memory;
MemoryRegion *rom_memory;
DeviceState *icc_bridge;
void *fw_cfg = NULL;
icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
object_property_add_child(qdev_get_machine(), "icc-bridge",
OBJECT(icc_bridge), NULL);
pc_cpus_init(cpu_model, icc_bridge);
pc_acpi_init("acpi-dsdt.aml");
if (kvmclock_enabled) {
kvmclock_create();
}
if (ram_size >= QEMU_BELOW_4G_RAM_END ) {
above_4g_mem_size = ram_size - QEMU_BELOW_4G_RAM_END;
below_4g_mem_size = QEMU_BELOW_4G_RAM_END;
} else {
above_4g_mem_size = 0;
below_4g_mem_size = ram_size;
}
if (pci_enabled) {
pci_memory = g_new(MemoryRegion, 1);
memory_region_init(pci_memory, "pci", INT64_MAX);
rom_memory = pci_memory;
} else {
pci_memory = NULL;
rom_memory = system_memory;
}
/* allocate ram and load rom/bios */
if (!xen_enabled()) {
fw_cfg = pc_memory_init(system_memory,
kernel_filename, kernel_cmdline, initrd_filename,
below_4g_mem_size, above_4g_mem_size,
rom_memory, &ram_memory);
}
gsi_state = g_malloc0(sizeof(*gsi_state));
if (kvm_irqchip_in_kernel()) {
kvm_pc_setup_irq_routing(pci_enabled);
gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state,
GSI_NUM_PINS);
} else {
gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS);
}
if (pci_enabled) {
pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
system_memory, system_io, ram_size,
below_4g_mem_size,
0x100000000ULL - below_4g_mem_size,
0x100000000ULL + above_4g_mem_size,
(sizeof(hwaddr) == 4
? 0
: ((uint64_t)1 << 62)),
pci_memory, ram_memory);
} else {
pci_bus = NULL;
i440fx_state = NULL;
isa_bus = isa_bus_new(NULL, system_io);
no_hpet = 1;
}
isa_bus_irqs(isa_bus, gsi);
if (kvm_irqchip_in_kernel()) {
i8259 = kvm_i8259_init(isa_bus);
} else if (xen_enabled()) {
i8259 = xen_interrupt_controller_init();
} else {
cpu_irq = pc_allocate_cpu_irq();
i8259 = i8259_init(isa_bus, cpu_irq[0]);
}
for (i = 0; i < ISA_NUM_IRQS; i++) {
gsi_state->i8259_irq[i] = i8259[i];
}
if (pci_enabled) {
ioapic_init_gsi(gsi_state, "i440fx");
}
qdev_init_nofail(icc_bridge);
pc_register_ferr_irq(gsi[13]);
pc_vga_init(isa_bus, pci_enabled ? pci_bus : NULL);
if (xen_enabled()) {
pci_create_simple(pci_bus, -1, "xen-platform");
}
/* init basic PC hardware */
pc_basic_device_init(isa_bus, gsi, &rtc_state, &floppy, xen_enabled());
pc_nic_init(isa_bus, pci_bus);
ide_drive_get(hd, MAX_IDE_BUS);
if (pci_enabled) {
PCIDevice *dev;
if (xen_enabled()) {
dev = pci_piix3_xen_ide_init(pci_bus, hd, piix3_devfn + 1);
} else {
dev = pci_piix3_ide_init(pci_bus, hd, piix3_devfn + 1);
}
idebus[0] = qdev_get_child_bus(&dev->qdev, "ide.0");
idebus[1] = qdev_get_child_bus(&dev->qdev, "ide.1");
} else {
for(i = 0; i < MAX_IDE_BUS; i++) {
ISADevice *dev;
dev = isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i],
ide_irq[i],
hd[MAX_IDE_DEVS * i], hd[MAX_IDE_DEVS * i + 1]);
idebus[i] = qdev_get_child_bus(&dev->qdev, "ide.0");
}
}
pc_cmos_init(below_4g_mem_size, above_4g_mem_size, boot_device,
floppy, idebus[0], idebus[1], rtc_state);
if (pci_enabled && usb_enabled(false)) {
pci_create_simple(pci_bus, piix3_devfn + 2, "piix3-usb-uhci");
}
if (pci_enabled && acpi_enabled) {
i2c_bus *smbus;
smi_irq = qemu_allocate_irqs(pc_acpi_smi_interrupt,
x86_env_get_cpu(first_cpu), 1);
/* TODO: Populate SPD eeprom data. */
smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100,
gsi[9], *smi_irq,
kvm_enabled(), fw_cfg);
smbus_eeprom_init(smbus, 8, NULL, 0);
}
if (pci_enabled) {
pc_pci_device_init(pci_bus);
}
if (has_pvpanic) {
pvpanic_init(isa_bus);
}
}
pc_init1
->pc_cpus_init
->pc_new_cpu
->cpu_x86_create
->cpu = X86_CPU(object_new(TYPE_X86_CPU)); // 需要理解QOM的使用
->x86_cpu_common_class_init
->dc->realize = x86_cpu_realizefn;
->x86_cpu_realizefn
->qemu_init_vcpu
->qemu_kvm_start_vcpu
->qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, env, QEMU_THREAD_JOINABLE);->qemu_kvm_cpu_thread_fn
vCPU线程:
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
CPUArchState *env = arg;
CPUState *cpu = ENV_GET_CPU(env);
int r;
qemu_mutex_lock(&qemu_global_mutex);
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu_single_env = env;
r = kvm_init_vcpu(cpu);
if (r < 0) {
fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
exit(1);
}
qemu_kvm_init_cpu_signals(env);
/* signal CPU creation */
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
while (1) {
if (cpu_can_run(cpu)) {
r = kvm_cpu_exec(env);
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(env);
}
}
qemu_kvm_wait_io_event(env);
}
return NULL;
}
kvm_run,中断的注入(in kvm_arch_pre_run),以及PMIO,MMIO的处理:
int kvm_cpu_exec(CPUArchState *env)
{
CPUState *cpu = ENV_GET_CPU(env);
struct kvm_run *run = cpu->kvm_run;
int ret, run_ret;
DPRINTF("kvm_cpu_exec()\n");
if (kvm_arch_process_async_events(cpu)) {
cpu->exit_request = 0;
return EXCP_HLT;
}
do {
if (cpu->kvm_vcpu_dirty) {
kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
cpu->kvm_vcpu_dirty = false;
}
kvm_arch_pre_run(cpu, run);
if (cpu->exit_request) {
DPRINTF("interrupt exit requested\n");
/*
* KVM requires us to reenter the kernel after IO exits to complete
* instruction emulation. This self-signal will ensure that we
* leave ASAP again.
*/
qemu_cpu_kick_self();
}
qemu_mutex_unlock_iothread();
run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
qemu_mutex_lock_iothread();
kvm_arch_post_run(cpu, run);
if (run_ret < 0) {
if (run_ret == -EINTR || run_ret == -EAGAIN) {
DPRINTF("io window exit\n");
ret = EXCP_INTERRUPT;
break;
}
fprintf(stderr, "error: kvm run failed %s\n",
strerror(-run_ret));
abort();
}
trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
switch (run->exit_reason) {
case KVM_EXIT_IO:
DPRINTF("handle_io\n");
kvm_handle_io(run->io.port,
(uint8_t *)run + run->io.data_offset,
run->io.direction,
run->io.size,
run->io.count);
ret = 0;
break;
case KVM_EXIT_MMIO:
DPRINTF("handle_mmio\n");
cpu_physical_memory_rw(run->mmio.phys_addr,
run->mmio.data,
run->mmio.len,
run->mmio.is_write);
ret = 0;
break;
case KVM_EXIT_IRQ_WINDOW_OPEN:
DPRINTF("irq_window_open\n");
ret = EXCP_INTERRUPT;
break;
case KVM_EXIT_SHUTDOWN:
DPRINTF("shutdown\n");
qemu_system_reset_request();
ret = EXCP_INTERRUPT;
break;
case KVM_EXIT_UNKNOWN:
fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
(uint64_t)run->hw.hardware_exit_reason);
ret = -1;
break;
case KVM_EXIT_INTERNAL_ERROR:
ret = kvm_handle_internal_error(env, run);
break;
default:
DPRINTF("kvm_arch_handle_exit\n");
ret = kvm_arch_handle_exit(cpu, run);
break;
}
} while (ret == 0);
if (ret < 0) {
cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
vm_stop(RUN_STATE_INTERNAL_ERROR);
}
cpu->exit_request = 0;
return ret;
}
static void qemu_run_machine_init_done_notifiers(void)
{
notifier_list_notify(&machine_init_done_notifiers, NULL);
}
static void main_loop(void)
{
bool nonblocking;
int last_io = 0;
#ifdef CONFIG_PROFILER
int64_t ti;
#endif
do {
nonblocking = !kvm_enabled() && !xen_enabled() && last_io > 0;
#ifdef CONFIG_PROFILER
ti = profile_getclock();
#endif
last_io = main_loop_wait(nonblocking);
#ifdef CONFIG_PROFILER
dev_time += profile_getclock() - ti;
#endif
} while (!main_loop_should_exit());
}