在“运行”章节中已经提到指令是在machine_run()中通过cpus[i]->run_instr执行的,而cpus[i]->run_instr又是在cpu_new的时候就注册为mips32_run_instr,因此实际执行指令的是mips32_run_instr。mips32_run_instr是在tmp_mips_tail.cc中结合cpu_dyntrans.cc定义的, tmp_mips_tail.cc是由generate_tail.cc自动生成的:
tmp_mips_tail.cc #define DYNTRANS_RUN_INSTR_DEF mips_run_instr #include "cpu_dyntrans.cc" #undef DYNTRANS_RUN_INSTR_DEF cpu_dyntrans.cc int DYNTRANS_RUN_INSTR_DEF(struct cpu *cpu) { 转换pc DYNTRANS_PC_TO_POINTERS32(cpu); 执行指令 for (;;) { struct DYNTRANS_IC *ic; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; I; cpu->n_translated_instrs += 120; if (cpu->n_translated_instrs >= N_SAFE_DYNTRANS_LIMIT) break; } }
执行指令使用宏I,定义在cpu_dyntrans.cc:
#define I ic = cpu->cd.DYNTRANS_ARCH.next_ic ++; ic->f(cpu, ic);
其中DYNTRANS_ARCH根据不同的cpu架构而不同,对于mips,定义在tmp_mips_head.cc中:
#define DYNTRANS_ARCH mips
mips的结构体类型是mips_cpu,定义在cpu_mips.h中,里面保存CPU的各种信息,包括通用寄存器、cache line信息等:
struct mips_cpu { struct mips_cpu_type_def cpu_type; /* General purpose registers: */ uint64_t gpr[N_MIPS_GPRS]; /* Dummy destination register when writing to the zero register: */ uint64_t scratch; /* Special purpose registers: */ uint64_t hi; uint64_t lo; /* Coprocessors: */ struct mips_coproc *coproc[N_MIPS_COPROCS]; uint64_t cop0_config_select1; int last_written_tlb_index; /* Count/compare timer: */ int compare_register_set; int compare_interrupts_pending; struct interrupt irq_compare; struct timer *timer; int rmw; /* Read-Modify-Write */ uint64_t rmw_len; /* Length of rmw modification */ uint64_t rmw_addr; /* Address of rmw modification */ /* * NOTE: The R5900 has 128-bit registers. I'm not really sure * whether they are used a lot or not, at least with code produced * with gcc they are not. An important case however is lq and sq * (load and store of 128-bit values). These "upper halves" of R5900 * quadwords can be used in those cases. * * hi1 and lo1 are the high 64-bit parts of the hi and lo registers. * sa is a 32-bit "shift amount" register. * * TODO: Generalize this. */ uint64_t gpr_quadhi[N_MIPS_GPRS]; uint64_t hi1; uint64_t lo1; uint32_t r5900_sa; /* * Data and Instruction caches: */ /* Cache sizes: (1 << x) x=0 for default values */ /* This is legacy stuff. TODO: Clean up! */ int cache_picache; int cache_pdcache; int cache_secondary; int cache_picache_linesize; int cache_pdcache_linesize; int cache_secondary_linesize; unsigned char *cache[2]; void *cache_tags[2]; uint64_t cache_last_paddr[2]; int cache_size[2]; int cache_linesize[2]; int cache_mask[2]; /* * Instruction translation cache and Virtual->Physical->Host * address translation: */ DYNTRANS_ITC(mips) VPH_TLBS(mips,MIPS) VPH32(mips,MIPS) VPH64(mips,MIPS) };
next_ic由宏DYNTRANS_ITC(Dyntrans "Instruction Translation Cache")产生,定义在cpu.h内:
#define DYNTRANS_ITC(arch) struct arch ## _tc_physpage *cur_physpage; \ struct arch ## _instr_call *cur_ic_page; \ struct arch ## _instr_call *next_ic; \ struct arch ## _tc_physpage *physpage_template;\ void (*combination_check)(struct cpu *, \ struct arch ## _instr_call *, int low_addr);
2.1 cpu_new的时候调用mips_cpu_init_tables 分配physpage_template, 并将其指令函数初始化为TO_BE_TRANSLATED
cpu.cc cpu_new() { fp->init_tables(cpu); }
init_tables在CPU_FAMILY_INIT的时候已经注册为mips_cpu_init_tables, mips_cpu_init_tables由tmp_mips_tail.cc和cpu_dyntrans.cc共同产生:
tmp_mips_tail.cc #define DYNTRANS_INIT_TABLES mips_cpu_init_tables #include "cpu_dyntrans.cc" #undef DYNTRANS_INIT_TABLES void DYNTRANS_INIT_TABLES(struct cpu *cpu) { struct DYNTRANS_TC_PHYSPAGE *ppp; 分配temp page CHECK_ALLOCATION(ppp = (struct DYNTRANS_TC_PHYSPAGE *) malloc(sizeof(struct DYNTRANS_TC_PHYSPAGE))); //将指令函数全部指向TO_BE_TRANSLATED for (i=0; i<DYNTRANS_IC_ENTRIES_PER_PAGE; i++) ppp->ics[i].f = TO_BE_TRANSLATED; cpu->cd.DYNTRANS_ARCH.physpage_template = ppp; } tmp_mips_head.cc中定义了: #define DYNTRANS_TC_PHYSPAGE mips_tc_physpage cpu.h的宏DYNTRANS_MISC_DECLARATIONS在cpu_mips.h中定义了mips_tc_physpage结构体 cpu_mips.h DYNTRANS_MISC_DECLARATIONS(mips,MIPS,uint64_t) cpu.h #define DYNTRANS_MISC_DECLARATIONS(arch,ARCH,addrtype) struct \ arch ## _instr_call { \ void (*f)(struct cpu *, struct arch ## _instr_call *); \ size_t arg[ARCH ## _N_IC_ARGS]; \ }; \ \ /* Translation cache struct for each physical page: */ \ struct arch ## _tc_physpage { \ struct arch ## _instr_call ics[ARCH ## _IC_ENTRIES_PER_PAGE+2];\ uint32_t next_ofs; /* (0 for end of chain) */ \ uint32_t translations_bitmap; \ uint32_t translation_ranges_ofs; \ addrtype physaddr; \ }; \ \ struct arch ## _vpg_tlb_entry { \ uint8_t valid; \ uint8_t writeflag; \ addrtype vaddr_page; \ addrtype paddr_page; \ unsigned char *host_page; \ }; 展开得到 struct mips_tc_physpage { struct mips _instr_call ics[MIPS _IC_ENTRIES_PER_PAGE+2]; uint32_t next_ofs; /* (0 for end of chain) */ uint32_t translations_bitmap; uint32_t translation_ranges_ofs; addrtype physaddr; };
2.2 mips_run_instr的时候调用mips32_pc_to_pointers 会根据pc ,查看是否存在对应的phys_page,如果不存在,会alloc,并以physpage_template的值填充phys_page,同时通过mips32_update_translation_table把指令码load到host_load内,最后将next_ic和cur_ic_page指向对应的phys_page
在tmp_mips_tail.cc中有定义:
#define DYNTRANS_PC_TO_POINTERS_FUNC mips32_pc_to_pointers
结合cpu_dyntrans.cc中的定义可以看到pc to pointers的流程
void DYNTRANS_PC_TO_POINTERS_FUNC(struct cpu *cpu) { //获取PC所在的phys page cached_pc = cpu->pc; index = DYNTRANS_ADDR_TO_PAGENR(cached_pc); ppp = cpu->cd.DYNTRANS_ARCH.phys_page[index]; phys_page存在,直接赋给next_ic if (ppp != NULL) goto have_it; //phys_page不存在时,重新load DYNTRANS_PC_TO_POINTERS_GENERIC(cpu); 将next_ic指向phys page have_it: cpu->cd.DYNTRANS_ARCH.cur_ic_page = &ppp->ics[0]; cpu->cd.DYNTRANS_ARCH.next_ic = cpu->cd.DYNTRANS_ARCH.cur_ic_page + DYNTRANS_PC_TO_IC_ENTRY(cached_pc); } void DYNTRANS_PC_TO_POINTERS_GENERIC(struct cpu *cpu) { //转换为物理地址 cached_pc = cpu->pc; vaddr = cached_pc; ok = cpu->translate_v2p( cpu, vaddr, &paddr, FLAG_INSTR); 转换为emul host memory地址 physaddr = paddr; unsigned char *host_page = memory_paddr_to_hostaddr(cpu->mem, physaddr, MEM_READ); 如果不存在,分配host memory block,并将指令码load到host_page内 if (host_page != NULL) { cpu->update_translation_table(cpu, cached_pc & ~q, host_page, 0, physaddr); } 寻找是否有可用phys_page physpage_entryp = &(((uint32_t *)cpu->translation_cache)[table_index]); physpage_ofs = *physpage_entryp; ppp = NULL; /* Traverse the physical page chain: */ while (physpage_ofs != 0) { } 没有找到,alloc phys_page if (physpage_ofs == 0) { DYNTRANS_TC_ALLOCATE(cpu, physaddr); } }
从tmp_mips_head.cc和tmp_mips_tail.cc 中可以看到DYNTRANS_TC_ALLOCATE和DYNTRANS_TC_ALLOCATE_DEFAULT_PAGE_DEF都被定义为mips_tc_allocate_default_page
static void DYNTRANS_TC_ALLOCATE_DEFAULT_PAGE_DEF(struct cpu *cpu, uint64_t physaddr) { struct DYNTRANS_TC_PHYSPAGE *ppp; 分配phys_page ppp = (struct DYNTRANS_TC_PHYSPAGE *)(cpu->translation_cache + cpu->translation_cache_cur_ofs); 将physpage_template的内容(TO_BE_TRANSLATED)赋给新的page,因此一开始访问所有next_ic都是TO_BE_TRANSLATED /* Copy the entire template page first: */ memcpy(ppp, cpu->cd.DYNTRANS_ARCH.physpage_template, sizeof( struct DYNTRANS_TC_PHYSPAGE)); ppp->physaddr = physaddr & ~(DYNTRANS_PAGESIZE - 1); cpu->translation_cache_cur_ofs += sizeof(struct DYNTRANS_TC_PHYSPAGE); cpu->translation_cache_cur_ofs --; cpu->translation_cache_cur_ofs |= 63; cpu->translation_cache_cur_ofs ++; }
2.3 执行指令的时候呼叫到I,因此会执行next_ic 内的f,也就是TO_BE_TRANSLATED == instr(to_be_translated)
ic = cpu->cd.DYNTRANS_ARCH.next_ic ++; ic->f(cpu, ic);
从上面的分析可以知道一开始的ic->f都是instr(to_be_translated)
2.4 执行instr(to_be_translated), 会根据pc将指令码从host_load中取出来,分析指令码,翻译为对应的指令函数,并执行该指令函数
在tmp_mips_head.cc中分别定义了:
#define instr(n) mips_instr_ ## n #define X(n) void mips_instr_ ## n(struct cpu *cpu, struct mips_instr_call *ic)
因此instr(to_be_translated) 在cpu_mips_instr.cc中定义
X(to_be_translated) { 从pc转换为页内地址addr addr = cpu->pc & ~((MIPS_IC_ENTRIES_PER_PAGE-1) << MIPS_INSTR_ALIGNMENT_SHIFT); addr += (low_pc << MIPS_INSTR_ALIGNMENT_SHIFT); cpu->pc = (MODE_int_t)addr; addr &= ~((1 << MIPS_INSTR_ALIGNMENT_SHIFT) - 1); 从页内addr读出指令码 page = cpu->cd.mips.host_load[(uint32_t)addr >> 12]; memcpy(ib, page + (addr & 0xffc), sizeof(ib)); uint32_t *p = (uint32_t *) ib; iword = *p; 分析指令码并转化为对应的指令函数 main_opcode = iword >> 26; rs = (iword >> 21) & 31; rt = (iword >> 16) & 31; rd = (iword >> 11) & 31; sa = (iword >> 6) & 31; imm = (int16_t)iword; s6 = iword & 63; s10 = (rs << 5) | sa; switch (main_opcode){ 将转换的指令函数替换to_be_translated case …: ic->f = instr(sll); } 连续转换128条指令码为指令函数 cpu->translation_readahead = MAX_DYNTRANS_READAHEAD; while (DYNTRANS_ADDR_TO_PAGENR(baseaddr +(i << DYNTRANS_INSTR_ALIGNMENT_SHIFT)) == pagenr && cpu->translation_readahead > 0) { void (*old_f)(struct cpu *, struct DYNTRANS_IC *) = ic[i].f; /* Already translated? Then abort: */ if (old_f != TO_BE_TRANSLATED) break; /* Translate the instruction: */ ic[i].f(cpu, ic+i); /* Translation failed? Then abort. */ if (ic[i].f == old_f) break; cpu->translation_readahead --; ++i; } cpu->translation_readahead = 0; 最后执行一条指令 ic->f(cpu, ic); } 从上面可见执行一个I就会连续转换128条指令码才会执行下一个I,此时下一个I已经是指令函数,不用再转换,直到被转换的指令用完,或者是发生跳转到新的转换指令上,才会又发生指令码到指令函数的转换