开始之前先简单介绍几个和compiler相关的数据对象:
//libdex/DexFile.h /* * Direct-mapped "code_item". * * The "catches" table is used when throwing an exception, * "debugInfo" is used when displaying an exception stack trace or * debugging. An offset of zero indicates that there are no entries. */
//用来描述bytecode的 struct DexCode; //compilerIR.h(vm\compiler) //用来描述MIR的 typedef struct BasicBlock; //compilerIR.h(vm\compiler) //用来进行IR编译的(我只是简单看了一下,现在还不能确定是否就是用于描述IR)。 typedef struct CompilationUnit;
vm\compiler\codegen\arm\codegenDriver.cpp
这是一个非常长的函数,没想到google也会写这样的代码,因为长函数在maintain的时候,
是很容易让人迷失的。当然,也有可能是在项目逐渐构建的过程中,导致了函数体越来越长。
/* * Accept the work and start compiling. Returns true if compilation * is attempted. */ bool dvmCompilerDoWork(CompilerWorkOrder *work) { JitTraceDescription *desc; bool isCompile; bool success = true; if (gDvmJit.codeCacheFull) { return false; } switch (work->kind) { case kWorkOrderTrace: isCompile = true; /* Start compilation with maximally allowed trace length */ desc = (JitTraceDescription *)work->info; success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result, work->bailPtr, 0 /* no hints */); break; /* ............... */ default: isCompile = false; ALOGE("Jit: unknown work order type"); assert(0); // Bail if debug build, discard otherwise } if (!success) work->result.codeAddress = NULL; return isCompile; }
dvmCompilerDoWork要处理多种work order类型,但是现在让我们focus在真正有用的地方,也就是kWorkOrderTrace分支里调用的dvmCompileTrace:
/* * Main entry point to start trace compilation. Basic blocks are constructed * first and they will be passed to the codegen routines to convert Dalvik * bytecode into machine code. */ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts, JitTranslationInfo *info, jmp_buf *bailPtr, int optHints)
这里面一开始就要通过:
const DexCode *dexCode = dvmGetMethodCode(desc->method);
找到该method对应的DexCode(即bytecode所在的结构),作为编译的开始位置
/* * Get the associated code struct for a method. This returns NULL * for non-bytecode methods. */ INLINE const DexCode* dvmGetMethodCode(const Method* meth) { if (dvmIsBytecodeMethod(meth)) { /* * The insns field for a bytecode method actually points at * &(DexCode.insns), so we can subtract back to get at the * DexCode in front. */ return (const DexCode*) (((const u1*) meth->insns) - offsetof(DexCode, insns)); } else { return NULL; } }
然后做了很多安全/容错方面的检查,之后首先进行的竟然是JIT相关的分析。
为何呢?不是先编译到machine code再进行JIT的chain分析吗?(MARK)不过从下面的代码看,这一步被WITH_JIT_TUNING宏包住,注释也说明它只是用来定位存放该method编译统计信息的entry,并不是chain分析。
#if defined(WITH_JIT_TUNING) /* Locate the entry to store compilation statistics for this method */ methodStats = dvmCompilerAnalyzeMethodBody(desc->method, false); #endif
接着通过dvmCompilerNewBB()创建入口BB(kEntryBlock)和第一个用来存放bytecode的BB(kDalvikByteCode),并把它们加入blockList
/* Allocate the entry block */ curBB = dvmCompilerNewBB(kEntryBlock, numBlocks++); dvmInsertGrowableList(blockList, (intptr_t) curBB); curBB->startOffset = curOffset; entryCodeBB = dvmCompilerNewBB(kDalvikByteCode, numBlocks++); dvmInsertGrowableList(blockList, (intptr_t) entryCodeBB); entryCodeBB->startOffset = curOffset; curBB->fallThrough = entryCodeBB; curBB = entryCodeBB;
BB的类型有如下几种:
typedef enum BBType { /* For coding convenience reasons chaining cell types should appear first */ kChainingCellNormal = 0, kChainingCellHot, kChainingCellInvokeSingleton, kChainingCellInvokePredicted, kChainingCellBackwardBranch, kChainingCellGap, /* Don't insert new fields between Gap and Last */ kChainingCellLast = kChainingCellGap + 1, kEntryBlock, kDalvikByteCode, kExitBlock, kPCReconstruction, kExceptionHandling, kCatchEntry, } BBType;
然后在循环中通过dvmCompilerNew为每条Dalvik指令分配一个MIR,用parseInsn解码后通过dvmCompilerAppendMIR追加到当前BB;对于invoke类的指令,insn还会带有一个相关的CallsiteInfo。
/* * Analyze the trace descriptor and include up to the maximal number * of Dalvik instructions into the IR. */ while (1) { MIR *insn; int width; insn = (MIR *)dvmCompilerNew(sizeof(MIR), true); insn->offset = curOffset; width = parseInsn(codePtr, &insn->dalvikInsn, cUnit.printMe); /* The trace should never incude instruction data */ assert(width); insn->width = width; traceSize += width; dvmCompilerAppendMIR(curBB, insn); cUnit.numInsts++; int flags = dexGetFlagsFromOpcode(insn->dalvikInsn.opcode); if (flags & kInstrInvoke) { const Method *calleeMethod = (const Method *) currRun[JIT_TRACE_CUR_METHOD].info.meta; assert(numInsts == 1); CallsiteInfo *callsiteInfo = (CallsiteInfo *)dvmCompilerNew(sizeof(CallsiteInfo), true); callsiteInfo->classDescriptor = (const char *) currRun[JIT_TRACE_CLASS_DESC].info.meta; callsiteInfo->classLoader = (Object *) currRun[JIT_TRACE_CLASS_LOADER].info.meta; callsiteInfo->method = calleeMethod; insn->meta.callsiteInfo = callsiteInfo; } /*........................*/ }
然后还会做几项工作:
1)pc reconstruction,不懂
2)exception handling,异常处理,这是java的特性吧
3)SSA conversion,不懂
4)NonLoopAnalysis,从名字看应该是针对不含循环的trace所做的分析,而不是分析空循环?
最后到了下一步的主要工作MIR2LIR的编译转换。
/* Convert MIR to LIR, etc. */ dvmCompilerMIR2LIR(&cUnit, info);
之后自然就是从LIR到汇编的工作了。
/* Convert LIR into machine code. Loop for recoverable retries */ do { dvmCompilerAssembleLIR(&cUnit, info); cUnit.assemblerRetries++; if (cUnit.printMe && cUnit.assemblerStatus != kSuccess) ALOGD("Assembler abort #%d on %d",cUnit.assemblerRetries, cUnit.assemblerStatus); } while (cUnit.assemblerStatus == kRetryAll);
这两部分的细节换下次讨论,太长了。