Dalvik compiler:从bytecode到MIR

开始之前先简单介绍几个和compiler相关的数据对象:

//libdex/DexFile.h

/*
 * Direct-mapped "code_item".
 *
 * The "catches" table is used when throwing an exception,
 * "debugInfo" is used when displaying an exception stack trace or
 * debugging. An offset of zero indicates that there are no entries.
 */
// Describes the bytecode of a method.
struct DexCode;

//compilerIR.h(vm\compiler)
// Used to describe MIR.
// NOTE(review): this is the article author's reading - confirm against the header.
typedef struct BasicBlock;

//compilerIR.h(vm\compiler)
// Used when compiling the IR.
// NOTE(review): the author only skimmed this struct and is not yet sure
// whether it actually describes the IR itself - confirm against the header.
typedef struct CompilationUnit;

vm\compiler\codegen\arm\codegenDriver.cpp

这是一个非常长的函数,没想到google也会写这样的代码,因为长函数在maintain的时候是很容易让人迷失的。当然,也有可能是在项目逐渐构建的过程中,导致了函数体越来越长。

/*
 * Accept the work and start compiling.  Returns true if compilation
 * is attempted.
 *
 * work: the order to process. work->kind selects the action, work->info
 *       carries the trace description for kWorkOrderTrace, and
 *       work->result receives the outcome of the trace compilation.
 *
 * Returns false without touching the order when the JIT code cache is
 * full, and false for a work-order kind that is not recognized.
 */
bool dvmCompilerDoWork(CompilerWorkOrder *work)
{
    JitTraceDescription *desc;
    bool isCompile;
    bool success = true;

    /* Refuse the order outright once the code cache is full */
    if (gDvmJit.codeCacheFull) {
        return false;
    }

    switch (work->kind) {
        case kWorkOrderTrace:
            isCompile = true;
            /* Start compilation with maximally allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                        work->bailPtr, 0 /* no hints */);
            break;
        /* ... handling of the other work-order kinds is omitted from this excerpt ... */
        default:
            isCompile = false;
            ALOGE("Jit: unknown work order type");
            assert(0);  // Bail if debug build, discard otherwise
    }
    /* A failed compilation must not leave a code address in the result */
    if (!success)
        work->result.codeAddress = NULL;
    return isCompile;
}

dowork有很多情况,但是现在让我们focus在真正有用的地方:

/*
 * Main entry point to start trace compilation. Basic blocks are constructed
 * first and they will be passed to the codegen routines to convert Dalvik
 * bytecode into machine code.
 */
bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
                     JitTranslationInfo *info, jmp_buf *bailPtr,
                     int optHints)
这里面一开始就要通过:
 const DexCode *dexCode = dvmGetMethodCode(desc->method);

找到编译的开始位置

/*
 * Map a method to the DexCode record that owns its instructions.
 * Yields NULL when the method is not a bytecode method.
 */
INLINE const DexCode* dvmGetMethodCode(const Method* meth) {
    if (!dvmIsBytecodeMethod(meth)) {
        return NULL;
    }

    /*
     * For a bytecode method, meth->insns is really the address of the
     * insns member embedded in a DexCode, so stepping back by that
     * member's offset lands on the start of the enclosing DexCode.
     */
    const u1* insnsAddr = (const u1*) meth->insns;
    return (const DexCode*) (insnsAddr - offsetof(DexCode, insns));
}

然后做了很多安全/容错方面的检查,之后竟然先进行了JIT相关的分析。

为何呢?不是先编译到machine code再进行JIT的chain分析吗?(MARK)从下面这段代码的注释来看,这一步只是在定义了WITH_JIT_TUNING时,定位用来保存该method编译统计信息的条目,并不是chain分析。

#if defined(WITH_JIT_TUNING)
    /* Locate the entry to store compilation statistics for this method */
    methodStats = dvmCompilerAnalyzeMethodBody(desc->method, false);
#endif

接着就会通过dvmCompilerNewBB()创建BasicBlock(BB)对象:先分配入口块(kEntryBlock),再分配第一个用来存放bytecode的块(kDalvikByteCode),并把它作为入口块的fallThrough。

    /* Allocate the entry block */
    curBB = dvmCompilerNewBB(kEntryBlock, numBlocks++);
    dvmInsertGrowableList(blockList, (intptr_t) curBB);
    curBB->startOffset = curOffset;


    entryCodeBB = dvmCompilerNewBB(kDalvikByteCode, numBlocks++);
    dvmInsertGrowableList(blockList, (intptr_t) entryCodeBB);
    entryCodeBB->startOffset = curOffset;
    curBB->fallThrough = entryCodeBB;
    curBB = entryCodeBB;

BB的类型有如下几种:

/*
 * Kinds of basic block. For coding convenience the chaining-cell kinds
 * come first, so they take the values starting at 0.
 */
enum BBType {
    /* Chaining-cell kinds */
    kChainingCellNormal = 0,
    kChainingCellHot,
    kChainingCellInvokeSingleton,
    kChainingCellInvokePredicted,
    kChainingCellBackwardBranch,
    kChainingCellGap,

    /* Must directly follow kChainingCellGap: add nothing between the two */
    kChainingCellLast = kChainingCellGap + 1,

    /* Remaining block kinds */
    kEntryBlock,
    kDalvikByteCode,
    kExitBlock,
    kPCReconstruction,
    kExceptionHandling,
    kCatchEntry,
};
typedef enum BBType BBType;

然后在循环中通过dvmCompilerNew为每条Dalvik指令分配一个MIR,用parseInsn解码之后,再通过dvmCompilerAppendMIR追加到当前的BB中;如果是invoke指令(kInstrInvoke),insn还会带有一个相关的CallsiteInfo。

    /*
     * Analyze the trace descriptor and include up to the maximal number
     * of Dalvik instructions into the IR.
     */
    while (1) {
        MIR *insn;
        int width;
        /* Allocate a MIR for the instruction at curOffset and decode into it */
        insn = (MIR *)dvmCompilerNew(sizeof(MIR), true);
        insn->offset = curOffset;
        width = parseInsn(codePtr, &insn->dalvikInsn, cUnit.printMe);


        /* The trace should never include instruction data */
        assert(width);
        /* Record the width, grow the trace size, and append to the current block */
        insn->width = width;
        traceSize += width;
        dvmCompilerAppendMIR(curBB, insn);
        cUnit.numInsts++;


        int flags = dexGetFlagsFromOpcode(insn->dalvikInsn.opcode);


        /*
         * Invoke instructions carry a CallsiteInfo whose fields are read
         * from the metadata slots of the current trace run (currRun).
         */
        if (flags & kInstrInvoke) {
            const Method *calleeMethod = (const Method *)
                currRun[JIT_TRACE_CUR_METHOD].info.meta;
            assert(numInsts == 1);
            CallsiteInfo *callsiteInfo =
                (CallsiteInfo *)dvmCompilerNew(sizeof(CallsiteInfo), true);
            callsiteInfo->classDescriptor = (const char *)
                currRun[JIT_TRACE_CLASS_DESC].info.meta;
            callsiteInfo->classLoader = (Object *)
                currRun[JIT_TRACE_CLASS_LOADER].info.meta;
            callsiteInfo->method = calleeMethod;
            insn->meta.callsiteInfo = callsiteInfo;
        }


        /* ... remainder of the loop body is omitted from this excerpt ... */
    }

然后还会做几项工作:

1)pc reconstruction,不懂

2)exception handling,异常处理,这是java的特性吧

3)SSA conversion,不懂

4)NonLoopAnalysis,从名字上看应该是针对不含循环的trace所做的分析,而不是分析空循环(待确认)

最后到了下一步的主要工作MIR2LIR的编译转换。

    /* Convert MIR to LIR, etc. */
    dvmCompilerMIR2LIR(&cUnit, info);
之后自然就是把LIR汇编成machine code的工作了。

    /* Convert LIR into machine code. Loop for recoverable retries */
    do {
        dvmCompilerAssembleLIR(&cUnit, info);
        cUnit.assemblerRetries++;
        if (cUnit.printMe && cUnit.assemblerStatus != kSuccess)
            ALOGD("Assembler abort #%d on %d",cUnit.assemblerRetries,
                  cUnit.assemblerStatus);
    } while (cUnit.assemblerStatus == kRetryAll);
这两部分的细节换下次讨论,太长了。

你可能感兴趣的:(compiler,dalvik,JIT)