MicroPython 解释器的大致执行流程

最终由mp_execute_bytecode执行bytecode 

    // execute the byte code with the correct globals context
    code_state->old_globals = mp_globals_get();
    mp_globals_set(self->globals);
    mp_vm_return_kind_t vm_return_kind = mp_execute_bytecode(code_state, MP_OBJ_NULL);
    mp_globals_set(code_state->old_globals);

它由fun_bc_call()调用,位于objfun.c



如果一切正常,中途没有ctrl+c , ctrl+d, 则执行:

            // got a line with non-zero length, see if it needs continuing
            while (mp_repl_continue_with_input(vstr_null_terminated_str(&line)))

每次读取一行。



line 作为一个变长的string ,存在如下结构体中:

/** variable string *********************************************/

// Variable-length string: a character buffer plus bookkeeping fields.
typedef struct _vstr_t {
    size_t alloc;           // presumably the number of bytes allocated for buf -- TODO confirm
    size_t len;             // presumably the number of bytes of buf currently in use -- TODO confirm
    char *buf;              // character storage
    bool had_error : 1;     // 1-bit flag; NOTE(review): looks like it records a failed operation -- confirm
    bool fixed_buf : 1;     // 1-bit flag; NOTE(review): looks like it marks buf as non-growable -- confirm
} vstr_t;


fun_bc_call 做成了一个type.

// Type object named "function" (MP_QSTR_function) whose call slot is fun_bc_call,
// so calling an object of this type dispatches to fun_bc_call.
const mp_obj_type_t mp_type_fun_bc = {
    { &mp_type_type },
    .name = MP_QSTR_function,
#if MICROPY_CPYTHON_COMPAT
    // the print slot is only filled in when MICROPY_CPYTHON_COMPAT is enabled
    .print = fun_bc_print,
#endif
    .call = fun_bc_call,
    .unary_op = mp_generic_unary_op,
#if MICROPY_PY_FUNCTION_ATTRS
    // the attr slot is only filled in when MICROPY_PY_FUNCTION_ATTRS is enabled
    .attr = fun_bc_attr,
#endif
};
 
  

mp_parse 初始化分析器,并为其自身分配堆栈。由execute_from_lexer 完成编译

编译分成了两步:

mp_parse_tree_t parse_tree = mp_parse(lex, input_kind);

然后

 mp_obj_t module_fun = mp_compile(&parse_tree, source_name, emit_opt, is_repl);

作为对照,下面看 CPython(import.c)的做法:如果中间要产生 pyc 文件,写出的过程是这样:

/* Serialize the code object `co` into the file `cpathname`.
   The file starts with the magic number, followed by the modification
   time of the source (`mtime`), followed by the marshalled code object.
   Nothing is reported to the caller: if the file cannot be created the
   function simply returns, and if writing fails the partial file is
   removed. */

static void
write_compiled_module(PyCodeObject *co, char *cpathname, struct stat *srcstat, time_t mtime)
{
    FILE *out;
    /* Start from the source file's mode and strip the execute bits. */
    mode_t perms = srcstat->st_mode;
#ifdef MS_WINDOWS   /* since Windows uses different permissions  */
    perms &= ~S_IEXEC;
    /* Issue #6074: We ensure user write access, so we can delete it later
     * when the source file changes. (On POSIX, this only requires write
     * access to the directory, on Windows, we need write access to the file
     * as well)
     */
    perms |= _S_IWRITE;
#else
    perms &= ~(S_IXUSR | S_IXGRP | S_IXOTH);
#endif

    out = open_exclusive(cpathname, perms);
    if (out == NULL) {
        if (Py_VerboseFlag) {
            PySys_WriteStderr(
                "# can't create %s\n", cpathname);
        }
        return;
    }

    /* Header: magic number, then a zero placeholder in the mtime slot. */
    PyMarshal_WriteLongToFile(pyc_magic, out, Py_MARSHAL_VERSION);
    PyMarshal_WriteLongToFile(0L, out, Py_MARSHAL_VERSION);
    /* Payload: the code object itself. */
    PyMarshal_WriteObjectToFile((PyObject *)co, out, Py_MARSHAL_VERSION);

    if (fflush(out) != 0 || ferror(out)) {
        if (Py_VerboseFlag) {
            PySys_WriteStderr("# can't write %s\n", cpathname);
        }
        /* Don't keep partial file */
        fclose(out);
        (void) unlink(cpathname);
        return;
    }

    /* Everything was flushed: go back to byte offset 4 (whence 0, i.e. from
       the start of the file) and overwrite the placeholder with the true
       mtime (as a 32-bit field). */
    fseek(out, 4L, 0);
    assert(mtime <= 0xFFFFFFFF);
    PyMarshal_WriteLongToFile((long)mtime, out, Py_MARSHAL_VERSION);
    fflush(out);
    fclose(out);

    if (Py_VerboseFlag) {
        PySys_WriteStderr("# wrote %s\n", cpathname);
    }
}


如果python 源文件作为输入,那么要先load 进来:

/* Load a source module from a given file and return its module
   object WITH INCREMENTED REFERENCE COUNT.  If there's a matching
   byte-compiled file, use that instead.

   name     -- module name, passed to PyImport_ExecCodeModuleEx and used
               in verbose messages
   pathname -- path of the source file
   fp       -- open stream on the source file; it is only used for
               fstat() and, when no usable compiled file exists, handed to
               parse_source_module()

   Returns the result of PyImport_ExecCodeModuleEx(), or NULL when an
   earlier step fails. */

static PyObject *
load_source_module(char *name, char *pathname, FILE *fp)
{
    struct stat st;
    FILE *fpc;
    char *buf;
    char *cpathname;
    PyCodeObject *co = NULL;
    PyObject *m;
    time_t mtime;

    if (fstat(fileno(fp), &st) != 0) {
        PyErr_Format(PyExc_RuntimeError,
                     "unable to get file status from '%s'",
                     pathname);
        return NULL;
    }

    /* Pick the source modification time used to validate the compiled file. */
#ifdef MS_WINDOWS
    mtime = win32_mtime(fp, pathname);
    if (mtime == (time_t)-1 && PyErr_Occurred())
        return NULL;
#else
    mtime = st.st_mtime;
#endif
    if (sizeof mtime > 4) {
        /* Python's .pyc timestamp handling presumes that the timestamp fits
           in 4 bytes. Since the code only does an equality comparison,
           ordering is not important and we can safely ignore the higher bits
           (collisions are extremely unlikely).
         */
        mtime &= 0xFFFFFFFF;
    }
    /* Scratch buffer that receives the compiled-file path; it is released on
       every exit path below. */
    buf = PyMem_MALLOC(MAXPATHLEN+1);
    if (buf == NULL) {
        return PyErr_NoMemory();
    }
    cpathname = make_compiled_pathname(pathname, buf,
                                       (size_t)MAXPATHLEN + 1);
    /* Branch 1: check_compiled_module() returned an open stream for the
       compiled file, so the code object is read from it instead of being
       compiled from source. */
    if (cpathname != NULL &&
        (fpc = check_compiled_module(pathname, mtime, cpathname))) {
        co = read_compiled_module(cpathname, fpc);
        fclose(fpc);
        if (co == NULL)
            goto error_exit;
        if (update_compiled_module(co, pathname) < 0)
            goto error_exit;
        if (Py_VerboseFlag)
            PySys_WriteStderr("import %s # precompiled from %s\n",
                name, cpathname);
        /* From here on the module is executed under the compiled file's path. */
        pathname = cpathname;
    }
    /* Branch 2: no usable compiled file; build the code object from the
       source stream. */
    else {
        co = parse_source_module(pathname, fp);
		
        if (co == NULL)
            goto error_exit;
        if (Py_VerboseFlag)
            PySys_WriteStderr("import %s # from %s\n",
                name, pathname);
        if (cpathname) {
            /* Write the compiled file unless sys.dont_write_bytecode is true;
               a missing attribute counts as false. */
            PyObject *ro = PySys_GetObject("dont_write_bytecode");
            int b = (ro == NULL) ? 0 : PyObject_IsTrue(ro);
            if (b < 0)
                goto error_exit;
            if (!b)
                write_compiled_module(co, cpathname, &st, mtime);
        }
    }
    /* Execute the code object as module `name`; this function's reference to
       `co` is dropped afterwards. */
    m = PyImport_ExecCodeModuleEx(name, (PyObject *)co, pathname);
    Py_DECREF(co);

    PyMem_FREE(buf);
    return m;

    /* Shared failure path: `co` may still be NULL here, hence Py_XDECREF. */
error_exit:
    Py_XDECREF(co);
    PyMem_FREE(buf);
    return NULL;
}

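此函数来自 CPython 的 import.c。函数不长,但包含了完整的过程:它先检查是否存在与源文件匹配的已编译文件,如果有,就直接从中读出 code object;如果没有,则先调用 parse_source_module 从 python 源文件得到 code object(并在允许时把它写成编译文件)。两种情况下,最后都由 PyImport_ExecCodeModuleEx 执行这个 code object 并返回 module object。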


micro python 对应do_load:

// Load the file named by `file` and execute it in the context of module_obj.
//
// module_obj: module object the loaded code is executed for
// file:       path of the file to load, held in a vstr_t
//
// When MICROPY_PERSISTENT_CODE_LOAD is enabled and the third character from
// the end of the path is 'm' (presumably a ".mpy" name as opposed to ".py" --
// confirm against the caller), the file is loaded as raw code and executed
// directly. Otherwise, when MICROPY_ENABLE_COMPILER is enabled, a lexer is
// created from the file and handed to do_load_from_lexer. Without the
// compiler an ImportError is raised.
STATIC void do_load(mp_obj_t module_obj, vstr_t *file) {
    #if MICROPY_PERSISTENT_CODE_LOAD || MICROPY_ENABLE_COMPILER
    char *file_str = vstr_null_terminated_str(file);
    #endif

    #if MICROPY_PERSISTENT_CODE_LOAD
    // The length check keeps the index in bounds: for a path shorter than
    // three characters, file->len - 3 would wrap around and read outside the
    // buffer. Such a path simply falls through to the source-file handling.
    if (file->len >= 3 && file_str[file->len - 3] == 'm') {
        mp_raw_code_t *raw_code = mp_raw_code_load_file(file_str);
        do_execute_raw_code(module_obj, raw_code);
        return;
    }
    #endif

    #if MICROPY_ENABLE_COMPILER
    {
        mp_lexer_t *lex = mp_lexer_new_from_file(file_str);
        do_load_from_lexer(module_obj, lex, file_str);
    }
    #else
    nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ImportError,
        "script compilation not supported"));
    #endif
}


如果读进来就是byte code, 那么直接执行它,如果是py文件,那就先分析它:

 // parse, compile and execute the module in its context

翻译成中文就是分析, 编译 还有执行module中的内容

mp_parse_compile_execute



最后执行:

mp_call_function_0(module_fun);



从文件名得到 lex, 

mp_lexer_t *lex = mp_lexer_new_from_file(file_str);


先用 open 得到相应文件的 fd,然后由 filename 和 fd 共同拼装出一个 fb(mp_lexer_file_buf_t):


// Create a lexer that reads its source from the file `filename`.
// The file is opened read-only; if open() fails, NULL is returned. Otherwise
// the descriptor is passed to mp_lexer_new_from_fd together with the file
// name as a qstr, with close_fd set to true.
mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
    const int src_fd = open(filename, O_RDONLY, 0644);
    return (src_fd >= 0)
        ? mp_lexer_new_from_fd(qstr_from_str(filename), src_fd, true)
        : NULL;
}


接下来调用mp_lexer_new_from_fd

去捏出一个mp_lexer_file_buf_t 出来

// Create a lexer that reads its source from the open file descriptor `fd`.
//
// filename: source name passed on to mp_lexer_new
// fd:       descriptor to read from
// close_fd: stored in the file buffer; also, if the file buffer cannot be
//           allocated and close_fd is true, fd is closed before returning
//
// Returns the result of mp_lexer_new, or NULL if the file buffer could not
// be allocated.
mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd) {
    mp_lexer_file_buf_t *fb = m_new_obj_maybe(mp_lexer_file_buf_t);
    if (fb == NULL) {
        if (close_fd) {
            close(fd);
        }
        return NULL;
    }
    fb->fd = fd;
    fb->close_fd = close_fd;
    // Pre-fill the buffer with the first chunk of the file.
    int n = read(fb->fd, fb->buf, sizeof(fb->buf));
    // read() returns -1 on error. Storing that as the buffer length would
    // make the buffer look non-empty (or enormous, if len is unsigned), so a
    // failed read is recorded as an empty buffer instead.
    fb->len = (n < 0) ? 0 : n;
    fb->pos = 0;
    return mp_lexer_new(filename, fb, (mp_lexer_stream_next_byte_t)file_buf_next_byte, (mp_lexer_stream_close_t)file_buf_close);
}

也就是说最终生成lex 的是mp_lexer_new() 函数,很明显,这个函数必然是先按lex(mp_lexer_t) 申请一段内存,然后给相应的每个成员赋值。

// Lexer state: the source stream and its callbacks, the current position in
// the source, indentation bookkeeping, and the current token.
// this data structure is exposed for efficiency
// public members are: source_name, tok_line, tok_column, tok_kind, vstr
typedef struct _mp_lexer_t {
    qstr source_name;           // name of source
    void *stream_data;          // data for stream
    mp_lexer_stream_next_byte_t stream_next_byte;   // stream callback to get next byte
    mp_lexer_stream_close_t stream_close;           // stream callback to free

    unichar chr0, chr1, chr2;   // current cached characters from source

    mp_uint_t line;             // current source line
    mp_uint_t column;           // current source column

    mp_int_t emit_dent;             // non-zero when there are INDENT/DEDENT tokens to emit
    mp_int_t nested_bracket_level;  // >0 when there are nested brackets over multiple lines

    // NOTE(review): the three fields below look like a growable array of
    // indentation levels (capacity, count, storage) -- confirm in lexer.c
    mp_uint_t alloc_indent_level;
    mp_uint_t num_indent_level;
    uint16_t *indent_level;

    mp_uint_t tok_line;         // token source line
    mp_uint_t tok_column;       // token source column
    mp_token_kind_t tok_kind;   // token kind
    vstr_t vstr;                // token data
} mp_lexer_t;

这个结构体保存了词法分析的状态,从中可以得到当前 token 的 kind(tok_kind)和 data(vstr),以及它所在的行和列(tok_line、tok_column)。


在进行parse , compile之前,先要做nlr_push(&nlr)


// nlr_push() must be defined as a macro, because "The stack context will be
// invalidated if the function which called setjmp() returns."
//
// The expansion is a comma expression that, in order:
//   1. saves the current MP_STATE_VM(nlr_top) in (buf)->prev,
//   2. makes (buf) the new MP_STATE_VM(nlr_top),
//   3. calls SetJump() on (buf)->jmpbuf.
// The value of the whole expression is the value returned by SetJump().
#define nlr_push(buf) ((buf)->prev = MP_STATE_VM(nlr_top), MP_STATE_VM(nlr_top) = (buf), SetJump((BASE_LIBRARY_JUMP_BUFFER *)(buf)->jmpbuf))


debug 过程:在parse.c  我们有看到

    // truncate final chunk and link into chain of chunks
    if (parser.cur_chunk != NULL) {
        (void)m_renew(byte, parser.cur_chunk,
            sizeof(mp_parse_chunk_t) + parser.cur_chunk->alloc,
            sizeof(mp_parse_chunk_t) + parser.cur_chunk->union_.used);
        parser.cur_chunk->alloc = parser.cur_chunk->union_.used;
        parser.cur_chunk->union_.next = parser.tree.chunk;
        parser.tree.chunk = parser.cur_chunk;
    }

内存分配出错

先放着!!!!

生成raw code  之后, call

mp_obj_t mp_call_function_n_kw(mp_obj_t fun_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args)


接下来:mp_obj_get_type(obj.c)  得到type


next :mp_obj_t fun_bc_call (objfun.c)

next: mp_stack_check()

 取得 bytecode 的起始地址:

    // get start of bytecode
    const byte *ip = self->bytecode;

 用上正确的名字空间(globals context) 去开始执行byte code.


    // execute the byte code with the correct globals context
    code_state->old_globals = mp_globals_get();

next: mp_vm_return_kind_t mp_execute_bytecode(mp_code_state *code_state, volatile mp_obj_t inject_exc)    vm.c



------------------------------------------------------------------------------------------------------------------------------------------------------------------------

mp_stack_check()

mp_exc_recursion_depth()

mp_obj_exception_make_new()

mp_arg_check_num()



    // Pointers which are constant for particular invocation of mp_execute_bytecode()
    mp_obj_t * /*const*/ fastn = &code_state->state[code_state->n_state - 1];



进入outer exception handling loop

    // outer exception handling loop
    for (;;) {
        nlr_buf_t nlr;
outer_dispatch_loop:

next:

            // loop to execute byte code
            for (;;) {
dispatch_loop:
#if MICROPY_OPT_COMPUTED_GOTO
                DISPATCH();
#else
                TRACE(ip);
                MARK_EXC_IP_GLOBAL();


vm.c 285 行

                #else
                ENTRY(MP_BC_LOAD_NAME): {
                    MARK_EXC_IP_SELECTIVE();
                    DECODE_QSTR;
                    mp_obj_t key = MP_OBJ_NEW_QSTR(qst);
                    mp_uint_t x = *ip;
                    if (x < MP_STATE_CTX(dict_locals)->map.alloc && MP_STATE_CTX(dict_locals)->map.table[x].key == key) {
                        PUSH(MP_STATE_CTX(dict_locals)->map.table[x].value);
                    } else {
                        mp_map_elem_t *elem = mp_map_lookup(&MP_STATE_CTX(dict_locals)->map, MP_OBJ_NEW_QSTR(qst), MP_MAP_LOOKUP);

next: 

pending_exception_check:
                MICROPY_VM_HOOK_LOOP
                if (MP_STATE_VM(mp_pending_exception) != MP_OBJ_NULL) {
                    MARK_EXC_IP_SELECTIVE();
                    mp_obj_t obj = MP_STATE_VM(mp_pending_exception);
                    MP_STATE_VM(mp_pending_exception) = MP_OBJ_NULL;
                    RAISE(obj);
                }



最终:

执行byte code

            // loop to execute byte code
            for (;;) {
dispatch_loop:
#if MICROPY_OPT_COMPUTED_GOTO
                DISPATCH();
#else
                TRACE(ip);
                MARK_EXC_IP_GLOBAL();
                switch (*ip++) {
#endif

                ENTRY(MP_BC_LOAD_CONST_FALSE):
                    PUSH(mp_const_false);
                    DISPATCH();


next:1225

                ENTRY_DEFAULT:
                    if (ip[-1] < MP_BC_LOAD_CONST_SMALL_INT_MULTI + 64) {
                        PUSH(MP_OBJ_NEW_SMALL_INT((mp_int_t)ip[-1] - MP_BC_LOAD_CONST_SMALL_INT_MULTI - 16));
                        DISPATCH();
                    } else if (ip[-1] < MP_BC_LOAD_FAST_MULTI + 16) {
                        obj_shared = fastn[MP_BC_LOAD_FAST_MULTI - (mp_int_t)ip[-1]];
                        goto load_check;
                    } else if (ip[-1] < MP_BC_STORE_FAST_MULTI + 16) {
                        fastn[MP_BC_STORE_FAST_MULTI - (mp_int_t)ip[-1]] = POP();
                        DISPATCH();
                    } else if (ip[-1] < MP_BC_UNARY_OP_MULTI + 7) {
                        SET_TOP(mp_unary_op(ip[-1] - MP_BC_UNARY_OP_MULTI, TOP()));



next: runtime.c  559

// args contains, eg: arg0  arg1  key0  value0  key1  value1
mp_obj_t mp_call_function_n_kw(mp_obj_t fun_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) {
    // TODO improve this: fun object can specify its type and we parse here the arguments,
    // passing to the function arrays of fixed and keyword arguments

next: runtime.c 72:

        // function requires a fixed number of arguments

        // dispatch function call
        switch (self->n_args_min) {
            case 0:
                return self->fun._0();

            case 1:
                return self->fun._1(args[0]);

            case 2:
                return self->fun._2(args[0], args[1]);



你可能感兴趣的:(Python)