语句的执行流程

openGauss进程的主函数（位于main.cpp）

/*
 * Any Postgres server process begins execution here.
 *
 * Startup order as written below: read GAUSS_MMAP_THRESHOLD, initialize the
 * instance context and two instance-level memory contexts, bring up the
 * memory-context subsystem and the master thread with a fake session, set up
 * locale, handle --help/--version, check that we are not running as root,
 * then dispatch on argv[1]:
 *   --boot            -> BootStrapProcessMain()
 *   --describe-config -> GucInfoMain()
 *   --single          -> PostgresMain()
 *   anything else     -> PostmasterMain()
 * A binary invoked under the program name "gs_encrypt" returns early with the
 * result of encrypte_main().
 */
int main(int argc, char* argv[])
{
    char* mmap_env = NULL;
    syscall_lock_init();

    // Read the GAUSS_MMAP_THRESHOLD environment variable
    mmap_env = gs_getenv_r("GAUSS_MMAP_THRESHOLD");
    if (mmap_env != NULL) {
        check_backend_env(mmap_env);
        // NOTE(review): atol() reports no conversion errors; presumably
        // check_backend_env() has already vetted the string -- confirm.
        mmap_threshold = (size_t)atol(mmap_env);
    }

    // Initialize the instance-level context
    knl_instance_init();

    // Shared memory context stored in g_instance.increCheckPoint_context
    g_instance.increCheckPoint_context = AllocSetContextCreate(
        INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE),
        "IncreCheckPointContext",
        ALLOCSET_DEFAULT_MINSIZE,
        ALLOCSET_DEFAULT_INITSIZE,
        ALLOCSET_DEFAULT_MAXSIZE,
        SHARED_CONTEXT);

    // Shared memory context stored in g_instance.account_context
    g_instance.account_context = AllocSetContextCreate(g_instance.instance_context,
        "StandbyAccontContext",
        ALLOCSET_DEFAULT_MINSIZE,
        ALLOCSET_DEFAULT_INITSIZE,
        ALLOCSET_DEFAULT_MAXSIZE,
        SHARED_CONTEXT);

    /*
     * Fire up essential subsystems: error and memory management
     *
     * Code after this point is allowed to use elog/ereport, though
     * localization of messages may not work right away, and messages won't go
     * anywhere but stderr until GUC settings get loaded.
     */
    // Start the memory-context subsystem
    MemoryContextInit();

    PmTopMemoryContext = t_thrd.top_mem_cxt;

    // Initialize the master thread
    knl_thread_init(MASTER_THREAD);

    // Create a session marked KNL_SESS_FAKE and make it the current session (u_sess)
    t_thrd.fake_session = create_session_context(t_thrd.top_mem_cxt, 0);
    t_thrd.fake_session->status = KNL_SESS_FAKE;

    u_sess = t_thrd.fake_session;

    SelfMemoryContext = THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_DEFAULT);

    MemoryContextSwitchTo(THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_DEFAULT));

    progname = get_progname(argv[0]); // get the program name

    /*
     * Platform-specific startup hacks
     * (platform-specific startup settings)
     */
    startup_hacks(progname);

    /* if gaussdb's name is gs_encrypt, so run in encrypte_main() */
    // When the program name is gs_encrypt, delegate to encrypte_main() and return its result
    if (!strcmp(progname, "gs_encrypt")) {
        return encrypte_main(argc, argv);
    }

    init_plog_global_mem();

    /*
     * Remember the physical location of the initially given argv[] array for
     * possible use by ps display.  On some platforms, the argv[] storage must
     * be overwritten in order to set the process title for ps. In such cases
     * save_ps_display_args makes and returns a new copy of the argv[] array.
     *
     * save_ps_display_args may also move the environment strings to make
     * extra room. Therefore this should be done as early as possible during
     * startup, to avoid entanglements with code that might save a getenv()
     * result pointer.
     * Save argc and argv.
     */
    argv = save_ps_display_args(argc, argv);

    /*
     * If supported on the current platform, set up a handler to be called if
     * the backend/postmaster crashes with a fatal signal or exception.
     */
     // Platform-dependent: install a handler to be called when the backend/postmaster crashes
#if defined(WIN32) && defined(HAVE_MINIDUMP_TYPE) 
    pgwin32_install_crashdump_handler();
#endif

    /*
     * Set up locale information from environment.  Note that LC_CTYPE and
     * LC_COLLATE will be overridden later from pg_control if we are in an
     * already-initialized database.  We set them here so that they will be
     * available to fill pg_control during initdb.  LC_MESSAGES will get set
     * later during GUC option processing, but we set it here to allow startup
     * error messages to be localized.
     */
    // Set up locale information from the environment
    set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("gaussdb"));

#ifdef WIN32

    /*
     * Windows uses codepages rather than the environment, so we work around
     * that by querying the environment explicitly first for LC_COLLATE and
     * LC_CTYPE. We have to do this because initdb passes those values in the
     * environment. If there is nothing there we fall back on the codepage.
     */
    {
        char* env_locale = NULL;

        if ((env_locale = gs_getenv_r("LC_COLLATE")) != NULL) {
            check_backend_env(env_locale);
            pg_perm_setlocale(LC_COLLATE, env_locale);
        } else
            pg_perm_setlocale(LC_COLLATE, "");

        if ((env_locale = gs_getenv_r("LC_CTYPE")) != NULL) {
            check_backend_env(env_locale);
            pg_perm_setlocale(LC_CTYPE, env_locale);
        } else
            pg_perm_setlocale(LC_CTYPE, "");
    }
#else
    pg_perm_setlocale(LC_COLLATE, "");
    pg_perm_setlocale(LC_CTYPE, "");
#endif

    /*
     * We keep these set to "C" always, except transiently in pg_locale.c; see
     * that file for explanations.
     */
    pg_perm_setlocale(LC_MONETARY, "C");
    pg_perm_setlocale(LC_NUMERIC, "C");
    pg_perm_setlocale(LC_TIME, "C");

    /*
     * Now that we have absorbed as much as we wish to from the locale
     * environment, remove any LC_ALL setting, so that the environment
     * variables installed by pg_perm_setlocale have force.
     */
    (void)unsetenv("LC_ALL");

    /*
     * Catch standard options before doing much else
     */
    if (argc > 1) {
        // Print the help text and exit
        if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) {
            help(progname);
            exit(0);
        }
        // Print the version string and exit
        if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) {
            puts("gaussdb " DEF_GS_VERSION);
            exit(0);
        }
    }

    /*
     * Make sure we are not running as root.
     */
    // Confirm the program was not started as root
    check_root(progname);

    /*
     * Dispatch to one of various subprograms depending on first argument.
     */
#ifdef WIN32

    /*
     * Start our win32 signal implementation
     *
     * SubPostmasterMain() will do this for itself, but the remaining modes
     * need it here
     */
    pgwin32_signal_initialize();
#endif

    /* init trace context */
    // gstrace_destory is registered for process exit only when gstrace_init() returns 0
    if (gstrace_init(getpid()) == 0) {
        on_proc_exit(gstrace_destory, 0);
    }

    // Memory context named "gs_signal", created under the thread's top memory context
    t_thrd.mem_cxt.gs_signal_mem_cxt = AllocSetContextCreate(
        t_thrd.top_mem_cxt, "gs_signal", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE);
    if (NULL == t_thrd.mem_cxt.gs_signal_mem_cxt) {
        ereport(LOG, (errmsg("could not start a new thread, because of no  enough system resource. ")));
        proc_exit(1);
    }

    /*
     * @BuiltinFunc
     * Create a global BuiltinFunc object shared among threads
     */
    // Global builtin-function object shared among threads; built only if not yet populated
    if (g_sorted_funcs[0] == NULL) {
        initBuiltinFuncs();
    }

    if (argc > 1 && strcmp(argv[1], "--boot") == 0) {
        // initdb-related: bootstrapping mode
        IsInitdb = true;
        gs_signal_monitor_startup();
        gs_signal_slots_init(1);
        (void)gs_signal_unblock_sigusr2();
        gs_signal_startup_siginfo("AuxiliaryProcessMain");
        BootStrapProcessMain(argc, argv); /* does not return */
    }

    // Print the GUC variables
    if (argc > 1 && strcmp(argv[1], "--describe-config") == 0)
        exit(GucInfoMain());

    if (argc > 1 && strcmp(argv[1], "--single") == 0) {
        // initdb-related: single-user mode
        IsInitdb = true;
        gs_signal_monitor_startup();
        gs_signal_slots_init(1);
        (void)gs_signal_unblock_sigusr2();
        gs_signal_startup_siginfo("PostgresMain");

        exit(PostgresMain(argc, argv, NULL, get_current_username(progname)));
    }

    // Normal database startup
    exit(PostmasterMain(argc, argv));
}

查询语句的执行流程

postgres=# create table a(id int);
CREATE TABLE
postgres=# insert into a values(1);
INSERT
postgres=# select * from a;

postgres.cpp中的PostgresMain函数调用ReadCommand函数读取客户端命令；
对于简单查询，调用exec_simple_query函数进行处理

/*
 * Abridged outline of simple-query execution ("..." marks omitted code).
 *
 * Steps shown: report activity, start a transaction command, parse the query
 * string into a list of raw parse trees, then for each parse tree: analyze
 * and rewrite it, plan it, run it through an unnamed portal, finish the
 * transaction command and report command completion.
 */
static void exec_simple_query(const char* query_string, MessageType messageType, StringInfo msg = NULL)
{
    ...
    // Report that the backend thread is processing a query
    pgstat_report_activity(STATE_RUNNING, query_string);
    ...
    // Start the transaction
    start_xact_command();
    ...
    // Parse the SQL text (uses reparsed_query instead of query_string when it is non-empty)
    parsetree_list = pg_parse_query(reparsed_query.empty() ?
                                                query_string : reparsed_query.c_str(), &query_string_locationlist);
    ...
    /*
     * Run through the raw parsetree(s) and process each one.
     */
    // Iterate over parsetree_list
    foreach (parsetree_item, parsetree_list) {
        ...
        Node* parsetree = (Node*)lfirst(parsetree_item);
        ...
        // Command tag of the statement; "SELECT" for the example query
        commandTag = CreateCommandTag(parsetree);
        ... 
        /* Make sure we are in a transaction command */
        start_xact_command();
        ...
        /*
         * Set up a snapshot if parse analysis/planning will need one.
         */
        // Set up the snapshot
        if (analyze_requires_snapshot(parsetree)) {
            PushActiveSnapshot(GetTransactionSnapshot());
            snapshot_set = true;
        }
        ...
        // Analyze the parse tree into a query tree, then rewrite the query tree
        querytree_list = pg_analyze_and_rewrite(parsetree, query_string, NULL, 0);
        ...
        // Generate the plan tree(s)
        plantree_list = pg_plan_queries(querytree_list, 0, NULL);
        ...
        // Create an unnamed portal (empty name) to run the query
        portal = CreatePortal("", true, true);
        ...
        // Start the portal
        PortalStart(portal, NULL, 0, InvalidSnapshot);
        ...
        // Run the portal, then destroy the receiver and drop the portal
        (void)PortalRun(portal, FETCH_ALL, isTopLevel, receiver, receiver, completionTag);
        (*receiver->rDestroy)(receiver);
        PortalDrop(portal, false);
        ...
        // Commit the transaction
        finish_xact_command();
        ...
        // Report command completion
        EndCommand(completionTag, dest);
        ...
    }
}

词法语法解析
相关数据结构

/*
 * SelectStmt: raw parse-tree node for a SELECT statement.
 *
 * A node is either a "leaf" (a plain SELECT or a VALUES list) or an
 * upper-level node that combines two child SelectStmts (larg/rarg) with a
 * set operation (op/all). The field groups below are labelled accordingly.
 */
typedef struct SelectStmt {
    NodeTag type;

    /*
     * These fields are used only in "leaf" SelectStmts.
     */
    List *distinctClause;   /* NULL, list of DISTINCT ON exprs, or
                             * lcons(NIL,NIL) for all (SELECT DISTINCT) */
    IntoClause *intoClause; /* target for SELECT INTO */
    List *targetList;       /* the target list (of ResTarget) */
    List *fromClause;       /* the FROM clause */
    Node *whereClause;      /* WHERE qualification */
    List *groupClause;      /* GROUP BY clauses */
    Node *havingClause;     /* HAVING conditional-expression */
    List *windowClause;     /* WINDOW window_name AS (...), ... */
    WithClause *withClause; /* WITH clause */

    /*
     * In a "leaf" node representing a VALUES list, the above fields are all
     * null, and instead this field is set.  Note that the elements of the
     * sublists are just expressions, without ResTarget decoration. Also note
     * that a list element can be DEFAULT (represented as a SetToDefault
     * node), regardless of the context of the VALUES list. It's up to parse
     * analysis to reject that where not valid.
     */
    List *valuesLists; /* untransformed list of expression lists */

    /*
     * These fields are used in both "leaf" SelectStmts and upper-level
     * SelectStmts.
     */
    List *sortClause;    /* sort clause (a list of SortBy's) */
    Node *limitOffset;   /* # of result tuples to skip */
    Node *limitCount;    /* # of result tuples to return */
    List *lockingClause; /* FOR UPDATE (list of LockingClause's) */
    HintState *hintState; /* presumably the optimizer hints attached to this statement -- TODO confirm */

    /*
     * These fields are used only in upper-level SelectStmts.
     */
    SetOperation op;         /* type of set op */
    bool all;                /* ALL specified? */
    struct SelectStmt *larg; /* left child */
    struct SelectStmt *rarg; /* right child */

    /*
     * These fields are used by operator "(+)"
     */
    bool hasPlus; /* presumably true when the statement contains the "(+)" operator -- TODO confirm */
    /* Eventually add fields for CORRESPONDING spec here */
} SelectStmt;

相关代码在parser.cpp，主要流程如下

// Lexical and grammatical analysis using flex and bison.
//
// Abridged outline ("..." marks omitted code): initializes the scanner on str
// and the parser state, runs base_yyparse(), releases the scanner, and returns
// NIL when base_yyparse() reports an error, otherwise yyextra.parsetree.
List* raw_parser(const char* str, List** query_string_locationlist)
{
    ...
    // Initialize the lexical scanner
    yyscanner = scanner_init(str, &yyextra.core_yy_extra, ScanKeywords, NumScanKeywords);
    ... 
    // Initialize the grammar parser
    parser_init(&yyextra);

    // Parse the SQL text
    yyresult = base_yyparse(yyscanner);

    /* Clean up (release memory) */
    scanner_finish(yyscanner);

    if (yyresult) { /* error */
        return NIL;
    }
    ...
    // Return the parse tree
    return yyextra.parsetree;
}

转换查询树并重写
相关数据结构

/*
 * Query: the query-tree node built from a raw parse tree by parse analysis.
 *
 * commandType identifies the kind of statement; the remaining fields hold the
 * range table, join tree, target list and the transformed clauses.
 */
typedef struct Query {
    NodeTag type;

    CmdType commandType; /* select|insert|update|delete|merge|utility */

    QuerySource querySource; /* where did I come from? */

    uint64 queryId; /* query identifier (can be set by plugins) */

    bool canSetTag; /* do I set the command result tag? */

    Node* utilityStmt; /* non-null if this is DECLARE CURSOR or a
                        * non-optimizable statement */

    int resultRelation; /* rtable index of target relation for
                         * INSERT/UPDATE/DELETE/MERGE; 0 for SELECT */

    bool hasAggs;         /* has aggregates in tlist or havingQual */
    bool hasWindowFuncs;  /* has window functions in tlist */
    bool hasSubLinks;     /* has subquery SubLink */
    bool hasDistinctOn;   /* distinctClause is from DISTINCT ON */
    bool hasRecursive;    /* WITH RECURSIVE was specified */
    bool hasModifyingCTE; /* has INSERT/UPDATE/DELETE in WITH */
    bool hasForUpdate;    /* FOR UPDATE or FOR SHARE was specified */
    bool hasRowSecurity;  /* rewriter has applied some RLS policy */
    bool hasSynonyms;     /* has synonym mapping in rtable */

    List* cteList; /* WITH list (of CommonTableExpr's) */

    List* rtable;       /* list of range table entries */
    FromExpr* jointree; /* table join tree (FROM and WHERE clauses) */

    List* targetList; /* target list (of TargetEntry) */

    List* starStart; /* Corresponding p_star_start in ParseState */

    List* starEnd; /* Corresponding p_star_end in ParseState */

    List* starOnly; /* Corresponding p_star_only in ParseState */

    List* returningList; /* return-values list (of TargetEntry) */

    List* groupClause; /* a list of SortGroupClause's */

    List* groupingSets; /* a list of GroupingSet's if present */

    Node* havingQual; /* qualifications applied to groups */

    List* windowClause; /* a list of WindowClause's */

    List* distinctClause; /* a list of SortGroupClause's */

    List* sortClause; /* a list of SortGroupClause's */

    Node* limitOffset; /* # of result tuples to skip (int8 expr) */
    Node* limitCount;  /* # of result tuples to return (int8 expr) */

    List* rowMarks; /* a list of RowMarkClause's */

    Node* setOperations; /* set-operation tree if this is top level of
                          * a UNION/INTERSECT/EXCEPT query */

    List *constraintDeps; /* a list of pg_constraint OIDs that the query
                           * depends on to be semantically valid */
    HintState* hintState; /* copied from SelectStmt.hintState by transformSelectStmt */
#ifdef PGXC
    /* need this info for PGXC Planner, may be temporary */
    char* sql_statement;                 /* original query */
    bool is_local;                       /* enforce query execution on local node
                                          * this is used by EXECUTE DIRECT especially. */
    bool has_to_save_cmd_id;             /* true if the query is such an INSERT SELECT
                                          * that inserts into a child by selecting
                                          * from its parent OR a WITH query that
                                          * updates a table in main query and inserts
                                          * a row to the same table in WITH query */
    bool vec_output;                     /* true if it's vec output. this flag is used in FQS planning  */
    TdTruncCastStatus tdTruncCastStatus; /* Auto truncation Cast added, only used for stmt in stored procedure or
                                            prepare stmt. */
    List* equalVars;                     /* vars appears in UPDATE/DELETE clause */
#endif
    ParamListInfo boundParamsQ;

    int mergeTarget_relation;
    List* mergeSourceTargetList;
    List* mergeActionList; /* list of actions for MERGE (only) */
    Query* upsertQuery;    /* insert query for INSERT ON DUPLICATE KEY UPDATE (only) */
    UpsertExpr* upsertClause; /* DUPLICATE KEY UPDATE [NOTHING | ...] */

    bool isRowTriggerShippable; /* true if all row triggers are shippable. */
    bool use_star_targets;      /* true if use * for targetlist. */

    bool is_from_full_join_rewrite; /* true if the query is created when doing
                                     * full join rewrite. If true, we should not
                                     * do some expression processing.
                                     * Please refer to subquery_planner.
                                     */
    uint64 uniqueSQLId;             /* used by unique sql id */
    bool can_push;
    bool        unique_check;               /* true if the subquery is generated by general
                                             * sublink pullup, and scalar output is needed */
    Oid* fixed_paramTypes; /* For plpy CTAS query. CTAS is a recursive call. The CREATE query is rewritten first;
                            * the 2nd rewritten query is INSERT SELECT. Without this attribute, the DB raises
                            * an error because it has no idea about $x when the INSERT SELECT query is analyzed. */
    int fixed_numParams;
} Query;

相关代码在analyze.cpp，主要流程如下

/*
 * Abridged outline ("..." marks omitted code): run parse analysis on one raw
 * parse tree, rewrite the resulting Query, and return the list of query trees.
 */
List* pg_analyze_and_rewrite(Node* parsetree, const char* query_string, Oid* paramTypes, int numParams)
{
    ...
    // Analyze the parse tree and transform it into a query tree
    query = parse_analyze(parsetree, query_string, paramTypes, numParams);
    ...
    /*
     * (2) Rewrite the queries, as necessary
     */
    // Rewrite the query tree
    querytree_list = pg_rewrite_query(query);
    ...
    // Return the list of query trees
    return querytree_list;
}

/*
 * Abridged outline ("..." marks omitted code): create a ParseState, transform
 * the raw parse tree into a Query via transformTopLevelStmt(), release the
 * ParseState, and return the Query.
 */
Query* parse_analyze(
    Node* parseTree, const char* sourceText, Oid* paramTypes, int numParams, bool isFirstNode, bool isCreateView)
{
     // Create a new parse state (no parent state is passed)
     ParseState* pstate = make_parsestate(NULL);
     ...
     // Transform the parse tree
     query = transformTopLevelStmt(pstate, parseTree, isFirstNode, isCreateView);
     ...
    // Free the reference-hook state and then the parse state itself
    pfree_ext(pstate->p_ref_hook_state);
    free_parsestate(pstate);
     ...
     // Return the query tree
     return query;
}

/*
 * Entry point for transforming a top-level statement.
 *
 * For a SelectStmt carrying an INTO clause on its leftmost leaf, the statement
 * is wrapped in a CreateTableAsStmt (SELECT ... INTO becomes CREATE TABLE AS)
 * before being handed to transformStmt(). Every other statement is passed to
 * transformStmt() unchanged.
 */
Query* transformTopLevelStmt(ParseState* pstate, Node* parseTree, bool isFirstNode, bool isCreateView)
{
    /* Anything that is not a SELECT needs no pre-processing here. */
    if (!IsA(parseTree, SelectStmt)) {
        return transformStmt(pstate, parseTree, isFirstNode, isCreateView);
    }

    SelectStmt* stmt = (SelectStmt*)parseTree;

    /* For a set-operation tree, walk down the left spine to the leftmost leaf. */
    while (stmt != NULL && stmt->op != SETOP_NONE) {
        stmt = stmt->larg;
    }
    AssertEreport(stmt && IsA(stmt, SelectStmt) && stmt->larg == NULL, MOD_OPT, "failure to check parseTree");

    if (stmt->intoClause != NULL) {
        /* Rewrite SELECT ... INTO as CREATE TABLE AS around the original tree. */
        CreateTableAsStmt* wrapper = makeNode(CreateTableAsStmt);

        wrapper->query = parseTree;
        wrapper->into = stmt->intoClause;
        wrapper->relkind = OBJECT_TABLE;
        wrapper->is_select_into = true;

        /*
         * Detach the INTO clause from the leaf so that transformSelectStmt can
         * treat any intoClause it still sees as INTO in a disallowed place.
         */
        stmt->intoClause = NULL;

        parseTree = (Node*)wrapper;
    }

    /* Transform the (possibly wrapped) statement into a query tree. */
    return transformStmt(pstate, parseTree, isFirstNode, isCreateView);
}

/*
 * Abridged outline ("..." marks omitted code): dispatch on the node tag of
 * parseTree; only the T_SelectStmt case is shown, which calls
 * transformSelectStmt(). Returns the resulting query tree.
 */
Query* transformStmt(ParseState* pstate, Node* parseTree, bool isFirstNode, bool isCreateView)
{
    ...
    switch (nodeTag(parseTree)) {
        ...
        case T_SelectStmt: {
            SelectStmt* n = (SelectStmt*)parseTree;
            ...
            // Analyze the SELECT parse tree
            result = transformSelectStmt(pstate, n, isFirstNode, isCreateView);
            ...
        } break;
        ...
    }
    ...
    // Return the query tree
    return result;
}

/*
 * Abridged outline ("..." marks omitted code): transform a SelectStmt raw
 * parse tree into a Query with commandType CMD_SELECT.
 *
 * The clauses are transformed in the order written below: WITH, FROM, target
 * list, "(+)" operator, WHERE, HAVING, ORDER BY, GROUP BY, DISTINCT, LIMIT,
 * window definitions, locking clauses; collations are then assigned and
 * aggregates/grouping are checked.
 *
 * pstate:       parse state that accumulates the range table, join list and flags
 * stmt:         the SELECT parse tree to transform
 * isFirstNode,
 * isCreateView: passed through to transformFromClause()
 *
 * Returns the newly built Query node.
 */
static Query* transformSelectStmt(ParseState* pstate, SelectStmt* stmt, bool isFirstNode, bool isCreateView)
{
    Query* qry = makeNode(Query);
    ...
    qry->commandType = CMD_SELECT; // command type: select
    ...
    // Transform the WITH clause
    if (stmt->withClause) {
        qry->hasRecursive = stmt->withClause->recursive;
        qry->cteList = transformWithClause(pstate, stmt->withClause);
        qry->hasModifyingCTE = pstate->p_hasModifyingCTE;
    }
    ...
    /* process the FROM clause */
    // Transform the FROM clause
    transformFromClause(pstate, stmt->fromClause, isFirstNode, isCreateView);

    /* transform targetlist */
    // Convert the list of ResTarget nodes into a list of TargetEntry nodes
    qry->targetList = transformTargetList(pstate, stmt->targetList);

    /* Transform operator "(+)" to outer join */
    // The "(+)" syntax denotes an outer join
    if (stmt->hasPlus && stmt->whereClause != NULL) {
        transformOperatorPlus(pstate, &stmt->whereClause);
    }
    ...
    /* mark column origins */
    // Mark target-list columns that are Vars with their source table OID and column number
    markTargetListOrigins(pstate, qry->targetList);

    /* transform WHERE
     * Only "(+)" is valid when  it's in WhereClause of Select, set the flag to be true
     * during transform Whereclause.
     */
    // Transform the WHERE clause
    setIgnorePlusFlag(pstate, true);
    qual = transformWhereClause(pstate, stmt->whereClause, "WHERE");
    setIgnorePlusFlag(pstate, false);

    /*
     * Initial processing of HAVING clause is just like WHERE clause.
     */
    // Transform the HAVING clause
    qry->havingQual = transformWhereClause(pstate, stmt->havingClause, "HAVING");

    /*
     * Transform sorting/grouping stuff.  Do ORDER BY first because both
     * transformGroupClause and transformDistinctClause need the results. Note
     * that these functions can also change the targetList, so it's passed to
     * them by reference.
     */
    // Transform the ORDER BY clause
    qry->sortClause = transformSortClause(
        pstate, stmt->sortClause, &qry->targetList, true /* fix unknowns */, false /* allow SQL92 rules */);

    /*
     * Transform A_const to columnref type in group by clause, So that repeated group column
     * will deleted in function transformGroupClause. If not to delete repeated column, for
     * group by rollup can have error result, because we need set null to non- group column.
     *
     * select a, b, b
     *  from t1
     *  group by rollup(1, 2), 3;
     *
     * To this example, column b should not be set to null, but if not to delete repeated column
     * b will be set to null and two b value is not equal.
     */
    // Convert A_const items in the GROUP BY clause to columnref type
    if (include_groupingset((Node*)stmt->groupClause)) {
        transformGroupConstToColumn(pstate, (Node*)stmt->groupClause, qry->targetList);
    }

    // Transform the GROUP BY clause
    qry->groupClause = transformGroupClause(pstate,
        stmt->groupClause,
        &qry->groupingSets,
        &qry->targetList,
        qry->sortClause,
        false /* allow SQL92 rules */);

    if (stmt->distinctClause == NIL) {
        qry->distinctClause = NIL;
        qry->hasDistinctOn = false;
    } else if (linitial(stmt->distinctClause) == NULL) {
        // Transform the DISTINCT clause
        /* We had SELECT DISTINCT */
        qry->distinctClause = transformDistinctClause(pstate, &qry->targetList, qry->sortClause, false);
        qry->hasDistinctOn = false;
    } else {
        // Transform the DISTINCT ON clause
        /* We had SELECT DISTINCT ON */
        qry->distinctClause =
            transformDistinctOnClause(pstate, stmt->distinctClause, &qry->targetList, qry->sortClause);
        qry->hasDistinctOn = true;
    }

    /* transform LIMIT */
    // Transform the OFFSET and LIMIT clauses
    qry->limitOffset = transformLimitClause(pstate, stmt->limitOffset, "OFFSET");
    qry->limitCount = transformLimitClause(pstate, stmt->limitCount, "LIMIT");

    /* transform window clauses after we have seen all window functions */
    // Window-function definitions
    qry->windowClause = transformWindowDefinitions(pstate, pstate->p_windowdefs, &qry->targetList);

    /* resolve any still-unresolved output columns as being type text */
    // Resolve output columns that are still unresolved as type text
    if (pstate->p_resolve_unknowns) {
        resolveTargetListUnknowns(pstate, qry->targetList);
    }

    // Build the FromExpr node from the join list and the WHERE qualification
    qry->rtable = pstate->p_rtable;
    qry->jointree = makeFromExpr(pstate->p_joinlist, qual);

    qry->hasSubLinks = pstate->p_hasSubLinks;
    qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
    // Check the clauses for window functions where they should not appear
    if (pstate->p_hasWindowFuncs) {
        parseCheckWindowFuncs(pstate, qry);
    }
    qry->hasAggs = pstate->p_hasAggs;

    // Transform the FOR UPDATE/SHARE clauses
    foreach (l, stmt->lockingClause) {
        transformLockingClause(pstate, qry, (LockingClause*)lfirst(l), false);
    }

    qry->hintState = stmt->hintState;
    ...
    // Assign collation information
    assign_query_collations(pstate, qry);

    /*
     * This must be done after collations, for reliable comparison of exprs.
     * Check for aggregates where they shouldn't be and improper grouping.
     */
    if (pstate->p_hasAggs || qry->groupClause || qry->groupingSets || qry->havingQual) {
        parseCheckAggregates(pstate, qry);
    }
    // Return the query tree
    return qry;
}

/*
 * Abridged outline ("..." marks omitted code): rewrite one query tree in two
 * steps -- RewriteQuery() for the non-SELECT rules, then fireRIRrules() on
 * each resulting query -- and return the list of rewritten queries.
 */
List* QueryRewrite(Query* parsetree)
{
    ...
    /*
     * Step 1
     *
     * Apply all non-SELECT rules possibly getting 0 or many queries
     */
    // Apply all non-SELECT rewrite rules
    querylist = RewriteQuery(parsetree, NIL);

    /*
     * Step 2
     *
     * Apply all the RIR rules on each query
     *
     * This is also a handy place to mark each query with the original queryId
     */
    // Apply all RIR rules
    results = NIL;
    foreach (l, querylist) {
        Query* query = (Query*)lfirst(l);

        query = fireRIRrules(query, NIL, false);

        query->queryId = input_query_id;

        results = lappend(results, query);
    }
    ...
    return results;
}

生成计划树
相关数据结构

/*
 * Plan: the common set of fields carried by a plan-tree node (cost and row
 * estimates, target list, quals, child plans, parameter sets, memory limits).
 */
typedef struct Plan {
    NodeTag type;

    int plan_node_id;   /* node id */
    int parent_node_id; /* parent node id */
    RemoteQueryExecType exec_type;

    /*
     * estimated execution costs for plan (see costsize.c for more info)
     */
    Cost startup_cost; /* cost expended before fetching any tuples */
    Cost total_cost;   /* total cost (assuming all tuples fetched) */

    /*
     * planner's estimate of result size of this plan step
     */
    double plan_rows; /* number of global rows plan is expected to emit */
    double multiple;
    int plan_width; /* average row width in bytes */
    int dop;        /* degree of parallelism of current plan */

    /*
     * machine learning model estimations
     */
    double pred_rows;
    double pred_startup_time;
    double pred_total_time;
    long pred_max_memory;
    /*
     * MPPDB Recursive-Union Support
     *
     * - @recursive_union_plan_nodeid
     *      Pointing to its belonging RecursiveUnion's plan node id to indicate if we are
     *      under RecursiveUnion
     *
     * - @recursive_union_controller
     *      Indicate if current Plan node is controller node in recursive-union steps
     *
     * - @control_plan_nodeid
     *      Normally, set on the top-plan node of a producer thread, to indicate which
     *      control-plan we need sync-up with
     *
     * - @is_sync_plannode
     *      Indicate the current producer thread is the sync-up thread in recursive union,
     *      normally set on producer's top plan node
     *
     * Please note the above four variables is meaningless if a plan node is not under
     * recursive-union's recursive part
     */
    /*
     * plan node id of RecursiveUnion node where current plan node belongs to, 0 for
     * not under recursive-union
     */
    int recursive_union_plan_nodeid;

    /* flag to indicate if it is controller plan node */
    bool recursive_union_controller;

    /* plan node id of Controller plan node, 0 for not in control */
    int control_plan_nodeid;

    /* flag indicate if the current plan node is the sync node (for multi-stream case) */
    bool is_sync_plannode;

    /*
     * Common structural data for all Plan types.
     */
    List* targetlist;      /* target list to be computed at this node */
    List* qual;            /* implicitly-ANDed qual conditions */
    struct Plan* lefttree; /* input plan tree(s) */
    struct Plan* righttree;

    bool ispwj;  /* is it special for partitionwisejoin? */
    int paramno; /* the partition's sn that it is scanning */

    List* initPlan;    /* Init Plan nodes (un-correlated expr
                        * subselects) */

    List* distributed_keys; /* distributed on which key */
    ExecNodes* exec_nodes;  /*  List of Datanodes where to execute this plan    */

    /*
     * Information for management of parameter-change-driven rescanning
     *
     * extParam includes the paramIDs of all external PARAM_EXEC params
     * affecting this plan node or its children.  setParam params from the
     * node's initPlans are not included, but their extParams are.
     *
     * allParam includes all the extParam paramIDs, plus the IDs of local
     * params that affect the node (i.e., the setParams of its initplans).
     * These are _all_ the PARAM_EXEC params that affect this node.
     */
    Bitmapset* extParam;
    Bitmapset* allParam;

    // For vectorized engine, plan produce vector output
    //
    bool vec_output;
    /*
     * @hdfs
     * Mark the foreign scan whether has unique results on one of its
     * output columns.
     */
    bool hasUniqueResults;
    /*
     * Mark the plan whether includes delta table or not.
     */
    bool isDeltaTable;

    /* used to replace work_mem, maxmem in [0], and minmem in [1] */
    int operatorMemKB[2];
    /* allowed max mem after spread */
    int operatorMaxMem;

    bool parallel_enabled; /* Is it run in parallel? */
    bool hasHashFilter;    /* true for this plan has a hashfilter */

    List* var_list;        /* Need bloom filter var list. */
    List* filterIndexList; /* Need used bloomfilter array index. */

    /* used to replace work_mem */
    int** ng_operatorMemKBArray; /* for multiple logic cluster */
    int ng_num;
    double innerdistinct; /* join inner rel distinct estimation value */
    double outerdistinct; /* join outer rel distinct estimation value */
} Plan;

/*
 * Path: one candidate way of producing a relation, as considered by the
 * planner. It records the scan/join method (pathtype), the relation it builds
 * (parent), size and cost estimates, and the ordering and distribution of its
 * output.
 */
typedef struct Path {
    NodeTag type;

    NodeTag pathtype; /* tag identifying scan/join method */

    RelOptInfo* parent;        /* the relation this path can build */
    ParamPathInfo* param_info; /* parameterization info, or NULL if none */

    /* estimated size/costs for path (see costsize.c for more info) */
    double rows; /* estimated number of global result tuples */
    double multiple;
    Cost startup_cost; /* cost expended before fetching any tuples */
    Cost total_cost;   /* total cost (assuming all tuples fetched) */
    Cost stream_cost;  /* cost of actions invoked by stream but can't be parallelled in this path */

    List* pathkeys;        /* sort ordering of path's output */
    List* distribute_keys; /* distribute key, Var list */
    char locator_type;
    Oid rangelistOid;
    int dop; /* degree of parallelism */
    /* pathkeys is a List of PathKey nodes; see above */
    Distribution distribution;
    int hint_value;       /* Mark this path if be hinted, and hint kind. */
    double innerdistinct; /* join inner rel distinct estimation value */
    double outerdistinct; /* join outer rel distinct estimation value */
} Path;

/*
 * PlannerInfo
 *    Per-query planning state: the Query being planned, a pointer to the
 *    PlannerGlobal shared by the current planner run, the RelOptInfo and
 *    range-table lookup arrays, join-search workspace, and the lists and
 *    flags documented field by field below.  parent_root is NULL and
 *    query_level is 1 at the outermost Query.
 */
typedef struct PlannerInfo {
    NodeTag type;

    Query* parse; /* the Query being planned */

    PlannerGlobal* glob; /* global info for current planner run */

    Index query_level; /* 1 at the outermost Query */

    struct PlannerInfo* parent_root; /* NULL at outermost Query */

    /*
     * simple_rel_array holds pointers to "base rels" and "other rels" (see
     * comments for RelOptInfo for more info).  It is indexed by rangetable
     * index (so entry 0 is always wasted).  Entries can be NULL when an RTE
     * does not correspond to a base relation, such as a join RTE or an
     * unreferenced view RTE; or if the RelOptInfo hasn't been made yet.
     */
    struct RelOptInfo** simple_rel_array; /* All 1-rel RelOptInfos */
    int simple_rel_array_size;            /* allocated size of array */

    /*
     * List of vars that were changed (mutated) during cost-based rewrite
     * optimization; each element of the list is a "struct RewriteVarMapping".
     * Rewrites that record mappings here include, for example:
     * - inlist2join
     * - pushjoin2union (to be implemented)
     * - ...
     */
    List* var_mappings;
    Relids var_mapping_rels; /* all the relations that are related to inlist2join */

    /*
     * simple_rte_array is the same length as simple_rel_array and holds
     * pointers to the associated rangetable entries.  This lets us avoid
     * rt_fetch(), which can be a bit slow once large inheritance sets have
     * been expanded.
     */
    RangeTblEntry** simple_rte_array; /* rangetable as an array */

    /*
     * all_baserels is a Relids set of all base relids (but not "other"
     * relids) in the query; that is, the Relids identifier of the final join
     * we need to form.
     */
    Relids all_baserels;

    /*
     * join_rel_list is a list of all join-relation RelOptInfos we have
     * considered in this planning run.  For small problems we just scan the
     * list to do lookups, but when there are many join relations we build a
     * hash table for faster lookups.  The hash table is present and valid
     * when join_rel_hash is not NULL.  Note that we still maintain the list
     * even when using the hash table for lookups; this simplifies life for
     * GEQO.
     */
    List* join_rel_list;        /* list of join-relation RelOptInfos */
    struct HTAB* join_rel_hash; /* optional hashtable for join relations */

    /*
     * When doing a dynamic-programming-style join search, join_rel_level[k]
     * is a list of all join-relation RelOptInfos of level k, and
     * join_cur_level is the current level.  New join-relation RelOptInfos are
     * automatically added to the join_rel_level[join_cur_level] list.
     * join_rel_level is NULL if not in use.
     */
    List** join_rel_level; /* lists of join-relation RelOptInfos */
    int join_cur_level;    /* index of list being extended */

    List* init_plans; /* init SubPlans for query */

    List* cte_plan_ids; /* per-CTE-item list of subplan IDs */

    List* eq_classes; /* list of active EquivalenceClasses */

    List* canon_pathkeys; /* list of "canonical" PathKeys */

    List* left_join_clauses; /* list of RestrictInfos for
                              * mergejoinable outer join clauses
                              * w/nonnullable var on left */

    List* right_join_clauses; /* list of RestrictInfos for
                               * mergejoinable outer join clauses
                               * w/nonnullable var on right */

    List* full_join_clauses; /* list of RestrictInfos for
                              * mergejoinable full join clauses */

    List* join_info_list; /* list of SpecialJoinInfos */

    List* lateral_info_list;  /* list of LateralJoinInfos */

    List* append_rel_list; /* list of AppendRelInfos */

    List* rowMarks; /* list of PlanRowMarks */

    List* placeholder_list; /* list of PlaceHolderInfos */

    List* query_pathkeys; /* desired pathkeys for query_planner(), and
                           * actual pathkeys afterwards */

    List* group_pathkeys;    /* groupClause pathkeys, if any */
    List* window_pathkeys;   /* pathkeys of bottom window, if any */
    List* distinct_pathkeys; /* distinctClause pathkeys, if any */
    List* sort_pathkeys;     /* sortClause pathkeys, if any */

    List* minmax_aggs; /* List of MinMaxAggInfos */

    List* initial_rels; /* RelOptInfos we are now trying to join */

    MemoryContext planner_cxt; /* context holding PlannerInfo */

    double total_table_pages; /* # of pages in all tables of query */

    double tuple_fraction; /* tuple_fraction passed to query_planner */
    double limit_tuples;   /* limit_tuples passed to query_planner */

    bool hasInheritedTarget;     /* true if parse->resultRelation is an
                                  * inheritance child rel */
    bool hasJoinRTEs;            /* true if any RTEs are RTE_JOIN kind */
    bool hasLateralRTEs;         /* true if any RTEs are marked LATERAL */
    bool hasHavingQual;          /* true if havingQual was non-null */
    bool hasPseudoConstantQuals; /* true if any RestrictInfo has
                                  * pseudoconstant = true */
    bool hasRecursion;           /* true if planning a recursive WITH item */

    /* Note: qualSecurityLevel is zero if there are no securityQuals */
    Index qualSecurityLevel; /* minimum security_level for quals */

#ifdef PGXC
    /* This field is used only when RemoteScan nodes are involved */
    int rs_alias_index; /* used to build the alias reference */

    /*
     * In Postgres-XC, Coordinators are supposed to skip the handling of
     * row marks of type ROW_MARK_EXCLUSIVE & ROW_MARK_SHARE.
     * To do that, such row marks are simply removed from the rowMarks
     * list.  They are saved in the xc_rowMarks list instead, which is then
     * used to add FOR UPDATE/SHARE to the remote query.
     */
    List* xc_rowMarks; /* list of PlanRowMarks of type ROW_MARK_EXCLUSIVE & ROW_MARK_SHARE */
#endif

    /* These fields are used only when hasRecursion is true: */
    int wt_param_id;                 /* PARAM_EXEC ID for the work table */
    struct Plan* non_recursive_plan; /* plan for non-recursive term */

    /* These fields are workspace for createplan.c */
    Relids curOuterRels;  /* outer rels above current node */
    List* curOuterParams; /* not-yet-assigned NestLoopParams */

    /*
     * NOTE(review): the next three fields carry no comment in this listing;
     * their names suggest partition-iterator planning state -- confirm
     * against the code that sets them.
     */
    Index curIteratorParamIndex;
    bool isPartIteratorPlanning;
    int curItrs;
    List* subqueryRestrictInfo; /* Subquery RestrictInfo, which is only used in window agg. */

    /* optional private data for join_search_hook, e.g., GEQO */
    void* join_search_private;

    /* Added post-release, will be in a saner place in 9.3: */
    List* plan_params; /* list of PlannerParamItems, see below */

    /* For count_distinct, save null info for group by clause */
    List* join_null_info;
    /* for GroupingFunc fixup in setrefs */
    AttrNumber* grouping_map;

    /* If current query level is correlated with upper level */
    bool is_correlated;

    /*
     * Data redistribution for DFS table.
     * dataDestRelIndex is an index into the range table.  This variable
     * takes effect in the data redistribution state.
     * The effective value must be greater than 0.
     */
    Index dataDestRelIndex;

    /* interesting keys of current query level */
    ItstDisKey dis_keys;

    /*
     * indicates whether the subquery planning root (PlannerInfo) is under, or
     * rooted from, recursive-cte planning.
     */
    bool is_under_recursive_cte;

    /*
     * indicates whether the subquery planning root (PlannerInfo) is under the
     * recursive branch of a recursive-cte
     */
    bool is_under_recursive_tree;
    bool has_recursive_correlated_rte; /* true if any RTE correlated with recursive cte */

    /*
     * NOTE(review): subquery_type and param_upper are undocumented in this
     * listing; their meaning cannot be verified from here.
     */
    int subquery_type;
    Bitmapset *param_upper;
    
    /* presumably true if the query has a qual on ROWNUM -- TODO confirm */
    bool hasRownumQual;
} PlannerInfo;

/*
 * PlannedStmt
 *    Node holding a planned statement: the command type, the tree of Plan
 *    nodes (planTree), the range table, subplans, dependency lists, and the
 *    stream / SMP / node-group bookkeeping fields documented below.
 */
typedef struct PlannedStmt {
    NodeTag type;

    CmdType commandType; /* select|insert|update|delete */

    uint64 queryId; /* query identifier, uniquely identifies this plan at runtime (copied from Query) */

    bool hasReturning; /* is it insert|update|delete RETURNING? */

    bool hasModifyingCTE; /* has insert|update|delete in WITH? */

    bool canSetTag; /* do I set the command result tag? */

    bool transientPlan; /* redo plan when TransactionXmin changes? */

    bool dependsOnRole; /* is plan specific to current role? */

    Plan* planTree; /* tree of Plan nodes */

    List* rtable; /* list of RangeTblEntry nodes */

    /* rtable indexes of target relations for INSERT/UPDATE/DELETE */
    List* resultRelations; /* integer list of RT indexes, or NIL */

    Node* utilityStmt; /* non-null if this is DECLARE CURSOR */

    List* subplans; /* Plan trees for SubPlan expressions */

    Bitmapset* rewindPlanIDs; /* indices of subplans that require REWIND */

    List* rowMarks; /* a list of PlanRowMark's */

    /*
     * Notice: be careful when using relationOids, as it may contain non-table
     * OIDs in some scenarios, e.g. assignment of relationOids in
     * fix_expr_common.
     */
    List* relationOids; /* contains OIDs of relations the plan depends on */

    List* invalItems; /* other dependencies, as PlanInvalItems */

    int nParamExec; /* number of PARAM_EXEC Params used */

    int num_streams; /* number of streams existing in the plan tree */

    int max_push_sql_num; /* number of split SQL statements to be pushed to DN for execution */

    int gather_count; /* gather_count in query */

    int num_nodes; /* number of data nodes */

    NodeDefinition* nodesDefinition; /* all data nodes' definition */

    int instrument_option; /* used for collecting instrument data */

    int num_plannodes; /* how many plan nodes are in this planstmt */

    int query_mem[2]; /* how much memory the query can use, in kb */

    int assigned_query_mem[2]; /* how much memory the query is assigned */

    /* NOTE(review): no comment in this listing; presumably a "dynamic SMP" flag -- confirm */
    bool is_dynmaic_smp;

    int dynsmp_max_cpu; /* max available cpu for this dn */

    /*
     * NOTE(review): this listing gives dynsmp_avail_cpu the same comment text
     * as dynsmp_max_cpu ("max available cpu for this dn"); presumably it is
     * the currently available cpu -- confirm.
     */
    int dynsmp_avail_cpu;

    int dynsmp_cpu_util;

    int dynsmp_active_statement;

    double dynsmp_query_estimate_cpu_usge;

    int dynsmp_plan_optimal_dop; /* the final optimized dop for the plan */

    int dynsmp_plan_original_dop;

    int dynsmp_dop_mem_limit; /* memory will put a limit on dop */

    int dynsmp_min_non_spill_dop; /* optimal dop cannot be greater than this */

    int num_bucketmaps; /* number of special bucketmaps stored in plannedstmt */

    uint2* bucketMap[MAX_SPECIAL_BUCKETMAP_NUM]; /* the map information that needs to be obtained */

    char* query_string; /* conveys the query string to backend/stream thread of DataNode for debugging purposes */

    List* subplan_ids; /* in which plan id subplan should be inited */

    List* initPlan; /* initplan in top plan node */
    /*
     * Data redistribution for DFS table.
     * dataDestRelIndex is an index into the range table.  This variable
     * takes effect in the data redistribution state.
     */
    Index dataDestRelIndex;

    int MaxBloomFilterNum;

    int query_dop; /* Dop of current query. */

    double plannertime; /* planner execute time */

    /*
     * set true in do_query_for_planrouter() for a PlannedStmt sent to
     * the compute pool
     */
    bool in_compute_pool;

    /*
     * true if there is/are ForeignScan node(s) of an OBS foreign table
     * in the plan tree.
     */
    bool has_obsrel;

    List* plan_hint_warning; /* hint warning during plan generation, only used in CN */

    List* noanalyze_rellist; /* relations and attributes that have no statistics, only used in CN */

    int ng_num;                     /* nodegroup number */
    NodeGroupQueryMem* ng_queryMem; /* each nodegroup's query mem */
    bool ng_use_planA;              /* true means I am a planA, default false */

    bool isRowTriggerShippable; /* true if all row triggers are shippable. */
    bool is_stream_plan;
    bool multi_node_hint;

    uint64 uniqueSQLId;
} PlannedStmt;

相关代码在 planner.cpp 和 planmain.cpp 中，主要流程如下：

/*
 * query_planner
 *    Abridged listing of path generation for a basic query; each "..." line
 *    marks code that has been elided from this excerpt, including the local
 *    variable declarations.
 *
 *    The chosen paths are handed back through *cheapest_path and
 *    *sorted_path.  A query whose join tree has an empty FROM list gets a
 *    trivial result path and returns early with *sorted_path set to NULL.
 */
void query_planner(PlannerInfo* root, List* tlist, double tuple_fraction, double limit_tuples, Path** cheapest_path,
    Path** sorted_path, double* num_groups, List* rollup_groupclauses, List* rollup_lists)
{
    ...
    /*
     * If the query has an empty join tree, then it's something easy like
     * "SELECT 2+2;" or "INSERT ... VALUES()".  Fall through quickly.
     */
    // Handle statements with an empty join tree
    if (parse->jointree->fromlist == NIL) {
        /* We need a trivial path result */
        *cheapest_path = (Path*)create_result_path(root, NULL, (List*)parse->jointree->quals);
        *sorted_path = NULL;

        /*
         * We still are required to canonicalize any pathkeys, in case it's
         * something like "SELECT 2+2 ORDER BY 1".
         */
        root->canon_pathkeys = NIL;
        canonicalize_all_pathkeys(root);
        return;
    }

    ...
    // Set up the arrays used for fast access to base relations
    setup_simple_rel_arrays(root);

    // Build a new RelOptInfo for every base relation
    add_base_rels_to_query(root, (Node*)parse->jointree);
    check_scan_hint_validity(root);

    // Add entries to the targetlists, generate PlaceHolderInfo entries, build
    // equivalence classes for provably equivalent expressions, and finally
    // form the target joinlist
    build_base_rel_tlists(root, tlist);

    find_placeholders_in_jointree(root);

    find_lateral_references(root);

    joinlist = deconstruct_jointree(root);

    ...
    // Reconsider any postponed outer-join clauses in light of the equivalence classes
    reconsider_outer_join_clauses(root);

    // Generate additional restriction clauses from the equivalence classes
    generate_base_implied_equalities(root);

    generate_base_implied_qualities(root);

    // Canonicalize the pathkeys
    canonicalize_all_pathkeys(root);

    // Examine the placeholder expressions
    fix_placeholder_input_needed_levels(root);

    // Remove outer joins that are not needed
    joinlist = remove_useless_joins(root, joinlist);

    // Distribute placeholders to the base relations
    add_placeholders_to_base_rels(root);

    // Compute total_table_pages: sum the pages of every base rel and
    // "other member" rel (index 0 of simple_rel_array is never used)
    total_pages = 0;
    for (rti = 1; rti < (unsigned int)root->simple_rel_array_size; rti++) {
        RelOptInfo* brel = root->simple_rel_array[rti];

        if (brel == NULL)
            continue;

        AssertEreport(brel->relid == rti,
            MOD_OPT,
            "invalid relation oid when generating a path for a basic query."); /* sanity check on array */

        if (brel->reloptkind == RELOPT_BASEREL || brel->reloptkind == RELOPT_OTHER_MEMBER_REL)
            total_pages += (double)brel->pages;
    }
    root->total_table_pages = total_pages;

    // Find all possible access paths for executing the query; returns a single
    // relation representing the join of all base relations in the query
    final_rel = make_one_rel(root, joinlist);
    ...
    if (parse->groupClause) { // estimate the number of result groups for grouping
        ...
    } else if (parse->hasAggs || root->hasHavingQual || parse->groupingSets) {
        // Ungrouped aggregate reads all the tuples
        ...
    } else if (parse->distinctClause) {
        // No grouping or aggregation: estimate the number of result rows
        ...
    } else {
        // Plain non-grouped, non-aggregated query: compute the tuple fraction
        ...
    }

    // Pick the cheapest path
    cheapestpath = get_cheapest_path(root, final_rel, num_groups, has_groupby);

    ...
    *cheapest_path = cheapestpath;
    *sorted_path = sortedpath;
}

执行器
相关数据结构

/*
 * QueryDesc
 *    Query descriptor.  Its fields fall into three groups: inputs supplied
 *    by CreateQueryDesc (operation, planned or utility statement, source
 *    text, snapshots, destination receiver, parameters, instrumentation
 *    options), state filled in by ExecutorStart (result tuple descriptor,
 *    EState, PlanState tree), and instrumentation / execution bookkeeping.
 */
typedef struct QueryDesc {
    /* These fields are provided by CreateQueryDesc */
    CmdType operation;            /* CMD_SELECT, CMD_UPDATE, etc. */
    PlannedStmt* plannedstmt;     /* planner's output, or null if utility */
    Node* utilitystmt;            /* utility statement, or null */
    const char* sourceText;       /* source text of the query */
    Snapshot snapshot;            /* snapshot to use for query */
    Snapshot crosscheck_snapshot; /* crosscheck for RI update/delete */
    DestReceiver* dest;           /* the destination for tuple output */
    ParamListInfo params;         /* param values being passed in */
    int instrument_options;       /* OR of InstrumentOption flags */

    /* These fields are set by ExecutorStart */
    TupleDesc tupDesc;    /* descriptor for result tuples */
    EState* estate;       /* executor's query-wide state */
    PlanState* planstate; /* tree of per-plan-node state */

    /* totaltime is always set NULL by the core system, but plugins can change it */
    struct Instrumentation* totaltime; /* total time spent in ExecutorRun */
    bool executed;                     /* whether the query has already been executed */
#ifdef ENABLE_MOT
    JitExec::JitContext* mot_jit_context;   /* MOT JIT context required for executing LLVM jitted code */
#endif
} QueryDesc;

语句的执行流程

语句的执行流程

查询语句的执行流程

你可能感兴趣的:(语句的执行流程)