postgres源码解析37 表创建执行全流程梳理--1

简介

在对postgres源代码有一定了解之后,有必要从全局视角剖析SQL命令的完整执行流程。本文以命令[CREATE TABLE shy_test(id int primary key, name varchar(20))]为例,结合源码一步一步深入分析,窥探其实现流程与设计思想。

关键数据结构

1 CreateStmt
该结构用于保存对CREATE TABLE语句进行解析后生成的相关信息,如表信息、列定义列表、表访问方法(如heap)等

/*
 * CreateStmt - parse-tree node describing a CREATE TABLE statement.
 *
 * Records the relation to create, its column definitions, and the
 * optional clauses (inheritance, partitioning, WITH options, ON COMMIT
 * action, tablespace, table access method).
 */
typedef struct CreateStmt
{
	NodeTag		type;
	RangeVar   *relation;		/* relation to create */
	List	   *tableElts;		/* column definitions (list of ColumnDef) */
	List	   *inhRelations;	/* relations to inherit from (list of
								 * RangeVar) */
	PartitionBoundSpec *partbound;	/* FOR VALUES clause */
	PartitionSpec *partspec;	/* PARTITION BY clause */
	TypeName   *ofTypename;		/* OF typename */
	List	   *constraints;	/* constraints (list of Constraint nodes) */
	List	   *options;		/* options from WITH clause */
	OnCommitAction oncommit;	/* what do we do at COMMIT? */
	char	   *tablespacename; /* table space to use, or NULL */
	char	   *accessMethod;	/* table access method */
	bool		if_not_exists;	/* just do nothing if it already exists? */
} CreateStmt;

(gdb) p *(CreateStmt *)$rawstmt->stmt
$6 = {type = T_CreateStmt, relation = 0x55db4d59e140, tableElts = 0x55db4d59e478, inhRelations = 0x0, partbound = 0x0, partspec = 0x0,
ofTypename = 0x0, constraints = 0x0, options = 0x0, oncommit = ONCOMMIT_NOOP, tablespacename = 0x0, accessMethod = 0x0, if_not_exists = false}

2 RangeVar
该结构用于描述SQL语句中引用的表(例如FROM子句中的表,或本例中待创建的表),包含catalogname/schemaname/relname、别名(alias)以及是否按继承关系展开等信息。

/*
 * RangeVar - names a relation by optional catalog and schema plus the
 * relation name, together with its inheritance flag, persistence code,
 * optional alias, and the token position in the query text.
 */
typedef struct RangeVar
{
	NodeTag		type;
	char	   *catalogname;	/* the catalog (database) name, or NULL */
	char	   *schemaname;		/* the schema name, or NULL */
	char	   *relname;		/* the relation/sequence name */
	bool		inh;			/* expand rel by inheritance? recursively act
								 * on children? */
	char		relpersistence; /* see RELPERSISTENCE_* in pg_class.h */
	Alias	   *alias;			/* table alias & optional column aliases */
	int			location;		/* token location, or -1 if unknown */
} RangeVar;

(gdb) p *$createstmt->relation
$8 = {type = T_RangeVar, catalogname = 0x0, schemaname = 0x0, relname = 0x55db4d59e120 "shy_test", inh = true, relpersistence = 112 'p',
alias = 0x0, location = 13}

3 ColumnDef
该结构体保存创建表中的列定义信息,如列名、是否为空、压缩方法、是否定义默认值

/*
 * ColumnDef - column definition (used in various creates)
 *
 * If the column has a default value, we may have the value expression
 * in either "raw" form (an untransformed parse tree, stored in
 * raw_default) or "cooked" form (a post-parse-analysis, executable
 * expression tree, stored in cooked_default), depending on how this
 * ColumnDef node was created (by parsing, or by inheritance from an
 * existing relation).  We should never have both in the same node!
 *
 * Similarly, we may have a COLLATE specification in either raw form
 * (collClause, represented as a CollateClause with arg==NULL) or cooked
 * form (collOid, the collation's OID).
 *
 * The constraints list may contain a CONSTR_DEFAULT item in a raw
 * parsetree produced by gram.y, but transformCreateStmt will remove
 * the item and set raw_default instead.  CONSTR_DEFAULT items
 * should not appear in any subsequent processing.
 */
typedef struct ColumnDef
{
	NodeTag		type;
	char	   *colname;		/* name of column */
	TypeName   *typeName;		/* type of column */
	char	   *compression;	/* compression method for column */
	int			inhcount;		/* number of times column is inherited */
	bool		is_local;		/* column has local (non-inherited) def'n */
	bool		is_not_null;	/* NOT NULL constraint specified? */
	bool		is_from_type;	/* column definition came from table type */
	char		storage;		/* attstorage setting, or 0 for default */
	Node	   *raw_default;	/* default value (untransformed parse tree) */
	Node	   *cooked_default; /* default value (transformed expr tree) */
	char		identity;		/* attidentity setting */
	RangeVar   *identitySequence;	/* to store identity sequence name for
									 * ALTER TABLE ... ADD COLUMN */
	char		generated;		/* attgenerated setting */
	CollateClause *collClause;	/* untransformed COLLATE spec, if any */
	Oid			collOid;		/* collation OID (InvalidOid if not set) */
	List	   *constraints;	/* other constraints on column */
	List	   *fdwoptions;		/* per-column FDW options */
	int			location;		/* parse location, or -1 if none/unknown */
} ColumnDef;

(gdb) p *$columdef
$31 = {type = T_ColumnDef, colname = 0x55db4d59e190 "id", typeName = 0x55db4d59e238, compression = 0x0, inhcount = 0, is_local = true, is_not_null = false, is_from_type = false, storage = 0 '\000', raw_default = 0x0, cooked_default = 0x0, identity = 0 '\000', identitySequence = 0x0, generated = 0 '\000', collClause = 0x0, collOid = 0, constraints = 0x55db4d59e398, fdwoptions = 0x0, location = 23}

(gdb) p *$columdef1
$32 = {type = T_ColumnDef, colname = 0x55db4d59e4c8 "name", typeName = 0x55db4d59e570, compression = 0x0, inhcount = 0, is_local = true, is_not_null = false, is_from_type = false, storage = 0 '\000', raw_default = 0x0, cooked_default = 0x0, identity = 0 '\000', identitySequence = 0x0, generated = 0 '\000', collClause = 0x0, collOid = 0, constraints = 0x0, fdwoptions = 0x0, location = 43}

4 Constraint
该结构体用于保存约束信息,如主键、唯一索引、非空、外键和排他约束等信息。

/*
 * Constraint - parse-tree node for a single constraint definition.
 *
 * One node type serves every constraint kind; contype selects which of
 * the field groups below are meaningful for a given node.
 */
typedef struct Constraint
{
	NodeTag		type;
	ConstrType	contype;		/* constraint type (a ConstrType value) */

	/* Fields used for most/all constraint types: */
	char	   *conname;		/* Constraint name, or NULL if unnamed */
	bool		deferrable;		/* DEFERRABLE? */
	bool		initdeferred;	/* INITIALLY DEFERRED? */
	int			location;		/* token location, or -1 if unknown */

	/* Fields used for constraints with expressions (CHECK and DEFAULT): */
	bool		is_no_inherit;	/* is constraint non-inheritable? */
	Node	   *raw_expr;		/* expr, as untransformed parse tree */
	char	   *cooked_expr;	/* expr, as nodeToString representation */
	char		generated_when; /* ALWAYS or BY DEFAULT */

	/* Fields used for unique constraints (UNIQUE and PRIMARY KEY): */
	List	   *keys;			/* String nodes naming referenced key
								 * column(s) */
	List	   *including;		/* String nodes naming referenced nonkey
								 * column(s) */

	/* Fields used for EXCLUSION constraints: */
	List	   *exclusions;		/* list of (IndexElem, operator name) pairs */

	/* Fields used for index constraints (UNIQUE, PRIMARY KEY, EXCLUSION): */
	List	   *options;		/* options from WITH clause */
	char	   *indexname;		/* existing index to use; otherwise NULL */
	char	   *indexspace;		/* index tablespace; NULL for default */
	bool		reset_default_tblspc;	/* reset default_tablespace prior to
										 * creating the index */
	/* These could be, but currently are not, used for UNIQUE/PKEY: */
	char	   *access_method;	/* index access method; NULL for default */
	Node	   *where_clause;	/* partial index predicate (WHERE clause) */

	/* Fields used for FOREIGN KEY constraints: */
	RangeVar   *pktable;		/* Primary key table */
	List	   *fk_attrs;		/* Attributes of foreign key */
	List	   *pk_attrs;		/* Corresponding attrs in PK table */
	char		fk_matchtype;	/* FULL, PARTIAL, SIMPLE */
	char		fk_upd_action;	/* ON UPDATE action */
	char		fk_del_action;	/* ON DELETE action */
	List	   *old_conpfeqop;	/* pg_constraint.conpfeqop of my former self */
	Oid			old_pktable_oid;	/* pg_constraint.confrelid of my former
									 * self */

	/* Fields used for constraints that allow a NOT VALID specification */
	bool		skip_validation;	/* skip validation of existing rows? */
	bool		initially_valid;	/* mark the new constraint as valid? */
} Constraint;
-------------------------------------------------------------------------------------
/*
 * ConstrType - the kinds of constraint a Constraint node can carry in
 * its contype field.  The CONSTR_ATTR_* members do not describe a
 * constraint of their own; they carry attributes for the previous
 * constraint node.
 */
typedef enum ConstrType			/* types of constraints */
{
	CONSTR_NULL,				/* not standard SQL, but a lot of people
								 * expect it */
	CONSTR_NOTNULL,
	CONSTR_DEFAULT,
	CONSTR_IDENTITY,
	CONSTR_GENERATED,
	CONSTR_CHECK,
	CONSTR_PRIMARY,
	CONSTR_UNIQUE,
	CONSTR_EXCLUSION,
	CONSTR_FOREIGN,
	CONSTR_ATTR_DEFERRABLE,		/* attributes for previous constraint node */
	CONSTR_ATTR_NOT_DEFERRABLE,
	CONSTR_ATTR_DEFERRED,
	CONSTR_ATTR_IMMEDIATE
} ConstrType;

(gdb) p *(Constraint *)$columdef->constraints->elements->ptr_value
$26 = {type = T_Constraint, contype = CONSTR_PRIMARY, conname = 0x0, deferrable = false, initdeferred = false, location = 30,
is_no_inherit = false, raw_expr = 0x0, cooked_expr = 0x0, generated_when = 0 '\000', keys = 0x0, including = 0x0, exclusions = 0x0,
options = 0x0, indexname = 0x0, indexspace = 0x0, reset_default_tblspc = false, access_method = 0x0, where_clause = 0x0, pktable = 0x0,
fk_attrs = 0x0, pk_attrs = 0x0, fk_matchtype = 0 '\000', fk_upd_action = 0 '\000', fk_del_action = 0 '\000', old_conpfeqop = 0x0,
old_pktable_oid = 0, skip_validation = false, initially_valid = false}

5 Query
SQL语句完成词法、语法解析生成解析树,后进行查询分析与重写生成查询树,其元素为Query结构体

/*
 * Query -
 *	  Parse analysis turns all statements into a Query tree
 *	  for further processing by the rewriter and planner.
 *
 *	  Utility statements (i.e. non-optimizable statements) have the
 *	  utilityStmt field set, and the rest of the Query is mostly dummy.
 *
 *	  Planning converts a Query tree into a Plan tree headed by a PlannedStmt
 *	  node --- the Query structure is not used by the executor.
 */
typedef struct Query
{
	NodeTag		type;

	CmdType		commandType;	/* select|insert|update|delete|utility */

	QuerySource querySource;	/* where did I come from? */

	uint64		queryId;		/* query identifier (can be set by plugins) */

	bool		canSetTag;		/* do I set the command result tag? */

	Node	   *utilityStmt;	/* non-null if commandType == CMD_UTILITY */

	int			resultRelation; /* rtable index of target relation for
								 * INSERT/UPDATE/DELETE; 0 for SELECT */

	bool		hasAggs;		/* has aggregates in tlist or havingQual */
	bool		hasWindowFuncs; /* has window functions in tlist */
	bool		hasTargetSRFs;	/* has set-returning functions in tlist */
	bool		hasSubLinks;	/* has subquery SubLink */
	bool		hasDistinctOn;	/* distinctClause is from DISTINCT ON */
	bool		hasRecursive;	/* WITH RECURSIVE was specified */
	bool		hasModifyingCTE;	/* has INSERT/UPDATE/DELETE in WITH */
	bool		hasForUpdate;	/* FOR [KEY] UPDATE/SHARE was specified */
	bool		hasRowSecurity; /* rewriter has applied some RLS policy */

	bool		isReturn;		/* is a RETURN statement */

	List	   *cteList;		/* WITH list (of CommonTableExpr's) */

	List	   *rtable;			/* list of range table entries */
	FromExpr   *jointree;		/* table join tree (FROM and WHERE clauses) */

	List	   *targetList;		/* target list (of TargetEntry) */

	OverridingKind override;	/* OVERRIDING clause */

	OnConflictExpr *onConflict; /* ON CONFLICT DO [NOTHING | UPDATE] */

	List	   *returningList;	/* return-values list (of TargetEntry) */

	List	   *groupClause;	/* a list of SortGroupClause's */
	bool		groupDistinct;	/* is the group by clause distinct? */

	List	   *groupingSets;	/* a list of GroupingSet's if present */

	Node	   *havingQual;		/* qualifications applied to groups */

	List	   *windowClause;	/* a list of WindowClause's */

	List	   *distinctClause; /* a list of SortGroupClause's */

	List	   *sortClause;		/* a list of SortGroupClause's */

	Node	   *limitOffset;	/* # of result tuples to skip (int8 expr) */
	Node	   *limitCount;		/* # of result tuples to return (int8 expr) */
	LimitOption limitOption;	/* limit type */

	List	   *rowMarks;		/* a list of RowMarkClause's */

	Node	   *setOperations;	/* set-operation tree if this is top level of
								 * a UNION/INTERSECT/EXCEPT query */

	List	   *constraintDeps; /* a list of pg_constraint OIDs that the query
								 * depends on to be semantically valid */

	List	   *withCheckOptions;	/* a list of WithCheckOption's (added
									 * during rewrite) */

	/*
	 * The following two fields identify the portion of the source text string
	 * containing this query.  They are typically only populated in top-level
	 * Queries, not in sub-queries.  When not set, they might both be zero, or
	 * both be -1 meaning "unknown".
	 */
	int			stmt_location;	/* start location, or -1 if unknown */
	int			stmt_len;		/* length in bytes; 0 means "rest of string" */
} Query;

解析分析生成Query结构体

(gdb) p *result
$36 = {type = T_Query, commandType = CMD_UTILITY, querySource = QSRC_ORIGINAL, queryId = 0, canSetTag = false, utilityStmt = 0x55db4d59e6d0, resultRelation = 0, hasAggs = false, hasWindowFuncs = false, hasTargetSRFs = false, hasSubLinks = false, hasDistinctOn = false, hasRecursive = false, hasModifyingCTE = false, hasForUpdate = false, hasRowSecurity = false, isReturn = false, cteList = 0x0, rtable = 0x0, jointree = 0x0, targetList = 0x0, override = OVERRIDING_NOT_SET, onConflict = 0x0, returningList = 0x0, groupClause = 0x0, groupDistinct = false, groupingSets = 0x0, havingQual = 0x0, windowClause = 0x0, distinctClause = 0x0, sortClause = 0x0, limitOffset = 0x0, limitCount = 0x0, limitOption = LIMIT_OPTION_COUNT, rowMarks = 0x0, setOperations = 0x0, constraintDeps = 0x0, withCheckOptions = 0x0,
stmt_location = 0, stmt_len = 0}

对Query进行重写后生成查询树链表信息

(gdb) p *(Query *)querytree_list->elements->ptr_value
$43 = {type = T_Query, commandType = CMD_UTILITY, querySource = QSRC_ORIGINAL, queryId = 0, canSetTag = true, utilityStmt = 0x55db4d59e6d0,
resultRelation = 0, hasAggs = false, hasWindowFuncs = false, hasTargetSRFs = false, hasSubLinks = false, hasDistinctOn = false,
hasRecursive = false, hasModifyingCTE = false, hasForUpdate = false, hasRowSecurity = false, isReturn = false, cteList = 0x0, rtable = 0x0,
jointree = 0x0, targetList = 0x0, override = OVERRIDING_NOT_SET, onConflict = 0x0, returningList = 0x0, groupClause = 0x0,
groupDistinct = false, groupingSets = 0x0, havingQual = 0x0, windowClause = 0x0, distinctClause = 0x0, sortClause = 0x0, limitOffset = 0x0,
limitCount = 0x0, limitOption = LIMIT_OPTION_COUNT, rowMarks = 0x0, setOperations = 0x0, constraintDeps = 0x0, withCheckOptions = 0x0,
stmt_location = 0, stmt_len = 60}

6 PlannedStmt
计划器会对上述的查询树进一步处理生成计划树

/* ----------------
 *		PlannedStmt node
 *
 * The output of the planner is a Plan tree headed by a PlannedStmt node.
 * PlannedStmt holds the "one time" information needed by the executor.
 *
 * For simplicity in APIs, we also wrap utility statements in PlannedStmt
 * nodes; in such cases, commandType == CMD_UTILITY, the statement itself
 * is in the utilityStmt field, and the rest of the struct is mostly dummy.
 * (We do use canSetTag, stmt_location, stmt_len, and possibly queryId.)
 * DDL statements such as CREATE TABLE are wrapped this way.
 * ----------------
 */
typedef struct PlannedStmt
{
	NodeTag		type;

	CmdType		commandType;	/* select|insert|update|delete|utility */

	uint64		queryId;		/* query identifier (copied from Query) */

	bool		hasReturning;	/* is it insert|update|delete RETURNING? */

	bool		hasModifyingCTE;	/* has insert|update|delete in WITH? */

	bool		canSetTag;		/* do I set the command result tag? */

	bool		transientPlan;	/* redo plan when TransactionXmin changes? */

	bool		dependsOnRole;	/* is plan specific to current role? */

	bool		parallelModeNeeded; /* parallel mode required to execute? */

	int			jitFlags;		/* which forms of JIT should be performed */

	struct Plan *planTree;		/* tree of Plan nodes */

	List	   *rtable;			/* list of RangeTblEntry nodes */

	/* rtable indexes of target relations for INSERT/UPDATE/DELETE */
	List	   *resultRelations;	/* integer list of RT indexes, or NIL */

	List	   *appendRelations;	/* list of AppendRelInfo nodes */

	List	   *subplans;		/* Plan trees for SubPlan expressions; note
								 * that some could be NULL */

	Bitmapset  *rewindPlanIDs;	/* indices of subplans that require REWIND */

	List	   *rowMarks;		/* a list of PlanRowMark's */

	List	   *relationOids;	/* OIDs of relations the plan depends on */

	List	   *invalItems;		/* other dependencies, as PlanInvalItems */

	List	   *paramExecTypes; /* type OIDs for PARAM_EXEC Params */

	Node	   *utilityStmt;	/* non-null if this is utility stmt */

	/* statement location in source string (copied from Query) */
	int			stmt_location;	/* start location, or -1 if unknown */
	int			stmt_len;		/* length in bytes; 0 means "rest of string" */
} PlannedStmt;

(gdb) p *(PlannedStmt *)plantree_list->elements->ptr_value
$48 = {type = T_PlannedStmt, commandType = CMD_UTILITY, queryId = 0, hasReturning = false, hasModifyingCTE = false, canSetTag = true, transientPlan = false, dependsOnRole = false, parallelModeNeeded = false, jitFlags = 0, planTree = 0x0, rtable = 0x0, resultRelations = 0x0, appendRelations = 0x0, subplans = 0x0, rewindPlanIDs = 0x0, rowMarks = 0x0, relationOids = 0x0, invalItems = 0x0, paramExecTypes = 0x0, utilityStmt = 0x55db4d59e6d0, stmt_location = 0, stmt_len = 60}

  本文主要整理总结了表创建流程中涉及的关键数据结构,后续几节将从执行器模块出发,继续讲解该SQL的执行流程。

你可能感兴趣的:(postgres,数据库,sql)