本文基于 PostgreSQL 14 的代码进行分析解读,演示在 CentOS 8 系统上进行。
表的种类:
下面的宏分别定义了关系的种类(relkind)、持久化方式(relpersistence)以及复制标识(replica identity):
/* Relation kinds (RELKIND_*): one character code per kind of relation */
#define RELKIND_RELATION 'r' /* ordinary table */
#define RELKIND_INDEX 'i' /* secondary index */
#define RELKIND_SEQUENCE 'S' /* sequence object */
#define RELKIND_TOASTVALUE 't' /* for out-of-line values */
#define RELKIND_VIEW 'v' /* view */
#define RELKIND_MATVIEW 'm' /* materialized view */
#define RELKIND_COMPOSITE_TYPE 'c' /* composite type */
#define RELKIND_FOREIGN_TABLE 'f' /* foreign table */
#define RELKIND_PARTITIONED_TABLE 'p' /* partitioned table */
#define RELKIND_PARTITIONED_INDEX 'I' /* partitioned index */
/* Relation persistence codes (RELPERSISTENCE_*) */
#define RELPERSISTENCE_PERMANENT 'p' /* regular table */
#define RELPERSISTENCE_UNLOGGED 'u' /* unlogged permanent table */
#define RELPERSISTENCE_TEMP 't' /* temporary table */
/* Replica identity codes (REPLICA_IDENTITY_*) */
/* default selection for replica identity (primary key or nothing) */
#define REPLICA_IDENTITY_DEFAULT 'd'
/* no replica identity is logged for this relation */
#define REPLICA_IDENTITY_NOTHING 'n'
/* all columns are logged as replica identity */
#define REPLICA_IDENTITY_FULL 'f'
/*
* an explicitly chosen candidate key's columns are used as replica identity.
* Note this will still be set if the index has been dropped; in that case it
* has the same meaning as 'n'.
*/
#define REPLICA_IDENTITY_INDEX 'i'
普通表的relkind是RELKIND_RELATION('r'),可以在pg_class中查到:
postgres=# select relkind from pg_class where relname='t1';
relkind
---------
r
(1 row)
创建普通表的关键事项
创建普通表的代码流程
(1)创建表的入口
创建表代码调用关系:
exec_simple_query
->PortalRun
->PortalRunMulti
->PortalRunUtility
->ProcessUtility
->standard_ProcessUtility
->ProcessUtilitySlow
->DefineRelation
DefineRelation是创建普通表的真正入口
/* ----------------------------------------------------------------
* DefineRelation
* Creates a new relation.
*
* stmt carries parsetree information from an ordinary CREATE TABLE statement.
* The other arguments are used to extend the behavior for other cases:
* relkind: relkind to assign to the new relation
* ownerId: if not InvalidOid, use this as the new relation's owner.
* typaddress: if not null, it's set to the pg_type entry's address.
* queryString: for error reporting
*
* Note that permissions checks are done against current user regardless of
* ownerId. A nonzero ownerId is used when someone is creating a relation
* "on behalf of" someone else, so we still want to see that the current user
* has permissions to do it.
*
* If successful, returns the address of the new relation.
* ----------------------------------------------------------------
*/
ObjectAddress
DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
ObjectAddress *typaddress, const char *queryString)
(2)填充表的默认配置参数
填充表的option:对普通表,heap_reloptions 会调用下面的 default_reloptions 来解析并校验各个选项
(void) heap_reloptions(relkind, reloptions, true);
/*
 * Option parser for anything that uses StdRdOptions.
 *
 * The three arguments are forwarded unchanged to build_reloptions(),
 * together with sizeof(StdRdOptions) and the parse table defined below.
 * Whatever build_reloptions() returns is cast to bytea * and returned.
 */
bytea *
default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
{
static const relopt_parse_elt tab[] = {
/*
 * Each entry lists an option name, its RELOPT_TYPE_* value type, and a
 * byte offset into StdRdOptions (presumably where the parsed value is
 * stored -- confirm against build_reloptions()).
 *
 * The autovacuum settings belong to the AutoVacOpts struct that is
 * embedded in StdRdOptions as the member "autovacuum", so their offset
 * is the sum of two offsetof() values.
 */
{"fillfactor", RELOPT_TYPE_INT, offsetof(StdRdOptions, fillfactor)},
{"autovacuum_enabled", RELOPT_TYPE_BOOL,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, enabled)},
{"autovacuum_vacuum_threshold", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_threshold)},
{"autovacuum_vacuum_insert_threshold", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_ins_threshold)},
{"autovacuum_analyze_threshold", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, analyze_threshold)},
{"autovacuum_vacuum_cost_limit", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_limit)},
{"autovacuum_freeze_min_age", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_min_age)},
{"autovacuum_freeze_max_age", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_max_age)},
{"autovacuum_freeze_table_age", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_table_age)},
{"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_min_age)},
{"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_max_age)},
{"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_table_age)},
{"log_autovacuum_min_duration", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, log_min_duration)},
{"toast_tuple_target", RELOPT_TYPE_INT,
offsetof(StdRdOptions, toast_tuple_target)},
{"autovacuum_vacuum_cost_delay", RELOPT_TYPE_REAL,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_delay)},
{"autovacuum_vacuum_scale_factor", RELOPT_TYPE_REAL,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_scale_factor)},
{"autovacuum_vacuum_insert_scale_factor", RELOPT_TYPE_REAL,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_ins_scale_factor)},
{"autovacuum_analyze_scale_factor", RELOPT_TYPE_REAL,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, analyze_scale_factor)},
{"user_catalog_table", RELOPT_TYPE_BOOL,
offsetof(StdRdOptions, user_catalog_table)},
{"parallel_workers", RELOPT_TYPE_INT,
offsetof(StdRdOptions, parallel_workers)},
{"vacuum_index_cleanup", RELOPT_TYPE_ENUM,
offsetof(StdRdOptions, vacuum_index_cleanup)},
{"vacuum_truncate", RELOPT_TYPE_BOOL,
offsetof(StdRdOptions, vacuum_truncate)}
};
/* Hand the table and the size of the target struct to the generic builder. */
return (bytea *) build_reloptions(reloptions, validate, kind,
sizeof(StdRdOptions),
tab, lengthof(tab));
}
(3)获取表的OID(尽量与表的relfilenode保持一致)
if (!OidIsValid(relid))
relid = GetNewRelFileNode(reltablespace, pg_class_desc,
relpersistence);
(4)创建表的relcache条目和物理文件
/*
* Create the relcache entry (mostly dummy at this point) and the physical
* disk file. (If we fail further down, it's the smgr's responsibility to
* remove the disk file again.)
*
* NB: Note that passing create_storage = true is correct even for binary
* upgrade. The storage we create here will be replaced later, but we need
* to have something on disk in the meanwhile.
*/
new_rel_desc = heap_create(relname,
relnamespace,
reltablespace,
relid,
relfilenode,
accessmtd,
tupdesc,
relkind,
relpersistence,
shared_relation,
mapped_relation,
allow_system_table_mods,
&relfrozenxid,
&relminmxid,
true);
/*
* build the relcache entry.
*/
rel = RelationBuildLocalRelation(relname,
relnamespace,
tupDesc,
relid,
accessmtd,
relfilenode,
reltablespace,
shared_relation,
mapped_relation,
relpersistence,
relkind);
/*
* Have the storage manager create the relation's disk file, if needed.
*
* For tables, the AM callback creates both the main and the init fork.
* For others, only the main fork is created; the other forks will be
* created on demand.
*/
if (create_storage)
{
if (RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
table_relation_set_new_filenode(rel, &rel->rd_node,
relpersistence,
relfrozenxid, relminmxid);
else if (RELKIND_HAS_STORAGE(rel->rd_rel->relkind))
RelationCreateStorage(rel->rd_node, relpersistence);
else
Assert(false);
}
为防止创建失败后残留物理文件,先把新建的文件加入pendingDeletes链表:如果事务失败回滚,则会删除该文件;如果成功提交,则只把该项从pendingDeletes中移除
/* Add the relation to the list of stuff to delete at abort */
pending = (PendingRelDelete *)
MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
pending->relnode = rnode;
pending->backend = backend;
pending->atCommit = false; /* delete if abort */
pending->nestLevel = GetCurrentTransactionNestLevel();
pending->next = pendingDeletes;
pendingDeletes = pending;
后面会:
1、创建type
2、在pg_class中增加tuple
postgres=# select reltype, relkind ,reloptions from pg_class where relname='t1';
reltype | relkind | reloptions
---------+---------+------------
16455 | r |
(1 row)
postgres=# select * from pg_type where oid=16455;
oid | typname | typnamespace | typowner | typlen | typbyval | typtype | typcategory | typispreferred |
typisdefined | typdelim | typrelid | typsubscript | typelem | typarray | typinput | typoutput | typrec
eive | typsend | typmodin | typmodout | typanalyze | typalign | typstorage | typnotnull | typbasetyp
e | typtypmod | typndims | typcollation | typdefaultbin | typdefault | typacl
-------+---------+--------------+----------+--------+----------+---------+-------------+----------------+
--------------+----------+----------+--------------+---------+----------+-----------+------------+-------
------+-------------+----------+-----------+------------+----------+------------+------------+-----------
--+-----------+----------+--------------+---------------+------------+--------
16455 | t1 | 2200 | 10 | -1 | f | c | C | f |
t | , | 16453 | - | 0 | 16454 | record_in | record_out | record
_recv | record_send | - | - | - | d | x | f |
0 | -1 | 0 | 0 | | |
(1 row)
作者邮箱:[email protected]
如有错误或者疏漏欢迎指出,互相学习。
注:未经同意,不得转载!