PageHeaderData: 24字节长。包含关于页面的一般信息,包括空闲空间指针。
typedef struct PageHeaderData
{
/* XXX LSN is member of *any* block, not only page-organized ones */
PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
* record for last change to this page */
uint16 pd_checksum; /* checksum */
uint16 pd_flags; /* flag bits, see below */
LocationIndex pd_lower; /* offset to start of free space */
LocationIndex pd_upper; /* offset to end of free space */
LocationIndex pd_special; /* offset to start of special space */
uint16 pd_pagesize_version;
TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
} PageHeaderData;
域 | 类型 | 长度 | 描述 |
---|---|---|---|
pd_lsn | PageXLogRecPtr | 8 bytes | LSN: 最后修改这个页面的WAL记录最后一个字节后面的第一个字节 |
pd_checksum | uint16 | 2 bytes | 页面校验码 |
pd_flags | uint16 | 2 bytes | 标志位 |
pd_lower | LocationIndex | 2 bytes | 到空闲空间开头的偏移量 |
pd_upper | LocationIndex | 2 bytes | 到空闲空间结尾的偏移量 |
pd_special | LocationIndex | 2 bytes | 到特殊空间开头的偏移量 |
pd_pagesize_version | uint16 | 2 bytes | 页面大小和布局版本号信息 |
pd_prune_xid | TransactionId | 4 bytes | 页面上最老未删除XID,如果没有则为0 |
ItemIdData:
typedef struct ItemIdData
{
unsigned lp_off:15, /* offset to tuple (from start of page) */
lp_flags:2, /* state of line pointer, see below */
lp_len:15; /* byte length of tuple */
} ItemIdData;
ItemIdData
结构用来指向文件块中的元组,其中lp_off
是元组在文件块中的偏移量,而lp_len
则说明了该元组的长度,lp_flags
表示元组的状态(分为未使用,正常使用,HOT重定向和死亡四种状态)/*
* lp_flags has these possible states. An UNUSED line pointer is available
* for immediate re-use, the other states are not.
*/
#define LP_UNUSED 0 /* unused (should always have lp_len=0) */
#define LP_NORMAL 1 /* used (should always have lp_len>0) */
#define LP_REDIRECT 2 /* HOT redirect (should have lp_len=0) */
#define LP_DEAD 3 /* dead, may or may not have storage */
ItemIdData
),每个占用四个字节。pd_lower
来判断,在分配新标识符的时候pd_lower
会增长。ItemPointer
,也叫做CTID
)都由一个页号和一个项标识符的索引组成。HeapTupleHeaderData
的结构,Freespace: 是指未分配的空间(空闲空间)
Special space:是特殊空间
Tuple:每个元组分两个部分元组头部和数据:元组头部存放该元组头部信息,数据部分存放用户存储的实际数据
struct HeapTupleHeaderData
{
union
{
HeapTupleFields t_heap;
DatumTupleFields t_datum;
} t_choice;
ItemPointerData t_ctid; /* current TID of this or newer tuple (or a
* speculative insertion token) */
/* Fields below here must match MinimalTupleData! */
#define FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK2 2
uint16 t_infomask2; /* number of attributes + various flags */
#define FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK 3
uint16 t_infomask; /* various flag bits, see below */
#define FIELDNO_HEAPTUPLEHEADERDATA_HOFF 4
uint8 t_hoff; /* sizeof header incl. bitmap, padding */
/* ^ - 23 bytes - ^ */
#define FIELDNO_HEAPTUPLEHEADERDATA_BITS 5
bits8 t_bits[FLEXIBLE_ARRAY_MEMBER]; /* bitmap of NULLs */
/* MORE DATA FOLLOWS AT END OF STRUCT */
};
typedef struct HeapTupleFields
{
TransactionId t_xmin; /* inserting xact ID */
TransactionId t_xmax; /* deleting or locking xact ID */
union
{
CommandId t_cid; /* inserting or deleting command ID, or both */
TransactionId t_xvac; /* old-style VACUUM FULL xact ID */
} t_field3;
} HeapTupleFields;
用于记录对元组执行插入/删除操作的事务ID和命令ID,这些信息主要用于并发控制时检查元组对事务的可见性。
- t_datum:
typedef struct DatumTupleFields
{
int32 datum_len_; /* varlena header (do not touch directly!) */
int32 datum_typmod; /* -1, or identifier of a record type */
Oid datum_typeid; /* composite type OID, or RECORDOID */
/*
* datum_typeid cannot be a domain over composite, only plain composite,
* even if the datum is meant as a value of a domain-over-composite type.
* This is in line with the general principle that CoerceToDomain does not
* change the physical representation of the base type value.
*
* Note: field ordering is chosen with thought that Oid might someday
* widen to 64 bits.
*/
} DatumTupleFields;
当一个新元组在内存中形成时时候,我们并不关心其事务可见性,因此在t_choice中只需要用DatumTupleFields结构来记录元组的长度等信息。
但是该元组插入到表文件时,需要在元组头信息中记录插该元组的事务和命令ID。
故此时会把t_choice所占用的内存转化为HeapTupleFields结构,并填充相应数据后再进行元组的插入。
t_ctid:用于记录当前元组或者新元组的物理位置(块内偏移量和元组长度)
如果元组更新,PostgreSQL对元组的更新采用的是标记删除旧版本并插入新版本的元组的方式,则记录的是新版本元组的物理位置。
t_infomask2:
t_infomask:
/*
* information stored in t_infomask:
*/
#define HEAP_HASNULL 0x0001 /* has null attribute(s) */
#define HEAP_HASVARWIDTH 0x0002 /* has variable-width attribute(s) */
#define HEAP_HASEXTERNAL 0x0004 /* has external stored attribute(s) */
#define HEAP_HASOID_OLD 0x0008 /* has an object-id field */
#define HEAP_XMAX_KEYSHR_LOCK 0x0010 /* xmax is a key-shared locker */
#define HEAP_COMBOCID 0x0020 /* t_cid is a combo CID */
#define HEAP_XMAX_EXCL_LOCK 0x0040 /* xmax is exclusive locker */
#define HEAP_XMAX_LOCK_ONLY 0x0080 /* xmax, if valid, is only a locker */
#define HEAP_XMIN_COMMITTED 0x0100 /* t_xmin committed */
#define HEAP_XMIN_INVALID 0x0200 /* t_xmin invalid/aborted */
#define HEAP_XMAX_COMMITTED 0x0400 /* t_xmax committed */
#define HEAP_XMAX_INVALID 0x0800 /* t_xmax invalid/aborted */
#define HEAP_XMAX_IS_MULTI 0x1000 /* t_xmax is a MultiXactId */
#define HEAP_UPDATED 0x2000 /* this is UPDATEd version of row */
#define HEAP_MOVED_OFF 0x4000 /* moved to another place by pre-9.0
* VACUUM FULL; kept for binary
* upgrade support */
#define HEAP_MOVED_IN 0x8000 /* moved from another place by pre-9.0
* VACUUM FULL; kept for binary
* upgrade support */
PostgreSQL采用的是第二种方法,每个元组的头部信息就包含了这个删除标记,其中记录了删除这个元组的事务ID和命令ID。如果上述两个ID有效,则表明该元组被删除,若无效,说明该元组是有效的或者说没有被删除。这种方法对于多版本并发控制也是有好处的。