create tablespace 与 heap_insert 函数

先说 heap_insert 函数:

/*
 *    heap_insert        - insert tuple into a heap
 *
 * The new tuple is stamped with current transaction ID and the specified
 * command ID.
 *
 * If the HEAP_INSERT_SKIP_WAL option is specified, the new tuple is not
 * logged in WAL, even for a non-temp relation.  Safe usage of this behavior
 * requires that we arrange that all new tuples go into new pages not
 * containing any tuples from other transactions, and that the relation gets
 * fsync'd before commit.  (See also heap_sync() comments)
 *
 * The HEAP_INSERT_SKIP_FSM option is passed directly to
 * RelationGetBufferForTuple, which see for more info.
 *
 * Note that these options will be applied when inserting into the heap's
 * TOAST table, too, if the tuple requires any out-of-line data.
 *
 * The BulkInsertState object (if any; bistate can be NULL for default
 * behavior) is also just passed through to RelationGetBufferForTuple.
 *
 * Parameters:
 *    relation - relation the tuple is inserted into
 *    tup      - caller's tuple; its header fields are overwritten here
 *    cid      - command ID stored as the tuple's cmin
 *    options  - HEAP_INSERT_* flag bits (see above)
 *    bistate  - bulk-insert state, or NULL
 *
 * The return value is the OID assigned to the tuple (either here or by the
 * caller), or InvalidOid if no OID.  The header fields of *tup are updated
 * to match the stored tuple; in particular tup->t_self receives the actual
 * TID where the tuple was stored.  But note that any toasting of fields
 * within the tuple data is NOT reflected into *tup.
 */
Oid
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
            int options, BulkInsertState bistate)
{
    /*
     * Disabled debugging aid: prints the name and relfilenode of the
     * relation being inserted into.  Note that relNode is an Oid (an
     * unsigned int), so %u is the matching conversion if this is enabled.
     *
     *    Form_pg_class tmprel = relation->rd_rel;
     *    NameData    tmprelname = tmprel->relname;
     *    fprintf(stderr,"Insert into: %s\n", tmprelname.data);
     *    fprintf(stderr,"In heap_insert,going to insert into table:%d \n\n",relation->rd_node.relNode );
     */
    TransactionId xid = GetCurrentTransactionId();
    HeapTuple   heaptup;
    Buffer      buffer;
    bool        all_visible_cleared = false;

    if (relation->rd_rel->relhasoids)
    {
#ifdef NOT_USED
        /* this is redundant with an Assert in HeapTupleSetOid */
        Assert(tup->t_data->t_infomask & HEAP_HASOID);
#endif

        /*
         * If the object id of this tuple has already been assigned, trust the
         * caller.  There are a couple of ways this can happen.  At initial db
         * creation, the backend program sets oids for tuples.  When we define
         * an index, we set the oid.  Finally, in the future, we may allow
         * users to set their own object ids in order to support a persistent
         * object store (objects need to contain pointers to one another).
         */
        if (!OidIsValid(HeapTupleGetOid(tup)))
            HeapTupleSetOid(tup, GetNewOid(relation));
    }
    else
    {
        /* check there is not space for an OID */
        Assert(!(tup->t_data->t_infomask & HEAP_HASOID));
    }

    /* Reset transaction-status bits, then stamp xmin/cmin for this insert */
    tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
    tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
    tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
    HeapTupleHeaderSetXmin(tup->t_data, xid);
    HeapTupleHeaderSetCmin(tup->t_data, cid);
    HeapTupleHeaderSetXmax(tup->t_data, 0);     /* for cleanliness */
    tup->t_tableOid = RelationGetRelid(relation);

    /*
     * If the new tuple is too big for storage or contains already toasted
     * out-of-line attributes from some other relation, invoke the toaster.
     *
     * Note: below this point, heaptup is the data we actually intend to store
     * into the relation; tup is the caller's original untoasted data.
     */
    if (relation->rd_rel->relkind != RELKIND_RELATION)
    {
        /* toast table entries should never be recursively toasted */
        Assert(!HeapTupleHasExternal(tup));
        heaptup = tup;
    }
    else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
        heaptup = toast_insert_or_update(relation, tup, NULL, options);
    else
        heaptup = tup;

    /*
     * We're about to do the actual insert -- but check for conflict first,
     * to avoid possibly having to roll back work we've just done.
     *
     * For a heap insert, we only need to check for table-level SSI locks.
     * Our new tuple can't possibly conflict with existing tuple locks, and
     * heap page locks are only consolidated versions of tuple locks; they do
     * not lock "gaps" as index page locks do.  So we don't need to identify
     * a buffer before making the call.
     */
    CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);

    /* Find buffer to insert this tuple into */
    buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
                                       InvalidBuffer, options, bistate);

    /* NO EREPORT(ERROR) from here till changes are logged */
    START_CRIT_SECTION();

    RelationPutHeapTuple(relation, buffer, heaptup);

    if (PageIsAllVisible(BufferGetPage(buffer)))
    {
        all_visible_cleared = true;
        PageClearAllVisible(BufferGetPage(buffer));
    }

    /*
     * XXX Should we set PageSetPrunable on this page ?
     *
     * The inserting transaction may eventually abort thus making this tuple
     * DEAD and hence available for pruning.  Though we don't want to optimize
     * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
     * aborted tuple will never be pruned until next vacuum is triggered.
     *
     * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
     */

    MarkBufferDirty(buffer);

    /* XLOG stuff */
    if (!(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation))
    {
        xl_heap_insert xlrec;
        xl_heap_header xlhdr;
        XLogRecPtr  recptr;
        XLogRecData rdata[3];
        Page        page = BufferGetPage(buffer);
        uint8       info = XLOG_HEAP_INSERT;

        xlrec.all_visible_cleared = all_visible_cleared;
        xlrec.target.node = relation->rd_node;
        xlrec.target.tid = heaptup->t_self;
        rdata[0].data = (char *) &xlrec;
        rdata[0].len = SizeOfHeapInsert;
        rdata[0].buffer = InvalidBuffer;
        rdata[0].next = &(rdata[1]);

        xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
        xlhdr.t_infomask = heaptup->t_data->t_infomask;
        xlhdr.t_hoff = heaptup->t_data->t_hoff;

        /*
         * note we mark rdata[1] as belonging to buffer; if XLogInsert decides
         * to write the whole page to the xlog, we don't need to store
         * xl_heap_header in the xlog.
         */
        rdata[1].data = (char *) &xlhdr;
        rdata[1].len = SizeOfHeapHeader;
        rdata[1].buffer = buffer;
        rdata[1].buffer_std = true;
        rdata[1].next = &(rdata[2]);

        /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
        rdata[2].data = (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits);
        rdata[2].len = heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits);
        rdata[2].buffer = buffer;
        rdata[2].buffer_std = true;
        rdata[2].next = NULL;

        /*
         * If this is the single and first tuple on page, we can reinit the
         * page instead of restoring the whole thing.  Set flag, and hide
         * buffer references from XLogInsert.
         */
        if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
            PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
        {
            info |= XLOG_HEAP_INIT_PAGE;
            rdata[1].buffer = rdata[2].buffer = InvalidBuffer;
        }

        recptr = XLogInsert(RM_HEAP_ID, info, rdata);

        PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
    }

    END_CRIT_SECTION();

    UnlockReleaseBuffer(buffer);

    /* Clear the bit in the visibility map if necessary */
    if (all_visible_cleared)
        visibilitymap_clear(relation,
                            ItemPointerGetBlockNumber(&(heaptup->t_self)));

    /*
     * If tuple is cachable, mark it for invalidation from the caches in case
     * we abort.  Note it is OK to do this after releasing the buffer, because
     * the heaptup data structure is all in local memory, not in the shared
     * buffer.
     */
    CacheInvalidateHeapTuple(relation, heaptup);

    pgstat_count_heap_insert(relation);

    /*
     * If heaptup is a private copy, release it.  Don't forget to copy t_self
     * back to the caller's image, too.
     */
    if (heaptup != tup)
    {
        tup->t_self = heaptup->t_self;
        heap_freetuple(heaptup);
    }

    return HeapTupleGetOid(tup);
}

如果我执行的是一条普通的 SQL 语句,则可以加入下面这样的调试代码,来确认数据是否确实插入到了我想要的表中:

    /**

    Form_pg_class tmprel = relation->rd_rel;

    NameData    tmprelname = tmprel->relname;

    fprintf(stderr,"Insert into: %s\n", tmprelname.data);

    fprintf(stderr,"In heap_insert,going to insert into table:%d \n\n",relation->rd_node.relNode );

   */

当我执行 create tablespace的时候,我想它是要写入数据字典的。

但是上述调试代码反映出来的 relNode 是不正确的,而 relname 也是空值。

然后我从更高层的调用关系来观察:

当我执行 create tablespace的时候,调用关系如下:

PostgresMain-->exec_simple_query-->PortalRun-->PortalRunMulti-->PortalRunUtility-->CreateTableSpace

-->simple_heap_insert-->heap_insert

再看 CreateTableSpace 函数(以及它所调用的 simple_heap_insert 函数):

/*
 * CreateTableSpace - carry out a CREATE TABLESPACE statement
 *
 * Creation is limited to superusers: the command decides where data lives
 * on the host, and whoever runs it most likely has root access anyway.
 *
 * Steps, in order: validate the caller, the location and the name; insert
 * the new row into pg_tablespace; record the owner dependency and fire the
 * post-create hook; create the on-disk directories; write an XLOG record
 * for the filesystem change; force a synchronous commit.
 *
 * When HAVE_SYMLINK is not defined the function only raises
 * ERRCODE_FEATURE_NOT_SUPPORTED.
 */
void
CreateTableSpace(CreateTableSpaceStmt *stmt)
{
#ifdef HAVE_SYMLINK
    Relation    rel;
    Datum       values[Natts_pg_tablespace];
    bool        nulls[Natts_pg_tablespace];
    HeapTuple   tuple;
    Oid         tablespaceoid;
    char       *location;
    Oid         ownerId;
    size_t      longestPathLen;

    /* Only a superuser may run this command */
    if (!superuser())
    {
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 errmsg("permission denied to create tablespace \"%s\"",
                        stmt->tablespacename),
                 errhint("Must be superuser to create a tablespace.")));
    }

    /* The owner, on the other hand, may be any role; default to the caller */
    ownerId = stmt->owner ? get_role_oid(stmt->owner, false) : GetUserId();

    /* Work on a private, canonicalized copy of the requested path */
    location = pstrdup(stmt->location);
    canonicalize_path(location);

    /* Single quotes are rejected, else CREATE DATABASE would be at risk */
    if (strchr(location, '\'') != NULL)
    {
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_NAME),
                 errmsg("tablespace location cannot contain single quotes")));
    }

    /*
     * Relative paths are considered risky.  Requiring an absolute path also
     * ensures the location is neither empty nor just whitespace.
     */
    if (!is_absolute_path(location))
    {
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                 errmsg("tablespace location must be an absolute path")));
    }

    /*
     * Make sure there is room to append 'PG_XXX/<dboid>/<relid>.<nnn>' to
     * the location.  The whole path is never actually referenced, but
     * mkdir() uses the first two parts.
     */
    longestPathLen = strlen(location) + 1 +
        strlen(TABLESPACE_VERSION_DIRECTORY) + 1 +
        OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS;
    if (longestPathLen > MAXPGPATH)
    {
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                 errmsg("tablespace location \"%s\" is too long",
                        location)));
    }

    /* Names of the form "pg_xxx" are reserved for system use */
    if (!allowSystemTableMods && IsReservedName(stmt->tablespacename))
    {
        ereport(ERROR,
                (errcode(ERRCODE_RESERVED_NAME),
                 errmsg("unacceptable tablespace name \"%s\"",
                        stmt->tablespacename),
                 errdetail("The prefix \"pg_\" is reserved for system tablespaces.")));
    }

    /*
     * Refuse a name that is already taken.  The unique index would catch
     * the duplicate as well; this check just gives a friendlier message.
     */
    if (OidIsValid(get_tablespace_oid(stmt->tablespacename, true)))
    {
        ereport(ERROR,
                (errcode(ERRCODE_DUPLICATE_OBJECT),
                 errmsg("tablespace \"%s\" already exists",
                        stmt->tablespacename)));
    }

    /*
     * Add the row to pg_tablespace before touching the filesystem.  Doing
     * the insert first locks the proposed name against other would-be
     * creators; the insertion rolls back if a later step fails.
     */
    rel = heap_open(TableSpaceRelationId, RowExclusiveLock);

    /* spcacl and spcoptions start out NULL; all other columns get values */
    MemSet(nulls, false, sizeof(nulls));
    nulls[Anum_pg_tablespace_spcacl - 1] = true;
    nulls[Anum_pg_tablespace_spcoptions - 1] = true;

    values[Anum_pg_tablespace_spcname - 1] =
        DirectFunctionCall1(namein, CStringGetDatum(stmt->tablespacename));
    values[Anum_pg_tablespace_spcowner - 1] = ObjectIdGetDatum(ownerId);
    values[Anum_pg_tablespace_spclocation - 1] = CStringGetTextDatum(location);

    tuple = heap_form_tuple(rel->rd_att, values, nulls);

    /* The OID returned by the insert identifies the new tablespace */
    tablespaceoid = simple_heap_insert(rel, tuple);

    CatalogUpdateIndexes(rel, tuple);

    heap_freetuple(tuple);

    /* Register the dependency of the tablespace on its owner */
    recordDependencyOnOwner(TableSpaceRelationId, tablespaceoid, ownerId);

    /* Run the post-creation object access hook */
    InvokeObjectAccessHook(OAT_POST_CREATE,
                           TableSpaceRelationId, tablespaceoid, 0);

    create_tablespace_directories(location, tablespaceoid);

    /* Log the filesystem change in XLOG: fixed header, then the path */
    {
        xl_tblspc_create_rec createRec;
        XLogRecData chain[2];

        createRec.ts_id = tablespaceoid;

        chain[0].data = (char *) &createRec;
        chain[0].len = offsetof(xl_tblspc_create_rec, ts_path);
        chain[0].buffer = InvalidBuffer;
        chain[0].next = &(chain[1]);

        /* length includes the terminating NUL byte */
        chain[1].data = (char *) location;
        chain[1].len = strlen(location) + 1;
        chain[1].buffer = InvalidBuffer;
        chain[1].next = NULL;

        (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, chain);
    }

    /*
     * Commit synchronously so the window between creating the symlink on
     * disk and marking the transaction committed is as small as possible.
     * Having any window at all is not great, but it should not be made
     * larger than necessary.
     */
    ForceSyncCommit();

    pfree(location);

    /* Close with NoLock: the pg_tablespace lock is held until commit */
    heap_close(rel, NoLock);
#else                            /* !HAVE_SYMLINK */
    ereport(ERROR,
            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
             errmsg("tablespaces are not supported on this platform")));
#endif   /* HAVE_SYMLINK */
}
/*
 *    simple_heap_insert - insert a tuple
 *
 * Convenience front end for heap_insert: it supplies the current command ID
 * and passes no option flags and no bulk-insert state, so the speedup
 * options are not reachable through this entry point.
 *
 * In most places that modify system catalogs this should be called instead
 * of heap_insert itself.
 *
 * Returns whatever heap_insert returns.
 */
Oid
simple_heap_insert(Relation relation, HeapTuple tup)
{
    CommandId   cid = GetCurrentCommandId(true);

    return heap_insert(relation, tup, cid, 0, NULL);
}

我把 CreateTableSpace 简化一下,看看它都干了什么:

/*

 * Create a table space

 *

 * Only superusers can create a tablespace. This seems a reasonable restriction

 * since we're determining the system layout and, anyway, we probably have

 * root if we're doing this kind of activity

 */

void

CreateTableSpace(CreateTableSpaceStmt *stmt)

{

    ......

    /*

     * Insert tuple into pg_tablespace.  The purpose of doing this first is to

     * lock the proposed tablename against other would-be creators. The

     * insertion will roll back if we find problems below.

     */

    rel = heap_open(TableSpaceRelationId, RowExclusiveLock);

    ......

    tablespaceoid = simple_heap_insert(rel, tuple);

    ......

}

而 heap_open(TableSpaceRelationId, RowExclusiveLock) 这一句,
里面的 TableSpaceRelationId其实是宏:

/* ----------------
 *        pg_tablespace definition.  cpp turns this into
 *        typedef struct FormData_pg_tablespace
 *
 *        TableSpaceRelationId is the relation OID of the pg_tablespace
 *        catalog (1213::regclass resolves to pg_tablespace).  It is the
 *        value CreateTableSpace passes to heap_open() before inserting
 *        the new tablespace row.
 * ----------------
 */

#define TableSpaceRelationId  1213

如果想确认这个值对应的是哪个关系,可以运行下面的语句:1213 对应的恰好就是 pg_tablespace 表。

[pgsql@localhost bin]$ ./psql

psql (9.1.2)

Type "help" for help.



pgsql=# select 1213::regclass;

   regclass    

---------------

 pg_tablespace

(1 row)



pgsql=# 

但是,实际上,pg_tablespace是数据字典,而数据库的目录中,并不存在一个单独的1213文件与之对应。

如果我用上述的:

    /**

    Form_pg_class tmprel = relation->rd_rel;

    NameData    tmprelname = tmprel->relname;

    fprintf(stderr,"Insert into: %s\n", tmprelname.data);

    fprintf(stderr,"In heap_insert,going to insert into table:%d \n\n",relation->rd_node.relNode );

   */

来看,就会知道 relNode是 12587。

我可以在 global目录下,找到这个 12587文件。

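1213 对应着 12587文件。乍一看这是一个比较怪异的事情。其实原因在于:1213 是 pg_tablespace 这个关系的 OID,而磁盘上的文件名取自它的 relfilenode,两者并不要求相等。对于 pg_tablespace 这样的共享系统表,relfilenode 并不记录在 pg_class 中(那里的值为 0),而是由 global 目录下的 pg_filenode.map(relation mapper)来维护;可以用 select pg_relation_filenode(1213); 查到实际的文件号。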

你可能感兴趣的:(tablespace)