PostgreSQL 在何处处理 SQL 查询(之十七)

继续:

/*
 * estimate_rel_size - estimate # pages and # tuples in a table or index
 *
 * We also estimate the fraction of the pages that are marked all-visible in
 * the visibility map, for use in estimation of index-only scans.
 *
 * If attr_widths isn't NULL, it points to the zero-index entry of the
 * relation's attr_widths[] cache; we fill this in if we have need to compute
 * the attribute widths for estimation purposes.
 *
 * The estimates are presumably handed back through the pages, tuples and
 * allvisfrac pointers; the assignments are in the part of the function that
 * this excerpt elides.
 *
 * NOTE: each "..." line marks code that was left out of this excerpt.
 */
void
estimate_rel_size(Relation rel, int32 *attr_widths,
                  BlockNumber *pages, double *tuples, double *allvisfrac)
{
    ...
    /* Dispatch on the relation's relkind. */
    switch (rel->rd_rel->relkind)
    {
        /* Tables, indexes and TOAST tables share one code path. */
        case RELKIND_RELATION:
        case RELKIND_INDEX:
        case RELKIND_TOASTVALUE:
            /* it has storage, ok to call the smgr */
            curpages = RelationGetNumberOfBlocks(rel);
            ...
            break;
        case RELKIND_SEQUENCE:
            ...
            break;
        case RELKIND_FOREIGN_TABLE:
            ...
            break;
        default:
            ...
            break;
    }
}

 首先要判断此表有多少个块: RelationGetNumberOfBlocks

/*
 * A relation's on-disk storage is made up of one or more forks.  Every
 * relation has a main fork; further forks may exist alongside it to hold
 * various kinds of metadata.  A ForkNumber value says which fork of a
 * relation is meant.
 */
typedef enum ForkNumber
{
    InvalidForkNumber = -1,
    MAIN_FORKNUM = 0,           /* the main fork, always created */
    FSM_FORKNUM = 1,
    VISIBILITYMAP_FORKNUM = 2,
    INIT_FORKNUM = 3

    /*
     * NOTE: when a new fork is added, MAX_FORKNUM (defined below) has to be
     * changed and the forkNames array in catalog.c has to be updated too.
     */
} ForkNumber;

再看:

/*
 * RelationGetNumberOfBlocks
 *      Number of blocks in the main fork of a relation.
 *
 * Shorthand for RelationGetNumberOfBlocksInFork() with MAIN_FORKNUM.  The
 * macro body must directly follow the backslash continuation: a blank line
 * in between leaves the macro empty and turns the body into a stray
 * statement.
 */
#define RelationGetNumberOfBlocks(reln) \
    RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM)

再看:

/*
 * RelationGetNumberOfBlocksInFork
 *        Report how many pages the given fork of the relation currently has.
 *
 * The relation is first opened at the smgr level (if that has not been done
 * already); the count itself comes from smgrnblocks().
 */
BlockNumber
RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum)
{
    BlockNumber nblocks;

    /* Ensure the relation is open at the smgr level. */
    RelationOpenSmgr(relation);

    nblocks = smgrnblocks(relation->rd_smgr, forkNum);

    return nblocks;
}

再看:

当数据库表对应的文件出现问题时,smgrnblocks 函数会报错:

/*
 *    smgrnblocks() -- Report the number of blocks in the given fork of
 *                     the supplied relation.
 *
 * The work is delegated to the smgr_nblocks callback of the smgrsw[] entry
 * selected by reln->smgr_which.
 */
BlockNumber
smgrnblocks(SMgrRelation reln, ForkNumber forknum)
{
    return smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
}

此处通过函数指针表 smgrsw[] 进行分派。经过一番跟踪,发现当我第一次执行 select * from tab01 这样的命令时,实际会执行到 mdnblocks:

/*
 *    mdnblocks() -- Report how many blocks are stored in a relation fork.
 *
 *        Side effect worth knowing about: every active segment of the
 *        relation gets opened and linked into the mdfd_chain list.  Until
 *        this routine has run, the chain only reaches as far as the last
 *        segment that was actually touched.
 */
BlockNumber
mdnblocks(SMgrRelation reln, ForkNumber forknum)
{
    const BlockNumber seg_blocks = (BlockNumber) RELSEG_SIZE;
    MdfdVec    *seg = mdopen(reln, forknum, EXTENSION_FAIL);
    BlockNumber segno = 0;
    BlockNumber nblocks;

    /*
     * Walk straight to the last segment already in the chain, so that we do
     * not issue redundant seeks on the earlier ones.  Those were verified to
     * be exactly RELSEG_SIZE long before, and rechecking every time is
     * pointless.
     *
     * NOTE: the only way this can be wrong is if another backend truncated
     * the relation.  Higher-level code is relied on to deal with that by
     * closing and re-opening the md fd, which happens through relcache
     * flush.  (The checkpointer takes no part in relcache flush, so it may
     * hold chain entries for inactive segments; that is harmless because the
     * checkpointer never has to compute a relation's size.)
     */
    for (; seg->mdfd_chain != NULL; seg = seg->mdfd_chain)
        segno++;

    for (;;)
    {
        nblocks = _mdnblocks(reln, forknum, seg);

        if (nblocks > seg_blocks)
            elog(FATAL, "segment too big");

        /* A short segment is the last one: stop and report the total. */
        if (nblocks < seg_blocks)
            break;

        /* Segment is exactly RELSEG_SIZE blocks; move on to the next. */
        segno++;

        if (seg->mdfd_chain == NULL)
        {
            /*
             * O_CREAT is passed here, so when the last segment is exactly
             * RELSEG_SIZE long the following (zero-length) segment is
             * created right away.  That may not be strictly required, but
             * it keeps the logic simple.
             */
            seg->mdfd_chain = _mdfd_openseg(reln, forknum, segno, O_CREAT);

            if (seg->mdfd_chain == NULL)
                ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not open file \"%s\": %m",
                                _mdfd_segpath(reln, forknum, segno))));
        }

        seg = seg->mdfd_chain;
    }

    /* Full segments before this one, plus the blocks in the last segment. */
    return (segno * seg_blocks) + nblocks;
}

下一步看 mdopen 函数:

/*
 *    mdopen() -- Open the specified relation.
 *
 * Note we only open the first segment, when there are multiple segments.
 *
 * If first segment is not present, either ereport or return NULL according
 * to "behavior".  We treat EXTENSION_CREATE the same as EXTENSION_FAIL;
 * EXTENSION_CREATE means it's OK to extend an existing relation, not to
 * invent one out of whole cloth.
 *
 * The fprintf(stderr, ...) calls are tracing output added for this
 * walkthrough.  Since stdio calls are allowed to modify errno, the errno of
 * the failed open is saved and put back before it is examined.
 *
 * NOTE: each "..." line marks code that was left out of this excerpt.
 */
static MdfdVec *
mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior)
{
    ...

    path = relpath(reln->smgr_rnode, forknum);

    fd = PathNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
    if (fd < 0)
    {
        /* Remember why the open failed before any tracing output runs. */
        int         save_errno = errno;

        fprintf(stderr,"In %s----%d\n",__FUNCTION__, __LINE__);

        /*
         * During bootstrap, there are cases where a system relation will be
         * accessed (by internal backend processes) before the bootstrap
         * script nominally creates it.  Therefore, accept mdopen() as a
         * substitute for mdcreate() in bootstrap mode only. (See mdcreate)
         */
        if (IsBootstrapProcessingMode())
        {
            fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
            /* The retry is now the call whose errno matters. */
            save_errno = errno;
        }

        fprintf(stderr,"In %s----%d\n",__FUNCTION__, __LINE__);

        /* Undo anything fprintf() may have done to errno. */
        errno = save_errno;

        if (fd < 0)
        {
            if (behavior == EXTENSION_RETURN_NULL &&
                FILE_POSSIBLY_DELETED(errno))
            {
                pfree(path);
                return NULL;
            }
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not open file \"%s\": %m", path)));
        }
    }

    ...
    return mdfd;
}

再看 PathNameOpenFile,如果打开文件失败,就会返回-1。

/*
 * open a file in an arbitrary directory
 *
 * NB: if the passed pathname is relative (which it usually is),
 * it will be interpreted relative to the process' working directory
 * (which should always be $PGDATA when this code is running).
 *
 * Returns the File (an index into VfdCache) on success.  If the underlying
 * BasicOpenFile() call fails, the VFD slot and the name copy are released
 * and -1 is returned; errno is left as it was right after the failed open,
 * so that callers can inspect it or report it with %m.
 */
File
PathNameOpenFile(FileName fileName, int fileFlags, int fileMode)
{
    char       *fnamecopy;
    File        file;
    Vfd           *vfdP;

    DO_DB(elog(LOG, "PathNameOpenFile: %s %x %o",
               fileName, fileFlags, fileMode));

    /*
     * We need a malloc'd copy of the file name; fail cleanly if no room.
     */
    fnamecopy = strdup(fileName);
    if (fnamecopy == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory")));

    file = AllocateVfd();
    vfdP = &VfdCache[file];

    /*
     * Keep releasing least-recently-used files while the count of open
     * descriptors is at or above max_safe_fds; stop once ReleaseLruFile()
     * reports there is nothing left to release.
     */
    while (nfile + numAllocatedDescs >= max_safe_fds)
    {
        if (!ReleaseLruFile())
            break;
    }

    vfdP->fd = BasicOpenFile(fileName, fileFlags, fileMode);

    if (vfdP->fd < 0)
    {
        /* The cleanup calls below must not disturb the caller-visible errno. */
        int         save_errno = errno;

        FreeVfd(file);
        free(fnamecopy);
        errno = save_errno;
        return -1;
    }
    ++nfile;
    DO_DB(elog(LOG, "PathNameOpenFile: success %d",
               vfdP->fd));

    Insert(file);

    vfdP->fileName = fnamecopy;
    /* Saved flags are adjusted to be OK for re-opening file */
    vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL);
    vfdP->fileMode = fileMode;
    vfdP->seekPos = 0;
    vfdP->fileSize = 0;
    vfdP->fdstate = 0x0;
    vfdP->resowner = NULL;

    return file;
}

 

你可能感兴趣的:(PostgreSQL)