PostgreSQL 的pg_buffercache 代码研究

磨砺技术珠玑,践行数据之道,追求卓越价值

回到上一级页面:PostgreSQL内部结构与源代码研究索引页    回到顶级页面:PostgreSQL索引页

[作者:技术者高健@博客园  mail: [email protected]]

pg_buffercache 代码位于 contrib 目录,总体上代码量200多行。

刚接触,感觉直接访问PostgreSQL 中的内存结构很神奇,特意学习了一下。

/*------------------------------------------------------------------------- * * pg_buffercache_pages.c * display some contents of the buffer cache * * contrib/pg_buffercache/pg_buffercache_pages.c *------------------------------------------------------------------------- */ #include "postgres.h" #include "catalog/pg_type.h" #include "funcapi.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h"                        

                        

                        

#define NUM_BUFFERCACHE_PAGES_ELEM    8                     PG_MODULE_MAGIC; Datum pg_buffercache_pages(PG_FUNCTION_ARGS); /* * Record structure holding the to be exposed cache data. */ typedef struct { uint32 bufferid; Oid relfilenode; Oid reltablespace; Oid reldatabase; ForkNumber forknum; BlockNumber blocknum; bool isvalid; bool isdirty; uint16 usagecount; } BufferCachePagesRec; /* * Function context for data persisting over repeated calls. */ typedef struct { TupleDesc tupdesc; BufferCachePagesRec *record; } BufferCachePagesContext; /* * Function returning data from the shared buffer cache - buffer number, * relation node/tablespace/database/blocknum and dirty indicator. */ PG_FUNCTION_INFO_V1(pg_buffercache_pages); Datum pg_buffercache_pages(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; Datum result; MemoryContext oldcontext; BufferCachePagesContext *fctx;                /* User function context. */ TupleDesc tupledesc; HeapTuple tuple; if (SRF_IS_FIRSTCALL()) { int i; volatile BufferDesc *bufHdr; funcctx = SRF_FIRSTCALL_INIT(); /* Switch context when allocating stuff to be used in later calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* Create a user function context for cross-call persistence */ fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext)); /* Construct a tuple descriptor for the result rows. 
*/ tupledesc = CreateTemplateTupleDesc(NUM_BUFFERCACHE_PAGES_ELEM, false); TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid", INT4OID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode", OIDOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace", OIDOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase", OIDOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber", INT2OID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber", INT8OID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty", BOOLOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count", INT2OID, -1, 0); fctx->tupdesc = BlessTupleDesc(tupledesc); /* Allocate NBuffers worth of BufferCachePagesRec records. */ fctx->record = (BufferCachePagesRec *) palloc(sizeof(BufferCachePagesRec) * NBuffers); /* Set max calls and remember the user function context. */ funcctx->max_calls = NBuffers; funcctx->user_fctx = fctx; /* Return to original context when allocating transient memory */ MemoryContextSwitchTo(oldcontext); /* * To get a consistent picture of the buffer state, we must lock all * partitions of the buffer map. Needless to say, this is horrible * for concurrency. Must grab locks in increasing order to avoid * possible deadlocks. */                

        for (i = 0; i < NUM_BUFFER_PARTITIONS; i++) LWLockAcquire(FirstBufMappingLock + i, LW_SHARED); /* * Scan though all the buffers, saving the relevant fields in the * fctx->record structure. */                

        for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++) { /* Lock each buffer header before inspecting. */ LockBufHdr(bufHdr); fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr); fctx->record[i].relfilenode = bufHdr->tag.rnode.relNode; fctx->record[i].reltablespace = bufHdr->tag.rnode.spcNode; fctx->record[i].reldatabase = bufHdr->tag.rnode.dbNode; fctx->record[i].forknum = bufHdr->tag.forkNum; fctx->record[i].blocknum = bufHdr->tag.blockNum; fctx->record[i].usagecount = bufHdr->usage_count; if (bufHdr->flags & BM_DIRTY) fctx->record[i].isdirty = true; else fctx->record[i].isdirty = false; /* Note if the buffer is valid, and has storage created */            

            if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_TAG_VALID)) fctx->record[i].isvalid = true; else fctx->record[i].isvalid = false; UnlockBufHdr(bufHdr); } /* * And release locks. We do this in reverse order for two reasons: * (1) Anyone else who needs more than one of the locks will be trying * to lock them in increasing order; we don't want to release the * other process until it can get all the locks it needs. (2) This * avoids O(N^2) behavior inside LWLockRelease. */                

        for (i = NUM_BUFFER_PARTITIONS; --i >= 0;) LWLockRelease(FirstBufMappingLock + i); } funcctx = SRF_PERCALL_SETUP(); /* Get the saved state */ fctx = funcctx->user_fctx; if (funcctx->call_cntr < funcctx->max_calls) { uint32 i = funcctx->call_cntr; Datum values[NUM_BUFFERCACHE_PAGES_ELEM]; bool nulls[NUM_BUFFERCACHE_PAGES_ELEM]; values[0] = Int32GetDatum(fctx->record[i].bufferid); nulls[0] = false; /* * Set all fields except the bufferid to null if the buffer is unused * or not valid. */                

        if (fctx->record[i].blocknum == InvalidBlockNumber || fctx->record[i].isvalid == false) { nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; } else { values[1] = ObjectIdGetDatum(fctx->record[i].relfilenode); nulls[1] = false; values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace); nulls[2] = false; values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase); nulls[3] = false; values[4] = ObjectIdGetDatum(fctx->record[i].forknum); nulls[4] = false; values[5] = Int64GetDatum((int64) fctx->record[i].blocknum); nulls[5] = false; values[6] = BoolGetDatum(fctx->record[i].isdirty); nulls[6] = false; values[7] = Int16GetDatum(fctx->record[i].usagecount); nulls[7] = false; } /* Build and return the tuple. */ tuple = heap_form_tuple(fctx->tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); }  SRF_RETURN_DONE(funcctx); } 

 我的看法是这样的:

 官方给的例子是这样说的: 

/*
 * Skeleton of a set-returning function.  Code inside the SRF_IS_FIRSTCALL()
 * branch is one-time setup; the code after it is per-call and ends with
 * either SRF_RETURN_NEXT (hand back one more item) or SRF_RETURN_DONE.
 * The <<...>> markers and "further declarations as needed" are placeholders
 * to be replaced by real code, not C.
 */
Datum my_set_returning_function(PG_FUNCTION_ARGS) {
    FuncCallContext *funcctx;
    Datum result;
    MemoryContext oldcontext;
    further declarations as needed

    if (SRF_IS_FIRSTCALL()) {
        funcctx = SRF_FIRSTCALL_INIT();
        oldcontext = MemoryContextSwitchTo(funcctx-> multi_call_memory_ctx);
        /* One-time setup code appears here: */
        <<user code>>
        <<if returning composite>>
        <<build TupleDesc, and perhaps AttInMetadata>>
        <<endif returning composite>>
        <<user code>>
        MemoryContextSwitchTo(oldcontext);
    }

    /* Each-time setup code appears here: */
    <<user code>>
    funcctx = SRF_PERCALL_SETUP();
    <<user code>>

    /* this is just one way we might test whether we are done: */
    if (funcctx->call_cntr < funcctx->max_calls) {
        /* Here we want to return another item: */
        <<user code>>
        <<obtain result Datum>>
        SRF_RETURN_NEXT(funcctx, result);
    } else {
        /* Here we are done returning items and just need to clean up: */
        <<user code>>
        SRF_RETURN_DONE(funcctx);
    }
}

 对于其中的 call_cntr 和 max_calls,起初不太理解:原以为这个函数被调用一次就该结束了,但验证的结果是,它其实会被调用多次。

[作者:技术者高健@博客园  mail: [email protected]]

修改代码后部为这个样子:

…… if (funcctx->call_cntr < funcctx->max_calls) { fprintf(stderr,"!!!!!call_cntr is smaller than max_calls"); …… SRF_RETURN_NEXT(funcctx, result); } else{ fprintf(stderr,"call_cntr is not smaller than max_calls"); SRF_RETURN_DONE(funcctx); } 

运行结果:出现多次  !!!!!call_cntr is smaller than max_calls ,最后出现一次 call_cntr is not smaller than max_calls

哪怕是我用  select  bufferid from pg_buffercache limit 1; 也是一样的效果(推测原因是:FROM 子句中的集合返回函数会先被完整执行、结果被物化之后,LIMIT 才起作用)。

再次测试可以发现,max_calls 居然为 4096。也就是说,我们在 psql 中对 pg_buffercache 的一次普通查询,在后台让这个函数运转了 4096 次。好古怪!

如果psql 中运行  select count(*) from pg_buffercache; 得出结果正好是 4096。

然后我们再看 NBuffers 的值:在程序中下列循环之前打印 NBuffers 的值,发现这条打印语句只执行了一次,值也是 4096(max_calls 正是在首次调用时由 funcctx->max_calls = NBuffers 设定的)。

/*
* Scan though all the buffers, saving the relevant fields in the
* fctx->record structure.
*/
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
{

   ...

}

那么可以说:

  /* Each-time setup code appears here: */

  <<user code>> funcctx = SRF_PERCALL_SETUP(); <<user code>>

之前的代码(即 SRF_IS_FIRSTCALL() 分支内的一次性初始化代码)只执行一次。后面的代码,虽然我们没有写循环,但每执行一次 SRF_RETURN_NEXT 返回一行之后,函数就会被再次调用,如此反复,直到我们的代码执行了 SRF_RETURN_DONE 为止。

[作者:技术者高健@博客园  mail: [email protected]]

回到上一级页面:PostgreSQL内部结构与源代码研究索引页    回到顶级页面:PostgreSQL索引页

磨砺技术珠玑,践行数据之道,追求卓越价值

你可能感兴趣的:(PostgreSQL)