在pg方向的数据库运维将近两年了,最近开始进入源码层探究一下具体的实现流程,写的比较乱,看到哪里写到哪里吧。
pg的架构与Oracle类似,都是多进程架构,相似的进程也蛮多的,如日志进程、读写进程、统计进程等等。那么多进程间是如何协同工作的呢?常见的进程间通信(IPC)方式有:管道(半双工管道、FIFO、全双工管道)、消息队列、信号量、共享存储(内存、文件)。
pg的IPC主要采用了基于操作系统共享内存的编程技术,同时在此基础上实现了以下功能:
1. 进程与postmaster的通信机制
2. 统一管理进程的相关变量和函数
3. 提供了SI Message机制,即无效消息传递机制。
4. 相关清除的函数。
共享内存的实现:
ipci.c POSTGRES inter-process communication initialization code.
ipci.c里主要描述了共享内存初始化的流程,总计两个函数:分别是
void RequestAddinShmemSpace(Size size)
void CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
两个变量
static MT_LOCAL Size total_addin_request = 0;
static MT_LOCAL bool addin_request_allowed = true;
RequestAddinShmemSpace 当系统处于初始化内存时有用,一旦完成初始化再调用就会被忽略,同时非postmaster进程调用也会被忽略。代码如下:
/*
 * RequestAddinShmemSpace
 *		Add "size" bytes to the running add-in shared memory request.
 *
 * The request is accumulated into total_addin_request through add_size().
 * It is silently ignored when IsUnderPostmaster is set, or once
 * addin_request_allowed has been cleared, because by then it is too late
 * to be counted.
 */
void
RequestAddinShmemSpace(Size size)
{
	/* Only count the request while requests are still being accepted. */
	if (!IsUnderPostmaster && addin_request_allowed)
		total_addin_request = add_size(total_addin_request, size);
}
CreateSharedMemoryAndSemaphores 当postmaster进程首次调用时会进行共享内存初始化工作,首先计算分配的共享内存大小
/*
 * Estimate the total shared memory size: start from a fixed base of
 * 100000 bytes, then accumulate the value returned by each of the
 * *Size() routines below through add_size().
 */
size = 100000;
size = add_size(size, hash_estimate_size(SHMEM_INDEX_SIZE,
sizeof(ShmemIndexEnt)));
size = add_size(size, BufferShmemSize());
size = add_size(size, LockShmemSize());
size = add_size(size, ProcGlobalShmemSize());
size = add_size(size, SysWaitTimeStatShmemSize());
size = add_size(size, XLOGShmemSize());
size = add_size(size, CLOGShmemSize());
size = add_size(size, SUBTRANSShmemSize());
size = add_size(size, TwoPhaseShmemSize());
size = add_size(size, MultiXactShmemSize());
size = add_size(size, LWLockShmemSize());
size = add_size(size, ProcArrayShmemSize());
size = add_size(size, AppReservedConnLockShmemSize());
size = add_size(size, AppReservedConnShmemSize());
/* bug#24001: Computing needs of Shared memory size */
size = add_size(size, UserReservedConnLockShmemSize());
size = add_size(size, UserReservedConnShmemSize());
size = add_size(size, BackendStatusShmemSize());
size = add_size(size, SInvalShmemSize());
size = add_size(size, FreeSpaceShmemSize());
size = add_size(size, ProcSignalShmemSize());
size = add_size(size, BgWriterShmemSize());
size = add_size(size, BTreeShmemSize());
size = add_size(size, SyncScanShmemSize());
/* #ifdef EXEC_BACKEND */
/* The EXEC_BACKEND guard is commented out here, so this is always added. */
size = add_size(size, ShmemBackendArraySize());
/* #endif */
size = add_size(size, DataFileCacheSize());
size = add_size(size, FileIdCacheSize());
size = add_size(size, RSCacheShmemSize());
/* add it finally */
size = add_size(size, XLogFileCacheSize());
/* bug#10796: for sharing xlog file handle */
size = add_size(size, XLogIdCacheSize());
/* bug#11847: Share memory for audit msg */
size = add_size(size, AuditShmemSize());
size = add_size(size, WalSndShmemSize());
size = add_size(size, WalRcvShmemSize());
/*
 * freeze the addin request size and include it: once the flag is cleared,
 * the accumulated total_addin_request is added to the estimate.
 */
addin_request_allowed = false;
size = add_size(size, total_addin_request);
/* might as well round it off to a multiple of a typical page size */
size = add_size(size, 8192 - (size % 8192));
/*
 * NOTE(review): the four add_size() calls below run after the rounding
 * above, so the final size is a multiple of 8192 only if their sum is.
 * Confirm whether the rounding was meant to come last.
 */
size = add_size(size, MonitorQueueSize());
size = add_size(size, EventShemeSize());
size = add_size(size, PrepareLimitShmemSize());
size = add_size(size, AutoVacShmemSize());
/* Log the computed size at DEBUG3. */
elog(DEBUG3, "invoking IpcMemoryCreate(size=%lu)",
(unsigned long) size);
开启DEBUG3日志级别的情况下,能看到系统启动时会输出当前预申请的共享内存大小。在累加完walreceiver进程所需的内存大小之后,会执行
addin_request_allowed = false;
之后再调用RequestAddinShmemSpace会被忽略.
seghdr = PGSharedMemoryCreate(size, makePrivate, port);
根据以上计算出来的size,去申请内存空间并将首地址赋给seghdr
InitShmemAccess(seghdr);
/*
 * InitShmemAccess
 *		Record where the shared memory segment lives.
 *
 * "seghdr" is treated as a pointer to the segment's PGShmemHeader.  The
 * header pointer is stored in ShmemSegHdr, the same address becomes
 * ShmemBase, and ShmemEnd is set to ShmemBase plus the header's totalsize.
 */
void
InitShmemAccess(void *seghdr)
{
	PGShmemHeader *header = seghdr;

	ShmemSegHdr = header;
	ShmemBase = (void *) header;

	/* The end address is the base advanced by totalsize bytes. */
	ShmemEnd = (char *) ShmemBase + header->totalsize;
}
根据传入的seghdr地址,得到分配的内存空间的起始地址与尾地址
/*
* Create semaphores
*/
numSemas = ProcGlobalSemas();
numSemas += SpinlockSemas();
numSemas += max_parallel_threads; /* bug#12162 bdml */
PGReserveSemaphores(numSemas, port);
计算Server保留的信号量数量,以及预先保留信号量,同时注册退出时清理函数,目前清理函数都在ipc.c文件里实现
/*
* Set up shared memory allocation mechanism
*/
if (!IsUnderPostmaster)
InitShmemAllocation();
/*
* Now initialize LWLocks, which do shared memory allocation and are
* needed for InitShmemIndex.
*/
CreateLWLocks();
/*
* Set up shmem.c index hashtable
*/
InitShmemIndex();
系统为共享内存创建一个名为shmem index的Hash索引表。
/*
 * Set up xlog, clog, and buffers
 */
XLOGShmemInit();
CLOGShmemInit();
CommitTsShmemInit();
SUBTRANSShmemInit();
MultiXactShmemInit();
InitBufferPool();
/*
* Set up lock manager
*/
InitLocks();
/*
* Set up predicate lock manager
*/
InitPredicateLocks();
/*
* Set up process table
*/
if (!IsUnderPostmaster)
InitProcGlobal();
CreateSharedProcArray();
CreateSharedBackendStatus();
TwoPhaseShmemInit();
BackgroundWorkerShmemInit();
/*
* Set up shared-inval messaging
*/
CreateSharedInvalidationState();
/*
* Set up interprocess signaling mechanisms
*/
PMSignalShmemInit();
ProcSignalShmemInit();
CheckpointerShmemInit();
AutoVacuumShmemInit();
ReplicationSlotsShmemInit();
ReplicationOriginShmemInit();
WalSndShmemInit();
WalRcvShmemInit();
ApplyLauncherShmemInit();
/*
* Set up other modules that need some shared memory space
*/
SnapMgrInit();
BTreeShmemInit();
SyncScanShmemInit();
AsyncShmemInit();
BackendRandomShmemInit();
#ifdef EXEC_BACKEND
/*
* Alloc the win32 shared backend array
*/
if (!IsUnderPostmaster)
ShmemBackendArrayAllocation();
#endif
/* Initialize dynamic shared memory facilities. */
if (!IsUnderPostmaster)
dsm_postmaster_startup(shim);
/*
* Now give loadable modules a chance to set up their shmem allocations
*/
if (shmem_startup_hook)
shmem_startup_hook();
接下来依次对xlog、clog、buffer、lock、process table、shared invalidation messages(共享失效消息)、interprocess signaling mechanisms(进程间信号机制)以及其他需要共享内存的模块进行初始化:各模块调用自己的ShmemInit类函数,从已分配的共享内存段中再划分空间。至此完成共享内存的申请与初始化工作。