From dca1257476fc4c0718fec35b11ba0f4a4e57151b Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Sat, 15 Mar 2025 16:38:59 +0100 Subject: [PATCH v4 3/8] Introduce multiple shmem segments for shared buffers Add more shmem segments to split shared buffers into the following chunks: * BUFFERS_SHMEM_SEGMENT: contains buffer blocks * BUFFER_DESCRIPTORS_SHMEM_SEGMENT: contains buffer descriptors * BUFFER_IOCV_SHMEM_SEGMENT: contains condition variables for buffers * CHECKPOINT_BUFFERS_SHMEM_SEGMENT: contains checkpoint buffer ids * STRATEGY_SHMEM_SEGMENT: contains buffer strategy status The size of the corresponding shared data directly depends on NBuffers, meaning that if we would like to change NBuffers, they have to be resized correspondingly. Placing each of them in a separate shmem segment makes that possible. There are some assumptions made about the upper size limit of each shmem segment. The buffer blocks segment has the largest limit, while the rest claim less extra room for resizing. Ideally those limits have to be deduced from the maximum allowed shared memory. --- src/backend/port/sysv_shmem.c | 24 +++++++- src/backend/storage/buffer/buf_init.c | 79 +++++++++++++++++--------- src/backend/storage/buffer/buf_table.c | 6 +- src/backend/storage/buffer/freelist.c | 5 +- src/backend/storage/ipc/ipci.c | 2 +- src/include/storage/bufmgr.h | 2 +- src/include/storage/pg_shmem.h | 24 +++++++- 7 files changed, 105 insertions(+), 37 deletions(-) diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c index a0f03ff868f..f46d9d5d9cd 100644 --- a/src/backend/port/sysv_shmem.c +++ b/src/backend/port/sysv_shmem.c @@ -147,10 +147,18 @@ static int next_free_segment = 0; * * The reserved space pointer is calculated to slice up the total reserved * space into fixed fractions of address space for each segment, as specified - * in the SHMEM_RESIZE_RATIO array. + * in the SHMEM_RESIZE_RATIO array. E.g. 
we allow BUFFERS_SHMEM_SEGMENT to take + * up to 60% of the whole space when resizing, based on the fact that it most + * likely will be the main consumer of this memory. Those numbers are pulled + * out of thin air for now; it makes sense to evaluate them more precisely. */ -static double SHMEM_RESIZE_RATIO[1] = { - 1.0, /* MAIN_SHMEM_SLOT */ +static double SHMEM_RESIZE_RATIO[6] = { + 0.1, /* MAIN_SHMEM_SEGMENT */ + 0.6, /* BUFFERS_SHMEM_SEGMENT */ + 0.1, /* BUFFER_DESCRIPTORS_SHMEM_SEGMENT */ + 0.1, /* BUFFER_IOCV_SHMEM_SEGMENT */ + 0.05, /* CHECKPOINT_BUFFERS_SHMEM_SEGMENT */ + 0.05, /* STRATEGY_SHMEM_SEGMENT */ }; /* @@ -182,6 +190,16 @@ MappingName(int shmem_segment) { case MAIN_SHMEM_SEGMENT: return "main"; + case BUFFERS_SHMEM_SEGMENT: + return "buffers"; + case BUFFER_DESCRIPTORS_SHMEM_SEGMENT: + return "descriptors"; + case BUFFER_IOCV_SHMEM_SEGMENT: + return "iocv"; + case CHECKPOINT_BUFFERS_SHMEM_SEGMENT: + return "checkpoint"; + case STRATEGY_SHMEM_SEGMENT: + return "strategy"; default: return "unknown"; } diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index ed1dc488a42..bd68b69ee98 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -62,7 +62,10 @@ CkptSortItem *CkptBufferIds; * Initialize shared buffer pool * * This is called once during shared-memory initialization (either in the - * postmaster, or in a standalone backend). + * postmaster, or in a standalone backend). The size of the data structures initialized + * here depends on NBuffers, and to be able to change NBuffers without a + * restart we store each structure in a separate shared memory segment, which + * can be resized on demand. */ void BufferManagerShmemInit(void) @@ -74,22 +77,22 @@ BufferManagerShmemInit(void) /* Align descriptors to a cacheline boundary. 
*/ BufferDescriptors = (BufferDescPadded *) - ShmemInitStruct("Buffer Descriptors", + ShmemInitStructInSegment("Buffer Descriptors", NBuffers * sizeof(BufferDescPadded), - &foundDescs); + &foundDescs, BUFFER_DESCRIPTORS_SHMEM_SEGMENT); /* Align buffer pool on IO page size boundary. */ BufferBlocks = (char *) TYPEALIGN(PG_IO_ALIGN_SIZE, - ShmemInitStruct("Buffer Blocks", + ShmemInitStructInSegment("Buffer Blocks", NBuffers * (Size) BLCKSZ + PG_IO_ALIGN_SIZE, - &foundBufs)); + &foundBufs, BUFFERS_SHMEM_SEGMENT)); /* Align condition variables to cacheline boundary. */ BufferIOCVArray = (ConditionVariableMinimallyPadded *) - ShmemInitStruct("Buffer IO Condition Variables", + ShmemInitStructInSegment("Buffer IO Condition Variables", NBuffers * sizeof(ConditionVariableMinimallyPadded), - &foundIOCV); + &foundIOCV, BUFFER_IOCV_SHMEM_SEGMENT); /* * The array used to sort to-be-checkpointed buffer ids is located in @@ -99,8 +102,9 @@ BufferManagerShmemInit(void) * painful. */ CkptBufferIds = (CkptSortItem *) - ShmemInitStruct("Checkpoint BufferIds", - NBuffers * sizeof(CkptSortItem), &foundBufCkpt); + ShmemInitStructInSegment("Checkpoint BufferIds", + NBuffers * sizeof(CkptSortItem), &foundBufCkpt, + CHECKPOINT_BUFFERS_SHMEM_SEGMENT); if (foundDescs || foundBufs || foundIOCV || foundBufCkpt) { @@ -156,33 +160,54 @@ BufferManagerShmemInit(void) * BufferManagerShmemSize * * compute the size of shared memory for the buffer pool including - * data pages, buffer descriptors, hash tables, etc. + * data pages, buffer descriptors, hash tables, etc. based on the + * shared memory segment. The main segment must not allocate anything + * related to buffers, every other segment will receive part of the + * data. 
*/ Size -BufferManagerShmemSize(void) +BufferManagerShmemSize(int shmem_segment) { Size size = 0; - /* size of buffer descriptors */ - size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded))); - /* to allow aligning buffer descriptors */ - size = add_size(size, PG_CACHE_LINE_SIZE); + if (shmem_segment == MAIN_SHMEM_SEGMENT) + return size; - /* size of data pages, plus alignment padding */ - size = add_size(size, PG_IO_ALIGN_SIZE); - size = add_size(size, mul_size(NBuffers, BLCKSZ)); + if (shmem_segment == BUFFER_DESCRIPTORS_SHMEM_SEGMENT) + { + /* size of buffer descriptors */ + size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded))); + /* to allow aligning buffer descriptors */ + size = add_size(size, PG_CACHE_LINE_SIZE); + } - /* size of stuff controlled by freelist.c */ - size = add_size(size, StrategyShmemSize()); + if (shmem_segment == BUFFERS_SHMEM_SEGMENT) + { + /* size of data pages, plus alignment padding */ + size = add_size(size, PG_IO_ALIGN_SIZE); + size = add_size(size, mul_size(NBuffers, BLCKSZ)); + } - /* size of I/O condition variables */ - size = add_size(size, mul_size(NBuffers, - sizeof(ConditionVariableMinimallyPadded))); - /* to allow aligning the above */ - size = add_size(size, PG_CACHE_LINE_SIZE); + if (shmem_segment == STRATEGY_SHMEM_SEGMENT) + { + /* size of stuff controlled by freelist.c */ + size = add_size(size, StrategyShmemSize()); + } - /* size of checkpoint sort array in bufmgr.c */ - size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem))); + if (shmem_segment == BUFFER_IOCV_SHMEM_SEGMENT) + { + /* size of I/O condition variables */ + size = add_size(size, mul_size(NBuffers, + sizeof(ConditionVariableMinimallyPadded))); + /* to allow aligning the above */ + size = add_size(size, PG_CACHE_LINE_SIZE); + } + + if (shmem_segment == CHECKPOINT_BUFFERS_SHMEM_SEGMENT) + { + /* size of checkpoint sort array in bufmgr.c */ + size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem))); + } return size; } 
diff --git a/src/backend/storage/buffer/buf_table.c b/src/backend/storage/buffer/buf_table.c index a50955d5286..a9952b36eba 100644 --- a/src/backend/storage/buffer/buf_table.c +++ b/src/backend/storage/buffer/buf_table.c @@ -22,6 +22,7 @@ #include "postgres.h" #include "storage/buf_internals.h" +#include "storage/pg_shmem.h" /* entry for buffer lookup hashtable */ typedef struct @@ -59,10 +60,11 @@ InitBufTable(int size) info.entrysize = sizeof(BufferLookupEnt); info.num_partitions = NUM_BUFFER_PARTITIONS; - SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table", + SharedBufHash = ShmemInitHashInSegment("Shared Buffer Lookup Table", size, size, &info, - HASH_ELEM | HASH_BLOBS | HASH_PARTITION); + HASH_ELEM | HASH_BLOBS | HASH_PARTITION, + STRATEGY_SHMEM_SEGMENT); } /* diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 336715b6c63..81543cb5ced 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -19,6 +19,7 @@ #include "port/atomics.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" +#include "storage/pg_shmem.h" #include "storage/proc.h" #define INT_ACCESS_ONCE(var) ((int)(*((volatile int *)&(var)))) @@ -491,9 +492,9 @@ StrategyInitialize(bool init) * Get or create the shared strategy control block */ StrategyControl = (BufferStrategyControl *) - ShmemInitStruct("Buffer Strategy Status", + ShmemInitStructInSegment("Buffer Strategy Status", sizeof(BufferStrategyControl), - &found); + &found, STRATEGY_SHMEM_SEGMENT); if (!found) { diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 076888c0172..9d00b80b4f8 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -113,7 +113,7 @@ CalculateShmemSize(int *num_semaphores, int shmem_segment) sizeof(ShmemIndexEnt))); size = add_size(size, dsm_estimate_size()); size = add_size(size, DSMRegistryShmemSize()); - size = add_size(size, BufferManagerShmemSize()); + 
size = add_size(size, BufferManagerShmemSize(shmem_segment)); size = add_size(size, LockManagerShmemSize()); size = add_size(size, PredicateLockShmemSize()); size = add_size(size, ProcGlobalShmemSize()); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index f2192ceb271..1977001e533 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -308,7 +308,7 @@ extern bool EvictUnpinnedBuffer(Buffer buf); /* in buf_init.c */ extern void BufferManagerShmemInit(void); -extern Size BufferManagerShmemSize(void); +extern Size BufferManagerShmemSize(int); /* in localbuf.c */ extern void AtProcExit_LocalBuffers(void); diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h index 4a83e255652..c5009a1cd73 100644 --- a/src/include/storage/pg_shmem.h +++ b/src/include/storage/pg_shmem.h @@ -52,7 +52,7 @@ typedef struct ShmemSegment } ShmemSegment; /* Number of available segments for anonymous memory mappings */ -#define ANON_MAPPINGS 1 +#define ANON_MAPPINGS 6 extern PGDLLIMPORT ShmemSegment Segments[ANON_MAPPINGS]; @@ -107,7 +107,29 @@ extern void PGSharedMemoryDetach(void); extern void GetHugePageSize(Size *hugepagesize, int *mmap_flags); void *ReserveAnonymousMemory(Size reserve_size); +/* + * To be able to dynamically resize the largest parts of the data stored in shared + * memory, we split it into multiple shared memory mapping segments. Each + * segment contains only a certain part of the data, whose size depends on + * NBuffers. + */ + /* The main segment, contains everything except buffer blocks and related data. 
*/ #define MAIN_SHMEM_SEGMENT 0 +/* Buffer blocks */ +#define BUFFERS_SHMEM_SEGMENT 1 + +/* Buffer descriptors */ +#define BUFFER_DESCRIPTORS_SHMEM_SEGMENT 2 + +/* Condition variables for buffers */ +#define BUFFER_IOCV_SHMEM_SEGMENT 3 + +/* Checkpoint BufferIds */ +#define CHECKPOINT_BUFFERS_SHMEM_SEGMENT 4 + +/* Buffer strategy status */ +#define STRATEGY_SHMEM_SEGMENT 5 + #endif /* PG_SHMEM_H */ -- 2.45.1