From 08476af71724fcb3035fc907dc98a6ff351fe58e Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Sat, 5 Apr 2025 19:51:33 +0200 Subject: [PATCH v4 8/8] Support resize for hugetlb The Linux kernel has a set of limitations on remapping hugetlb segments: it can't increase the size of such a segment [1], and shrinking it will not release the memory back. In fact, support for hugetlb mremap was implemented not so long ago [2]. As a workaround, avoid mremap for resizing shared memory. Instead, unmap the whole segment and map it back at the same address with the new size, relying on the fact that the fd for the anon file behind the segment is still open and will keep the memory content. [1]: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/mremap.c?id=f4d2ef48250ad057e4f00087967b5ff366da9f39#n1593 [2]: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/mm/mremap.c?id=550a7d60bd5e35a56942dba6d8a26752beb26c9f --- src/backend/port/sysv_shmem.c | 60 +++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c index 87000a24eea..f0b53ce1d7c 100644 --- a/src/backend/port/sysv_shmem.c +++ b/src/backend/port/sysv_shmem.c @@ -1109,6 +1109,7 @@ AnonymousShmemResize(void) /* Note that CalculateShmemSize indirectly depends on NBuffers */ Size new_size = CalculateShmemSize(&numSemas, i); AnonymousMapping *m = &Mappings[i]; + int mmap_flags = PG_MMAP_FLAGS; if (m->shmem == NULL) continue; @@ -1116,6 +1117,44 @@ AnonymousShmemResize(void) if (m->shmem_size == new_size) continue; +#ifndef MAP_HUGETLB + /* ReserveAnonymousMemory should have dealt with this case */ + Assert(huge_pages != HUGE_PAGES_ON && !huge_pages_on); +#else + if (huge_pages_on) + { + Size hugepagesize; + + /* Make sure nothing is messed up */ + Assert(huge_pages == HUGE_PAGES_ON || huge_pages == HUGE_PAGES_TRY); + + /* Round up the new size to a 
suitable large value */ + GetHugePageSize(&hugepagesize, &mmap_flags, NULL); + + if (new_size % hugepagesize != 0) + new_size += hugepagesize - (new_size % hugepagesize); + + mmap_flags = PG_MMAP_FLAGS | mmap_flags; + } +#endif + + /* + * Linux limitations do not allow us to mremap hugetlb in the way we + * want. E.g. no size increase is allowed, and for shrinking the memory + * will not be released back. To work around this unmap the segment and + * create a new one at the same address. Thanks for the backing anon + * file the content will still be kept in memory. + */ + elog(DEBUG1, "segment[%s]: remap from %zu to %zu at address %p", + MappingName(m->shmem_segment), m->shmem_size, + new_size, m->shmem); + + if (munmap(m->shmem, m->shmem_size) < 0) + ereport(FATAL, + (errcode(ERRCODE_SYSTEM_ERROR), + errmsg("could not unmap shared memory segment %s [%p]: %m", + MappingName(m->shmem_segment), m->shmem))); + /* Resize the backing anon file. */ if(ftruncate(m->segment_fd, new_size) == -1) ereport(FATAL, @@ -1123,25 +1162,14 @@ AnonymousShmemResize(void) errmsg("could not truncase anonymous file for \"%s\": %m", MappingName(m->shmem_segment)))); - /* Clean up some reserved space to resize into */ - if (munmap(m->shmem + m->shmem_size, new_size - m->shmem_size) == -1) - ereport(FATAL, - (errcode(ERRCODE_SYSTEM_ERROR), - errmsg("could not unmap %zu from reserved shared memory %p: %m", - new_size - m->shmem_size, m->shmem))); - - /* Claim the unused space */ - elog(DEBUG1, "segment[%s]: remap from %zu to %zu at address %p", - MappingName(m->shmem_segment), m->shmem_size, - new_size, m->shmem); - - ptr = mremap(m->shmem, m->shmem_size, new_size, 0); + /* Reclaim the space */ + ptr = mmap(m->shmem, new_size, PROT_READ | PROT_WRITE, + mmap_flags | MAP_FIXED, m->segment_fd, 0); if (ptr == MAP_FAILED) ereport(FATAL, (errcode(ERRCODE_SYSTEM_ERROR), - errmsg("could not resize shared memory segment %s [%p] to %d (%zu): %m", - MappingName(m->shmem_segment), m->shmem, NBuffers, - 
new_size))); + errmsg("could not map shared memory segment %s [%p] with size %zu: %m", + MappingName(m->shmem_segment), m->shmem, new_size))); reinit = true; m->shmem_size = new_size; -- 2.45.1