From ca38a370e866d27c8b51c83f8f18bdda1587b3df Mon Sep 17 00:00:00 2001 From: John Naylor Date: Mon, 31 Oct 2022 15:24:29 +0700 Subject: [PATCH v2 2/2] Attempt to remap the .text segment into huge pages at postmaster start Use MADV_COLLAPSE advice, available since Linux kernel 6.1. Andres Freund and John Naylor --- src/backend/port/huge_page.c | 113 ++++++++++++++++++++++++++++ src/backend/port/meson.build | 4 + src/backend/postmaster/postmaster.c | 7 ++ src/include/port/huge_page.h | 18 +++++ 4 files changed, 142 insertions(+) create mode 100644 src/backend/port/huge_page.c create mode 100644 src/include/port/huge_page.h diff --git a/src/backend/port/huge_page.c b/src/backend/port/huge_page.c new file mode 100644 index 0000000000..92f87bb3c2 --- /dev/null +++ b/src/backend/port/huge_page.c @@ -0,0 +1,113 @@ +/*------------------------------------------------------------------------- + * + * huge_page.c + * Map .text segment of binary to huge pages + * + * TODO: better rationale for separate file if the huge page handling + * in sysv_shmem.c were moved here. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/port/huge_page.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <sys/mman.h> + +#include "port/huge_page.h" +#include "storage/fd.h" + +/* + * Collapse specified memory range to huge pages. + */ +static void +CollapseRegionToHugePages(void *addr, size_t advlen) +{ +#ifdef __linux__ + size_t advlen_up; + int r; + void *r2; + const size_t bound = 1024*1024*2; // FIXME: x86 + + fprintf(stderr, "old advlen: %lx\n", advlen); + advlen_up = (advlen + bound - 1) & ~(bound - 1); + + /* + * Increase size of mapping to cover the trailing padding to the next + * segment.
Otherwise all the code in that range can't be put into + * a huge page (access in the non-mapped range needs to cause a fault, + * hence can't be in the huge page). + * XXX: Should probably assert that that space is actually zeroes. + */ + r2 = mremap(addr, advlen, advlen_up, 0); + if (r2 == MAP_FAILED) + fprintf(stderr, "mremap failed: %m\n"); + else if (r2 != addr) + fprintf(stderr, "mremap wrong addr: %m\n"); + else + advlen = advlen_up; + + fprintf(stderr, "new advlen: %lx\n", advlen); + + /* + * The docs for MADV_COLLAPSE say there should be at least one page + * in the mapped space "for every eligible hugepage-aligned/sized + * region to be collapsed". I just forced that. But probably not + * necessary. + */ + r = madvise(addr, advlen, MADV_WILLNEED); + if (r != 0) + fprintf(stderr, "MADV_WILLNEED failed: %m\n"); + + r = madvise(addr, advlen, MADV_POPULATE_READ); + if (r != 0) + fprintf(stderr, "MADV_POPULATE_READ failed: %m\n"); + + /* + * Make huge pages out of it. Requires at least Linux 6.1. We fall + * back to MADV_HUGEPAGE if it fails, although it doesn't do all that + * much in older kernels. + */ + r = madvise(addr, advlen, MADV_COLLAPSE); + if (r != 0) + { + fprintf(stderr, "MADV_COLLAPSE failed: %m\n"); + + r = madvise(addr, advlen, MADV_HUGEPAGE); + if (r != 0) + fprintf(stderr, "MADV_HUGEPAGE failed: %m\n"); + } +#endif +} + +/* Map the postgres .text segment into huge pages. */ +void +MapStaticCodeToLargePages(void) +{ +#ifdef __linux__ + FILE *fp = AllocateFile("/proc/self/maps", "r"); + char buf[128]; // got this from code reading /proc/meminfo -- enough?
+ uintptr_t addr; + uintptr_t end; + void * self = &MapStaticCodeToLargePages; + + if (fp) + { + while (fgets(buf, sizeof(buf), fp)) + { + if (sscanf(buf, "%lx-%lx", &addr, &end) == 2 && + addr <= (uintptr_t) self && (uintptr_t) self < end) + { + fprintf(stderr, "self: %p start: %lx end: %lx\n", self, addr, end); + CollapseRegionToHugePages((void *) addr, end - addr); + break; + } + } + FreeFile(fp); + } +#endif +} diff --git a/src/backend/port/meson.build b/src/backend/port/meson.build index 8fa68a88aa..af4d0c7bb7 100644 --- a/src/backend/port/meson.build +++ b/src/backend/port/meson.build @@ -25,6 +25,10 @@ if cdata.has('USE_WIN32_SHARED_MEMORY') backend_sources += files('win32_shmem.c') endif +if host_system == 'linux' + backend_sources += files('huge_page.c') +endif + if host_system == 'windows' subdir('win32') endif diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 4c49393fc5..216e8c5730 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -106,6 +106,7 @@ #include "pg_getopt.h" #include "pgstat.h" #include "port/pg_bswap.h" +#include "port/huge_page.h" #include "postmaster/autovacuum.h" #include "postmaster/auxprocess.h" #include "postmaster/bgworker_internals.h" @@ -1007,6 +1008,12 @@ PostmasterMain(int argc, char *argv[]) */ process_shared_preload_libraries(); + /* + * Try to map the binary code to huge pages. We do this just after + * any shared libraries are preloaded for future-proofing. + */ + MapStaticCodeToLargePages(); + /* * Initialize SSL library, if specified. 
*/ diff --git a/src/include/port/huge_page.h b/src/include/port/huge_page.h new file mode 100644 index 0000000000..171819dd53 --- /dev/null +++ b/src/include/port/huge_page.h @@ -0,0 +1,18 @@ +/*------------------------------------------------------------------------- + * + * huge_page.h + * Map .text segment of binary to huge pages + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/huge_page.h + * + *------------------------------------------------------------------------- + */ +#ifndef LARGE_PAGE_H +#define LARGE_PAGE_H + +extern void MapStaticCodeToLargePages(void); + +#endif /* LARGE_PAGE_H */ -- 2.40.1