From c548a256e73211dee506762f8efbddcacfc61faf Mon Sep 17 00:00:00 2001
From: Jakub Wartak
Date: Fri, 21 Feb 2025 10:19:35 +0100
Subject: [PATCH v5 1/3] Add optional dependency to libnuma for basic NUMA
 awareness routines

Discussion: https://postgr.es/m/CAKZiRmxh6KWo0aqRqvmcoaX2jUxZYb4kGp3N%3Dq1w%2BDiH-696Xw%40mail.gmail.com
---
 .cirrus.tasks.yml          |   1 +
 configure.ac               |  13 ++++
 meson.build                |  24 ++++++
 meson_options.txt          |   3 +
 src/Makefile.global.in     |   1 +
 src/backend/Makefile       |   3 +
 src/include/pg_config.h.in |   3 +
 src/include/port/pg_numa.h |  42 +++++++++++
 src/makefiles/meson.build  |   3 +
 src/port/Makefile          |   1 +
 src/port/meson.build       |   1 +
 src/port/pg_numa.c         | 150 +++++++++++++++++++++++++++++++++++++
 12 files changed, 245 insertions(+)
 create mode 100644 src/include/port/pg_numa.h
 create mode 100644 src/port/pg_numa.c

diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index 91b51142d2e..7467e029131 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -448,6 +448,7 @@ task:
             --enable-cassert --enable-injection-points --enable-debug \
             --enable-tap-tests --enable-nls \
             --with-segsize-blocks=6 \
+            --with-libnuma \
             \
             ${LINUX_CONFIGURE_FEATURES} \
             \
diff --git a/configure.ac b/configure.ac
index b6d02f5ecc7..1a394dfc077 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1041,6 +1041,19 @@ if test "$with_libcurl" = yes ; then
 fi
 
 
+#
+# libnuma
+#
+AC_MSG_CHECKING([whether to build with libnuma support])
+PGAC_ARG_BOOL(with, libnuma, no, [use libnuma for NUMA awareness],
+              [AC_DEFINE([USE_LIBNUMA], 1, [Define to 1 to build with NUMA awareness support. (--with-libnuma)])])
+AC_MSG_RESULT([$with_libnuma])
+AC_SUBST(with_libnuma)
+
+if test "$with_libnuma" = yes ; then
+  AC_CHECK_LIB(numa, numa_available, [], [AC_MSG_ERROR([library 'libnuma' is required for NUMA awareness])])
+fi
+
 #
 # XML
 #
diff --git a/meson.build b/meson.build
index 574f992ed49..cf9dead5d02 100644
--- a/meson.build
+++ b/meson.build
@@ -949,6 +949,28 @@ else
 endif
 
 
+###############################################################
+# Library: libnuma
+###############################################################
+
+libnumaopt = get_option('libnuma')
+# numactl installs its pkg-config file as "numa.pc", so the module name is
+# 'numa'.  Probe it with required: false so that the find_library() fallback
+# stays reachable even with -Dlibnuma=enabled.
+if not libnumaopt.disabled()
+  libnuma = dependency('numa', required: false)
+  if not libnuma.found()
+    libnuma = cc.find_library('numa', required: libnumaopt, dirs: test_lib_d)
+  endif
+else
+  libnuma = not_found_dep
+endif
+if libnuma.found()
+  cdata.set('USE_LIBNUMA', 1)
+else
+  libnuma = not_found_dep
+endif
+
 
 ###############################################################
 # Library: libxml
@@ -3168,6 +3190,7 @@ backend_both_deps += [
   icu_i18n,
   ldap,
   libintl,
+  libnuma,
   libxml,
   lz4,
   pam,
@@ -3821,6 +3844,7 @@ if meson.version().version_compare('>=0.57')
       'icu': icu,
       'ldap': ldap,
       'libcurl': libcurl,
+      'libnuma': libnuma,
       'libxml': libxml,
       'libxslt': libxslt,
       'llvm': llvm,
diff --git a/meson_options.txt b/meson_options.txt
index 702c4517145..adaadb5faf1 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -106,6 +106,9 @@ option('libcurl', type : 'feature', value: 'auto',
 option('libedit_preferred', type: 'boolean', value: false,
   description: 'Prefer BSD Libedit over GNU Readline')
 
+option('libnuma', type: 'feature', value: 'auto',
+  description: 'NUMA awareness support')
+
 option('libxml', type: 'feature', value: 'auto',
   description: 'XML support')
 
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 3b620bac5ac..0bd4b2d7d32 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -191,6 +191,7 @@ with_gssapi = @with_gssapi@
 with_krb_srvnam = @with_krb_srvnam@
 with_ldap = @with_ldap@
 with_libcurl = @with_libcurl@
+with_libnuma = 
@with_libnuma@
 with_libxml = @with_libxml@
 with_libxslt = @with_libxslt@
 with_llvm = @with_llvm@
diff --git a/src/backend/Makefile b/src/backend/Makefile
index 42d4a28e5aa..bff9f077a8c 100644
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@@ -54,6 +54,9 @@ ifeq ($(with_systemd),yes)
 LIBS += -lsystemd
 endif
 
+# FIXME: filter-out / with/without with_libnuma?
+LIBS += $(LIBNUMA_LIBS)
+
 override LDFLAGS := $(LDFLAGS) $(LDFLAGS_EX) $(LDFLAGS_EX_BE)
 
 ##########################################################################
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index db6454090d2..8894f800607 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -672,6 +672,9 @@
 /* Define to 1 to build with libcurl support. (--with-libcurl) */
 #undef USE_LIBCURL
 
+/* Define to 1 to build with NUMA awareness support. (--with-libnuma) */
+#undef USE_LIBNUMA
+
 /* Define to 1 to build with XML support. (--with-libxml) */
 #undef USE_LIBXML
 
diff --git a/src/include/port/pg_numa.h b/src/include/port/pg_numa.h
new file mode 100644
index 00000000000..be85b16b0de
--- /dev/null
+++ b/src/include/port/pg_numa.h
@@ -0,0 +1,55 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_numa.h
+ *	  Basic NUMA portability routines
+ *
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * src/include/port/pg_numa.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_NUMA_H
+#define PG_NUMA_H
+
+#include "c.h"
+
+/*
+ * Portable NUMA wrappers, implemented in src/port/pg_numa.c.  Builds without
+ * USE_LIBNUMA get stubs: pg_numa_init() returns -1, while
+ * pg_numa_query_pages() and pg_numa_get_max_node() return 0.
+ */
+extern PGDLLIMPORT int pg_numa_init(void);
+extern PGDLLIMPORT int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status);
+extern PGDLLIMPORT int pg_numa_get_max_node(void);
+extern PGDLLIMPORT Size pg_numa_get_pagesize(void);
+
+#ifdef USE_LIBNUMA
+
+/*
+ * This is required on Linux, before pg_numa_query_pages() as we
+ * need to page-fault before move_pages(2) syscall returns valid results.
+ *
+ * The read goes through a volatile-qualified pointer so the compiler cannot
+ * discard it, and "ptr" is parenthesized so that expression arguments such
+ * as "base + off" are cast as a whole.
+ */
+#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
+	ro_volatile_var = *(volatile uint64 *) (ptr)
+
+/*
+ * Replacements for libnuma's numa_warn()/numa_error() reporting hooks (see
+ * numa(3)); defined in src/port/pg_numa.c for non-FRONTEND builds only.
+ */
+extern void numa_warn(int num, char *fmt,...) pg_attribute_printf(2, 3);
+extern void numa_error(char *where);
+
+#else
+
+#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \
+	do {} while(0)
+
+#endif
+
+#endif							/* PG_NUMA_H */
diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build
index 60e13d50235..f786c191605 100644
--- a/src/makefiles/meson.build
+++ b/src/makefiles/meson.build
@@ -199,6 +199,8 @@ pgxs_empty = [
   'PTHREAD_CFLAGS', 'PTHREAD_LIBS',
 
   'ICU_LIBS',
+
+  'LIBNUMA_CFLAGS', 'LIBNUMA_LIBS'
 ]
 
 if host_system == 'windows' and cc.get_argument_syntax() != 'msvc'
@@ -230,6 +232,7 @@ pgxs_deps = {
   'icu': icu,
   'ldap': ldap,
   'libcurl': libcurl,
+  'libnuma': libnuma,
   'libxml': libxml,
   'libxslt': libxslt,
   'llvm': llvm,
diff --git a/src/port/Makefile b/src/port/Makefile
index 4c224319512..a68a29d5414 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -44,6 +44,7 @@ OBJS = \
 	noblock.o \
 	path.o \
 	pg_bitutils.o \
+	pg_numa.o \
 	pg_popcount_avx512.o \
 	pg_strong_random.o \
 	pgcheckdir.o \
diff --git a/src/port/meson.build b/src/port/meson.build
index 7fcfa728d43..7ffbd4d88d2 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -7,6 +7,7 @@ pgport_sources = [
   'noblock.c',
   'path.c',
   'pg_bitutils.c',
+  'pg_numa.c',
   'pg_popcount_avx512.c',
   'pg_strong_random.c',
   'pgcheckdir.c',
diff --git a/src/port/pg_numa.c b/src/port/pg_numa.c
new file mode 100644
index 00000000000..e94e68abe42
--- /dev/null
+++ b/src/port/pg_numa.c
@@ -0,0 +1,150 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_numa.c
+ *		Basic NUMA portability routines
+ *
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_numa.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "c.h"
+#include "postgres.h"
+#include "port/pg_numa.h"
+#include "storage/pg_shmem.h"
+#ifdef WIN32
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif
+
+/*
+ * At this point we provide support only for Linux thanks to libnuma, but in
+ * future support for other platforms e.g. Win32 or FreeBSD might be possible
+ * too. For Win32 NUMA APIs see
+ * https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
+ */
+#ifdef USE_LIBNUMA
+
+#include <numa.h>
+#include <numaif.h>
+
+/*
+ * pg_numa_init
+ *		Initialize libnuma; numa(3) requires this before other libnuma calls.
+ *
+ * Returns the result of numa_available() unchanged.
+ */
+int
+pg_numa_init(void)
+{
+	int			r = numa_available();
+
+	return r;
+}
+
+/*
+ * pg_numa_query_pages
+ *		Query NUMA placement of "count" addresses listed in "pages".
+ *
+ * The target-node array passed to numa_move_pages() is NULL and the flags are
+ * 0; per move_pages(2) that requests reporting only, with one result per page
+ * stored in "status".  Returns the result of numa_move_pages() unchanged.
+ */
+int
+pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
+{
+	return numa_move_pages(pid, count, pages, NULL, status, 0);
+}
+
+/*
+ * pg_numa_get_max_node
+ *		Return the result of numa_max_node().
+ */
+int
+pg_numa_get_max_node(void)
+{
+	return numa_max_node();
+}
+
+/*
+ * pg_numa_get_pagesize
+ *		Return the memory page size to use for NUMA queries.
+ *
+ * Starts from sysconf(_SC_PAGESIZE) and lets GetHugePageSize() overwrite it
+ * when huge_pages_status is HUGE_PAGES_ON.
+ */
+Size
+pg_numa_get_pagesize(void)
+{
+	Size		os_page_size = sysconf(_SC_PAGESIZE);
+
+	if (huge_pages_status == HUGE_PAGES_ON)
+		GetHugePageSize(&os_page_size, NULL);
+
+	return os_page_size;
+}
+
+#ifndef FRONTEND
+/*
+ * numa_warn
+ *		Replacement for libnuma's warning hook (see numa(3)): format the
+ *		printf-style message and report it through ereport(WARNING).
+ *
+ * errno is preserved for the caller.
+ *
+ * FIXME: not tested
+ */
+void
+numa_warn(int num, char *fmt,...)
+{
+	int			olde = errno;
+	StringInfoData msg;
+
+	initStringInfo(&msg);
+
+	/*
+	 * appendStringInfoVA() returns 0 on success, otherwise an estimate of
+	 * the space it needs, so enlarge the buffer and retry until it fits.
+	 */
+	for (;;)
+	{
+		va_list		ap;
+		int			needed;
+
+		/* restore errno so that %m, if present, expands correctly */
+		errno = olde;
+		va_start(ap, fmt);
+		needed = appendStringInfoVA(&msg, fmt, ap);
+		va_end(ap);
+
+		if (needed == 0)
+			break;
+		enlargeStringInfo(&msg, needed);
+	}
+
+	ereport(WARNING,
+			(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+			 errmsg_internal("libnuma: WARNING: %s", msg.data)));
+
+	pfree(msg.data);
+
+	errno = olde;
+}
+
+/*
+ * numa_error
+ *		Replacement for libnuma's error hook (see numa(3)): report "where"
+ *		at WARNING level, preserving errno for the caller.
+ */
+void
+numa_error(char *where)
+{
+	int			olde = errno;
+
+	/*
+	 * XXX: for now we issue just WARNING, but long-term that might depend on
+	 * numa_set_strict() here
+	 */
+	elog(WARNING, "libnuma: ERROR: %s", where);
+	errno = olde;
+}
+#endif							/* FRONTEND */
+
+#else
+
+/* Empty wrappers, used when built without USE_LIBNUMA */
+int
+pg_numa_init(void)
+{
+	/* We state NUMA is not available */
+	return -1;
+}
+
+/* Does nothing: "status" is left untouched and 0 is returned. */
+int
+pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
+{
+	return 0;
+}
+
+int
+pg_numa_get_max_node(void)
+{
+	return 0;
+}
+
+/*
+ * pg_numa_get_pagesize
+ *		Return the OS page size (sysconf() or, on Windows, GetSystemInfo()),
+ *		overwritten by GetHugePageSize() when huge_pages_status is
+ *		HUGE_PAGES_ON.
+ */
+Size
+pg_numa_get_pagesize(void)
+{
+#ifndef WIN32
+	Size		os_page_size = sysconf(_SC_PAGESIZE);
+#else
+	Size		os_page_size;
+	SYSTEM_INFO sysinfo;
+
+	GetSystemInfo(&sysinfo);
+	os_page_size = sysinfo.dwPageSize;
+#endif
+	if (huge_pages_status == HUGE_PAGES_ON)
+		GetHugePageSize(&os_page_size, NULL);
+	return os_page_size;
+}
+
+#endif
-- 
2.39.5