From f77b7e1a680a7aad370cf2ac9233b9c17bd5a4a2 Mon Sep 17 00:00:00 2001 From: Jakub Wartak Date: Fri, 21 Feb 2025 10:19:35 +0100 Subject: [PATCH v11 1/3] Add optional dependency on libnuma (Linux-only) for basic NUMA awareness routines and add minimal src/port/pg_numa.c portability wrapper. Other platforms can be added later. libnuma is unavailable on 32-bit builds: there is no i386 shared object, so we disable it there (it would not make much sense anyway, as i386 is very memory-limited even with PAE). Author: Jakub Wartak Co-authored-by: Bertrand Drouvot Reviewed-by: Andres Freund Discussion: https://postgr.es/m/CAKZiRmxh6KWo0aqRqvmcoaX2jUxZYb4kGp3N%3Dq1w%2BDiH-696Xw%40mail.gmail.com --- .cirrus.tasks.yml | 12 ++- configure | 87 ++++++++++++++++ configure.ac | 13 +++ doc/src/sgml/func.sgml | 13 +++ doc/src/sgml/installation.sgml | 21 ++++ meson.build | 21 ++++ meson_options.txt | 3 + src/Makefile.global.in | 1 + src/backend/storage/ipc/shmem.c | 11 ++ src/backend/utils/misc/guc_tables.c | 2 +- src/include/catalog/pg_proc.dat | 5 + src/include/pg_config.h.in | 3 + src/include/port/pg_numa.h | 43 ++++++++ src/include/storage/pg_shmem.h | 1 + src/makefiles/meson.build | 3 + src/port/Makefile | 1 + src/port/meson.build | 1 + src/port/pg_numa.c | 151 ++++++++++++++++++++++++++++ 18 files changed, 387 insertions(+), 5 deletions(-) create mode 100644 src/include/port/pg_numa.h create mode 100644 src/port/pg_numa.c diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml index 5849cbb839a..7010dff7aef 100644 --- a/.cirrus.tasks.yml +++ b/.cirrus.tasks.yml @@ -445,8 +445,10 @@ task: EOF setup_additional_packages_script: | - #apt-get update - #DEBIAN_FRONTEND=noninteractive apt-get -y install ... 
+ apt-get update + DEBIAN_FRONTEND=noninteractive apt-get -y install \ + libnuma1 \ + libnuma-dev matrix: # SPECIAL: @@ -471,6 +473,7 @@ task: --enable-cassert --enable-injection-points --enable-debug \ --enable-tap-tests --enable-nls \ --with-segsize-blocks=6 \ + --with-libnuma \ \ ${LINUX_CONFIGURE_FEATURES} \ \ @@ -519,6 +522,7 @@ task: -Dllvm=disabled \ --pkg-config-path /usr/lib/i386-linux-gnu/pkgconfig/ \ -DPERL=perl5.36-i386-linux-gnu \ + -Dlibnuma=disabled \ build-32 EOF @@ -835,8 +839,8 @@ task: folder: $CCACHE_DIR setup_additional_packages_script: | - #apt-get update - #DEBIAN_FRONTEND=noninteractive apt-get -y install ... + apt-get update + DEBIAN_FRONTEND=noninteractive apt-get -y install libnuma1 libnuma-dev ### # Test that code can be built with gcc/clang without warnings diff --git a/configure b/configure index 93fddd69981..23c33dd9971 100755 --- a/configure +++ b/configure @@ -711,6 +711,7 @@ with_libxml LIBCURL_LIBS LIBCURL_CFLAGS with_libcurl +with_libnuma with_uuid with_readline with_systemd @@ -868,6 +869,7 @@ with_libedit_preferred with_uuid with_ossp_uuid with_libcurl +with_libnuma with_libxml with_libxslt with_system_tzdata @@ -1581,6 +1583,7 @@ Optional Packages: --with-uuid=LIB build contrib/uuid-ossp using LIB (bsd,e2fs,ossp) --with-ossp-uuid obsolete spelling of --with-uuid=ossp --with-libcurl build with libcurl support + --with-libnuma build with libnuma support --with-libxml build with XML support --with-libxslt use XSLT support when building contrib/xml2 --with-system-tzdata=DIR @@ -9140,6 +9143,33 @@ fi +# +# NUMA +# + + + +# Check whether --with-libnuma was given. +if test "${with_libnuma+set}" = set; then : + withval=$with_libnuma; + case $withval in + yes) + +$as_echo "#define USE_LIBNUMA 1" >>confdefs.h + + ;; + no) + : + ;; + *) + as_fn_error $? 
"no argument expected for --with-libnuma option" "$LINENO" 5 + ;; + esac + +else + with_libnuma=no + +fi @@ -12378,6 +12408,63 @@ fi fi +if test "$with_libnuma" = yes ; then + + ac_fn_c_check_header_mongrel "$LINENO" "numa.h" "ac_cv_header_numa_h" "$ac_includes_default" +if test "x$ac_cv_header_numa_h" = xyes; then : + +else + as_fn_error $? "header file is required for --with-libnuma" "$LINENO" 5 +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for numa_available in -lnuma" >&5 +$as_echo_n "checking for numa_available in -lnuma... " >&6; } +if ${ac_cv_lib_numa_numa_available+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lnuma $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char numa_available (); +int +main () +{ +return numa_available (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_numa_numa_available=yes +else + ac_cv_lib_numa_numa_available=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_numa_numa_available" >&5 +$as_echo "$ac_cv_lib_numa_numa_available" >&6; } +if test "x$ac_cv_lib_numa_numa_available" = xyes; then : + + LIBS="-lnuma $LIBS" + +else + as_fn_error $? "library 'numa' does not provide numa_available" "$LINENO" 5 +fi + +fi + # XXX libcurl must link after libgssapi_krb5 on FreeBSD to avoid segfaults # during gss_acquire_cred(). This is possibly related to Curl's Heimdal # dependency on that platform? 
diff --git a/configure.ac b/configure.ac index b6d02f5ecc7..1a394dfc077 100644 --- a/configure.ac +++ b/configure.ac @@ -1041,6 +1041,19 @@ if test "$with_libcurl" = yes ; then fi +# +# libnuma +# +AC_MSG_CHECKING([whether to build with libnuma support]) +PGAC_ARG_BOOL(with, libnuma, no, [build with libnuma support], + [AC_DEFINE([USE_LIBNUMA], 1, [Define to 1 to build with NUMA awareness support. (--with-libnuma)])]) +AC_MSG_RESULT([$with_libnuma]) +AC_SUBST(with_libnuma) + +if test "$with_libnuma" = yes ; then + AC_CHECK_LIB(numa, numa_available, [], [AC_MSG_ERROR([library 'libnuma' is required for NUMA awareness])]) +fi + # # XML # diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 51dd8ad6571..071b98e6c9a 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -25061,6 +25061,19 @@ SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n); + + + + pg_numa_available + + pg_numa_available () + boolean + + + Returns true if NUMA support is available. + + + diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index e076cefa3b9..9f56205a1d7 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -1156,6 +1156,16 @@ build-postgresql: + + + + + Build with libnuma for basic NUMA support. + Only supported on platforms for which the libnuma library is implemented. + + + + @@ -2611,6 +2621,17 @@ ninja install + + + + + Build with libnuma for basic NUMA support. + Only supported on platforms for which the libnuma library is implemented. + The default for this option is auto. 
+ + + + diff --git a/meson.build b/meson.build index 13c13748e5d..19500ebdfb2 100644 --- a/meson.build +++ b/meson.build @@ -949,6 +949,25 @@ else endif +############################################################### +# Library: libnuma +############################################################### + +libnumaopt = get_option('libnuma') +if not libnumaopt.disabled() + libnuma = dependency('numa', required: false) # try pkg-config first + if not libnuma.found() + libnuma = cc.find_library('numa', required: libnumaopt) + endif + if libnuma.found() + cdata.set('USE_LIBNUMA', 1) + else + libnuma = not_found_dep + endif +else + libnuma = not_found_dep +endif + ############################################################### # Library: libxml @@ -3168,6 +3187,7 @@ backend_both_deps += [ icu_i18n, ldap, libintl, + libnuma, libxml, lz4, pam, @@ -3823,6 +3843,7 @@ if meson.version().version_compare('>=0.57') 'icu': icu, 'ldap': ldap, 'libcurl': libcurl, + 'libnuma': libnuma, 'libxml': libxml, 'libxslt': libxslt, 'llvm': llvm, diff --git a/meson_options.txt b/meson_options.txt index 702c4517145..adaadb5faf1 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -106,6 +106,9 @@ option('libcurl', type : 'feature', value: 'auto', option('libedit_preferred', type: 'boolean', value: false, description: 'Prefer BSD Libedit over GNU Readline') +option('libnuma', type: 'feature', value: 'auto', + description: 'NUMA awareness support') + option('libxml', type: 'feature', value: 'auto', description: 'XML support') diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 3b620bac5ac..0bd4b2d7d32 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -191,6 +191,7 @@ with_gssapi = @with_gssapi@ with_krb_srvnam = @with_krb_srvnam@ with_ldap = @with_ldap@ with_libcurl = @with_libcurl@ +with_libnuma = @with_libnuma@ with_libxml = @with_libxml@ with_libxslt = @with_libxslt@ with_llvm = @with_llvm@ diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index 
895a43fb39e..4c9c3cb320f 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -68,6 +68,7 @@ #include "fmgr.h" #include "funcapi.h" #include "miscadmin.h" +#include "port/pg_numa.h" #include "storage/lwlock.h" #include "storage/pg_shmem.h" #include "storage/shmem.h" @@ -568,3 +569,13 @@ pg_get_shmem_allocations(PG_FUNCTION_ARGS) return (Datum) 0; } + +/* SQL-callable function: false when pg_numa_init() returns -1, else true. */ +Datum +pg_numa_available(PG_FUNCTION_ARGS) +{ + if (pg_numa_init() == -1) + PG_RETURN_BOOL(false); + PG_RETURN_BOOL(true); +} + diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index ad25cbb39c5..dd34c79f521 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -563,7 +563,7 @@ static int ssl_renegotiation_limit; */ int huge_pages = HUGE_PAGES_TRY; int huge_page_size; -static int huge_pages_status = HUGE_PAGES_UNKNOWN; +int huge_pages_status = HUGE_PAGES_UNKNOWN; /* * These variables are all dummies that don't do anything, except in some diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 42e427f8fe8..38612d8ae12 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -8489,6 +8489,10 @@ proargnames => '{name,off,size,allocated_size}', prosrc => 'pg_get_shmem_allocations' }, +{ oid => '5102', descr => 'Is NUMA support available?', + proname => 'pg_numa_available', provolatile => 'v', prorettype => 'bool', + proargtypes => '', prosrc => 'pg_numa_available' }, + # memory context of local backend { oid => '2282', descr => 'information about all memory contexts of local backend', @@ -12477,3 +12481,4 @@ prosrc => 'gist_stratnum_common' }, ] + diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index db6454090d2..8894f800607 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -672,6 +672,9 @@ /* Define to 1 to build with libcurl 
support. (--with-libcurl) */ #undef USE_LIBCURL +/* Define to 1 to build with NUMA awareness support. (--with-libnuma) */ +#undef USE_LIBNUMA + /* Define to 1 to build with XML support. (--with-libxml) */ #undef USE_LIBXML diff --git a/src/include/port/pg_numa.h b/src/include/port/pg_numa.h new file mode 100644 index 00000000000..d3ebe8b5bd8 --- /dev/null +++ b/src/include/port/pg_numa.h @@ -0,0 +1,43 @@ +/*------------------------------------------------------------------------- + * + * pg_numa.h + * Basic NUMA portability routines + * + * + * Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/port/pg_numa.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_NUMA_H +#define PG_NUMA_H + +#include "c.h" + +extern PGDLLIMPORT int pg_numa_init(void); +extern PGDLLIMPORT int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status); +extern PGDLLIMPORT int pg_numa_get_max_node(void); +extern PGDLLIMPORT Size pg_numa_get_pagesize(void); + +#ifdef USE_LIBNUMA + +/* + * Read from ptr so the page is faulted in: on Linux this must happen before + * pg_numa_query_pages(), or move_pages(2) does not return valid results. + */ +#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \ + ro_volatile_var = *(uint64 *) (ptr) + +extern void numa_warn(int num, char *fmt,...) 
pg_attribute_printf(2, 3); +extern void numa_error(char *where); + +#else + +#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \ + do {} while(0) + +#endif + +#endif /* PG_NUMA_H */ diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h index b99ebc9e86f..5f7d4b83a60 100644 --- a/src/include/storage/pg_shmem.h +++ b/src/include/storage/pg_shmem.h @@ -45,6 +45,7 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */ extern PGDLLIMPORT int shared_memory_type; extern PGDLLIMPORT int huge_pages; extern PGDLLIMPORT int huge_page_size; +extern PGDLLIMPORT int huge_pages_status; /* Possible values for huge_pages and huge_pages_status */ typedef enum diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build index 60e13d50235..f786c191605 100644 --- a/src/makefiles/meson.build +++ b/src/makefiles/meson.build @@ -199,6 +199,8 @@ pgxs_empty = [ 'PTHREAD_CFLAGS', 'PTHREAD_LIBS', 'ICU_LIBS', + + 'LIBNUMA_CFLAGS', 'LIBNUMA_LIBS' ] if host_system == 'windows' and cc.get_argument_syntax() != 'msvc' @@ -230,6 +232,7 @@ pgxs_deps = { 'icu': icu, 'ldap': ldap, 'libcurl': libcurl, + 'libnuma': libnuma, 'libxml': libxml, 'libxslt': libxslt, 'llvm': llvm, diff --git a/src/port/Makefile b/src/port/Makefile index 4c224319512..a68a29d5414 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -44,6 +44,7 @@ OBJS = \ noblock.o \ path.o \ pg_bitutils.o \ + pg_numa.o \ pg_popcount_avx512.o \ pg_strong_random.o \ pgcheckdir.o \ diff --git a/src/port/meson.build b/src/port/meson.build index 7fcfa728d43..7ffbd4d88d2 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -7,6 +7,7 @@ pgport_sources = [ 'noblock.c', 'path.c', 'pg_bitutils.c', + 'pg_numa.c', 'pg_popcount_avx512.c', 'pg_strong_random.c', 'pgcheckdir.c', diff --git a/src/port/pg_numa.c b/src/port/pg_numa.c new file mode 100644 index 00000000000..b9348caaca9 --- /dev/null +++ b/src/port/pg_numa.c @@ -0,0 +1,151 @@ 
+/*-------------------------------------------------------------------------
+ *
+ * pg_numa.c
+ *		Basic NUMA portability routines
+ *
+ *
+ *	Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_numa.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include <unistd.h>
+
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include "port/pg_numa.h"
+#include "storage/pg_shmem.h"
+
+/*
+ * At this point we provide support only for Linux thanks to libnuma, but in
+ * future support for other platforms e.g. Win32 or FreeBSD might be possible
+ * too. For Win32 NUMA APIs see
+ * https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
+ */
+#ifdef USE_LIBNUMA
+
+#include <numa.h>
+#include <numaif.h>
+
+/* libnuma requires initialization as per numa(3) on Linux */
+int
+pg_numa_init(void)
+{
+	return numa_available();
+}
+
+/* Query page placement: numa_move_pages() with no target nodes (NULL) */
+int
+pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
+{
+	return numa_move_pages(pid, count, pages, NULL, status, 0);
+}
+
+/* Wrapper around numa_max_node() */
+int
+pg_numa_get_max_node(void)
+{
+	return numa_max_node();
+}
+
+/* OS page size, or the huge page size when huge_pages_status is on */
+Size
+pg_numa_get_pagesize(void)
+{
+	Size		os_page_size = sysconf(_SC_PAGESIZE);
+
+	if (huge_pages_status == HUGE_PAGES_ON)
+		GetHugePageSize(&os_page_size, NULL);
+	return os_page_size;
+}
+
+#ifndef FRONTEND
+/* Report a libnuma warning through ereport().  XXX: not tested */
+void
+numa_warn(int num, char *fmt,...)
+{
+	va_list		ap;
+	int			olde = errno;
+	int			needed;
+	StringInfoData msg;
+
+	initStringInfo(&msg);
+
+	/* appendStringInfoVA() returns 0 once the text fit; else enlarge, retry */
+	for (;;)
+	{
+		va_start(ap, fmt);
+		needed = appendStringInfoVA(&msg, fmt, ap);
+		va_end(ap);
+		if (needed == 0)
+			break;
+		enlargeStringInfo(&msg, needed);
+	}
+
+	ereport(WARNING,
+			(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+			 errmsg_internal("libnuma: WARNING: %s", msg.data)));
+	pfree(msg.data);
+	errno = olde;
+}
+
+void
+numa_error(char *where)
+{
+	int			olde = errno;
+
+	/*
+	 * XXX: for now we issue just WARNING, but long-term that might depend on
+	 * numa_set_strict() here.
+	 */
+	elog(WARNING, "libnuma: ERROR: %s", where);
+	errno = olde;
+}
+#endif							/* !FRONTEND */
+
+#else
+
+/* Empty wrappers */
+int
+pg_numa_init(void)
+{
+	/* We state that NUMA is not available */
+	return -1;
+}
+
+int
+pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
+{
+	return 0;
+}
+
+int
+pg_numa_get_max_node(void)
+{
+	return 0;
+}
+
+Size
+pg_numa_get_pagesize(void)
+{
+#ifndef WIN32
+	Size		os_page_size = sysconf(_SC_PAGESIZE);
+#else
+	Size		os_page_size;
+	SYSTEM_INFO sysinfo;
+	GetSystemInfo(&sysinfo);
+	os_page_size = sysinfo.dwPageSize;
+#endif
+	if (huge_pages_status == HUGE_PAGES_ON)
+		GetHugePageSize(&os_page_size, NULL);
+	return os_page_size;
+}
+
+#endif							/* USE_LIBNUMA */
-- 
2.39.5