From bf2ceb73fe8a48ece981a82ef103284e5dfc0a54 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Sun, 24 Oct 2021 21:48:26 +1300 Subject: [PATCH v2] Use futex-based semaphore emulation on macOS. Provide an implementation of the POSIX unnamed semaphore API when requested at build time with the following options, enabled by default: meson.build: sema_kind = "unnamed_posix+emulation" configure template: PREFERRED_SEMAPHORES="UNNAMED_POSIX+EMULATION" With both build systems, it falls back to sysv if native futexes are not detected. Only macOS 14.4's new os_sync_wait_on_address() is supported initially, but this could probably be expanded to OpenBSD and NetBSD. Futexes might have more general applications and on more platforms in later work. Discussion: https://www.postgresql.org/message-id/CA%2BhUKGKAPR%3DNXxMTCmEomCUpq%2BmrPLDVyfVa4bKZqfDjwyzD7Q%40mail.gmail.com --- configure | 24 +++++++ configure.ac | 8 +++ meson.build | 14 ++++ src/backend/port/Makefile | 3 +- src/backend/port/meson.build | 3 + src/backend/port/posix_sema.c | 14 ++++ src/backend/port/sem_init.c | 92 ++++++++++++++++++++++++ src/include/pg_config.h.in | 7 ++ src/include/port/pg_futex.h | 120 +++++++++++++++++++++++++++++++ src/template/darwin | 15 +--- src/tools/pgindent/typedefs.list | 2 + 11 files changed, 289 insertions(+), 13 deletions(-) create mode 100644 src/backend/port/sem_init.c create mode 100644 src/include/port/pg_futex.h diff --git a/configure b/configure index 507a2437c33..aba6f39eb38 100755 --- a/configure +++ b/configure @@ -16179,6 +16179,21 @@ _ACEOF # We can't use AC_CHECK_FUNCS to detect these functions, because it # won't handle deployment target restrictions on macOS +ac_fn_c_check_decl "$LINENO" "os_sync_wait_on_address" "ac_cv_have_decl_os_sync_wait_on_address" "#include +" +if test "x$ac_cv_have_decl_os_sync_wait_on_address" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_OS_SYNC_WAIT_ON_ADDRESS $ac_have_decl +_ACEOF 
+if test $ac_have_decl = 1; then : + HAVE_NATIVE_FUTEX=1 +fi + ac_fn_c_check_decl "$LINENO" "preadv" "ac_cv_have_decl_preadv" "#include " if test "x$ac_cv_have_decl_preadv" = xyes; then : @@ -18419,6 +18434,15 @@ if test "$ac_res" != no; then : fi fi + if test x"$PREFERRED_SEMAPHORES" = x"UNNAMED_POSIX+EMULATION" ; then + # Need futex support for this + if test x"$HAVE_NATIVE_FUTEX" = x"1" ; then + +$as_echo "#define USE_SEMAPHORE_EMULATION 1" >>confdefs.h + + USE_UNNAMED_POSIX_SEMAPHORES=1 + fi + fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking which semaphore API to use" >&5 $as_echo_n "checking which semaphore API to use... " >&6; } if test x"$USE_NAMED_POSIX_SEMAPHORES" = x"1" ; then diff --git a/configure.ac b/configure.ac index 5f4548adc5c..71f7c77b381 100644 --- a/configure.ac +++ b/configure.ac @@ -1841,6 +1841,7 @@ AC_CHECK_DECLS([strlcat, strlcpy, strnlen, strsep, timingsafe_bcmp]) # We can't use AC_CHECK_FUNCS to detect these functions, because it # won't handle deployment target restrictions on macOS +AC_CHECK_DECLS([os_sync_wait_on_address], [HAVE_NATIVE_FUTEX=1], [], [#include ]) AC_CHECK_DECLS([preadv], [], [], [#include ]) AC_CHECK_DECLS([pwritev], [], [], [#include ]) AC_CHECK_DECLS([strchrnul], [], [], [#include ]) @@ -2272,6 +2273,13 @@ if test "$PORTNAME" != "win32"; then # Need sem_init for this AC_SEARCH_LIBS(sem_init, [rt pthread], [USE_UNNAMED_POSIX_SEMAPHORES=1]) fi + if test x"$PREFERRED_SEMAPHORES" = x"UNNAMED_POSIX+EMULATION" ; then + # Need futex support for this + if test x"$HAVE_NATIVE_FUTEX" = x"1" ; then + AC_DEFINE(USE_SEMAPHORE_EMULATION, 1, [Define to enable semaphore emulation with futexes.]) + USE_UNNAMED_POSIX_SEMAPHORES=1 + fi + fi AC_MSG_CHECKING([which semaphore API to use]) if test x"$USE_NAMED_POSIX_SEMAPHORES" = x"1" ; then AC_DEFINE(USE_NAMED_POSIX_SEMAPHORES, 1, [Define to select named POSIX semaphores.]) diff --git a/meson.build b/meson.build index 30e0edda3e7..3c360bb5d02 100644 --- a/meson.build +++ 
b/meson.build @@ -207,6 +207,7 @@ if host_system == 'cygwin' mod_link_with_dir = 'libdir' elif host_system == 'darwin' + sema_kind = 'unnamed_posix+emulation' # reverts to sysv if unsupported dlsuffix = '.dylib' library_path_var = 'DYLD_LIBRARY_PATH' @@ -2660,6 +2661,7 @@ decl_checks = [ # checking for library symbols wouldn't handle deployment target # restrictions on macOS decl_checks += [ + ['os_sync_wait_on_address', 'os/os_sync_wait_on_address.h'], ['preadv', 'sys/uio.h'], ['pwritev', 'sys/uio.h'], ['strchrnul', 'string.h'], @@ -2968,6 +2970,18 @@ if sema_kind == 'unnamed_posix' and \ sema_kind = 'sysv' endif +# for unnamed_posix+emulation, use unnamed_posix and enable emulation, or fall +# back to sysv if support isn't found +if sema_kind == 'unnamed_posix+emulation' + if cdata.get('HAVE_DECL_OS_SYNC_WAIT_ON_ADDRESS') != 0 + cdata.set('USE_SEMAPHORE_EMULATION', 1, + description: 'Define to 1 to enable emulation of unnamed POSIX semaphores') + sema_kind = 'unnamed_posix' + else + sema_kind = 'sysv' + endif +endif + cdata.set('USE_@0@_SHARED_MEMORY'.format(shmem_kind.to_upper()), 1) cdata.set('USE_@0@_SEMAPHORES'.format(sema_kind.to_upper()), 1) diff --git a/src/backend/port/Makefile b/src/backend/port/Makefile index 47338d99229..639049e6be7 100644 --- a/src/backend/port/Makefile +++ b/src/backend/port/Makefile @@ -25,7 +25,8 @@ OBJS = \ $(TAS) \ atomics.o \ pg_sema.o \ - pg_shmem.o + pg_shmem.o \ + sem_init.o ifeq ($(PORTNAME), win32) SUBDIRS += win32 diff --git a/src/backend/port/meson.build b/src/backend/port/meson.build index 09d54e01d13..a130549ac9a 100644 --- a/src/backend/port/meson.build +++ b/src/backend/port/meson.build @@ -4,6 +4,9 @@ backend_sources += files( 'atomics.c', ) +if cdata.has('USE_SEMAPHORE_EMULATION') + backend_sources += files('sem_init.c') +endif if cdata.has('USE_UNNAMED_POSIX_SEMAPHORES') or cdata.has('USE_NAMED_POSIX_SEMAPHORES') backend_sources += files('posix_sema.c') diff --git a/src/backend/port/posix_sema.c 
b/src/backend/port/posix_sema.c index 269c7460817..707b0615559 100644 --- a/src/backend/port/posix_sema.c +++ b/src/backend/port/posix_sema.c @@ -36,6 +36,20 @@ #include "storage/pg_sema.h" #include "storage/shmem.h" +#ifdef USE_SEMAPHORE_EMULATION +#include "port/pg_semaphore.h" + +/* + * Redirect to futex-based emulation of the POSIX unnamed API, for systems that + * can't do sem_init() with pshared=1. + */ +#define sem_t pg_sem_t +#define sem_init pg_sem_init +#define sem_destroy pg_sem_destroy +#define sem_post pg_sem_post +#define sem_wait pg_sem_wait +#define sem_trywait pg_sem_trywait +#endif /* see file header comment */ #if defined(USE_NAMED_POSIX_SEMAPHORES) && defined(EXEC_BACKEND) diff --git a/src/backend/port/sem_init.c b/src/backend/port/sem_init.c new file mode 100644 index 00000000000..9d0f499240b --- /dev/null +++ b/src/backend/port/sem_init.c @@ -0,0 +1,92 @@ +/*------------------------------------------------------------------------- + * + * sem_init.c + * + * Drop-in replacement for POSIX sem_init(), sem_post(), sem_wait(), + * sem_trywait() and sem_destroy(). These can be used on systems that don't + * provide unnamed semaphores with pshared=1, but do have shared memory futexes + * that we can use to build our own. + * + * pg_sem_t is a typedef for pg_futex_u32 in src/include/port/pg_semaphore.h. + * If we wanted to add a waiter count it could become a struct. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/port/sem_init.c + * + *-------------------------------------------------------------------------/ + */ + +#include "postgres.h" + +#ifdef USE_SEMAPHORE_EMULATION + +#include "port/atomics.h" +#include "port/pg_futex.h" +#include "port/pg_semaphore.h" + +/* + * Initialize a semaphore with a given value. + */ +int +pg_sem_init(pg_sem_t *semaphore, int pshared, int value) +{ + pg_atomic_init_u32(&semaphore->value, value); + return 0; +} + +/* + * Destroy a semaphore. 
+ */ +int +pg_sem_destroy(pg_sem_t *semaphore) +{ + return 0; +} + +/* + * Increment the semaphore and wake any waiters. + * + * We use semaphores in contexts where there is always a waiter, so we don't + * bother with a waiter count that could suppress useless system calls here. + */ +int +pg_sem_post(pg_sem_t *semaphore) +{ + pg_atomic_fetch_add_u32(&semaphore->value, 1); + return pg_futex_wake_u32(semaphore, INT_MAX); +} + +/* + * Decrement the semaphore if it is above zero, or fail with EAGAIN if it is + * already zero. + */ +int +pg_sem_trywait(pg_sem_t *semaphore) +{ + uint32 value = pg_atomic_read_u32(&semaphore->value); + + while (value > 0) + if (pg_atomic_compare_exchange_u32(&semaphore->value, &value, value - 1)) + return 0; + + errno = EAGAIN; + return -1; +} + +/* + * Decrement the semaphore, waiting first for it to rise above zero if + * necessary. + */ +int +pg_sem_wait(pg_sem_t *semaphore) +{ + while (pg_sem_trywait(semaphore) < 0) + if (pg_futex_wait_u32(semaphore, 0, NULL) < 0) + return -1; + + return 0; +} + +#endif /* USE_SEMAPHORE_EMULATION */ diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index c4dc5d72bdb..905499a2933 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -95,6 +95,10 @@ don't. */ #undef HAVE_DECL_MEMSET_S +/* Define to 1 if you have the declaration of `os_sync_wait_on_address', and + to 0 if you don't. */ +#undef HAVE_DECL_OS_SYNC_WAIT_ON_ADDRESS + /* Define to 1 if you have the declaration of `posix_fadvise', and to 0 if you don't. */ #undef HAVE_DECL_POSIX_FADVISE @@ -727,6 +731,9 @@ /* Define to 1 to build with PAM support. (--with-pam) */ #undef USE_PAM +/* Define to enable semaphore emulation with futexes. */ +#undef USE_SEMAPHORE_EMULATION + /* Define to 1 to use software CRC-32C implementation (slicing-by-8). 
*/ #undef USE_SLICING_BY_8_CRC32C diff --git a/src/include/port/pg_futex.h b/src/include/port/pg_futex.h new file mode 100644 index 00000000000..6fae7c7e810 --- /dev/null +++ b/src/include/port/pg_futex.h @@ -0,0 +1,120 @@ +/*------------------------------------------------------------------------- + * + * pg_futex.h + * + * Futex abstraction. Initially supported only on macOS, but it could be + * extended to other systems. A wrapper struct and initialization function are + * used to reserve the option of providing a fallback implementation. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/pg_futex.h + * + *-------------------------------------------------------------------------/ + */ +#ifndef PG_FUTEX_H +#define PG_FUTEX_H + +#ifdef FRONTEND +#error "not for use in frontend code" +#endif + +#if HAVE_DECL_OS_SYNC_WAIT_ON_ADDRESS +#include +#define HAVE_PG_FUTEX_U32 +#endif + +#ifdef HAVE_PG_FUTEX_U32 + +#include "port/atomics.h" + +#include + +typedef struct pg_futex_u32 +{ + pg_atomic_uint32 value; +} pg_futex_u32; + +static inline void +pg_futex_init_u32(pg_futex_u32 *futex, uint32 value) +{ + pg_atomic_init_u32(&futex->value, value); +} + +/* + * Return 0 immediately if futex->value is not equal to the expected value, and + * otherwise wait to be woken up explicitly by a thread that has changed the + * value, and then return 0. Return -1 and set errno on error. ETIMEDOUT + * indicates that the optional timeout has been reached. + * + * Note for future implementations: not all systems distinguish between value + * check failure and being woken up, so this function returns 0 in both cases. + */ +static inline int +pg_futex_wait_u32(pg_futex_u32 *futex, uint32 expected, struct timespec *timeout) +{ + /* + * Our atomic 32 bit integers are just wrapped integers, so it is safe for + * the kernel to read them when it performs its value check. 
+ */ + StaticAssertStmt(sizeof(futex->value) == sizeof(expected), "unexpected size"); + +#if HAVE_DECL_OS_SYNC_WAIT_ON_ADDRESS + if (timeout) + { + uint64 timeout_ns = timeout->tv_sec * 1000000000 + timeout->tv_nsec; + + if (os_sync_wait_on_address_with_timeout(&futex->value, + expected, + sizeof(futex->value), + OS_SYNC_WAIT_ON_ADDRESS_SHARED, + OS_CLOCK_MACH_ABSOLUTE_TIME, + timeout_ns) >= 0) + return 0; + } + else + { + if (os_sync_wait_on_address(&futex->value, + expected, + sizeof(futex->value), + OS_SYNC_WAIT_ON_ADDRESS_SHARED) >= 0) + return 0; + } + return -1; +#else +#error "futexes not implemented for this platform" +#endif +} + +/* + * Wake at least a given number of waiters. + * + * Note for future implementations: not all systems report how many were woken, + * so this function hides that by returning 0 for success. + */ +static inline int +pg_futex_wake_u32(pg_futex_u32 *futex, int nwaiters) +{ +#if HAVE_DECL_OS_SYNC_WAIT_ON_ADDRESS + int rc; + + if (nwaiters == 1) + rc = os_sync_wake_by_address_any(&futex->value, + sizeof(futex->value), + OS_SYNC_WAKE_BY_ADDRESS_SHARED); + else + rc = os_sync_wake_by_address_all(&futex->value, + sizeof(futex->value), + OS_SYNC_WAKE_BY_ADDRESS_SHARED); + if (rc < 0 && errno == ENOENT) + rc = 0; /* no waiters */ + return rc; +#else +#error "futexes not implemented for this platform" +#endif +} + +#endif /* HAVE_PG_FUTEX_U32 */ + +#endif /* PG_FUTEX_H */ diff --git a/src/template/darwin b/src/template/darwin index e8eb9390687..96aa464e424 100644 --- a/src/template/darwin +++ b/src/template/darwin @@ -14,17 +14,8 @@ fi # Extra CFLAGS for code that will go into a shared library CFLAGS_SL="" -# Select appropriate semaphore support. Darwin 6.0 (macOS 10.2) and up -# support System V semaphores; before that we have to use named POSIX -# semaphores, which are less good for our purposes because they eat a -# file descriptor per backend per max_connection slot. 
-case $host_os in - darwin[015].*) - USE_NAMED_POSIX_SEMAPHORES=1 - ;; - *) - USE_SYSV_SEMAPHORES=1 - ;; -esac +# If targeting macOS 14.4 and above, use futexes to emulate unnamed semaphores, +# and otherwise fall back to System V semaphores. +PREFERRED_SEMAPHORES="UNNAMED_POSIX+EMULATION" DLSUFFIX=".dylib" diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index e6f2e93b2d6..e8663470d51 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3871,6 +3871,7 @@ pg_enc2name pg_encname pg_fe_sasl_mech pg_funcptr_t +pg_futex_u32 pg_gssinfo pg_hmac_ctx pg_hmac_errno @@ -3885,6 +3886,7 @@ pg_regex_t pg_regmatch_t pg_regoff_t pg_saslprep_rc +pg_sem_t pg_sha1_ctx pg_sha224_ctx pg_sha256_ctx -- 2.39.5 (Apple Git-154)