From ab504665cc51814bbe0d8757d35e331fd9b6a41a Mon Sep 17 00:00:00 2001
From: Thomas Munro
Date: Wed, 4 Jun 2025 12:19:53 +1200
Subject: [PATCH] Load optional collation version from glibc LOCPATH.

One technique for dealing with glibc locale definition changes across
Linux distribution upgrades or migrations is to compile the locale
definitions from the source system with the target system's localedef
tool, and then point to the newly compiled locales with the LOCPATH
environment variable, with certain caveats.

Unfortunately this breaks the rather simple-minded approach in commit
d5ac14f9, which reports the new system's glibc library version for lack
of anything better.  Spurious warnings about mismatched collation are
reported, defeating the goal of the LOCPATH-based upgrade/migration
technique.

Since neither POSIX nor glibc defines a way for locales to report their
version (cf. FreeBSD querylocale(), ICU ucol_getVersion(), Windows
GetNLSVersionEx()), invent a way for a user of the LOCPATH technique to
supply user-defined version information.  This can be used to store the
version string of the source system, or invent a new convention for
labeling collation versions.

The version is read from the first text file found in this list:

 * $LOCPATH/{collcollate}/LC_COLLATE.version
 * $LOCPATH/{collcollate}/version
 * $LOCPATH/LC_COLLATE.version
 * $LOCPATH/version

Only if none of these files are found will the glibc library's reported
version be used, as before.  There is no change in behaviour for most
users, since LOCPATH is not normally defined.  Non-glibc builds are not
affected.

glibc itself has no knowledge of these files, which are a new PostgreSQL
invention that relies only on knowledge of how glibc's compiled locales
are laid out, including "normalization" of the codeset part of the
locale name.

Back-patch to 13, where d5ac14f9 shipped.
---
 src/backend/main/main.c                |   2 +
 src/backend/utils/adt/pg_locale_libc.c | 144 ++++++++++++++++++++++++-
 src/include/utils/pg_locale.h          |   1 +
 3 files changed, 145 insertions(+), 2 deletions(-)

diff --git a/src/backend/main/main.c b/src/backend/main/main.c
index 7d63cf94a6b..7dd7060c93a 100644
--- a/src/backend/main/main.c
+++ b/src/backend/main/main.c
@@ -113,6 +113,8 @@ main(int argc, char *argv[])
 	MyProcPid = getpid();
 	MemoryContextInit();
 
+	pg_locale_init_libc();
+
 	/*
 	 * Set reference point for stack-depth checking.  (There's no point in
 	 * enabling this before error reporting works.)
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 199857e22db..c6403dda2c8 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -19,6 +19,7 @@
 #include "catalog/pg_collation.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
+#include "storage/fd.h"
 #include "utils/builtins.h"
 #include "utils/formatting.h"
 #include "utils/memutils.h"
@@ -33,6 +34,10 @@
 #include <gnu/libc-version.h>
 #endif
 
+#ifdef __GLIBC__
+static char *glibc_locpath = NULL;
+#endif
+
 /*
  * Size of stack buffer to use for string transformations, used to avoid heap
  * allocations in typical cases.  This should be large enough that most strings
@@ -119,6 +124,28 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = {
 };
 #endif
 
+/*
+ * Called from main() right after MemoryContextInit().  On glibc builds this
+ * remembers the LOCPATH environment variable, if set; on other platforms it
+ * does nothing.
+ */
+void
+pg_locale_init_libc(void)
+{
+#ifdef __GLIBC__
+	/* https://www.gnu.org/software/libc/manual/html_node/Locale-Names.html */
+	const char *locpath = getenv("LOCPATH");
+
+	/*
+	 * Capture a copy of LOCPATH so that get_collation_actual_version_libc()
+	 * can find optional user-supplied custom version strings when using a
+	 * non-default tree of locale files.
+	 */
+	if (locpath)
+		glibc_locpath = MemoryContextStrdup(TopMemoryContext, locpath);
+#endif
+}
+
 size_t
 strlower_libc(char *dst, size_t dstsize, const char *src,
 			  ssize_t srclen, pg_locale_t locale)
@@ -655,6 +682,48 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	return result;
 }
 
+#if defined(__GLIBC__)
+/*
+ * Read version string from user-supplied file into a palloc'd string with any
+ * trailing whitespace removed.  Return NULL if the file doesn't exist; any
+ * other failure to open or read the file is reported with ereport(ERROR).
+ */
+static char *
+read_collversion_from_file(const char *path)
+{
+	char		buffer[TEXTBUFLEN];
+	ssize_t		size;
+	int			fd;
+
+	fd = OpenTransientFile(path, O_RDONLY);
+	if (fd < 0)
+	{
+		if (errno == ENOENT)
+			return NULL;
+		ereport(ERROR,
+				errcode_for_file_access(),
+				errmsg("could not open file \"%s\": %m", path));
+	}
+	size = read(fd, buffer, sizeof(buffer) - 1);
+	if (size < 0)
+	{
+		int			save_errno = errno;
+
+		CloseTransientFile(fd);
+		errno = save_errno;
+		ereport(ERROR,
+				errcode_for_file_access(),
+				errmsg("could not read from file \"%s\": %m", path));
+	}
+	while (size > 0 && isspace((unsigned char) buffer[size - 1]))
+		size--;
+	buffer[size] = 0;
+	CloseTransientFile(fd);
+
+	return pstrdup(buffer);
+}
+#endif
+
 char *
 get_collation_actual_version_libc(const char *collcollate)
 {
@@ -665,8 +734,79 @@ get_collation_actual_version_libc(const char *collcollate)
 		pg_strcasecmp("POSIX", collcollate) != 0)
 	{
 #if defined(__GLIBC__)
-		/* Use the glibc version because we don't have anything better. */
-		collversion = pstrdup(gnu_get_libc_version());
+
+		/*
+		 * If the user defined the environment variable LOCPATH (a glibc
+		 * extension) to override the search location for locale definitions,
+		 * perhaps pointing to definitions compiled from another distribution
+		 * or version of glibc as part of an upgrade strategy, provide a way
+		 * for the reported version string to be loaded from
+		 * $LOCPATH/{collcollate}/LC_COLLATE.version, ../version, or the same
+		 * names at top level in $LOCPATH.
+		 *
+		 * This convention is a PostgreSQL invention not known to glibc.
+		 * Neither glibc nor POSIX provides a way to store or query a version
+		 * string inside locale components themselves.
+		 *
+		 * NOTE(review): glibc treats LOCPATH as a colon-separated list of
+		 * directories; only a single directory is handled here.
+		 */
+		if (glibc_locpath)
+		{
+			char		collcollate_dir[LOCALE_NAME_BUFLEN];
+			char		pathname[MAXPGPATH];
+			char	   *p;
+
+			/*
+			 * Mimic glibc's directory naming: in the codeset part only,
+			 * drop non-alphanumerics and fold to lower case, so that .UTF-8
+			 * becomes .utf8.  A trailing "@modifier" is not part of the
+			 * codeset and must be preserved as is.
+			 */
+			snprintf(collcollate_dir, sizeof(collcollate_dir), "%s",
+					 collcollate);
+			p = strchr(collcollate_dir, '.');
+			if (p)
+			{
+				++p;
+				while (*p && *p != '@')
+				{
+					if (!isalnum((unsigned char) *p))
+					{
+						memmove(p, p + 1, strlen(p));	/* counts terminator */
+						continue;
+					}
+					*p = tolower((unsigned char) *p);
+					++p;
+				}
+			}
+
+			snprintf(pathname, sizeof(pathname), "%s/%s/LC_COLLATE.version",
+					 glibc_locpath, collcollate_dir);
+			collversion = read_collversion_from_file(pathname);
+			if (collversion == NULL)
+			{
+				snprintf(pathname, sizeof(pathname), "%s/%s/version",
+						 glibc_locpath, collcollate_dir);
+				collversion = read_collversion_from_file(pathname);
+			}
+			if (collversion == NULL)
+			{
+				snprintf(pathname, sizeof(pathname), "%s/LC_COLLATE.version",
+						 glibc_locpath);
+				collversion = read_collversion_from_file(pathname);
+			}
+			if (collversion == NULL)
+			{
+				snprintf(pathname, sizeof(pathname), "%s/version",
+						 glibc_locpath);
+				collversion = read_collversion_from_file(pathname);
+			}
+		}
+
+		/* Use the glibc version if we don't have anything better. */
+		if (collversion == NULL)
+			collversion = pstrdup(gnu_get_libc_version());
 #elif defined(LC_VERSION_MASK)
 		locale_t	loc;
 
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 7b8cbf58d2c..820937ef062 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -121,6 +121,7 @@ struct pg_locale_struct
 	}			info;
 };
 
+extern void pg_locale_init_libc(void);
 extern void init_database_collation(void);
 extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 
-- 
2.39.5