From 7f3218da7d1206664522d41ff247b1e96815a757 Mon Sep 17 00:00:00 2001 From: "Chao Li (Evan)" Date: Tue, 25 Nov 2025 13:40:13 +0800 Subject: [PATCH v2 2/2] Make libc-based case-folding functions match unicode_strlower()'s behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comments for unicode_strlower() state that the destination buffer is filled with as many characters as will fit, truncating the result if needed; the string is NUL-terminated only if there is sufficient space. In contrast, the libc variants (strlower_libc_sb(), strupper_libc_sb(), and strtitle_libc_sb()) previously refused to copy anything unless the destination was large enough for the full result plus the terminating NUL. This patch updates all three libc-based functions to follow the same “best effort copy + optional NUL terminator” model as unicode_strlower(). This ensures consistent behavior across all case-folding routines, regardless of whether a Unicode locale is in use. No existing regression tests required changes, and “make check” passes. 
Author: Chao Li Discussion: https://postgr.es/m/CAEoWx2mW0P8CByavV58zm3=eb2MQHaKOcDEF5B2UJYRyC2c3ig@mail.gmail.com --- src/backend/utils/adt/pg_locale_libc.c | 159 +++++++++++++------------ 1 file changed, 81 insertions(+), 78 deletions(-) diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index abf27283a33..c5ff2a0b681 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -426,36 +426,37 @@ static size_t strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { + ssize_t reallen; + locale_t loc = locale->lt; + if (srclen < 0) srclen = strlen(src); - if (srclen + 1 <= destsize) - { - locale_t loc = locale->lt; - char *p; + reallen = srclen + 1; + if (reallen > destsize) + reallen = destsize; - memcpy(dest, src, srclen); - dest[srclen] = '\0'; + memcpy(dest, src, reallen); + if (reallen < destsize) + dest[reallen] = '\0'; - /* - * Note: we assume that tolower_l() will not be so broken as to need - * an isupper_l() guard test. When using the default collation, we - * apply the traditional Postgres behavior that forces ASCII-style - * treatment of I/i, but in non-default collations you get exactly - * what the collation says. - */ - for (p = dest; *p; p++) + /* + * Note: we assume that tolower_l() will not be so broken as to need an + * isupper_l() guard test. When using the default collation, we apply the + * traditional Postgres behavior that forces ASCII-style treatment of I/i, + * but in non-default collations you get exactly what the collation says. 
+ */ + for (char *p = dest; *p; p++) + { + if (locale->is_default) { - if (locale->is_default) - { - if (*p >= 'A' && *p <= 'Z') - *p += 'a' - 'A'; - else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc)) - *p = tolower_l((unsigned char) *p, loc); - } - else + if (*p >= 'A' && *p <= 'Z') + *p += 'a' - 'A'; + else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc)) *p = tolower_l((unsigned char) *p, loc); } + else + *p = tolower_l((unsigned char) *p, loc); } return srclen; @@ -513,53 +514,54 @@ static size_t strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { + ssize_t reallen; + locale_t loc = locale->lt; + int wasalnum = false; + if (srclen < 0) srclen = strlen(src); - if (srclen + 1 <= destsize) - { - locale_t loc = locale->lt; - int wasalnum = false; - char *p; + reallen = srclen + 1; + if (reallen > destsize) + reallen = destsize; - memcpy(dest, src, srclen); - dest[srclen] = '\0'; + memcpy(dest, src, reallen); + if (reallen < destsize) + dest[reallen] = '\0'; - /* - * Note: we assume that toupper_l()/tolower_l() will not be so broken - * as to need guard tests. When using the default collation, we apply - * the traditional Postgres behavior that forces ASCII-style treatment - * of I/i, but in non-default collations you get exactly what the - * collation says. - */ - for (p = dest; *p; p++) + /* + * Note: we assume that toupper_l()/tolower_l() will not be so broken as + * to need guard tests. When using the default collation, we apply the + * traditional Postgres behavior that forces ASCII-style treatment of I/i, + * but in non-default collations you get exactly what the collation says. 
+ */ + for (char *p = dest; *p; p++) + { + if (locale->is_default) { - if (locale->is_default) + if (wasalnum) { - if (wasalnum) - { - if (*p >= 'A' && *p <= 'Z') - *p += 'a' - 'A'; - else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc)) - *p = tolower_l((unsigned char) *p, loc); - } - else - { - if (*p >= 'a' && *p <= 'z') - *p -= 'a' - 'A'; - else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc)) - *p = toupper_l((unsigned char) *p, loc); - } + if (*p >= 'A' && *p <= 'Z') + *p += 'a' - 'A'; + else if (IS_HIGHBIT_SET(*p) && isupper_l(*p, loc)) + *p = tolower_l((unsigned char) *p, loc); } else { - if (wasalnum) - *p = tolower_l((unsigned char) *p, loc); - else + if (*p >= 'a' && *p <= 'z') + *p -= 'a' - 'A'; + else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc)) *p = toupper_l((unsigned char) *p, loc); } - wasalnum = isalnum_l((unsigned char) *p, loc); } + else + { + if (wasalnum) + *p = tolower_l((unsigned char) *p, loc); + else + *p = toupper_l((unsigned char) *p, loc); + } + wasalnum = isalnum_l((unsigned char) *p, loc); } return srclen; @@ -624,36 +626,37 @@ static size_t strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { + ssize_t reallen; + locale_t loc = locale->lt; + if (srclen < 0) srclen = strlen(src); - if (srclen + 1 <= destsize) - { - locale_t loc = locale->lt; - char *p; + reallen = srclen + 1; + if (reallen > destsize) + reallen = destsize; - memcpy(dest, src, srclen); - dest[srclen] = '\0'; + memcpy(dest, src, reallen); + if (reallen < destsize) + dest[reallen] = '\0'; - /* - * Note: we assume that toupper_l() will not be so broken as to need - * an islower_l() guard test. When using the default collation, we - * apply the traditional Postgres behavior that forces ASCII-style - * treatment of I/i, but in non-default collations you get exactly - * what the collation says. - */ - for (p = dest; *p; p++) + /* + * Note: we assume that toupper_l() will not be so broken as to need an + * islower_l() guard test. 
When using the default collation, we apply the + * traditional Postgres behavior that forces ASCII-style treatment of I/i, + * but in non-default collations you get exactly what the collation says. + */ + for (char *p = dest; *p; p++) + { + if (locale->is_default) { - if (locale->is_default) - { - if (*p >= 'a' && *p <= 'z') - *p -= 'a' - 'A'; - else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc)) - *p = toupper_l((unsigned char) *p, loc); - } - else + if (*p >= 'a' && *p <= 'z') + *p -= 'a' - 'A'; + else if (IS_HIGHBIT_SET(*p) && islower_l(*p, loc)) *p = toupper_l((unsigned char) *p, loc); } + else + *p = toupper_l((unsigned char) *p, loc); } return srclen; -- 2.39.5 (Apple Git-154)