From 748134c2093412042f6db425c9f011aebf0c82d7 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 5 May 2026 10:55:06 -0700 Subject: [PATCH v1] Don't accept length of -1 in pg_locale.h APIs. Reverts ac30021356. Per discussion, that commit interfered with useful tooling, and was not worth the special cases. Suggested-by: Andres Freund Discussion: https://postgr.es/m/s32n3tm2mjh247f3xkkxkdk7cf77hglbr3ia3hrsdjylajou7y@nlldpag3tjd5 --- src/backend/utils/adt/pg_locale.c | 42 +++-- src/backend/utils/adt/pg_locale_builtin.c | 13 +- src/backend/utils/adt/pg_locale_icu.c | 187 ++++++++++++++++------ src/backend/utils/adt/pg_locale_libc.c | 159 +++++++++--------- src/common/unicode/case_test.c | 69 ++------ src/common/unicode_case.c | 26 ++- src/include/common/unicode_case.h | 8 +- src/include/utils/pg_locale.h | 45 +++--- 8 files changed, 296 insertions(+), 253 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 6c5c1019e1e..3f1fb9fafd9 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1262,11 +1262,10 @@ get_collation_actual_version(char collprovider, const char *collcollate) /* lowercasing/casefolding in C locale */ static size_t -strlower_c(char *dst, size_t dstsize, const char *src, ssize_t srclen) +strlower_c(char *dst, size_t dstsize, const char *src, size_t srclen) { int i; - srclen = (srclen >= 0) ? srclen : strlen(src); for (i = 0; i < srclen && i < dstsize; i++) dst[i] = pg_ascii_tolower(src[i]); if (i < dstsize) @@ -1276,12 +1275,11 @@ strlower_c(char *dst, size_t dstsize, const char *src, ssize_t srclen) /* titlecasing in C locale */ static size_t -strtitle_c(char *dst, size_t dstsize, const char *src, ssize_t srclen) +strtitle_c(char *dst, size_t dstsize, const char *src, size_t srclen) { bool wasalnum = false; int i; - srclen = (srclen >= 0) ? srclen : strlen(src); for (i = 0; i < srclen && i < dstsize; i++) { char c = src[i]; @@ -1302,11 +1300,10 @@ strtitle_c(char *dst, size_t dstsize, const char *src, ssize_t srclen) /* uppercasing in C locale */ static size_t -strupper_c(char *dst, size_t dstsize, const char *src, ssize_t srclen) +strupper_c(char *dst, size_t dstsize, const char *src, size_t srclen) { int i; - srclen = (srclen >= 0) ? srclen : strlen(src); for (i = 0; i < srclen && i < dstsize; i++) dst[i] = pg_ascii_toupper(src[i]); if (i < dstsize) @@ -1315,7 +1312,7 @@ strupper_c(char *dst, size_t dstsize, const char *src, ssize_t srclen) } size_t -pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, +pg_strlower(char *dst, size_t dstsize, const char *src, size_t srclen, pg_locale_t locale) { if (locale->ctype == NULL) @@ -1325,7 +1322,7 @@ pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, } size_t -pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, +pg_strtitle(char *dst, size_t dstsize, const char *src, size_t srclen, pg_locale_t locale) { if (locale->ctype == NULL) @@ -1335,7 +1332,7 @@ pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, } size_t -pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, +pg_strupper(char *dst, size_t dstsize, const char *src, size_t srclen, pg_locale_t locale) { if (locale->ctype == NULL) @@ -1345,7 +1342,7 @@ pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, } size_t -pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, +pg_strfold(char *dst, size_t dstsize, const char *src, size_t srclen, pg_locale_t locale) { /* in the C locale, casefolding is the same as lowercasing */ @@ -1363,7 +1360,7 @@ pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, * pg_strfold(..., default_locale)? */ size_t -pg_downcase_ident(char *dst, size_t dstsize, const char *src, ssize_t srclen) +pg_downcase_ident(char *dst, size_t dstsize, const char *src, size_t srclen) { pg_locale_t locale = default_locale; @@ -1383,7 +1380,7 @@ pg_downcase_ident(char *dst, size_t dstsize, const char *src, ssize_t srclen) int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale) { - return locale->collate->strncoll(arg1, -1, arg2, -1, locale); + return locale->collate->strcoll(arg1, arg2, locale); } /* @@ -1393,15 +1390,14 @@ pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale) * appropriate for the given locale, platform, and database encoding. If the * locale is not specified, use the database collation. * - * The input strings must be encoded in the database encoding. If an input - * string is NUL-terminated, its length may be specified as -1. + * The input strings must be encoded in the database encoding. * * The caller is responsible for breaking ties if the collation is * deterministic; this maintains consistency with pg_strnxfrm(), which cannot * easily account for deterministic collations. */ int -pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, +pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2, pg_locale_t locale) { return locale->collate->strncoll(arg1, len1, arg2, len2, locale); @@ -1433,7 +1429,7 @@ pg_strxfrm_enabled(pg_locale_t locale) size_t pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale) { - return locale->collate->strnxfrm(dest, destsize, src, -1, locale); + return locale->collate->strxfrm(dest, destsize, src, locale); } /* @@ -1443,9 +1439,8 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale) * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on * untransformed strings. * - * The input string must be encoded in the database encoding. If the input - * string is NUL-terminated, its length may be specified as -1. If 'destsize' - * is zero, 'dest' may be NULL. + * The input string must be encoded in the database encoding. If 'destsize' is + * zero, 'dest' may be NULL. * * Not all providers support pg_strnxfrm() safely. The caller should check * pg_strxfrm_enabled() first, otherwise this function may return wrong @@ -1456,7 +1451,7 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale) * 'destsize' or greater, the resulting contents of 'dest' are undefined. */ size_t -pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen, +pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { return locale->collate->strnxfrm(dest, destsize, src, srclen, locale); @@ -1481,7 +1476,7 @@ size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, pg_locale_t locale) { - return locale->collate->strnxfrm_prefix(dest, destsize, src, -1, locale); + return locale->collate->strxfrm_prefix(dest, destsize, src, locale); } /* @@ -1491,8 +1486,7 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, * memcmp() on the byte sequence is equivalent to pg_strncoll() on * untransformed strings. The result is not nul-terminated. * - * The input string must be encoded in the database encoding. If the input - * string is NUL-terminated, its length may be specified as -1. + * The input string must be encoded in the database encoding. * * Not all providers support pg_strnxfrm_prefix() safely. The caller should * check pg_strxfrm_prefix_enabled() first, otherwise this function may return @@ -1504,7 +1498,7 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, */ size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale) + size_t srclen, pg_locale_t locale) { return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale); } diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c index 794aa37df76..01d4f55b07e 100644 --- a/src/backend/utils/adt/pg_locale_builtin.c +++ b/src/backend/utils/adt/pg_locale_builtin.c @@ -60,8 +60,7 @@ initcap_wbnext(void *state) { struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state; - while (wbstate->offset < wbstate->len && - wbstate->str[wbstate->offset] != '\0') + while (wbstate->offset < wbstate->len) { char32_t u = utf8_to_unicode((const unsigned char *) wbstate->str + wbstate->offset); @@ -84,7 +83,7 @@ initcap_wbnext(void *state) } static size_t -strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, +strlower_builtin(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { return unicode_strlower(dest, destsize, src, srclen, @@ -92,12 +91,12 @@ strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, } static size_t -strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, +strtitle_builtin(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { struct WordBoundaryState wbstate = { .str = src, - .len = (srclen < 0) ? strlen(src) : srclen, + .len = srclen, .offset = 0, .posix = !locale->builtin.casemap_full, .init = false, @@ -110,7 +109,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, } static size_t -strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, +strupper_builtin(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { return unicode_strupper(dest, destsize, src, srclen, @@ -118,7 +117,7 @@ strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, } static size_t -strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, +strfold_builtin(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { return unicode_strfold(dest, destsize, src, srclen, diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c index a4a4e82eb9e..99b1f266c5a 100644 --- a/src/backend/utils/adt/pg_locale_icu.c +++ b/src/backend/utils/adt/pg_locale_icu.c @@ -57,29 +57,33 @@ extern UCollator *pg_ucol_open(const char *loc_str); static UCaseMap *pg_ucasemap_open(const char *loc_str); static size_t strlower_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); + size_t srclen, pg_locale_t locale); static size_t strtitle_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); + size_t srclen, pg_locale_t locale); static size_t strupper_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); + size_t srclen, pg_locale_t locale); static size_t strfold_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); + size_t srclen, pg_locale_t locale); static size_t strlower_icu_utf8(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); + size_t srclen, pg_locale_t locale); static size_t strtitle_icu_utf8(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); + size_t srclen, pg_locale_t locale); static size_t strupper_icu_utf8(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); + size_t srclen, pg_locale_t locale); static size_t strfold_icu_utf8(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); + size_t srclen, pg_locale_t locale); static size_t downcase_ident_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -static int strncoll_icu(const char *arg1, ssize_t len1, - const char *arg2, ssize_t len2, + size_t srclen, pg_locale_t locale); +static int strncoll_icu(const char *arg1, size_t len1, + const char *arg2, size_t len2, pg_locale_t locale); +static int strcoll_icu(const char *arg1, const char *arg2, + pg_locale_t locale); static size_t strnxfrm_icu(char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); +static size_t strxfrm_icu(char *dest, size_t destsize, const char *src, + pg_locale_t locale); extern char *get_collation_actual_version_icu(const char *collcollate); typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity, @@ -96,20 +100,24 @@ static UConverter *icu_converter = NULL; static UCollator *make_icu_collator(const char *iculocstr, const char *icurules); -static int strncoll_icu(const char *arg1, ssize_t len1, - const char *arg2, ssize_t len2, - pg_locale_t locale); static size_t strnxfrm_prefix_icu(char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); +static size_t strxfrm_prefix_icu(char *dest, size_t destsize, const char *src, + pg_locale_t locale); #ifdef HAVE_UCOL_STRCOLLUTF8 -static int strncoll_icu_utf8(const char *arg1, ssize_t len1, - const char *arg2, ssize_t len2, +static int strncoll_icu_utf8(const char *arg1, size_t len1, + const char *arg2, size_t len2, pg_locale_t locale); +static int strcoll_icu_utf8(const char *arg1, + const char *arg2, + pg_locale_t locale); #endif static size_t strnxfrm_prefix_icu_utf8(char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); +static size_t strxfrm_prefix_icu_utf8(char *dest, size_t destsize, const char *src, + pg_locale_t locale); static void init_icu_converter(void); static size_t uchar_length(UConverter *converter, const char *str, int32_t len); @@ -124,7 +132,7 @@ static void icu_set_collation_attributes(UCollator *collator, const char *loc, UErrorCode *status); static int32_t icu_convert_case(ICU_Convert_Func func, char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); + size_t srclen, pg_locale_t locale); static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, @@ -154,19 +162,26 @@ tolower_icu(pg_wchar wc, pg_locale_t locale) static const struct collate_methods collate_methods_icu = { .strncoll = strncoll_icu, + .strcoll = strcoll_icu, .strnxfrm = strnxfrm_icu, + .strxfrm = strxfrm_icu, .strnxfrm_prefix = strnxfrm_prefix_icu, + .strxfrm_prefix = strxfrm_prefix_icu, .strxfrm_is_safe = true, }; static const struct collate_methods collate_methods_icu_utf8 = { #ifdef HAVE_UCOL_STRCOLLUTF8 .strncoll = strncoll_icu_utf8, + .strcoll = strcoll_icu_utf8, #else .strncoll = strncoll_icu, + .strcoll = strcoll_icu, #endif .strnxfrm = strnxfrm_icu, + .strxfrm = strxfrm_icu, .strnxfrm_prefix = strnxfrm_prefix_icu_utf8, + .strxfrm_prefix = strxfrm_prefix_icu_utf8, .strxfrm_is_safe = true, }; @@ -604,35 +619,35 @@ make_icu_collator(const char *iculocstr, const char *icurules) } static size_t -strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, +strlower_icu(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { return icu_convert_case(u_strToLower, dest, destsize, src, srclen, locale); } static size_t -strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, +strtitle_icu(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { return icu_convert_case(u_strToTitle_default_BI, dest, destsize, src, srclen, locale); } static size_t -strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, +strupper_icu(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { return icu_convert_case(u_strToUpper, dest, destsize, src, srclen, locale); } static size_t -strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, +strfold_icu(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { return icu_convert_case(u_strFoldCase_default, dest, destsize, src, srclen, locale); } static size_t -strlower_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen, +strlower_icu_utf8(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { UErrorCode status = U_ZERO_ERROR; @@ -646,7 +661,7 @@ strlower_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen, } static size_t -strtitle_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen, +strtitle_icu_utf8(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { UErrorCode status = U_ZERO_ERROR; @@ -660,7 +675,7 @@ strtitle_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen, } static size_t -strupper_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen, +strupper_icu_utf8(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { UErrorCode status = U_ZERO_ERROR; @@ -674,7 +689,7 @@ strupper_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen, } static size_t -strfold_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen, +strfold_icu_utf8(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { UErrorCode status = U_ZERO_ERROR; @@ -695,7 +710,7 @@ strfold_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen, */ static size_t downcase_ident_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) + size_t srclen, pg_locale_t locale) { int i; bool libc_lower; @@ -724,12 +739,11 @@ downcase_ident_icu(char *dst, size_t dstsize, const char *src, * strncoll_icu_utf8 * * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given - * database encoding. An argument length of -1 means the string is - * NUL-terminated. + * database encoding. */ #ifdef HAVE_UCOL_STRCOLLUTF8 int -strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, +strncoll_icu_utf8(const char *arg1, size_t len1, const char *arg2, size_t len2, pg_locale_t locale) { int result; @@ -748,12 +762,31 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2 return result; } + +int +strcoll_icu_utf8(const char *arg1, const char *arg2, pg_locale_t locale) +{ + int result; + UErrorCode status; + + Assert(GetDatabaseEncoding() == PG_UTF8); + + status = U_ZERO_ERROR; + result = ucol_strcollUTF8(locale->icu.ucol, + arg1, -1, + arg2, -1, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("collation failed: %s", u_errorName(status)))); + + return result; +} #endif -/* 'srclen' of -1 means the strings are NUL-terminated */ -size_t -strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, - pg_locale_t locale) +static size_t +strnxfrm_icu_internal(char *dest, size_t destsize, const char *src, ssize_t srclen, + pg_locale_t locale) { char sbuf[TEXTBUFLEN]; char *buf = sbuf; @@ -795,11 +828,24 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_bsize; } -/* 'srclen' of -1 means the strings are NUL-terminated */ -size_t -strnxfrm_prefix_icu_utf8(char *dest, size_t destsize, - const char *src, ssize_t srclen, - pg_locale_t locale) +static size_t +strnxfrm_icu(char *dest, size_t destsize, const char *src, size_t srclen, + pg_locale_t locale) +{ + return strnxfrm_icu_internal(dest, destsize, src, srclen, locale); +} + +static size_t +strxfrm_icu(char *dest, size_t destsize, const char *src, + pg_locale_t locale) +{ + return strnxfrm_icu_internal(dest, destsize, src, -1, locale); +} + +static size_t +strnxfrm_prefix_icu_utf8_internal(char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale) { size_t result; UCharIterator iter; @@ -825,6 +871,21 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize, return result; } +static size_t +strnxfrm_prefix_icu_utf8(char *dest, size_t destsize, + const char *src, size_t srclen, + pg_locale_t locale) +{ + return strnxfrm_prefix_icu_utf8_internal(dest, destsize, src, srclen, locale); +} + +static size_t +strxfrm_prefix_icu_utf8(char *dest, size_t destsize, const char *src, + pg_locale_t locale) +{ + return strnxfrm_prefix_icu_utf8_internal(dest, destsize, src, -1, locale); +} + char * get_collation_actual_version_icu(const char *collcollate) { @@ -940,7 +1001,7 @@ convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale, static int32_t icu_convert_case(ICU_Convert_Func func, char *dest, size_t destsize, - const char *src, ssize_t srclen, pg_locale_t locale) + const char *src, size_t srclen, pg_locale_t locale) { int32_t len_uchar; int32_t len_conv; @@ -1010,15 +1071,15 @@ foldcase_options(const char *locale) * strncoll_icu * * Convert the arguments from the database encoding to UChar strings, then - * call ucol_strcoll(). An argument length of -1 means that the string is - * NUL-terminated. + * call ucol_strcoll(). * * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(), * caller should call that instead. */ static int -strncoll_icu(const char *arg1, ssize_t len1, - const char *arg2, ssize_t len2, pg_locale_t locale) +strncoll_icu_internal(const char *arg1, ssize_t len1, + const char *arg2, ssize_t len2, + pg_locale_t locale) { char sbuf[TEXTBUFLEN]; char *buf = sbuf; @@ -1062,11 +1123,23 @@ strncoll_icu(const char *arg1, ssize_t len1, return result; } -/* 'srclen' of -1 means the strings are NUL-terminated */ +static int +strncoll_icu(const char *arg1, size_t len1, const char *arg2, size_t len2, + pg_locale_t locale) +{ + return strncoll_icu_internal(arg1, len1, arg2, len2, locale); +} + +static int +strcoll_icu(const char *arg1, const char *arg2, pg_locale_t locale) +{ + return strncoll_icu_internal(arg1, -1, arg2, -1, locale); +} + static size_t -strnxfrm_prefix_icu(char *dest, size_t destsize, - const char *src, ssize_t srclen, - pg_locale_t locale) +strnxfrm_prefix_icu_internal(char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale) { char sbuf[TEXTBUFLEN]; char *buf = sbuf; @@ -1114,6 +1187,20 @@ strnxfrm_prefix_icu(char *dest, size_t destsize, return result_bsize; } +static size_t +strnxfrm_prefix_icu(char *dest, size_t destsize, const char *src, size_t srclen, + pg_locale_t locale) +{ + return strnxfrm_prefix_icu_internal(dest, destsize, src, srclen, locale); +} + +static size_t +strxfrm_prefix_icu(char *dest, size_t destsize, const char *src, + pg_locale_t locale) +{ + return strnxfrm_prefix_icu_internal(dest, destsize, src, -1, locale); +} + static void init_icu_converter(void) { diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index 78f6ea161a0..0b52d6f8fe3 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -82,42 +82,48 @@ extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); -static int strncoll_libc(const char *arg1, ssize_t len1, - const char *arg2, ssize_t len2, +static int strncoll_libc(const char *arg1, size_t len1, + const char *arg2, size_t len2, pg_locale_t locale); +static int strcoll_libc(const char *arg1, const char *arg2, + pg_locale_t locale); static size_t strnxfrm_libc(char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); +static size_t strxfrm_libc(char *dest, size_t destsize, + const char *src, pg_locale_t locale); extern char *get_collation_actual_version_libc(const char *collcollate); static locale_t make_libc_collator(const char *collate, const char *ctype); #ifdef WIN32 -static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, - const char *arg2, ssize_t len2, +static int strncoll_libc_win32_utf8(const char *arg1, size_t len1, + const char *arg2, size_t len2, pg_locale_t locale); +static int strcoll_libc_win32_utf8(const char *arg1, const char *arg2, + pg_locale_t locale); #endif static size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, locale_t loc); static size_t strlower_libc_sb(char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); static size_t strlower_libc_mb(char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); static size_t strtitle_libc_sb(char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); static size_t strtitle_libc_mb(char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); static size_t strupper_libc_sb(char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); static size_t strupper_libc_mb(char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); static bool @@ -324,7 +330,7 @@ tolower_libc_mb(pg_wchar wc, pg_locale_t locale) */ static size_t downcase_ident_libc_sb(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) + size_t srclen, pg_locale_t locale) { locale_t loc = locale->lt; int i; @@ -420,8 +426,11 @@ static const struct ctype_methods ctype_methods_libc_utf8 = { static const struct collate_methods collate_methods_libc = { .strncoll = strncoll_libc, + .strcoll = strcoll_libc, .strnxfrm = strnxfrm_libc, + .strxfrm = strxfrm_libc, .strnxfrm_prefix = NULL, + .strxfrm_prefix = NULL, /* * Unfortunately, it seems that strxfrm() for non-C collations is broken @@ -442,7 +451,9 @@ static const struct collate_methods collate_methods_libc = { #ifdef WIN32 static const struct collate_methods collate_methods_libc_win32_utf8 = { .strncoll = strncoll_libc_win32_utf8, + .strcoll = strcoll_libc_win32_utf8, .strnxfrm = strnxfrm_libc, + .strxfrm = strxfrm_libc, .strnxfrm_prefix = NULL, #ifdef TRUST_STRXFRM .strxfrm_is_safe = true, @@ -453,12 +464,9 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = { #endif static size_t -strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, +strlower_libc_sb(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { - if (srclen < 0) - srclen = strlen(src); - if (srclen + 1 <= destsize) { locale_t loc = locale->lt; @@ -492,7 +500,7 @@ strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, } static size_t -strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, +strlower_libc_mb(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { locale_t loc = locale->lt; @@ -502,9 +510,6 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t curr_char; size_t max_size; - if (srclen < 0) - srclen = strlen(src); - /* Overflow paranoia */ if ((srclen + 1) > (INT_MAX / sizeof(wchar_t))) ereport(ERROR, @@ -540,12 +545,9 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, } static size_t -strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, +strtitle_libc_sb(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { - if (srclen < 0) - srclen = strlen(src); - if (srclen + 1 <= destsize) { locale_t loc = locale->lt; @@ -596,7 +598,7 @@ strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, } static size_t -strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, +strtitle_libc_mb(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { locale_t loc = locale->lt; @@ -607,9 +609,6 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t curr_char; size_t max_size; - if (srclen < 0) - srclen = strlen(src); - /* Overflow paranoia */ if ((srclen + 1) > (INT_MAX / sizeof(wchar_t))) ereport(ERROR, @@ -651,12 +650,9 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, } static size_t -strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, +strupper_libc_sb(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { - if (srclen < 0) - srclen = strlen(src); - if (srclen + 1 <= destsize) { locale_t loc = locale->lt; @@ -690,7 +686,7 @@ strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, } static size_t -strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, +strupper_libc_mb(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { locale_t loc = locale->lt; @@ -700,9 +696,6 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t curr_char; size_t max_size; - if (srclen < 0) - srclen = strlen(src); - /* Overflow paranoia */ if ((srclen + 1) > (INT_MAX / sizeof(wchar_t))) ereport(ERROR, @@ -889,17 +882,17 @@ make_libc_collator(const char *collate, const char *ctype) * strncoll_libc * * NUL-terminate arguments, if necessary, and pass to strcoll_l(). - * - * An input string length of -1 means that it's already NUL-terminated. */ -int -strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, +static int +strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2, pg_locale_t locale) { char sbuf[TEXTBUFLEN]; char *buf = sbuf; - size_t bufsize1 = (len1 == -1) ? 0 : len1 + 1; - size_t bufsize2 = (len2 == -1) ? 0 : len2 + 1; + size_t bufsize1 = len1 + 1; + size_t bufsize2 = len2 + 1; + char *buf1; + char *buf2; const char *arg1n; const char *arg2n; int result; @@ -907,32 +900,16 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, if (bufsize1 + bufsize2 > TEXTBUFLEN) buf = palloc(bufsize1 + bufsize2); - /* nul-terminate arguments if necessary */ - if (len1 == -1) - { - arg1n = arg1; - } - else - { - char *buf1 = buf; - - memcpy(buf1, arg1, len1); - buf1[len1] = '\0'; - arg1n = buf1; - } + buf1 = buf; + buf2 = buf + bufsize1; - if (len2 == -1) - { - arg2n = arg2; - } - else - { - char *buf2 = buf + bufsize1; + memcpy(buf1, arg1, len1); + buf1[len1] = '\0'; + arg1n = buf1; - memcpy(buf2, arg2, len2); - buf2[len2] = '\0'; - arg2n = buf2; - } + memcpy(buf2, arg2, len2); + buf2[len2] = '\0'; + arg2n = buf2; result = strcoll_l(arg1n, arg2n, locale->lt); @@ -942,15 +919,22 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, return result; } +/* + * strcoll_libc + */ +static int +strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) +{ + return strcoll_l(arg1, arg2, locale->lt); +} + /* * strnxfrm_libc * - * NUL-terminate src, if necessary, and pass to strxfrm_l(). - * - * A source length of -1 means that it's already NUL-terminated. + * NUL-terminate src and pass to strxfrm_l(). */ -size_t -strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, +static size_t +strnxfrm_libc(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale) { char sbuf[TEXTBUFLEN]; @@ -958,9 +942,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t bufsize = srclen + 1; size_t result; - if (srclen == -1) - return strxfrm_l(dest, src, destsize, locale->lt); - if (bufsize > TEXTBUFLEN) buf = palloc(bufsize); @@ -979,6 +960,15 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, return result; } +/* + * strxfrm_libc + */ +static size_t +strxfrm_libc(char *dest, size_t destsize, const char *src, pg_locale_t locale) +{ + return strxfrm_l(dest, src, destsize, locale->lt); +} + char * get_collation_actual_version_libc(const char *collcollate) { @@ -1049,13 +1039,11 @@ get_collation_actual_version_libc(const char *collcollate) * * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and * invoke wcscoll_l(). - * - * An input string length of -1 means that it's NUL-terminated. */ #ifdef WIN32 static int -strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2, - ssize_t len2, pg_locale_t locale) +strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, + size_t len2, pg_locale_t locale) { char sbuf[TEXTBUFLEN]; char *buf = sbuf; @@ -1068,11 +1056,6 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2, Assert(GetDatabaseEncoding() == PG_UTF8); - if (len1 == -1) - len1 = strlen(arg1); - if (len2 == -1) - len2 = strlen(arg2); - a1len = len1 * 2 + 2; a2len = len2 * 2 + 2; @@ -1120,6 +1103,16 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2, return result; } + +static int +strcoll_libc_win32_utf8(const char *arg1, const char *arg2, + pg_locale_t locale) +{ + size_t len1 = strlen(arg1); + size_t len2 = strlen(arg2); + + return strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale); +} #endif /* WIN32 */ /* simple subroutine for reporting errors from newlocale() */ diff --git a/src/common/unicode/case_test.c b/src/common/unicode/case_test.c index 099530c1ead..a0dbf00b671 100644 --- a/src/common/unicode/case_test.c +++ b/src/common/unicode/case_test.c @@ -34,7 +34,7 @@ static UCaseMap *casemap = NULL; #endif typedef size_t (*TestFunc) (char *dst, size_t dstsize, const char *src, - ssize_t srclen); + size_t srclen); /* simple boundary iterator copied from pg_locale_builtin.c */ struct WordBoundaryState @@ -114,6 +114,7 @@ icu_test_full(char *str) char icu_upper[BUFSZ]; char icu_fold[BUFSZ]; UErrorCode status; + size_t len = strlen(str); /* full case mapping doesn't use posix semantics */ struct WordBoundaryState wbstate = { @@ -125,18 +126,18 @@ icu_test_full(char *str) .prev_alnum = false, }; - unicode_strlower(lower, BUFSZ, str, -1, true); - unicode_strtitle(title, BUFSZ, str, -1, true, initcap_wbnext, &wbstate); - unicode_strupper(upper, BUFSZ, str, -1, true); - unicode_strfold(fold, BUFSZ, str, -1, true); + unicode_strlower(lower, BUFSZ, str, len, true); + unicode_strtitle(title, BUFSZ, str, len, true, initcap_wbnext, &wbstate); + unicode_strupper(upper, BUFSZ, str, len, true); + unicode_strfold(fold, BUFSZ, str, len, true); status = U_ZERO_ERROR; - ucasemap_utf8ToLower(casemap, icu_lower, BUFSZ, str, -1, &status); + ucasemap_utf8ToLower(casemap, icu_lower, BUFSZ, str, len, &status); status = U_ZERO_ERROR; - ucasemap_utf8ToTitle(casemap, icu_title, BUFSZ, str, -1, &status); + ucasemap_utf8ToTitle(casemap, icu_title, BUFSZ, str, len, &status); status = U_ZERO_ERROR; - ucasemap_utf8ToUpper(casemap, icu_upper, BUFSZ, str, -1, &status); + ucasemap_utf8ToUpper(casemap, icu_upper, BUFSZ, str, len, &status); status = U_ZERO_ERROR; - ucasemap_utf8FoldCase(casemap, icu_fold, BUFSZ, str, -1, &status); + ucasemap_utf8FoldCase(casemap, icu_fold, BUFSZ, str, len, &status); if (strcmp(lower, icu_lower) != 0) { @@ -209,18 +210,16 @@ static void test_convert(TestFunc tfunc, const char *test_string, const char *expected) { size_t src1len = strlen(test_string); - size_t src2len = -1; /* NUL-terminated */ size_t dst1len = strlen(expected); size_t dst2len = strlen(expected) + 1; /* NUL-terminated */ char *src1 = malloc(src1len); char *dst1 = malloc(dst1len); - char *src2 = strdup(test_string); char *dst2 = malloc(dst2len); size_t needed; memcpy(src1, test_string, src1len); /* not NUL-terminated */ - /* neither source nor destination are NUL-terminated */ + /* destination is not NUL-terminated */ memset(dst1, 0x7F, dst1len); needed = tfunc(dst1, dst1len, src1, src1len); if (needed != strlen(expected)) @@ -236,7 +235,7 @@ test_convert(TestFunc tfunc, const char *test_string, const char *expected) exit(1); } - /* destination is NUL-terminated and source is not */ + /* destination is NUL-terminated */ memset(dst2, 0x7F, dst2len); needed = tfunc(dst2, dst2len, src1, src1len); if (needed != strlen(expected)) @@ -252,59 +251,25 @@ test_convert(TestFunc tfunc, const char *test_string, const char *expected) exit(1); } - /* source is NUL-terminated and destination is not */ - memset(dst1, 0x7F, dst1len); - needed = tfunc(dst1, dst1len, src2, src2len); - if (needed != strlen(expected)) - { - printf("case_test: convert_case test3 FAILURE: '%s' needed %zu expected %zu\n", - test_string, needed, strlen(expected)); - printf("case_test: convert_case test3 FAILURE: needed %zu\n", needed); - exit(1); - } - if (memcmp(dst1, expected, dst1len) != 0) - { - printf("case_test: convert_case test3 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n", - test_string, (int) dst1len, dst1, expected); - exit(1); - } - - /* both source and destination are NUL-terminated */ - memset(dst2, 0x7F, dst2len); - needed = tfunc(dst2, dst2len, src2, src2len); - if (needed != strlen(expected)) - { - printf("case_test: convert_case test4 FAILURE: '%s' needed %zu expected %zu\n", - test_string, needed, strlen(expected)); - exit(1); - } - if (strcmp(dst2, expected) != 0) - { - printf("case_test: convert_case test4 FAILURE: test: '%s' result: '%s' expected: '%s'\n", - test_string, dst2, expected); - exit(1); - } - free(src1); free(dst1); - free(src2); free(dst2); } static size_t tfunc_lower(char *dst, size_t dstsize, const char *src, - ssize_t srclen) + size_t srclen) { return unicode_strlower(dst, dstsize, src, srclen, true); } static size_t tfunc_title(char *dst, size_t dstsize, const char *src, - ssize_t srclen) + size_t srclen) { struct WordBoundaryState wbstate = { .str = src, - .len = (srclen < 0) ? strlen(src) : srclen, + .len = srclen, .offset = 0, .init = false, .prev_alnum = false, @@ -316,14 +281,14 @@ tfunc_title(char *dst, size_t dstsize, const char *src, static size_t tfunc_upper(char *dst, size_t dstsize, const char *src, - ssize_t srclen) + size_t srclen) { return unicode_strupper(dst, dstsize, src, srclen, true); } static size_t tfunc_fold(char *dst, size_t dstsize, const char *src, - ssize_t srclen) + size_t srclen) { return unicode_strfold(dst, dstsize, src, srclen, true); } diff --git a/src/common/unicode_case.c b/src/common/unicode_case.c index 0b8d3ffc0b4..d6ee00b7d9c 100644 --- a/src/common/unicode_case.c +++ b/src/common/unicode_case.c @@ -39,7 +39,7 @@ static const char32_t *const casekind_map[NCaseKind] = }; static char32_t find_case_map(char32_t ucs, const char32_t *map); -static size_t convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen, +static size_t convert_case(char *dst, size_t dstsize, const char *src, size_t srclen, CaseKind str_casekind, bool full, WordBoundaryNext wbnext, void *wbstate); static enum CaseMapResult casemap(char32_t u1, CaseKind casekind, bool full, @@ -84,8 +84,7 @@ unicode_casefold_simple(char32_t code) * Convert src to lowercase, and return the result length (not including * terminating NUL). * - * String src must be encoded in UTF-8. If srclen < 0, src must be - * NUL-terminated. + * String src must be encoded in UTF-8. * * Result string is stored in dst, truncating if larger than dstsize. If * dstsize is greater than the result length, dst will be NUL-terminated; @@ -98,7 +97,7 @@ unicode_casefold_simple(char32_t code) * conditions are satisfied. */ size_t -unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, +unicode_strlower(char *dst, size_t dstsize, const char *src, size_t srclen, bool full) { return convert_case(dst, dstsize, src, srclen, CaseLower, full, NULL, @@ -111,8 +110,7 @@ unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, * Convert src to titlecase, and return the result length (not including * terminating NUL). * - * String src must be encoded in UTF-8. If srclen < 0, src must be - * NUL-terminated. + * String src must be encoded in UTF-8. * * Result string is stored in dst, truncating if larger than dstsize. If * dstsize is greater than the result length, dst will be NUL-terminated; @@ -135,7 +133,7 @@ unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, * the string to indicate the final boundary. */ size_t -unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, +unicode_strtitle(char *dst, size_t dstsize, const char *src, size_t srclen, bool full, WordBoundaryNext wbnext, void *wbstate) { return convert_case(dst, dstsize, src, srclen, CaseTitle, full, wbnext, @@ -148,8 +146,7 @@ unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, * Convert src to uppercase, and return the result length (not including * terminating NUL). * - * String src must be encoded in UTF-8. If srclen < 0, src must be - * NUL-terminated. + * String src must be encoded in UTF-8. * * Result string is stored in dst, truncating if larger than dstsize. If * dstsize is greater than the result length, dst will be NUL-terminated; @@ -162,7 +159,7 @@ unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, * conditions are satisfied. */ size_t -unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, +unicode_strupper(char *dst, size_t dstsize, const char *src, size_t srclen, bool full) { return convert_case(dst, dstsize, src, srclen, CaseUpper, full, NULL, @@ -175,8 +172,7 @@ unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, * Case fold src, and return the result length (not including terminating * NUL). * - * String src must be encoded in UTF-8. If srclen < 0, src must be - * NUL-terminated. + * String src must be encoded in UTF-8. * * Result string is stored in dst, truncating if larger than dstsize. If * dstsize is greater than the result length, dst will be NUL-terminated; @@ -186,7 +182,7 @@ unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, * required buffer size before allocating. */ size_t -unicode_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, +unicode_strfold(char *dst, size_t dstsize, const char *src, size_t srclen, bool full) { return convert_case(dst, dstsize, src, srclen, CaseFold, full, NULL, @@ -210,7 +206,7 @@ unicode_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, * map a single codepoint to multiple codepoints, or depend on conditions. */ static size_t -convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen, +convert_case(char *dst, size_t dstsize, const char *src, size_t srclen, CaseKind str_casekind, bool full, WordBoundaryNext wbnext, void *wbstate) { @@ -229,7 +225,7 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen, Assert(boundary == 0); /* start of text is always a boundary */ } - while ((srclen < 0 || srcoff < srclen) && src[srcoff] != '\0') + while (srcoff < srclen) { char32_t u1 = utf8_to_unicode((const unsigned char *) src + srcoff); int u1len = unicode_utf8len(u1); diff --git a/src/include/common/unicode_case.h b/src/include/common/unicode_case.h index 2737c1382d4..03add78cabe 100644 --- a/src/include/common/unicode_case.h +++ b/src/include/common/unicode_case.h @@ -21,13 +21,13 @@ char32_t unicode_titlecase_simple(char32_t code); char32_t unicode_uppercase_simple(char32_t code); char32_t unicode_casefold_simple(char32_t code); size_t unicode_strlower(char *dst, size_t dstsize, const char *src, - ssize_t srclen, bool full); + size_t srclen, bool full); size_t unicode_strtitle(char *dst, size_t dstsize, const char *src, - ssize_t srclen, bool full, + size_t srclen, bool full, WordBoundaryNext wbnext, void *wbstate); size_t unicode_strupper(char *dst, size_t dstsize, const char *src, - ssize_t srclen, bool full); + size_t srclen, bool full); size_t unicode_strfold(char *dst, size_t dstsize, const char *src, - ssize_t srclen, bool full); + size_t srclen, bool full); #endif /* UNICODE_CASE_H */ diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 444350bb803..b74821fdfa9 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -63,20 +63,29 @@ typedef struct pg_locale_struct *pg_locale_t; struct collate_methods { /* required */ - int (*strncoll) (const char *arg1, ssize_t len1, - const char *arg2, ssize_t len2, + int (*strncoll) (const char *arg1, size_t len1, + const char *arg2, size_t len2, pg_locale_t locale); + int (*strcoll) (const char *arg1, const char *arg2, + pg_locale_t locale); + /* required */ size_t (*strnxfrm) (char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); + size_t (*strxfrm) (char *dest, size_t destsize, + const char *src, pg_locale_t locale); + /* optional */ size_t (*strnxfrm_prefix) (char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); + size_t (*strxfrm_prefix) (char *dest, size_t destsize, + const char *src, pg_locale_t locale); + /* * If the strnxfrm method is not trusted to return the correct results, * set strxfrm_is_safe to false. It set to false, the method will not be @@ -90,19 +99,19 @@ struct ctype_methods { /* case mapping: LOWER()/INITCAP()/UPPER() */ size_t (*strlower) (char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); size_t (*strtitle) (char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); size_t (*strupper) (char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); size_t (*strfold) (char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); size_t (*downcase_ident) (char *dest, size_t destsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); /* required */ @@ -172,32 +181,32 @@ extern pg_locale_t pg_newlocale_from_collation(Oid collid); extern char *get_collation_actual_version(char collprovider, const char *collcollate); extern size_t pg_strlower(char *dst, size_t dstsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); extern size_t pg_strtitle(char *dst, size_t dstsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); extern size_t pg_strupper(char *dst, size_t dstsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); extern size_t pg_strfold(char *dst, size_t dstsize, - const char *src, ssize_t srclen, + const char *src, size_t srclen, pg_locale_t locale); extern size_t pg_downcase_ident(char *dst, size_t dstsize, - const char *src, ssize_t srclen); + const char *src, size_t srclen); extern int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale); -extern int pg_strncoll(const char *arg1, ssize_t len1, - const char *arg2, ssize_t len2, pg_locale_t locale); +extern int pg_strncoll(const char *arg1, size_t len1, + const char *arg2, size_t len2, pg_locale_t locale); extern bool pg_strxfrm_enabled(pg_locale_t locale); extern size_t pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale); extern size_t pg_strnxfrm(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); + size_t srclen, pg_locale_t locale); extern bool pg_strxfrm_prefix_enabled(pg_locale_t locale); extern size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, pg_locale_t locale); extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); + size_t srclen, pg_locale_t locale); extern bool pg_iswdigit(pg_wchar wc, pg_locale_t locale); extern bool pg_iswalpha(pg_wchar wc, pg_locale_t locale); -- 2.43.0