From 70d98770ce6c1795ab172adf10bda87dafa310e3 Mon Sep 17 00:00:00 2001
From: Jeff Davis
Date: Tue, 14 Mar 2023 09:58:29 -0700
Subject: [PATCH v5 1/5] Support language tags in older ICU versions (53 and earlier).

By calling uloc_canonicalize() before parsing the attributes, the
existing locale attribute parsing logic works on language tags as
well.

pg_import_system_collations() now uses the language tag as the
collation's locale for all ICU versions, which makes the separate
non-ASCII check on the locale string redundant; check the language
tag once.

Fix a small memory leak, too.

Discussion: http://postgr.es/m/60da0cecfb512a78b8666b31631a636215d8ce73.camel@j-davis.com
---
 src/backend/commands/collationcmds.c          |  8 +++---
 src/backend/utils/adt/pg_locale.c             | 26 ++++++++++++++++---
 .../regress/expected/collate.icu.utf8.out     |  8 ++++++
 src/test/regress/sql/collate.icu.utf8.sql     |  4 +++
 4 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 8949684afe..b8f2e7059f 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -950,7 +950,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 			const char *name;
 			char	   *langtag;
 			char	   *icucomment;
-			const char *iculocstr;
 			Oid			collid;
 
 			if (i == -1)
@@ -959,20 +958,19 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 				name = uloc_getAvailable(i);
 
 			langtag = get_icu_language_tag(name);
-			iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
 
 			/*
 			 * Be paranoid about not allowing any non-ASCII strings into
 			 * pg_collation
 			 */
-			if (!pg_is_ascii(langtag) || !pg_is_ascii(iculocstr))
+			if (!pg_is_ascii(langtag))
 				continue;
 
 			collid = CollationCreate(psprintf("%s-x-icu", langtag),
 									 nspid, GetUserId(),
 									 COLLPROVIDER_ICU, true, -1,
-									 NULL, NULL, iculocstr, NULL,
-									 get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
+									 NULL, NULL, langtag, NULL,
+									 get_collation_actual_version(COLLPROVIDER_ICU, langtag),
 									 true, true);
 			if (OidIsValid(collid))
 			{
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 1d3d4d86d3..b9c7fbd511 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2643,9 +2643,28 @@ pg_attribute_unused()
 static void
 icu_set_collation_attributes(UCollator *collator, const char *loc)
 {
-	char	   *str = asc_tolower(loc, strlen(loc));
+	UErrorCode	status;
+	int32_t		len;
+	char	   *icu_locale_id;
+	char	   *lower_str;
+	char	   *str;
 
-	str = strchr(str, '@');
+	/* first, make sure the string is an ICU format locale ID */
+	status = U_ZERO_ERROR;
+	len = uloc_canonicalize(loc, NULL, 0, &status);
+	icu_locale_id = palloc(len + 1);
+	status = U_ZERO_ERROR;
+	len = uloc_canonicalize(loc, icu_locale_id, len + 1, &status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("canonicalization failed for locale string \"%s\": %s",
+						loc, u_errorName(status))));
+
+	lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
+
+	pfree(icu_locale_id);
+
+	str = strchr(lower_str, '@');
 	if (!str)
 		return;
 	str++;
@@ -2660,7 +2679,6 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
 			char	   *value;
 			UColAttribute uattr;
 			UColAttributeValue uvalue;
-			UErrorCode	status;
 
 			status = U_ZERO_ERROR;
 
@@ -2727,6 +2745,8 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
 								loc, u_errorName(status))));
 		}
 	}
+
+	pfree(lower_str);
 }
 
 #endif							/* USE_ICU */
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 9a3e12e42d..6225b575ce 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1304,6 +1304,14 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
  t        | t
 (1 row)
 
+-- test language tags
+CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
+SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
+ ?column? 
+----------
+ t
+(1 row)
+
 CREATE TABLE test1cs (x text COLLATE case_sensitive);
 CREATE TABLE test2cs (x text COLLATE case_sensitive);
 CREATE TABLE test3cs (x text COLLATE case_sensitive);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 0790068f31..64cbfd0a5b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -518,6 +518,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
 SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
 SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
 
+-- test language tags
+CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
+SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
+
 CREATE TABLE test1cs (x text COLLATE case_sensitive);
 CREATE TABLE test2cs (x text COLLATE case_sensitive);
 CREATE TABLE test3cs (x text COLLATE case_sensitive);
-- 
2.34.1