From 45ba378d8f818d9a09d01ac13feaef2fbcf1fbb3 Mon Sep 17 00:00:00 2001 From: JoongHyuk Shin Date: Tue, 17 Mar 2026 14:48:49 +0900 Subject: [PATCH] Move makeObjectName to src/common/ and fix pg_dump NOT NULL constraint name comparison pg_dump generates a constraint name using psprintf("%s_not_null", ...) for domain NOT NULL constraints, and psprintf("%s_%s_not_null", ...) for column NOT NULL constraints. PostgreSQL itself uses makeObjectName() which applies NAMEDATALEN truncation. When the resulting name exceeds NAMEDATALEN-1 characters, the generated name differs from the stored catalog name, so pg_dump incorrectly emits CONSTRAINT NOT NULL instead of the inline NOT NULL form. Fix by moving makeObjectName() from src/backend/commands/indexcmds.c to src/common/string.c (adding an encoding parameter), so pg_dump can call it directly with the server encoding obtained via the archive. Also move pg_encoding_mbcliplen() from src/backend/utils/mb/mbutils.c to src/common/wchar.c for the same reason. Both domain NOT NULL and column NOT NULL code paths in pg_dump.c are updated to use makeObjectName() instead of psprintf(). Add regression tests for the truncation case: a 55-char domain name (where domain_name + "_not_null" > NAMEDATALEN-1) and a 27-char table with a 27-char column (where table_name + "_" + col_name + "_not_null" > NAMEDATALEN-1). --- src/backend/catalog/pg_constraint.c | 5 +- src/backend/catalog/pg_type.c | 6 +- src/backend/commands/indexcmds.c | 88 ++-------------------------- src/backend/commands/statscmds.c | 5 +- src/backend/utils/mb/mbutils.c | 30 +--------- src/bin/pg_dump/pg_backup.h | 2 + src/bin/pg_dump/pg_dump.c | 30 ++++++++-- src/bin/pg_dump/t/002_pg_dump.pl | 38 ++++++++++++ src/common/string.c | 91 +++++++++++++++++++++++++++++ src/common/wchar.c | 47 +++++++++++++++ src/include/commands/defrem.h | 2 - src/include/common/string.h | 2 + 12 files changed, 225 insertions(+), 121 deletions(-) diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index b12765ae691..8219714ec2e 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -29,6 +29,8 @@ #include "catalog/pg_type.h" #include "commands/defrem.h" #include "common/int.h" +#include "common/string.h" +#include "mb/pg_wchar.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/fmgroids.h" @@ -533,7 +535,8 @@ ChooseConstraintName(const char *name1, const char *name2, for (;;) { - conname = makeObjectName(name1, name2, modlabel); + conname = makeObjectName(name1, name2, modlabel, + GetDatabaseEncoding()); found = false; diff --git a/src/backend/catalog/pg_type.c b/src/backend/catalog/pg_type.c index fc369c35aa6..3d4363f82f8 100644 --- a/src/backend/catalog/pg_type.c +++ b/src/backend/catalog/pg_type.c @@ -28,6 +28,7 @@ #include "catalog/pg_type.h" #include "commands/defrem.h" #include "commands/typecmds.h" +#include "common/string.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/acl.h" @@ -856,7 +857,7 @@ makeArrayTypeName(const char *typeName, Oid typeNamespace) */ /* First, try with no numeric suffix */ - arr_name = makeObjectName("", typeName, NULL); + arr_name = makeObjectName("", typeName, NULL, GetDatabaseEncoding()); for (;;) { @@ -868,7 +869,8 @@ makeArrayTypeName(const char *typeName, Oid typeNamespace) /* That attempt conflicted. Prepare a new name with some digits. */ pfree(arr_name); snprintf(suffix, sizeof(suffix), "%d", ++pass); - arr_name = makeObjectName("", typeName, suffix); + arr_name = makeObjectName("", typeName, suffix, + GetDatabaseEncoding()); } return arr_name; diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index cbd76066f74..9a8eca628db 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -44,6 +44,7 @@ #include "commands/progress.h" #include "commands/tablecmds.h" #include "commands/tablespace.h" +#include "common/string.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -2519,88 +2520,10 @@ GetOperatorFromCompareType(Oid opclass, Oid rhstype, CompareType cmptype, } /* - * makeObjectName() - * - * Create a name for an implicitly created index, sequence, constraint, - * extended statistics, etc. - * - * The parameters are typically: the original table name, the original field - * name, and a "type" string (such as "seq" or "pkey"). The field name - * and/or type can be NULL if not relevant. - * - * The result is a palloc'd string. - * - * The basic result we want is "name1_name2_label", omitting "_name2" or - * "_label" when those parameters are NULL. However, we must generate - * a name with less than NAMEDATALEN characters! So, we truncate one or - * both names if necessary to make a short-enough string. The label part - * is never truncated (so it had better be reasonably short). - * - * The caller is responsible for checking uniqueness of the generated - * name and retrying as needed; retrying will be done by altering the - * "label" string (which is why we never truncate that part). + * makeObjectName() is now in src/common/string.c so that frontend code + * can use it too. Backend callers pass GetDatabaseEncoding() as the + * encoding argument. */ -char * -makeObjectName(const char *name1, const char *name2, const char *label) -{ - char *name; - int overhead = 0; /* chars needed for label and underscores */ - int availchars; /* chars available for name(s) */ - int name1chars; /* chars allocated to name1 */ - int name2chars; /* chars allocated to name2 */ - int ndx; - - name1chars = strlen(name1); - if (name2) - { - name2chars = strlen(name2); - overhead++; /* allow for separating underscore */ - } - else - name2chars = 0; - if (label) - overhead += strlen(label) + 1; - - availchars = NAMEDATALEN - 1 - overhead; - Assert(availchars > 0); /* else caller chose a bad label */ - - /* - * If we must truncate, preferentially truncate the longer name. This - * logic could be expressed without a loop, but it's simple and obvious as - * a loop. - */ - while (name1chars + name2chars > availchars) - { - if (name1chars > name2chars) - name1chars--; - else - name2chars--; - } - - name1chars = pg_mbcliplen(name1, name1chars, name1chars); - if (name2) - name2chars = pg_mbcliplen(name2, name2chars, name2chars); - - /* Now construct the string using the chosen lengths */ - name = palloc(name1chars + name2chars + overhead + 1); - memcpy(name, name1, name1chars); - ndx = name1chars; - if (name2) - { - name[ndx++] = '_'; - memcpy(name + ndx, name2, name2chars); - ndx += name2chars; - } - if (label) - { - name[ndx++] = '_'; - strcpy(name + ndx, label); - } - else - name[ndx] = '\0'; - - return name; -} /* * Select a nonconflicting name for a new relation. This is ordinarily @@ -2652,7 +2575,8 @@ ChooseRelationName(const char *name1, const char *name2, SysScanDesc scan; bool collides; - relname = makeObjectName(name1, name2, modlabel); + relname = makeObjectName(name1, name2, modlabel, + GetDatabaseEncoding()); /* is there any conflicting relation name? */ ScanKeyInit(&key[0], diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c index c1da79f36ba..201e08d1654 100644 --- a/src/backend/commands/statscmds.c +++ b/src/backend/commands/statscmds.c @@ -27,6 +27,8 @@ #include "catalog/pg_statistic_ext_data.h" #include "commands/comment.h" #include "commands/defrem.h" +#include "common/string.h" +#include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" @@ -860,7 +862,8 @@ ChooseExtendedStatisticName(const char *name1, const char *name2, { Oid existingstats; - stxname = makeObjectName(name1, name2, modlabel); + stxname = makeObjectName(name1, name2, modlabel, + GetDatabaseEncoding()); existingstats = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid, PointerGetDatum(stxname), diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 78f4d5e202c..fdd2e025a4e 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -1215,35 +1215,9 @@ pg_mbcliplen(const char *mbstr, int len, int limit) } /* - * pg_mbcliplen with specified encoding; string must be valid in encoding + * pg_encoding_mbcliplen is now in src/common/wchar.c so that frontend + * code can use it too. */ -int -pg_encoding_mbcliplen(int encoding, const char *mbstr, - int len, int limit) -{ - mblen_converter mblen_fn; - int clen = 0; - int l; - - /* optimization for single byte encoding */ - if (pg_encoding_max_length(encoding) == 1) - return cliplen(mbstr, len, limit); - - mblen_fn = pg_wchar_table[encoding].mblen; - - while (len > 0 && *mbstr) - { - l = (*mblen_fn) ((const unsigned char *) mbstr); - if ((clen + l) > limit) - break; - clen += l; - if (clen == limit) - break; - len -= l; - mbstr += l; - } - return clen; -} /* * Similar to pg_mbcliplen except the limit parameter specifies the diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index fda912ba0a9..11584840d7d 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -242,6 +242,8 @@ typedef struct Archive /* info needed for string escaping */ int encoding; /* libpq code for client_encoding */ + int server_encoding; /* libpq code for server encoding; distinct + * from encoding (client encoding) */ bool std_strings; /* standard_conforming_strings */ /* other important stuff */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index b41a3ae3db4..f1c37b65cc3 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -60,6 +60,8 @@ #include "common/int.h" #include "common/relpath.h" #include "common/shortest_dec.h" +#include "common/string.h" +#include "mb/pg_wchar.h" #include "compress_io.h" #include "dumputils.h" #include "fe_utils/option_utils.h" @@ -1436,6 +1438,20 @@ setup_connection(Archive *AH, const char *dumpencoding, AH->encoding = PQclientEncoding(conn); setFmtEncoding(AH->encoding); + /* + * Get the server encoding so we can replicate the server's object name + * truncation logic (which uses server encoding, not client encoding). + */ + { + const char *senc = PQparameterStatus(conn, "server_encoding"); + + if (senc == NULL) + pg_fatal("could not get server encoding"); + AH->server_encoding = pg_char_to_encoding(senc); + if (AH->server_encoding < 0) + pg_fatal("unrecognized server encoding \"%s\"", senc); + } + /* * Set the role if requested. In a parallel dump worker, we'll be passed * use_role == NULL, but AH->use_role is already set (if user specified it @@ -10208,9 +10224,11 @@ determineNotNullFlags(Archive *fout, PGresult *res, int r, { char *default_name; - /* XXX should match ChooseConstraintName better */ - default_name = psprintf("%s_%s_not_null", tbinfo->dobj.name, - tbinfo->attnames[j]); + /* Use makeObjectName to match ChooseConstraintName's truncation */ + default_name = makeObjectName(tbinfo->dobj.name, + tbinfo->attnames[j], + "not_null", + fout->server_encoding); if (strcmp(default_name, PQgetvalue(res, r, i_notnull_name)) == 0) tbinfo->notnull_constrs[j] = ""; @@ -12903,8 +12921,10 @@ dumpDomain(Archive *fout, const TypeInfo *tyinfo) { char *default_name; - /* XXX should match ChooseConstraintName better */ - default_name = psprintf("%s_not_null", tyinfo->dobj.name); + /* Use makeObjectName to match ChooseConstraintName's truncation */ + default_name = makeObjectName(tyinfo->dobj.name, NULL, + "not_null", + fout->server_encoding); if (strcmp(default_name, notnull->dobj.name) == 0) appendPQExpBufferStr(q, " NOT NULL"); diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 051a3d8ea3d..8d0b0ed3df6 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -2293,6 +2293,44 @@ my %tests = ( }, }, + # Test that pg_dump uses makeObjectName truncation logic when deciding + # whether to emit CONSTRAINT for an auto-generated NOT NULL constraint name. + # A 55-char domain name causes makeObjectName to truncate to 54 chars + # before appending "_not_null", so pg_dump must apply the same truncation. + 'CREATE DOMAIN dump_test.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' => { + create_order => 101, + create_sql => + 'CREATE DOMAIN dump_test.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa AS integer NOT NULL;', + regexp => qr/^ + \QCREATE DOMAIN dump_test.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa AS integer NOT NULL;\E + /xm, + like => + { %full_runs, %dump_test_schema_runs, section_pre_data => 1, }, + unlike => { + exclude_dump_test_schema => 1, + only_dump_measurement => 1, + }, + }, + + # A 27-char table name + 27-char column name (total 54 > 53) causes + # makeObjectName to truncate, so pg_dump must apply the same truncation + # when deciding whether to emit CONSTRAINT for a column NOT NULL. + 'CREATE TABLE dump_test.ttttttttttttttttttttttttttt' => { + create_order => 102, + create_sql => + 'CREATE TABLE dump_test.ttttttttttttttttttttttttttt (ttttttttttttttttttttttttttt integer NOT NULL);', + regexp => qr/^ + \QCREATE TABLE dump_test.ttttttttttttttttttttttttttt (\E\n + \s+\Qttttttttttttttttttttttttttt integer NOT NULL\E + /xm, + like => + { %full_runs, %dump_test_schema_runs, section_pre_data => 1, }, + unlike => { + exclude_dump_test_schema => 1, + only_dump_measurement => 1, + }, + }, + 'CREATE FUNCTION dump_test.pltestlang_call_handler' => { create_order => 17, create_sql => 'CREATE FUNCTION dump_test.pltestlang_call_handler() diff --git a/src/common/string.c b/src/common/string.c index 41c74a1502d..dd11fcd4a6e 100644 --- a/src/common/string.c +++ b/src/common/string.c @@ -22,6 +22,97 @@ #endif #include "common/string.h" +#include "mb/pg_wchar.h" + + +/* + * makeObjectName() + * + * Create a name for an implicitly created index, sequence, constraint, + * extended statistics, etc. + * + * The parameters are typically: the original table name, the original field + * name, and a "type" string (such as "seq" or "pkey"). The field name + * and/or type can be NULL if not relevant. + * + * The result is a palloc'd string (pg_malloc'd in frontend code). + * + * The basic result we want is "name1_name2_label", omitting "_name2" or + * "_label" when those parameters are NULL. However, we must generate + * a name with less than NAMEDATALEN characters! So, we truncate one or + * both names if necessary to make a short-enough string. The label part + * is never truncated (so it had better be reasonably short). + * + * The caller is responsible for checking uniqueness of the generated + * name and retrying as needed; retrying will be done by altering the + * "label" string (which is why we never truncate that part). + * + * The encoding parameter is used to avoid splitting multibyte characters + * when truncating. Backend callers should pass GetDatabaseEncoding(); + * frontend callers should pass the server encoding. + */ +char * +makeObjectName(const char *name1, const char *name2, const char *label, + int encoding) +{ + char *name; + int overhead = 0; /* chars needed for label and underscores */ + int availchars; /* chars available for name(s) */ + int name1chars; /* chars allocated to name1 */ + int name2chars; /* chars allocated to name2 */ + int ndx; + + name1chars = strlen(name1); + if (name2) + { + name2chars = strlen(name2); + overhead++; /* allow for separating underscore */ + } + else + name2chars = 0; + if (label) + overhead += strlen(label) + 1; + + availchars = NAMEDATALEN - 1 - overhead; + Assert(availchars > 0); /* else caller chose a bad label */ + + /* + * If we must truncate, preferentially truncate the longer name. This + * logic could be expressed without a loop, but it's simple and obvious as + * a loop. + */ + while (name1chars + name2chars > availchars) + { + if (name1chars > name2chars) + name1chars--; + else + name2chars--; + } + + name1chars = pg_encoding_mbcliplen(encoding, name1, name1chars, name1chars); + if (name2) + name2chars = pg_encoding_mbcliplen(encoding, name2, name2chars, name2chars); + + /* Now construct the string using the chosen lengths */ + name = palloc(name1chars + name2chars + overhead + 1); + memcpy(name, name1, name1chars); + ndx = name1chars; + if (name2) + { + name[ndx++] = '_'; + memcpy(name + ndx, name2, name2chars); + ndx += name2chars; + } + if (label) + { + name[ndx++] = '_'; + strcpy(name + ndx, label); + } + else + name[ndx] = '\0'; + + return name; +} /* diff --git a/src/common/wchar.c b/src/common/wchar.c index e7b6595b042..39e2ab815f1 100644 --- a/src/common/wchar.c +++ b/src/common/wchar.c @@ -2244,3 +2244,50 @@ pg_encoding_max_length(int encoding) pg_wchar_table[encoding].maxmblen : pg_wchar_table[PG_SQL_ASCII].maxmblen; } + +/* cliplen for any single-byte encoding */ +static int +cliplen(const char *str, int len, int limit) +{ + int l = 0; + + len = Min(len, limit); + while (l < len && str[l]) + l++; + return l; +} + +/* + * pg_encoding_mbcliplen -- return the byte length of a multibyte string + * (not necessarily NULL terminated) that is no longer than limit bytes. + * This function does not break multibyte character boundaries. + * + * The string must be valid in the specified encoding. + */ +int +pg_encoding_mbcliplen(int encoding, const char *mbstr, + int len, int limit) +{ + mblen_converter mblen_fn; + int clen = 0; + int l; + + /* optimization for single byte encoding */ + if (pg_encoding_max_length(encoding) == 1) + return cliplen(mbstr, len, limit); + + mblen_fn = pg_wchar_table[encoding].mblen; + + while (len > 0 && *mbstr) + { + l = (*mblen_fn) ((const unsigned char *) mbstr); + if ((clen + l) > limit) + break; + clen += l; + if (clen == limit) + break; + len -= l; + mbstr += l; + } + return clen; +} diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 8f4a2d9bbc1..554c01504ec 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -38,8 +38,6 @@ extern ObjectAddress DefineIndex(ParseState *pstate, bool skip_build, bool quiet); extern void ExecReindex(ParseState *pstate, const ReindexStmt *stmt, bool isTopLevel); -extern char *makeObjectName(const char *name1, const char *name2, - const char *label); extern char *ChooseRelationName(const char *name1, const char *name2, const char *label, Oid namespaceid, bool isconstraint); diff --git a/src/include/common/string.h b/src/include/common/string.h index 2a7c31ea74e..07cc230488b 100644 --- a/src/include/common/string.h +++ b/src/include/common/string.h @@ -24,6 +24,8 @@ typedef struct PromptInterruptContext } PromptInterruptContext; /* functions in src/common/string.c */ +extern char *makeObjectName(const char *name1, const char *name2, + const char *label, int encoding); extern bool pg_str_endswith(const char *str, const char *end); extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base); -- 2.52.0