From fc680ddf2d11ffc211cd5f3c3521564da16592f2 Mon Sep 17 00:00:00 2001 From: JoongHyuk Shin Date: Thu, 19 Mar 2026 17:10:52 +0900 Subject: [PATCH] Move makeObjectName to src/common/ and fix pg_dump NOT NULL constraint name comparison pg_dump compared NOT NULL constraint names using a simple psprintf("%s_not_null", ...) format, which did not account for NAMEDATALEN truncation. When a table or column name was long enough to trigger truncation, the generated name differed from what ChooseConstraintName() produces via makeObjectName(), causing pg_dump to emit a redundant CONSTRAINT clause. Fix by making makeObjectName() available to frontend code. Move it from src/backend/commands/indexcmds.c to src/common/objectname.c, adding an encoding parameter so callers can specify the encoding for multibyte-aware truncation. Also move pg_encoding_mbcliplen() to src/common/wchar.c for the same reason. Backend callers pass GetDatabaseEncoding(); pg_dump passes the server encoding obtained via PQparameterStatus(). --- src/backend/catalog/pg_constraint.c | 5 +- src/backend/catalog/pg_type.c | 6 +- src/backend/commands/indexcmds.c | 88 +--------------------- src/backend/commands/statscmds.c | 5 +- src/backend/utils/mb/mbutils.c | 31 -------- src/bin/pg_dump/pg_backup.h | 3 + src/bin/pg_dump/pg_dump.c | 36 +++++++-- src/bin/pg_dump/t/002_pg_dump.pl | 38 ++++++++++ src/common/Makefile | 1 + src/common/meson.build | 1 + src/common/objectname.c | 112 ++++++++++++++++++++++++++++ src/common/wchar.c | 53 +++++++++++++ src/include/commands/defrem.h | 2 - src/include/common/string.h | 4 + 14 files changed, 258 insertions(+), 127 deletions(-) create mode 100644 src/common/objectname.c diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index b12765ae691..8219714ec2e 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -29,6 +29,8 @@ #include "catalog/pg_type.h" #include "commands/defrem.h" #include "common/int.h" +#include "common/string.h" +#include "mb/pg_wchar.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/fmgroids.h" @@ -533,7 +535,8 @@ ChooseConstraintName(const char *name1, const char *name2, for (;;) { - conname = makeObjectName(name1, name2, modlabel); + conname = makeObjectName(name1, name2, modlabel, + GetDatabaseEncoding()); found = false; diff --git a/src/backend/catalog/pg_type.c b/src/backend/catalog/pg_type.c index fc369c35aa6..3d4363f82f8 100644 --- a/src/backend/catalog/pg_type.c +++ b/src/backend/catalog/pg_type.c @@ -28,6 +28,7 @@ #include "catalog/pg_type.h" #include "commands/defrem.h" #include "commands/typecmds.h" +#include "common/string.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/acl.h" @@ -856,7 +857,7 @@ makeArrayTypeName(const char *typeName, Oid typeNamespace) */ /* First, try with no numeric suffix */ - arr_name = makeObjectName("", typeName, NULL); + arr_name = makeObjectName("", typeName, NULL, GetDatabaseEncoding()); for (;;) { @@ -868,7 +869,8 @@ makeArrayTypeName(const char *typeName, Oid typeNamespace) /* That attempt conflicted. Prepare a new name with some digits. */ pfree(arr_name); snprintf(suffix, sizeof(suffix), "%d", ++pass); - arr_name = makeObjectName("", typeName, suffix); + arr_name = makeObjectName("", typeName, suffix, + GetDatabaseEncoding()); } return arr_name; diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index cbd76066f74..5b5daa37e6b 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -44,6 +44,7 @@ #include "commands/progress.h" #include "commands/tablecmds.h" #include "commands/tablespace.h" +#include "common/string.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -2518,90 +2519,6 @@ GetOperatorFromCompareType(Oid opclass, Oid rhstype, CompareType cmptype, get_opfamily_name(opfamily, false), get_am_name(amid))); } -/* - * makeObjectName() - * - * Create a name for an implicitly created index, sequence, constraint, - * extended statistics, etc. - * - * The parameters are typically: the original table name, the original field - * name, and a "type" string (such as "seq" or "pkey"). The field name - * and/or type can be NULL if not relevant. - * - * The result is a palloc'd string. - * - * The basic result we want is "name1_name2_label", omitting "_name2" or - * "_label" when those parameters are NULL. However, we must generate - * a name with less than NAMEDATALEN characters! So, we truncate one or - * both names if necessary to make a short-enough string. The label part - * is never truncated (so it had better be reasonably short). - * - * The caller is responsible for checking uniqueness of the generated - * name and retrying as needed; retrying will be done by altering the - * "label" string (which is why we never truncate that part). - */ -char * -makeObjectName(const char *name1, const char *name2, const char *label) -{ - char *name; - int overhead = 0; /* chars needed for label and underscores */ - int availchars; /* chars available for name(s) */ - int name1chars; /* chars allocated to name1 */ - int name2chars; /* chars allocated to name2 */ - int ndx; - - name1chars = strlen(name1); - if (name2) - { - name2chars = strlen(name2); - overhead++; /* allow for separating underscore */ - } - else - name2chars = 0; - if (label) - overhead += strlen(label) + 1; - - availchars = NAMEDATALEN - 1 - overhead; - Assert(availchars > 0); /* else caller chose a bad label */ - - /* - * If we must truncate, preferentially truncate the longer name. This - * logic could be expressed without a loop, but it's simple and obvious as - * a loop. - */ - while (name1chars + name2chars > availchars) - { - if (name1chars > name2chars) - name1chars--; - else - name2chars--; - } - - name1chars = pg_mbcliplen(name1, name1chars, name1chars); - if (name2) - name2chars = pg_mbcliplen(name2, name2chars, name2chars); - - /* Now construct the string using the chosen lengths */ - name = palloc(name1chars + name2chars + overhead + 1); - memcpy(name, name1, name1chars); - ndx = name1chars; - if (name2) - { - name[ndx++] = '_'; - memcpy(name + ndx, name2, name2chars); - ndx += name2chars; - } - if (label) - { - name[ndx++] = '_'; - strcpy(name + ndx, label); - } - else - name[ndx] = '\0'; - - return name; -} - /* * Select a nonconflicting name for a new relation. This is ordinarily * used to choose index names (which is why it's here) but it can also @@ -2652,7 +2569,8 @@ ChooseRelationName(const char *name1, const char *name2, SysScanDesc scan; bool collides; - relname = makeObjectName(name1, name2, modlabel); + relname = makeObjectName(name1, name2, modlabel, + GetDatabaseEncoding()); /* is there any conflicting relation name? */ ScanKeyInit(&key[0], diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c index c1da79f36ba..201e08d1654 100644 --- a/src/backend/commands/statscmds.c +++ b/src/backend/commands/statscmds.c @@ -27,6 +27,8 @@ #include "catalog/pg_statistic_ext_data.h" #include "commands/comment.h" #include "commands/defrem.h" +#include "common/string.h" +#include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" @@ -860,7 +862,8 @@ ChooseExtendedStatisticName(const char *name1, const char *name2, { Oid existingstats; - stxname = makeObjectName(name1, name2, modlabel); + stxname = makeObjectName(name1, name2, modlabel, + GetDatabaseEncoding()); existingstats = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid, PointerGetDatum(stxname), diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 78f4d5e202c..116b150bbf8 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -1214,37 +1214,6 @@ pg_mbcliplen(const char *mbstr, int len, int limit) len, limit); } -/* - * pg_mbcliplen with specified encoding; string must be valid in encoding - */ -int -pg_encoding_mbcliplen(int encoding, const char *mbstr, - int len, int limit) -{ - mblen_converter mblen_fn; - int clen = 0; - int l; - - /* optimization for single byte encoding */ - if (pg_encoding_max_length(encoding) == 1) - return cliplen(mbstr, len, limit); - - mblen_fn = pg_wchar_table[encoding].mblen; - - while (len > 0 && *mbstr) - { - l = (*mblen_fn) ((const unsigned char *) mbstr); - if ((clen + l) > limit) - break; - clen += l; - if (clen == limit) - break; - len -= l; - mbstr += l; - } - return clen; -} - /* * Similar to pg_mbcliplen except the limit parameter specifies the * character length, not the byte length. diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index fda912ba0a9..4d1e4635d5f 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -242,6 +242,9 @@ typedef struct Archive /* info needed for string escaping */ int encoding; /* libpq code for client_encoding */ + int server_encoding; /* libpq code for server encoding; + * distinct from encoding (client + * encoding) */ bool std_strings; /* standard_conforming_strings */ /* other important stuff */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index b41a3ae3db4..b344d2e1437 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -60,6 +60,8 @@ #include "common/int.h" #include "common/relpath.h" #include "common/shortest_dec.h" +#include "common/string.h" +#include "mb/pg_wchar.h" #include "compress_io.h" #include "dumputils.h" #include "fe_utils/option_utils.h" @@ -1436,6 +1438,20 @@ setup_connection(Archive *AH, const char *dumpencoding, AH->encoding = PQclientEncoding(conn); setFmtEncoding(AH->encoding); + /* + * Get the server encoding so we can replicate the server's object name + * truncation logic (which uses server encoding, not client encoding). + */ + { + const char *senc = PQparameterStatus(conn, "server_encoding"); + + if (senc == NULL) + pg_fatal("could not get server encoding"); + AH->server_encoding = pg_char_to_encoding(senc); + if (AH->server_encoding < 0) + pg_fatal("unrecognized server encoding \"%s\"", senc); + } + /* * Set the role if requested. In a parallel dump worker, we'll be passed * use_role == NULL, but AH->use_role is already set (if user specified it @@ -10208,9 +10224,14 @@ determineNotNullFlags(Archive *fout, PGresult *res, int r, { char *default_name; - /* XXX should match ChooseConstraintName better */ - default_name = psprintf("%s_%s_not_null", tbinfo->dobj.name, - tbinfo->attnames[j]); + /* + * Use makeObjectName to match ChooseConstraintName's + * truncation + */ + default_name = makeObjectName(tbinfo->dobj.name, + tbinfo->attnames[j], + "not_null", + fout->server_encoding); if (strcmp(default_name, PQgetvalue(res, r, i_notnull_name)) == 0) tbinfo->notnull_constrs[j] = ""; @@ -12903,8 +12924,13 @@ dumpDomain(Archive *fout, const TypeInfo *tyinfo) { char *default_name; - /* XXX should match ChooseConstraintName better */ - default_name = psprintf("%s_not_null", tyinfo->dobj.name); + /* + * Use makeObjectName to match ChooseConstraintName's + * truncation + */ + default_name = makeObjectName(tyinfo->dobj.name, NULL, + "not_null", + fout->server_encoding); if (strcmp(default_name, notnull->dobj.name) == 0) appendPQExpBufferStr(q, " NOT NULL"); diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 051a3d8ea3d..8d0b0ed3df6 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -2293,6 +2293,44 @@ my %tests = ( }, }, + # Test that pg_dump uses makeObjectName truncation logic when deciding + # whether to emit CONSTRAINT for an auto-generated NOT NULL constraint name. + # A 55-char domain name causes makeObjectName to truncate to 54 chars + # before appending "_not_null", so pg_dump must apply the same truncation. + 'CREATE DOMAIN dump_test.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' => { + create_order => 101, + create_sql => + 'CREATE DOMAIN dump_test.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa AS integer NOT NULL;', + regexp => qr/^ + \QCREATE DOMAIN dump_test.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa AS integer NOT NULL;\E + /xm, + like => + { %full_runs, %dump_test_schema_runs, section_pre_data => 1, }, + unlike => { + exclude_dump_test_schema => 1, + only_dump_measurement => 1, + }, + }, + + # A 27-char table name + 27-char column name (total 54 > 53) causes + # makeObjectName to truncate, so pg_dump must apply the same truncation + # when deciding whether to emit CONSTRAINT for a column NOT NULL. + 'CREATE TABLE dump_test.ttttttttttttttttttttttttttt' => { + create_order => 102, + create_sql => + 'CREATE TABLE dump_test.ttttttttttttttttttttttttttt (ttttttttttttttttttttttttttt integer NOT NULL);', + regexp => qr/^ + \QCREATE TABLE dump_test.ttttttttttttttttttttttttttt (\E\n + \s+\Qttttttttttttttttttttttttttt integer NOT NULL\E + /xm, + like => + { %full_runs, %dump_test_schema_runs, section_pre_data => 1, }, + unlike => { + exclude_dump_test_schema => 1, + only_dump_measurement => 1, + }, + }, + 'CREATE FUNCTION dump_test.pltestlang_call_handler' => { create_order => 17, create_sql => 'CREATE FUNCTION dump_test.pltestlang_call_handler() diff --git a/src/common/Makefile b/src/common/Makefile index 2c720caa509..15b48b72016 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -65,6 +65,7 @@ OBJS_COMMON = \ kwlookup.o \ link-canary.o \ md5_common.o \ + objectname.o \ parse_manifest.o \ percentrepl.o \ pg_get_line.o \ diff --git a/src/common/meson.build b/src/common/meson.build index 4f9b8b8263d..81064a8b476 100644 --- a/src/common/meson.build +++ b/src/common/meson.build @@ -19,6 +19,7 @@ common_sources = files( 'kwlookup.c', 'link-canary.c', 'md5_common.c', + 'objectname.c', 'parse_manifest.c', 'percentrepl.c', 'pg_get_line.c', diff --git a/src/common/objectname.c b/src/common/objectname.c new file mode 100644 index 00000000000..f6ecde13457 --- /dev/null +++ b/src/common/objectname.c @@ -0,0 +1,112 @@ +/*------------------------------------------------------------------------- + * + * objectname.c + * helper for generating names of implicitly created objects + * + * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/objectname.c + * + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "common/string.h" +#include "mb/pg_wchar.h" + +/* + * makeObjectName() + * + * Create a name for an implicitly created index, sequence, constraint, + * extended statistics, etc. + * + * The parameters are typically: the original table name, the original field + * name, and a "type" string (such as "seq" or "pkey"). The field name + * and/or type can be NULL if not relevant. + * + * The result is a palloc'd string (pg_malloc'd in frontend code). + * + * The basic result we want is "name1_name2_label", omitting "_name2" or + * "_label" when those parameters are NULL. However, we must generate + * a name with less than NAMEDATALEN characters! So, we truncate one or + * both names if necessary to make a short-enough string. The label part + * is never truncated (so it had better be reasonably short). + * + * The caller is responsible for checking uniqueness of the generated + * name and retrying as needed; retrying will be done by altering the + * "label" string (which is why we never truncate that part). + * + * The encoding parameter is used to avoid splitting multibyte characters + * when truncating. Backend callers should pass GetDatabaseEncoding(); + * frontend callers should pass the server encoding. + */ +char * +makeObjectName(const char *name1, const char *name2, const char *label, + int encoding) +{ + char *name; + int overhead = 0; /* chars needed for label and underscores */ + int availchars; /* chars available for name(s) */ + int name1chars; /* chars allocated to name1 */ + int name2chars; /* chars allocated to name2 */ + int ndx; + + name1chars = strlen(name1); + if (name2) + { + name2chars = strlen(name2); + overhead++; /* allow for separating underscore */ + } + else + name2chars = 0; + if (label) + overhead += strlen(label) + 1; + + availchars = NAMEDATALEN - 1 - overhead; + Assert(availchars > 0); /* else caller chose a bad label */ + + /* + * If we must truncate, preferentially truncate the longer name. This + * logic could be expressed without a loop, but it's simple and obvious as + * a loop. + */ + while (name1chars + name2chars > availchars) + { + if (name1chars > name2chars) + name1chars--; + else + name2chars--; + } + + name1chars = pg_encoding_mbcliplen(encoding, name1, name1chars, name1chars); + if (name2) + name2chars = pg_encoding_mbcliplen(encoding, name2, name2chars, name2chars); + + /* Now construct the string using the chosen lengths */ + name = palloc(name1chars + name2chars + overhead + 1); + memcpy(name, name1, name1chars); + ndx = name1chars; + if (name2) + { + name[ndx++] = '_'; + memcpy(name + ndx, name2, name2chars); + ndx += name2chars; + } + if (label) + { + name[ndx++] = '_'; + strcpy(name + ndx, label); + } + else + name[ndx] = '\0'; + + return name; +} diff --git a/src/common/wchar.c b/src/common/wchar.c index e7b6595b042..ef683e789d7 100644 --- a/src/common/wchar.c +++ b/src/common/wchar.c @@ -2244,3 +2244,56 @@ pg_encoding_max_length(int encoding) pg_wchar_table[encoding].maxmblen : pg_wchar_table[PG_SQL_ASCII].maxmblen; } + +/* + * cliplen for any single-byte encoding. + * + * Note: a copy of this function exists in mbutils.c, used by + * pg_mbcharcliplen(). They are kept separate because pg_mbcharcliplen + * is backend-only while pg_encoding_mbcliplen is in src/common. + */ +static int +cliplen(const char *str, int len, int limit) +{ + int l = 0; + + len = Min(len, limit); + while (l < len && str[l]) + l++; + return l; +} + +/* + * pg_encoding_mbcliplen -- return the byte length of a multibyte string + * (not necessarily NULL terminated) that is no longer than limit bytes. + * This function does not break multibyte character boundaries. + * + * The string must be valid in the specified encoding. + */ +int +pg_encoding_mbcliplen(int encoding, const char *mbstr, + int len, int limit) +{ + mblen_converter mblen_fn; + int clen = 0; + int l; + + /* optimization for single byte encoding */ + if (pg_encoding_max_length(encoding) == 1) + return cliplen(mbstr, len, limit); + + mblen_fn = pg_wchar_table[encoding].mblen; + + while (len > 0 && *mbstr) + { + l = (*mblen_fn) ((const unsigned char *) mbstr); + if ((clen + l) > limit) + break; + clen += l; + if (clen == limit) + break; + len -= l; + mbstr += l; + } + return clen; +} diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 8f4a2d9bbc1..554c01504ec 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -38,8 +38,6 @@ extern ObjectAddress DefineIndex(ParseState *pstate, bool skip_build, bool quiet); extern void ExecReindex(ParseState *pstate, const ReindexStmt *stmt, bool isTopLevel); -extern char *makeObjectName(const char *name1, const char *name2, - const char *label); extern char *ChooseRelationName(const char *name1, const char *name2, const char *label, Oid namespaceid, bool isconstraint); diff --git a/src/include/common/string.h b/src/include/common/string.h index 2a7c31ea74e..9d3963599fc 100644 --- a/src/include/common/string.h +++ b/src/include/common/string.h @@ -23,6 +23,10 @@ typedef struct PromptInterruptContext bool canceled; /* indicates whether cancellation occurred */ } PromptInterruptContext; +/* functions in src/common/objectname.c */ +extern char *makeObjectName(const char *name1, const char *name2, + const char *label, int encoding); + /* functions in src/common/string.c */ extern bool pg_str_endswith(const char *str, const char *end); extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr, -- 2.52.0