From 216bafaf9b0d081bdc670d64d162de75d5b9155c Mon Sep 17 00:00:00 2001 From: shruthikc-gowda Date: Fri, 20 Aug 2021 19:26:12 +0530 Subject: [PATCH v2] Preserve relfilenode and tablespace OID in pg_upgrade This patch preserves relfilenode values and tablespace OIDs during binary upgrade, so that they are the same in the old and new clusters. Author: Shruthi KC, based on an earlier patch from Antonin Houska Discussion: https://www.postgresql.org/message-id/7082.1562337694@localhost --- src/backend/catalog/heap.c | 36 +++++++++ src/backend/catalog/index.c | 1 + src/backend/commands/tablespace.c | 14 +++- src/backend/utils/adt/pg_upgrade_support.c | 44 ++++++++++ src/bin/pg_dump/pg_dump.c | 94 ++++++++++++++-------- src/bin/pg_dump/pg_dumpall.c | 3 + src/bin/pg_upgrade/pg_upgrade.c | 13 +-- src/include/catalog/binary_upgrade.h | 5 ++ src/include/catalog/pg_proc.dat | 16 ++++ .../spgist_name_ops/expected/spgist_name_ops.out | 12 ++- 10 files changed, 195 insertions(+), 43 deletions(-) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 83746d3..f3c99f6 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -91,7 +91,9 @@ /* Potentially set by pg_upgrade_support functions */ Oid binary_upgrade_next_heap_pg_class_oid = InvalidOid; +Oid binary_upgrade_next_heap_pg_class_relfilenode = InvalidOid; Oid binary_upgrade_next_toast_pg_class_oid = InvalidOid; +Oid binary_upgrade_next_toast_pg_class_relfilenode = InvalidOid; static void AddNewRelationTuple(Relation pg_class_desc, Relation new_rel_desc, @@ -379,6 +381,40 @@ heap_create(const char *relname, relfilenode = relid; } + if (IsBinaryUpgrade) + { + /* Override relfilenode? 
*/ + if (relkind == RELKIND_RELATION || relkind == RELKIND_SEQUENCE || + relkind == RELKIND_MATVIEW) + { + if (!OidIsValid(binary_upgrade_next_heap_pg_class_relfilenode)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("relfilenode value not set when in binary upgrade mode"))); + + relfilenode = binary_upgrade_next_heap_pg_class_relfilenode; + binary_upgrade_next_heap_pg_class_relfilenode = InvalidOid; + } + else if (relkind == RELKIND_TOASTVALUE) + { + if (!OidIsValid(binary_upgrade_next_toast_pg_class_relfilenode)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("toast relfilenode value not set when in binary upgrade mode"))); + relfilenode = binary_upgrade_next_toast_pg_class_relfilenode; + binary_upgrade_next_toast_pg_class_relfilenode = InvalidOid; + } + else if (relkind == RELKIND_INDEX) + { + if (!OidIsValid(binary_upgrade_next_index_pg_class_relfilenode)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("pg_class index relfilenode value not set when in binary upgrade mode"))); + relfilenode = binary_upgrade_next_index_pg_class_relfilenode; + binary_upgrade_next_index_pg_class_relfilenode = InvalidOid; + } + } + /* * Never allow a pg_class entry to explicitly specify the database's * default tablespace in reltablespace; force it to zero instead. 
This diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 26bfa74..a124617 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -86,6 +86,7 @@ /* Potentially set by pg_upgrade_support functions */ Oid binary_upgrade_next_index_pg_class_oid = InvalidOid; +Oid binary_upgrade_next_index_pg_class_relfilenode = InvalidOid; /* * Pointer-free representation of variables used when reindexing system diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index a54239a..34e5746 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -88,6 +88,7 @@ char *default_tablespace = NULL; char *temp_tablespaces = NULL; +Oid binary_upgrade_next_pg_tablespace_oid = InvalidOid; static void create_tablespace_directories(const char *location, const Oid tablespaceoid); @@ -335,7 +336,18 @@ CreateTableSpace(CreateTableSpaceStmt *stmt) MemSet(nulls, false, sizeof(nulls)); - tablespaceoid = GetNewOidWithIndex(rel, TablespaceOidIndexId, + if (IsBinaryUpgrade) + { + if (!OidIsValid(binary_upgrade_next_pg_tablespace_oid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("pg_tablespace OID value not set when in binary upgrade mode"))); + + tablespaceoid = binary_upgrade_next_pg_tablespace_oid; + binary_upgrade_next_pg_tablespace_oid = InvalidOid; + } + else + tablespaceoid = GetNewOidWithIndex(rel, TablespaceOidIndexId, Anum_pg_tablespace_oid); values[Anum_pg_tablespace_oid - 1] = ObjectIdGetDatum(tablespaceoid); values[Anum_pg_tablespace_spcname - 1] = diff --git a/src/backend/utils/adt/pg_upgrade_support.c b/src/backend/utils/adt/pg_upgrade_support.c index b5b46d7..d4dc284 100644 --- a/src/backend/utils/adt/pg_upgrade_support.c +++ b/src/backend/utils/adt/pg_upgrade_support.c @@ -30,6 +30,17 @@ do { \ } while (0) Datum +binary_upgrade_set_next_pg_tablespace_oid(PG_FUNCTION_ARGS) +{ + Oid tbspoid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + 
binary_upgrade_next_pg_tablespace_oid = tbspoid; + + PG_RETURN_VOID(); +} + +Datum binary_upgrade_set_next_pg_type_oid(PG_FUNCTION_ARGS) { Oid typoid = PG_GETARG_OID(0); @@ -85,6 +96,17 @@ binary_upgrade_set_next_heap_pg_class_oid(PG_FUNCTION_ARGS) } Datum +binary_upgrade_set_next_heap_pg_class_relfilenode(PG_FUNCTION_ARGS) +{ + Oid nodeoid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_heap_pg_class_relfilenode = nodeoid; + + PG_RETURN_VOID(); +} + +Datum binary_upgrade_set_next_index_pg_class_oid(PG_FUNCTION_ARGS) { Oid reloid = PG_GETARG_OID(0); @@ -96,6 +118,17 @@ binary_upgrade_set_next_index_pg_class_oid(PG_FUNCTION_ARGS) } Datum +binary_upgrade_set_next_index_pg_class_relfilenode(PG_FUNCTION_ARGS) +{ + Oid nodeoid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_index_pg_class_relfilenode = nodeoid; + + PG_RETURN_VOID(); +} + +Datum binary_upgrade_set_next_toast_pg_class_oid(PG_FUNCTION_ARGS) { Oid reloid = PG_GETARG_OID(0); @@ -107,6 +140,17 @@ binary_upgrade_set_next_toast_pg_class_oid(PG_FUNCTION_ARGS) } Datum +binary_upgrade_set_next_toast_pg_class_relfilenode(PG_FUNCTION_ARGS) +{ + Oid nodeoid = PG_GETARG_OID(0); + + CHECK_IS_BINARY_UPGRADE; + binary_upgrade_next_toast_pg_class_relfilenode = nodeoid; + + PG_RETURN_VOID(); +} + +Datum binary_upgrade_set_next_pg_enum_oid(PG_FUNCTION_ARGS) { Oid enumoid = PG_GETARG_OID(0); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 90ac445..6ced774 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -4700,48 +4700,63 @@ binary_upgrade_set_pg_class_oids(Archive *fout, PQExpBuffer upgrade_buffer, Oid pg_class_oid, bool is_index) { + PQExpBuffer upgrade_query = createPQExpBuffer(); + PGresult *upgrade_res; + Oid pg_class_relfilenode; + Oid pg_class_reltoastrelid; + Oid pg_class_relfilenode_toast; + char pg_class_relkind; + Oid pg_index_indexrelid; + Oid pg_class_relfilenode_toast_idx; + + /* + * Preserve the OIDs of the table's 
toast table and index, if any. + * + * One complexity is that the current table definition might not require + * the creation of a TOAST table, but the old database might have a TOAST + * table that was created earlier, before some wide columns were dropped. + * By setting the TOAST oid we force creation of the TOAST heap and index + * by the new backend, so we can copy the files during binary upgrade + * without worrying about this case. + */ + appendPQExpBuffer(upgrade_query, + "SELECT c.relfilenode, c.relkind, c.reltoastrelid, ct.relfilenode AS relfilenode_toast, i.indexrelid, cti.relfilenode AS relfilenode_toast_index " + "FROM pg_catalog.pg_class c LEFT JOIN " + "pg_catalog.pg_index i ON (c.reltoastrelid = i.indrelid AND i.indisvalid) " + "LEFT JOIN pg_catalog.pg_class ct ON (c.reltoastrelid = ct.oid) " + "LEFT JOIN pg_catalog.pg_class AS cti ON (i.indexrelid = cti.oid) " + "WHERE c.oid = '%u'::pg_catalog.oid;", + pg_class_oid); + + upgrade_res = ExecuteSqlQueryForSingleRow(fout, upgrade_query->data); + + pg_class_relfilenode = atooid(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "relfilenode"))); + pg_class_reltoastrelid = atooid(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "reltoastrelid"))); + pg_class_relfilenode_toast = atooid(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "relfilenode_toast"))); + pg_class_relkind = *PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "relkind")); + pg_index_indexrelid = atooid(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "indexrelid"))); + pg_class_relfilenode_toast_idx = atooid(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "relfilenode_toast_index"))); + appendPQExpBufferStr(upgrade_buffer, - "\n-- For binary upgrade, must preserve pg_class oids\n"); + "\n-- For binary upgrade, must preserve pg_class oids and relfilenodes\n"); if (!is_index) { - PQExpBuffer upgrade_query = createPQExpBuffer(); - PGresult *upgrade_res; - Oid pg_class_reltoastrelid; - char pg_class_relkind; - Oid 
pg_index_indexrelid; - appendPQExpBuffer(upgrade_buffer, "SELECT pg_catalog.binary_upgrade_set_next_heap_pg_class_oid('%u'::pg_catalog.oid);\n", pg_class_oid); - /* - * Preserve the OIDs of the table's toast table and index, if any. - * Indexes cannot have toast tables, so we need not make this probe in - * the index code path. - * - * One complexity is that the current table definition might not - * require the creation of a TOAST table, but the old database might - * have a TOAST table that was created earlier, before some wide - * columns were dropped. By setting the TOAST oid we force creation - * of the TOAST heap and index by the new backend, so we can copy the - * files during binary upgrade without worrying about this case. - */ - appendPQExpBuffer(upgrade_query, - "SELECT c.reltoastrelid, c.relkind, i.indexrelid " - "FROM pg_catalog.pg_class c LEFT JOIN " - "pg_catalog.pg_index i ON (c.reltoastrelid = i.indrelid AND i.indisvalid) " - "WHERE c.oid = '%u'::pg_catalog.oid;", - pg_class_oid); - - upgrade_res = ExecuteSqlQueryForSingleRow(fout, upgrade_query->data); - - pg_class_reltoastrelid = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "reltoastrelid"))); - pg_class_relkind = *PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "relkind")); - pg_index_indexrelid = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "indexrelid"))); + /* Not every relation has storage. 
*/ + if (OidIsValid(pg_class_relfilenode)) + appendPQExpBuffer(upgrade_buffer, + "SELECT pg_catalog.binary_upgrade_set_next_heap_pg_class_relfilenode('%u'::pg_catalog.oid);\n", + pg_class_relfilenode); /* * In a pre-v12 database, partitioned tables might be marked as having @@ -4753,20 +4768,32 @@ binary_upgrade_set_pg_class_oids(Archive *fout, appendPQExpBuffer(upgrade_buffer, "SELECT pg_catalog.binary_upgrade_set_next_toast_pg_class_oid('%u'::pg_catalog.oid);\n", pg_class_reltoastrelid); + appendPQExpBuffer(upgrade_buffer, + "SELECT pg_catalog.binary_upgrade_set_next_toast_pg_class_relfilenode('%u'::pg_catalog.oid);\n", + pg_class_relfilenode_toast); /* every toast table has an index */ appendPQExpBuffer(upgrade_buffer, "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n", pg_index_indexrelid); + appendPQExpBuffer(upgrade_buffer, + "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_relfilenode('%u'::pg_catalog.oid);\n", + pg_class_relfilenode_toast_idx); } - PQclear(upgrade_res); - destroyPQExpBuffer(upgrade_query); } else + { appendPQExpBuffer(upgrade_buffer, "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n", pg_class_oid); + appendPQExpBuffer(upgrade_buffer, + "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_relfilenode('%u'::pg_catalog.oid);\n", + pg_class_relfilenode); + } + /* The catalog probe now runs for indexes too, so release it on both paths. */ + PQclear(upgrade_res); + destroyPQExpBuffer(upgrade_query); appendPQExpBufferChar(upgrade_buffer, '\n'); } diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index c291017..618187c 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -1265,6 +1265,13 @@ dumpTablespaces(PGconn *conn) /* needed for buildACLCommands() */ fspcname = pg_strdup(fmtId(spcname)); + if (binary_upgrade) + { + /* The setter is guarded by CHECK_IS_BINARY_UPGRADE, so emit it only for upgrade dumps. */ + appendPQExpBufferStr(buf, "\n-- For binary upgrade, must preserve pg_tablespace oid\n"); + appendPQExpBuffer(buf, "SELECT pg_catalog.binary_upgrade_set_next_pg_tablespace_oid('%u'::pg_catalog.oid);\n", spcoid); + } + appendPQExpBuffer(buf, "CREATE TABLESPACE %s", 
fspcname); appendPQExpBuffer(buf, " OWNER %s", fmtId(spcowner)); diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 3628bd7..acaf343 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -15,12 +15,13 @@ * oids are the same between old and new clusters. This is important * because toast oids are stored as toast pointers in user tables. * - * While pg_class.oid and pg_class.relfilenode are initially the same - * in a cluster, they can diverge due to CLUSTER, REINDEX, or VACUUM - * FULL. In the new cluster, pg_class.oid and pg_class.relfilenode will - * be the same and will match the old pg_class.oid value. Because of - * this, old/new pg_class.relfilenode values will not match if CLUSTER, - * REINDEX, or VACUUM FULL have been performed in the old cluster. + * While pg_class.oid and pg_class.relfilenode are initially the same in a + * cluster, they can diverge due to CLUSTER, REINDEX, or VACUUM FULL. We + * control assignments of pg_class.relfilenode because we want the filenames + * to match between the old and new cluster. + * + * We control assignment of pg_tablespace.oid because we want the oid to match + * between the old and new cluster. * * We control all assignments of pg_type.oid because these oids are stored * in user composite type values. 
diff --git a/src/include/catalog/binary_upgrade.h b/src/include/catalog/binary_upgrade.h index f6e82e7..4ba5748 100644 --- a/src/include/catalog/binary_upgrade.h +++ b/src/include/catalog/binary_upgrade.h @@ -14,14 +14,19 @@ #ifndef BINARY_UPGRADE_H #define BINARY_UPGRADE_H +extern PGDLLIMPORT Oid binary_upgrade_next_pg_tablespace_oid; + extern PGDLLIMPORT Oid binary_upgrade_next_pg_type_oid; extern PGDLLIMPORT Oid binary_upgrade_next_array_pg_type_oid; extern PGDLLIMPORT Oid binary_upgrade_next_mrng_pg_type_oid; extern PGDLLIMPORT Oid binary_upgrade_next_mrng_array_pg_type_oid; extern PGDLLIMPORT Oid binary_upgrade_next_heap_pg_class_oid; +extern PGDLLIMPORT Oid binary_upgrade_next_heap_pg_class_relfilenode; extern PGDLLIMPORT Oid binary_upgrade_next_index_pg_class_oid; +extern PGDLLIMPORT Oid binary_upgrade_next_index_pg_class_relfilenode; extern PGDLLIMPORT Oid binary_upgrade_next_toast_pg_class_oid; +extern PGDLLIMPORT Oid binary_upgrade_next_toast_pg_class_relfilenode; extern PGDLLIMPORT Oid binary_upgrade_next_pg_enum_oid; extern PGDLLIMPORT Oid binary_upgrade_next_pg_authid_oid; diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index b603700..0626525 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11036,6 +11036,22 @@ proname => 'binary_upgrade_set_missing_value', provolatile => 'v', proparallel => 'u', prorettype => 'void', proargtypes => 'oid text text', prosrc => 'binary_upgrade_set_missing_value' }, +{ oid => '4544', descr => 'for use by pg_upgrade', + proname => 'binary_upgrade_set_next_heap_pg_class_relfilenode', provolatile => 'v', + proparallel => 'u', prorettype => 'void', proargtypes => 'oid', + prosrc => 'binary_upgrade_set_next_heap_pg_class_relfilenode' }, +{ oid => '4545', descr => 'for use by pg_upgrade', + proname => 'binary_upgrade_set_next_index_pg_class_relfilenode', provolatile => 'v', + proparallel => 'u', prorettype => 'void', proargtypes => 'oid', + prosrc => 
'binary_upgrade_set_next_index_pg_class_relfilenode' }, +{ oid => '4546', descr => 'for use by pg_upgrade', + proname => 'binary_upgrade_set_next_toast_pg_class_relfilenode', provolatile => 'v', + proparallel => 'u', prorettype => 'void', proargtypes => 'oid', + prosrc => 'binary_upgrade_set_next_toast_pg_class_relfilenode' }, +{ oid => '4547', descr => 'for use by pg_upgrade', + proname => 'binary_upgrade_set_next_pg_tablespace_oid', provolatile => 'v', + proparallel => 'u', prorettype => 'void', proargtypes => 'oid', + prosrc => 'binary_upgrade_set_next_pg_tablespace_oid' }, # conversion functions { oid => '4302', diff --git a/src/test/modules/spgist_name_ops/expected/spgist_name_ops.out b/src/test/modules/spgist_name_ops/expected/spgist_name_ops.out index ac0ddce..8541b4a 100644 --- a/src/test/modules/spgist_name_ops/expected/spgist_name_ops.out +++ b/src/test/modules/spgist_name_ops/expected/spgist_name_ops.out @@ -52,14 +52,18 @@ select * from t ------------------------------------------------------+----+------------------------------------------------------ binary_upgrade_set_next_array_pg_type_oid | | binary_upgrade_set_next_array_pg_type_oid binary_upgrade_set_next_heap_pg_class_oid | | binary_upgrade_set_next_heap_pg_class_oid + binary_upgrade_set_next_heap_pg_class_relfilenode | | binary_upgrade_set_next_heap_pg_class_relfilenode binary_upgrade_set_next_index_pg_class_oid | 1 | binary_upgrade_set_next_index_pg_class_oid + binary_upgrade_set_next_index_pg_class_relfilenode | 1 | binary_upgrade_set_next_index_pg_class_relfilenode binary_upgrade_set_next_multirange_array_pg_type_oid | 1 | binary_upgrade_set_next_multirange_array_pg_type_oid binary_upgrade_set_next_multirange_pg_type_oid | 1 | binary_upgrade_set_next_multirange_pg_type_oid binary_upgrade_set_next_pg_authid_oid | | binary_upgrade_set_next_pg_authid_oid binary_upgrade_set_next_pg_enum_oid | | binary_upgrade_set_next_pg_enum_oid + binary_upgrade_set_next_pg_tablespace_oid | | 
binary_upgrade_set_next_pg_tablespace_oid binary_upgrade_set_next_pg_type_oid | | binary_upgrade_set_next_pg_type_oid binary_upgrade_set_next_toast_pg_class_oid | 1 | binary_upgrade_set_next_toast_pg_class_oid -(9 rows) + binary_upgrade_set_next_toast_pg_class_relfilenode | 1 | binary_upgrade_set_next_toast_pg_class_relfilenode +(13 rows) -- Verify clean failure when INCLUDE'd columns result in overlength tuple -- The error message details are platform-dependent, so show only SQLSTATE @@ -97,14 +101,18 @@ select * from t ------------------------------------------------------+----+------------------------------------------------------ binary_upgrade_set_next_array_pg_type_oid | | binary_upgrade_set_next_array_pg_type_oid binary_upgrade_set_next_heap_pg_class_oid | | binary_upgrade_set_next_heap_pg_class_oid + binary_upgrade_set_next_heap_pg_class_relfilenode | | binary_upgrade_set_next_heap_pg_class_relfilenode binary_upgrade_set_next_index_pg_class_oid | 1 | binary_upgrade_set_next_index_pg_class_oid + binary_upgrade_set_next_index_pg_class_relfilenode | 1 | binary_upgrade_set_next_index_pg_class_relfilenode binary_upgrade_set_next_multirange_array_pg_type_oid | 1 | binary_upgrade_set_next_multirange_array_pg_type_oid binary_upgrade_set_next_multirange_pg_type_oid | 1 | binary_upgrade_set_next_multirange_pg_type_oid binary_upgrade_set_next_pg_authid_oid | | binary_upgrade_set_next_pg_authid_oid binary_upgrade_set_next_pg_enum_oid | | binary_upgrade_set_next_pg_enum_oid + binary_upgrade_set_next_pg_tablespace_oid | | binary_upgrade_set_next_pg_tablespace_oid binary_upgrade_set_next_pg_type_oid | | binary_upgrade_set_next_pg_type_oid binary_upgrade_set_next_toast_pg_class_oid | 1 | binary_upgrade_set_next_toast_pg_class_oid -(9 rows) + binary_upgrade_set_next_toast_pg_class_relfilenode | 1 | binary_upgrade_set_next_toast_pg_class_relfilenode +(13 rows) \set VERBOSITY sqlstate insert into t values(repeat('xyzzy', 12), 42, repeat('xyzzy', 4000)); -- 1.8.3.1