From d45ce69e9d15e5da36ca651d7c6ca46cd84399f2 Mon Sep 17 00:00:00 2001 From: "Andrey M. Borodin" Date: Sat, 23 Jul 2022 14:08:10 +0500 Subject: [PATCH v24 1/4] Refactor amcheck to extract common locking routines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Other indexes will need to do same precautions before doing checks: - ensuring index is checkable - switching user context - taking care about GUCs changed by index functions To reuse existing functionality this commit moves it to amcheck.c. Author: Andrey Borodin Reviewed-By: José Villanova Reviewed-By: Aleksander Alekseev Reviewed-By: Nikolay Samokhvalov Reviewed-By: Andres Freund Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru --- contrib/amcheck/Makefile | 1 + contrib/amcheck/amcheck.c | 173 ++++++++++++++++++++++++ contrib/amcheck/amcheck.h | 30 ++++ contrib/amcheck/meson.build | 1 + contrib/amcheck/verify_nbtree.c | 233 +++++++------------------------- 5 files changed, 256 insertions(+), 182 deletions(-) create mode 100644 contrib/amcheck/amcheck.c create mode 100644 contrib/amcheck/amcheck.h diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile index b82f221e50..6d26551fe3 100644 --- a/contrib/amcheck/Makefile +++ b/contrib/amcheck/Makefile @@ -3,6 +3,7 @@ MODULE_big = amcheck OBJS = \ $(WIN32RES) \ + amcheck.o \ verify_heapam.o \ verify_nbtree.o diff --git a/contrib/amcheck/amcheck.c b/contrib/amcheck/amcheck.c new file mode 100644 index 0000000000..5a9c9429a3 --- /dev/null +++ b/contrib/amcheck/amcheck.c @@ -0,0 +1,173 @@ +/*------------------------------------------------------------------------- + * + * amcheck.c + * Utility functions common to all access methods. + * + * Copyright (c) 2017-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/amcheck.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/table.h" +#include "access/tableam.h" +#include "amcheck.h" +#include "catalog/index.h" +#include "commands/tablecmds.h" +#include "utils/guc.h" + +static bool amcheck_index_mainfork_expected(Relation rel); + + +/* + * Check if index relation should have a file for its main relation fork. + * Verification uses this to skip unlogged indexes when in hot standby mode, + * where there is simply nothing to verify. + * + * NB: Caller should call index_checkable() before calling here. + */ +static bool +amcheck_index_mainfork_expected(Relation rel) +{ + if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED || + !RecoveryInProgress()) + return true; + + ereport(NOTICE, + (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), + errmsg("cannot verify unlogged index \"%s\" during recovery, skipping", + RelationGetRelationName(rel)))); + + return false; +} + +void +amcheck_lock_relation_and_check(Oid indrelid, + Oid am_id, + IndexDoCheckCallback check, + LOCKMODE lockmode, + void *state) +{ + Oid heapid; + Relation indrel; + Relation heaprel; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + + /* + * We must lock table before index to avoid deadlocks. However, if the + * passed indrelid isn't an index then IndexGetRelation() will fail. + * Rather than emitting a not-very-helpful error message, postpone + * complaining, expecting that the is-it-an-index test below will fail. + * + * In hot standby mode this will raise an error when parentcheck is true. + */ + heapid = IndexGetRelation(indrelid, true); + if (OidIsValid(heapid)) + { + heaprel = table_open(heapid, lockmode); + + /* + * Switch to the table owner's userid, so that any index functions are + * run as that user. Also lock down security-restricted operations + * and arrange to make GUC variable changes local to this command. + */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(heaprel->rd_rel->relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + } + else + { + heaprel = NULL; + /* Set these just to suppress "uninitialized variable" warnings */ + save_userid = InvalidOid; + save_sec_context = -1; + save_nestlevel = -1; + } + + /* + * Open the target index relations separately (like relation_openrv(), but + * with heap relation locked first to prevent deadlocking). In hot + * standby mode this will raise an error when parentcheck is true. + * + * There is no need for the usual indcheckxmin usability horizon test + * here, even in the heapallindexed case, because index undergoing + * verification only needs to have entries for a new transaction snapshot. + * (If this is a parentcheck verification, there is no question about + * committed or recently dead heap tuples lacking index entries due to + * concurrent activity.) + */ + indrel = index_open(indrelid, lockmode); + + /* + * Since we did the IndexGetRelation call above without any lock, it's + * barely possible that a race against an index drop/recreation could have + * netted us the wrong table. + */ + if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("could not open parent table of index \"%s\"", + RelationGetRelationName(indrel)))); + + /* Relation suitable for checking */ + index_checkable(indrel, am_id); + + if (amcheck_index_mainfork_expected(indrel)) + check(indrel, heaprel, state); + + /* Roll back any GUC changes executed by index functions */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + /* + * Release locks early. That's ok here because nothing in the called + * routines will trigger shared cache invalidations to be sent, so we can + * relax the usual pattern of only releasing locks after commit. + */ + index_close(indrel, lockmode); + if (heaprel) + table_close(heaprel, lockmode); +} + +/* + * Basic checks about the suitability of a relation for checking as an index. + * + * + * NB: Intentionally not checking permissions, the function is normally not + * callable by non-superusers. If granted, it's useful to be able to check a + * whole cluster. + */ +void +index_checkable(Relation rel, Oid am_id) +{ + if (rel->rd_rel->relkind != RELKIND_INDEX || + rel->rd_rel->relam != am_id) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only B-Tree indexes are supported as targets for verification"), + errdetail("Relation \"%s\" is not a B-Tree index.", + RelationGetRelationName(rel)))); + + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"), + errdetail("Index \"%s\" is associated with temporary relation.", + RelationGetRelationName(rel)))); + + if (!rel->rd_index->indisvalid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot check index \"%s\"", + RelationGetRelationName(rel)), + errdetail("Index is not valid."))); +} diff --git a/contrib/amcheck/amcheck.h b/contrib/amcheck/amcheck.h new file mode 100644 index 0000000000..b139da067a --- /dev/null +++ b/contrib/amcheck/amcheck.h @@ -0,0 +1,30 @@ +/*------------------------------------------------------------------------- + * + * amcheck.h + * Shared routines for amcheck verifications. + * + * Copyright (c) 2017-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/amcheck.h + * + *------------------------------------------------------------------------- + */ +#include "storage/bufpage.h" +#include "storage/lmgr.h" +#include "storage/lockdefs.h" +#include "utils/relcache.h" +#include "miscadmin.h" + +/* Typedefs for callback functions for amcheck_lock_relation */ +typedef void (*IndexCheckableCallback) (Relation index); +typedef void (*IndexDoCheckCallback) (Relation rel, + Relation heaprel, + void *state); + +extern void amcheck_lock_relation_and_check(Oid indrelid, + Oid am_id, + IndexDoCheckCallback check, + LOCKMODE lockmode, void *state); + +extern void index_checkable(Relation rel, Oid am_id); diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build index 5b55cf343a..cd81cbf3bc 100644 --- a/contrib/amcheck/meson.build +++ b/contrib/amcheck/meson.build @@ -1,6 +1,7 @@ # Copyright (c) 2022-2023, PostgreSQL Global Development Group amcheck_sources = files( + 'amcheck.c', 'verify_heapam.c', 'verify_nbtree.c', ) diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 257cff671b..c2ae2cb011 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -29,13 +29,12 @@ #include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" +#include "amcheck.h" #include "catalog/index.h" #include "catalog/pg_am.h" #include "commands/tablecmds.h" #include "common/pg_prng.h" #include "lib/bloomfilter.h" -#include "miscadmin.h" -#include "storage/lmgr.h" #include "storage/smgr.h" #include "utils/guc.h" #include "utils/memutils.h" @@ -135,13 +134,19 @@ typedef struct BtreeLevel bool istruerootlevel; } BtreeLevel; +typedef struct BTCallbackState +{ + bool parentcheck; + bool heapallindexed; + bool rootdescend; +} BTCallbackState; + + PG_FUNCTION_INFO_V1(bt_index_check); PG_FUNCTION_INFO_V1(bt_index_parent_check); -static void bt_index_check_internal(Oid indrelid, bool parentcheck, - bool heapallindexed, bool rootdescend); -static inline void btree_index_checkable(Relation rel); -static inline bool btree_index_mainfork_expected(Relation rel); +static void bt_index_check_callback(Relation indrel, Relation heaprel, + void *state); static void bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, bool readonly, bool heapallindexed, bool rootdescend); @@ -203,12 +208,18 @@ Datum bt_index_check(PG_FUNCTION_ARGS) { Oid indrelid = PG_GETARG_OID(0); - bool heapallindexed = false; + BTCallbackState args; - if (PG_NARGS() == 2) - heapallindexed = PG_GETARG_BOOL(1); + args.heapallindexed = false; + args.rootdescend = false; + args.parentcheck = false; - bt_index_check_internal(indrelid, false, heapallindexed, false); + if (PG_NARGS() >= 2) + args.heapallindexed = PG_GETARG_BOOL(1); + + amcheck_lock_relation_and_check(indrelid, BTREE_AM_OID, + bt_index_check_callback, + AccessShareLock, &args); PG_RETURN_VOID(); } @@ -226,15 +237,20 @@ Datum bt_index_parent_check(PG_FUNCTION_ARGS) { Oid indrelid = PG_GETARG_OID(0); - bool heapallindexed = false; - bool rootdescend = false; + BTCallbackState args; + + args.heapallindexed = false; + args.rootdescend = false; + args.parentcheck = true; if (PG_NARGS() >= 2) - heapallindexed = PG_GETARG_BOOL(1); + args.heapallindexed = PG_GETARG_BOOL(1); if (PG_NARGS() == 3) - rootdescend = PG_GETARG_BOOL(2); + args.rootdescend = PG_GETARG_BOOL(2); - bt_index_check_internal(indrelid, true, heapallindexed, rootdescend); + amcheck_lock_relation_and_check(indrelid, BTREE_AM_OID, + bt_index_check_callback, + ShareLock, &args); PG_RETURN_VOID(); } @@ -243,182 +259,35 @@ bt_index_parent_check(PG_FUNCTION_ARGS) * Helper for bt_index_[parent_]check, coordinating the bulk of the work. */ static void -bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, - bool rootdescend) +bt_index_check_callback(Relation indrel, Relation heaprel, void *state) { - Oid heapid; - Relation indrel; - Relation heaprel; - LOCKMODE lockmode; - Oid save_userid; - int save_sec_context; - int save_nestlevel; - - if (parentcheck) - lockmode = ShareLock; - else - lockmode = AccessShareLock; - - /* - * We must lock table before index to avoid deadlocks. However, if the - * passed indrelid isn't an index then IndexGetRelation() will fail. - * Rather than emitting a not-very-helpful error message, postpone - * complaining, expecting that the is-it-an-index test below will fail. - * - * In hot standby mode this will raise an error when parentcheck is true. - */ - heapid = IndexGetRelation(indrelid, true); - if (OidIsValid(heapid)) - { - heaprel = table_open(heapid, lockmode); - - /* - * Switch to the table owner's userid, so that any index functions are - * run as that user. Also lock down security-restricted operations - * and arrange to make GUC variable changes local to this command. - */ - GetUserIdAndSecContext(&save_userid, &save_sec_context); - SetUserIdAndSecContext(heaprel->rd_rel->relowner, - save_sec_context | SECURITY_RESTRICTED_OPERATION); - save_nestlevel = NewGUCNestLevel(); - } - else - { - heaprel = NULL; - /* Set these just to suppress "uninitialized variable" warnings */ - save_userid = InvalidOid; - save_sec_context = -1; - save_nestlevel = -1; - } - - /* - * Open the target index relations separately (like relation_openrv(), but - * with heap relation locked first to prevent deadlocking). In hot - * standby mode this will raise an error when parentcheck is true. - * - * There is no need for the usual indcheckxmin usability horizon test - * here, even in the heapallindexed case, because index undergoing - * verification only needs to have entries for a new transaction snapshot. - * (If this is a parentcheck verification, there is no question about - * committed or recently dead heap tuples lacking index entries due to - * concurrent activity.) - */ - indrel = index_open(indrelid, lockmode); + BTCallbackState *args = (BTCallbackState *) state; + bool heapkeyspace, + allequalimage; - /* - * Since we did the IndexGetRelation call above without any lock, it's - * barely possible that a race against an index drop/recreation could have - * netted us the wrong table. - */ - if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) + if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM)) ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("could not open parent table of index \"%s\"", + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" lacks a main relation fork", RelationGetRelationName(indrel)))); - /* Relation suitable for checking as B-Tree? */ - btree_index_checkable(indrel); - - if (btree_index_mainfork_expected(indrel)) - { - bool heapkeyspace, - allequalimage; - - if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM)) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" lacks a main relation fork", - RelationGetRelationName(indrel)))); - - /* Extract metadata from metapage, and sanitize it in passing */ - _bt_metaversion(indrel, &heapkeyspace, &allequalimage); - if (allequalimage && !heapkeyspace) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version", - RelationGetRelationName(indrel)))); - if (allequalimage && !_bt_allequalimage(indrel, false)) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe", - RelationGetRelationName(indrel)))); - - /* Check index, possibly against table it is an index on */ - bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck, - heapallindexed, rootdescend); - } - - /* Roll back any GUC changes executed by index functions */ - AtEOXact_GUC(false, save_nestlevel); - - /* Restore userid and security context */ - SetUserIdAndSecContext(save_userid, save_sec_context); - - /* - * Release locks early. That's ok here because nothing in the called - * routines will trigger shared cache invalidations to be sent, so we can - * relax the usual pattern of only releasing locks after commit. - */ - index_close(indrel, lockmode); - if (heaprel) - table_close(heaprel, lockmode); -} - -/* - * Basic checks about the suitability of a relation for checking as a B-Tree - * index. - * - * NB: Intentionally not checking permissions, the function is normally not - * callable by non-superusers. If granted, it's useful to be able to check a - * whole cluster. - */ -static inline void -btree_index_checkable(Relation rel) -{ - if (rel->rd_rel->relkind != RELKIND_INDEX || - rel->rd_rel->relam != BTREE_AM_OID) + /* Extract metadata from metapage, and sanitize it in passing */ + _bt_metaversion(indrel, &heapkeyspace, &allequalimage); + if (allequalimage && !heapkeyspace) ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("only B-Tree indexes are supported as targets for verification"), - errdetail("Relation \"%s\" is not a B-Tree index.", - RelationGetRelationName(rel)))); - - if (RELATION_IS_OTHER_TEMP(rel)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot access temporary tables of other sessions"), - errdetail("Index \"%s\" is associated with temporary relation.", - RelationGetRelationName(rel)))); - - if (!rel->rd_index->indisvalid) + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version", + RelationGetRelationName(indrel)))); + if (allequalimage && !_bt_allequalimage(indrel, false)) ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot check index \"%s\"", - RelationGetRelationName(rel)), - errdetail("Index is not valid."))); -} - -/* - * Check if B-Tree index relation should have a file for its main relation - * fork. Verification uses this to skip unlogged indexes when in hot standby - * mode, where there is simply nothing to verify. We behave as if the - * relation is empty. - * - * NB: Caller should call btree_index_checkable() before calling here. - */ -static inline bool -btree_index_mainfork_expected(Relation rel) -{ - if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED || - !RecoveryInProgress()) - return true; + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe", + RelationGetRelationName(indrel)))); - ereport(DEBUG1, - (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), - errmsg("cannot verify unlogged index \"%s\" during recovery, skipping", - RelationGetRelationName(rel)))); + /* Check index, possibly against table it is an index on */ + bt_check_every_level(indrel, heaprel, heapkeyspace, args->parentcheck, + args->heapallindexed, args->rootdescend); - return false; } /* -- 2.32.0 (Apple Git-132)