diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile index dcec3b85203..dd9b5ecf926 100644 --- a/contrib/amcheck/Makefile +++ b/contrib/amcheck/Makefile @@ -1,13 +1,13 @@ # contrib/amcheck/Makefile MODULE_big = amcheck -OBJS = verify_nbtree.o $(WIN32RES) +OBJS = verify_nbtree.o verify_gist.o $(WIN32RES) EXTENSION = amcheck DATA = amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql PGFILEDESC = "amcheck - function for verifying relation integrity" -REGRESS = check check_btree +REGRESS = check check_btree check_gist ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/contrib/amcheck/amcheck--1.1--1.2.sql b/contrib/amcheck/amcheck--1.1--1.2.sql index 883530dec74..1d461fff5b9 100644 --- a/contrib/amcheck/amcheck--1.1--1.2.sql +++ b/contrib/amcheck/amcheck--1.1--1.2.sql @@ -17,3 +17,13 @@ LANGUAGE C STRICT PARALLEL RESTRICTED; -- Don't want this to be available to public REVOKE ALL ON FUNCTION bt_index_parent_check(regclass, boolean, boolean) FROM PUBLIC; + +-- +-- gist_index_parent_check() +-- +CREATE FUNCTION gist_index_parent_check(index regclass) +RETURNS VOID +AS 'MODULE_PATHNAME', 'gist_index_parent_check' +LANGUAGE C STRICT; + +REVOKE ALL ON FUNCTION gist_index_parent_check(regclass) FROM PUBLIC; diff --git a/contrib/amcheck/amcheck.h b/contrib/amcheck/amcheck.h new file mode 100644 index 00000000000..84da7d7ec0f --- /dev/null +++ b/contrib/amcheck/amcheck.h @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------- + * + * amcheck.h + * Shared routines for amcheck verifications. + * + * Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/amcheck.h + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/transam.h" +#include "catalog/index.h" +#include "catalog/pg_am.h" +#include "commands/tablecmds.h" +#include "miscadmin.h" +#include "storage/lmgr.h" +#include "utils/memutils.h" +#include "utils/snapmgr.h" + +extern void +amcheck_lock_relation(Oid indrelid, bool parentcheck,Relation *indrel, + Relation *heaprel, LOCKMODE *lockmode); + +extern void +amcheck_unlock_relation(Oid indrelid, Relation indrel, Relation heaprel, LOCKMODE lockmode); diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c new file mode 100644 index 00000000000..8b37b20bc9c --- /dev/null +++ b/contrib/amcheck/verify_gist.c @@ -0,0 +1,348 @@ +/*------------------------------------------------------------------------- + * + * verify_gist.c + * Verifies the integrity of GiST indexes based on invariants. + * + * Verification checks that all paths in GiST graph contain + * consistent keys: tuples on parent pages consistently include tuples + * from children pages. Also, verification checks graph invariants: + * internal page must have at least one downlinks, internal page can + * reference either only leaf pages or only internal pages. + * + * + * Copyright (c) 2017-2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/verify_gist.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/gist_private.h" +#include "amcheck.h" + + +typedef struct GistScanItem +{ + int depth; + IndexTuple parenttup; + BlockNumber parentblk; + XLogRecPtr parentlsn; + BlockNumber blkno; + struct GistScanItem *next; +} GistScanItem; + +static void check_index_page(Relation rel, Buffer buffer); + +static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno, BlockNumber childblkno, BufferAccessStrategy strategy); + +static void gist_check_parent_keys_consistency(Relation rel); + +static void gist_index_checkable(Relation rel); + +static void +check_index_page(Relation rel, Buffer buffer) +{ + Page page = BufferGetPage(buffer); + + gistcheckpage(rel, buffer); + if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has corrupted pages", + RelationGetRelationName(rel)))); + if (GistPageIsDeleted(page)) + { + elog(ERROR,"boom"); + if (!GistPageIsLeaf(page)) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has deleted internal page", + RelationGetRelationName(rel)))); + if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has deleted page with tuples", + RelationGetRelationName(rel)))); + } +} + +/* + * Try to re-find downlink pointing to 'blkno', in 'parentblkno'. + * + * If found, returns a palloc'd copy of the downlink tuple. Otherwise, + * returns NULL. + */ +static IndexTuple +gist_refind_parent(Relation rel, BlockNumber parentblkno, BlockNumber childblkno, + BufferAccessStrategy strategy) +{ + Buffer parentbuf; + Page parentpage; + OffsetNumber o, + parent_maxoff; + IndexTuple result = NULL; + + parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, childblkno, + RBM_NORMAL, strategy); + + LockBuffer(parentbuf, GIST_SHARE); + parentpage = BufferGetPage(parentbuf); + parent_maxoff = PageGetMaxOffsetNumber(parentpage); + for (o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o)) + { + ItemId p_iid = PageGetItemId(parentpage, o); + IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid); + + if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno) + { + /* Found it! Make copy and return it */ + result = CopyIndexTuple(itup); + break; + } + } + + LockBuffer(parentbuf, GIST_UNLOCK); + + return result; +} + +/* + * Main entry point for GiST check. Allocates memory context and scans + * through GiST graph. + * This function verifies that tuples of internal pages cover all the key + * space of each tuple on leaf page. To do this we invoke + * gist_check_internal_page() for every internal page. + * + * gist_check_internal_page() in it's turn takes every tuple and tries + * to adjust it by tuples on referenced child page. Parent gist tuple should + * never requre an adjustement. + */ +static void +gist_check_parent_keys_consistency(Relation rel) +{ + BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD); + GistScanItem *stack; + MemoryContext mctx; + MemoryContext oldcontext; + GISTSTATE *state; + int leafdepth; + + mctx = AllocSetContextCreate(CurrentMemoryContext, + "amcheck context", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(mctx); + + state = initGISTstate(rel); + + /* + * We don't know the height of the tree yet, but as soon as we encounter + * a leaf page, we will set 'leafdepth' to its depth. + */ + leafdepth = -1; + + /* Start the scan at the root page */ + stack = (GistScanItem *) palloc0(sizeof(GistScanItem)); + stack->depth = 0; + stack->parenttup = NULL; + stack->parentblk = InvalidBlockNumber; + stack->parentlsn = InvalidXLogRecPtr; + stack->blkno = GIST_ROOT_BLKNO; + + while (stack) + { + GistScanItem *stack_next; + Buffer buffer; + Page page; + OffsetNumber i, + maxoff; + XLogRecPtr lsn; + + CHECK_FOR_INTERRUPTS(); + + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno, + RBM_NORMAL, strategy); + LockBuffer(buffer, GIST_SHARE); + page = (Page) BufferGetPage(buffer); + lsn = BufferGetLSNAtomic(buffer); + + /* Do basic sanity checks on the page headers */ + check_index_page(rel, buffer); + + /* + * It's possible that the page was split since we looked at the parent, + * so that we didn't missed the downlink of the right sibling when we + * scanned the parent. If so, add the right sibling to the stack now. + */ + if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page)) + { + /* split page detected, install right link to the stack */ + GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem)); + + ptr->depth = stack->depth; + ptr->parenttup = CopyIndexTuple(stack->parenttup); + ptr->parentblk = stack->parentblk; + ptr->parentlsn = stack->parentlsn; + ptr->blkno = GistPageGetOpaque(page)->rightlink; + ptr->next = stack->next; + stack->next = ptr; + } + + /* Check that the tree has the same height in all branches */ + if (GistPageIsLeaf(page)) + { + if (leafdepth == -1) + leafdepth = stack->depth; + else if (stack->depth != leafdepth) + { + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly", + RelationGetRelationName(rel)))); + } + } + + /* + * Check that each tuple looks valid, and is consistent with the + * downlink we followed when we stepped on this page. + */ + maxoff = PageGetMaxOffsetNumber(page); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + ItemId iid = PageGetItemId(page, i); + IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); + + /* + * Check that it's not a leftover invalid tuple from pre-9.1 + * See also gistdoinsert() and gistbulkdelete() handlding of such + * tuples. We do consider it error here. + */ + if (GistTupleIsInvalid(idxtuple)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("index \"%s\" contains an inner tuple marked as invalid", + RelationGetRelationName(rel)), + errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."), + errhint("Please REINDEX it."))); + + if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple))) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has inconsistent tuple sizes", + RelationGetRelationName(rel)))); + + /* + * Check if this tuple is consistent with the downlink in the + * parent. + * + * XXX: shouldn't we rather use gist_consistent? + */ + if (stack->parenttup && gistgetadjusted(rel, stack->parenttup, idxtuple, state)) + { + /* + * There was a discrepancy between parent and child tuples. + * We need to verify it is not a result of concurrent call + * of gistplacetopage(). So, lock parent and try to find downlink + * for current page. It may be missing due to concurrent page + * split, this is OK. + */ + pfree(stack->parenttup); + stack->parenttup = gist_refind_parent(rel, stack->parentblk, stack->blkno, strategy); + + /* We found it - make a final check before failing */ + if (stack->parenttup && gistgetadjusted(rel, stack->parenttup, idxtuple, state)) + { + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has inconsistent records", + RelationGetRelationName(rel)))); + } + else + { + /* + * But now it is properly adjusted - nothing to do here. + */ + } + } + + /* If this is an internal page, recurse into the child */ + if (!GistPageIsLeaf(page)) + { + GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem)); + + ptr->depth = stack->depth + 1; + ptr->parenttup = CopyIndexTuple(idxtuple); + ptr->parentblk = stack->blkno; + ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); + ptr->parentlsn = lsn; + ptr->next = stack->next; + stack->next = ptr; + } + } + + LockBuffer(buffer, GIST_UNLOCK); + ReleaseBuffer(buffer); + + /* Step to next item in the queue */ + stack_next = stack->next; + if (stack->parenttup) + pfree(stack->parenttup); + pfree(stack); + stack = stack_next; + } + + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(mctx); +} + +/* Check that relation is eligible for GiST verification */ +static void +gist_index_checkable(Relation rel) +{ + if (rel->rd_rel->relkind != RELKIND_INDEX || + rel->rd_rel->relam != GIST_AM_OID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only GiST indexes are supported as targets for this" + " verification"), + errdetail("Relation \"%s\" is not a GiST index.", + RelationGetRelationName(rel)))); + + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"), + errdetail("Index \"%s\" is associated with temporary relation.", + RelationGetRelationName(rel)))); + + if (!rel->rd_index->indisvalid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot check index \"%s\"", + RelationGetRelationName(rel)), + errdetail("Index is not valid"))); +} + +PG_FUNCTION_INFO_V1(gist_index_parent_check); + +Datum +gist_index_parent_check(PG_FUNCTION_ARGS) +{ + Oid indrelid = PG_GETARG_OID(0); + Relation indrel; + Relation heaprel; + LOCKMODE lockmode; + + /* lock table and index with neccesary level */ + amcheck_lock_relation(indrelid, true, &indrel, &heaprel, &lockmode); + + /* verify that this is GiST eligible for check */ + gist_index_checkable(indrel); + gist_check_parent_keys_consistency(indrel); + + /* Unlock index and table */ + amcheck_unlock_relation(indrelid, indrel, heaprel, lockmode); + + PG_RETURN_VOID(); +} diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 6ae3bca9536..50991a656a5 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -21,23 +21,14 @@ * *------------------------------------------------------------------------- */ -#include "postgres.h" +#include "amcheck.h" #include "access/heapam.h" -#include "access/htup_details.h" #include "access/nbtree.h" #include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" -#include "catalog/index.h" -#include "catalog/pg_am.h" -#include "commands/tablecmds.h" #include "lib/bloomfilter.h" -#include "miscadmin.h" -#include "storage/lmgr.h" -#include "utils/memutils.h" -#include "utils/snapmgr.h" - PG_MODULE_MAGIC; @@ -212,23 +203,18 @@ bt_index_parent_check(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } -/* - * Helper for bt_index_[parent_]check, coordinating the bulk of the work. - */ -static void -bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, - bool rootdescend) + +/* Lock aquisition reused accross different am types */ +void +amcheck_lock_relation(Oid indrelid, bool parentcheck, Relation *indrel, + Relation *heaprel, LOCKMODE *lockmode) { Oid heapid; - Relation indrel; - Relation heaprel; - bool heapkeyspace; - LOCKMODE lockmode; if (parentcheck) - lockmode = ShareLock; + *lockmode = ShareLock; else - lockmode = AccessShareLock; + *lockmode = AccessShareLock; /* * We must lock table before index to avoid deadlocks. However, if the @@ -240,9 +226,9 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, */ heapid = IndexGetRelation(indrelid, true); if (OidIsValid(heapid)) - heaprel = table_open(heapid, lockmode); + *heaprel = heap_open(heapid, *lockmode); else - heaprel = NULL; + *heaprel = NULL; /* * Open the target index relations separately (like relation_openrv(), but @@ -256,27 +242,23 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, * committed or recently dead heap tuples lacking index entries due to * concurrent activity.) */ - indrel = index_open(indrelid, lockmode); + *indrel = index_open(indrelid, *lockmode); /* * Since we did the IndexGetRelation call above without any lock, it's * barely possible that a race against an index drop/recreation could have * netted us the wrong table. */ - if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) + if (*heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_TABLE), errmsg("could not open parent table of index %s", - RelationGetRelationName(indrel)))); - - /* Relation suitable for checking as B-Tree? */ - btree_index_checkable(indrel); - - /* Check index, possibly against table it is an index on */ - heapkeyspace = _bt_heapkeyspace(indrel); - bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck, - heapallindexed, rootdescend); + RelationGetRelationName(*indrel)))); +} +/* Pair for amcheck_lock_relation() */ +void amcheck_unlock_relation(Oid indrelid, Relation indrel, Relation heaprel, LOCKMODE lockmode) +{ /* * Release locks early. That's ok here because nothing in the called * routines will trigger shared cache invalidations to be sent, so we can @@ -287,6 +269,33 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, table_close(heaprel, lockmode); } +/* + * Helper for bt_index_[parent_]check, coordinating the bulk of the work. + */ +static void +bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, + bool rootdescend) +{ + Relation indrel; + Relation heaprel; + LOCKMODE lockmode; + bool heapkeyspace; + + /* lock table and index with neccesary level */ + amcheck_lock_relation(indrelid, parentcheck, &indrel, &heaprel, &lockmode); + + /* Relation suitable for checking as B-Tree? */ + btree_index_checkable(indrel); + + /* Check index, possibly against table it is an index on */ + heapkeyspace = _bt_heapkeyspace(indrel); + bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck, + heapallindexed, rootdescend); + + /* Unlock index and table */ + amcheck_unlock_relation(indrelid, indrel, heaprel, lockmode); +} + /* * Basic checks about the suitability of a relation for checking as a B-Tree * index. diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml index 627651d8d4a..34842eaebf6 100644 --- a/doc/src/sgml/amcheck.sgml +++ b/doc/src/sgml/amcheck.sgml @@ -165,6 +165,27 @@ ORDER BY c.relpages DESC LIMIT 10; + + + + gist_index_parent_check(index regclass) returns void + + gist_index_parent_check + + + + + + gist_index_parent_check tests that its target GiST + has consistent parent-child tuples relations (no parent tuples + require tuple adjustement) and page graph respects balanced-tree + invariants (internal pages reference only leaf page or only internal + pages). As with bt_index_parent_check, the + gist_index_parent_check aquires + ShareLock on index and heap relations. + + +