From 161f39b3001fec522ca1d3bb881635d05f91a984 Mon Sep 17 00:00:00 2001 From: Andrey Date: Tue, 1 Jan 2019 15:03:13 +0500 Subject: [PATCH] GiST verification function for amcheck v5 --- contrib/amcheck/Makefile | 6 +- contrib/amcheck/amcheck--1.1--1.2.sql | 14 + contrib/amcheck/amcheck.control | 2 +- contrib/amcheck/amcheck.h | 31 ++ contrib/amcheck/expected/check_gist.out | 9 + contrib/amcheck/sql/check_gist.sql | 4 + contrib/amcheck/verify_gist.c | 391 ++++++++++++++++++++++++ contrib/amcheck/verify_nbtree.c | 72 +++-- doc/src/sgml/amcheck.sgml | 21 ++ 9 files changed, 514 insertions(+), 36 deletions(-) create mode 100644 contrib/amcheck/amcheck--1.1--1.2.sql create mode 100644 contrib/amcheck/amcheck.h create mode 100644 contrib/amcheck/expected/check_gist.out create mode 100644 contrib/amcheck/sql/check_gist.sql create mode 100644 contrib/amcheck/verify_gist.c diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile index c5764b544f..dd9b5ecf92 100644 --- a/contrib/amcheck/Makefile +++ b/contrib/amcheck/Makefile @@ -1,13 +1,13 @@ # contrib/amcheck/Makefile MODULE_big = amcheck -OBJS = verify_nbtree.o $(WIN32RES) +OBJS = verify_nbtree.o verify_gist.o $(WIN32RES) EXTENSION = amcheck -DATA = amcheck--1.0--1.1.sql amcheck--1.0.sql +DATA = amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql PGFILEDESC = "amcheck - function for verifying relation integrity" -REGRESS = check check_btree +REGRESS = check check_btree check_gist ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/contrib/amcheck/amcheck--1.1--1.2.sql b/contrib/amcheck/amcheck--1.1--1.2.sql new file mode 100644 index 0000000000..5571642f4a --- /dev/null +++ b/contrib/amcheck/amcheck--1.1--1.2.sql @@ -0,0 +1,14 @@ +/* amcheck--1.1--1.2.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.2'" to load this file. \quit + +-- +-- gist_index_parent_check() +-- +CREATE FUNCTION gist_index_parent_check(index regclass) +RETURNS VOID +AS 'MODULE_PATHNAME', 'gist_index_parent_check' +LANGUAGE C STRICT; + +REVOKE ALL ON FUNCTION gist_index_parent_check(regclass) FROM PUBLIC; diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control index 469048403d..c6e310046d 100644 --- a/contrib/amcheck/amcheck.control +++ b/contrib/amcheck/amcheck.control @@ -1,5 +1,5 @@ # amcheck extension comment = 'functions for verifying relation integrity' -default_version = '1.1' +default_version = '1.2' module_pathname = '$libdir/amcheck' relocatable = true diff --git a/contrib/amcheck/amcheck.h b/contrib/amcheck/amcheck.h new file mode 100644 index 0000000000..84da7d7ec0 --- /dev/null +++ b/contrib/amcheck/amcheck.h @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------- + * + * amcheck.h + * Shared routines for amcheck verifications. + * + * Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/amcheck.h + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/transam.h" +#include "catalog/index.h" +#include "catalog/pg_am.h" +#include "commands/tablecmds.h" +#include "miscadmin.h" +#include "storage/lmgr.h" +#include "utils/memutils.h" +#include "utils/snapmgr.h" + +extern void +amcheck_lock_relation(Oid indrelid, bool parentcheck,Relation *indrel, + Relation *heaprel, LOCKMODE *lockmode); + +extern void +amcheck_unlock_relation(Oid indrelid, Relation indrel, Relation heaprel, LOCKMODE lockmode); diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out new file mode 100644 index 0000000000..4f373aea62 --- /dev/null +++ b/contrib/amcheck/expected/check_gist.out @@ -0,0 +1,9 @@ +-- minimal test, basically just verifying that amcheck works with GiST +CREATE TABLE gist_check AS SELECT point(s,1) c FROM generate_series(1,10000) s; +CREATE INDEX gist_check_idx ON gist_check USING gist(c); +SELECT gist_index_parent_check('gist_check_idx'); + gist_index_parent_check +------------------------- + +(1 row) + diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql new file mode 100644 index 0000000000..95e62ba975 --- /dev/null +++ b/contrib/amcheck/sql/check_gist.sql @@ -0,0 +1,4 @@ +-- minimal test, basically just verifying that amcheck works with GiST +CREATE TABLE gist_check AS SELECT point(s,1) c FROM generate_series(1,10000) s; +CREATE INDEX gist_check_idx ON gist_check USING gist(c); +SELECT gist_index_parent_check('gist_check_idx'); diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c new file mode 100644 index 0000000000..38c00f103c --- /dev/null +++ b/contrib/amcheck/verify_gist.c @@ -0,0 +1,391 @@ +/*------------------------------------------------------------------------- + * + * verify_nbtree.c + * Verifies the integrity of GiST indexes based on invariants. + * + * Verification checks that all paths in GiST graph are contatining + * consisnent keys: tuples on parent pages consistently include tuples + * from children pages. Also, verification checks graph invariants: + * internal page must have at least one downlinks, internal page can + * reference either only leaf pages or only internal pages. + * + * + * Copyright (c) 2017-2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/verify_gist.c + * + *------------------------------------------------------------------------- + */ +#include "amcheck.h" + +#include "access/gist_private.h" + + +typedef struct GistScanItem +{ + GistNSN parentlsn; + BlockNumber blkno; + struct GistScanItem *next; +} GistScanItem; + +static inline void +check_index_tuple(IndexTuple idxtuple, Relation rel, ItemId iid); + +static inline void +check_index_page(Relation rel, Page page, Buffer buffer); + +static inline bool +gist_check_internal_page(Relation rel, Page page_copy, Buffer buffer, + BufferAccessStrategy strategy, GISTSTATE *state); + +static inline void +gist_check_parent_keys_consistency(Relation rel); + +static inline void +gist_check_page_keys(Relation rel, Buffer parentbuffer, Page page, + BlockNumber blkno, IndexTuple parent, GISTSTATE *state); + +static void +pushStackIfSplited(Page page, GistScanItem *stack); + +static inline void +gist_index_checkable(Relation rel); + +static inline void +check_index_tuple(IndexTuple idxtuple, Relation rel, ItemId iid) +{ + /* + * Check that it's not a leftover invalid tuple from pre-9.1 + * See also gistdoinsert() and gistbulkdelete() handlding of such tuples. + * We do not consider it error here, but warn operator. + */ + if (GistTupleIsInvalid(idxtuple)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("index \"%s\" contains an inner tuple marked as" + " invalid",RelationGetRelationName(rel)), + errdetail("This is caused by an incomplete page split at " + "crash recovery before upgrading to PostgreSQL 9.1."), + errhint("Please REINDEX it."))); + + if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple))) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has tuple sizes", + RelationGetRelationName(rel)))); +} + +static inline void +check_index_page(Relation rel, Page page, Buffer buffer) +{ + gistcheckpage(rel, buffer); + if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has corrupted pages", + RelationGetRelationName(rel)))); +} + +/* + * For every tuple on page check if it is contained by tuple on parent page + */ +static inline void +gist_check_page_keys(Relation rel, Buffer parentbuffer, Page page, + BlockNumber blkno, IndexTuple parent, GISTSTATE *state) +{ + OffsetNumber i, + maxoff = PageGetMaxOffsetNumber(page); + + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + ItemId iid = PageGetItemId(page, i); + IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); + + check_index_tuple(idxtuple, rel, iid); + + /* + * Tree is inconsistent if adjustement is necessary for any parent + * tuple + */ + if (gistgetadjusted(rel, parent, idxtuple, state)) + { + /* + * OK, we found a discrepency between parent and child tuples. + * We need to verify it is not a result of concurrent call + * of gistplacetopage(). So, lock parent and try to find downlink + * for current page. It may be missing due to concurrent page + * split, this is OK. + */ + LockBuffer(parentbuffer, GIST_SHARE); + Page parentpage = (Page) BufferGetPage(parentbuffer); + OffsetNumber o, + parent_maxoff = PageGetMaxOffsetNumber(parentpage); + + for (o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(i)) + { + ItemId p_iid = PageGetItemId(parentpage, o); + parent = (IndexTuple) PageGetItem(parentpage, p_iid); + BlockNumber child_blkno = ItemPointerGetBlockNumber(&(parent->t_tid)); + if (child_blkno == blkno) + { + /* We found it - make a final check before failing */ + if (gistgetadjusted(rel, parent, idxtuple, state)) + { + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has inconsistent records", + RelationGetRelationName(rel)))); + } + else + { + /* + * But now it is properly adjusted - nothing to do here. + */ + break; + } + } + } + + + LockBuffer(parentbuffer, GIST_UNLOCK); + } + } +} + +/* + * Check of an internal page. + * Return true if further descent is necessary. + * Hold pins on two pages at a time (parent+child). + * But coupled lock on parent is taken iif parent-child discrepency found. + * Locks is taken on every leaf page, and only then, if neccesary, on leaf + * inside gist_check_page_keys() call. + */ +static inline bool +gist_check_internal_page(Relation rel, Page page_copy, Buffer buffer, + BufferAccessStrategy strategy, GISTSTATE *state) +{ + bool has_leafs = false; + bool has_internals = false; + OffsetNumber i, + maxoff = PageGetMaxOffsetNumber(page_copy); + + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + ItemId iid = PageGetItemId(page_copy, i); + IndexTuple idxtuple = (IndexTuple) PageGetItem(page_copy, iid); + + BlockNumber child_blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); + Buffer buffer; + Page child_page; + + check_index_tuple(idxtuple, rel, iid); + + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, child_blkno, + RBM_NORMAL, strategy); + + LockBuffer(buffer, GIST_SHARE); + child_page = (Page) BufferGetPage(buffer); + check_index_page(rel, child_page, buffer); + + has_leafs = has_leafs || GistPageIsLeaf(child_page); + has_internals = has_internals || !GistPageIsLeaf(child_page); + gist_check_page_keys(rel, buffer, child_page, child_blkno, idxtuple, state); + + UnlockReleaseBuffer(buffer); + } + + if (!(has_leafs || has_internals)) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" internal page has no downlink references", + RelationGetRelationName(rel)))); + + + if (has_leafs == has_internals) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" page references both internal and leaf pages", + RelationGetRelationName(rel)))); + + return has_internals; +} + +/* add pages with unfinished split to scan */ +static void +pushStackIfSplited(Page page, GistScanItem *stack) +{ + GISTPageOpaque opaque = GistPageGetOpaque(page); + + if (stack->blkno != GIST_ROOT_BLKNO && !XLogRecPtrIsInvalid(stack->parentlsn) && + (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page)) && + opaque->rightlink != InvalidBlockNumber /* sanity check */ ) + { + /* split page detected, install right link to the stack */ + + GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem)); + + ptr->blkno = opaque->rightlink; + ptr->parentlsn = stack->parentlsn; + ptr->next = stack->next; + stack->next = ptr; + } +} + +/* + * Main entry point for GiST check. Allocates memory context and scans + * through GiST graph. + * This function verifies that tuples of internal pages cover all the key + * space of each tuple on leaf page. To do this we invoke + * gist_check_internal_page() for every internal page. + * + * gist_check_internal_page() in it's turn takes every tuple and tries + * to adjust it by tuples on referenced child page. Parent gist tuple should + * never requre an adjustement. + */ +static inline void +gist_check_parent_keys_consistency(Relation rel) +{ + GistScanItem *stack, + *ptr; + + BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD); + + MemoryContext mctx = AllocSetContextCreate(CurrentMemoryContext, + "amcheck context", + ALLOCSET_DEFAULT_SIZES); + + MemoryContext oldcontext = MemoryContextSwitchTo(mctx); + GISTSTATE *state = initGISTstate(rel); + + stack = (GistScanItem *) palloc0(sizeof(GistScanItem)); + stack->blkno = GIST_ROOT_BLKNO; + + while (stack) + { + Buffer buffer; + Page page; + OffsetNumber i, + maxoff; + IndexTuple idxtuple; + ItemId iid; + + CHECK_FOR_INTERRUPTS(); + + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno, + RBM_NORMAL, strategy); + LockBuffer(buffer, GIST_SHARE); + page = (Page) BufferGetPage(buffer); + check_index_page(rel, page, buffer); + maxoff = PageGetMaxOffsetNumber(page); + + if (GistPageIsLeaf(page)) + { + /* should never happen unless it is root */ + if (stack->blkno != GIST_ROOT_BLKNO) + { + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": internal pages traversal " + "encountered leaf page unexpectedly", + RelationGetRelationName(rel)))); + } + check_index_page(rel, page, buffer); + + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + iid = PageGetItemId(page, i); + idxtuple = (IndexTuple) PageGetItem(page, iid); + check_index_tuple(idxtuple, rel, iid); + } + LockBuffer(buffer, GIST_UNLOCK); + } + else + { + /* we need to copy only internal pages */ + Page page_copy = palloc(BLCKSZ); + memcpy(page_copy, page, BLCKSZ); + LockBuffer(buffer, GIST_UNLOCK); + + /* check for split proceeded after look at parent */ + pushStackIfSplited(page_copy, stack); + + if (gist_check_internal_page(rel, page_copy, buffer, strategy, state)) + { + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + iid = PageGetItemId(page_copy, i); + idxtuple = (IndexTuple) PageGetItem(page_copy, iid); + + ptr = (GistScanItem *) palloc(sizeof(GistScanItem)); + ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); + ptr->parentlsn = BufferGetLSNAtomic(buffer); + ptr->next = stack->next; + stack->next = ptr; + } + } + + pfree(page_copy); + } + + ReleaseBuffer(buffer); + + ptr = stack->next; + pfree(stack); + stack = ptr; + } + + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(mctx); +} + +/* Check that relation is eligible for GiST verification */ +static inline void +gist_index_checkable(Relation rel) +{ + if (rel->rd_rel->relkind != RELKIND_INDEX || + rel->rd_rel->relam != GIST_AM_OID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only GiST indexes are supported as targets for this" + " verification"), + errdetail("Relation \"%s\" is not a GiST index.", + RelationGetRelationName(rel)))); + + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"), + errdetail("Index \"%s\" is associated with temporary relation.", + RelationGetRelationName(rel)))); + + if (!rel->rd_index->indisvalid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot check index \"%s\"", + RelationGetRelationName(rel)), + errdetail("Index is not valid"))); +} + +PG_FUNCTION_INFO_V1(gist_index_parent_check); + +Datum +gist_index_parent_check(PG_FUNCTION_ARGS) +{ + Oid indrelid = PG_GETARG_OID(0); + Relation indrel; + Relation heaprel; + LOCKMODE lockmode; + + /* lock table and index with neccesary level */ + amcheck_lock_relation(indrelid, true, &indrel, &heaprel, &lockmode); + + /* verify that this is GiST eligible for check */ + gist_index_checkable(indrel); + gist_check_parent_keys_consistency(indrel); + + /* Unlock index and table */ + amcheck_unlock_relation(indrelid, indrel, heaprel, lockmode); + + PG_RETURN_VOID(); +} diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 964200a767..f17552c5e5 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -21,22 +21,12 @@ * *------------------------------------------------------------------------- */ -#include "postgres.h" +#include "amcheck.h" #include "access/heapam.h" -#include "access/htup_details.h" #include "access/nbtree.h" -#include "access/transam.h" #include "access/xact.h" -#include "catalog/index.h" -#include "catalog/pg_am.h" -#include "commands/tablecmds.h" #include "lib/bloomfilter.h" -#include "miscadmin.h" -#include "storage/lmgr.h" -#include "utils/memutils.h" -#include "utils/snapmgr.h" - PG_MODULE_MAGIC; @@ -195,21 +185,18 @@ bt_index_parent_check(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } -/* - * Helper for bt_index_[parent_]check, coordinating the bulk of the work. - */ -static void -bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed) + +/* Lock aquisition reused accross different am types */ +void +amcheck_lock_relation(Oid indrelid, bool parentcheck, Relation *indrel, + Relation *heaprel, LOCKMODE *lockmode) { Oid heapid; - Relation indrel; - Relation heaprel; - LOCKMODE lockmode; if (parentcheck) - lockmode = ShareLock; + *lockmode = ShareLock; else - lockmode = AccessShareLock; + *lockmode = AccessShareLock; /* * We must lock table before index to avoid deadlocks. However, if the @@ -221,9 +208,9 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed) */ heapid = IndexGetRelation(indrelid, true); if (OidIsValid(heapid)) - heaprel = table_open(heapid, lockmode); + *heaprel = heap_open(heapid, *lockmode); else - heaprel = NULL; + *heaprel = NULL; /* * Open the target index relations separately (like relation_openrv(), but @@ -237,25 +224,23 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed) * committed or recently dead heap tuples lacking index entries due to * concurrent activity.) */ - indrel = index_open(indrelid, lockmode); + *indrel = index_open(indrelid, *lockmode); /* * Since we did the IndexGetRelation call above without any lock, it's * barely possible that a race against an index drop/recreation could have * netted us the wrong table. */ - if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) + if (*heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_TABLE), errmsg("could not open parent table of index %s", - RelationGetRelationName(indrel)))); - - /* Relation suitable for checking as B-Tree? */ - btree_index_checkable(indrel); - - /* Check index, possibly against table it is an index on */ - bt_check_every_level(indrel, heaprel, parentcheck, heapallindexed); + RelationGetRelationName(*indrel)))); +} +/* Pair for amcheck_lock_relation() */ +void amcheck_unlock_relation(Oid indrelid, Relation indrel, Relation heaprel, LOCKMODE lockmode) +{ /* * Release locks early. That's ok here because nothing in the called * routines will trigger shared cache invalidations to be sent, so we can @@ -266,6 +251,29 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed) table_close(heaprel, lockmode); } +/* + * Helper for bt_index_[parent_]check, coordinating the bulk of the work. + */ +static void +bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed) +{ + Relation indrel; + Relation heaprel; + LOCKMODE lockmode; + + /* lock table and index with neccesary level */ + amcheck_lock_relation(indrelid, parentcheck, &indrel, &heaprel, &lockmode); + + /* Relation suitable for checking as B-Tree? */ + btree_index_checkable(indrel); + + /* Check index, possibly against table it is an index on */ + bt_check_every_level(indrel, heaprel, parentcheck, heapallindexed); + + /* Unlock index and table */ + amcheck_unlock_relation(indrelid, indrel, heaprel, lockmode); +} + /* * Basic checks about the suitability of a relation for checking as a B-Tree * index. diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml index 8bb60d5c2d..c147be1c11 100644 --- a/doc/src/sgml/amcheck.sgml +++ b/doc/src/sgml/amcheck.sgml @@ -162,6 +162,27 @@ ORDER BY c.relpages DESC LIMIT 10; + + + + gist_index_parent_check(index regclass) returns void + + gist_index_parent_check + + + + + + gist_index_parent_check tests that its target GiST + has consistent parent-child tuples relations (no parent tuples + require tuple adjustement) and page graph respects balanced-tree + invariants (internal pages reference only leaf page or only internal + pages). As with bt_index_parent_check, the + gist_index_parent_check aquires + ShareLock on index and heap relations. + + + -- 2.20.1