diff --git a/contrib/pg_visibility/Makefile b/contrib/pg_visibility/Makefile
index fbbaa2e..379591a 100644
--- a/contrib/pg_visibility/Makefile
+++ b/contrib/pg_visibility/Makefile
@@ -4,7 +4,7 @@ MODULE_big = pg_visibility
 OBJS = pg_visibility.o $(WIN32RES)
 
 EXTENSION = pg_visibility
-DATA = pg_visibility--1.0.sql
+DATA = pg_visibility--1.1.sql pg_visibility--1.0--1.1.sql
 PGFILEDESC = "pg_visibility - page visibility information"
 
 ifdef USE_PGXS
diff --git a/contrib/pg_visibility/pg_visibility--1.0--1.1.sql b/contrib/pg_visibility/pg_visibility--1.0--1.1.sql
new file mode 100644
index 0000000..2c97dfd
--- /dev/null
+++ b/contrib/pg_visibility/pg_visibility--1.0--1.1.sql
@@ -0,0 +1,17 @@
+/* contrib/pg_visibility/pg_visibility--1.0--1.1.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_visibility UPDATE TO '1.1'" to load this file. \quit
+
+CREATE FUNCTION pg_check_frozen(regclass, t_ctid OUT tid)
+RETURNS SETOF tid
+AS 'MODULE_PATHNAME', 'pg_check_frozen'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_check_visible(regclass, t_ctid OUT tid)
+RETURNS SETOF tid
+AS 'MODULE_PATHNAME', 'pg_check_visible'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC;
diff --git a/contrib/pg_visibility/pg_visibility--1.0.sql b/contrib/pg_visibility/pg_visibility--1.0.sql
deleted file mode 100644
index da511e5..0000000
--- a/contrib/pg_visibility/pg_visibility--1.0.sql
+++ /dev/null
@@ -1,52 +0,0 @@
-/* contrib/pg_visibility/pg_visibility--1.0.sql */
-
--- complain if script is sourced in psql, rather than via CREATE EXTENSION
-\echo Use "CREATE EXTENSION pg_visibility" to load this file. \quit
-
--- Show visibility map information.
-CREATE FUNCTION pg_visibility_map(regclass, blkno bigint,
-                                  all_visible OUT boolean,
-                                  all_frozen OUT boolean)
-RETURNS record
-AS 'MODULE_PATHNAME', 'pg_visibility_map'
-LANGUAGE C STRICT;
-
--- Show visibility map and page-level visibility information.
-CREATE FUNCTION pg_visibility(regclass, blkno bigint,
-                              all_visible OUT boolean,
-                              all_frozen OUT boolean,
-                              pd_all_visible OUT boolean)
-RETURNS record
-AS 'MODULE_PATHNAME', 'pg_visibility'
-LANGUAGE C STRICT;
-
--- Show visibility map information for each block in a relation.
-CREATE FUNCTION pg_visibility_map(regclass, blkno OUT bigint,
-                                  all_visible OUT boolean,
-                                  all_frozen OUT boolean)
-RETURNS SETOF record
-AS 'MODULE_PATHNAME', 'pg_visibility_map_rel'
-LANGUAGE C STRICT;
-
--- Show visibility map and page-level visibility information for each block.
-CREATE FUNCTION pg_visibility(regclass, blkno OUT bigint,
-                              all_visible OUT boolean,
-                              all_frozen OUT boolean,
-                              pd_all_visible OUT boolean)
-RETURNS SETOF record
-AS 'MODULE_PATHNAME', 'pg_visibility_rel'
-LANGUAGE C STRICT;
-
--- Show summary of visibility map bits for a relation.
-CREATE FUNCTION pg_visibility_map_summary(regclass,
-    OUT all_visible bigint, OUT all_frozen bigint)
-RETURNS record
-AS 'MODULE_PATHNAME', 'pg_visibility_map_summary'
-LANGUAGE C STRICT;
-
--- Don't want these to be available to public.
-REVOKE ALL ON FUNCTION pg_visibility_map(regclass, bigint) FROM PUBLIC;
-REVOKE ALL ON FUNCTION pg_visibility(regclass, bigint) FROM PUBLIC;
-REVOKE ALL ON FUNCTION pg_visibility_map(regclass) FROM PUBLIC;
-REVOKE ALL ON FUNCTION pg_visibility(regclass) FROM PUBLIC;
-REVOKE ALL ON FUNCTION pg_visibility_map_summary(regclass) FROM PUBLIC;
diff --git a/contrib/pg_visibility/pg_visibility--1.1.sql b/contrib/pg_visibility/pg_visibility--1.1.sql
new file mode 100644
index 0000000..b49b644
--- /dev/null
+++ b/contrib/pg_visibility/pg_visibility--1.1.sql
@@ -0,0 +1,67 @@
+/* contrib/pg_visibility/pg_visibility--1.1.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_visibility" to load this file. \quit
+
+-- Show visibility map information.
+CREATE FUNCTION pg_visibility_map(regclass, blkno bigint,
+                                  all_visible OUT boolean,
+                                  all_frozen OUT boolean)
+RETURNS record
+AS 'MODULE_PATHNAME', 'pg_visibility_map'
+LANGUAGE C STRICT;
+
+-- Show visibility map and page-level visibility information.
+CREATE FUNCTION pg_visibility(regclass, blkno bigint,
+                              all_visible OUT boolean,
+                              all_frozen OUT boolean,
+                              pd_all_visible OUT boolean)
+RETURNS record
+AS 'MODULE_PATHNAME', 'pg_visibility'
+LANGUAGE C STRICT;
+
+-- Show visibility map information for each block in a relation.
+CREATE FUNCTION pg_visibility_map(regclass, blkno OUT bigint,
+                                  all_visible OUT boolean,
+                                  all_frozen OUT boolean)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_visibility_map_rel'
+LANGUAGE C STRICT;
+
+-- Show visibility map and page-level visibility information for each block.
+CREATE FUNCTION pg_visibility(regclass, blkno OUT bigint,
+                              all_visible OUT boolean,
+                              all_frozen OUT boolean,
+                              pd_all_visible OUT boolean)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_visibility_rel'
+LANGUAGE C STRICT;
+
+-- Show summary of visibility map bits for a relation.
+CREATE FUNCTION pg_visibility_map_summary(regclass,
+    OUT all_visible bigint, OUT all_frozen bigint)
+RETURNS record
+AS 'MODULE_PATHNAME', 'pg_visibility_map_summary'
+LANGUAGE C STRICT;
+
+-- Show the TIDs of non-frozen tuples, if any, on pages of a relation that
+-- the visibility map marks all-frozen.
+CREATE FUNCTION pg_check_frozen(regclass, t_ctid OUT tid)
+RETURNS SETOF tid
+AS 'MODULE_PATHNAME', 'pg_check_frozen'
+LANGUAGE C STRICT;
+
+-- Show the TIDs of not-all-visible tuples, if any, on all-visible pages.
+CREATE FUNCTION pg_check_visible(regclass, t_ctid OUT tid)
+RETURNS SETOF tid
+AS 'MODULE_PATHNAME', 'pg_check_visible'
+LANGUAGE C STRICT;
+
+-- Don't want these to be available to public.
+REVOKE ALL ON FUNCTION pg_visibility_map(regclass, bigint) FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_visibility(regclass, bigint) FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_visibility_map(regclass) FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_visibility(regclass) FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_visibility_map_summary(regclass) FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC;
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index 5e5c7cc..7802e22 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -14,25 +14,38 @@
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
+#include "storage/procarray.h"
 #include "utils/rel.h"
 
 PG_MODULE_MAGIC;
 
 typedef struct vbits
 {
-	BlockNumber	next;
-	BlockNumber	count;
+	BlockNumber next;
+	BlockNumber count;
 	uint8		bits[FLEXIBLE_ARRAY_MEMBER];
 } vbits;
 
+typedef struct corrupt_items
+{
+	BlockNumber next;			/* next slot to fill; later, next to return */
+	BlockNumber count;			/* slots allocated; later, items stored */
+	ItemPointer tids;			/* array of recorded TIDs */
+} corrupt_items;
+
 PG_FUNCTION_INFO_V1(pg_visibility_map);
 PG_FUNCTION_INFO_V1(pg_visibility_map_rel);
 PG_FUNCTION_INFO_V1(pg_visibility);
 PG_FUNCTION_INFO_V1(pg_visibility_rel);
 PG_FUNCTION_INFO_V1(pg_visibility_map_summary);
+PG_FUNCTION_INFO_V1(pg_check_frozen);
+PG_FUNCTION_INFO_V1(pg_check_visible);
 
 static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);
 static vbits *collect_visibility_data(Oid relid, bool include_pd);
+static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible,
+					  bool all_frozen);
+static void record_corrupt_item(corrupt_items *items, ItemPointer tid);
 
 /*
  * Visibility map information for a single block of a relation.
@@ -129,7 +142,7 @@ pg_visibility_map_rel(PG_FUNCTION_ARGS)
 	if (SRF_IS_FIRSTCALL())
 	{
 		Oid			relid = PG_GETARG_OID(0);
-		MemoryContext	oldcontext;
+		MemoryContext oldcontext;
 
 		funcctx = SRF_FIRSTCALL_INIT();
 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
@@ -173,7 +186,7 @@ pg_visibility_rel(PG_FUNCTION_ARGS)
 	if (SRF_IS_FIRSTCALL())
 	{
 		Oid			relid = PG_GETARG_OID(0);
-		MemoryContext	oldcontext;
+		MemoryContext oldcontext;
 
 		funcctx = SRF_FIRSTCALL_INIT();
 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
@@ -214,8 +227,8 @@ pg_visibility_map_summary(PG_FUNCTION_ARGS)
 {
 	Oid			relid = PG_GETARG_OID(0);
 	Relation	rel;
-	BlockNumber	nblocks;
-	BlockNumber	blkno;
+	BlockNumber nblocks;
+	BlockNumber blkno;
 	Buffer		vmbuffer = InvalidBuffer;
 	int64		all_visible = 0;
 	int64		all_frozen = 0;
@@ -259,6 +272,68 @@ pg_visibility_map_summary(PG_FUNCTION_ARGS)
 }
 
 /*
+ * Return the TIDs of non-frozen tuples present in pages marked all-frozen
+ * in the visibility map. We hope no one will ever find any, but there could
+ * be bugs, database corruption, etc.
+ */
+Datum
+pg_check_frozen(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	corrupt_items *items;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		Oid			relid = PG_GETARG_OID(0);
+		MemoryContext oldcontext;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+		funcctx->user_fctx = collect_corrupt_items(relid, false, true);
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+	items = (corrupt_items *) funcctx->user_fctx;
+
+	if (items->next < items->count)
+		SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
+
+	SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Return the TIDs of not-all-visible tuples in pages marked all-visible
+ * in the visibility map. We hope no one will ever find any, but there could
+ * be bugs, database corruption, etc.
+ */
+Datum
+pg_check_visible(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	corrupt_items *items;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		Oid			relid = PG_GETARG_OID(0);
+		MemoryContext oldcontext;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+		funcctx->user_fctx = collect_corrupt_items(relid, true, false);
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+	items = (corrupt_items *) funcctx->user_fctx;
+
+	if (items->next < items->count)
+		SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
+
+	SRF_RETURN_DONE(funcctx);
+}
+
+/*
  * Helper function to construct whichever TupleDesc we need for a particular
  * call.
  */
@@ -292,16 +367,16 @@ static vbits *
 collect_visibility_data(Oid relid, bool include_pd)
 {
 	Relation	rel;
-	BlockNumber	nblocks;
+	BlockNumber nblocks;
 	vbits	   *info;
-	BlockNumber	blkno;
+	BlockNumber blkno;
 	Buffer		vmbuffer = InvalidBuffer;
-	BufferAccessStrategy	bstrategy = GetAccessStrategy(BAS_BULKREAD);
+	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
 
 	rel = relation_open(relid, AccessShareLock);
 
 	nblocks = RelationGetNumberOfBlocks(rel);
-	info = palloc0(offsetof(vbits, bits) + nblocks);
+	info = palloc0(offsetof(vbits, bits) +nblocks);
 	info->next = 0;
 	info->count = nblocks;
 
@@ -320,8 +395,8 @@ collect_visibility_data(Oid relid, bool include_pd)
 			info->bits[blkno] |= (1 << 1);
 
 		/*
-		 * Page-level data requires reading every block, so only get it if
-		 * the caller needs it. Use a buffer access strategy, too, to prevent
+		 * Page-level data requires reading every block, so only get it if the
+		 * caller needs it. Use a buffer access strategy, too, to prevent
 		 * cache-trashing.
 		 */
 		if (include_pd)
@@ -348,3 +423,197 @@ collect_visibility_data(Oid relid, bool include_pd)
 
 	return info;
 }
+
+/*
+ * Returns a list of items whose visibility map information does not match
+ * the status of the tuples on the page.
+ *
+ * If all_visible is passed as true, this will include all items which are
+ * on pages marked as all-visible in the visibility map but which do not
+ * seem to in fact be all-visible.
+ *
+ * If all_frozen is passed as true, this will include all items which are
+ * on pages marked as all-frozen but which do not seem to in fact be frozen.
+ *
+ * The result is allocated in the current memory context, so callers that
+ * return it across SRF calls must switch to a long-lived context first.
+ */
+static corrupt_items *
+collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
+{
+	Relation	rel;
+	BlockNumber nblocks;
+	corrupt_items *items;
+	BlockNumber blkno;
+	Buffer		vmbuffer = InvalidBuffer;
+	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
+	TransactionId OldestXmin = InvalidTransactionId;
+
+	rel = relation_open(relid, AccessShareLock);
+
+	nblocks = RelationGetNumberOfBlocks(rel);
+
+	if (all_visible)
+		OldestXmin = GetOldestXmin(rel, true);
+
+	/*
+	 * Guess an initial array size. We don't expect many corrupted tuples, so
+	 * start with a small array. This function uses the "next" field to track
+	 * the next offset where we can store an item (which is the same thing as
+	 * the number of items found so far) and the "count" field to track the
+	 * number of entries allocated. We'll repurpose these fields before
+	 * returning.
+	 */
+	items = palloc0(sizeof(corrupt_items));
+	items->next = 0;
+	items->count = 64;
+	items->tids = palloc(items->count * sizeof(ItemPointerData));
+
+	/* Loop over every block in the relation. */
+	for (blkno = 0; blkno < nblocks; ++blkno)
+	{
+		bool		check_frozen = false;
+		bool		check_visible = false;
+		Buffer		buffer;
+		Page		page;
+		OffsetNumber offnum,
+					maxoff;
+
+		/* Make sure we are interruptible. */
+		CHECK_FOR_INTERRUPTS();
+
+		/* Use the visibility map to decide whether to check this page. */
+		if (all_frozen && VM_ALL_FROZEN(rel, blkno, &vmbuffer))
+			check_frozen = true;
+		if (all_visible && VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
+			check_visible = true;
+		if (!check_visible && !check_frozen)
+			continue;
+
+		/* Read and lock the page. */
+		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
+									bstrategy);
+		LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+		page = BufferGetPage(buffer);
+		maxoff = PageGetMaxOffsetNumber(page);
+
+		/*
+		 * The visibility map bits might have changed while we were acquiring
+		 * the page lock. Recheck to avoid returning spurious results.
+		 */
+		if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
+			check_frozen = false;
+		if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
+			check_visible = false;
+		if (!check_visible && !check_frozen)
+		{
+			UnlockReleaseBuffer(buffer);
+			continue;
+		}
+
+		/* Iterate over each tuple on the page. */
+		for (offnum = FirstOffsetNumber;
+			 offnum <= maxoff;
+			 offnum = OffsetNumberNext(offnum))
+		{
+			HeapTupleData tuple;
+			ItemId		itemid;
+
+			itemid = PageGetItemId(page, offnum);
+
+			/* Unused or redirect line pointers are of no interest. */
+			if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
+				continue;
+
+			/* Dead line pointers are neither all-visible nor frozen. */
+			if (ItemIdIsDead(itemid))
+			{
+				ItemPointerData tid;
+
+				ItemPointerSet(&tid, blkno, offnum);
+				record_corrupt_item(items, &tid);
+				continue;
+			}
+
+			/*
+			 * Initialize a HeapTupleData structure for checks below. Set
+			 * t_self to the tuple's own TID and report that, since t_ctid of
+			 * an updated tuple points at its successor version instead.
+			 */
+			ItemPointerSet(&(tuple.t_self), blkno, offnum);
+			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+			tuple.t_len = ItemIdGetLength(itemid);
+			tuple.t_tableOid = relid;
+
+			/*
+			 * If we're checking whether the page is all-visible, we expect
+			 * the tuple to be live, xmin to be hinted committed, and the xmin
+			 * to be old enough that everyone can see it. The tests here
+			 * should match the ones in lazy_scan_heap.
+			 */
+			if (check_visible)
+			{
+				HTSV_Result state;
+
+				state = HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buffer);
+				if (state != HEAPTUPLE_LIVE ||
+					!HeapTupleHeaderXminCommitted(tuple.t_data))
+					record_corrupt_item(items, &tuple.t_self);
+				else
+				{
+					TransactionId xmin;
+
+					xmin = HeapTupleHeaderGetXmin(tuple.t_data);
+					if (!TransactionIdPrecedes(xmin, OldestXmin))
+						record_corrupt_item(items, &tuple.t_self);
+				}
+			}
+
+			/*
+			 * If we're checking whether the page is all-frozen, we expect the
+			 * tuple to be in a state where it will never need freezing.
+			 */
+			if (check_frozen)
+			{
+				if (heap_tuple_needs_eventual_freeze(tuple.t_data))
+					record_corrupt_item(items, &tuple.t_self);
+			}
+		}
+
+		UnlockReleaseBuffer(buffer);
+	}
+
+	/* Clean up. */
+	if (vmbuffer != InvalidBuffer)
+		ReleaseBuffer(vmbuffer);
+	relation_close(rel, AccessShareLock);
+
+	/*
+	 * Before returning, repurpose the fields to match caller's expectations.
+	 * next is now the next item that should be read (rather than written) and
+	 * count is now the number of items we wrote (rather than the number we
+	 * allocated).
+	 */
+	items->count = items->next;
+	items->next = 0;
+
+	return items;
+}
+
+/*
+ * Remember one corrupt item, doubling the TID array whenever it is full.
+ */
+static void
+record_corrupt_item(corrupt_items *items, ItemPointer tid)
+{
+	/* enlarge output array if needed. */
+	if (items->next >= items->count)
+	{
+		items->count *= 2;
+		items->tids = repalloc(items->tids,
+							   items->count * sizeof(ItemPointerData));
+	}
+	/* and add the new item */
+	items->tids[items->next++] = *tid;
+}
diff --git a/contrib/pg_visibility/pg_visibility.control b/contrib/pg_visibility/pg_visibility.control
index 1d71853..f93ed01 100644
--- a/contrib/pg_visibility/pg_visibility.control
+++ b/contrib/pg_visibility/pg_visibility.control
@@ -1,5 +1,5 @@
 # pg_visibility extension
 comment = 'examine the visibility map (VM) and page-level visibility info'
-default_version = '1.0'
+default_version = '1.1'
 module_pathname = '$libdir/pg_visibility'
 relocatable = true
diff --git a/doc/src/sgml/pgvisibility.sgml b/doc/src/sgml/pgvisibility.sgml
index 48b003d..4cdca7d 100644
--- a/doc/src/sgml/pgvisibility.sgml
+++ b/doc/src/sgml/pgvisibility.sgml
@@ -32,7 +32,8 @@
   Functions which display information about PD_ALL_VISIBLE are much more
   costly than those which only consult the visibility map,
   because they must read the relation's data blocks rather than only the
-  (much smaller) visibility map.
+  (much smaller) visibility map. Functions that check the relation's
+  data blocks are similarly expensive.
 @@ -92,6 +93,31 @@ + + + pg_check_frozen(regclass, t_ctid OUT tid) returns setof tid + + + + Returns the TIDs of non-frozen tuples present in pages marked all-frozen + in the visibility map. If this function returns a non-empty set of + TIDs, the visibility map is corrupt. + + + + + + pg_check_visible(regclass, t_ctid OUT tid) returns setof tid + + + + Returns the TIDs of tuples which are not all-visible despite the fact + that the pages which contain them are marked as all-visible in the + visibility map. If this function returns a non-empty set of TIDs, the + visibility map is corrupt. + + + diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9b38d35..168649f 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2374,6 +2374,7 @@ convert_testexpr_context core_YYSTYPE core_yy_extra_type core_yyscan_t +corrupt_items cost_qual_eval_context count_agg_clauses_context create_upper_paths_hook_type