diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index bbcac69d48..459b2ad4ae 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -10958,6 +10958,14 @@ proallargtypes => '{oid,text,int8,timestamptz}', proargmodes => '{i,o,o,o}', proargnames => '{tablespace,name,size,modification}', prosrc => 'pg_ls_tmpdir_1arg' }, +{ oid => '9147', descr => 'check pages of a relation', + proname => 'pg_relation_check_pages', procost => '10000', + prorows => '20', proisstrict => 'f', proretset => 't', proparallel => 'r', + provolatile => 'v', prorettype => 'record', proargtypes => 'regclass text', + proallargtypes => '{regclass,text,text,int8}', + proargmodes => '{i,i,o,o}', + proargnames => '{relation,fork,path,failed_block_num}', + prosrc => 'pg_relation_check_pages' }, # hash partitioning constraint function { oid => '5028', descr => 'hash partition CHECK constraint', diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index ee91b8fa26..a21cab2eaf 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -240,6 +240,9 @@ extern void AtProcExit_LocalBuffers(void); extern void TestForOldSnapshot_impl(Snapshot snapshot, Relation relation); +extern bool CheckBuffer(struct SMgrRelationData *smgr, ForkNumber forknum, + BlockNumber blkno); + /* in freelist.c */ extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype); extern void FreeAccessStrategy(BufferAccessStrategy strategy); diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 85cd147e21..c6dd084fbc 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1300,6 +1300,14 @@ LANGUAGE INTERNAL STRICT VOLATILE AS 'pg_create_logical_replication_slot'; +CREATE OR REPLACE FUNCTION pg_relation_check_pages( + IN relation regclass, IN fork text DEFAULT NULL, + OUT path text, OUT failed_block_num bigint) +RETURNS SETOF record +LANGUAGE internal +VOLATILE PARALLEL RESTRICTED +AS 'pg_relation_check_pages'; + CREATE OR REPLACE FUNCTION make_interval(years int4 DEFAULT 0, months int4 DEFAULT 0, weeks int4 DEFAULT 0, days int4 DEFAULT 0, hours int4 DEFAULT 0, mins int4 DEFAULT 0, @@ -1444,6 +1452,7 @@ AS 'unicode_is_normalized'; -- can later change who can access these functions, or leave them as only -- available to superuser / cluster owner, if they choose. -- +REVOKE EXECUTE ON FUNCTION pg_relation_check_pages(regclass, text) FROM public; REVOKE EXECUTE ON FUNCTION pg_start_backup(text, boolean, boolean) FROM public; REVOKE EXECUTE ON FUNCTION pg_stop_backup() FROM public; REVOKE EXECUTE ON FUNCTION pg_stop_backup(boolean, boolean) FROM public; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index e549fa1d30..427177e9ce 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -4583,3 +4583,100 @@ TestForOldSnapshot_impl(Snapshot snapshot, Relation relation) (errcode(ERRCODE_SNAPSHOT_TOO_OLD), errmsg("snapshot too old"))); } + + +/* + * CheckBuffer + * + * Check the state of a buffer without loading it into the shared buffers. To + * avoid torn pages and possible false positives when reading data, a shared + * LWLock is taken on the target buffer pool partition mapping, and we check + * if the page is in shared_buffers or not. + * + * If the page is found as dirty in the shared buffers, it is ignored as + * it will be flushed to disk either before the end of the next checkpoint + * or during recovery in case of unsafe shutdown + * + * If the page is found in the shared buffers but is not dirty, we still + * check the state of its data on disk, as it could be possible that the + * page stayed in shared buffers for a rather long time but its on-disk data + * got corrupted. An I/O lock is taken on the block and the block is then + * read from storage to prevent any concurrent activity from happening. + * + * If the page is not found in shared_buffers, the block is read from disk + * while holding the buffer pool partition mapping LWLock. + * + * The page data is stored a private memory area local to this function. + * + * The caller of this function should hold an AccessShareLock on the relation + * check whose page is checked. + */ +bool +CheckBuffer(SMgrRelation smgr, ForkNumber forknum, BlockNumber blkno) +{ + char buffer[BLCKSZ]; + BufferTag buf_tag; /* identity of requested block */ + uint32 buf_hash; /* hash value for buf_tag */ + LWLock *partLock; /* buffer partition lock for the buffer */ + BufferDesc *bufdesc; + int buf_id; + + Assert(smgrexists(smgr, forknum)); + + /* create a tag so we can look after the buffer */ + INIT_BUFFERTAG(buf_tag, smgr->smgr_rnode.node, forknum, blkno); + + /* determine its hash code and partition lock ID */ + buf_hash = BufTableHashCode(&buf_tag); + partLock = BufMappingPartitionLock(buf_hash); + + /* see if the block is in the buffer pool or not */ + LWLockAcquire(partLock, LW_SHARED); + buf_id = BufTableLookup(&buf_tag, buf_hash); + if (buf_id >= 0) + { + uint32 buf_state; + + /* + * Found it. Now, retrieve its state to know what to do with it, and + * release the pin immediately. We do so to limit overhead as much + * as possible. We keep the shared lightweight lock on the target + * buffer mapping partition for noe, so this buffer cannot be evicted, + * and we will acquire an I/O Lock on the buffer if we need to read + * the content on disk. + */ + bufdesc = GetBufferDescriptor(buf_id); + + buf_state = LockBufHdr(bufdesc); + UnlockBufHdr(bufdesc, buf_state); + + /* If the page is dirty or invalid, skip it */ + if ((buf_state & BM_DIRTY) || !(buf_state & BM_TAG_VALID)) + { + LWLockRelease(partLock); + return true; + } + + /* + * Read the buffer from disk, taking an I/O lock to prevent torn-page + * reads, in the unlikely event that it was concurrently dirtied and + * flushed. + */ + LWLockAcquire(BufferDescriptorGetIOLock(bufdesc), LW_SHARED); + smgrread(smgr, forknum, blkno, buffer); + LWLockRelease(BufferDescriptorGetIOLock(bufdesc)); + } + else + { + /* + * Simply read the buffer. There's no risk of modification on it as + * we are holding the buffer pool partition mapping lock. + */ + smgrread(smgr, forknum, blkno, buffer); + } + + /* buffer lookup done, so now do its check */ + LWLockRelease(partLock); + + return PageIsVerified(buffer, blkno); +} diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index b4d55e849b..e2279af1e5 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -69,6 +69,7 @@ OBJS = \ oid.o \ oracle_compat.o \ orderedsetaggs.o \ + pagefuncs.o \ partitionfuncs.o \ pg_locale.o \ pg_lsn.o \ diff --git a/src/backend/utils/adt/pagefuncs.c b/src/backend/utils/adt/pagefuncs.c new file mode 100644 index 0000000000..7de415fc59 --- /dev/null +++ b/src/backend/utils/adt/pagefuncs.c @@ -0,0 +1,219 @@ +/*------------------------------------------------------------------------- + * + * pagefuncs.c + * Functions for page related features. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/pagefuncs.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/relation.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/smgr.h" +#include "utils/builtins.h" + +static void check_one_relation(TupleDesc tupdesc, Tuplestorestate *tupstore, + Oid relid, ForkNumber single_forknum); +static void check_relation_fork(TupleDesc tupdesc, Tuplestorestate *tupstore, + Relation relation, ForkNumber forknum); + +/* + * callback arguments for check_pages_error_callback() + */ +typedef struct CheckPagesErrorInfo +{ + char *path; + BlockNumber blkno; +} CheckPagesErrorInfo; + +/* + * Error callback specific to check_relation_fork(). + */ +static void +check_pages_error_callback(void *arg) +{ + CheckPagesErrorInfo *errinfo = (CheckPagesErrorInfo *) arg; + + errcontext("while checking page %u of path %s", + errinfo->blkno, errinfo->path); +} + +/* + * pg_relation_check_pages + * + * Check the state of all the pages for one or more fork types in the given + * relation. + */ +Datum +pg_relation_check_pages(PG_FUNCTION_ARGS) +{ + Oid relid; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + ForkNumber forknum; + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + MemoryContextSwitchTo(oldcontext); + + /* handle arguments */ + if (PG_ARGISNULL(0)) + { + /* Just leave if nothing is defined */ + PG_RETURN_VOID(); + } + + /* By default all the forks of a relation are checked */ + if (PG_ARGISNULL(1)) + forknum = InvalidForkNumber; + else + { + const char *forkname = TextDatumGetCString(PG_GETARG_TEXT_PP(1)); + forknum = forkname_to_number(forkname); + } + + relid = PG_GETARG_OID(0); + + check_one_relation(tupdesc, tupstore, relid, forknum); + tuplestore_donestoring(tupstore); + + return (Datum) 0; +} + +/* + * Perform the check on a single relation, possibly filtered with a single + * fork. This function will check if the given relation exists or not, as + * a relation could be dropped after checking for the list of relations and + * before getting here, and we don't want to error out in this case. + */ +static void +check_one_relation(TupleDesc tupdesc, Tuplestorestate *tupstore, + Oid relid, ForkNumber single_forknum) +{ + Relation relation; + ForkNumber forknum; + + relation = relation_open(relid, AccessShareLock); + + /* sanity checks, returning no results if no support */ + if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind) || + relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + { + relation_close(relation, AccessShareLock); + return; + } + + RelationOpenSmgr(relation); + + for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) + { + if (single_forknum != InvalidForkNumber && single_forknum != forknum) + continue; + + if (smgrexists(relation->rd_smgr, forknum)) + check_relation_fork(tupdesc, tupstore, relation, forknum); + } + + relation_close(relation, AccessShareLock); +} + +/* + * For a given relation and fork, Do the real work of iterating over all pages + * and doing the check. Caller must hold an AccessShareLock lock on the given + * relation. + */ +static void +check_relation_fork(TupleDesc tupdesc, Tuplestorestate *tupstore, + Relation relation, ForkNumber forknum) +{ + BlockNumber blkno, + nblocks; + SMgrRelation smgr = relation->rd_smgr; + char *path; + CheckPagesErrorInfo errinfo; + ErrorContextCallback errcallback; + + /* Number of output arguments in the SRF */ +#define PG_CHECK_RELATION_COLS 2 + + Assert(CheckRelationLockedByMe(relation, AccessShareLock, true)); + + /* + * We remember the number of blocks here. Since caller must hold a lock on + * the relation, we know that it won't be truncated while we're iterating + * over the blocks. Any block added after this function started won't be + * checked, but this is out of scope as such pages will be flushed before + * the next checkpoint's completion. + */ + nblocks = RelationGetNumberOfBlocksInFork(relation, forknum); + + path = relpathbackend(smgr->smgr_rnode.node, + smgr->smgr_rnode.backend, + forknum); + + /* + * Error context to print some information about blocks and relations + * impacted by corruptions. + */ + errinfo.path = pstrdup(path); + errinfo.blkno = 0; + errcallback.callback = check_pages_error_callback; + errcallback.arg = (void *) &errinfo; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + for (blkno = 0; blkno < nblocks; blkno++) + { + Datum values[PG_CHECK_RELATION_COLS]; + bool nulls[PG_CHECK_RELATION_COLS]; + int i = 0; + + /* Update block number for the error context */ + errinfo.blkno = blkno; + + /* Check the given buffer */ + if (CheckBuffer(smgr, forknum, blkno)) + continue; + + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + + values[i++] = CStringGetTextDatum(path); + values[i++] = UInt32GetDatum(blkno); + + Assert(i == PG_CHECK_RELATION_COLS); + + /* Save the corrupted blocks in the tuplestore. */ + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + + pfree(path); + } + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; +} diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index a6d2ffbf9e..a845af71fd 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global SUBDIRS = \ brin \ commit_ts \ + check_relation \ delay_execution \ dummy_index_am \ dummy_seclabel \ diff --git a/src/test/modules/check_relation/.gitignore b/src/test/modules/check_relation/.gitignore new file mode 100644 index 0000000000..871e943d50 --- /dev/null +++ b/src/test/modules/check_relation/.gitignore @@ -0,0 +1,2 @@ +# Generated by test suite +/tmp_check/ diff --git a/src/test/modules/check_relation/Makefile b/src/test/modules/check_relation/Makefile new file mode 100644 index 0000000000..a540cdece2 --- /dev/null +++ b/src/test/modules/check_relation/Makefile @@ -0,0 +1,14 @@ +# src/test/modules/check_relation/Makefile + +TAP_TESTS = 1 + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/check_relation +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/check_relation/README b/src/test/modules/check_relation/README new file mode 100644 index 0000000000..415c4b21ad --- /dev/null +++ b/src/test/modules/check_relation/README @@ -0,0 +1,23 @@ +src/test/check_relation/README + +Regression tests for online checksums verification +================================================== + +This directory contains a test suite for online checksums verification. + +Running the tests +================= + +NOTE: You must have given the --enable-tap-tests argument to configure. + +Run + make check +or + make installcheck +You can use "make installcheck" if you previously did "make install". +In that case, the code in the installation tree is tested. With +"make check", a temporary installation tree is built from the current +sources and then tested. + +Either way, this test initializes, starts, and stops a test Postgres +cluster. diff --git a/src/test/modules/check_relation/t/001_checksums_check.pl b/src/test/modules/check_relation/t/001_checksums_check.pl new file mode 100644 index 0000000000..73a7f2d849 --- /dev/null +++ b/src/test/modules/check_relation/t/001_checksums_check.pl @@ -0,0 +1,246 @@ +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 52; + +our $CHECKSUM_UINT16_OFFSET = 4; +our $PD_UPPER_UINT16_OFFSET = 7; +our $BLOCKSIZE; +our $TOTAL_NB_ERR = 0; + +sub get_block +{ + my ($filename, $blkno) = @_; + my $block; + + open(my $infile, '<', $filename) or die; + binmode($infile); + + my $success = read($infile, $block, $BLOCKSIZE, ($blkno * $BLOCKSIZE)); + die($!) if not defined $success; + + close($infile); + + return($block); +} + +sub overwrite_block +{ + my ($filename, $block, $blkno) = @_; + + open(my $outfile, '>', $filename) or die; + binmode ($outfile); + + my $nb = syswrite($outfile, $block, $BLOCKSIZE, ($blkno * $BLOCKSIZE)); + + die($!) if not defined $nb; + die("Write error") if ($nb != $BLOCKSIZE); + + $outfile->flush(); + + close($outfile); +} + +sub get_uint16_from_page +{ + my ($block, $offset) = @_; + + return (unpack("S*", $block))[$offset]; +} + +sub set_uint16_to_page +{ + my ($block, $data, $offset) = @_; + + my $pack = pack("S", $data); + + # vec with 16B or more won't preserve endianness + vec($block, 2*$offset, 8) = (unpack('C*', $pack))[0]; + vec($block, (2*$offset) + 1, 8) = (unpack('C*', $pack))[1]; + + return $block; +} + +sub check_checksums_call +{ + my ($node, $relname) = @_; + + my ($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT COUNT(*)" + . " FROM pg_catalog.pg_relation_check_pages('$relname')" + ); + + return ($stderr eq ''); +} + +sub check_checksums_nb_error +{ + my ($node, $nb, $pattern) = @_; + + my ($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT COUNT(*)" + . " FROM (SELECT pg_catalog.pg_relation_check_pages(oid, 'main')" + . " FROM pg_class WHERE relkind in ('r', 'i', 'm')) AS s" + ); + + is($cmdret, 0, 'Function should run successfully'); + like($stderr, $pattern, 'Error output should match expectations'); + is($stdout, $nb, "Should have $nb error"); + + $TOTAL_NB_ERR += $nb; +} + +sub check_pg_stat_database_nb_error +{ + my ($node) = @_; + + my ($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT " + . " sum(checksum_failures)" + . " FROM pg_catalog.pg_stat_database" + ); + + is($cmdret, 0, 'Function should run successfully'); + is($stderr, '', 'Function should run successfully'); + is($stdout, $TOTAL_NB_ERR, "Should have $TOTAL_NB_ERR error"); +} + +sub get_checksums_errors +{ + my ($node, $nb, $pattern) = @_; + + my ($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT " + . "relname, failed_block_num" + . " FROM (SELECT relname, (pg_catalog.pg_relation_check_pages(oid)).*" + . " FROM pg_class WHERE relkind in ('r','i', 'm')) AS s" + ); + + is($cmdret, '0', 'Function should run successfully'); + like($stderr, $pattern, 'Error output should match expectations'); + + $TOTAL_NB_ERR += $nb; + + return $stdout; +} + +# This function will perform various test by modifying the specified block at +# the specified uint16 offset, checking that the corruption is correctly +# detected, and finally restore the specified block to its original content. +sub corrupt_and_test_block +{ + my ($node, $filename, $blkno, $offset, $fake_data, $error_pattern, $test_prefix) = @_; + + check_checksums_nb_error($node, 0, qr/^$/); + + check_pg_stat_database_nb_error($node); + + $node->stop(); + + my $original_block = get_block($filename, 0); + my $original_data = get_uint16_from_page($original_block, $offset); + + isnt($original_data, $fake_data, + "$test_prefix: The fake data at offset $offset should be different" + . " from the existing one"); + + my $new_block = set_uint16_to_page($original_block, $fake_data, $offset); + isnt($original_data, get_uint16_from_page($new_block, $offset), + "$test_prefix: The fake data at offset $offset should have been changed in memory"); + + overwrite_block($filename, $new_block, 0); + + my $written_data = get_uint16_from_page(get_block($filename, 0), $offset); + isnt($original_data, $written_data, + "$test_prefix: The data written at offset $offset should be different" + . " from the original one"); + is(get_uint16_from_page($new_block, $offset), $written_data, + "$test_prefix: The data written at offset $offset should be the same" + . " as the one in memory"); + is($written_data, $fake_data, + "$test_prefix: The data written at offset $offset should be the one" + . " we wanted to write"); + + $node->start(); + + check_checksums_nb_error($node, 1, $error_pattern); + + my $det = get_checksums_errors($node, 1, $error_pattern); + is($det, "t1|0", + "$test_prefix: The checksums error for modification at offset $offset" + . " should be detected"); + + $node->stop(); + + $new_block = set_uint16_to_page($original_block, $original_data, $offset); + is($original_data, get_uint16_from_page($new_block, $offset), + "$test_prefix: The data at offset $offset should have been restored in memory"); + + overwrite_block($filename, $new_block, 0); + is($original_data, get_uint16_from_page(get_block($filename, $blkno), + $offset), + "$test_prefix: The data at offset $offset should have been restored on disk"); + + $node->start(); + + check_checksums_nb_error($node, 0, qr/^$/); +} + +if (exists $ENV{MY_PG_REGRESS}) +{ + $ENV{PG_REGRESS} = $ENV{MY_PG_REGRESS}; +} + +my $node = get_new_node('main'); + +my %params; +$params{'extra'} = ['--data-checksums']; +$node->init(%params); + +$node->start(); + +$ENV{PGOPTIONS} = '--client-min-messages=WARNING'; + +my ($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT" + . " current_setting('data_checksums')"); + +is($stdout, 'on', 'Data checksums should be enabled'); + +($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT" + . " current_setting('block_size')"); + +$BLOCKSIZE = $stdout; + +# Basic schema to corrupt and check +$node->safe_psql( + 'postgres', q| + CREATE TABLE public.t1(id integer); + INSERT INTO public.t1 SELECT generate_series(1, 100); + CHECKPOINT; +|); + +# get the underlying heap absolute path +($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT" + . " current_setting('data_directory') || '/' || pg_relation_filepath('t1')" +); + +isnt($stdout, '', 'A relfilenode should be returned'); + +my $filename = $stdout; + +check_checksums_nb_error($node, 0, qr/^$/); + +check_pg_stat_database_nb_error($node); + +my $fake_uint16 = hex '0x0000'; + +# Test with a modified checksum. We use a zero checksum here as it's the only +# one that cannot exist on a checksummed page. We also don't have an easy way +# to compute what the checksum would be after a modification in a random place +# in the block. +corrupt_and_test_block($node, $filename, 0, $CHECKSUM_UINT16_OFFSET, + $fake_uint16, qr/WARNING.*page verification failed/s, + 'corrupted page: '); + +# Test corruption making the block looks like it's PageIsNew(). This should +# pass. +corrupt_and_test_block($node, $filename, 0, $PD_UPPER_UINT16_OFFSET, + $fake_uint16, qr/^$/, 'new page: '); diff --git a/src/test/regress/expected/pagefuncs.out b/src/test/regress/expected/pagefuncs.out new file mode 100644 index 0000000000..38a72b01b3 --- /dev/null +++ b/src/test/regress/expected/pagefuncs.out @@ -0,0 +1,72 @@ +-- +-- Tests for functions related to relation pages +-- +-- Restricted to superusers by default +CREATE ROLE regress_pgfunc_user; +SET ROLE regress_pgfunc_user; +SELECT pg_relation_check_pages('pg_class'); -- error +ERROR: permission denied for function pg_relation_check_pages +SELECT pg_relation_check_pages('pg_class', 'main'); -- error +ERROR: permission denied for function pg_relation_check_pages +RESET ROLE; +DROP ROLE regress_pgfunc_user; +-- NULL and simple sanity checks +SELECT pg_relation_check_pages(NULL); -- empty result + pg_relation_check_pages +------------------------- +(0 rows) + +SELECT pg_relation_check_pages(NULL, NULL); -- empty result + pg_relation_check_pages +------------------------- +(0 rows) + +SELECT pg_relation_check_pages('pg_class', 'invalid_fork'); -- error +ERROR: invalid fork name +HINT: Valid fork names are "main", "fsm", "vm", and "init". +-- Relation types that are supported +CREATE TABLE pgfunc_test_tab (id int); +CREATE INDEX pgfunc_test_ind ON pgfunc_test_tab(id); +INSERT INTO pgfunc_test_tab VALUES (generate_series(1,1000)); +SELECT pg_relation_check_pages('pgfunc_test_tab'); + pg_relation_check_pages +------------------------- +(0 rows) + +SELECT pg_relation_check_pages('pgfunc_test_ind'); + pg_relation_check_pages +------------------------- +(0 rows) + +DROP TABLE pgfunc_test_tab; +CREATE MATERIALIZED VIEW pgfunc_test_matview AS SELECT 1; +SELECT pg_relation_check_pages('pgfunc_test_matview'); + pg_relation_check_pages +------------------------- +(0 rows) + +DROP MATERIALIZED VIEW pgfunc_test_matview; +CREATE SEQUENCE pgfunc_test_seq; +SELECT pg_relation_check_pages('pgfunc_test_seq'); + pg_relation_check_pages +------------------------- +(0 rows) + +DROP SEQUENCE pgfunc_test_seq; +-- pg_relation_check_pages() returns no results if passed relations that +-- do not support the operation, like relations without storage or temporary +-- relations. +CREATE TEMPORARY TABLE pgfunc_test_temp AS SELECT generate_series(1,10) AS a; +SELECT pg_relation_check_pages('pgfunc_test_temp'); + pg_relation_check_pages +------------------------- +(0 rows) + +DROP TABLE pgfunc_test_temp; +CREATE VIEW pgfunc_test_view AS SELECT 1; +SELECT pg_relation_check_pages('pgfunc_test_view'); + pg_relation_check_pages +------------------------- +(0 rows) + +DROP VIEW pgfunc_test_view; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index ae89ed7f0b..7a46a13252 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -112,7 +112,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # ---------- # Another group of parallel tests # ---------- -test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain +test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain pagefuncs # event triggers cannot run concurrently with any test that runs DDL test: event_trigger diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 525bdc804f..9a80b80f73 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -197,6 +197,7 @@ test: hash_part test: indexing test: partition_aggregate test: partition_info +test: pagefuncs test: tuplesort test: explain test: event_trigger diff --git a/src/test/regress/sql/pagefuncs.sql b/src/test/regress/sql/pagefuncs.sql new file mode 100644 index 0000000000..12d32eeae4 --- /dev/null +++ b/src/test/regress/sql/pagefuncs.sql @@ -0,0 +1,41 @@ +-- +-- Tests for functions related to relation pages +-- + +-- Restricted to superusers by default +CREATE ROLE regress_pgfunc_user; +SET ROLE regress_pgfunc_user; +SELECT pg_relation_check_pages('pg_class'); -- error +SELECT pg_relation_check_pages('pg_class', 'main'); -- error +RESET ROLE; +DROP ROLE regress_pgfunc_user; + +-- NULL and simple sanity checks +SELECT pg_relation_check_pages(NULL); -- empty result +SELECT pg_relation_check_pages(NULL, NULL); -- empty result +SELECT pg_relation_check_pages('pg_class', 'invalid_fork'); -- error + +-- Relation types that are supported +CREATE TABLE pgfunc_test_tab (id int); +CREATE INDEX pgfunc_test_ind ON pgfunc_test_tab(id); +INSERT INTO pgfunc_test_tab VALUES (generate_series(1,1000)); +SELECT pg_relation_check_pages('pgfunc_test_tab'); +SELECT pg_relation_check_pages('pgfunc_test_ind'); +DROP TABLE pgfunc_test_tab; + +CREATE MATERIALIZED VIEW pgfunc_test_matview AS SELECT 1; +SELECT pg_relation_check_pages('pgfunc_test_matview'); +DROP MATERIALIZED VIEW pgfunc_test_matview; +CREATE SEQUENCE pgfunc_test_seq; +SELECT pg_relation_check_pages('pgfunc_test_seq'); +DROP SEQUENCE pgfunc_test_seq; + +-- pg_relation_check_pages() returns no results if passed relations that +-- do not support the operation, like relations without storage or temporary +-- relations. +CREATE TEMPORARY TABLE pgfunc_test_temp AS SELECT generate_series(1,10) AS a; +SELECT pg_relation_check_pages('pgfunc_test_temp'); +DROP TABLE pgfunc_test_temp; +CREATE VIEW pgfunc_test_view AS SELECT 1; +SELECT pg_relation_check_pages('pgfunc_test_view'); +DROP VIEW pgfunc_test_view; diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index c99499e52b..57a45cf0d3 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -26219,6 +26219,56 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); + + Data Sanity Functions + + + The functions shown in + provide ways to check the sanity of data files in the cluster. + + + + Data Sanity Functions + + + Name Return Type Description + + + + + + + pg_relation_check_pages(relation regclass [, fork text DEFAULT NULL ]) + + setof record + Check the pages of a relation. + + + + +
+ + + pg_relation_check_pages + + + pg_relation_check_pages iterates over all blocks of a + given relation and verifies if they are in a state where they can safely + be loaded into the shared buffers. If defined, + fork specifies that only the pages of the given + fork are to be verified. Fork can be 'main' for the + main data fork, 'fsm' for the free space map, + 'vm' for the visibility map, or + 'init' for the initialization fork. The default of + NULL means that all the forks of the relation are + checked. The function returns a list of blocks that are considered as + corrupted with the path of the related file. Use of this function is + restricted to superusers by default but access may be granted to others + using GRANT. + + +
+ diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index 90594bd41b..adbabba3c4 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -50,7 +50,8 @@ my @contrib_excludes = ( 'pgcrypto', 'sepgsql', 'brin', 'test_extensions', 'test_misc', 'test_pg_dump', - 'snapshot_too_old', 'unsafe_tests'); + 'snapshot_too_old', 'unsafe_tests', + 'check_relation'); # Set of variables for frontend modules my $frontend_defines = { 'initdb' => 'FRONTEND' };