diff --git a/contrib/pg_prewarm/Makefile b/contrib/pg_prewarm/Makefile index 617ac8e0..53bce449 100644 --- a/contrib/pg_prewarm/Makefile +++ b/contrib/pg_prewarm/Makefile @@ -12,6 +12,9 @@ PGFILEDESC = "pg_prewarm - preload relation data into system buffer cache" REGRESS = pg_prewarm +EXTRA_INSTALL = src/test/modules/injection_points +export enable_injection_points + TAP_TESTS = 1 ifdef USE_PGXS diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c index ba0bc8e6..2c207efa 100644 --- a/contrib/pg_prewarm/autoprewarm.c +++ b/contrib/pg_prewarm/autoprewarm.c @@ -39,12 +39,14 @@ #include "storage/fd.h" #include "storage/ipc.h" #include "storage/latch.h" +#include "storage/lmgr.h" #include "storage/lwlock.h" #include "storage/procsignal.h" #include "storage/read_stream.h" #include "storage/smgr.h" #include "tcop/tcopprot.h" #include "utils/guc.h" +#include "utils/injection_point.h" #include "utils/rel.h" #include "utils/relfilenumbermap.h" #include "utils/timestamp.h" @@ -52,6 +54,11 @@ #define AUTOPREWARM_FILE "autoprewarm.blocks" +/* + * Block interval for checking conflicting lock waiters during prewarming. + */ +#define PREWARM_WAITER_CHECK_INTERVAL 1024 + /* Metadata for each block we dump. */ typedef struct BlockInfoRecord { @@ -635,13 +642,83 @@ autoprewarm_database_main(Datum main_arg) * read stream callback will check that we still have free buffers * before requesting each block from the read stream API. */ - while ((buf = read_stream_next_buffer(stream, NULL)) != InvalidBuffer) { - apw_state->prewarmed_blocks++; - ReleaseBuffer(buf); - } + int blocks_since_check = 0; + bool rel_dropped = false; - read_stream_end(stream); + while ((buf = read_stream_next_buffer(stream, NULL)) != InvalidBuffer) + { + CHECK_FOR_INTERRUPTS(); + apw_state->prewarmed_blocks++; + ReleaseBuffer(buf); + blocks_since_check++; + + /* + * Periodically check for conflicting lock waiters. 
If
+			 * found, yield our lock so the waiter can proceed.
+			 */
+			if (blocks_since_check >= PREWARM_WAITER_CHECK_INTERVAL)
+			{
+				blocks_since_check = 0;
+
+				INJECTION_POINT("autoprewarm-before-check-and-yield", NULL);
+
+				if (LockHasWaitersRelation(rel, AccessShareLock))
+				{
+					read_stream_end(stream);
+					relation_close(rel, AccessShareLock);
+
+					/* Reacquire and check if relation is still valid. */
+					rel = try_relation_open(reloid, AccessShareLock);
+					if (rel == NULL)
+					{
+						rel_dropped = true;
+						break;
+					}
+
+					/*
+					 * Recalculate the fork size; if the whole fork was
+					 * truncated away, skip the remainder of this fork.
+					 */
+					if (!smgrexists(RelationGetSmgr(rel), forknum))
+					{
+						blk = block_info[p.pos];
+						stream = NULL;	/* stream already ended above */
+						break;
+					}
+					p.nblocks = RelationGetNumberOfBlocksInFork(rel, forknum);
+
+					/* Restart stream for remaining blocks. */
+					stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
+														READ_STREAM_DEFAULT |
+														READ_STREAM_USE_BATCHING,
+														NULL,
+														rel,
+														p.forknum,
+														apw_read_stream_next_block,
+														&p,
+														0);
+				}
+			}
+		}
+
+		if (rel_dropped)
+		{
+			/* Fast-forward past remaining blocks for this relation. */
+			for (; i < apw_state->prewarm_stop_idx; i++)
+			{
+				blk = block_info[i];
+				if (blk.tablespace != tablespace ||
+					blk.filenumber != filenumber)
+					break;
+			}
+			CommitTransactionCommand();
+			continue;			/* outer while loop */
+		}
+
+		if (stream != NULL)
+			read_stream_end(stream);
 	}
 
 	/* Advance i past all the blocks just prewarmed. 
*/ i = p.pos; diff --git a/contrib/pg_prewarm/meson.build b/contrib/pg_prewarm/meson.build index e70546a4..8cc66073 100644 --- a/contrib/pg_prewarm/meson.build +++ b/contrib/pg_prewarm/meson.build @@ -37,6 +37,7 @@ tests += { 'tap': { 'tests': [ 't/001_basic.pl', + 't/002_lock_yield.pl', ], }, } diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c index c2716086..6e8f2d93 100644 --- a/contrib/pg_prewarm/pg_prewarm.c +++ b/contrib/pg_prewarm/pg_prewarm.c @@ -25,6 +25,7 @@ #include "storage/smgr.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/injection_point.h" #include "utils/lsyscache.h" #include "utils/rel.h" @@ -44,6 +45,74 @@ typedef enum static PGIOAlignedBlock blockbuffer; +/* + * Block interval for checking conflicting lock waiters. Checking every + * block is too expensive because LockHasWaitersRelation performs a + * lock-table probe, but we don't want to check too infrequently either, to avoid + * long stalls when there are waiters. + */ +#define PREWARM_WAITER_CHECK_INTERVAL 1024 + +/* + * Check whether any session is waiting for a lock that conflicts with + * AccessShareLock on the relation. If so, release the lock to let the + * waiter proceed, then try to reacquire it. + * + * Throws an error if the relation was dropped or truncated past 'next_block' while + * the lock was not held. + */ +static Relation +pg_prewarm_check_and_yield(Relation rel, Oid relOid, Oid privOid, + ForkNumber forkNumber, int64 next_block, + int64 blocks_done, int64 *last_block) +{ + int64 nblocks; + + INJECTION_POINT("pg_prewarm-before-check-and-yield", NULL); + + /* Nothing to do if nobody is waiting for a conflicting lock. */ + if (!LockHasWaitersRelation(rel, AccessShareLock)) + return rel; + + /* Release all locks to let the waiter proceed. */ + relation_close(rel, AccessShareLock); + if (privOid != relOid) + UnlockRelationOid(privOid, AccessShareLock); + + /* Reacquire in the correct order: parent table before index. 
*/ + if (privOid != relOid) + LockRelationOid(privOid, AccessShareLock); + rel = try_relation_open(relOid, AccessShareLock); + if (rel == NULL) + { + if (privOid != relOid) + UnlockRelationOid(privOid, AccessShareLock); + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("relation was dropped during pg_prewarm after %" PRId64 " blocks", + blocks_done))); + } + + /* Check if the fork still exists and has enough blocks. */ + if (!smgrexists(RelationGetSmgr(rel), forkNumber) || + (nblocks = RelationGetNumberOfBlocksInFork(rel, forkNumber)) <= next_block) + { + relation_close(rel, AccessShareLock); + if (privOid != relOid) + UnlockRelationOid(privOid, AccessShareLock); + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("relation was truncated during pg_prewarm after %" PRId64 " blocks", + blocks_done))); + } + + /* Adjust endpoint if the relation was partially truncated. */ + if (*last_block >= nblocks) + *last_block = nblocks - 1; + + return rel; +} + /* * pg_prewarm(regclass, mode text, fork text, * first_block int8, last_block int8) @@ -208,6 +277,11 @@ pg_prewarm(PG_FUNCTION_ARGS) for (block = first_block; block <= last_block; ++block) { CHECK_FOR_INTERRUPTS(); + if (blocks_done > 0 && + blocks_done % PREWARM_WAITER_CHECK_INTERVAL == 0) + rel = pg_prewarm_check_and_yield(rel, relOid, privOid, + forkNumber, block, + blocks_done, &last_block); PrefetchBuffer(rel, forkNumber, block); ++blocks_done; } @@ -227,6 +301,11 @@ pg_prewarm(PG_FUNCTION_ARGS) for (block = first_block; block <= last_block; ++block) { CHECK_FOR_INTERRUPTS(); + if (blocks_done > 0 && + blocks_done % PREWARM_WAITER_CHECK_INTERVAL == 0) + rel = pg_prewarm_check_and_yield(rel, relOid, privOid, + forkNumber, block, + blocks_done, &last_block); smgrread(RelationGetSmgr(rel), forkNumber, block, blockbuffer.data); ++blocks_done; } @@ -266,6 +345,33 @@ pg_prewarm(PG_FUNCTION_ARGS) buf = read_stream_next_buffer(stream, NULL); ReleaseBuffer(buf); 
++blocks_done; + + /* + * Periodically check for conflicting lock waiters. If found, end + * the current read stream and yield, then start a new stream for + * the remaining blocks. + */ + if (blocks_done % PREWARM_WAITER_CHECK_INTERVAL == 0 && + block < last_block) + { + read_stream_end(stream); + rel = pg_prewarm_check_and_yield(rel, relOid, privOid, + forkNumber, block + 1, + blocks_done, &last_block); + + /* Restart stream for remaining blocks. */ + p.current_blocknum = block + 1; + p.last_exclusive = last_block + 1; + stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE | + READ_STREAM_FULL | + READ_STREAM_USE_BATCHING, + NULL, + rel, + forkNumber, + block_range_read_stream_cb, + &p, + 0); + } } Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer); read_stream_end(stream); diff --git a/contrib/pg_prewarm/t/002_lock_yield.pl b/contrib/pg_prewarm/t/002_lock_yield.pl new file mode 100644 index 00000000..b5af7c08 --- /dev/null +++ b/contrib/pg_prewarm/t/002_lock_yield.pl @@ -0,0 +1,152 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group +# +# Test that pg_prewarm yields its AccessShareLock when a conflicting +# DDL operation (TRUNCATE, DROP TABLE) is waiting, and that pg_prewarm +# reports an appropriate error afterwards. + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +if ($ENV{enable_injection_points} ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} + +my $node = PostgreSQL::Test::Cluster->new('main'); +$node->init; +$node->start; + +if (!$node->check_extension('injection_points')) +{ + plan skip_all => 'Extension injection_points not installed'; +} + +$node->safe_psql('postgres', q( + CREATE EXTENSION pg_prewarm; + CREATE EXTENSION injection_points; +)); + +# Create tables large enough to trigger the waiter check +# (> PREWARM_WAITER_CHECK_INTERVAL = 1024 blocks). 
+$node->safe_psql('postgres', q( + CREATE TABLE trunc_test AS + SELECT i, repeat('x', 200) AS padding + FROM generate_series(1, 50000) i; + CREATE TABLE drop_test AS + SELECT i, repeat('x', 200) AS padding + FROM generate_series(1, 50000) i; +)); + +my $nblocks = $node->safe_psql('postgres', + "SELECT pg_relation_size('trunc_test') / current_setting('block_size')::int"); +ok($nblocks > 1024, "trunc_test has more than 1024 blocks ($nblocks)"); + +# ------------------------------------------------------------------ +# Test 1: Normal pg_prewarm on a large table with no conflicting waiters. +# ------------------------------------------------------------------ +my $result = $node->safe_psql('postgres', + "SELECT pg_prewarm('trunc_test', 'buffer')"); +like($result, qr/^[1-9][0-9]*$/, 'pg_prewarm buffer mode succeeds normally'); + +# ------------------------------------------------------------------ +# Test 2: TRUNCATE proceeds during pg_prewarm. +# +# pg_prewarm pauses at the injection point while holding AccessShareLock. +# TRUNCATE blocks waiting for AccessExclusiveLock. After we wake up +# pg_prewarm it detects the waiter, yields, and TRUNCATE completes. +# pg_prewarm then errors because the relation was truncated. +# ------------------------------------------------------------------ +$node->safe_psql('postgres', + "SELECT injection_points_attach('pg_prewarm-before-check-and-yield', 'wait')"); + +my $prewarm = $node->background_psql('postgres', on_error_stop => 0); +$prewarm->query_until(qr/starting_prewarm/, q( + \echo starting_prewarm + SELECT pg_prewarm('trunc_test', 'buffer'); +)); + +# Wait for pg_prewarm to hit the injection point. +$node->poll_query_until('postgres', q( + SELECT count(*) > 0 FROM pg_stat_activity + WHERE wait_event = 'pg_prewarm-before-check-and-yield'; +), 't'); + +# Start TRUNCATE in a second session — it will block on the lock. 
+my $truncate = $node->background_psql('postgres'); +$truncate->query_until(qr/starting_truncate/, q( + \echo starting_truncate + TRUNCATE trunc_test; +)); + +# Confirm TRUNCATE is waiting for a lock. +$node->poll_query_until('postgres', q( + SELECT count(*) > 0 FROM pg_stat_activity + WHERE query LIKE '%TRUNCATE trunc_test%' + AND wait_event_type = 'Lock'; +), 't'); + +# Detach the injection point so pg_prewarm won't pause again at the +# next check interval, then wake it up. +$node->safe_psql('postgres', + "SELECT injection_points_detach('pg_prewarm-before-check-and-yield')"); +$node->safe_psql('postgres', + "SELECT injection_points_wakeup('pg_prewarm-before-check-and-yield')"); + +# TRUNCATE should now complete because pg_prewarm yielded its lock. +$truncate->quit; +pass('TRUNCATE completed during pg_prewarm'); + +# pg_prewarm should have reported that the relation was truncated. +$prewarm->quit; +like($prewarm->{stderr}, + qr/relation was truncated during pg_prewarm/, + 'pg_prewarm reports truncation after TRUNCATE'); + +# ------------------------------------------------------------------ +# Test 3: DROP TABLE proceeds during pg_prewarm. 
+# ------------------------------------------------------------------ +$node->safe_psql('postgres', + "SELECT injection_points_attach('pg_prewarm-before-check-and-yield', 'wait')"); + +my $prewarm2 = $node->background_psql('postgres', on_error_stop => 0); +$prewarm2->query_until(qr/starting_prewarm/, q( + \echo starting_prewarm + SELECT pg_prewarm('drop_test', 'buffer'); +)); + +$node->poll_query_until('postgres', q( + SELECT count(*) > 0 FROM pg_stat_activity + WHERE wait_event = 'pg_prewarm-before-check-and-yield'; +), 't'); + +my $drop = $node->background_psql('postgres'); +$drop->query_until(qr/starting_drop/, q( + \echo starting_drop + DROP TABLE drop_test; +)); + +$node->poll_query_until('postgres', q( + SELECT count(*) > 0 FROM pg_stat_activity + WHERE query LIKE '%DROP TABLE drop_test%' + AND wait_event_type = 'Lock'; +), 't'); + +$node->safe_psql('postgres', + "SELECT injection_points_detach('pg_prewarm-before-check-and-yield')"); +$node->safe_psql('postgres', + "SELECT injection_points_wakeup('pg_prewarm-before-check-and-yield')"); + +$drop->quit; +pass('DROP TABLE completed during pg_prewarm'); + +$prewarm2->quit; +like($prewarm2->{stderr}, + qr/relation was dropped during pg_prewarm/, + 'pg_prewarm reports drop after DROP TABLE'); + +$node->stop; +done_testing();