commit ec7af33d6f36a24b1ae1c68661277188f45030b3 Author: mithun Date: Tue Feb 7 14:57:35 2017 +0530 commit 1: auto_pg_prewarm diff --git a/contrib/pg_prewarm/Makefile b/contrib/pg_prewarm/Makefile index 7ad941e..706b0da 100644 --- a/contrib/pg_prewarm/Makefile +++ b/contrib/pg_prewarm/Makefile @@ -1,10 +1,10 @@ # contrib/pg_prewarm/Makefile MODULE_big = pg_prewarm -OBJS = pg_prewarm.o $(WIN32RES) +OBJS = pg_prewarm.o auto_pg_prewarm.o $(WIN32RES) EXTENSION = pg_prewarm -DATA = pg_prewarm--1.1.sql pg_prewarm--1.0--1.1.sql +DATA = pg_prewarm--1.1.sql pg_prewarm--1.1--1.2.sql pg_prewarm--1.0--1.1.sql PGFILEDESC = "pg_prewarm - preload relation data into system buffer cache" ifdef USE_PGXS diff --git a/contrib/pg_prewarm/auto_pg_prewarm.c b/contrib/pg_prewarm/auto_pg_prewarm.c new file mode 100644 index 0000000..f4ba4b5 --- /dev/null +++ b/contrib/pg_prewarm/auto_pg_prewarm.c @@ -0,0 +1,785 @@ +/*------------------------------------------------------------------------- + * + * auto_pg_prewarm.c + * Automatically dumps buffer pool's block info and then load blocks into + * buffer pool. + * + * Copyright (c) 2013-2017, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pg_prewarm.c/auto_pg_prewarm.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include + +/* These are always necessary for a bgworker. */ +#include "miscadmin.h" +#include "postmaster/bgworker.h" +#include "storage/ipc.h" +#include "storage/latch.h" +#include "storage/lwlock.h" +#include "storage/proc.h" +#include "storage/shmem.h" + +/* These are necessary for prewarm utilities. */ +#include "pgstat.h" +#include "storage/buf_internals.h" +#include "storage/smgr.h" +#include "utils/memutils.h" +#include "utils/resowner.h" +#include "utils/guc.h" +#include "catalog/pg_class.h" + +/* + * auto pg_prewarm : + * + * What is it? 
+ * ===========
+ * A pair of bgworkers: one automatically dumps the buffer pool's block info
+ * at a given interval, and the other loads those blocks back into the buffer
+ * pool when the server restarts.
+ *
+ * How does it work?
+ * =================
+ * When the shared library pg_prewarm is preloaded during server startup, a
+ * bgworker "auto pg_prewarm load" is launched immediately after the server
+ * is started. The bgworker loads the blocks listed in the block info entries
+ * of $PGDATA/AUTO_PG_PREWARM_FILE for as long as there is a free buffer in
+ * the buffer pool. This way we do not replace any new blocks which were
+ * loaded either by the recovery process or the querying clients.
+ *
+ * Once the "auto pg_prewarm load" bgworker has completed its job, it
+ * registers a dynamic bgworker "auto pg_prewarm dump", which is launched
+ * when the server reaches a consistent state. The new bgworker
+ * periodically scans the buffer pool and then dumps the meta info of the
+ * blocks which are currently in the buffer pool. The GUC
+ * pg_prewarm.dump_interval, if set > 0, indicates the minimum time interval
+ * between two dumps. If pg_prewarm.dump_interval is set to
+ * AT_PWARM_DUMP_AT_SHUTDOWN_ONLY, the bgworker will only dump at the time of
+ * server shutdown. If it is set to AT_PWARM_LOAD_ONLY, we do not want the
+ * bgworker to dump anymore, so it stops there.
+ *
+ * To relaunch a stopped "auto pg_prewarm dump" bgworker we can use the utility
+ * function launch_pg_prewarm_dump.
+ */
+
+PG_FUNCTION_INFO_V1(launch_pg_prewarm_dump);
+
+/*
+ * Special values of pg_prewarm.dump_interval; any value > 0 is a period in
+ * seconds. The negative constant is parenthesized so that it expands safely
+ * inside any expression.
+ */
+#define AT_PWARM_LOAD_ONLY (-1)
+#define AT_PWARM_DUMP_AT_SHUTDOWN_ONLY 0
+#define AT_PWARM_DEFAULT_DUMP_INTERVAL 300
+
+/* Primary functions */
+void        _PG_init(void);
+static void auto_pgprewarm_main(Datum main_arg);
+static bool load_block(RelFileNode rnode, char reltype, ForkNumber forkNum,
+           BlockNumber blockNum);
+static void register_auto_pgprewarm(void);
+void        auto_pgprewarm_dump_main(void);
+pid_t       auto_pg_prewarm_dump_launcher(void);
+
+/* Secondary/supporting functions */
+static void sigtermHandler(SIGNAL_ARGS);
+static void sighupHandler(SIGNAL_ARGS);
+
+/* flags set by signal handlers */
+static volatile sig_atomic_t got_sigterm = false;
+static volatile sig_atomic_t got_sighup = false;
+
+/*
+ * Signal handler for SIGTERM
+ *      Set a flag to tell the main loop to terminate, and set our latch to
+ *      wake it up. errno is saved and restored so that the interrupted code
+ *      does not observe a value changed by the handler.
+ */
+static void
+sigtermHandler(SIGNAL_ARGS)
+{
+    int         save_errno = errno;
+
+    got_sigterm = true;
+
+    /* MyProc may not be set yet; only poke the latch when it exists. */
+    if (MyProc)
+        SetLatch(&MyProc->procLatch);
+
+    errno = save_errno;
+}
+
+/*
+ * Signal handler for SIGHUP
+ *      Set a flag to tell the main loop to reread the config file, and set
+ *      our latch to wake it up. errno is saved and restored as above.
+ */
+static void
+sighupHandler(SIGNAL_ARGS)
+{
+    int         save_errno = errno;
+
+    got_sighup = true;
+
+    if (MyProc)
+        SetLatch(&MyProc->procLatch);
+
+    errno = save_errno;
+}
+
+/* Meta-data of each persistent page buffer which is dumped and used to load. */
+typedef struct BlockInfoRecord
+{
+    Oid         database;       /* database */
+    Oid         spcNode;        /* tablespace */
+    Oid         filenode;       /* relation */
+    ForkNumber  forknum;        /* fork number */
+    BlockNumber blocknum;       /* block number */
+} BlockInfoRecord;
+
+/* Try loading only once during startup. If any error do not retry. */
+static bool avoid_loading = false;
+
+/*
+ * And avoid dumping if we receive SIGTERM while loading. Also, do not retry if
+ * dump has failed previously.
+ */
+static bool avoid_dumping = false;
+
+/* Backing variable of the pg_prewarm.dump_interval GUC (seconds). */
+int         dump_interval = 0;
+
+/*
+ * Compare one member of records "a" and "b" and return from the enclosing
+ * function as soon as they differ; fall through when they are equal.
+ *
+ * The macro deliberately has no trailing semicolon: the caller supplies it,
+ * so each use is exactly one statement.
+ */
+#define cmp_member_elem(fld) \
+do { \
+    if (a->fld < b->fld) \
+        return -1; \
+    else if (a->fld > b->fld) \
+        return 1; \
+} while (0)
+
+/*
+ * sort_cmp_func - compare function used while qsorting BlockInfoRecord objects.
+ *
+ * Orders by database, tablespace, relfilenode, fork number and finally block
+ * number. Returns -1, 0 or 1 in the usual qsort convention. The inputs are
+ * only read, so they are accessed through const pointers.
+ */
+static int
+sort_cmp_func(const void *p, const void *q)
+{
+    const BlockInfoRecord *a = (const BlockInfoRecord *) p;
+    const BlockInfoRecord *b = (const BlockInfoRecord *) q;
+
+    cmp_member_elem(database);
+    cmp_member_elem(spcNode);
+    cmp_member_elem(filenode);
+    cmp_member_elem(forknum);
+    cmp_member_elem(blocknum);
+    return 0;
+}
+
+/* Base name of the dump file; ".save" and ".save.tmp" are appended to it. */
+#define AUTO_PG_PREWARM_FILE "autopgprewarm"
+
+/*
+ * load_block - Load a given block.
+ *
+ * Returns false, without reading anything, when have_free_buffer() reports
+ * that no free buffer is available; returns true otherwise, including when
+ * the requested fork does not exist and therefore nothing was read.
+ */
+static bool
+load_block(RelFileNode rnode, char reltype, ForkNumber forkNum,
+           BlockNumber blockNum)
+{
+    Buffer      buffer;
+
+    /*
+     * Load the page only if there exist a free buffer. We do not want to
+     * replace an existing buffer.
+     */
+    if (have_free_buffer())
+    {
+        SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
+
+        /*
+         * Check if fork exists first otherwise we will not be able to use one
+         * free buffer for each nonexisting block.
+         */
+        if (smgrexists(smgr, forkNum))
+        {
+            buffer = ReadBufferForPrewarm(smgr, reltype,
+                                          forkNum, blockNum,
+                                          RBM_NORMAL, NULL);
+            /* We only wanted the page cached; drop the pin right away. */
+            if (BufferIsValid(buffer))
+                ReleaseBuffer(buffer);
+        }
+
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * load_now - The main routine which reads from the dump file and loads each
+ * block. We try to load each blocknum read from AUTO_PG_PREWARM_FILE as long
+ * as there is a free buffer left and SIGTERM has not been received. If we
+ * fail to load a block we ignore the ERROR and try to load the next blocknum.
+ * This is because there is a possibility that the corresponding blocknum
+ * might have been deleted.
+ */
+static void
+load_now(void)
+{
+    static char dump_file_path[MAXPGPATH];
+    FILE       *file = NULL;
+    uint32      i,
+                num_buffers = 0;
+
+    /*
+     * Must live outside the loop so that a "no free buffer" answer from one
+     * iteration is still visible at the top of the next one. Declared
+     * volatile because it is assigned inside PG_TRY.
+     */
+    volatile bool have_free_buf = true;
+
+    if (avoid_loading)
+        return;
+
+    /* One attempt only: set the flag first so an ERROR below is not retried. */
+    avoid_loading = true;
+
+    /* Check if file exists and open file in read mode. */
+    snprintf(dump_file_path, sizeof(dump_file_path), "%s.save",
+             AUTO_PG_PREWARM_FILE);
+    file = fopen(dump_file_path, PG_BINARY_R);
+
+    if (!file)
+        return;                 /* No file to load. */
+
+    if (fscanf(file, "<<%u>>", &num_buffers) != 1)
+    {
+        /* fclose() may change errno; keep the value that %m should report. */
+        int         save_errno = errno;
+
+        fclose(file);
+        errno = save_errno;
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("auto pg_prewarm load : error reading num of elements"
+                        " in \"%s\" : %m", dump_file_path)));
+    }
+
+    elog(LOG, "auto pg_prewarm load : number of buffers to load %u",
+         num_buffers);
+
+    for (i = 0; i < num_buffers; i++)
+    {
+        RelFileNode rnode;
+        uint32      forknum;
+        BlockNumber blocknum;
+
+        if (got_sigterm)
+        {
+            /*
+             * Received shutdown while we were still loading the buffers. No
+             * need to dump at this stage.
+             */
+            avoid_dumping = true;
+            break;
+        }
+
+        /* The previous block found the buffer pool full; stop prewarming. */
+        if (!have_free_buf)
+            break;
+
+        /* Get next block. */
+        if (5 != fscanf(file, "%u,%u,%u,%u,%u\n", &rnode.dbNode, &rnode.spcNode,
+                        &rnode.relNode, &forknum, &blocknum))
+            break;              /* No more valid entry hence stop processing. */
+
+        /*
+         * The file is plain text and could be damaged or hand-edited; never
+         * pass an out-of-range fork number down to the storage manager.
+         */
+        if (forknum > MAX_FORKNUM)
+            continue;
+
+        PG_TRY();
+        {
+            have_free_buf = load_block(rnode, RELPERSISTENCE_PERMANENT,
+                                       (ForkNumber) forknum, blocknum);
+        }
+        PG_CATCH();
+        {
+            /* Any error handle it and then try to load next buffer. */
+
+            /* Prevent interrupts while cleaning up */
+            HOLD_INTERRUPTS();
+
+            /* Report the error to the server log */
+            EmitErrorReport();
+
+            LWLockReleaseAll();
+            AbortBufferIO();
+            UnlockBuffers();
+
+            /* buffer pins are released here. */
+            ResourceOwnerRelease(CurrentResourceOwner,
+                                 RESOURCE_RELEASE_BEFORE_LOCKS,
+                                 false, true);
+            FlushErrorState();
+
+            /* Now we can allow interrupts again */
+            RESUME_INTERRUPTS();
+        }
+        PG_END_TRY();
+    }
+
+    fclose(file);
+
+    elog(LOG,
+         "auto pg_prewarm load : number of buffers actually tried to load %u",
+         i);
+    return;
+}
+
+/*
+ * dump_now - The main routine which goes through each buffer header and
+ * dumps their metadata, one block per line, as the comma-separated values
+ * database, tablespace, relfilenode, fork number, block number, preceded by
+ * a "<<count>>" header line. We sort these data and then dump them. Sorting
+ * is necessary as it facilitates sequential read during load. Unlike load,
+ * if we encounter any error we abort the dump.
+ *
+ * avoid_dumping is raised on entry and only lowered again after a fully
+ * successful dump with no SIGTERM pending, so a failed dump is never retried.
+ */
+static void
+dump_now(void)
+{
+    static char dump_file_path[MAXPGPATH],
+                transient_dump_file_path[MAXPGPATH];
+    uint32      i;
+    int         ret;
+    uint32      num_buffers;
+    BlockInfoRecord *block_info_array;
+    BufferDesc *bufHdr;
+    FILE       *file = NULL;
+
+    if (avoid_dumping)
+        return;
+
+    avoid_dumping = true;
+
+    /*
+     * Build both path names up front, each sized by its own buffer, so that
+     * every error message below names the file that actually failed.
+     */
+    snprintf(dump_file_path, sizeof(dump_file_path),
+             "%s.save", AUTO_PG_PREWARM_FILE);
+    snprintf(transient_dump_file_path, sizeof(transient_dump_file_path),
+             "%s.save.tmp", AUTO_PG_PREWARM_FILE);
+
+    block_info_array =
+        (BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);
+
+    for (num_buffers = 0, i = 0; i < NBuffers; i++)
+    {
+        uint32      buf_state;
+
+        bufHdr = GetBufferDescriptor(i);
+
+        /* Lock each buffer header before inspecting. */
+        buf_state = LockBufHdr(bufHdr);
+
+        /* Only valid and persistent page buffers are dumped. */
+        if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID) &&
+            (buf_state & BM_PERMANENT))
+        {
+            block_info_array[num_buffers].database = bufHdr->tag.rnode.dbNode;
+            block_info_array[num_buffers].spcNode = bufHdr->tag.rnode.spcNode;
+            block_info_array[num_buffers].filenode = bufHdr->tag.rnode.relNode;
+            block_info_array[num_buffers].forknum = bufHdr->tag.forkNum;
+            block_info_array[num_buffers].blocknum = bufHdr->tag.blockNum;
+            ++num_buffers;
+        }
+
+        UnlockBufHdr(bufHdr, buf_state);
+    }
+
+    /* Sorting now only to avoid sorting while loading. */
+    pg_qsort(block_info_array, num_buffers, sizeof(BlockInfoRecord),
+             sort_cmp_func);
+
+    /* Write to a temporary file first; it is renamed into place at the end. */
+    file = fopen(transient_dump_file_path, "w");
+    if (file == NULL)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("auto pg_prewarm dump : could not open \"%s\": %m",
+                        transient_dump_file_path)));
+
+    /* Write num_buffers first and then BlockMetaInfoRecords. */
+    ret = fprintf(file, "<<%u>>\n", num_buffers);
+    if (ret < 0)
+    {
+        /* fclose() may change errno; keep the value that %m should report. */
+        int         save_errno = errno;
+
+        fclose(file);
+        errno = save_errno;
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("auto pg_prewarm dump : error writing to \"%s\" : %m",
+                        transient_dump_file_path)));
+    }
+
+    for (i = 0; i < num_buffers; i++)
+    {
+        ret = fprintf(file, "%u,%u,%u,%u,%u\n",
+                      block_info_array[i].database,
+                      block_info_array[i].spcNode,
+                      block_info_array[i].filenode,
+                      (uint32) block_info_array[i].forknum,
+                      block_info_array[i].blocknum);
+        if (ret < 0)
+        {
+            int         save_errno = errno;
+
+            fclose(file);
+            errno = save_errno;
+            ereport(ERROR,
+                    (errcode_for_file_access(),
+                     errmsg("auto pg_prewarm dump : error writing to"
+                            " \"%s\" : %m", transient_dump_file_path)));
+        }
+    }
+
+    pfree(block_info_array);
+
+    /* fclose() flushes buffered data, so its result must be checked too. */
+    ret = fclose(file);
+    if (ret != 0)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("auto pg_prewarm dump : error closing \"%s\" : %m",
+                        transient_dump_file_path)));
+
+    /*
+     * Rename transient_dump_file_path to dump_file_path to make things
+     * permanent. rename() replaces an existing target, so the previous dump
+     * is not unlinked first; unlinking would leave a window in which a crash
+     * loses the dump altogether.
+     */
+    ret = rename(transient_dump_file_path, dump_file_path);
+    if (ret != 0)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("auto pg_prewarm dump : failed to rename \"%s\" to"
+                        " \"%s\" : %m",
+                        transient_dump_file_path, dump_file_path)));
+
+    /* Allow further dumps unless we are already shutting down. */
+    if (!got_sigterm)
+        avoid_dumping = false;
+
+    elog(LOG, "auto pg_prewarm dump : saved metadata info of %u blocks",
+         num_buffers);
+}
+
+/*
+ * Register the "auto pg_prewarm load" bgworker. It is started at postmaster
+ * start, is never restarted, and runs auto_pgprewarm_main.
+ */
+static void
+register_auto_pgprewarm(void)
+{
+    BackgroundWorker auto_pg_prewarm;
+
+    /* Zero the struct so that fields we do not set hold no stack garbage. */
+    MemSet(&auto_pg_prewarm, 0, sizeof(auto_pg_prewarm));
+    auto_pg_prewarm.bgw_main_arg = Int32GetDatum(0);
+    auto_pg_prewarm.bgw_flags = BGWORKER_SHMEM_ACCESS;
+
+    /* Register the auto pg_prewarm background worker */
+    auto_pg_prewarm.bgw_start_time = BgWorkerStart_PostmasterStart;
+    auto_pg_prewarm.bgw_restart_time = BGW_NEVER_RESTART;
+    auto_pg_prewarm.bgw_main = auto_pgprewarm_main;
+    snprintf(auto_pg_prewarm.bgw_name, BGW_MAXLEN, "auto pg_prewarm load");
+    RegisterBackgroundWorker(&auto_pg_prewarm);
+}
+
+/*
+ * Extension's entry point.
+ *
+ * Always defines the pg_prewarm.dump_interval GUC; registers the load worker
+ * only while shared_preload_libraries is being processed.
+ */
+void
+_PG_init(void)
+{
+    /*
+     * Define custom GUC variables. The worker waits dump_interval seconds
+     * between dumps and restarts the wait whenever it is woken early, so the
+     * setting is a lower bound on the time between two dumps.
+     */
+    DefineCustomIntVariable("pg_prewarm.dump_interval",
+                            "Sets the minimum time between two buffer pool dumps",
+                            "If set to Zero, timer based dumping is disabled."
+                            " If set to -1 we never dump.",
+                            &dump_interval,
+                            AT_PWARM_DEFAULT_DUMP_INTERVAL,
+                            AT_PWARM_LOAD_ONLY, INT_MAX / 1000,
+                            PGC_SIGHUP,
+                            GUC_UNIT_S,
+                            NULL,
+                            NULL,
+                            NULL);
+
+    /*
+     * auto pg_prewarm load should be started from postmaster as a preloaded
+     * library.
+     */
+    if (!process_shared_preload_libraries_in_progress)
+        return;
+
+    /* Register auto pg_prewarm load. */
+    register_auto_pgprewarm();
+}
+
+/*
+ * auto_pgprewarm_main -- The main entry point of the auto pg_prewarm load
+ *      process. This is invoked as a background worker. It loads the saved
+ *      blocks once and then, unless dumping is disabled, launches the
+ *      "auto pg_prewarm dump" worker.
+ */
+static void
+auto_pgprewarm_main(Datum main_arg)
+{
+    MemoryContext autoprewarmer_context;
+    sigjmp_buf  local_sigjmp_buf;
+
+    /* Establish signal handlers before unblocking signals. */
+    pqsignal(SIGTERM, sigtermHandler);
+    pqsignal(SIGHUP, sighupHandler);
+
+    /*
+     * Create a resource owner to keep track of our resources.
+     */
+    CurrentResourceOwner = ResourceOwnerCreate(NULL, "autoprewarmer");
+
+    /*
+     * Create a memory context that we will do all our work in. We do this so
+     * that we can reset the context during error recovery and thereby avoid
+     * possible memory leaks.
+     */
+    autoprewarmer_context = AllocSetContextCreate(TopMemoryContext,
+                                                  "autoprewarmer",
+                                                  ALLOCSET_DEFAULT_MINSIZE,
+                                                  ALLOCSET_DEFAULT_INITSIZE,
+                                                  ALLOCSET_DEFAULT_MAXSIZE);
+    MemoryContextSwitchTo(autoprewarmer_context);
+
+    /*
+     * If an exception is encountered, processing resumes here.
+     */
+    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+    {
+        /* Since not using PG_TRY, must reset error stack by hand */
+        error_context_stack = NULL;
+
+        /* Prevent interrupts while cleaning up */
+        HOLD_INTERRUPTS();
+
+        /* Report the error to the server log */
+        EmitErrorReport();
+
+        LWLockReleaseAll();
+        AbortBufferIO();
+        UnlockBuffers();
+
+        /* buffer pins are released here. */
+        ResourceOwnerRelease(CurrentResourceOwner,
+                             RESOURCE_RELEASE_BEFORE_LOCKS,
+                             false, true);
+        AtEOXact_Buffers(false);
+        AtEOXact_SMgr();
+
+        MemoryContextSwitchTo(autoprewarmer_context);
+        FlushErrorState();
+
+        /* Flush any leaked data in the top-level context */
+        MemoryContextResetAndDeleteChildren(autoprewarmer_context);
+
+        /* Now we can allow interrupts again */
+        RESUME_INTERRUPTS();
+
+        /* Close all open files after any error. */
+        smgrcloseall();
+    }
+
+    /* We can now handle ereport(ERROR) */
+    PG_exception_stack = &local_sigjmp_buf;
+
+    /* We're now ready to receive signals */
+    BackgroundWorkerUnblockSignals();
+
+    /* A no-op when re-entered after an error: load_now() runs only once. */
+    load_now();
+
+    /*
+     * In case of a SIGHUP, just reload the configuration.
+     */
+    if (got_sighup)
+    {
+        got_sighup = false;
+        ProcessConfigFile(PGC_SIGHUP);
+    }
+
+    /* launch auto pg_prewarm dump bgworker. */
+    if (!avoid_dumping &&
+        dump_interval != AT_PWARM_LOAD_ONLY)
+        auto_pg_prewarm_dump_launcher();
+}
+
+/*
+ * auto_pgprewarm_dump_main -- The main entry point of the auto pg_prewarm
+ *      dump process. This is invoked as a background worker. It dumps the
+ *      buffer pool every dump_interval seconds while that setting is > 0, and
+ *      once more on SIGTERM unless dumping is disabled.
+ *
+ * NOTE(review): the load worker's entry point in this file takes a Datum
+ * argument, while this one is declared (void) to match its prototype near the
+ * top of the file -- confirm that this is what the bgworker machinery expects.
+ */
+void
+auto_pgprewarm_dump_main(void)
+{
+    MemoryContext autoprewarmer_context;
+    sigjmp_buf  local_sigjmp_buf;
+
+    /* Establish signal handlers before unblocking signals. */
+    pqsignal(SIGTERM, sigtermHandler);
+    pqsignal(SIGHUP, sighupHandler);
+
+    /*
+     * Create a resource owner to keep track of our resources.
+     */
+    CurrentResourceOwner = ResourceOwnerCreate(NULL, "autoprewarmer");
+
+    /*
+     * Create a memory context that we will do all our work in. We do this so
+     * that we can reset the context during error recovery and thereby avoid
+     * possible memory leaks.
+     */
+    autoprewarmer_context = AllocSetContextCreate(TopMemoryContext,
+                                                  "autoprewarmer",
+                                                  ALLOCSET_DEFAULT_MINSIZE,
+                                                  ALLOCSET_DEFAULT_INITSIZE,
+                                                  ALLOCSET_DEFAULT_MAXSIZE);
+    MemoryContextSwitchTo(autoprewarmer_context);
+
+    /*
+     * If an exception is encountered, processing resumes here.
+     */
+    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+    {
+        /* Since not using PG_TRY, must reset error stack by hand */
+        error_context_stack = NULL;
+
+        /* Prevent interrupts while cleaning up */
+        HOLD_INTERRUPTS();
+
+        /* Report the error to the server log */
+        EmitErrorReport();
+
+        LWLockReleaseAll();
+        AbortBufferIO();
+        UnlockBuffers();
+
+        /* buffer pins are released here. */
+        ResourceOwnerRelease(CurrentResourceOwner,
+                             RESOURCE_RELEASE_BEFORE_LOCKS,
+                             false, true);
+        AtEOXact_Buffers(false);
+        AtEOXact_SMgr();
+
+        MemoryContextSwitchTo(autoprewarmer_context);
+        FlushErrorState();
+
+        /* Flush any leaked data in the top-level context */
+        MemoryContextResetAndDeleteChildren(autoprewarmer_context);
+
+        /* Now we can allow interrupts again */
+        RESUME_INTERRUPTS();
+
+        /* Close all open files after any error. */
+        smgrcloseall();
+
+        /*
+         * Error while dumping is treated as fatal hence do proc_exit.
+         * dump_now() leaves avoid_dumping set when it fails part-way.
+         */
+        if (avoid_dumping)
+            proc_exit(1);
+    }
+
+    /* We can now handle ereport(ERROR) */
+    PG_exception_stack = &local_sigjmp_buf;
+
+    /* We're now ready to receive signals */
+    BackgroundWorkerUnblockSignals();
+
+    /*
+     * In case of a SIGHUP, just reload the configuration.
+     */
+    if (got_sighup)
+    {
+        got_sighup = false;
+        ProcessConfigFile(PGC_SIGHUP);
+    }
+
+    /* Has been set not to dump. nothing more to do. */
+    if (dump_interval == AT_PWARM_LOAD_ONLY)
+        return;
+
+    while (!got_sigterm)
+    {
+        int         rc;
+
+        /*
+         * Recomputed on every pass so that a reloaded dump_interval takes
+         * effect, and kept loop-local so that no value has to survive the
+         * sigsetjmp() above. In shutdown-only mode the default period is
+         * used merely as a wake-up interval; no dump happens on timeout.
+         */
+        int         timeout = (dump_interval > AT_PWARM_DUMP_AT_SHUTDOWN_ONLY) ?
+            dump_interval : AT_PWARM_DEFAULT_DUMP_INTERVAL;
+
+        ResetLatch(&MyProc->procLatch);
+        rc = WaitLatch(&MyProc->procLatch,
+                       WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                       timeout * 1000, PG_WAIT_EXTENSION);
+
+        if (rc & WL_POSTMASTER_DEATH)
+            proc_exit(1);
+
+        /*
+         * In case of a SIGHUP, just reload the configuration.
+         */
+        if (got_sighup)
+        {
+            got_sighup = false;
+            ProcessConfigFile(PGC_SIGHUP);
+        }
+
+        /* Has been set not to dump. nothing more to do. */
+        if (dump_interval == AT_PWARM_LOAD_ONLY)
+            return;
+
+        /* If dump_interval is set then dump the buff pool. */
+        if ((rc & WL_TIMEOUT) &&
+            (dump_interval > AT_PWARM_DUMP_AT_SHUTDOWN_ONLY))
+            dump_now();
+    }
+
+    /* One last block meta info dump while postmaster shutdown. */
+    if (dump_interval != AT_PWARM_LOAD_ONLY)
+        dump_now();
+}
+
+/*
+ * Dynamically launch an auto pg_prewarm dump worker.
+ *
+ * Registers the "auto pg_prewarm dump" worker, waits until it has started and
+ * returns its process ID. Raises an ERROR, after setting avoid_dumping, when
+ * the worker cannot be registered or started or when the postmaster has died.
+ */
+pid_t
+auto_pg_prewarm_dump_launcher(void)
+{
+    BackgroundWorker worker;
+    BackgroundWorkerHandle *handle;
+    BgwHandleStatus status;
+    pid_t       pid;
+
+    /*
+     * Zero the whole struct first: the fields not assigned below (such as
+     * bgw_main_arg) would otherwise be handed over as uninitialized stack
+     * contents.
+     */
+    MemSet(&worker, 0, sizeof(worker));
+    worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
+    worker.bgw_start_time = BgWorkerStart_ConsistentState;
+    worker.bgw_restart_time = BGW_NEVER_RESTART;
+    worker.bgw_main = NULL;     /* new worker might not have library loaded */
+
+    /* Bounded copies, matching how bgw_name is filled in. */
+    snprintf(worker.bgw_library_name, BGW_MAXLEN, "pg_prewarm");
+    snprintf(worker.bgw_function_name, BGW_MAXLEN, "auto_pgprewarm_dump_main");
+    snprintf(worker.bgw_name, BGW_MAXLEN, "auto pg_prewarm dump");
+
+    /* set bgw_notify_pid so that we can use WaitForBackgroundWorkerStartup */
+    worker.bgw_notify_pid = MyProcPid;
+
+    if (!RegisterDynamicBackgroundWorker(&worker, &handle))
+    {
+        avoid_dumping = true;
+        ereport(ERROR,
+                (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+                 errmsg("registering dynamic bgworker \"auto pg_prewarm dump\" failed"),
+                 errhint("Consider increasing configuration parameter "
+                         "\"max_worker_processes\".")));
+    }
+
+    status = WaitForBackgroundWorkerStartup(handle, &pid);
+
+    if (status == BGWH_STOPPED)
+    {
+        avoid_dumping = true;
+        ereport(ERROR,
+                (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+                 errmsg("could not start auto pg_prewarm dump bgworker"),
+                 errhint("More details may be available in the server log.")));
+    }
+
+    if (status == BGWH_POSTMASTER_DIED)
+    {
+        avoid_dumping = true;
+        ereport(ERROR,
+                (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+                 errmsg("cannot start bgworker auto pg_prewarm without postmaster"),
+                 errhint("Kill all remaining database processes and restart"
+                         " the database.")));
+    }
+    Assert(status == BGWH_STARTED);
+    return pid;
+}
+
+/*
+ * The C-Language entry function to launch auto pg_prewarm dump.
+ */
+Datum
+launch_pg_prewarm_dump(PG_FUNCTION_ARGS)
+{
+    pid_t       pid;
+
+    /* Start the dump worker; the launcher raises an ERROR when it cannot. */
+    pid = auto_pg_prewarm_dump_launcher();
+
+    /* Hand the new worker's process ID back to the SQL caller as an int4. */
+    PG_RETURN_INT32(pid);
+}
diff --git a/contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql b/contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql
new file mode 100644
index 0000000..86b219d
--- /dev/null
+++ b/contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql
@@ -0,0 +1,9 @@
+/* contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_prewarm UPDATE TO '1.2'" to load this file. \quit
+
+CREATE FUNCTION launch_pg_prewarm_dump()
+RETURNS pg_catalog.int4 STRICT
+AS 'MODULE_PATHNAME', 'launch_pg_prewarm_dump'
+LANGUAGE C;
diff --git a/contrib/pg_prewarm/pg_prewarm.control b/contrib/pg_prewarm/pg_prewarm.control
index cf2fb92..40e3add 100644
--- a/contrib/pg_prewarm/pg_prewarm.control
+++ b/contrib/pg_prewarm/pg_prewarm.control
@@ -1,5 +1,5 @@
 # pg_prewarm extension
 comment = 'prewarm relation data'
-default_version = '1.1'
+default_version = '1.2'
 module_pathname = '$libdir/pg_prewarm'
 relocatable = true
diff --git a/doc/src/sgml/pgprewarm.sgml b/doc/src/sgml/pgprewarm.sgml
index c090401..b559141 100644
--- a/doc/src/sgml/pgprewarm.sgml
+++ b/doc/src/sgml/pgprewarm.sgml
@@ -58,6 +58,46 @@ pg_prewarm(regclass, mode text default 'buffer', fork text default 'main',
+ auto pg_prewarm bgworker
+
+
+ If we preload the pg_prewarm shared library, we start a pair of bgworkers
+ which automatically dump all of the buffer pool block info at a regular
+ interval and at the time server shutdown (smart and fast mode only).
+ And then load these blocks when the server restarts.
+
+
+
+ If shared_preload_libraries is set with pg_prewarm a bgworker
+ auto pg_prewarm load is started by the postmaster.
+ Postmaster does not wait for recovery to finish and database to reach a
+ consistent state.
If there is a dump file
+ autopgprewarm.save to load, the bgworker loads
+ each block entry in it into the buffer pool as long as a free buffer is available.
+ This way we do not replace any new blocks which were loaded either by the
+ recovery process or the querying clients.
+ Once auto pg_prewarm load has finished its job of
+ prewarming the buffer pool, it launches a dynamic bgworker
+ auto pg_prewarm dump which periodically dumps the meta
+ info of blocks present in the buffer pool.
+
+
+
+ Set pg_prewarm.dump_interval in seconds to specify the minimum interval
+ between two dumps. If it is set to zero then dumping based on the timer is
+ disabled, and we only dump at server shutdown. If set to -1 dumping itself is
+ disabled, and the auto pg_prewarm dump worker just stops there.
+ By default, it is set to 300 seconds.
+
+
+
+ To relaunch a stopped "auto pg_prewarm dump" bgworker without restarting the
+ server, we can use the utility function
+ launch_pg_prewarm_dump() RETURNS int4.
+
+
+
+
 Author
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 3cb5120..82d1464 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -693,6 +693,20 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
                              mode, strategy, &hit);
 }
 
+/*
+ * ReadBufferForPrewarm -- This new interface is for auto pg_prewarm.
+ */
+Buffer
+ReadBufferForPrewarm(SMgrRelation smgr, char relpersistence,
+                     ForkNumber forkNum, BlockNumber blockNum,
+                     ReadBufferMode mode, BufferAccessStrategy strategy)
+{
+    bool        hit;
+
+    return ReadBuffer_common(smgr, relpersistence, forkNum, blockNum,
+                             mode, strategy, &hit);
+}
+
 
 /*
  * ReadBuffer_common -- common logic for all ReadBuffer variants
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 5d0a636..4606a32 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -169,6 +169,19 @@ ClockSweepTick(void)
 }
 
 /*
+ * have_free_buffer -- Report whether the buffer pool has a free buffer,
+ *      i.e. firstFreeBuffer is non-negative. Used by auto pg_prewarm.
+ *
+ * The field is read here without taking any lock, so the answer is
+ * presumably only a hint -- NOTE(review): confirm that is acceptable.
+ */
+bool
+have_free_buffer(void)
+{
+    return StrategyControl->firstFreeBuffer >= 0;
+}
+
+/*
  * StrategyGetBuffer
  *
  *  Called by the bufmgr to get the next candidate buffer to use in
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index d117b66..58d4871 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -317,6 +317,7 @@ extern void StrategyNotifyBgWriter(int bgwprocno);
 
 extern Size StrategyShmemSize(void);
 extern void StrategyInitialize(bool init);
+extern bool have_free_buffer(void);
 
 /* buf_table.c */
 extern Size BufTableShmemSize(int size);
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 4c697e2..8cd55a7 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -16,6 +16,7 @@
 
 #include "storage/block.h"
 #include "storage/buf.h"
+#include "storage/smgr.h"
 #include "storage/bufpage.h"
 #include "storage/relfilenode.h"
 #include "utils/relcache.h"
@@ -172,6 +173,10 @@ extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
 extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode,
                           ForkNumber forkNum, BlockNumber blockNum,
                           ReadBufferMode mode, BufferAccessStrategy strategy);
+extern Buffer ReadBufferForPrewarm(SMgrRelation smgr, char relpersistence,
+                     ForkNumber forkNum, BlockNumber blockNum,
+                     ReadBufferMode mode,
+                     BufferAccessStrategy strategy);
 extern void ReleaseBuffer(Buffer buffer);
 extern void UnlockReleaseBuffer(Buffer buffer);
 extern void MarkBufferDirty(Buffer buffer);