From a189eaa673bab9e4bccef7e1bb7077ef5a3fdf0d Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 8 Aug 2019 16:05:53 -0400 Subject: [PATCH v2] New undo request manager, now with UndoRequestManagerOldestFXID. --- src/backend/access/undo/Makefile | 2 +- src/backend/access/undo/undorequest.c | 1114 +++++++++++++++++ src/backend/lib/rbtree.c | 46 +- src/include/access/transam.h | 1 + src/include/access/undorequest.h | 80 ++ src/include/lib/rbtree.h | 42 +- src/test/modules/Makefile | 1 + .../test_undo_request_manager/Makefile | 21 + .../expected/test_undo_request_manager.out | 28 + .../sql/test_undo_request_manager.sql | 16 + .../test_undo_request_manager--1.0.sql | 9 + .../test_undo_request_manager.c | 139 ++ .../test_undo_request_manager.control | 4 + src/tools/pgindent/typedefs.list | 4 + 14 files changed, 1479 insertions(+), 28 deletions(-) create mode 100644 src/backend/access/undo/undorequest.c create mode 100644 src/include/access/undorequest.h create mode 100644 src/test/modules/test_undo_request_manager/Makefile create mode 100644 src/test/modules/test_undo_request_manager/expected/test_undo_request_manager.out create mode 100644 src/test/modules/test_undo_request_manager/sql/test_undo_request_manager.sql create mode 100644 src/test/modules/test_undo_request_manager/test_undo_request_manager--1.0.sql create mode 100644 src/test/modules/test_undo_request_manager/test_undo_request_manager.c create mode 100644 src/test/modules/test_undo_request_manager/test_undo_request_manager.control diff --git a/src/backend/access/undo/Makefile b/src/backend/access/undo/Makefile index 219c6963cf..d036717671 100644 --- a/src/backend/access/undo/Makefile +++ b/src/backend/access/undo/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/access/undo top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global
-OBJS = undolog.o
+OBJS = undolog.o undorequest.o
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/undo/undorequest.c b/src/backend/access/undo/undorequest.c
new file mode 100644
index 0000000000..d011aaac65
--- /dev/null
+++ b/src/backend/access/undo/undorequest.c
@@ -0,0 +1,1114 @@
+/*-------------------------------------------------------------------------
+ *
+ * undorequest.c
+ *    Undo request manager.
+ *
+ * From the moment a transaction begins until the moment that it commits,
+ * there is a possibility that it might abort, either due to an exception
+ * or because the entire system is restarted (e.g. because of a power
+ * cut). If this happens, all undo generated by that transaction prior
+ * to the abort must be applied. To ensure this, the calling code must
+ * ensure that an "undo request" is registered for every transaction
+ * that generates undo.
+ *
+ * The undo request should be registered before the transaction writes any
+ * undo records (except for temporary undo records, which the creating backend
+ * will need to process locally). If the transaction goes on to commit, the
+ * undo request can be deleted; if it goes on to abort, it needs to be updated
+ * with the final size of the undo generated by that transaction so that
+ * we can prioritize it appropriately. One of the key tasks of this module
+ * is to decide on the order in which undo requests should be processed;
+ * see GetNextUndoRequest for details.
+ *
+ * We have only a fixed amount of shared memory to store undo requests;
+ * because an undo request has to be created before any undo that might
+ * need to be processed is written, we should never end up in a situation
+ * where there are more existing undo requests than can fit. In extreme
+ * cases, this might cause us to have to refuse to create new requests,
+ * but that should be very rare. If we're starting to run low on space,
+ * PerformUndoInBackground() will signal callers that undo should be
+ * performed in the foreground; actually hitting the hard limit requires
+ * foreground undo to be interrupted by a crash.
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/undo/undorequest.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/undorequest.h"
+#include "lib/rbtree.h"
+#include "storage/shmem.h"
+#include "utils/timestamp.h"
+
+/*
+ * An UndoRequest represents the possible need to perform undo actions for
+ * a transaction if it aborts; thus, it should be allocated before writing
+ * undo that might require the system to perform cleanup actions (except
+ * temporary undo, for which the backend is always responsible) and
+ * deallocated when it is clear that no such actions will need to be
+ * performed or when they have all been performed successfully.
+ *
+ * At any given time, an UndoRequest is in one of three states: FREE (not
+ * allocated to any transaction; available for reuse), UNLISTED (allocated
+ * to a transaction but not in any RBTree), or LISTED (allocated to a
+ * transaction and in either both requests_by_fxid and requests_by_size or
+ * else in requests_by_retry_time).
+ *
+ * Changes to UndoRequest objects are protected by the UndoRequestManager's
+ * lock, but not all changes require the lock. The following rules apply:
+ *
+ * fxid must be InvalidFullTransactionId if and only if the UndoRequest is
+ * FREE, and may only be changed while holding the lock.
+ *
+ * next_free_request must be NULL unless the UndoRequest is FREE, and may
+ * only be changed while holding the lock.
+ *
+ * The remaining fields must be accurate if the UndoRequest is LISTED, but
+ * otherwise may or may not contain correct data. They should not be changed
+ * while the request is FREE, may be changed without holding the lock while
+ * the request is UNLISTED, and may only be changed while holding the lock
+ * if the request is LISTED.
+ *
+ * Callers must be careful never to lose track of an entry that is UNLISTED;
+ * such entries will be permanently leaked. An entry that is FREE can be
+ * reallocated by this module, while one that is LISTED should eventually
+ * get processed and become FREE, but an UNLISTED entry remains the caller's
+ * responsibility until the state is changed.
+ */
+struct UndoRequest
+{
+    FullTransactionId fxid;     /* owning transaction; see rules above */
+    Oid         dbid;           /* database, matched by GetNextUndoRequest */
+    Size        size;           /* bytes of undo; 0 until finalized */
+    UndoRecPtr  start_location_logged;  /* start of logged undo, if any */
+    UndoRecPtr  end_location_logged;    /* end of logged undo, if any */
+    UndoRecPtr  start_location_unlogged;    /* start of unlogged undo, if any */
+    UndoRecPtr  end_location_unlogged;  /* end of unlogged undo, if any */
+    TimestampTz retry_time;     /* DT_NOBEGIN unless undo has failed */
+    UndoRequest *next_free_request; /* freelist link; see rules above */
+};
+
+/*
+ * An UndoRequestNode just points to an UndoRequest. We use it so that the
+ * same UndoRequest can be placed into more than one RBTree at the same
+ * time.
+ */
+typedef struct UndoRequestNode
+{
+    RBTNode     rbtnode;
+    UndoRequest *req;
+} UndoRequestNode;
+
+/*
+ * Possible sources of UndoRequest objects in need of processing.
+ */
+typedef enum UndoRequestSource
+{
+    UNDO_SOURCE_FXID,
+    UNDO_SOURCE_SIZE,
+    UNDO_SOURCE_RETRY_TIME
+} UndoRequestSource;
+
+/*
+ * An UndoRequestManager manages a collection of UndoRequest and
+ * UndoRequestNode objects. Typically, there would only be one such object
+ * for the whole system, but it's possible to create others for testing
+ * purposes.
+ */
+struct UndoRequestManager
+{
+    LWLock     *lock;           /* for synchronization */
+    Size        capacity;       /* max # of UndoRequests */
+    Size        utilization;    /* # of non-FREE UndoRequests */
+    Size        soft_size_limit;    /* threshold to not background */
+    UndoRequestSource source;   /* which RBTree to check next? */
+    RBTree      requests_by_fxid;   /* lower FXIDs first */
+    RBTree      requests_by_size;   /* bigger sizes first */
+    RBTree      requests_by_retry_time; /* sooner retry times first */
+    bool        oldest_fxid_valid;  /* true if next field is valid */
+    FullTransactionId oldest_fxid;  /* oldest FXID of any UndoRequest */
+    UndoRequest *all_requests;  /* array of 'capacity' UndoRequests */
+    UndoRequest *first_free_request;    /* head of UndoRequest freelist */
+    UndoRequestNode *first_free_request_node;   /* head of node freelist */
+};
+
+/* Static functions. */
+static UndoRequest *FindUndoRequestForDatabase(UndoRequestManager *urm,
+                                               Oid dbid);
+static bool BackgroundUndoOK(UndoRequestManager *urm,
+                             UndoRequest *req);
+static RBTNode *UndoRequestNodeAllocate(void *arg);
+static void UndoRequestNodeFree(RBTNode *x, void *arg);
+static void UndoRequestNodeCombine(RBTNode *existing, const RBTNode *newdata,
+                                   void *arg);
+static int  UndoRequestNodeCompareRetryTime(const RBTNode *a,
+                                            const RBTNode *b,
+                                            void *arg);
+static int  UndoRequestNodeCompareFXID(const RBTNode *a, const RBTNode *b,
+                                       void *arg);
+static int  UndoRequestNodeCompareSize(const RBTNode *a, const RBTNode *b,
+                                       void *arg);
+static void InsertUndoRequest(RBTree *rbt, UndoRequest *req);
+static void RemoveUndoRequest(RBTree *rbt, UndoRequest *req);
+static UndoRequest *FindUndoRequest(UndoRequestManager *urm,
+                                    FullTransactionId fxid);
+
+/*
+ * Compute the amount of space that will be needed by an undo request manager.
+ *
+ * We need space for the UndoRequestManager itself, for the UndoRequest
+ * objects, and for the UndoRequestNode objects. We need twice as many
+ * UndoRequestNode objects as we do UndoRequest objects, because unfailed
+ * requests are stored in both requests_by_fxid and requests_by_size; failed
+ * requests are stored only in requests_by_retry_time.
+ */
+Size
+EstimateUndoRequestManagerSize(Size capacity)
+{
+    Size        s = MAXALIGN(sizeof(UndoRequestManager));
+
+    s = add_size(s, MAXALIGN(mul_size(capacity, sizeof(UndoRequest))));
+    s = add_size(s, MAXALIGN(mul_size(capacity,
+                                      mul_size(2, sizeof(UndoRequestNode)))));
+
+    return s;
+}
+
+/*
+ * Initialize an undo request manager.
+ *
+ * The caller is responsible for providing an appropriately-sized chunk of
+ * memory; use EstimateUndoRequestManagerSize to find out how much space will
+ * be needed. This means that this infrastructure can potentially be used in
+ * either shared memory or, if desired, in backend-private memory. It will not
+ * work in DSM, though, because it uses pointers.
+ *
+ * The caller must also provide a lock that will be used to protect access
+ * to the data managed by this undo request manager. This cannot be NULL,
+ * even if the memory is private.
+ *
+ * The memory is not assumed to be zeroed: every UndoRequest is explicitly
+ * marked FREE by setting its fxid to InvalidFullTransactionId. A capacity
+ * of zero yields a manager whose freelists are empty.
+ */
+void
+InitializeUndoRequestManager(UndoRequestManager *urm, LWLock *lock,
+                             Size capacity, Size soft_limit)
+{
+    UndoRequest *reqs;
+    UndoRequestNode *nodes;
+    Size        nnodes = 2 * capacity;
+    Size        i;
+
+    /* Basic initialization. */
+    urm->lock = lock;
+    urm->capacity = capacity;
+    urm->utilization = 0;
+    urm->soft_size_limit = soft_limit;
+    urm->source = UNDO_SOURCE_FXID;
+    rbt_initialize(&urm->requests_by_fxid, sizeof(UndoRequestNode),
+                   UndoRequestNodeCompareFXID, UndoRequestNodeCombine,
+                   UndoRequestNodeAllocate, UndoRequestNodeFree, urm);
+    rbt_initialize(&urm->requests_by_size, sizeof(UndoRequestNode),
+                   UndoRequestNodeCompareSize, UndoRequestNodeCombine,
+                   UndoRequestNodeAllocate, UndoRequestNodeFree, urm);
+    rbt_initialize(&urm->requests_by_retry_time, sizeof(UndoRequestNode),
+                   UndoRequestNodeCompareRetryTime, UndoRequestNodeCombine,
+                   UndoRequestNodeAllocate, UndoRequestNodeFree, urm);
+    urm->oldest_fxid_valid = true;
+    urm->oldest_fxid = InvalidFullTransactionId;
+
+    /* Find memory for UndoRequest and UndoRequestNode arenas. */
+    reqs = (UndoRequest *)
+        (((char *) urm) + MAXALIGN(sizeof(UndoRequestManager)));
+    urm->all_requests = reqs;
+    nodes = (UndoRequestNode *)
+        (((char *) reqs) + MAXALIGN(capacity * sizeof(UndoRequest)));
+
+    /*
+     * Build a free list of UndoRequest objects. Each one is marked FREE by
+     * giving it an invalid fxid, because UndoRequestManagerOldestFXID scans
+     * the whole array and treats any valid fxid as belonging to a live
+     * request. The last element terminates the list.
+     */
+    for (i = 0; i < capacity; ++i)
+    {
+        reqs[i].fxid = InvalidFullTransactionId;
+        reqs[i].next_free_request = (i + 1 < capacity) ? &reqs[i + 1] : NULL;
+    }
+    urm->first_free_request = (capacity > 0) ? reqs : NULL;
+
+    /*
+     * Similarly, build a free list of UndoRequestNode objects. In this case,
+     * we use the first few bytes of the free object to store a pointer to the
+     * next free object.
+     */
+    StaticAssertStmt(sizeof(UndoRequestNode) >= sizeof(UndoRequestNode *),
+                     "UndoRequestNode is too small");
+    for (i = 0; i < nnodes; ++i)
+        *(UndoRequestNode **) &nodes[i] =
+            (i + 1 < nnodes) ? &nodes[i + 1] : NULL;
+    urm->first_free_request_node = (nnodes > 0) ? nodes : NULL;
+}
+
+/*
+ * Register a new undo request. If unable, returns NULL.
+ *
+ * This function should be called before a transaction first writes any undo;
+ * at end of transaction, the caller must call either UnregisterUndoRequest
+ * (on commit) or FinalizeUndoRequest (on abort).
+ *
+ * The returned request is UNLISTED (as defined above).
+ */
+UndoRequest *
+RegisterUndoRequest(UndoRequestManager *urm, FullTransactionId fxid, Oid dbid)
+{
+    UndoRequest *req;
+
+    LWLockAcquire(urm->lock, LW_EXCLUSIVE);
+
+    req = urm->first_free_request;
+    if (req != NULL)
+    {
+        /* Pop free list. */
+        urm->first_free_request = req->next_free_request;
+        req->next_free_request = NULL;
+
+        /* Increase utilization. */
+        ++urm->utilization;
+
+        /* Initialize request object. */
+        req->fxid = fxid;
+        req->dbid = dbid;
+        req->size = 0;
+        req->start_location_logged = InvalidUndoRecPtr;
+        req->end_location_logged = InvalidUndoRecPtr;
+        req->start_location_unlogged = InvalidUndoRecPtr;
+        req->end_location_unlogged = InvalidUndoRecPtr;
+        req->retry_time = DT_NOBEGIN;
+
+        /*
+         * Save this fxid as the oldest one, if necessary. If the cached
+         * value is already marked invalid there is nothing to maintain; it
+         * will be recomputed on demand.
+         */
+        if (urm->oldest_fxid_valid &&
+            (!FullTransactionIdIsValid(urm->oldest_fxid)
+             || FullTransactionIdPrecedes(fxid, urm->oldest_fxid)))
+            urm->oldest_fxid = fxid;
+    }
+
+    LWLockRelease(urm->lock);
+
+    return req;
+}
+
+/*
+ * Set the start location for either logged or unlogged undo.
+ *
+ * We don't need a lock here, because this request must be UNLISTED (as
+ * defined above).
+ */
+void
+UndoRequestSetStartLocation(UndoRequestManager *urm, UndoRequest *req,
+                            bool is_logged, UndoRecPtr start_location)
+{
+    Assert(UndoRecPtrIsValid(start_location));
+    if (is_logged)
+        req->start_location_logged = start_location;
+    else
+        req->start_location_unlogged = start_location;
+}
+
+/*
+ * Finalize details for an undo request.
+ *
+ * Since an UndoRequest should be registered before beginning to write undo,
+ * the undo size won't be known at that point; this function should be getting
+ * called at prepare time for a prepared transaction, or at abort time
+ * otherwise, by which point the size should be known.
+ *
+ * Caller should report the total size of generated undo in bytes, counting
+ * only logged and unlogged undo that will be processed by background workers.
+ * Any undo bytes that aren't part of the logged or unlogged undo records
+ * that may need cleanup actions performed should not be included in size;
+ * for example, temporary undo doesn't count, as the caller must deal with
+ * that outside of this mechanism.
+ *
+ * Caller must also pass the end location for logged and unlogged undo;
+ * each should be InvalidUndoRecPtr if and only if the corresponding
+ * start location was never set.
+ *
+ * We don't need a lock here, because this request must be UNLISTED (as
+ * defined above).
+ */
+void
+FinalizeUndoRequest(UndoRequestManager *urm, UndoRequest *req, Size size,
+                    UndoRecPtr end_location_logged,
+                    UndoRecPtr end_location_unlogged)
+{
+    Assert(size != 0);
+    Assert(UndoRecPtrIsValid(end_location_logged) ||
+           UndoRecPtrIsValid(end_location_unlogged));
+    Assert(UndoRecPtrIsValid(end_location_logged) ==
+           UndoRecPtrIsValid(req->start_location_logged));
+    Assert(UndoRecPtrIsValid(end_location_unlogged) ==
+           UndoRecPtrIsValid(req->start_location_unlogged));
+    req->size = size;
+    req->end_location_logged = end_location_logged;
+    req->end_location_unlogged = end_location_unlogged;
+}
+
+/*
+ * Release a previously-allocated undo request.
+ *
+ * On entry, the undo request should be either LISTED or UNLISTED; on exit,
+ * it will be FREE (as these terms are defined above).
+ *
+ * This should be used at transaction commit, if an UndoRequest was
+ * registered, or when undo for an aborted transaction has been successfully
+ * processed.
+ *
+ * Because this function may be called as a post-commit step, it must never
+ * throw an ERROR.
+ */
+void
+UnregisterUndoRequest(UndoRequestManager *urm, UndoRequest *req)
+{
+    LWLockAcquire(urm->lock, LW_EXCLUSIVE);
+
+    /*
+     * Remove the UndoRequest from any RBTree that contains it. If the retry
+     * time is not DT_NOBEGIN, then the request has been finalized and undo
+     * has subsequently failed. If the size is 0, the request has not been
+     * finalized yet, so it's not in any RBTree.
+     */
+    if (req->retry_time != DT_NOBEGIN)
+        RemoveUndoRequest(&urm->requests_by_retry_time, req);
+    else if (req->size != 0)
+    {
+        RemoveUndoRequest(&urm->requests_by_fxid, req);
+        RemoveUndoRequest(&urm->requests_by_size, req);
+    }
+
+    /* Plan to recompute oldest_fxid, if necessary. */
+    if (FullTransactionIdEquals(req->fxid, urm->oldest_fxid))
+        urm->oldest_fxid_valid = false;
+
+    /*
+     * Mark the request FREE. A FREE request must have an invalid fxid;
+     * otherwise the scan in UndoRequestManagerOldestFXID would keep
+     * counting this slot as a live transaction. This must happen after the
+     * comparison against oldest_fxid above.
+     */
+    req->fxid = InvalidFullTransactionId;
+
+    /* Push onto freelist. */
+    req->next_free_request = urm->first_free_request;
+    urm->first_free_request = req;
+
+    /* Decrease utilization. */
+    --urm->utilization;
+
+    LWLockRelease(urm->lock);
+}
+
+/*
+ * Try to hand an undo request off for background processing.
+ *
+ * If this function returns true, the UndoRequest can be left for background
+ * processing; the caller need not do anything more. If this function returns
+ * false, the caller should try to process it in the foreground, and must
+ * call either UnregisterUndoRequest on success or RescheduleUndoRequest
+ * on failure.
+ *
+ * Because this function may be called during transaction abort, it must
+ * never throw an ERROR. Technically, InsertUndoRequest might reach
+ * UndoRequestNodeAllocate which could ERROR if the freelist is empty, but
+ * if that happens there's a bug someplace.
+ *
+ * On entry, the UndoRequest should be UNLISTED; on exit, it is LISTED
+ * if this function returns true, and remains UNLISTED if this function
+ * returns false (see above for definitions). The one exception is a request
+ * for which no start location was ever set: it is unregistered, and so
+ * becomes FREE, and true is returned.
+ */
+bool
+PerformUndoInBackground(UndoRequestManager *urm, UndoRequest *req)
+{
+    bool        background;
+
+    /*
+     * If we failed after allocating an UndoRequest but before setting any
+     * start locations, there's no work to be done. In that case, we can just
+     * unregister the request.
+     */
+    if (!UndoRecPtrIsValid(req->start_location_logged) &&
+        !UndoRecPtrIsValid(req->start_location_unlogged))
+    {
+        UnregisterUndoRequest(urm, req);
+        return true;
+    }
+
+    /*
+     * We need to check shared state in order to determine whether or not to
+     * perform this undo in the background, and if we are going to perform it
+     * in the background, also to add it to requests_by_fxid and
+     * requests_by_size.
+     */
+    LWLockAcquire(urm->lock, LW_EXCLUSIVE);
+    background = BackgroundUndoOK(urm, req);
+    if (background)
+    {
+        /*
+         * We're going to handle this in the background, so add it to
+         * requests_by_fxid and requests_by_size, so that GetNextUndoRequest
+         * can find it.
+         */
+        InsertUndoRequest(&urm->requests_by_fxid, req);
+        InsertUndoRequest(&urm->requests_by_size, req);
+    }
+    LWLockRelease(urm->lock);
+
+    return background;
+}
+
+/*
+ * Get an undo request that needs background processing.
+ *
+ * Unless dbid is InvalidOid, any request returned must be from the indicated
+ * database. If minimum_runtime_reached is true, the caller only wants to
+ * process another request if the next request happens to be from the correct
+ * database. If it's false, the caller wants to avoid exiting too quickly,
+ * and would like to process a request from the database if there's one
+ * available.
+ *
+ * If no suitable request is found, *fxid gets InvalidFullTransactionId;
+ * otherwise, *fxid gets the FullTransactionId of the transaction and
+ * the parameters which follow get the start and end locations of logged
+ * and unlogged undo for that transaction. It's possible that the transaction
+ * wrote only logged undo or only unlogged undo, in which case the other
+ * pair of fields will have a value of InvalidUndoRecPtr, but it should never
+ * happen that all of the fields get InvalidUndoRecPtr, because that would
+ * mean we queued up an UndoRequest to do nothing.
+ *
+ * This function, as a side effect, makes the returned UndoRequest UNLISTED,
+ * as defined above, so that no other backend will attempt to process it
+ * simultaneously. The caller must be certain to call either
+ * UnregisterUndoRequest (if successful) or RescheduleUndoRequest (on
+ * failure) to avoid leaking the UndoRequest.
+ */
+UndoRequest *
+GetNextUndoRequest(UndoRequestManager *urm, Oid dbid,
+                   bool minimum_runtime_reached,
+                   FullTransactionId *fxid,
+                   UndoRecPtr * start_location_logged,
+                   UndoRecPtr * end_location_logged,
+                   UndoRecPtr * start_location_unlogged,
+                   UndoRecPtr * end_location_unlogged)
+{
+    UndoRequest *req = NULL;
+    int         nloops;
+    bool        saw_db_mismatch = false;
+
+    LWLockAcquire(urm->lock, LW_EXCLUSIVE);
+
+    /* Some might have no work, so loop until all are checked. */
+    for (nloops = 0; nloops < 3; ++nloops)
+    {
+        RBTree     *rbt;
+        UndoRequestSource source = urm->source;
+        UndoRequestNode *node;
+
+        /*
+         * We rotate between the three possible sources of UndoRequest
+         * objects.
+         *
+         * The idea here is that processing the requests with the oldest
+         * transaction IDs is important because it helps us discard undo log
+         * data sooner and because it allows XID horizons to advance. On the
+         * other hand, handling transactions that generated a very large
+         * amount of undo is also a priority, because undo will probably take
+         * a long time to finish and thus should be started as early as
+         * possible and also because it likely touched a large number of pages
+         * which will be slow to access until the undo is processed.
+         *
+         * However, we also need to make sure to periodically retry undo for
+         * transactions that previously failed. We hope that this will be very
+         * rare, but if it does happen we can neither afford to retry those
+         * transactions over and over in preference to all others, nor on the
+         * other hand to just ignore them forever.
+         *
+         * We could try to come up with some scoring system that assigns
+         * relative levels of importance to FullTransactionId age, undo size,
+         * and retry time, but it seems difficult to come up with a weighting
+         * system that can ensure that nothing gets starved. By rotating among
+         * the sources evenly, we know that as long as we continue to process
+         * undo requests on some sort of regular basis, each source will get
+         * some amount of attention.
+         *
+         * Note that urm->source is advanced before we know whether this
+         * source yields anything, so the rotation moves on even when the
+         * chosen RBTree turns out to be empty or unusable.
+         */
+        switch (source)
+        {
+            case UNDO_SOURCE_FXID:
+                rbt = &urm->requests_by_fxid;
+                urm->source = UNDO_SOURCE_SIZE;
+                break;
+            case UNDO_SOURCE_SIZE:
+                rbt = &urm->requests_by_size;
+                urm->source = UNDO_SOURCE_RETRY_TIME;
+                break;
+            case UNDO_SOURCE_RETRY_TIME:
+                rbt = &urm->requests_by_retry_time;
+                urm->source = UNDO_SOURCE_FXID;
+                break;
+        }
+
+        /* Get highest-priority item. */
+        node = (UndoRequestNode *) rbt_leftmost(rbt);
+        if (node == NULL)
+            continue;
+
+        /*
+         * We can only take an item from the retry time RBTree if the retry
+         * time is in the past.
+         */
+        if (source == UNDO_SOURCE_RETRY_TIME &&
+            node->req->retry_time > GetCurrentTimestamp())
+            continue;
+
+        /*
+         * If a database OID was specified, it must match. If it does not, we
+         * go ahead and try any remaining RBTree. Note that this needs to be
+         * after the other tests so that we get the right value for the
+         * saw_db_mismatch flag.
+         */
+        if (OidIsValid(dbid) && node->req->dbid != dbid)
+        {
+            saw_db_mismatch = true;
+            continue;
+        }
+
+        /* Looks like we have a winner. */
+        req = node->req;
+        break;
+    }
+
+    /*
+     * Determine whether we should do a more exhaustive search.
+     *
+     * If we found a node, we don't need to look any harder. If we didn't see
+     * a database mismatch, then looking harder can't help: there's nothing to
+     * do at all, never mind for which database. If the caller set
+     * minimum_runtime_reached, then they don't want us to look harder.
+     */
+    if (req == NULL && saw_db_mismatch && !minimum_runtime_reached)
+        req = FindUndoRequestForDatabase(urm, dbid);
+
+    /*
+     * If we found a suitable request, remove it from any RBTree that contains
+     * it. A retry time other than DT_NOBEGIN means it was listed in
+     * requests_by_retry_time; otherwise it was listed in both of the other
+     * two RBTrees.
+     */
+    if (req != NULL)
+    {
+        if (req->retry_time != DT_NOBEGIN)
+            RemoveUndoRequest(&urm->requests_by_retry_time, req);
+        else
+        {
+            RemoveUndoRequest(&urm->requests_by_fxid, req);
+            RemoveUndoRequest(&urm->requests_by_size, req);
+        }
+    }
+
+    LWLockRelease(urm->lock);
+
+    /*
+     * Set output parameters. Any request we found is now UNLISTED, so it's
+     * safe to do this without the lock. When nothing was found, only *fxid
+     * is written; the location outputs are left untouched.
+     */
+    if (req == NULL)
+        *fxid = InvalidFullTransactionId;
+    else
+    {
+        *fxid = req->fxid;
+        *start_location_logged = req->start_location_logged;
+        *end_location_logged = req->end_location_logged;
+        *start_location_unlogged = req->start_location_unlogged;
+        *end_location_unlogged = req->end_location_unlogged;
+    }
+
+    /* All done. */
+    return req;
+}
+
+/*
+ * Reschedule an undo request after undo failure.
+ *
+ * This function should be called when undo processing fails, either in the
+ * foreground or in the background. The foreground case occurs when
+ * PerformUndoInBackground returns false and undo then also fails; the
+ * background case occurs when GetNextUndoRequest returns an UndoRequest and
+ * undo then fails. Note that this function isn't used after a shutdown or
+ * crash: see comments in RecreateUndoRequest for how we handle that case.
+ *
+ * In either of the cases where this function is reached, the UndoRequest
+ * should be UNLISTED; on return, it will be LISTED (both as defined above).
+ * If it's a foreground undo failure, it's never been LISTED; if it's a
+ * background undo failure, it was made UNLISTED by GetNextUndoRequest. So,
+ * we don't have to remove the request from anywhere, not even conditionally;
+ * we just need to add it to the set of failed requests.
+ *
+ * Because this function may be called during transaction abort, it must
+ * never throw an ERROR. Technically, InsertUndoRequest might reach
+ * UndoRequestNodeAllocate which could ERROR if the freelist is empty, but
+ * if that happens there's a bug someplace.
+ */
+void
+RescheduleUndoRequest(UndoRequestManager *urm, UndoRequest *req)
+{
+    LWLockAcquire(urm->lock, LW_EXCLUSIVE);
+
+    /*
+     * This algorithm for determining the next retry time is fairly
+     * unsophisticated: the first retry happens after 10 seconds, and each
+     * subsequent retry after 30 seconds. We could do something more
+     * complicated here, but we'd need to do more bookkeeping and it's unclear
+     * what we'd gain.
+     */
+    if (req->retry_time == DT_NOBEGIN)
+        req->retry_time =
+            TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 10 * 1000);
+    else
+        req->retry_time =
+            TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 30 * 1000);
+
+    InsertUndoRequest(&urm->requests_by_retry_time, req);
+    LWLockRelease(urm->lock);
+}
+
+/*
+ * Recreate UndoRequest state after a shutdown.
+ *
+ * This function is expected to be called after a shutdown, whether a clean
+ * shutdown or a crash, both for aborted transactions with unprocessed undo
+ * and also for prepared transactions. All calls to this function must be
+ * completed, and SuspendPreparedUndoRequest must be called for every prepared
+ * transaction, before the first call to GetNextUndoRequest occurs.
+ *
+ * This function may be called up to twice per FullTransactionId, once with
+ * is_logged true and once with is_logged false, because the transaction may
+ * have both logged and unlogged undo in different places. start_location and
+ * end_location delimit the type of undo indicated by the is_logged
+ * parameter, and size is the amount of such undo in bytes. If this function
+ * is called twice, the result will be a single UndoRequest containing both
+ * pairs of locations and a size which is the sum of the two sizes passed to
+ * the separate calls.
+ *
+ * If this function is unable to allocate a new UndoRequest when required,
+ * it will return false. If that happens, it's not safe to continue using
+ * this UndoRequestManager and a system-wide shutdown to raise the limit on
+ * the number of outstanding requests is indicated.
+ */
+bool
+RecreateUndoRequest(UndoRequestManager *urm, FullTransactionId fxid,
+                    Oid dbid, bool is_logged, UndoRecPtr start_location,
+                    UndoRecPtr end_location, Size size)
+{
+    UndoRequest *req;
+
+    Assert(UndoRecPtrIsValid(start_location));
+    LWLockAcquire(urm->lock, LW_EXCLUSIVE);
+    req = FindUndoRequest(urm, fxid);
+    if (req)
+    {
+        /* Already called for opposite value of is_logged. */
+        if (is_logged)
+        {
+            Assert(!UndoRecPtrIsValid(req->start_location_logged));
+            Assert(!UndoRecPtrIsValid(req->end_location_logged));
+            req->start_location_logged = start_location;
+            req->end_location_logged = end_location;
+        }
+        else
+        {
+            Assert(!UndoRecPtrIsValid(req->start_location_unlogged));
+            Assert(!UndoRecPtrIsValid(req->end_location_unlogged));
+            req->start_location_unlogged = start_location;
+            req->end_location_unlogged = end_location;
+        }
+        Assert(req->dbid == dbid);
+
+        /* Adjusting size may change position in RBTree. */
+        RemoveUndoRequest(&urm->requests_by_size, req);
+        req->size += size;
+        InsertUndoRequest(&urm->requests_by_size, req);
+    }
+    else
+    {
+        /* First call for this FullTransactionId. */
+        req = urm->first_free_request;
+        if (req == NULL)
+        {
+            LWLockRelease(urm->lock);
+            return false;
+        }
+
+        /* We got an item; pop it from the free list. */
+        urm->first_free_request = req->next_free_request;
+        req->next_free_request = NULL;
+
+        /* Increase utilization. */
+        ++urm->utilization;
+
+        /*
+         * Initialize request object. All four locations are reset first,
+         * since the slot may be recycled and a later call for the opposite
+         * value of is_logged asserts that its pair is still invalid.
+         */
+        req->fxid = fxid;
+        req->dbid = dbid;
+        req->size = size;
+        req->start_location_logged = InvalidUndoRecPtr;
+        req->end_location_logged = InvalidUndoRecPtr;
+        req->start_location_unlogged = InvalidUndoRecPtr;
+        req->end_location_unlogged = InvalidUndoRecPtr;
+        req->retry_time = DT_NOBEGIN;
+        if (is_logged)
+        {
+            req->start_location_logged = start_location;
+            req->end_location_logged = end_location;
+        }
+        else
+        {
+            req->start_location_unlogged = start_location;
+            req->end_location_unlogged = end_location;
+        }
+
+        /*
+         * Save this fxid as the oldest one, if necessary, exactly as
+         * RegisterUndoRequest does; otherwise the cached oldest_fxid would
+         * not account for recreated requests.
+         */
+        if (urm->oldest_fxid_valid &&
+            (!FullTransactionIdIsValid(urm->oldest_fxid)
+             || FullTransactionIdPrecedes(fxid, urm->oldest_fxid)))
+            urm->oldest_fxid = fxid;
+
+        /*
+         * List this request so that undo workers will see it. Note that we
+         * assume that these are new aborts, but it's possible that there are
+         * actually a whole series of previous undo failures before the
+         * shutdown or crash. If we had the information about whether this
+         * request had failed previously, we could set req->retry_time and
+         * insert it into requests_by_retry_time rather than requests_by_fxid
+         * and requests_by_size, but it doesn't seem important to retain
+         * information about undo failure across crashes or shutdowns, because
+         * we're just trying to guarantee that we don't busy-loop or starve
+         * other requests. (FindUndoRequest would get confused, too.)
+         */
+        InsertUndoRequest(&urm->requests_by_fxid, req);
+        InsertUndoRequest(&urm->requests_by_size, req);
+    }
+
+    LWLockRelease(urm->lock);
+    return true;
+}
+
+/*
+ * Adjust UndoRequestManager state for prepared transactions.
+ *
+ * After a restart, once all calls to RecreateUndoRequest have been completed
+ * and before the first call to GetNextUndoRequest, this function should
+ * be called for each prepared transaction. That's necessary to avoid
+ * prematurely executing undo actions for transactions that haven't aborted
+ * yet and might go on to commit. The UndoRequest for the indicated fxid is
+ * made UNLISTED (as defined above) so that GetNextUndoRequest does not find
+ * it.
+ *
+ * The caller should retain a pointer to the returned UndoRequest and, when
+ * the prepared transaction is eventually committed or rolled back, should
+ * invoke UnregisterUndoRequest on commit or FinalizeUndoRequest on abort.
+ */
+UndoRequest *
+SuspendPreparedUndoRequest(UndoRequestManager *urm, FullTransactionId fxid)
+{
+    UndoRequest *req;
+
+    LWLockAcquire(urm->lock, LW_EXCLUSIVE);
+    req = FindUndoRequest(urm, fxid);
+    Assert(req != NULL);
+    Assert(req->size != 0);
+    RemoveUndoRequest(&urm->requests_by_fxid, req);
+    RemoveUndoRequest(&urm->requests_by_size, req);
+    LWLockRelease(urm->lock);
+
+    return req;
+}
+
+/*
+ * Get oldest registered FXID, whether LISTED or UNLISTED (as defined above).
+ *
+ * Returns InvalidFullTransactionId if no request has a valid fxid.
+ *
+ * We cache the result of this computation so as to avoid repeating it too
+ * often. When the cache has been invalidated, every slot in all_requests is
+ * examined and any slot with a valid fxid is taken into account.
+ */
+FullTransactionId
+UndoRequestManagerOldestFXID(UndoRequestManager *urm)
+{
+    FullTransactionId result = InvalidFullTransactionId;
+
+    LWLockAcquire(urm->lock, LW_EXCLUSIVE);
+
+    if (urm->oldest_fxid_valid)
+        result = urm->oldest_fxid;
+    else
+    {
+        Size        i;      /* same type as capacity, to avoid mixed-sign
+                             * comparison */
+
+        for (i = 0; i < urm->capacity; ++i)
+        {
+            UndoRequest *req = &urm->all_requests[i];
+
+            if (FullTransactionIdIsValid(req->fxid) &&
+                (!FullTransactionIdIsValid(result) ||
+                 FullTransactionIdPrecedes(req->fxid, result)))
+                result = req->fxid;
+        }
+
+        /* Remember the answer until something invalidates it again. */
+        urm->oldest_fxid = result;
+        urm->oldest_fxid_valid = true;
+    }
+
+    LWLockRelease(urm->lock);
+
+    return result;
+}
+
+/*
+ * Perform a left-to-right search of all three RBTrees, looking for a request
+ * for a given database. The searches are interleaved so that we latch
+ * onto the highest-priority request in any RBTree.
+ *
+ * It's possible that we should have some kind of limit on this search, so
+ * that it doesn't do an exhaustive search of every RBTree. However, it's not
+ * exactly clear how that would affect the behavior, or how to pick a
+ * reasonable limit.
+ */ +static UndoRequest * +FindUndoRequestForDatabase(UndoRequestManager *urm, Oid dbid) +{ + RBTreeIterator iter[3]; + int doneflags = 0; + int i = 0; + + rbt_begin_iterate(&urm->requests_by_fxid, LeftRightWalk, &iter[0]); + rbt_begin_iterate(&urm->requests_by_size, LeftRightWalk, &iter[1]); + rbt_begin_iterate(&urm->requests_by_retry_time, LeftRightWalk, &iter[2]); + + while (1) + { + UndoRequestNode *node; + + if ((doneflags & (1 << i)) == 0) + { + node = (UndoRequestNode *) rbt_iterate(&iter[i]); + if (node == NULL) + { + doneflags |= 1 << i; + if (doneflags == 7) /* all bits set */ + break; + } + else if (node->req->dbid == dbid) + return node->req; + } + i = (i + 1) % 3; + } + + return NULL; +} + +/* + * Is it OK to handle this UndoRequest in the background? + */ +static bool +BackgroundUndoOK(UndoRequestManager *urm, UndoRequest *req) +{ + /* + * If we've passed the soft size limit, it's not OK to background it. + */ + if (urm->utilization > urm->soft_size_limit) + return false; + + /* + * Otherwise, allow it. + * + * TODO: We probably want to introduce some additional rules here based on + * the size of the request. + */ + return true; +} + +/* + * RBTree callback to allocate an UndoRequestNode. + * + * Everything is preallocated, so we're just popping the freelist. + */ +static RBTNode * +UndoRequestNodeAllocate(void *arg) +{ + UndoRequestManager *urm = arg; + UndoRequestNode *node = urm->first_free_request_node; + + /* + * Any LISTED UndoRequest should either be in both requests_by_fxid and + * requests_by_size, or it should be in requests_by_retry_time, or it + * should be in neither RBTree; consequently, it should be impossible to + * use more than 2 UndoRequestNode objects per UndoRequest. Since we + * preallocate that number, we should never run out. In case there's a bug + * in the logic, let's insert a runtime check here even when Asserts are + * disabled. + */ + if (node == NULL) + elog(ERROR, "no free UndoRequestNode"); + + /* Pop freelist.
*/ + urm->first_free_request_node = *(UndoRequestNode **) node; + + return &node->rbtnode; +} + +/* + * RBTree callback to free an UndoRequestNode. + * + * Just put it back on the freelist. + */ +static void +UndoRequestNodeFree(RBTNode *x, void *arg) +{ + UndoRequestManager *urm = arg; + UndoRequestNode *node = (UndoRequestNode *) x; + + *(UndoRequestNode **) node = urm->first_free_request_node; + urm->first_free_request_node = node; +} + +/* + * RBTree callback to combine an UndoRequestNode with another one. + * + * The key for every RBTree includes the FXID, which is unique, so it should + * never happen that we need to merge requests. + */ +static void +UndoRequestNodeCombine(RBTNode *existing, const RBTNode *newdata, void *arg) +{ + elog(ERROR, "undo requests should never need to be combined"); +} + +/* + * RBTree comparator for requests_by_retry_time. Older retry + * times first; in the case of a tie, smaller FXIDs first. This avoids ties, + * which is important since we don't want to merge requests, and also favors + * retiring older transactions first, which is generally desirable. + */ +static int +UndoRequestNodeCompareRetryTime(const RBTNode *a, const RBTNode *b, void *arg) +{ + const UndoRequestNode *aa = (UndoRequestNode *) a; + const UndoRequestNode *bb = (UndoRequestNode *) b; + FullTransactionId fxid_a = aa->req->fxid; + FullTransactionId fxid_b = bb->req->fxid; + TimestampTz retry_time_a = aa->req->retry_time; + TimestampTz retry_time_b = bb->req->retry_time; + + if (retry_time_a != retry_time_b) + return retry_time_a < retry_time_b ? -1 : 1; + + if (FullTransactionIdPrecedes(fxid_a, fxid_b)) + return -1; + else if (FullTransactionIdPrecedes(fxid_b, fxid_a)) + return 1; + else + return 0; +} + +/* + * RBTree comparator for requests_by_fxid. Lower FXIDs first. No tiebreak, + * because FXIDs should be unique.
+ */ +static int +UndoRequestNodeCompareFXID(const RBTNode *a, const RBTNode *b, void *arg) +{ + const UndoRequestNode *aa = (UndoRequestNode *) a; + const UndoRequestNode *bb = (UndoRequestNode *) b; + FullTransactionId fxid_a = aa->req->fxid; + FullTransactionId fxid_b = bb->req->fxid; + + if (FullTransactionIdPrecedes(fxid_a, fxid_b)) + return -1; + else if (FullTransactionIdPrecedes(fxid_b, fxid_a)) + return 1; + else + return 0; +} + +/* + * RBTree comparator for requests_by_size. Larger sizes first; as for the + * retry-time RBTree, break ties in favor of lower FXIDs. + */ +static int +UndoRequestNodeCompareSize(const RBTNode *a, const RBTNode *b, void *arg) +{ + const UndoRequestNode *aa = (UndoRequestNode *) a; + const UndoRequestNode *bb = (UndoRequestNode *) b; + FullTransactionId fxid_a = aa->req->fxid; + FullTransactionId fxid_b = bb->req->fxid; + Size size_a = aa->req->size; + Size size_b = bb->req->size; + + if (size_a != size_b) + return size_a < size_b ? 1 : -1; + + if (FullTransactionIdPrecedes(fxid_a, fxid_b)) + return -1; + else if (FullTransactionIdPrecedes(fxid_b, fxid_a)) + return 1; + else + return 0; +} + +/* + * Insert an UndoRequest into one RBTree. + * + * The actual RBTree element is an UndoRequestNode, which just points to + * the actual UndoRequest. + */ +static void +InsertUndoRequest(RBTree *rbt, UndoRequest *req) +{ + UndoRequestNode dummy; + bool isNew; + + /* + * The rbt_insert interface is a bit strange: we have to pass something + * that looks like an RBTNode, but the RBTNode itself doesn't need to be + * initialized - only the "extra" data that follows the end of the + * structure needs to be correct. + */ + dummy.req = req; + rbt_insert(rbt, &dummy.rbtnode, &isNew); + Assert(isNew); +} + +/* + * Remove an UndoRequest from one RBTree. + * + * This is just the reverse of InsertUndoRequest, with the same interface + * quirk.
+ */ +static void +RemoveUndoRequest(RBTree *rbt, UndoRequest *req) +{ + UndoRequestNode dummy; + RBTNode *node; + + dummy.req = req; + node = rbt_find(rbt, &dummy.rbtnode); + rbt_delete(rbt, node); +} + +/* + * Find an UndoRequest by FXID. + * + * If we needed to do this frequently, it might be worth maintaining a hash + * table mapping FXID -> UndoRequest, but since we only need it after a system + * restart, RBTree's O(lg n) performance seems good enough. + * + * Note that this can only find an UndoRequest that has not failed and is not + * yet being processed, because a failed UndoRequest would be in + * requests_by_retry_time, not requests_by_fxid, and an in-progress + * UndoRequest wouldn't be in either data structure. That restriction, too, + * is OK for current uses. + */ +static UndoRequest * +FindUndoRequest(UndoRequestManager *urm, FullTransactionId fxid) +{ + UndoRequest dummy_request; + UndoRequestNode dummy_node; + RBTNode *node; + + /* + * Here we need both a dummy UndoRequest and a dummy UndoRequestNode; only + * the comparator will look at the dummy UndoRequestNode, and it will only + * look at the UndoRequest, and specifically its FXID.
+ */ + dummy_request.fxid = fxid; + dummy_node.req = &dummy_request; + node = rbt_find(&urm->requests_by_fxid, &dummy_node.rbtnode); + if (node == NULL) + return NULL; + return ((UndoRequestNode *) node)->req; +} diff --git a/src/backend/lib/rbtree.c b/src/backend/lib/rbtree.c index 33181e9211..bda870eab7 100644 --- a/src/backend/lib/rbtree.c +++ b/src/backend/lib/rbtree.c @@ -35,25 +35,6 @@ #define RBTBLACK (0) #define RBTRED (1) -/* - * RBTree control structure - */ -struct RBTree -{ - RBTNode *root; /* root node, or RBTNIL if tree is empty */ - - /* Remaining fields are constant after rbt_create */ - - Size node_size; /* actual size of tree nodes */ - /* The caller-supplied manipulation functions */ - rbt_comparator comparator; - rbt_combiner combiner; - rbt_allocfunc allocfunc; - rbt_freefunc freefunc; - /* Passthrough arg passed to all manipulation functions */ - void *arg; -}; - /* * all leafs are sentinels, use customized NIL name to prevent * collision with system-wide constant NIL which is actually NULL @@ -122,6 +103,33 @@ rbt_create(Size node_size, return tree; } +/* + * rbt_initialize: initialize an empty RBTree + * + * This is just like rbt_create, except that the caller is responsible for + * allocating the memory.
+ */ +void +rbt_initialize(RBTree *rbt, + Size node_size, + rbt_comparator comparator, + rbt_combiner combiner, + rbt_allocfunc allocfunc, + rbt_freefunc freefunc, + void *arg) +{ + Assert(node_size > sizeof(RBTNode)); + + rbt->root = RBTNIL; + rbt->node_size = node_size; + rbt->comparator = comparator; + rbt->combiner = combiner; + rbt->allocfunc = allocfunc; + rbt->freefunc = freefunc; + + rbt->arg = arg; +} + /* Copy the additional data fields from one RBTNode to another */ static inline void rbt_copy_data(RBTree *rbt, RBTNode *dest, const RBTNode *src) diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 33fd052156..cc00509699 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -47,6 +47,7 @@ #define EpochFromFullTransactionId(x) ((uint32) ((x).value >> 32)) #define XidFromFullTransactionId(x) ((uint32) (x).value) #define U64FromFullTransactionId(x) ((x).value) +#define FullTransactionIdEquals(a, b) ((a).value == (b).value) #define FullTransactionIdPrecedes(a, b) ((a).value < (b).value) #define FullTransactionIdIsValid(x) TransactionIdIsValid(XidFromFullTransactionId(x)) #define InvalidFullTransactionId FullTransactionIdFromEpochAndXid(0, InvalidTransactionId) diff --git a/src/include/access/undorequest.h b/src/include/access/undorequest.h new file mode 100644 index 0000000000..7be7308e15 --- /dev/null +++ b/src/include/access/undorequest.h @@ -0,0 +1,80 @@ +/*------------------------------------------------------------------------- + * + * undorequest.h + * Undo request manager.
+ * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/undorequest.h + * + *------------------------------------------------------------------------- + */ +#ifndef UNDOREQUEST_H +#define UNDOREQUEST_H + +#include "access/transam.h" +#include "access/undolog.h" +#include "datatype/timestamp.h" + +struct UndoRequest; +struct UndoRequestManager; +typedef struct UndoRequest UndoRequest; +typedef struct UndoRequestManager UndoRequestManager; + +/* Initialization functions. */ +extern Size EstimateUndoRequestManagerSize(Size capacity); +extern void InitializeUndoRequestManager(UndoRequestManager *urm, + LWLock *lock, Size capacity, + Size soft_limit); + +/* Call this before inserting undo records. */ +extern UndoRequest *RegisterUndoRequest(UndoRequestManager *urm, + FullTransactionId fxid, + Oid dbid); + +/* Remember where our undo starts. */ +extern void UndoRequestSetStartLocation(UndoRequestManager *urm, + UndoRequest *req, + bool is_logged, + UndoRecPtr start_location); + +/* Remember undo size and end locations. */ +extern void FinalizeUndoRequest(UndoRequestManager *urm, + UndoRequest *req, + Size size, + UndoRecPtr end_location_logged, + UndoRecPtr end_location_unlogged); + +/* Forget about an UndoRequest we don't need any more. */ +extern void UnregisterUndoRequest(UndoRequestManager *urm, UndoRequest *req); + +/* Attempt to dispatch UndoRequest for background processing. */ +extern bool PerformUndoInBackground(UndoRequestManager *urm, UndoRequest *req); + +/* Get work for background undo process. */ +extern UndoRequest *GetNextUndoRequest(UndoRequestManager *urm, Oid dbid, + bool minimum_runtime_reached, + FullTransactionId *fxid, + UndoRecPtr *start_location_logged, + UndoRecPtr *end_location_logged, + UndoRecPtr *start_location_unlogged, + UndoRecPtr *end_location_unlogged); + +/* Reschedule failed undo attempt.
*/ +extern void RescheduleUndoRequest(UndoRequestManager *urm, UndoRequest *req); + +/* Restore state after crash. */ +extern bool RecreateUndoRequest(UndoRequestManager *urm, + FullTransactionId fxid, Oid dbid, + bool is_logged, + UndoRecPtr start_location, + UndoRecPtr end_location, + Size size); +extern UndoRequest *SuspendPreparedUndoRequest(UndoRequestManager *urm, + FullTransactionId fxid); + +/* Get oldest registered FXID. */ +extern FullTransactionId UndoRequestManagerOldestFXID(UndoRequestManager *urm); + +#endif diff --git a/src/include/lib/rbtree.h b/src/include/lib/rbtree.h index 6d79a24015..ff6f99a932 100644 --- a/src/include/lib/rbtree.h +++ b/src/include/lib/rbtree.h @@ -28,8 +28,33 @@ typedef struct RBTNode struct RBTNode *parent; /* parent, or NULL (not RBTNIL!) if none */ } RBTNode; -/* Opaque struct representing a whole tree */ -typedef struct RBTree RBTree; +/* Support functions to be provided by caller */ +typedef int (*rbt_comparator) (const RBTNode *a, const RBTNode *b, void *arg); +typedef void (*rbt_combiner) (RBTNode *existing, const RBTNode *newdata, void *arg); +typedef RBTNode *(*rbt_allocfunc) (void *arg); +typedef void (*rbt_freefunc) (RBTNode *x, void *arg); + +/* + * RBTree control structure + * + * This is declared here to make it possible to preallocate an object of + * the correct size, but callers should not access the members directly.
+ */ +typedef struct RBTree +{ + RBTNode *root; /* root node, or RBTNIL if tree is empty */ + + /* Remaining fields are constant after rbt_create or rbt_initialize */ + + Size node_size; /* actual size of tree nodes */ + /* The caller-supplied manipulation functions */ + rbt_comparator comparator; + rbt_combiner combiner; + rbt_allocfunc allocfunc; + rbt_freefunc freefunc; + /* Passthrough arg passed to all manipulation functions */ + void *arg; +} RBTree; /* Available tree iteration orderings */ typedef enum RBTOrderControl @@ -53,18 +78,19 @@ struct RBTreeIterator bool is_over; }; -/* Support functions to be provided by caller */ -typedef int (*rbt_comparator) (const RBTNode *a, const RBTNode *b, void *arg); -typedef void (*rbt_combiner) (RBTNode *existing, const RBTNode *newdata, void *arg); -typedef RBTNode *(*rbt_allocfunc) (void *arg); -typedef void (*rbt_freefunc) (RBTNode *x, void *arg); - extern RBTree *rbt_create(Size node_size, rbt_comparator comparator, rbt_combiner combiner, rbt_allocfunc allocfunc, rbt_freefunc freefunc, void *arg); +extern void rbt_initialize(RBTree *rbt, + Size node_size, + rbt_comparator comparator, + rbt_combiner combiner, + rbt_allocfunc allocfunc, + rbt_freefunc freefunc, + void *arg); extern RBTNode *rbt_find(RBTree *rbt, const RBTNode *data); extern RBTNode *rbt_leftmost(RBTree *rbt); diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index 60d6d7be1b..f32afffab1 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -19,6 +19,7 @@ SUBDIRS = \ test_rbtree \ test_rls_hooks \ test_shm_mq \ + test_undo_request_manager \ unsafe_tests \ worker_spi diff --git a/src/test/modules/test_undo_request_manager/Makefile b/src/test/modules/test_undo_request_manager/Makefile new file mode 100644 index 0000000000..5bc4695004 --- /dev/null +++ b/src/test/modules/test_undo_request_manager/Makefile @@ -0,0 +1,21 @@ +# src/test/modules/test_undo_request_manager/Makefile + +MODULE_big = test_undo_request_manager +OBJS =
test_undo_request_manager.o $(WIN32RES) +PGFILEDESC = "test_undo_request_manager - test undo request manager code" + +EXTENSION = test_undo_request_manager +DATA = test_undo_request_manager--1.0.sql + +REGRESS = test_undo_request_manager + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_undo_request_manager +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_undo_request_manager/expected/test_undo_request_manager.out b/src/test/modules/test_undo_request_manager/expected/test_undo_request_manager.out new file mode 100644 index 0000000000..c79611b3b6 --- /dev/null +++ b/src/test/modules/test_undo_request_manager/expected/test_undo_request_manager.out @@ -0,0 +1,28 @@ +CREATE EXTENSION test_undo_request_manager; +-- not enough space +select urm_simple_test(1, '{10000,20000}'); +ERROR: unable to register undo request #2 +-- simple case +select urm_simple_test(2, '{10000,20000}'); + urm_simple_test +----------------- + {1001,1002} +(1 row) + +-- should alternate between early and large requests in order +select urm_simple_test(10, +'{10000,20000,30000,40000,50000,1000000,1000000,1000000,1000000}'); + urm_simple_test +------------------------------------------------ + {1001,1006,1002,1007,1003,1008,1004,1009,1005} +(1 row) + +-- should alternate between early and large requests, but the large requests +-- should be processed in reverse order +select urm_simple_test(10, +'{10000,20000,30000,40000,50000,1000000,2000000,3000000,4000000,50000000}'); + urm_simple_test +----------------------------------------------------- + {1001,1010,1002,1009,1003,1008,1004,1007,1005,1006} +(1 row) + diff --git a/src/test/modules/test_undo_request_manager/sql/test_undo_request_manager.sql b/src/test/modules/test_undo_request_manager/sql/test_undo_request_manager.sql new file mode 100644 index 
0000000000..6611e040b6 --- /dev/null +++ b/src/test/modules/test_undo_request_manager/sql/test_undo_request_manager.sql @@ -0,0 +1,16 @@ +CREATE EXTENSION test_undo_request_manager; + +-- not enough space +select urm_simple_test(1, '{10000,20000}'); + +-- simple case +select urm_simple_test(2, '{10000,20000}'); + +-- should alternate between early and large requests in order +select urm_simple_test(10, +'{10000,20000,30000,40000,50000,1000000,1000000,1000000,1000000}'); + +-- should alternate between early and large requests, but the large requests +-- should be processed in reverse order +select urm_simple_test(10, +'{10000,20000,30000,40000,50000,1000000,2000000,3000000,4000000,50000000}'); diff --git a/src/test/modules/test_undo_request_manager/test_undo_request_manager--1.0.sql b/src/test/modules/test_undo_request_manager/test_undo_request_manager--1.0.sql new file mode 100644 index 0000000000..30ff471c23 --- /dev/null +++ b/src/test/modules/test_undo_request_manager/test_undo_request_manager--1.0.sql @@ -0,0 +1,9 @@ +/* src/test/modules/test_undo_request_manager/test_undo_request_manager--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_undo_request_manager" to load this file. \quit + +CREATE FUNCTION urm_simple_test(capacity pg_catalog.int4, + requests pg_catalog.int8[]) + RETURNS pg_catalog.int8[] STRICT + AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_undo_request_manager/test_undo_request_manager.c b/src/test/modules/test_undo_request_manager/test_undo_request_manager.c new file mode 100644 index 0000000000..8b994283c8 --- /dev/null +++ b/src/test/modules/test_undo_request_manager/test_undo_request_manager.c @@ -0,0 +1,139 @@ +/*-------------------------------------------------------------------------- + * + * test_undo_request_manager.c + * Test undo request manager.
+ * + * Copyright (c) 2013-2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_undo_request_manager/test_undo_request_manager.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/undorequest.h" +#include "catalog/pg_type_d.h" +#include "fmgr.h" +#include "miscadmin.h" +#include "storage/proc.h" +#include "utils/array.h" + +PG_MODULE_MAGIC; +PG_FUNCTION_INFO_V1(urm_simple_test); + +/* + * SQL-callable test function. We create an UndoRequestManager in + * backend-private memory here and exercise it a bit to see if it breaks. + * + * The first argument is the capacity of the UndoRequestManager as an integer. + * + * The second argument is a 1-dimensional bigint array, where each element + * contains a hypothetical undo size. + * + * This function registers and inserts all the requests (failing if space is + * exhausted) with fake, sequentially assigned transaction IDs, and then + * fetches them back one by one. The return value is an array of fake + * transaction IDs in the order they were returned. + * + * This test doesn't simulate undo failure, multi-database operation, or + * prepared transactions. + */ +Datum +urm_simple_test(PG_FUNCTION_ARGS) +{ + int64 capacity = PG_GETARG_INT32(0); + ArrayType *array = PG_GETARG_ARRAYTYPE_P(1); + Datum *darray; + int nentries; + Datum *dresult; + ArrayType *result; + UndoRequestManager *urm; + const UndoRecPtr SomeValidUndoRecPtr = InvalidUndoRecPtr + 1; + int i; + FullTransactionId fake_fxid = FullTransactionIdFromEpochAndXid(0, 1000); + + /* Require positive capacity. */ + if (capacity <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("undo request manager capacity must be a positive integer"))); + + /* Sanity-check and deconstruct array.
*/ + if (ARR_NDIM(array) != 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), + errmsg("array must have exactly 1 dimension"))); + if (array_contains_nulls(array)) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), + errmsg("cannot work with arrays containing NULLs"))); + deconstruct_array(array, INT8OID, 8, FLOAT8PASSBYVAL, 'd', + &darray, NULL, &nentries); + + /* + * Initialize UndoRequestManager. We have to supply an LWLock; rather than + * creating a new one somewhere, just use our own backendLock. These locks + * aren't that heavily trafficked and we won't have any reason to take it + * for any other purpose while the UndoRequestManager holds it, so this + * should be safe enough. + * + * We make the soft limit equal to the full capacity here for testing + * purposes, which means that we should always succeed in dispatching to + * the background. + */ + urm = palloc(EstimateUndoRequestManagerSize(capacity)); + InitializeUndoRequestManager(urm, &MyProc->backendLock, + capacity, capacity); + + /* Insert entries as provided by caller. */ + for (i = 0; i < nentries; ++i) + { + int64 size = DatumGetInt64(darray[i]); + UndoRequest *req; + + FullTransactionIdAdvance(&fake_fxid); + + req = RegisterUndoRequest(urm, fake_fxid, MyDatabaseId); + if (req == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unable to register undo request #%d", i + 1))); + UndoRequestSetStartLocation(urm, req, true, SomeValidUndoRecPtr); + FinalizeUndoRequest(urm, req, size, + SomeValidUndoRecPtr, + InvalidUndoRecPtr); + if (!PerformUndoInBackground(urm, req)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unable to background undo request #%d", i + 1))); + } + + /* Now get the entries back. */ + dresult = palloc(nentries * sizeof(Datum)); + for (i = 0; true; ++i) + { + UndoRequest *req; + UndoRecPtr p[4]; + + /* Get some work.
*/ + req = GetNextUndoRequest(urm, MyDatabaseId, true, + &fake_fxid, &p[0], &p[1], &p[2], &p[3]); + if (req == NULL) + break; + if (i >= nentries) + elog(ERROR, "found more undo requests than were inserted"); + + /* Save the fake FXID. */ + dresult[i] = + Int64GetDatum((int64) U64FromFullTransactionId(fake_fxid)); + + /* Report that we successfully processed the imaginary undo. */ + UnregisterUndoRequest(urm, req); + } + + /* Put result into array form. */ + result = construct_array(dresult, i, INT8OID, 8, FLOAT8PASSBYVAL, 'd'); + PG_RETURN_ARRAYTYPE_P(result); +} diff --git a/src/test/modules/test_undo_request_manager/test_undo_request_manager.control b/src/test/modules/test_undo_request_manager/test_undo_request_manager.control new file mode 100644 index 0000000000..0a340e9843 --- /dev/null +++ b/src/test/modules/test_undo_request_manager/test_undo_request_manager.control @@ -0,0 +1,4 @@ +comment = 'Test code for undo request manager' +default_version = '1.0' +module_pathname = '$libdir/test_undo_request_manager' +relocatable = true diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 432d2d812e..4c78ba61a5 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2536,6 +2536,10 @@ ULONG ULONG_PTR UV UVersionInfo +UndoRequest +UndoRequestManager +UndoRequestNode +UndoRequestSource Unique UniquePath UniquePathMethod -- 2.17.2 (Apple Git-113)