LCOV - code coverage report
Current view: top level - src/backend/replication - slot.c
Test:         PostgreSQL 14devel
Date:         2020-10-28 11:24:57
Coverage:     Lines: 431 of 591 (72.9 %)    Functions: 26 of 28 (92.9 %)
Legend:       Lines: hit / not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * slot.c
       4             :  *     Replication slot management.
       5             :  *
       6             :  *
       7             :  * Copyright (c) 2012-2020, PostgreSQL Global Development Group
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/replication/slot.c
      12             :  *
      13             :  * NOTES
      14             :  *
      15             :  * Replication slots are used to keep state about replication streams
      16             :  * originating from this cluster.  Their primary purpose is to prevent the
      17             :  * premature removal of WAL or of old tuple versions in a manner that would
      18             :  * interfere with replication; they are also useful for monitoring purposes.
      19             :  * Slots need to be permanent (to allow restarts), crash-safe, and allocatable
      20             :  * on standbys (to support cascading setups).  The requirement that slots be
      21             :  * usable on standbys precludes storing them in the system catalogs.
      22             :  *
      23             :  * Each replication slot gets its own directory inside the $PGDATA/pg_replslot
      24             :  * directory. Inside that directory the state file will contain the slot's
      25             :  * own data. Additional data can be stored alongside that file if required.
      26             :  * While the server is running, the state data is also cached in memory for
      27             :  * efficiency.
      28             :  *
      29             :  * ReplicationSlotAllocationLock must be taken in exclusive mode to allocate
      30             :  * or free a slot. ReplicationSlotControlLock must be taken in shared mode
      31             :  * to iterate over the slots, and in exclusive mode to change the in_use flag
      32             :  * of a slot.  The remaining data in each slot is protected by its mutex.
      33             :  *
      34             :  *-------------------------------------------------------------------------
      35             :  */
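
As an illustration of the locking rules above, here is a minimal sketch (not part of slot.c, and assuming the headers this file already includes) of scanning the slot array: ReplicationSlotControlLock held in shared mode keeps the in_use flags stable, and each slot's spinlock protects its remaining fields.

    static XLogRecPtr
    sketch_min_restart_lsn(void)
    {
        XLogRecPtr  min_lsn = InvalidXLogRecPtr;

        /* shared mode is enough to iterate; in_use cannot change under us */
        LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
        for (int i = 0; i < max_replication_slots; i++)
        {
            ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
            XLogRecPtr  restart_lsn;

            if (!s->in_use)
                continue;

            /* everything beyond in_use needs the slot's spinlock */
            SpinLockAcquire(&s->mutex);
            restart_lsn = s->data.restart_lsn;
            SpinLockRelease(&s->mutex);

            if (restart_lsn != InvalidXLogRecPtr &&
                (min_lsn == InvalidXLogRecPtr || restart_lsn < min_lsn))
                min_lsn = restart_lsn;
        }
        LWLockRelease(ReplicationSlotControlLock);

        return min_lsn;
    }

ReplicationSlotsComputeRequiredLSN() further down follows exactly this pattern.
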
      36             : 
      37             : #include "postgres.h"
      38             : 
      39             : #include <unistd.h>
      40             : #include <sys/stat.h>
      41             : 
      42             : #include "access/transam.h"
      43             : #include "access/xlog_internal.h"
      44             : #include "common/string.h"
      45             : #include "miscadmin.h"
      46             : #include "pgstat.h"
      47             : #include "replication/slot.h"
      48             : #include "storage/fd.h"
      49             : #include "storage/proc.h"
      50             : #include "storage/procarray.h"
      51             : #include "utils/builtins.h"
      52             : 
      53             : /*
      54             :  * Replication slot on-disk data structure.
      55             :  */
      56             : typedef struct ReplicationSlotOnDisk
      57             : {
      58             :     /* first part of this struct needs to be version independent */
      59             : 
      60             :     /* data not covered by checksum */
      61             :     uint32      magic;
      62             :     pg_crc32c   checksum;
      63             : 
      64             :     /* data covered by checksum */
      65             :     uint32      version;
      66             :     uint32      length;
      67             : 
      68             :     /*
      69             :      * The actual data in the slot that follows can differ based on the above
      70             :      * 'version'.
      71             :      */
      72             : 
      73             :     ReplicationSlotPersistentData slotdata;
      74             : } ReplicationSlotOnDisk;
      75             : 
      76             : /* size of version independent data */
      77             : #define ReplicationSlotOnDiskConstantSize \
      78             :     offsetof(ReplicationSlotOnDisk, slotdata)
      79             : /* size of the part of the slot not covered by the checksum */
      80             : #define SnapBuildOnDiskNotChecksummedSize \
      81             :     offsetof(ReplicationSlotOnDisk, version)
      82             : /* size of the part covered by the checksum */
      83             : #define SnapBuildOnDiskChecksummedSize \
      84             :     sizeof(ReplicationSlotOnDisk) - SnapBuildOnDiskNotChecksummedSize
      85             : /* size of the slot data that is version dependent */
      86             : #define ReplicationSlotOnDiskV2Size \
      87             :     sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskConstantSize
      88             : 
      89             : #define SLOT_MAGIC      0x1051CA1   /* format identifier */
      90             : #define SLOT_VERSION    2       /* version for new files */
      91             : 
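
The checksummed/not-checksummed split defined above is what the save and restore code (later in this file, outside this excerpt) relies on. A minimal sketch, assuming the standard pg_crc32c macros, of how the checksum for an already filled-in ReplicationSlotOnDisk would be computed:

    static void
    sketch_checksum_slot(ReplicationSlotOnDisk *cp)
    {
        /* skip 'magic' and 'checksum' themselves, cover everything after them */
        INIT_CRC32C(cp->checksum);
        COMP_CRC32C(cp->checksum,
                    (char *) cp + SnapBuildOnDiskNotChecksummedSize,
                    SnapBuildOnDiskChecksummedSize);
        FIN_CRC32C(cp->checksum);
    }

Note that the "SnapBuild" prefix of the two checksum-size macros is historical (they were modeled on snapbuild.c); they describe ReplicationSlotOnDisk, not snapshot-builder state.
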
      92             : /* Control array for replication slot management */
      93             : ReplicationSlotCtlData *ReplicationSlotCtl = NULL;
      94             : 
      95             : /* My backend's replication slot in the shared memory array */
      96             : ReplicationSlot *MyReplicationSlot = NULL;
      97             : 
      98             : /* GUCs */
      99             : int         max_replication_slots = 0;  /* the maximum number of replication
     100             :                                          * slots */
     101             : 
     102             : static int ReplicationSlotAcquireInternal(ReplicationSlot *slot,
     103             :                                           const char *name, SlotAcquireBehavior behavior);
     104             : static void ReplicationSlotDropAcquired(void);
     105             : static void ReplicationSlotDropPtr(ReplicationSlot *slot);
     106             : 
     107             : /* internal persistency functions */
     108             : static void RestoreSlotFromDisk(const char *name);
     109             : static void CreateSlotOnDisk(ReplicationSlot *slot);
     110             : static void SaveSlotToPath(ReplicationSlot *slot, const char *path, int elevel);
     111             : 
     112             : /*
     113             :  * Report shared-memory space needed by ReplicationSlotsShmemInit.
     114             :  */
     115             : Size
     116       10704 : ReplicationSlotsShmemSize(void)
     117             : {
     118       10704 :     Size        size = 0;
     119             : 
     120       10704 :     if (max_replication_slots == 0)
     121           0 :         return size;
     122             : 
     123       10704 :     size = offsetof(ReplicationSlotCtlData, replication_slots);
     124       10704 :     size = add_size(size,
     125             :                     mul_size(max_replication_slots, sizeof(ReplicationSlot)));
     126             : 
     127       10704 :     return size;
     128             : }
     129             : 
     130             : /*
     131             :  * Allocate and initialize shared memory for replication slots.
     132             :  */
     133             : void
     134        3568 : ReplicationSlotsShmemInit(void)
     135             : {
     136             :     bool        found;
     137             : 
     138        3568 :     if (max_replication_slots == 0)
     139        3568 :         return;
     140             : 
     141        3568 :     ReplicationSlotCtl = (ReplicationSlotCtlData *)
     142        3568 :         ShmemInitStruct("ReplicationSlot Ctl", ReplicationSlotsShmemSize(),
     143             :                         &found);
     144             : 
     145        3568 :     if (!found)
     146             :     {
     147             :         int         i;
     148             : 
     149             :         /* First time through, so initialize */
     150        3568 :         MemSet(ReplicationSlotCtl, 0, ReplicationSlotsShmemSize());
     151             : 
     152       38384 :         for (i = 0; i < max_replication_slots; i++)
     153             :         {
     154       34816 :             ReplicationSlot *slot = &ReplicationSlotCtl->replication_slots[i];
     155             : 
     156             :             /* everything else is zeroed by the memset above */
     157       34816 :             SpinLockInit(&slot->mutex);
     158       34816 :             LWLockInitialize(&slot->io_in_progress_lock,
     159             :                              LWTRANCHE_REPLICATION_SLOT_IO);
     160       34816 :             ConditionVariableInit(&slot->active_cv);
     161             :         }
     162             :     }
     163             : }
     164             : 
     165             : /*
     166             :  * Check whether the passed slot name is valid and report errors at elevel.
     167             :  *
      168             :  * Slot names may consist of [a-z0-9_]{1,NAMEDATALEN-1}, which should allow
     169             :  * the name to be used as a directory name on every supported OS.
     170             :  *
      171             :  * Returns whether the name is valid if elevel < ERROR (otherwise errors out).
     172             :  */
     173             : bool
     174         376 : ReplicationSlotValidateName(const char *name, int elevel)
     175             : {
     176             :     const char *cp;
     177             : 
     178         376 :     if (strlen(name) == 0)
     179             :     {
     180           0 :         ereport(elevel,
     181             :                 (errcode(ERRCODE_INVALID_NAME),
     182             :                  errmsg("replication slot name \"%s\" is too short",
     183             :                         name)));
     184           0 :         return false;
     185             :     }
     186             : 
     187         376 :     if (strlen(name) >= NAMEDATALEN)
     188             :     {
     189           0 :         ereport(elevel,
     190             :                 (errcode(ERRCODE_NAME_TOO_LONG),
     191             :                  errmsg("replication slot name \"%s\" is too long",
     192             :                         name)));
     193           0 :         return false;
     194             :     }
     195             : 
     196        6606 :     for (cp = name; *cp; cp++)
     197             :     {
     198        6996 :         if (!((*cp >= 'a' && *cp <= 'z')
     199        2110 :               || (*cp >= '0' && *cp <= '9')
     200         764 :               || (*cp == '_')))
     201             :         {
     202           2 :             ereport(elevel,
     203             :                     (errcode(ERRCODE_INVALID_NAME),
     204             :                      errmsg("replication slot name \"%s\" contains invalid character",
     205             :                             name),
     206             :                      errhint("Replication slot names may only contain lower case letters, numbers, and the underscore character.")));
     207           0 :             return false;
     208             :         }
     209             :     }
     210         374 :     return true;
     211             : }
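
For example (an illustrative caller, not part of slot.c): "node_a_slot" is accepted, while "Node-A" is rejected because of the upper-case letters and the dash, and with an elevel below ERROR the function only reports the problem and returns false.

    static void
    sketch_validate_names(void)
    {
        /* valid: lower-case letters, digits and underscores only */
        Assert(ReplicationSlotValidateName("node_a_slot", ERROR));

        /* invalid: reported at WARNING, returns false instead of erroring out */
        if (!ReplicationSlotValidateName("Node-A", WARNING))
            elog(LOG, "slot name rejected, as expected");
    }
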
     212             : 
     213             : /*
     214             :  * Create a new replication slot and mark it as used by this backend.
     215             :  *
     216             :  * name: Name of the slot
      217             :  * db_specific: logical decoding is database specific; pass true if the slot
      218             :  *     is going to be used for logical decoding, otherwise false.
     219             :  */
     220             : void
     221         360 : ReplicationSlotCreate(const char *name, bool db_specific,
     222             :                       ReplicationSlotPersistency persistency)
     223             : {
     224         360 :     ReplicationSlot *slot = NULL;
     225             :     int         i;
     226             : 
     227         360 :     Assert(MyReplicationSlot == NULL);
     228             : 
     229         360 :     ReplicationSlotValidateName(name, ERROR);
     230             : 
     231             :     /*
     232             :      * If some other backend ran this code concurrently with us, we'd likely
     233             :      * both allocate the same slot, and that would be bad.  We'd also be at
     234             :      * risk of missing a name collision.  Also, we don't want to try to create
     235             :      * a new slot while somebody's busy cleaning up an old one, because we
     236             :      * might both be monkeying with the same directory.
     237             :      */
     238         358 :     LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
     239             : 
     240             :     /*
     241             :      * Check for name collision, and identify an allocatable slot.  We need to
     242             :      * hold ReplicationSlotControlLock in shared mode for this, so that nobody
     243             :      * else can change the in_use flags while we're looking at them.
     244             :      */
     245         358 :     LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
     246        2890 :     for (i = 0; i < max_replication_slots; i++)
     247             :     {
     248        2536 :         ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
     249             : 
     250        2536 :         if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
     251           4 :             ereport(ERROR,
     252             :                     (errcode(ERRCODE_DUPLICATE_OBJECT),
     253             :                      errmsg("replication slot \"%s\" already exists", name)));
     254        2532 :         if (!s->in_use && slot == NULL)
     255         352 :             slot = s;
     256             :     }
     257         354 :     LWLockRelease(ReplicationSlotControlLock);
     258             : 
     259             :     /* If all slots are in use, we're out of luck. */
     260         354 :     if (slot == NULL)
     261           2 :         ereport(ERROR,
     262             :                 (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
     263             :                  errmsg("all replication slots are in use"),
     264             :                  errhint("Free one or increase max_replication_slots.")));
     265             : 
     266             :     /*
     267             :      * Since this slot is not in use, nobody should be looking at any part of
     268             :      * it other than the in_use field unless they're trying to allocate it.
     269             :      * And since we hold ReplicationSlotAllocationLock, nobody except us can
     270             :      * be doing that.  So it's safe to initialize the slot.
     271             :      */
     272         352 :     Assert(!slot->in_use);
     273         352 :     Assert(slot->active_pid == 0);
     274             : 
     275             :     /* first initialize persistent data */
     276         352 :     memset(&slot->data, 0, sizeof(ReplicationSlotPersistentData));
     277         352 :     namestrcpy(&slot->data.name, name);
     278         352 :     slot->data.database = db_specific ? MyDatabaseId : InvalidOid;
     279         352 :     slot->data.persistency = persistency;
     280             : 
     281             :     /* and then data only present in shared memory */
     282         352 :     slot->just_dirtied = false;
     283         352 :     slot->dirty = false;
     284         352 :     slot->effective_xmin = InvalidTransactionId;
     285         352 :     slot->effective_catalog_xmin = InvalidTransactionId;
     286         352 :     slot->candidate_catalog_xmin = InvalidTransactionId;
     287         352 :     slot->candidate_xmin_lsn = InvalidXLogRecPtr;
     288         352 :     slot->candidate_restart_valid = InvalidXLogRecPtr;
     289         352 :     slot->candidate_restart_lsn = InvalidXLogRecPtr;
     290             : 
     291             :     /*
     292             :      * Create the slot on disk.  We haven't actually marked the slot allocated
     293             :      * yet, so no special cleanup is required if this errors out.
     294             :      */
     295         352 :     CreateSlotOnDisk(slot);
     296             : 
     297             :     /*
     298             :      * We need to briefly prevent any other backend from iterating over the
     299             :      * slots while we flip the in_use flag. We also need to set the active
     300             :      * flag while holding the ControlLock as otherwise a concurrent
     301             :      * ReplicationSlotAcquire() could acquire the slot as well.
     302             :      */
     303         352 :     LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
     304             : 
     305         352 :     slot->in_use = true;
     306             : 
     307             :     /* We can now mark the slot active, and that makes it our slot. */
     308         352 :     SpinLockAcquire(&slot->mutex);
     309         352 :     Assert(slot->active_pid == 0);
     310         352 :     slot->active_pid = MyProcPid;
     311         352 :     SpinLockRelease(&slot->mutex);
     312         352 :     MyReplicationSlot = slot;
     313             : 
     314         352 :     LWLockRelease(ReplicationSlotControlLock);
     315             : 
     316             :     /*
      317             :      * Create a statistics entry for the new logical slot. We don't collect any
      318             :      * stats for physical slots, so there is no need to create an entry for them.
     319             :      * See ReplicationSlotDropPtr for why we need to do this before releasing
     320             :      * ReplicationSlotAllocationLock.
     321             :      */
     322         352 :     if (SlotIsLogical(slot))
     323         334 :         pgstat_report_replslot(NameStr(slot->data.name), 0, 0, 0);
     324             : 
     325             :     /*
     326             :      * Now that the slot has been marked as in_use and active, it's safe to
     327             :      * let somebody else try to allocate a slot.
     328             :      */
     329         352 :     LWLockRelease(ReplicationSlotAllocationLock);
     330             : 
     331             :     /* Let everybody know we've modified this slot */
     332         352 :     ConditionVariableBroadcast(&slot->active_cv);
     333         352 : }
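
A sketch of how a caller might drive this function for a persistent physical slot, roughly mirroring what pg_create_physical_replication_slot() in slotfuncs.c does (the sketch itself is illustrative, not part of slot.c):

    static void
    sketch_create_physical_slot(const char *name)
    {
        CheckSlotRequirements();

        /* errors out on a name collision or if all slots are in use */
        ReplicationSlotCreate(name, false, RS_PERSISTENT);

        /* optionally reserve WAL right away and make that crash-safe */
        ReplicationSlotReserveWal();
        ReplicationSlotMarkDirty();
        ReplicationSlotSave();

        /* keep the slot, but stop owning it in this backend */
        ReplicationSlotRelease();
    }
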
     334             : 
     335             : /*
     336             :  * Search for the named replication slot.
     337             :  *
     338             :  * Return the replication slot if found, otherwise NULL.
     339             :  *
     340             :  * The caller must hold ReplicationSlotControlLock in shared mode.
     341             :  */
     342             : ReplicationSlot *
     343         670 : SearchNamedReplicationSlot(const char *name)
     344             : {
     345             :     int         i;
     346         670 :     ReplicationSlot *slot = NULL;
     347             : 
     348         670 :     Assert(LWLockHeldByMeInMode(ReplicationSlotControlLock,
     349             :                                 LW_SHARED));
     350             : 
     351         930 :     for (i = 0; i < max_replication_slots; i++)
     352             :     {
     353         920 :         ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
     354             : 
     355         920 :         if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
     356             :         {
     357         660 :             slot = s;
     358         660 :             break;
     359             :         }
     360             :     }
     361             : 
     362         670 :     return slot;
     363             : }
     364             : 
     365             : /*
     366             :  * Find a previously created slot and mark it as used by this process.
     367             :  *
      368             :  * The return value is only useful if behavior is SAB_Inquire, in which case
     369             :  * it's zero if we successfully acquired the slot, -1 if the slot no longer
     370             :  * exists, or the PID of the owning process otherwise.  If behavior is
     371             :  * SAB_Error, then trying to acquire an owned slot is an error.
     372             :  * If SAB_Block, we sleep until the slot is released by the owning process.
     373             :  */
     374             : int
     375         668 : ReplicationSlotAcquire(const char *name, SlotAcquireBehavior behavior)
     376             : {
     377         668 :     return ReplicationSlotAcquireInternal(NULL, name, behavior);
     378             : }
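
An illustrative caller (not part of slot.c) showing how the SAB_Inquire return value described above would be interpreted:

    static bool
    sketch_try_acquire(const char *name)
    {
        int     rc = ReplicationSlotAcquire(name, SAB_Inquire);

        if (rc == 0)
            return true;        /* acquired; MyReplicationSlot now points at it */
        if (rc == -1)
            elog(LOG, "slot \"%s\" does not exist", name);
        else
            elog(LOG, "slot \"%s\" is active for PID %d", name, rc);
        return false;
    }
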
     379             : 
     380             : /*
     381             :  * Mark the specified slot as used by this process.
     382             :  *
     383             :  * Only one of slot and name can be specified.
     384             :  * If slot == NULL, search for the slot with the given name.
     385             :  *
     386             :  * See comments about the return value in ReplicationSlotAcquire().
     387             :  */
     388             : static int
     389         668 : ReplicationSlotAcquireInternal(ReplicationSlot *slot, const char *name,
     390             :                                SlotAcquireBehavior behavior)
     391             : {
     392             :     ReplicationSlot *s;
     393             :     int         active_pid;
     394             : 
     395         668 :     AssertArg((slot == NULL) ^ (name == NULL));
     396             : 
     397             : retry:
     398         668 :     Assert(MyReplicationSlot == NULL);
     399             : 
     400         668 :     LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
     401             : 
     402             :     /*
     403             :      * Search for the slot with the specified name if the slot to acquire is
     404             :      * not given. If the slot is not found, we either return -1 or error out.
     405             :      */
     406         668 :     s = slot ? slot : SearchNamedReplicationSlot(name);
     407         668 :     if (s == NULL || !s->in_use)
     408             :     {
     409          10 :         LWLockRelease(ReplicationSlotControlLock);
     410             : 
     411          10 :         if (behavior == SAB_Inquire)
     412           0 :             return -1;
     413          10 :         ereport(ERROR,
     414             :                 (errcode(ERRCODE_UNDEFINED_OBJECT),
     415             :                  errmsg("replication slot \"%s\" does not exist",
     416             :                         name ? name : NameStr(slot->data.name))));
     417             :     }
     418             : 
     419             :     /*
     420             :      * This is the slot we want; check if it's active under some other
     421             :      * process.  In single user mode, we don't need this check.
     422             :      */
     423         658 :     if (IsUnderPostmaster)
     424             :     {
     425             :         /*
     426             :          * Get ready to sleep on the slot in case it is active if SAB_Block.
     427             :          * (We may end up not sleeping, but we don't want to do this while
     428             :          * holding the spinlock.)
     429             :          */
     430         658 :         if (behavior == SAB_Block)
     431          18 :             ConditionVariablePrepareToSleep(&s->active_cv);
     432             : 
     433         658 :         SpinLockAcquire(&s->mutex);
     434         658 :         if (s->active_pid == 0)
     435         520 :             s->active_pid = MyProcPid;
     436         658 :         active_pid = s->active_pid;
     437         658 :         SpinLockRelease(&s->mutex);
     438             :     }
     439             :     else
     440           0 :         active_pid = MyProcPid;
     441         658 :     LWLockRelease(ReplicationSlotControlLock);
     442             : 
     443             :     /*
     444             :      * If we found the slot but it's already active in another process, we
     445             :      * either error out, return the PID of the owning process, or retry
     446             :      * after a short wait, as caller specified.
     447             :      */
     448         658 :     if (active_pid != MyProcPid)
     449             :     {
     450           0 :         if (behavior == SAB_Error)
     451           0 :             ereport(ERROR,
     452             :                     (errcode(ERRCODE_OBJECT_IN_USE),
     453             :                      errmsg("replication slot \"%s\" is active for PID %d",
     454             :                             NameStr(s->data.name), active_pid)));
     455           0 :         else if (behavior == SAB_Inquire)
     456           0 :             return active_pid;
     457             : 
     458             :         /* Wait here until we get signaled, and then restart */
     459           0 :         ConditionVariableSleep(&s->active_cv,
     460             :                                WAIT_EVENT_REPLICATION_SLOT_DROP);
     461           0 :         ConditionVariableCancelSleep();
     462           0 :         goto retry;
     463             :     }
     464         658 :     else if (behavior == SAB_Block)
     465          18 :         ConditionVariableCancelSleep(); /* no sleep needed after all */
     466             : 
     467             :     /* Let everybody know we've modified this slot */
     468         658 :     ConditionVariableBroadcast(&s->active_cv);
     469             : 
     470             :     /* We made this slot active, so it's ours now. */
     471         658 :     MyReplicationSlot = s;
     472             : 
     473             :     /* success */
     474         658 :     return 0;
     475             : }
     476             : 
     477             : /*
     478             :  * Release the replication slot that this backend considers to own.
     479             :  *
     480             :  * This or another backend can re-acquire the slot later.
     481             :  * Resources this slot requires will be preserved.
     482             :  */
     483             : void
     484         848 : ReplicationSlotRelease(void)
     485             : {
     486         848 :     ReplicationSlot *slot = MyReplicationSlot;
     487             : 
     488         848 :     Assert(slot != NULL && slot->active_pid != 0);
     489             : 
     490         848 :     if (slot->data.persistency == RS_EPHEMERAL)
     491             :     {
     492             :         /*
     493             :          * Delete the slot. There is no !PANIC case where this is allowed to
      494             :          * fail; all that may happen is an incomplete cleanup of the on-disk
     495             :          * data.
     496             :          */
     497           6 :         ReplicationSlotDropAcquired();
     498             :     }
     499             : 
     500             :     /*
     501             :      * If slot needed to temporarily restrain both data and catalog xmin to
     502             :      * create the catalog snapshot, remove that temporary constraint.
     503             :      * Snapshots can only be exported while the initial snapshot is still
     504             :      * acquired.
     505             :      */
     506        1696 :     if (!TransactionIdIsValid(slot->data.xmin) &&
     507         848 :         TransactionIdIsValid(slot->effective_xmin))
     508             :     {
     509         126 :         SpinLockAcquire(&slot->mutex);
     510         126 :         slot->effective_xmin = InvalidTransactionId;
     511         126 :         SpinLockRelease(&slot->mutex);
     512         126 :         ReplicationSlotsComputeRequiredXmin(false);
     513             :     }
     514             : 
     515         848 :     if (slot->data.persistency == RS_PERSISTENT)
     516             :     {
     517             :         /*
     518             :          * Mark persistent slot inactive.  We're not freeing it, just
     519             :          * disconnecting, but wake up others that may be waiting for it.
     520             :          */
     521         560 :         SpinLockAcquire(&slot->mutex);
     522         560 :         slot->active_pid = 0;
     523         560 :         SpinLockRelease(&slot->mutex);
     524         560 :         ConditionVariableBroadcast(&slot->active_cv);
     525             :     }
     526             : 
     527         848 :     MyReplicationSlot = NULL;
     528             : 
      529             :     /* might not have been set if this was a physical (non-logical) slot */
     530         848 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
     531         848 :     MyProc->vacuumFlags &= ~PROC_IN_LOGICAL_DECODING;
     532         848 :     ProcGlobal->vacuumFlags[MyProc->pgxactoff] = MyProc->vacuumFlags;
     533         848 :     LWLockRelease(ProcArrayLock);
     534         848 : }
     535             : 
     536             : /*
      537             :  * Clean up all temporary slots created in the current session.
     538             :  */
     539             : void
     540        2858 : ReplicationSlotCleanup(void)
     541             : {
     542             :     int         i;
     543             : 
     544        2858 :     Assert(MyReplicationSlot == NULL);
     545             : 
     546             : restart:
     547        3002 :     LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
     548       30458 :     for (i = 0; i < max_replication_slots; i++)
     549             :     {
     550       27600 :         ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
     551             : 
     552       27600 :         if (!s->in_use)
     553       25700 :             continue;
     554             : 
     555        1900 :         SpinLockAcquire(&s->mutex);
     556        1900 :         if (s->active_pid == MyProcPid)
     557             :         {
     558         144 :             Assert(s->data.persistency == RS_TEMPORARY);
     559         144 :             SpinLockRelease(&s->mutex);
     560         144 :             LWLockRelease(ReplicationSlotControlLock);  /* avoid deadlock */
     561             : 
     562         144 :             ReplicationSlotDropPtr(s);
     563             : 
     564         144 :             ConditionVariableBroadcast(&s->active_cv);
     565         144 :             goto restart;
     566             :         }
     567             :         else
     568        1756 :             SpinLockRelease(&s->mutex);
     569             :     }
     570             : 
     571        2858 :     LWLockRelease(ReplicationSlotControlLock);
     572        2858 : }
     573             : 
     574             : /*
      575             :  * Permanently drop the replication slot identified by the given name.
     576             :  */
     577             : void
     578         172 : ReplicationSlotDrop(const char *name, bool nowait)
     579             : {
     580         172 :     Assert(MyReplicationSlot == NULL);
     581             : 
     582         172 :     (void) ReplicationSlotAcquire(name, nowait ? SAB_Error : SAB_Block);
     583             : 
     584         162 :     ReplicationSlotDropAcquired();
     585         162 : }
     586             : 
     587             : /*
     588             :  * Permanently drop the currently acquired replication slot.
     589             :  */
     590             : static void
     591         168 : ReplicationSlotDropAcquired(void)
     592             : {
     593         168 :     ReplicationSlot *slot = MyReplicationSlot;
     594             : 
     595         168 :     Assert(MyReplicationSlot != NULL);
     596             : 
     597             :     /* slot isn't acquired anymore */
     598         168 :     MyReplicationSlot = NULL;
     599             : 
     600         168 :     ReplicationSlotDropPtr(slot);
     601         168 : }
     602             : 
     603             : /*
      604             :  * Permanently drop the given replication slot; it will have been released by
      605             :  * the time this function returns.
     606             :  */
     607             : static void
     608         312 : ReplicationSlotDropPtr(ReplicationSlot *slot)
     609             : {
     610             :     char        path[MAXPGPATH];
     611             :     char        tmppath[MAXPGPATH];
     612             : 
     613             :     /*
     614             :      * If some other backend ran this code concurrently with us, we might try
     615             :      * to delete a slot with a certain name while someone else was trying to
     616             :      * create a slot with the same name.
     617             :      */
     618         312 :     LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
     619             : 
     620             :     /* Generate pathnames. */
     621         312 :     sprintf(path, "pg_replslot/%s", NameStr(slot->data.name));
     622         312 :     sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name));
     623             : 
     624             :     /*
     625             :      * Rename the slot directory on disk, so that we'll no longer recognize
     626             :      * this as a valid slot.  Note that if this fails, we've got to mark the
     627             :      * slot inactive before bailing out.  If we're dropping an ephemeral or a
     628             :      * temporary slot, we better never fail hard as the caller won't expect
     629             :      * the slot to survive and this might get called during error handling.
     630             :      */
     631         312 :     if (rename(path, tmppath) == 0)
     632             :     {
     633             :         /*
     634             :          * We need to fsync() the directory we just renamed and its parent to
     635             :          * make sure that our changes are on disk in a crash-safe fashion.  If
     636             :          * fsync() fails, we can't be sure whether the changes are on disk or
     637             :          * not.  For now, we handle that by panicking;
     638             :          * StartupReplicationSlots() will try to straighten it out after
     639             :          * restart.
     640             :          */
     641         312 :         START_CRIT_SECTION();
     642         312 :         fsync_fname(tmppath, true);
     643         312 :         fsync_fname("pg_replslot", true);
     644         312 :         END_CRIT_SECTION();
     645             :     }
     646             :     else
     647             :     {
     648           0 :         bool        fail_softly = slot->data.persistency != RS_PERSISTENT;
     649             : 
     650           0 :         SpinLockAcquire(&slot->mutex);
     651           0 :         slot->active_pid = 0;
     652           0 :         SpinLockRelease(&slot->mutex);
     653             : 
     654             :         /* wake up anyone waiting on this slot */
     655           0 :         ConditionVariableBroadcast(&slot->active_cv);
     656             : 
     657           0 :         ereport(fail_softly ? WARNING : ERROR,
     658             :                 (errcode_for_file_access(),
     659             :                  errmsg("could not rename file \"%s\" to \"%s\": %m",
     660             :                         path, tmppath)));
     661             :     }
     662             : 
     663             :     /*
     664             :      * The slot is definitely gone.  Lock out concurrent scans of the array
     665             :      * long enough to kill it.  It's OK to clear the active PID here without
     666             :      * grabbing the mutex because nobody else can be scanning the array here,
     667             :      * and nobody can be attached to this slot and thus access it without
     668             :      * scanning the array.
     669             :      *
     670             :      * Also wake up processes waiting for it.
     671             :      */
     672         312 :     LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
     673         312 :     slot->active_pid = 0;
     674         312 :     slot->in_use = false;
     675         312 :     LWLockRelease(ReplicationSlotControlLock);
     676         312 :     ConditionVariableBroadcast(&slot->active_cv);
     677             : 
     678             :     /*
      679             :      * Slot is dead and doesn't prevent resource removal anymore; recompute
     680             :      * limits.
     681             :      */
     682         312 :     ReplicationSlotsComputeRequiredXmin(false);
     683         312 :     ReplicationSlotsComputeRequiredLSN();
     684             : 
     685             :     /*
     686             :      * If removing the directory fails, the worst thing that will happen is
     687             :      * that the user won't be able to create a new slot with the same name
     688             :      * until the next server restart.  We warn about it, but that's all.
     689             :      */
     690         312 :     if (!rmtree(tmppath, true))
     691           0 :         ereport(WARNING,
     692             :                 (errmsg("could not remove directory \"%s\"", tmppath)));
     693             : 
     694             :     /*
     695             :      * Send a message to drop the replication slot to the stats collector.
     696             :      * Since there is no guarantee of the order of message transfer on a UDP
     697             :      * connection, it's possible that a message for creating a new slot
      698             :      * arrives before a message for removing the old slot. We send the drop
     699             :      * and create messages while holding ReplicationSlotAllocationLock to
      700             :      * reduce that possibility. If the messages arrived in reverse order, we would
     701             :      * lose one statistics update message. But the next update message will
     702             :      * create the statistics for the replication slot.
     703             :      */
     704         312 :     if (SlotIsLogical(slot))
     705         294 :         pgstat_report_replslot_drop(NameStr(slot->data.name));
     706             : 
     707             :     /*
     708             :      * We release this at the very end, so that nobody starts trying to create
     709             :      * a slot while we're still cleaning up the detritus of the old one.
     710             :      */
     711         312 :     LWLockRelease(ReplicationSlotAllocationLock);
     712         312 : }
     713             : 
     714             : /*
     715             :  * Serialize the currently acquired slot's state from memory to disk, thereby
     716             :  * guaranteeing the current state will survive a crash.
     717             :  */
     718             : void
     719         720 : ReplicationSlotSave(void)
     720             : {
     721             :     char        path[MAXPGPATH];
     722             : 
     723         720 :     Assert(MyReplicationSlot != NULL);
     724             : 
     725         720 :     sprintf(path, "pg_replslot/%s", NameStr(MyReplicationSlot->data.name));
     726         720 :     SaveSlotToPath(MyReplicationSlot, path, ERROR);
     727         720 : }
     728             : 
     729             : /*
      730             :  * Signal that it would be useful if the currently acquired slot were
      731             :  * flushed out to disk.
     732             :  *
      733             :  * Note that the actual flush to disk can be delayed for a long time; if it is
      734             :  * required for correctness, explicitly do a ReplicationSlotSave().
     735             :  */
     736             : void
     737         996 : ReplicationSlotMarkDirty(void)
     738             : {
     739         996 :     ReplicationSlot *slot = MyReplicationSlot;
     740             : 
     741         996 :     Assert(MyReplicationSlot != NULL);
     742             : 
     743         996 :     SpinLockAcquire(&slot->mutex);
     744         996 :     MyReplicationSlot->just_dirtied = true;
     745         996 :     MyReplicationSlot->dirty = true;
     746         996 :     SpinLockRelease(&slot->mutex);
     747         996 : }
     748             : 
     749             : /*
     750             :  * Convert a slot that's marked as RS_EPHEMERAL to a RS_PERSISTENT slot,
     751             :  * guaranteeing it will be there after an eventual crash.
     752             :  */
     753             : void
     754         186 : ReplicationSlotPersist(void)
     755             : {
     756         186 :     ReplicationSlot *slot = MyReplicationSlot;
     757             : 
     758         186 :     Assert(slot != NULL);
     759         186 :     Assert(slot->data.persistency != RS_PERSISTENT);
     760             : 
     761         186 :     SpinLockAcquire(&slot->mutex);
     762         186 :     slot->data.persistency = RS_PERSISTENT;
     763         186 :     SpinLockRelease(&slot->mutex);
     764             : 
     765         186 :     ReplicationSlotMarkDirty();
     766         186 :     ReplicationSlotSave();
     767         186 : }
     768             : 
     769             : /*
     770             :  * Compute the oldest xmin across all slots and store it in the ProcArray.
     771             :  *
     772             :  * If already_locked is true, ProcArrayLock has already been acquired
     773             :  * exclusively.
     774             :  */
     775             : void
     776        1132 : ReplicationSlotsComputeRequiredXmin(bool already_locked)
     777             : {
     778             :     int         i;
     779        1132 :     TransactionId agg_xmin = InvalidTransactionId;
     780        1132 :     TransactionId agg_catalog_xmin = InvalidTransactionId;
     781             : 
     782        1132 :     Assert(ReplicationSlotCtl != NULL);
     783             : 
     784        1132 :     LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
     785             : 
     786       10340 :     for (i = 0; i < max_replication_slots; i++)
     787             :     {
     788        9208 :         ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
     789             :         TransactionId effective_xmin;
     790             :         TransactionId effective_catalog_xmin;
     791             : 
     792        9208 :         if (!s->in_use)
     793        7950 :             continue;
     794             : 
     795        1258 :         SpinLockAcquire(&s->mutex);
     796        1258 :         effective_xmin = s->effective_xmin;
     797        1258 :         effective_catalog_xmin = s->effective_catalog_xmin;
     798        1258 :         SpinLockRelease(&s->mutex);
     799             : 
     800             :         /* check the data xmin */
     801        1258 :         if (TransactionIdIsValid(effective_xmin) &&
     802           0 :             (!TransactionIdIsValid(agg_xmin) ||
     803           0 :              TransactionIdPrecedes(effective_xmin, agg_xmin)))
     804         142 :             agg_xmin = effective_xmin;
     805             : 
     806             :         /* check the catalog xmin */
     807        1258 :         if (TransactionIdIsValid(effective_catalog_xmin) &&
     808         550 :             (!TransactionIdIsValid(agg_catalog_xmin) ||
     809         550 :              TransactionIdPrecedes(effective_catalog_xmin, agg_catalog_xmin)))
     810         676 :             agg_catalog_xmin = effective_catalog_xmin;
     811             :     }
     812             : 
     813        1132 :     LWLockRelease(ReplicationSlotControlLock);
     814             : 
     815        1132 :     ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked);
     816        1132 : }
     817             : 
     818             : /*
     819             :  * Compute the oldest restart LSN across all slots and inform xlog module.
     820             :  *
     821             :  * Note: while max_slot_wal_keep_size is theoretically relevant for this
     822             :  * purpose, we don't try to account for that, because this module doesn't
     823             :  * know what to compare against.
     824             :  */
     825             : void
     826         998 : ReplicationSlotsComputeRequiredLSN(void)
     827             : {
     828             :     int         i;
     829         998 :     XLogRecPtr  min_required = InvalidXLogRecPtr;
     830             : 
     831         998 :     Assert(ReplicationSlotCtl != NULL);
     832             : 
     833         998 :     LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
     834        8914 :     for (i = 0; i < max_replication_slots; i++)
     835             :     {
     836        7916 :         ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
     837             :         XLogRecPtr  restart_lsn;
     838             : 
     839        7916 :         if (!s->in_use)
     840        6996 :             continue;
     841             : 
     842         920 :         SpinLockAcquire(&s->mutex);
     843         920 :         restart_lsn = s->data.restart_lsn;
     844         920 :         SpinLockRelease(&s->mutex);
     845             : 
     846         920 :         if (restart_lsn != InvalidXLogRecPtr &&
     847         362 :             (min_required == InvalidXLogRecPtr ||
     848             :              restart_lsn < min_required))
     849         568 :             min_required = restart_lsn;
     850             :     }
     851         998 :     LWLockRelease(ReplicationSlotControlLock);
     852             : 
     853         998 :     XLogSetReplicationSlotMinimumLSN(min_required);
     854         998 : }
     855             : 
     856             : /*
      857             :  * Compute the oldest WAL LSN required by *logical* decoding slots.
     858             :  *
     859             :  * Returns InvalidXLogRecPtr if logical decoding is disabled or no logical
     860             :  * slots exist.
     861             :  *
     862             :  * NB: this returns a value >= ReplicationSlotsComputeRequiredLSN(), since it
     863             :  * ignores physical replication slots.
     864             :  *
     865             :  * The results aren't required frequently, so we don't maintain a precomputed
     866             :  * value like we do for ComputeRequiredLSN() and ComputeRequiredXmin().
     867             :  */
     868             : XLogRecPtr
     869        1404 : ReplicationSlotsComputeLogicalRestartLSN(void)
     870             : {
     871        1404 :     XLogRecPtr  result = InvalidXLogRecPtr;
     872             :     int         i;
     873             : 
     874        1404 :     if (max_replication_slots <= 0)
     875           0 :         return InvalidXLogRecPtr;
     876             : 
     877        1404 :     LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
     878             : 
     879       15180 :     for (i = 0; i < max_replication_slots; i++)
     880             :     {
     881             :         ReplicationSlot *s;
     882             :         XLogRecPtr  restart_lsn;
     883             : 
     884       13776 :         s = &ReplicationSlotCtl->replication_slots[i];
     885             : 
     886             :         /* cannot change while ReplicationSlotCtlLock is held */
     887       13776 :         if (!s->in_use)
     888       13668 :             continue;
     889             : 
     890             :         /* we're only interested in logical slots */
     891         108 :         if (!SlotIsLogical(s))
     892           0 :             continue;
     893             : 
     894             :         /* read once, it's ok if it increases while we're checking */
     895         108 :         SpinLockAcquire(&s->mutex);
     896         108 :         restart_lsn = s->data.restart_lsn;
     897         108 :         SpinLockRelease(&s->mutex);
     898             : 
     899         108 :         if (restart_lsn == InvalidXLogRecPtr)
     900           0 :             continue;
     901             : 
     902         108 :         if (result == InvalidXLogRecPtr ||
     903             :             restart_lsn < result)
     904         108 :             result = restart_lsn;
     905             :     }
     906             : 
     907        1404 :     LWLockRelease(ReplicationSlotControlLock);
     908             : 
     909        1404 :     return result;
     910             : }
     911             : 
     912             : /*
     913             :  * ReplicationSlotsCountDBSlots -- count the number of slots that refer to the
     914             :  * passed database oid.
     915             :  *
     916             :  * Returns true if there are any slots referencing the database. *nslots will
      917             :  * be set to the total number of slots in the database, *nactive to the number
      918             :  * currently active.
     919             :  */
     920             : bool
     921           0 : ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
     922             : {
     923             :     int         i;
     924             : 
     925           0 :     *nslots = *nactive = 0;
     926             : 
     927           0 :     if (max_replication_slots <= 0)
     928           0 :         return false;
     929             : 
     930           0 :     LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
     931           0 :     for (i = 0; i < max_replication_slots; i++)
     932             :     {
     933             :         ReplicationSlot *s;
     934             : 
     935           0 :         s = &ReplicationSlotCtl->replication_slots[i];
     936             : 
     937             :         /* cannot change while ReplicationSlotCtlLock is held */
     938           0 :         if (!s->in_use)
     939           0 :             continue;
     940             : 
     941             :         /* only logical slots are database specific, skip */
     942           0 :         if (!SlotIsLogical(s))
     943           0 :             continue;
     944             : 
     945             :         /* not our database, skip */
     946           0 :         if (s->data.database != dboid)
     947           0 :             continue;
     948             : 
     949             :         /* count slots with spinlock held */
     950           0 :         SpinLockAcquire(&s->mutex);
     951           0 :         (*nslots)++;
     952           0 :         if (s->active_pid != 0)
     953           0 :             (*nactive)++;
     954           0 :         SpinLockRelease(&s->mutex);
     955             :     }
     956           0 :     LWLockRelease(ReplicationSlotControlLock);
     957             : 
     958           0 :     if (*nslots > 0)
     959           0 :         return true;
     960           0 :     return false;
     961             : }
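
A sketch of a typical caller, roughly what dropdb() in dbcommands.c does (illustrative, not part of slot.c): refuse to drop a database while any logical slot still references it.

    static void
    sketch_check_db_droppable(Oid dboid)
    {
        int     nslots;
        int     nslots_active;

        if (ReplicationSlotsCountDBSlots(dboid, &nslots, &nslots_active))
            ereport(ERROR,
                    (errcode(ERRCODE_OBJECT_IN_USE),
                     errmsg("database is used by a logical replication slot"),
                     errdetail("There are %d slot(s), %d of them active.",
                               nslots, nslots_active)));
    }
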
     962             : 
     963             : /*
     964             :  * ReplicationSlotsDropDBSlots -- Drop all db-specific slots relating to the
     965             :  * passed database oid. The caller should hold an exclusive lock on the
     966             :  * pg_database oid for the database to prevent creation of new slots on the db
     967             :  * or replay from existing slots.
     968             :  *
     969             :  * Another session that concurrently acquires an existing slot on the target DB
     970             :  * (most likely to drop it) may cause this function to ERROR. If that happens
     971             :  * it may have dropped some but not all slots.
     972             :  *
     973             :  * This routine isn't as efficient as it could be - but we don't drop
     974             :  * databases often, especially databases with lots of slots.
     975             :  */
     976             : void
     977           0 : ReplicationSlotsDropDBSlots(Oid dboid)
     978             : {
     979             :     int         i;
     980             : 
     981           0 :     if (max_replication_slots <= 0)
     982           0 :         return;
     983             : 
     984             : restart:
     985           0 :     LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
     986           0 :     for (i = 0; i < max_replication_slots; i++)
     987             :     {
     988             :         ReplicationSlot *s;
     989             :         char       *slotname;
     990             :         int         active_pid;
     991             : 
     992           0 :         s = &ReplicationSlotCtl->replication_slots[i];
     993             : 
     994             :         /* cannot change while ReplicationSlotCtlLock is held */
     995           0 :         if (!s->in_use)
     996           0 :             continue;
     997             : 
     998             :         /* only logical slots are database specific, skip */
     999           0 :         if (!SlotIsLogical(s))
    1000           0 :             continue;
    1001             : 
    1002             :         /* not our database, skip */
    1003           0 :         if (s->data.database != dboid)
    1004           0 :             continue;
    1005             : 
    1006             :         /* acquire slot, so ReplicationSlotDropAcquired can be reused  */
    1007           0 :         SpinLockAcquire(&s->mutex);
    1008             :         /* can't change while ReplicationSlotControlLock is held */
    1009           0 :         slotname = NameStr(s->data.name);
    1010           0 :         active_pid = s->active_pid;
    1011           0 :         if (active_pid == 0)
    1012             :         {
    1013           0 :             MyReplicationSlot = s;
    1014           0 :             s->active_pid = MyProcPid;
    1015             :         }
    1016           0 :         SpinLockRelease(&s->mutex);
    1017             : 
    1018             :         /*
     1019             :          * Even though we hold an exclusive lock on the database object, a
    1020             :          * logical slot for that DB can still be active, e.g. if it's
    1021             :          * concurrently being dropped by a backend connected to another DB.
    1022             :          *
    1023             :          * That's fairly unlikely in practice, so we'll just bail out.
    1024             :          */
    1025           0 :         if (active_pid)
    1026           0 :             ereport(ERROR,
    1027             :                     (errcode(ERRCODE_OBJECT_IN_USE),
    1028             :                      errmsg("replication slot \"%s\" is active for PID %d",
    1029             :                             slotname, active_pid)));
    1030             : 
    1031             :         /*
    1032             :          * To avoid duplicating ReplicationSlotDropAcquired() and to avoid
    1033             :          * holding ReplicationSlotControlLock over filesystem operations,
    1034             :          * release ReplicationSlotControlLock and use
    1035             :          * ReplicationSlotDropAcquired.
    1036             :          *
    1037             :          * As that means the set of slots could change, restart scan from the
    1038             :          * beginning each time we release the lock.
    1039             :          */
    1040           0 :         LWLockRelease(ReplicationSlotControlLock);
    1041           0 :         ReplicationSlotDropAcquired();
    1042           0 :         goto restart;
    1043             :     }
    1044           0 :     LWLockRelease(ReplicationSlotControlLock);
    1045             : }
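
For context, a minimal sketch of how a caller is expected to use this routine during DROP DATABASE; it is loosely modeled on src/backend/commands/dbcommands.c, so the variable names and message wording here are illustrative assumptions rather than exact copies:

    /* refuse to drop the database while any of its slots is in use */
    if (ReplicationSlotsCountDBSlots(db_id, &nslots, &nslots_active) &&
        nslots_active > 0)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_IN_USE),
                 errmsg("database is being used by logical replication slots")));

    /* ... lock out other users of the database, drop its contents ... */

    /* finally drop the remaining, inactive logical slots of that database */
    ReplicationSlotsDropDBSlots(db_id);
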
    1046             : 
    1047             : 
    1048             : /*
    1049             :  * Check whether the server's configuration supports using replication
    1050             :  * slots.
    1051             :  */
    1052             : void
    1053        1010 : CheckSlotRequirements(void)
    1054             : {
    1055             :     /*
    1056             :      * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
    1057             :      * needs the same check.
    1058             :      */
    1059             : 
    1060        1010 :     if (max_replication_slots == 0)
    1061           0 :         ereport(ERROR,
    1062             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1063             :                  errmsg("replication slots can only be used if max_replication_slots > 0")));
    1064             : 
    1065        1010 :     if (wal_level < WAL_LEVEL_REPLICA)
    1066           0 :         ereport(ERROR,
    1067             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1068             :                  errmsg("replication slots can only be used if wal_level >= replica")));
    1069        1010 : }
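
To make the two requirements above concrete, a configuration that passes this check could look like the following postgresql.conf excerpt; the values are arbitrary examples, not recommendations:

    max_replication_slots = 10     # must be > 0
    wal_level = replica            # or logical; minimal won't do
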
    1070             : 
    1071             : /*
    1072             :  * Reserve WAL for the currently active slot.
    1073             :  *
     1074             :  * Compute and set restart_lsn in a manner that's appropriate for the slot's
     1075             :  * type and is concurrency-safe.
    1076             :  */
    1077             : void
    1078         322 : ReplicationSlotReserveWal(void)
    1079             : {
    1080         322 :     ReplicationSlot *slot = MyReplicationSlot;
    1081             : 
    1082         322 :     Assert(slot != NULL);
    1083         322 :     Assert(slot->data.restart_lsn == InvalidXLogRecPtr);
    1084             : 
    1085             :     /*
    1086             :      * The replication slot mechanism is used to prevent removal of required
    1087             :      * WAL. As there is no interlock between this routine and checkpoints, WAL
    1088             :      * segments could concurrently be removed when a now stale return value of
    1089             :      * ReplicationSlotsComputeRequiredLSN() is used. In the unlikely case that
    1090             :      * this happens we'll just retry.
    1091             :      */
    1092             :     while (true)
    1093             :     {
    1094             :         XLogSegNo   segno;
    1095             :         XLogRecPtr  restart_lsn;
    1096             : 
    1097             :         /*
     1098             :          * For logical slots, log a standby snapshot and start logical decoding
     1099             :          * at exactly that position. That allows the slot to start up more
     1100             :          * quickly.
     1101             :          *
     1102             :          * That's not needed (or indeed helpful) for physical slots, as they'll
     1103             :          * start replay at the last logged checkpoint anyway. Instead, return
     1104             :          * the last redo LSN. While that slightly increases the chance that
     1105             :          * we have to retry, it's where a base backup has to start replay
     1106             :          * from.
    1107             :          */
    1108         322 :         if (!RecoveryInProgress() && SlotIsLogical(slot))
    1109         318 :         {
    1110             :             XLogRecPtr  flushptr;
    1111             : 
    1112             :             /* start at current insert position */
    1113         318 :             restart_lsn = GetXLogInsertRecPtr();
    1114         318 :             SpinLockAcquire(&slot->mutex);
    1115         318 :             slot->data.restart_lsn = restart_lsn;
    1116         318 :             SpinLockRelease(&slot->mutex);
    1117             : 
    1118             :             /* make sure we have enough information to start */
    1119         318 :             flushptr = LogStandbySnapshot();
    1120             : 
    1121             :             /* and make sure it's fsynced to disk */
    1122         318 :             XLogFlush(flushptr);
    1123             :         }
    1124             :         else
    1125             :         {
    1126           4 :             restart_lsn = GetRedoRecPtr();
    1127           4 :             SpinLockAcquire(&slot->mutex);
    1128           4 :             slot->data.restart_lsn = restart_lsn;
    1129           4 :             SpinLockRelease(&slot->mutex);
    1130             :         }
    1131             : 
     1132             :         /* prevent further WAL removal, as quickly as possible */
    1133         322 :         ReplicationSlotsComputeRequiredLSN();
    1134             : 
    1135             :         /*
    1136             :          * If all required WAL is still there, great, otherwise retry. The
    1137             :          * slot should prevent further removal of WAL, unless there's a
    1138             :          * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
    1139             :          * the new restart_lsn above, so normally we should never need to loop
    1140             :          * more than twice.
    1141             :          */
    1142         322 :         XLByteToSeg(slot->data.restart_lsn, segno, wal_segment_size);
    1143         322 :         if (XLogGetLastRemovedSegno() < segno)
    1144         322 :             break;
    1145           0 :     }
    1146         322 : }
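
For illustration, a sketch of the caller-side sequence that reaches this function, roughly following what pg_create_physical_replication_slot() does when asked to reserve WAL immediately; error handling is omitted and the exact sequence should be checked against slotfuncs.c:

    /* create a physical slot and pin WAL for it right away */
    ReplicationSlotCreate(name, false,
                          temporary ? RS_TEMPORARY : RS_PERSISTENT);

    /* computes restart_lsn and updates the cluster-wide required LSN */
    ReplicationSlotReserveWal();

    /* persist the reservation so it survives a restart */
    ReplicationSlotMarkDirty();
    ReplicationSlotSave();
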
    1147             : 
    1148             : /*
    1149             :  * Mark any slot that points to an LSN older than the given segment
    1150             :  * as invalid; it requires WAL that's about to be removed.
    1151             :  *
     1152             :  * NB: this runs as part of a checkpoint, so avoid raising errors if possible.
    1153             :  */
    1154             : void
    1155         702 : InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
    1156             : {
    1157             :     XLogRecPtr  oldestLSN;
    1158             : 
    1159         702 :     XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);
    1160             : 
    1161             : restart:
    1162         702 :     LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
    1163       15180 :     for (int i = 0; i < max_replication_slots; i++)
    1164             :     {
    1165        6888 :         ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
    1166        6888 :         XLogRecPtr  restart_lsn = InvalidXLogRecPtr;
    1167             :         NameData    slotname;
    1168             :         int     wspid;
    1169        6888 :         int     last_signaled_pid = 0;
    1170             : 
    1171        6888 :         if (!s->in_use)
    1172       13722 :             continue;
    1173             : 
    1174          54 :         SpinLockAcquire(&s->mutex);
    1175          54 :         slotname = s->data.name;
    1176          54 :         restart_lsn = s->data.restart_lsn;
    1177          54 :         SpinLockRelease(&s->mutex);
    1178             : 
    1179          54 :         if (XLogRecPtrIsInvalid(restart_lsn) || restart_lsn >= oldestLSN)
    1180          54 :             continue;
    1181           0 :         LWLockRelease(ReplicationSlotControlLock);
    1182           0 :         CHECK_FOR_INTERRUPTS();
    1183             : 
    1184             :         /* Get ready to sleep on the slot in case it is active */
    1185           0 :         ConditionVariablePrepareToSleep(&s->active_cv);
    1186             : 
    1187             :         for (;;)
    1188             :         {
    1189             :             /*
    1190             :              * Try to mark this slot as used by this process.
    1191             :              *
     1192             :              * Note that ReplicationSlotAcquireInternal(SAB_Inquire) should
     1193             :              * not cancel the prepared condition variable if this slot is
     1194             :              * active in another process, because in that case we will have
     1195             :              * to wait on that CV for the owning process to be terminated
     1196             :              * later.
    1197             :              */
    1198           0 :             wspid = ReplicationSlotAcquireInternal(s, NULL, SAB_Inquire);
    1199             : 
    1200             :             /*
     1201             :              * Exit the loop if we successfully acquired the slot or if the
     1202             :              * slot was dropped while we were waiting for the owning process
     1203             :              * to be terminated. The latter is likely when the slot is
     1204             :              * temporary, because such a slot is automatically dropped when
     1205             :              * its owning process exits.
    1206             :              */
    1207           0 :             if (wspid <= 0)
    1208           0 :                 break;
    1209             : 
    1210             :             /*
    1211             :              * Signal to terminate the process that owns the slot.
    1212             :              *
     1213             :              * There is a race condition where another process may acquire
     1214             :              * the slot after the process using it was terminated and before
     1215             :              * this process acquires it. To handle this case, we signal again
     1216             :              * if the PID of the owning process has changed since the last signal.
    1217             :              *
    1218             :              * XXX This logic assumes that the same PID is not reused
    1219             :              * very quickly.
    1220             :              */
    1221           0 :             if (last_signaled_pid != wspid)
    1222             :             {
    1223           0 :                 ereport(LOG,
    1224             :                         (errmsg("terminating process %d because replication slot \"%s\" is too far behind",
    1225             :                                 wspid, NameStr(slotname))));
    1226           0 :                 (void) kill(wspid, SIGTERM);
    1227           0 :                 last_signaled_pid = wspid;
    1228             :             }
    1229             : 
    1230           0 :             ConditionVariableTimedSleep(&s->active_cv, 10,
    1231             :                                         WAIT_EVENT_REPLICATION_SLOT_DROP);
    1232           0 :         }
    1233           0 :         ConditionVariableCancelSleep();
    1234             : 
    1235             :         /*
    1236             :          * Do nothing here and start from scratch if the slot has
    1237             :          * already been dropped.
    1238             :          */
    1239           0 :         if (wspid == -1)
    1240           0 :             goto restart;
    1241             : 
    1242           0 :         ereport(LOG,
    1243             :                 (errmsg("invalidating slot \"%s\" because its restart_lsn %X/%X exceeds max_slot_wal_keep_size",
    1244             :                         NameStr(slotname),
    1245             :                         (uint32) (restart_lsn >> 32),
    1246             :                         (uint32) restart_lsn)));
    1247             : 
    1248           0 :         SpinLockAcquire(&s->mutex);
    1249           0 :         s->data.invalidated_at = s->data.restart_lsn;
    1250           0 :         s->data.restart_lsn = InvalidXLogRecPtr;
    1251           0 :         SpinLockRelease(&s->mutex);
    1252             : 
    1253             :         /* Make sure the invalidated state persists across server restart */
    1254           0 :         ReplicationSlotMarkDirty();
    1255           0 :         ReplicationSlotSave();
    1256           0 :         ReplicationSlotRelease();
    1257             : 
    1258             :         /* if we did anything, start from scratch */
    1259           0 :         goto restart;
    1260             :     }
    1261         702 :     LWLockRelease(ReplicationSlotControlLock);
    1262         702 : }
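
A worked example of the cutoff computed at the top of this function: with the default wal_segment_size of 16MB, oldestSegno = 5 maps to oldestLSN = 5 * 0x1000000 = 0/5000000. An in-use slot whose restart_lsn is 0/4FFFFF0 is therefore below the cutoff and gets invalidated, while a slot at 0/5000010 is left alone. The numbers are made up purely for illustration.
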
    1263             : 
    1264             : /*
    1265             :  * Flush all replication slots to disk.
    1266             :  *
    1267             :  * This needn't actually be part of a checkpoint, but it's a convenient
    1268             :  * location.
    1269             :  */
    1270             : void
    1271         702 : CheckPointReplicationSlots(void)
    1272             : {
    1273             :     int         i;
    1274             : 
    1275         702 :     elog(DEBUG1, "performing replication slot checkpoint");
    1276             : 
    1277             :     /*
    1278             :      * Prevent any slot from being created/dropped while we're active. As we
    1279             :      * explicitly do *not* want to block iterating over replication_slots or
     1280             :      * acquiring a slot, we cannot take the control lock - but that's OK,
    1281             :      * because holding ReplicationSlotAllocationLock is strictly stronger, and
    1282             :      * enough to guarantee that nobody can change the in_use bits on us.
    1283             :      */
    1284         702 :     LWLockAcquire(ReplicationSlotAllocationLock, LW_SHARED);
    1285             : 
    1286        7590 :     for (i = 0; i < max_replication_slots; i++)
    1287             :     {
    1288        6888 :         ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
    1289             :         char        path[MAXPGPATH];
    1290             : 
    1291        6888 :         if (!s->in_use)
    1292        6834 :             continue;
    1293             : 
    1294             :         /* save the slot to disk, locking is handled in SaveSlotToPath() */
    1295          54 :         sprintf(path, "pg_replslot/%s", NameStr(s->data.name));
    1296          54 :         SaveSlotToPath(s, path, LOG);
    1297             :     }
    1298         702 :     LWLockRelease(ReplicationSlotAllocationLock);
    1299         702 : }
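
For orientation, this function is reached from the checkpointer; a simplified sketch of the call site, based on CheckPointGuts() in xlog.c with most of its body elided:

    static void
    CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
    {
        /* ... checkpoints of other subsystems ... */
        CheckPointReplicationSlots();
        /* ... buffers, SLRUs, two-phase state, ... */
    }
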
    1300             : 
    1301             : /*
    1302             :  * Load all replication slots from disk into memory at server startup. This
    1303             :  * needs to be run before we start crash recovery.
    1304             :  */
    1305             : void
    1306         316 : StartupReplicationSlots(void)
    1307             : {
    1308             :     DIR        *replication_dir;
    1309             :     struct dirent *replication_de;
    1310             : 
    1311         316 :     elog(DEBUG1, "starting up replication slots");
    1312             : 
    1313             :     /* restore all slots by iterating over all on-disk entries */
    1314         316 :     replication_dir = AllocateDir("pg_replslot");
    1315        1280 :     while ((replication_de = ReadDir(replication_dir, "pg_replslot")) != NULL)
    1316             :     {
    1317             :         struct stat statbuf;
    1318             :         char        path[MAXPGPATH + 12];
    1319             : 
    1320         980 :         if (strcmp(replication_de->d_name, ".") == 0 ||
    1321         332 :             strcmp(replication_de->d_name, "..") == 0)
    1322        1264 :             continue;
    1323             : 
    1324          16 :         snprintf(path, sizeof(path), "pg_replslot/%s", replication_de->d_name);
    1325             : 
     1326             :         /* we're only creating directories here, skip if it's not ours */
    1327          16 :         if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
    1328           0 :             continue;
    1329             : 
     1330             :         /* we crashed while a slot was being set up or deleted, clean up */
    1331          16 :         if (pg_str_endswith(replication_de->d_name, ".tmp"))
    1332             :         {
    1333           0 :             if (!rmtree(path, true))
    1334             :             {
    1335           0 :                 ereport(WARNING,
    1336             :                         (errmsg("could not remove directory \"%s\"",
    1337             :                                 path)));
    1338           0 :                 continue;
    1339             :             }
    1340           0 :             fsync_fname("pg_replslot", true);
    1341           0 :             continue;
    1342             :         }
    1343             : 
    1344             :         /* looks like a slot in a normal state, restore */
    1345          16 :         RestoreSlotFromDisk(replication_de->d_name);
    1346             :     }
    1347         316 :     FreeDir(replication_dir);
    1348             : 
     1349             :     /* replication slots are disabled, so no slots exist; we're done */
    1350         316 :     if (max_replication_slots <= 0)
    1351         316 :         return;
    1352             : 
    1353             :     /* Now that we have recovered all the data, compute replication xmin */
    1354         316 :     ReplicationSlotsComputeRequiredXmin(false);
    1355         316 :     ReplicationSlotsComputeRequiredLSN();
    1356             : }
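
To make the on-disk layout this scan expects concrete, a sketch of what pg_replslot may contain at startup; the slot names are invented:

    pg_replslot/
        my_slot/
            state           <- fixed-size ReplicationSlotOnDisk image, fsync()ed
        other_slot.tmp/     <- leftover of a crashed create/drop, removed above
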
    1357             : 
    1358             : /* ----
    1359             :  * Manipulation of on-disk state of replication slots
    1360             :  *
     1361             :  * NB: none of the routines below should take any notice of whether a slot is
     1362             :  * the current one or not; that's all handled a layer above.
    1363             :  * ----
    1364             :  */
    1365             : static void
    1366         352 : CreateSlotOnDisk(ReplicationSlot *slot)
    1367             : {
    1368             :     char        tmppath[MAXPGPATH];
    1369             :     char        path[MAXPGPATH];
    1370             :     struct stat st;
    1371             : 
    1372             :     /*
     1373             :      * No need to take out the io_in_progress_lock; nobody else can see this
     1374             :      * slot yet, so nobody else will write. We're reusing SaveSlotToPath(), which
     1375             :      * takes out the lock; if we took the lock here, we'd deadlock.
    1376             :      */
    1377             : 
    1378         352 :     sprintf(path, "pg_replslot/%s", NameStr(slot->data.name));
    1379         352 :     sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name));
    1380             : 
    1381             :     /*
    1382             :      * It's just barely possible that some previous effort to create or drop a
    1383             :      * slot with this name left a temp directory lying around. If that seems
    1384             :      * to be the case, try to remove it.  If the rmtree() fails, we'll error
    1385             :      * out at the MakePGDirectory() below, so we don't bother checking
    1386             :      * success.
    1387             :      */
    1388         352 :     if (stat(tmppath, &st) == 0 && S_ISDIR(st.st_mode))
    1389           0 :         rmtree(tmppath, true);
    1390             : 
    1391             :     /* Create and fsync the temporary slot directory. */
    1392         352 :     if (MakePGDirectory(tmppath) < 0)
    1393           0 :         ereport(ERROR,
    1394             :                 (errcode_for_file_access(),
    1395             :                  errmsg("could not create directory \"%s\": %m",
    1396             :                         tmppath)));
    1397         352 :     fsync_fname(tmppath, true);
    1398             : 
    1399             :     /* Write the actual state file. */
    1400         352 :     slot->dirty = true;          /* signal that we really need to write */
    1401         352 :     SaveSlotToPath(slot, tmppath, ERROR);
    1402             : 
    1403             :     /* Rename the directory into place. */
    1404         352 :     if (rename(tmppath, path) != 0)
    1405           0 :         ereport(ERROR,
    1406             :                 (errcode_for_file_access(),
    1407             :                  errmsg("could not rename file \"%s\" to \"%s\": %m",
    1408             :                         tmppath, path)));
    1409             : 
    1410             :     /*
     1411             :      * If we failed now - really unlikely - we wouldn't know whether this slot
     1412             :      * would persist after an OS crash or not, so force a restart. The restart
     1413             :      * will retry the fsync until it succeeds.
    1414             :      */
    1415         352 :     START_CRIT_SECTION();
    1416             : 
    1417         352 :     fsync_fname(path, true);
    1418         352 :     fsync_fname("pg_replslot", true);
    1419             : 
    1420         352 :     END_CRIT_SECTION();
    1421         352 : }
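
The create-under-a-temporary-name / fsync / rename / fsync-parent dance above is a generic durability pattern. Below is a self-contained POSIX sketch of the same idea, independent of PostgreSQL; the helper names are invented for the example:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    /* fsync a directory by opening it read-only */
    static int
    fsync_dir(const char *dir)
    {
        int     fd = open(dir, O_RDONLY);

        if (fd < 0)
            return -1;
        if (fsync(fd) != 0)
        {
            close(fd);
            return -1;
        }
        return close(fd);
    }

    /* durably create parent/name, going through parent/name.tmp */
    int
    create_dir_durably(const char *parent, const char *name)
    {
        char    tmppath[1024];
        char    path[1024];

        snprintf(tmppath, sizeof(tmppath), "%s/%s.tmp", parent, name);
        snprintf(path, sizeof(path), "%s/%s", parent, name);

        if (mkdir(tmppath, S_IRWXU) != 0)   /* 1. create under a temp name */
            return -1;
        if (fsync_dir(tmppath) != 0)        /* 2. make the new dir durable */
            return -1;
        /* ... create and fsync any files inside tmppath here ... */
        if (rename(tmppath, path) != 0)     /* 3. atomically move into place */
            return -1;
        /* 4. persist the rename itself via the parent directory entry */
        if (fsync_dir(path) != 0 || fsync_dir(parent) != 0)
            return -1;
        return 0;
    }
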
    1422             : 
    1423             : /*
    1424             :  * Shared functionality between saving and creating a replication slot.
    1425             :  */
    1426             : static void
    1427        1126 : SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
    1428             : {
    1429             :     char        tmppath[MAXPGPATH];
    1430             :     char        path[MAXPGPATH];
    1431             :     int         fd;
    1432             :     ReplicationSlotOnDisk cp;
    1433             :     bool        was_dirty;
    1434             : 
    1435             :     /* first check whether there's something to write out */
    1436        1126 :     SpinLockAcquire(&slot->mutex);
    1437        1126 :     was_dirty = slot->dirty;
    1438        1126 :     slot->just_dirtied = false;
    1439        1126 :     SpinLockRelease(&slot->mutex);
    1440             : 
    1441             :     /* and don't do anything if there's nothing to write */
    1442        1126 :     if (!was_dirty)
    1443         104 :         return;
    1444             : 
    1445        1074 :     LWLockAcquire(&slot->io_in_progress_lock, LW_EXCLUSIVE);
    1446             : 
    1447             :     /* silence valgrind :( */
    1448        1074 :     memset(&cp, 0, sizeof(ReplicationSlotOnDisk));
    1449             : 
    1450        1074 :     sprintf(tmppath, "%s/state.tmp", dir);
    1451        1074 :     sprintf(path, "%s/state", dir);
    1452             : 
    1453        1074 :     fd = OpenTransientFile(tmppath, O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
    1454        1074 :     if (fd < 0)
    1455             :     {
    1456             :         /*
    1457             :          * If not an ERROR, then release the lock before returning.  In case
    1458             :          * of an ERROR, the error recovery path automatically releases the
    1459             :          * lock, but no harm in explicitly releasing even in that case.  Note
    1460             :          * that LWLockRelease() could affect errno.
    1461             :          */
    1462           0 :         int         save_errno = errno;
    1463             : 
    1464           0 :         LWLockRelease(&slot->io_in_progress_lock);
    1465           0 :         errno = save_errno;
    1466           0 :         ereport(elevel,
    1467             :                 (errcode_for_file_access(),
    1468             :                  errmsg("could not create file \"%s\": %m",
    1469             :                         tmppath)));
    1470           0 :         return;
    1471             :     }
    1472             : 
    1473        1074 :     cp.magic = SLOT_MAGIC;
    1474        1074 :     INIT_CRC32C(cp.checksum);
    1475        1074 :     cp.version = SLOT_VERSION;
    1476        1074 :     cp.length = ReplicationSlotOnDiskV2Size;
    1477             : 
    1478        1074 :     SpinLockAcquire(&slot->mutex);
    1479             : 
    1480        1074 :     memcpy(&cp.slotdata, &slot->data, sizeof(ReplicationSlotPersistentData));
    1481             : 
    1482        1074 :     SpinLockRelease(&slot->mutex);
    1483             : 
    1484        1074 :     COMP_CRC32C(cp.checksum,
     1485             :                 (char *) (&cp) + ReplicationSlotOnDiskNotChecksummedSize,
     1486             :                 ReplicationSlotOnDiskChecksummedSize);
    1487        1074 :     FIN_CRC32C(cp.checksum);
    1488             : 
    1489        1074 :     errno = 0;
    1490        1074 :     pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_WRITE);
    1491        1074 :     if ((write(fd, &cp, sizeof(cp))) != sizeof(cp))
    1492             :     {
    1493           0 :         int         save_errno = errno;
    1494             : 
    1495           0 :         pgstat_report_wait_end();
    1496           0 :         CloseTransientFile(fd);
    1497           0 :         LWLockRelease(&slot->io_in_progress_lock);
    1498             : 
    1499             :         /* if write didn't set errno, assume problem is no disk space */
    1500           0 :         errno = save_errno ? save_errno : ENOSPC;
    1501           0 :         ereport(elevel,
    1502             :                 (errcode_for_file_access(),
    1503             :                  errmsg("could not write to file \"%s\": %m",
    1504             :                         tmppath)));
    1505           0 :         return;
    1506             :     }
    1507        1074 :     pgstat_report_wait_end();
    1508             : 
    1509             :     /* fsync the temporary file */
    1510        1074 :     pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_SYNC);
    1511        1074 :     if (pg_fsync(fd) != 0)
    1512             :     {
    1513           0 :         int         save_errno = errno;
    1514             : 
    1515           0 :         pgstat_report_wait_end();
    1516           0 :         CloseTransientFile(fd);
    1517           0 :         LWLockRelease(&slot->io_in_progress_lock);
    1518           0 :         errno = save_errno;
    1519           0 :         ereport(elevel,
    1520             :                 (errcode_for_file_access(),
    1521             :                  errmsg("could not fsync file \"%s\": %m",
    1522             :                         tmppath)));
    1523           0 :         return;
    1524             :     }
    1525        1074 :     pgstat_report_wait_end();
    1526             : 
    1527        1074 :     if (CloseTransientFile(fd) != 0)
    1528             :     {
    1529           0 :         int         save_errno = errno;
    1530             : 
    1531           0 :         LWLockRelease(&slot->io_in_progress_lock);
    1532           0 :         errno = save_errno;
    1533           0 :         ereport(elevel,
    1534             :                 (errcode_for_file_access(),
    1535             :                  errmsg("could not close file \"%s\": %m",
    1536             :                         tmppath)));
    1537           0 :         return;
    1538             :     }
    1539             : 
    1540             :     /* rename to permanent file, fsync file and directory */
    1541        1074 :     if (rename(tmppath, path) != 0)
    1542             :     {
    1543           0 :         int         save_errno = errno;
    1544             : 
    1545           0 :         LWLockRelease(&slot->io_in_progress_lock);
    1546           0 :         errno = save_errno;
    1547           0 :         ereport(elevel,
    1548             :                 (errcode_for_file_access(),
    1549             :                  errmsg("could not rename file \"%s\" to \"%s\": %m",
    1550             :                         tmppath, path)));
    1551           0 :         return;
    1552             :     }
    1553             : 
    1554             :     /*
    1555             :      * Check CreateSlotOnDisk() for the reasoning of using a critical section.
    1556             :      */
    1557        1074 :     START_CRIT_SECTION();
    1558             : 
    1559        1074 :     fsync_fname(path, false);
    1560        1074 :     fsync_fname(dir, true);
    1561        1074 :     fsync_fname("pg_replslot", true);
    1562             : 
    1563        1074 :     END_CRIT_SECTION();
    1564             : 
    1565             :     /*
    1566             :      * Successfully wrote, unset dirty bit, unless somebody dirtied again
    1567             :      * already.
    1568             :      */
    1569        1074 :     SpinLockAcquire(&slot->mutex);
    1570        1074 :     if (!slot->just_dirtied)
    1571        1074 :         slot->dirty = false;
    1572        1074 :     SpinLockRelease(&slot->mutex);
    1573             : 
    1574        1074 :     LWLockRelease(&slot->io_in_progress_lock);
    1575             : }
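
For reference, the checksum written here covers everything in ReplicationSlotOnDisk from the version field onward. The size macros used above are defined near the top of this file; the sketch below reproduces them from memory and should be verified against the actual definitions:

    /* size of the version-independent header */
    #define ReplicationSlotOnDiskConstantSize \
        offsetof(ReplicationSlotOnDisk, slotdata)
    /* part of the file not covered by the checksum */
    #define ReplicationSlotOnDiskNotChecksummedSize \
        offsetof(ReplicationSlotOnDisk, version)
    /* part of the file covered by the checksum */
    #define ReplicationSlotOnDiskChecksummedSize \
        (sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskNotChecksummedSize)
    /* size of the version-dependent part */
    #define ReplicationSlotOnDiskV2Size \
        (sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskConstantSize)
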
    1576             : 
    1577             : /*
    1578             :  * Load a single slot from disk into memory.
    1579             :  */
    1580             : static void
    1581          16 : RestoreSlotFromDisk(const char *name)
    1582             : {
    1583             :     ReplicationSlotOnDisk cp;
    1584             :     int         i;
    1585             :     char        slotdir[MAXPGPATH + 12];
    1586             :     char        path[MAXPGPATH + 22];
    1587             :     int         fd;
    1588          16 :     bool        restored = false;
    1589             :     int         readBytes;
    1590             :     pg_crc32c   checksum;
    1591             : 
    1592             :     /* no need to lock here, no concurrent access allowed yet */
    1593             : 
    1594             :     /* delete temp file if it exists */
    1595          16 :     sprintf(slotdir, "pg_replslot/%s", name);
    1596          16 :     sprintf(path, "%s/state.tmp", slotdir);
    1597          16 :     if (unlink(path) < 0 && errno != ENOENT)
    1598           0 :         ereport(PANIC,
    1599             :                 (errcode_for_file_access(),
    1600             :                  errmsg("could not remove file \"%s\": %m", path)));
    1601             : 
    1602          16 :     sprintf(path, "%s/state", slotdir);
    1603             : 
    1604          16 :     elog(DEBUG1, "restoring replication slot from \"%s\"", path);
    1605             : 
    1606             :     /* on some operating systems fsyncing a file requires O_RDWR */
    1607          16 :     fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
    1608             : 
    1609             :     /*
     1610             :      * We do not need to handle a missing state file gracefully here, because
     1611             :      * the directory is rename()d into place only after the state file was fsync()ed.
    1612             :      */
    1613          16 :     if (fd < 0)
    1614           0 :         ereport(PANIC,
    1615             :                 (errcode_for_file_access(),
    1616             :                  errmsg("could not open file \"%s\": %m", path)));
    1617             : 
    1618             :     /*
     1619             :      * Sync the state file before reading from it. We might have crashed
    1620             :      * while it wasn't synced yet and we shouldn't continue on that basis.
    1621             :      */
    1622          16 :     pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC);
    1623          16 :     if (pg_fsync(fd) != 0)
    1624           0 :         ereport(PANIC,
    1625             :                 (errcode_for_file_access(),
    1626             :                  errmsg("could not fsync file \"%s\": %m",
    1627             :                         path)));
    1628          16 :     pgstat_report_wait_end();
    1629             : 
    1630             :     /* Also sync the parent directory */
    1631          16 :     START_CRIT_SECTION();
    1632          16 :     fsync_fname(slotdir, true);
    1633          16 :     END_CRIT_SECTION();
    1634             : 
     1635             :     /* read the part of the state file that's guaranteed to be version independent */
    1636          16 :     pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
    1637          16 :     readBytes = read(fd, &cp, ReplicationSlotOnDiskConstantSize);
    1638          16 :     pgstat_report_wait_end();
    1639          16 :     if (readBytes != ReplicationSlotOnDiskConstantSize)
    1640             :     {
    1641           0 :         if (readBytes < 0)
    1642           0 :             ereport(PANIC,
    1643             :                     (errcode_for_file_access(),
    1644             :                      errmsg("could not read file \"%s\": %m", path)));
    1645             :         else
    1646           0 :             ereport(PANIC,
    1647             :                     (errcode(ERRCODE_DATA_CORRUPTED),
    1648             :                      errmsg("could not read file \"%s\": read %d of %zu",
    1649             :                             path, readBytes,
    1650             :                             (Size) ReplicationSlotOnDiskConstantSize)));
    1651             :     }
    1652             : 
    1653             :     /* verify magic */
    1654          16 :     if (cp.magic != SLOT_MAGIC)
    1655           0 :         ereport(PANIC,
    1656             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1657             :                  errmsg("replication slot file \"%s\" has wrong magic number: %u instead of %u",
    1658             :                         path, cp.magic, SLOT_MAGIC)));
    1659             : 
    1660             :     /* verify version */
    1661          16 :     if (cp.version != SLOT_VERSION)
    1662           0 :         ereport(PANIC,
    1663             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1664             :                  errmsg("replication slot file \"%s\" has unsupported version %u",
    1665             :                         path, cp.version)));
    1666             : 
    1667             :     /* boundary check on length */
    1668          16 :     if (cp.length != ReplicationSlotOnDiskV2Size)
    1669           0 :         ereport(PANIC,
    1670             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1671             :                  errmsg("replication slot file \"%s\" has corrupted length %u",
    1672             :                         path, cp.length)));
    1673             : 
    1674             :     /* Now that we know the size, read the entire file */
    1675          16 :     pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
    1676          16 :     readBytes = read(fd,
    1677             :                      (char *) &cp + ReplicationSlotOnDiskConstantSize,
    1678          16 :                      cp.length);
    1679          16 :     pgstat_report_wait_end();
    1680          16 :     if (readBytes != cp.length)
    1681             :     {
    1682           0 :         if (readBytes < 0)
    1683           0 :             ereport(PANIC,
    1684             :                     (errcode_for_file_access(),
    1685             :                      errmsg("could not read file \"%s\": %m", path)));
    1686             :         else
    1687           0 :             ereport(PANIC,
    1688             :                     (errcode(ERRCODE_DATA_CORRUPTED),
    1689             :                      errmsg("could not read file \"%s\": read %d of %zu",
    1690             :                             path, readBytes, (Size) cp.length)));
    1691             :     }
    1692             : 
    1693          16 :     if (CloseTransientFile(fd) != 0)
    1694           0 :         ereport(PANIC,
    1695             :                 (errcode_for_file_access(),
    1696             :                  errmsg("could not close file \"%s\": %m", path)));
    1697             : 
    1698             :     /* now verify the CRC */
    1699          16 :     INIT_CRC32C(checksum);
    1700          16 :     COMP_CRC32C(checksum,
     1701             :                 (char *) &cp + ReplicationSlotOnDiskNotChecksummedSize,
     1702             :                 ReplicationSlotOnDiskChecksummedSize);
    1703          16 :     FIN_CRC32C(checksum);
    1704             : 
    1705          16 :     if (!EQ_CRC32C(checksum, cp.checksum))
    1706           0 :         ereport(PANIC,
    1707             :                 (errmsg("checksum mismatch for replication slot file \"%s\": is %u, should be %u",
    1708             :                         path, checksum, cp.checksum)));
    1709             : 
    1710             :     /*
    1711             :      * If we crashed with an ephemeral slot active, don't restore but delete
    1712             :      * it.
    1713             :      */
    1714          16 :     if (cp.slotdata.persistency != RS_PERSISTENT)
    1715             :     {
    1716           0 :         if (!rmtree(slotdir, true))
    1717             :         {
    1718           0 :             ereport(WARNING,
    1719             :                     (errmsg("could not remove directory \"%s\"",
    1720             :                             slotdir)));
    1721             :         }
    1722           0 :         fsync_fname("pg_replslot", true);
    1723          16 :         return;
    1724             :     }
    1725             : 
    1726             :     /*
    1727             :      * Verify that requirements for the specific slot type are met. That's
    1728             :      * important because if these aren't met we're not guaranteed to retain
    1729             :      * all the necessary resources for the slot.
    1730             :      *
    1731             :      * NB: We have to do so *after* the above checks for ephemeral slots,
    1732             :      * because otherwise a slot that shouldn't exist anymore could prevent
    1733             :      * restarts.
    1734             :      *
    1735             :      * NB: Changing the requirements here also requires adapting
    1736             :      * CheckSlotRequirements() and CheckLogicalDecodingRequirements().
    1737             :      */
    1738          16 :     if (cp.slotdata.database != InvalidOid && wal_level < WAL_LEVEL_LOGICAL)
    1739           0 :         ereport(FATAL,
    1740             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1741             :                  errmsg("logical replication slot \"%s\" exists, but wal_level < logical",
    1742             :                         NameStr(cp.slotdata.name)),
    1743             :                  errhint("Change wal_level to be logical or higher.")));
    1744          16 :     else if (wal_level < WAL_LEVEL_REPLICA)
    1745           0 :         ereport(FATAL,
    1746             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1747             :                  errmsg("physical replication slot \"%s\" exists, but wal_level < replica",
    1748             :                         NameStr(cp.slotdata.name)),
    1749             :                  errhint("Change wal_level to be replica or higher.")));
    1750             : 
    1751             :     /* nothing can be active yet, don't lock anything */
    1752          32 :     for (i = 0; i < max_replication_slots; i++)
    1753             :     {
    1754             :         ReplicationSlot *slot;
    1755             : 
    1756          16 :         slot = &ReplicationSlotCtl->replication_slots[i];
    1757             : 
    1758          16 :         if (slot->in_use)
    1759           0 :             continue;
    1760             : 
    1761             :         /* restore the entire set of persistent data */
    1762          16 :         memcpy(&slot->data, &cp.slotdata,
    1763             :                sizeof(ReplicationSlotPersistentData));
    1764             : 
    1765             :         /* initialize in memory state */
    1766          16 :         slot->effective_xmin = cp.slotdata.xmin;
    1767          16 :         slot->effective_catalog_xmin = cp.slotdata.catalog_xmin;
    1768             : 
    1769          16 :         slot->candidate_catalog_xmin = InvalidTransactionId;
    1770          16 :         slot->candidate_xmin_lsn = InvalidXLogRecPtr;
    1771          16 :         slot->candidate_restart_lsn = InvalidXLogRecPtr;
    1772          16 :         slot->candidate_restart_valid = InvalidXLogRecPtr;
    1773             : 
    1774          16 :         slot->in_use = true;
    1775          16 :         slot->active_pid = 0;
    1776             : 
    1777          16 :         restored = true;
    1778          16 :         break;
    1779             :     }
    1780             : 
    1781          16 :     if (!restored)
    1782           0 :         ereport(FATAL,
    1783             :                 (errmsg("too many replication slots active before shutdown"),
    1784             :                  errhint("Increase max_replication_slots and try again.")));
    1785             : }

Generated by: LCOV version 1.14