LCOV - code coverage report
Current view: top level - src/backend/replication/logical - tablesync.c (source / functions) Hit Total Coverage
Test: PostgreSQL 14devel Lines: 286 312 91.7 %
Date: 2020-10-28 11:24:57 Functions: 12 12 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  * tablesync.c
       3             :  *    PostgreSQL logical replication: initial table data synchronization
       4             :  *
       5             :  * Copyright (c) 2012-2020, PostgreSQL Global Development Group
       6             :  *
       7             :  * IDENTIFICATION
       8             :  *    src/backend/replication/logical/tablesync.c
       9             :  *
      10             :  * NOTES
      11             :  *    This file contains code for initial table data synchronization for
      12             :  *    logical replication.
      13             :  *
      14             :  *    The initial data synchronization is done separately for each table,
      15             :  *    in a separate apply worker that only fetches the initial snapshot data
      16             :  *    from the publisher and then synchronizes the position in the stream with
      17             :  *    the main apply worker.
      18             :  *
      19             :  *    There are several reasons for doing the synchronization this way:
      20             :  *     - It allows us to parallelize the initial data synchronization
      21             :  *       which lowers the time needed for it to happen.
      22             :  *     - The initial synchronization does not have to hold the xid and LSN
      23             :  *       for the time it takes to copy data of all tables, causing less
      24             :  *       bloat and lower disk consumption compared to doing the
      25             :  *       synchronization in a single process for the whole database.
      26             :  *     - It allows us to synchronize any tables added after the initial
      27             :  *       synchronization has finished.
      28             :  *
      29             :  *    The stream position synchronization works in multiple steps:
      30             :  *     - Apply worker requests a tablesync worker to start, setting the new
      31             :  *       table state to INIT.
      32             :  *     - Tablesync worker starts; changes table state from INIT to DATASYNC while
      33             :  *       copying.
      34             :  *     - Tablesync worker finishes the copy and sets table state to SYNCWAIT;
      35             :  *       waits for state change.
      36             :  *     - Apply worker periodically checks for tables in SYNCWAIT state.  When
      37             :  *       any appear, it sets the table state to CATCHUP and starts loop-waiting
      38             :  *       until either the table state is set to SYNCDONE or the sync worker
      39             :  *       exits.
      40             :  *     - After the sync worker has seen the state change to CATCHUP, it will
      41             :  *       read the stream and apply changes (acting like an apply worker) until
      42             :  *       it catches up to the specified stream position.  Then it sets the
      43             :  *       state to SYNCDONE.  There might be zero changes applied between
      44             :  *       CATCHUP and SYNCDONE, because the sync worker might be ahead of the
      45             :  *       apply worker.
      46             :  *     - Once the state is set to SYNCDONE, the apply will continue tracking
      47             :  *       the table until it reaches the SYNCDONE stream position, at which
      48             :  *       point it sets state to READY and stops tracking.  Again, there might
      49             :  *       be zero changes in between.
      50             :  *
      51             :  *    So the state progression is always: INIT -> DATASYNC -> SYNCWAIT ->
      52             :  *    CATCHUP -> SYNCDONE -> READY.
      53             :  *
      54             :  *    The catalog pg_subscription_rel is used to keep information about
      55             :  *    subscribed tables and their state.  Some transient state during data
      56             :  *    synchronization is kept in shared memory.  The states SYNCWAIT and
      57             :  *    CATCHUP only appear in memory.
      58             :  *
      59             :  *    Example flows look like this:
      60             :  *     - Apply is in front:
      61             :  *        sync:8
      62             :  *          -> set in memory SYNCWAIT
      63             :  *        apply:10
      64             :  *          -> set in memory CATCHUP
      65             :  *          -> enter wait-loop
      66             :  *        sync:10
      67             :  *          -> set in catalog SYNCDONE
      68             :  *          -> exit
      69             :  *        apply:10
      70             :  *          -> exit wait-loop
      71             :  *          -> continue rep
      72             :  *        apply:11
      73             :  *          -> set in catalog READY
      74             :  *
      75             :  *     - Sync is in front:
      76             :  *        sync:10
      77             :  *          -> set in memory SYNCWAIT
      78             :  *        apply:8
      79             :  *          -> set in memory CATCHUP
      80             :  *          -> continue per-table filtering
      81             :  *        sync:10
      82             :  *          -> set in catalog SYNCDONE
      83             :  *          -> exit
      84             :  *        apply:10
      85             :  *          -> set in catalog READY
      86             :  *          -> stop per-table filtering
      87             :  *          -> continue rep
      88             :  *-------------------------------------------------------------------------
      89             :  */
      90             : 
      91             : #include "postgres.h"
      92             : 
      93             : #include "access/table.h"
      94             : #include "access/xact.h"
      95             : #include "catalog/pg_subscription_rel.h"
      96             : #include "catalog/pg_type.h"
      97             : #include "commands/copy.h"
      98             : #include "miscadmin.h"
      99             : #include "parser/parse_relation.h"
     100             : #include "pgstat.h"
     101             : #include "replication/logicallauncher.h"
     102             : #include "replication/logicalrelation.h"
     103             : #include "replication/walreceiver.h"
     104             : #include "replication/worker_internal.h"
     105             : #include "storage/ipc.h"
     106             : #include "utils/builtins.h"
     107             : #include "utils/lsyscache.h"
     108             : #include "utils/memutils.h"
     109             : #include "utils/snapmgr.h"
     110             : 
     111             : static bool table_states_valid = false; /* cleared by invalidate_syncing_table_states(); set to true by process_syncing_tables_for_apply() after it reloads its table list */
     112             : 
     113             : StringInfo  copybuf = NULL; /* copy_read_data() points data/len/cursor at the chunk returned by walrcv_receive() and consumes it from here */
     114             : 
     115             : /*
     116             :  * Exit routine for synchronization worker.
                     :  *
                     :  * Commits any transaction still in progress, calls
                     :  * XLogFlush(GetXLogWriteRecPtr()), logs a completion message naming the
                     :  * subscription and table, wakes the main apply worker of this subscription
                     :  * and finally calls proc_exit(0).  The function is declared
                     :  * pg_attribute_noreturn(): control never comes back to the caller.
     117             :  */
     118             : static void
     119             : pg_attribute_noreturn()
     120         122 : finish_sync_worker(void)
     121             : {
     122             :     /*
     123             :      * Commit any outstanding transaction. This is the usual case, unless
     124             :      * there was nothing to do for the table.
     125             :      */
     126         122 :     if (IsTransactionState())
     127             :     {
     128         122 :         CommitTransactionCommand();
     129         122 :         pgstat_report_stat(false);
     130             :     }
     131             : 
     132             :     /* And flush all writes. */
     133         122 :     XLogFlush(GetXLogWriteRecPtr());
     134             : 
     135         122 :     StartTransactionCommand(); /* short transaction wrapping the get_rel_name() call in the message below */
     136         122 :     ereport(LOG,
     137             :             (errmsg("logical replication table synchronization worker for subscription \"%s\", table \"%s\" has finished",
     138             :                     MySubscription->name,
     139             :                     get_rel_name(MyLogicalRepWorker->relid))));
     140         122 :     CommitTransactionCommand();
     141             : 
     142             :     /* Find the main apply worker and signal it. */
     143         122 :     logicalrep_worker_wakeup(MyLogicalRepWorker->subid, InvalidOid);
     144             : 
     145             :     /* Stop gracefully */
     146         122 :     proc_exit(0);
     147             : }
     148             : 
     149             : /*
     150             :  * Wait until the relation sync state is set in the catalog to the expected
     151             :  * one; return true when it happens.
     152             :  *
     153             :  * Returns false if the table sync worker or the table itself have
     154             :  * disappeared, or the table state has been reset.
     155             :  *
     156             :  * Currently, this is used in the apply worker when transitioning from
     157             :  * CATCHUP state to SYNCDONE.
                     :  *
                     :  * relid is the table whose state is read with GetSubscriptionRelState() for
                     :  * MyLogicalRepWorker->subid; expected_state is the state character to wait
                     :  * for.  Each iteration re-reads the state, then checks under
                     :  * LogicalRepWorkerLock that a worker for (subid, relid) still exists, and
                     :  * otherwise sleeps on MyLatch with a 1000L timeout before retrying.
     158             :  */
     159             : static bool
     160         238 : wait_for_relation_state_change(Oid relid, char expected_state)
     161             : {
     162             :     char        state;
     163             : 
     164             :     for (;;)
     165             :     {
     166             :         LogicalRepWorker *worker;
     167             :         XLogRecPtr  statelsn;
     168             : 
     169         238 :         CHECK_FOR_INTERRUPTS();
     170             : 
     171         238 :         InvalidateCatalogSnapshot(); /* presumably so the lookup below is not served from a stale snapshot -- confirm */
     172         238 :         state = GetSubscriptionRelState(MyLogicalRepWorker->subid,
     173             :                                         relid, &statelsn);
     174             : 
     175         238 :         if (state == SUBREL_STATE_UNKNOWN)
     176         112 :             break;
     177             : 
     178         238 :         if (state == expected_state)
     179           0 :             return true;
     180             : 
     181             :         /* Check if the sync worker is still running and bail if not. */
     182         238 :         LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
     183         238 :         worker = logicalrep_worker_find(MyLogicalRepWorker->subid, relid,
     184             :                                         false);
     185         238 :         LWLockRelease(LogicalRepWorkerLock);
     186         238 :         if (!worker)
     187         112 :             break;
     188             : 
     189         126 :         (void) WaitLatch(MyLatch,
     190             :                          WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
     191             :                          1000L, WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE);
     192             : 
     193         126 :         ResetLatch(MyLatch); /* reset unconditionally; the WaitLatch() result is discarded above */
     194         126 :     }
     195             : 
     196         112 :     return false;
     197             : }
     198             : 
     199             : /*
     200             :  * Wait until the apply worker changes the state of our synchronization
     201             :  * worker to the expected one.
     202             :  *
     203             :  * Used when transitioning from SYNCWAIT state to CATCHUP.
     204             :  *
                     :  * expected_state is compared against MyLogicalRepWorker->relstate.  While it
                     :  * does not match, the apply worker (the worker registered for our subid with
                     :  * InvalidOid as relid) is looked up and woken, and we sleep on MyLatch with
                     :  * a 1000L timeout.
                     :  *
     205             :  * Returns true once the state matches, false if the apply worker has
                     :  * disappeared.
     206             :  */
     207             : static bool
     208         244 : wait_for_worker_state_change(char expected_state)
     209             : {
     210             :     int         rc;
     211             : 
     212             :     for (;;)
     213             :     {
     214             :         LogicalRepWorker *worker;
     215             : 
     216         244 :         CHECK_FOR_INTERRUPTS();
     217             : 
     218             :         /*
     219             :          * Done if already in correct state.  (We assume this fetch is atomic
     220             :          * enough to not give a misleading answer if we do it with no lock.)
     221             :          */
     222         244 :         if (MyLogicalRepWorker->relstate == expected_state)
     223         122 :             return true;
     224             : 
     225             :         /*
     226             :          * Bail out if the apply worker has died, else signal it we're
     227             :          * waiting.
     228             :          */
     229         122 :         LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
     230         122 :         worker = logicalrep_worker_find(MyLogicalRepWorker->subid,
     231             :                                         InvalidOid, false);
     232         122 :         if (worker && worker->proc)
     233         122 :             logicalrep_worker_wakeup_ptr(worker);
     234         122 :         LWLockRelease(LogicalRepWorkerLock);
     235         122 :         if (!worker)
     236           0 :             break;
     237             : 
     238             :         /*
     239             :          * Wait.  We expect to get a latch signal back from the apply worker,
     240             :          * but use a timeout in case it dies without sending one.
     241             :          */
     242         122 :         rc = WaitLatch(MyLatch,
     243             :                        WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
     244             :                        1000L, WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE);
     245             : 
     246         122 :         if (rc & WL_LATCH_SET)
     247         122 :             ResetLatch(MyLatch); /* latch is left untouched when WL_LATCH_SET is not reported */
     248         122 :     }
     249             : 
     250           0 :     return false;
     251             : }
     252             : 
     253             : /*
     254             :  * Callback from syscache invalidation.
                     :  *
                     :  * None of the three arguments is used.  The callback only clears
                     :  * table_states_valid, which makes process_syncing_tables_for_apply() reload
                     :  * its list of non-ready tables the next time it runs.
     255             :  */
     256             : void
     257         562 : invalidate_syncing_table_states(Datum arg, int cacheid, uint32 hashvalue)
     258             : {
     259         562 :     table_states_valid = false;
     260         562 : }
     261             : 
     262             : /*
     263             :  * Handle table synchronization cooperation from the synchronization
     264             :  * worker.
     265             :  *
     266             :  * If the sync worker is in CATCHUP state and reached (or passed) the
     267             :  * predetermined synchronization point in the WAL stream, mark the table as
     268             :  * SYNCDONE and finish.
                     :  *
                     :  * current_lsn is compared against MyLogicalRepWorker->relstate_lsn while
                     :  * holding relmutex.  On the SYNCDONE path the new state and current_lsn are
                     :  * stored in the worker slot, the spinlock is released, the same values are
                     :  * written with UpdateSubscriptionRelState(), streaming on wrconn is ended,
                     :  * and finish_sync_worker() is called, which does not return.  Otherwise the
                     :  * function only releases the spinlock.  Must be called inside a transaction
                     :  * (asserted).
     269             :  */
     270             : static void
     271         130 : process_syncing_tables_for_sync(XLogRecPtr current_lsn)
     272             : {
     273         130 :     Assert(IsTransactionState());
     274             : 
     275         130 :     SpinLockAcquire(&MyLogicalRepWorker->relmutex);
     276             : 
     277         260 :     if (MyLogicalRepWorker->relstate == SUBREL_STATE_CATCHUP &&
     278         130 :         current_lsn >= MyLogicalRepWorker->relstate_lsn)
     279             :     {
     280             :         TimeLineID  tli;
     281             : 
     282         122 :         MyLogicalRepWorker->relstate = SUBREL_STATE_SYNCDONE;
     283         122 :         MyLogicalRepWorker->relstate_lsn = current_lsn;
     284             : 
     285         122 :         SpinLockRelease(&MyLogicalRepWorker->relmutex); /* released before the catalog update below */
     286             : 
     287         366 :         UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
     288         122 :                                    MyLogicalRepWorker->relid,
     289         122 :                                    MyLogicalRepWorker->relstate,
     290         122 :                                    MyLogicalRepWorker->relstate_lsn);
     291             : 
     292         122 :         walrcv_endstreaming(wrconn, &tli); /* tli is written by the call but not used afterwards */
     293         122 :         finish_sync_worker();
     294             :     }
     295             :     else
     296           8 :         SpinLockRelease(&MyLogicalRepWorker->relmutex);
     297           8 : }
     298             : 
     299             : /*
     300             :  * Handle table synchronization cooperation from the apply worker.
     301             :  *
     302             :  * Walk over all subscription tables that are individually tracked by the
     303             :  * apply process (currently, all that have state other than
     304             :  * SUBREL_STATE_READY) and manage synchronization for them.
     305             :  *
     306             :  * If there are tables that need synchronizing and are not being synchronized
     307             :  * yet, start sync workers for them (if there are free slots for sync
     308             :  * workers).  To prevent starting the sync worker for the same relation at a
     309             :  * high frequency after a failure, we store its last start time with each sync
     310             :  * state info.  We start the sync worker for the same relation after waiting
     311             :  * at least wal_retrieve_retry_interval.
     312             :  *
     313             :  * For tables that are being synchronized already, check if sync workers
     314             :  * either need action from the apply worker or have finished.  This is the
     315             :  * SYNCWAIT to CATCHUP transition.
     316             :  *
     317             :  * If the synchronization position is reached (SYNCDONE), then the table can
     318             :  * be marked as READY and is no longer tracked.
                     :  *
                     :  * current_lsn is used both for the SYNCDONE -> READY check and as a lower
                     :  * bound for the catchup position handed to a waiting sync worker.  Must be
                     :  * called outside a transaction (asserted); any transaction started here is
                     :  * committed before returning.
                     :  *
                     :  * The list of non-ready tables and the per-relation start-time hash live in
                     :  * function-static variables and therefore persist across calls.
     319             :  */
     320             : static void
     321        5448 : process_syncing_tables_for_apply(XLogRecPtr current_lsn)
     322             : {
     323             :     struct tablesync_start_time_mapping
     324             :     {
     325             :         Oid         relid;
     326             :         TimestampTz last_start_time;
     327             :     };
     328             :     static List *table_states = NIL;
     329             :     static HTAB *last_start_times = NULL;
     330             :     ListCell   *lc;
     331        5448 :     bool        started_tx = false;
     332             : 
     333        5448 :     Assert(!IsTransactionState());
     334             : 
     335             :     /* We need up-to-date sync state info for subscription tables here. */
     336        5448 :     if (!table_states_valid)
     337             :     {
     338             :         MemoryContext oldctx;
     339             :         List       *rstates;
     340             :         ListCell   *lc; /* NOTE(review): shadows the function-level lc declared above */
     341             :         SubscriptionRelState *rstate;
     342             : 
     343             :         /* Clean the old list. */
     344         366 :         list_free_deep(table_states);
     345         366 :         table_states = NIL;
     346             : 
     347         366 :         StartTransactionCommand();
     348         366 :         started_tx = true;
     349             : 
     350             :         /* Fetch all non-ready tables. */
     351         366 :         rstates = GetSubscriptionNotReadyRelations(MySubscription->oid);
     352             : 
     353             :         /* Allocate the tracking info in a permanent memory context. */
     354         366 :         oldctx = MemoryContextSwitchTo(CacheMemoryContext);
     355        1180 :         foreach(lc, rstates)
     356             :         {
     357         814 :             rstate = palloc(sizeof(SubscriptionRelState));
     358         814 :             memcpy(rstate, lfirst(lc), sizeof(SubscriptionRelState));
     359         814 :             table_states = lappend(table_states, rstate);
     360             :         }
     361         366 :         MemoryContextSwitchTo(oldctx);
     362             : 
     363         366 :         table_states_valid = true;
     364             :     }
     365             : 
     366             :     /*
     367             :      * Prepare a hash table for tracking last start times of workers, to avoid
     368             :      * immediate restarts.  We don't need it if there are no tables that need
     369             :      * syncing.
     370             :      */
     371        5448 :     if (table_states && !last_start_times)
     372          52 :     {
     373             :         HASHCTL     ctl;
     374             : 
     375          52 :         memset(&ctl, 0, sizeof(ctl));
     376          52 :         ctl.keysize = sizeof(Oid);
     377          52 :         ctl.entrysize = sizeof(struct tablesync_start_time_mapping);
     378          52 :         last_start_times = hash_create("Logical replication table sync worker start times",
     379             :                                        256, &ctl, HASH_ELEM | HASH_BLOBS);
     380             :     }
     381             : 
     382             :     /*
     383             :      * Clean up the hash table when we're done with all tables (just to
     384             :      * release the bit of memory).
     385             :      */
     386        5396 :     else if (!table_states && last_start_times)
     387             :     {
     388          48 :         hash_destroy(last_start_times);
     389          48 :         last_start_times = NULL;
     390             :     }
     391             : 
     392             :     /*
     393             :      * Process all tables that are being synchronized.
     394             :      */
     395        6474 :     foreach(lc, table_states)
     396             :     {
     397        1026 :         SubscriptionRelState *rstate = (SubscriptionRelState *) lfirst(lc);
     398             : 
     399        1026 :         if (rstate->state == SUBREL_STATE_SYNCDONE)
     400             :         {
     401             :             /*
     402             :              * Apply has caught up to the position where the table sync has
     403             :              * finished.  Mark the table as ready so that the apply will just
     404             :              * continue to replicate it normally.
     405             :              */
     406         110 :             if (current_lsn >= rstate->lsn)
     407             :             {
     408         110 :                 rstate->state = SUBREL_STATE_READY;
     409         110 :                 rstate->lsn = current_lsn;
     410         110 :                 if (!started_tx)
     411             :                 {
     412           0 :                     StartTransactionCommand();
     413           0 :                     started_tx = true;
     414             :                 }
     415             : 
     416         220 :                 UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
     417         110 :                                            rstate->relid, rstate->state,
     418             :                                            rstate->lsn);
     419             :             }
     420             :         }
     421             :         else
     422             :         {
     423             :             LogicalRepWorker *syncworker;
     424             : 
     425             :             /*
     426             :              * Look for a sync worker for this relation.
     427             :              */
     428         916 :             LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
     429             : 
     430         916 :             syncworker = logicalrep_worker_find(MyLogicalRepWorker->subid,
     431             :                                                 rstate->relid, false);
     432             : 
     433         916 :             if (syncworker)
     434             :             {
     435             :                 /* Found one, update our copy of its state */
     436         268 :                 SpinLockAcquire(&syncworker->relmutex);
     437         268 :                 rstate->state = syncworker->relstate;
     438         268 :                 rstate->lsn = syncworker->relstate_lsn;
     439         268 :                 if (rstate->state == SUBREL_STATE_SYNCWAIT)
     440             :                 {
     441             :                     /*
     442             :                      * Sync worker is waiting for apply.  Tell sync worker it
     443             :                      * can catchup now.
     444             :                      */
     445         112 :                     syncworker->relstate = SUBREL_STATE_CATCHUP;
     446         112 :                     syncworker->relstate_lsn =
     447         112 :                         Max(syncworker->relstate_lsn, current_lsn);
     448             :                 }
     449         268 :                 SpinLockRelease(&syncworker->relmutex);
     450             : 
     451             :                 /* If we told worker to catch up, wait for it. */
     452         268 :                 if (rstate->state == SUBREL_STATE_SYNCWAIT)
     453             :                 {
     454             :                     /* Signal the sync worker, as it may be waiting for us. */
     455         112 :                     if (syncworker->proc)
     456         112 :                         logicalrep_worker_wakeup_ptr(syncworker);
     457             : 
     458             :                     /* Now safe to release the LWLock */
     459         112 :                     LWLockRelease(LogicalRepWorkerLock);
     460             : 
     461             :                     /*
     462             :                      * Wait (sleeping on our latch between checks) for the
     463             :                      * synchronization worker to reach the expected state (or die
                     :                      * trying).  The boolean result of the wait is not examined
                     :                      * here.
     464             :                      */
     465         112 :                     if (!started_tx)
     466             :                     {
     467          54 :                         StartTransactionCommand();
     468          54 :                         started_tx = true;
     469             :                     }
     470             : 
     471         112 :                     wait_for_relation_state_change(rstate->relid,
     472             :                                                    SUBREL_STATE_SYNCDONE);
     473             :                 }
     474             :                 else
     475         156 :                     LWLockRelease(LogicalRepWorkerLock);
     476             :             }
     477             :             else
     478             :             {
     479             :                 /*
     480             :                  * If there is no sync worker for this table yet, count
     481             :                  * running sync workers for this subscription, while we have
     482             :                  * the lock.
     483             :                  */
     484         648 :                 int         nsyncworkers =
     485         648 :                 logicalrep_sync_worker_count(MyLogicalRepWorker->subid);
     486             : 
     487             :                 /* Now safe to release the LWLock */
     488         648 :                 LWLockRelease(LogicalRepWorkerLock);
     489             : 
     490             :                 /*
     491             :                  * If there are free sync worker slot(s), start a new sync
     492             :                  * worker for the table.
     493             :                  */
     494         648 :                 if (nsyncworkers < max_sync_workers_per_subscription)
     495             :                 {
     496         132 :                     TimestampTz now = GetCurrentTimestamp();
     497             :                     struct tablesync_start_time_mapping *hentry;
     498             :                     bool        found;
     499             : 
     500         132 :                     hentry = hash_search(last_start_times, &rstate->relid,
     501             :                                          HASH_ENTER, &found);
     502             : 
     503         152 :                     if (!found ||
     504          20 :                         TimestampDifferenceExceeds(hentry->last_start_time, now,
     505             :                                                    wal_retrieve_retry_interval))
     506             :                     {
     507         366 :                         logicalrep_worker_launch(MyLogicalRepWorker->dbid,
     508         122 :                                                  MySubscription->oid,
     509         122 :                                                  MySubscription->name,
     510         122 :                                                  MyLogicalRepWorker->userid,
     511             :                                                  rstate->relid);
     512         122 :                         hentry->last_start_time = now; /* recorded only when a launch was requested */
     513             :                     }
     514             :                 }
     515             :             }
     516             :         }
     517             :     }
     518             : 
     519        5448 :     if (started_tx)
     520             :     {
     521         420 :         CommitTransactionCommand();
     522         420 :         pgstat_report_stat(false);
     523             :     }
     524        5448 : }
     525             : 
     526             : /*
     527             :  * Process possible state change(s) of tables that are being synchronized.
                     :  *
                     :  * Dispatches on am_tablesync_worker(): when it returns true,
                     :  * process_syncing_tables_for_sync() is run, otherwise
                     :  * process_syncing_tables_for_apply().  current_lsn is passed through
                     :  * unchanged in both cases.
     528             :  */
     529             : void
     530        5578 : process_syncing_tables(XLogRecPtr current_lsn)
     531             : {
     532        5578 :     if (am_tablesync_worker())
     533         130 :         process_syncing_tables_for_sync(current_lsn);
     534             :     else
     535        5448 :         process_syncing_tables_for_apply(current_lsn);
     536        5456 : }
     537             : 
     538             : /*
     539             :  * Create list of columns for COPY based on logical relation mapping.
                     :  *
                     :  * Returns a list holding one makeString() node per entry of
                     :  * rel->remoterel.attnames, in array order (indexes 0 .. natts - 1).  The
                     :  * result is NIL when rel->remoterel.natts is zero.
     540             :  */
     541             : static List *
     542         126 : make_copy_attnamelist(LogicalRepRelMapEntry *rel)
     543             : {
     544         126 :     List       *attnamelist = NIL;
     545             :     int         i;
     546             : 
     547         350 :     for (i = 0; i < rel->remoterel.natts; i++)
     548             :     {
     549         224 :         attnamelist = lappend(attnamelist,
     550         224 :                               makeString(rel->remoterel.attnames[i]));
     551             :     }
     552             : 
     553             : 
     554         126 :     return attnamelist;
     555             : }
     556             : 
     557             : /*
     558             :  * Data source callback for the COPY FROM, which reads from the remote
     559             :  * connection and passes the data back to our local COPY.
                     :  *
                     :  * outbuf:  destination for the bytes handed back to the caller.
                     :  * minread: stop as soon as at least this many bytes have been stored.
                     :  * maxread: never store more than this many bytes.
                     :  *
                     :  * Returns the number of bytes stored in outbuf.  The result can be
                     :  * smaller than minread: when walrcv_receive() reports a negative length
                     :  * the bytes gathered so far (possibly zero) are returned immediately.
                     :  *
                     :  * copybuf and wrconn are defined outside this function.  copybuf
                     :  * (data/len/cursor) remembers the unread tail of the last received
                     :  * message between calls; wrconn is the connection that is read from.
     560             :  */
     561             : static int
     562       26418 : copy_read_data(void *outbuf, int minread, int maxread)
     563             : {
     564       26418 :     int         bytesread = 0;
     565             :     int         avail;
     566             : 
     567             :     /*
                     :      * If there are some leftover data from previous read, use it.
                     :      *
                     :      * Leftover exists when an earlier message was larger than that
                     :      * call's maxread, which leaves copybuf->cursor short of
                     :      * copybuf->len.
                     :      *
                     :      * NOTE(review): unlike the receive path below, outbuf is not
                     :      * advanced after this memcpy.  If the loop that follows still runs,
                     :      * freshly received bytes are written at the start of outbuf again,
                     :      * on top of the leftover bytes.  That is harmless only if this
                     :      * branch always satisfies minread or exhausts maxread -- confirm
                     :      * with the caller.  This listing shows the branch as never executed.
                     :      */
     568       26418 :     avail = copybuf->len - copybuf->cursor;
     569       26418 :     if (avail)
     570             :     {
     571           0 :         if (avail > maxread)
     572           0 :             avail = maxread;
     573           0 :         memcpy(outbuf, &copybuf->data[copybuf->cursor], avail);
     574           0 :         copybuf->cursor += avail;
     575           0 :         maxread -= avail;
     576           0 :         bytesread += avail;
     577             :     }
     578             : 
     579       52836 :     while (maxread > 0 && bytesread < minread)
     580             :     {
     581       26418 :         pgsocket    fd = PGINVALID_SOCKET;
     582             :         int         len;
     583       26418 :         char       *buf = NULL;
     584             : 
     585             :         for (;;)
     586             :         {
     587             :             /*
                     :              * Try read the data.  buf and fd are passed by address; buf
                     :              * is used below as the message contents and fd as the socket
                     :              * to wait on.
                     :              */
     588       26418 :             len = walrcv_receive(wrconn, &buf, &fd);
     589             : 
     590       26418 :             CHECK_FOR_INTERRUPTS();
     591             : 
     592       26418 :             if (len == 0)
     593           0 :                 break;          /* nothing received: go wait below */
     594       26418 :             else if (len < 0)
     595       26544 :                 return bytesread;   /* presumably end of stream -- confirm */
     596             :             else
     597             :             {
     598             :                 /*
                     :                  * Process the data: make copybuf describe the received
                     :                  * message (the previous data pointer is overwritten), then
                     :                  * copy out at most maxread bytes and advance outbuf past
                     :                  * them.  Any remainder stays in copybuf for the next call.
                     :                  */
     599       26292 :                 copybuf->data = buf;
     600       26292 :                 copybuf->len = len;
     601       26292 :                 copybuf->cursor = 0;
     602             : 
     603       26292 :                 avail = copybuf->len - copybuf->cursor;
     604       26292 :                 if (avail > maxread)
     605           0 :                     avail = maxread;
     606       26292 :                 memcpy(outbuf, &copybuf->data[copybuf->cursor], avail);
     607       26292 :                 outbuf = (void *) ((char *) outbuf + avail);
     608       26292 :                 copybuf->cursor += avail;
     609       26292 :                 maxread -= avail;
     610       26292 :                 bytesread += avail;
     611             :             }
     612             : 
     613       26292 :             if (maxread <= 0 || bytesread >= minread)
     614       26292 :                 return bytesread;
     615           0 :         }
     616             : 
     617             :         /*
     618             :          * Wait for more data or latch.
                     :          *
                     :          * Reached only after walrcv_receive() returned 0.  The result of
                     :          * the wait is deliberately ignored; with the 1000L timeout
                     :          * (presumably milliseconds -- confirm) the outer loop simply
                     :          * retries the receive after the latch has been reset.
     619             :          */
     620           0 :         (void) WaitLatchOrSocket(MyLatch,
     621             :                                  WL_SOCKET_READABLE | WL_LATCH_SET |
     622             :                                  WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
     623             :                                  fd, 1000L, WAIT_EVENT_LOGICAL_SYNC_DATA);
     624             : 
     625           0 :         ResetLatch(MyLatch);
     626             :     }
     627             : 
     628           0 :     return bytesread;
     629             : }
     630             : 
     631             : 
     632             : /*
     633             :  * Get information about remote relation in similar fashion the RELATION
     634             :  * message provides during replication.
                     :  *
                     :  * Runs two catalog queries over wrconn and fills *lrel from the results:
                     :  *   nspname, relname  - set to the caller's pointers (not copied)
                     :  *   remoteid          - c.oid of the matching pg_class row
                     :  *   replident         - c.relreplident
                     :  *   relkind           - c.relkind
                     :  *   attnames, atttyps - palloc0'd arrays of MaxTupleAttributeNumber
                     :  *                       entries, filled in a.attnum order
                     :  *   attkeys           - set of zero-based array positions whose fourth
                     :  *                       result column (a.attnum = ANY(i.indkey)) is
                     :  *                       true; stays NULL when there are none
                     :  *   natts             - number of column rows returned
                     :  *
                     :  * Raises ERROR when either query does not return WALRCV_OK_TUPLES, when
                     :  * the first query returns no row, or when the column count reaches
                     :  * MaxTupleAttributeNumber.
                     :  *
                     :  * NOTE(review): a.atttypmod is selected (third column) but never read
                     :  * here.
                     :  * NOTE(review): the isnull flag of the fourth column is not checked
                     :  * before DatumGetBool(); presumably a NULL from the LEFT JOIN is meant to
                     :  * count as false -- confirm.
                     :  * NOTE(review): the error message of the second query lacks the
                     :  * "from publisher" wording used by the first one.
     635             :  */
     636             : static void
     637         126 : fetch_remote_table_info(char *nspname, char *relname,
     638             :                         LogicalRepRelation *lrel)
     639             : {
     640             :     WalRcvExecResult *res;
     641             :     StringInfoData cmd;
     642             :     TupleTableSlot *slot;
     643         126 :     Oid         tableRow[] = {OIDOID, CHAROID, CHAROID};
     644         126 :     Oid         attrRow[] = {TEXTOID, OIDOID, INT4OID, BOOLOID};
     645             :     bool        isnull;
     646             :     int         natt;
     647             : 
     648         126 :     lrel->nspname = nspname;
     649         126 :     lrel->relname = relname;
     650             : 
     651             :     /*
                     :      * First fetch Oid and replica identity (and relkind).  Both names
                     :      * are embedded through quote_literal_cstr(); tableRow lists the
                     :      * expected result column types.
                     :      */
     652         126 :     initStringInfo(&cmd);
     653         126 :     appendStringInfo(&cmd, "SELECT c.oid, c.relreplident, c.relkind"
     654             :                      "  FROM pg_catalog.pg_class c"
     655             :                      "  INNER JOIN pg_catalog.pg_namespace n"
     656             :                      "        ON (c.relnamespace = n.oid)"
     657             :                      " WHERE n.nspname = %s"
     658             :                      "   AND c.relname = %s",
     659             :                      quote_literal_cstr(nspname),
     660             :                      quote_literal_cstr(relname));
     661         126 :     res = walrcv_exec(wrconn, cmd.data, lengthof(tableRow), tableRow);
     662             : 
     663         126 :     if (res->status != WALRCV_OK_TUPLES)
     664           0 :         ereport(ERROR,
     665             :                 (errmsg("could not fetch table info for table \"%s.%s\" from publisher: %s",
     666             :                         nspname, relname, res->err)));
     667             : 
     668         126 :     slot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple);
     669         126 :     if (!tuplestore_gettupleslot(res->tuplestore, true, false, slot))
     670           0 :         ereport(ERROR,
     671             :                 (errmsg("table \"%s.%s\" not found on publisher",
     672             :                         nspname, relname)));
     673             : 
     674         126 :     lrel->remoteid = DatumGetObjectId(slot_getattr(slot, 1, &isnull));
     675         126 :     Assert(!isnull);
     676         126 :     lrel->replident = DatumGetChar(slot_getattr(slot, 2, &isnull));
     677         126 :     Assert(!isnull);
     678         126 :     lrel->relkind = DatumGetChar(slot_getattr(slot, 3, &isnull));
     679         126 :     Assert(!isnull);
     680             : 
     681         126 :     ExecDropSingleTupleTableSlot(slot);
     682         126 :     walrcv_clear_result(res);
     683             : 
     684             :     /*
                     :      * Now fetch columns.  The query skips rows with attnum <= 0 and
                     :      * dropped columns, and adds the attgenerated = '' filter only when
                     :      * the remote server version is at least 120000.  The command buffer
                     :      * is reused after resetStringInfo().
                     :      */
     685         126 :     resetStringInfo(&cmd);
     686         252 :     appendStringInfo(&cmd,
     687             :                      "SELECT a.attname,"
     688             :                      "       a.atttypid,"
     689             :                      "       a.atttypmod,"
     690             :                      "       a.attnum = ANY(i.indkey)"
     691             :                      "  FROM pg_catalog.pg_attribute a"
     692             :                      "  LEFT JOIN pg_catalog.pg_index i"
     693             :                      "       ON (i.indexrelid = pg_get_replica_identity_index(%u))"
     694             :                      " WHERE a.attnum > 0::pg_catalog.int2"
     695             :                      "   AND NOT a.attisdropped %s"
     696             :                      "   AND a.attrelid = %u"
     697             :                      " ORDER BY a.attnum",
     698             :                      lrel->remoteid,
     699         126 :                      (walrcv_server_version(wrconn) >= 120000 ? "AND a.attgenerated = ''" : ""),
     700             :                      lrel->remoteid);
     701         126 :     res = walrcv_exec(wrconn, cmd.data, lengthof(attrRow), attrRow);
     702             : 
     703         126 :     if (res->status != WALRCV_OK_TUPLES)
     704           0 :         ereport(ERROR,
     705             :                 (errmsg("could not fetch table info for table \"%s.%s\": %s",
     706             :                         nspname, relname, res->err)));
     707             : 
     708             :     /*
                     :      * We don't know the number of rows coming, so allocate enough space.
                     :      * Both arrays get MaxTupleAttributeNumber zeroed entries; attkeys
                     :      * starts as NULL and only grows through bms_add_member().
                     :      */
     709         126 :     lrel->attnames = palloc0(MaxTupleAttributeNumber * sizeof(char *));
     710         126 :     lrel->atttyps = palloc0(MaxTupleAttributeNumber * sizeof(Oid));
     711         126 :     lrel->attkeys = NULL;
     712             : 
     713         126 :     natt = 0;
     714         126 :     slot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple);
     715         476 :     while (tuplestore_gettupleslot(res->tuplestore, true, false, slot))
     716             :     {
     717         448 :         lrel->attnames[natt] =
     718         224 :             TextDatumGetCString(slot_getattr(slot, 1, &isnull));
     719         224 :         Assert(!isnull);
     720         224 :         lrel->atttyps[natt] = DatumGetObjectId(slot_getattr(slot, 2, &isnull));
     721         224 :         Assert(!isnull);
     722         224 :         if (DatumGetBool(slot_getattr(slot, 4, &isnull)))
     723         100 :             lrel->attkeys = bms_add_member(lrel->attkeys, natt);
     724             : 
     725             :         /*
                     :          * Should never happen.  The check runs after the increment, so
                     :          * the error fires as soon as natt reaches
                     :          * MaxTupleAttributeNumber, before any array slot beyond the
                     :          * allocated range could be written.
                     :          */
     726         224 :         if (++natt >= MaxTupleAttributeNumber)
     727           0 :             elog(ERROR, "too many columns in remote table \"%s.%s\"",
     728             :                  nspname, relname);
     729             : 
     730         224 :         ExecClearTuple(slot);
     731             :     }
     732         126 :     ExecDropSingleTupleTableSlot(slot);
     733             : 
     734         126 :     lrel->natts = natt;
     735             : 
     736         126 :     walrcv_clear_result(res);
     737         126 :     pfree(cmd.data);
     738         126 : }
     739             : 
     740             : /*
     741             :  * Copy existing data of a table from publisher.
     742             :  *
     743             :  * Caller is responsible for locking the local relation.
                     :  *
                     :  * rel is the already opened local relation that receives the data.
                     :  *
                     :  * Steps, in order: fetch the remote table description, register it in
                     :  * the relation map, open the map entry (asserting that it resolves to
                     :  * rel), send a COPY ... TO STDOUT command over wrconn, then run a local
                     :  * COPY FROM on rel with copy_read_data() as the data source and the
                     :  * remote column names as the column list.
                     :  *
                     :  * Raises ERROR when the remote COPY command does not answer with
                     :  * WALRCV_OK_COPY_OUT.
     744             :  */
     745             : static void
     746         126 : copy_table(Relation rel)
     747             : {
     748             :     LogicalRepRelMapEntry *relmapentry;
     749             :     LogicalRepRelation lrel;
     750             :     WalRcvExecResult *res;
     751             :     StringInfoData cmd;
     752             :     CopyState   cstate;
     753             :     List       *attnamelist;
     754             :     ParseState *pstate;
     755             : 
     756             :     /*
                     :      * Get the publisher relation info, looked up by the local
                     :      * relation's schema name and relation name.
                     :      */
     757         126 :     fetch_remote_table_info(get_namespace_name(RelationGetNamespace(rel)),
     758         126 :                             RelationGetRelationName(rel), &lrel);
     759             : 
     760             :     /* Put the relation into relmap. */
     761         126 :     logicalrep_relmap_update(&lrel);
     762             : 
     763             :     /* Map the publisher relation to local one; no lock is requested here. */
     764         126 :     relmapentry = logicalrep_rel_open(lrel.remoteid, NoLock);
     765         126 :     Assert(rel == relmapentry->localrel);
     766             : 
     767             :     /*
                     :      * Start copy on the publisher.  A remote relkind of RELKIND_RELATION
                     :      * gets a plain whole-table COPY; every other relkind takes the
                     :      * COPY (SELECT ...) branch below.
                     :      */
     768         126 :     initStringInfo(&cmd);
     769         126 :     if (lrel.relkind == RELKIND_RELATION)
     770         118 :         appendStringInfo(&cmd, "COPY %s TO STDOUT",
     771         118 :                          quote_qualified_identifier(lrel.nspname, lrel.relname));
     772             :     else
     773             :     {
     774             :         /*
     775             :          * For non-tables, we need to do COPY (SELECT ...), but we can't just
     776             :          * do SELECT * because we need to not copy generated columns.
                     :          *
                     :          * The select list is the fetched column names, each passed
                     :          * through quote_identifier() and separated by ", ".
     777             :          */
     778           8 :         appendStringInfoString(&cmd, "COPY (SELECT ");
     779          24 :         for (int i = 0; i < lrel.natts; i++)
     780             :         {
     781          16 :             appendStringInfoString(&cmd, quote_identifier(lrel.attnames[i]));
     782          16 :             if (i < lrel.natts - 1)
     783           8 :                 appendStringInfoString(&cmd, ", ");
     784             :         }
     785           8 :         appendStringInfo(&cmd, " FROM %s) TO STDOUT",
     786           8 :                          quote_qualified_identifier(lrel.nspname, lrel.relname));
     787             :     }
     788         126 :     res = walrcv_exec(wrconn, cmd.data, 0, NULL);
     789         126 :     pfree(cmd.data);    /* the error message below does not use cmd */
     790         126 :     if (res->status != WALRCV_OK_COPY_OUT)
     791           0 :         ereport(ERROR,
     792             :                 (errmsg("could not start initial contents copy for table \"%s.%s\": %s",
     793             :                         lrel.nspname, lrel.relname, res->err)));
     794         126 :     walrcv_clear_result(res);
     795             : 
     796         126 :     copybuf = makeStringInfo(); /* buffer state read by copy_read_data() */
     797             : 
     798         126 :     pstate = make_parsestate(NULL);
     799         126 :     (void) addRangeTableEntryForRelation(pstate, rel, AccessShareLock,
     800             :                                          NULL, false, false);
     801             : 
     802         126 :     attnamelist = make_copy_attnamelist(relmapentry);
     803         126 :     cstate = BeginCopyFrom(pstate, rel, NULL, false, copy_read_data, attnamelist, NIL);
     804             : 
     805             :     /*
                     :      * Do the copy.  The number of rows processed is ignored.
                     :      *
                     :      * NOTE(review): this listing shows CopyFrom() entered more often
                     :      * than the next line is reached, so it apparently does not always
                     :      * return (presumably it raised an error) -- confirm.
                     :      * NOTE(review): nothing in this function releases cstate or copybuf
                     :      * afterwards -- confirm that cleanup is handled elsewhere.
                     :      */
     806         126 :     (void) CopyFrom(cstate);
     807             : 
     808         122 :     logicalrep_rel_close(relmapentry, NoLock);
     809         122 : }
     810             : 
     811             : /*
     812             :  * Start syncing the table in the sync worker.
     813             :  *
     814             :  * If nothing needs to be done to sync the table, we exit the worker without
     815             :  * any further action.
     816             :  *
     817             :  * The returned slot name is palloc'ed in current memory context.
                     :  *
                     :  * origin_startpos is an output argument: it is handed to
                     :  * walrcv_create_slot() to be filled in, and the value found there
                     :  * afterwards is published as this worker's relstate_lsn.
                     :  *
                     :  * Steps, in order: read the table's sync state and mirror it into
                     :  * MyLogicalRepWorker; leave via finish_sync_worker() if there is nothing
                     :  * to do; connect to the publisher; record DATASYNC both in shared memory
                     :  * and through UpdateSubscriptionRelState(); open a local transaction and
                     :  * the target table; open a remote REPEATABLE READ transaction, create
                     :  * the temporary slot and copy the table; commit the remote transaction;
                     :  * set SYNCWAIT in shared memory and wait for SUBREL_STATE_CATCHUP.
                     :  *
                     :  * Raises ERROR when the connection cannot be made or when the remote
                     :  * BEGIN or COMMIT does not answer with WALRCV_OK_COMMAND.
     818             :  */
     819             : char *
     820         126 : LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
     821             : {
     822             :     char       *slotname;
     823             :     char       *err;
     824             :     char        relstate;
     825             :     XLogRecPtr  relstate_lsn;
     826             :     Relation    rel;
     827             :     WalRcvExecResult *res;
     828             : 
     829             :     /*
                     :      * Check the state of the table synchronization.  The lookup runs in
                     :      * its own transaction; the result is then copied into
                     :      * MyLogicalRepWorker while holding relmutex.
                     :      */
     830         126 :     StartTransactionCommand();
     831         126 :     relstate = GetSubscriptionRelState(MyLogicalRepWorker->subid,
     832         126 :                                        MyLogicalRepWorker->relid,
     833             :                                        &relstate_lsn);
     834         126 :     CommitTransactionCommand();
     835             : 
     836         126 :     SpinLockAcquire(&MyLogicalRepWorker->relmutex);
     837         126 :     MyLogicalRepWorker->relstate = relstate;
     838         126 :     MyLogicalRepWorker->relstate_lsn = relstate_lsn;
     839         126 :     SpinLockRelease(&MyLogicalRepWorker->relmutex);
     840             : 
     841             :     /*
     842             :      * If synchronization is already done or no longer necessary, exit now
     843             :      * that we've updated shared memory state.
                     :      *
                     :      * The switch has no default case: any state other than the three
                     :      * listed falls out of it and continues below.
     844             :      */
     845         126 :     switch (relstate)
     846             :     {
     847             :         case SUBREL_STATE_SYNCDONE:
     848             :         case SUBREL_STATE_READY:
     849             :         case SUBREL_STATE_UNKNOWN:
     850           0 :             finish_sync_worker();   /* doesn't return */
     851             :     }
     852             : 
     853             :     /*
     854             :      * To build a slot name for the sync work, we are limited to NAMEDATALEN -
     855             :      * 1 characters.  We cut the original slot name to NAMEDATALEN - 28 chars
     856             :      * and append _%u_sync_%u (1 + 10 + 6 + 10 + '\0').  (It's actually the
     857             :      * NAMEDATALEN on the remote that matters, but this scheme will also work
     858             :      * reasonably if that is different.)
                     :      *
                     :      * The two %u values are the subscription's oid and this worker's
                     :      * relid.
     859             :      */
     860             :     StaticAssertStmt(NAMEDATALEN >= 32, "NAMEDATALEN too small");  /* for sanity */
     861         378 :     slotname = psprintf("%.*s_%u_sync_%u",
     862             :                         NAMEDATALEN - 28,
     863         126 :                         MySubscription->slotname,
     864         126 :                         MySubscription->oid,
     865         126 :                         MyLogicalRepWorker->relid);
     866             : 
     867             :     /*
     868             :      * Here we use the slot name instead of the subscription name as the
     869             :      * application_name, so that it is different from the main apply worker,
     870             :      * so that synchronous replication can distinguish them.
                     :      *
                     :      * On failure wrconn is NULL and err carries the text that goes into
                     :      * the error message.
     871             :      */
     872         126 :     wrconn = walrcv_connect(MySubscription->conninfo, true, slotname, &err);
     873         126 :     if (wrconn == NULL)
     874           0 :         ereport(ERROR,
     875             :                 (errmsg("could not connect to the publisher: %s", err)));
     876             : 
     877         126 :     Assert(MyLogicalRepWorker->relstate == SUBREL_STATE_INIT ||
     878             :            MyLogicalRepWorker->relstate == SUBREL_STATE_DATASYNC);
     879             : 
     880         126 :     SpinLockAcquire(&MyLogicalRepWorker->relmutex);
     881         126 :     MyLogicalRepWorker->relstate = SUBREL_STATE_DATASYNC;
     882         126 :     MyLogicalRepWorker->relstate_lsn = InvalidXLogRecPtr;
     883         126 :     SpinLockRelease(&MyLogicalRepWorker->relmutex);
     884             : 
     885             :     /*
                     :      * Update the state and make it visible to others.  The values just
                     :      * stored in MyLogicalRepWorker are written through
                     :      * UpdateSubscriptionRelState() in a transaction of their own.
                     :      */
     886         126 :     StartTransactionCommand();
     887         378 :     UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
     888         126 :                                MyLogicalRepWorker->relid,
     889         126 :                                MyLogicalRepWorker->relstate,
     890         126 :                                MyLogicalRepWorker->relstate_lsn);
     891         126 :     CommitTransactionCommand();
     892         126 :     pgstat_report_stat(false);
     893             : 
     894             :     /*
     895             :      * We want to do the table data sync in a single transaction.
                     :      *
                     :      * NOTE(review): no matching CommitTransactionCommand() follows in
                     :      * this function, so this transaction is still open on return;
                     :      * presumably the caller finishes it -- confirm.
     896             :      */
     897         126 :     StartTransactionCommand();
     898             : 
     899             :     /*
     900             :      * Use a standard write lock here. It might be better to disallow access
     901             :      * to the table while it's being synchronized. But we don't want to block
     902             :      * the main apply process from working and it has to open the relation in
     903             :      * RowExclusiveLock when remapping remote relation id to local one.
     904             :      */
     905         126 :     rel = table_open(MyLogicalRepWorker->relid, RowExclusiveLock);
     906             : 
     907             :     /*
     908             :      * Start a transaction in the remote node in REPEATABLE READ mode.  This
     909             :      * ensures that both the replication slot we create (see below) and the
     910             :      * COPY are consistent with each other.
     911             :      */
     912         126 :     res = walrcv_exec(wrconn,
     913             :                       "BEGIN READ ONLY ISOLATION LEVEL REPEATABLE READ",
     914             :                       0, NULL);
     915         126 :     if (res->status != WALRCV_OK_COMMAND)
     916           0 :         ereport(ERROR,
     917             :                 (errmsg("table copy could not start transaction on publisher"),
     918             :                  errdetail("The error was: %s", res->err)));
     919         126 :     walrcv_clear_result(res);
     920             : 
     921             :     /*
     922             :      * Create a new temporary logical decoding slot.  This slot will be used
     923             :      * for the catchup phase after COPY is done, so tell it to use the
     924             :      * snapshot to make the final data consistent.
                     :      *
                     :      * origin_startpos is passed along to be filled in by this call.
     925             :      */
     926         126 :     walrcv_create_slot(wrconn, slotname, true,
     927             :                        CRS_USE_SNAPSHOT, origin_startpos);
     928             : 
     929             :     /*
                     :      * Now do the initial data copy, with a transaction snapshot pushed
                     :      * as the active snapshot for the duration of copy_table().
                     :      */
     930         126 :     PushActiveSnapshot(GetTransactionSnapshot());
     931         126 :     copy_table(rel);
     932         122 :     PopActiveSnapshot();
     933             : 
     934         122 :     res = walrcv_exec(wrconn, "COMMIT", 0, NULL);
     935         122 :     if (res->status != WALRCV_OK_COMMAND)
     936           0 :         ereport(ERROR,
     937             :                 (errmsg("table copy could not finish transaction on publisher"),
     938             :                  errdetail("The error was: %s", res->err)));
     939         122 :     walrcv_clear_result(res);
     940             : 
     941         122 :     table_close(rel, NoLock);   /* presumably keeps the lock -- confirm */
     942             : 
     943             :     /* Make the copy visible. */
     944         122 :     CommandCounterIncrement();
     945             : 
     946             :     /*
     947             :      * We are done with the initial data synchronization, update the state.
                     :      *
                     :      * Only the shared-memory copy in MyLogicalRepWorker is changed here;
                     :      * UpdateSubscriptionRelState() is not called for SYNCWAIT in this
                     :      * function.
     948             :      */
     949         122 :     SpinLockAcquire(&MyLogicalRepWorker->relmutex);
     950         122 :     MyLogicalRepWorker->relstate = SUBREL_STATE_SYNCWAIT;
     951         122 :     MyLogicalRepWorker->relstate_lsn = *origin_startpos;
     952         122 :     SpinLockRelease(&MyLogicalRepWorker->relmutex);
     953             : 
     954             :     /*
     955             :      * Finally, wait until the main apply worker tells us to catch up and then
     956             :      * return to let LogicalRepApplyLoop do it.
     957             :      */
     958         122 :     wait_for_worker_state_change(SUBREL_STATE_CATCHUP);
     959         122 :     return slotname;
     960             : }

Generated by: LCOV version 1.14