LCOV - code coverage report
Current view: top level - src/backend/replication/logical - worker.c (source / functions) Hit Total Coverage
Test: PostgreSQL 14devel Lines: 1015 1084 93.6 %
Date: 2020-10-28 11:24:57 Functions: 52 54 96.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  * worker.c
       3             :  *     PostgreSQL logical replication worker (apply)
       4             :  *
       5             :  * Copyright (c) 2016-2020, PostgreSQL Global Development Group
       6             :  *
       7             :  * IDENTIFICATION
       8             :  *    src/backend/replication/logical/worker.c
       9             :  *
      10             :  * NOTES
      11             :  *    This file contains the worker which applies logical changes as they come
      12             :  *    from remote logical replication stream.
      13             :  *
      14             :  *    The main worker (apply) is started by logical replication worker
      15             :  *    launcher for every enabled subscription in a database. It uses
      16             :  *    walsender protocol to communicate with publisher.
      17             :  *
      18             :  *    This module includes server facing code and shares libpqwalreceiver
      19             :  *    module with walreceiver for providing the libpq specific functionality.
      20             :  *
      21             :  *
      22             :  * STREAMED TRANSACTIONS
      23             :  * ---------------------
      24             :  * Streamed transactions (large transactions exceeding a memory limit on the
      25             :  * upstream) are not applied immediately, but instead, the data is written
      26             :  * to temporary files and then applied at once when the final commit arrives.
      27             :  *
      28             :  * Unlike the regular (non-streamed) case, handling streamed transactions has
      29             :  * to handle aborts of both the toplevel transaction and subtransactions. This
      30             :  * is achieved by tracking offsets for subtransactions, which is then used
      31             :  * to truncate the file with serialized changes.
      32             :  *
      33             :  * The files are placed in tmp file directory by default, and the filenames
      34             :  * include both the XID of the toplevel transaction and OID of the
      35             :  * subscription. This is necessary so that different workers processing a
      36             :  * remote transaction with the same XID don't interfere with each other.
      37             :  *
      38             :  * We use BufFiles instead of using normal temporary files because (a) the
      39             :  * BufFile infrastructure supports temporary files that exceed the OS file size
      40             :  * limit, (b) it provides automatic cleanup on error, and (c) it lets these
      41             :  * files survive across local transactions, so that they can be opened at
      42             :  * stream start and closed at stream stop. We decided to use SharedFileSet
      43             :  * infrastructure as without that it deletes the files on the closure of the
      44             :  * file and if we decide to keep stream files open across the start/stop stream
      45             :  * then it will consume a lot of memory (more than 8K for each BufFile and
      46             :  * there could be multiple such BufFiles as the subscriber could receive
      47             :  * multiple start/stop streams for different transactions before getting the
      48             :  * commit). Moreover, if we don't use SharedFileSet then we also need to invent
      49             :  * a new way to pass filenames to BufFile APIs so that we are allowed to open
      50             :  * the file we desired across multiple stream-open calls for the same
      51             :  * transaction.
      52             :  *-------------------------------------------------------------------------
      53             :  */
      54             : 
      55             : #include "postgres.h"
      56             : 
      57             : #include <sys/stat.h>
      58             : #include <unistd.h>
      59             : 
      60             : #include "access/table.h"
      61             : #include "access/tableam.h"
      62             : #include "access/xact.h"
      63             : #include "access/xlog_internal.h"
      64             : #include "catalog/catalog.h"
      65             : #include "catalog/namespace.h"
      66             : #include "catalog/partition.h"
      67             : #include "catalog/pg_inherits.h"
      68             : #include "catalog/pg_subscription.h"
      69             : #include "catalog/pg_subscription_rel.h"
      70             : #include "catalog/pg_tablespace.h"
      71             : #include "commands/tablecmds.h"
      72             : #include "commands/tablespace.h"
      73             : #include "commands/trigger.h"
      74             : #include "executor/executor.h"
      75             : #include "executor/execPartition.h"
      76             : #include "executor/nodeModifyTable.h"
      77             : #include "funcapi.h"
      78             : #include "libpq/pqformat.h"
      79             : #include "libpq/pqsignal.h"
      80             : #include "mb/pg_wchar.h"
      81             : #include "miscadmin.h"
      82             : #include "nodes/makefuncs.h"
      83             : #include "optimizer/optimizer.h"
      84             : #include "parser/analyze.h"
      85             : #include "parser/parse_relation.h"
      86             : #include "pgstat.h"
      87             : #include "postmaster/bgworker.h"
      88             : #include "postmaster/interrupt.h"
      89             : #include "postmaster/postmaster.h"
      90             : #include "postmaster/walwriter.h"
      91             : #include "replication/decode.h"
      92             : #include "replication/logical.h"
      93             : #include "replication/logicalproto.h"
      94             : #include "replication/logicalrelation.h"
      95             : #include "replication/logicalworker.h"
      96             : #include "replication/origin.h"
      97             : #include "replication/reorderbuffer.h"
      98             : #include "replication/snapbuild.h"
      99             : #include "replication/walreceiver.h"
     100             : #include "replication/worker_internal.h"
     101             : #include "rewrite/rewriteHandler.h"
     102             : #include "storage/buffile.h"
     103             : #include "storage/bufmgr.h"
     104             : #include "storage/fd.h"
     105             : #include "storage/ipc.h"
     106             : #include "storage/lmgr.h"
     107             : #include "storage/proc.h"
     108             : #include "storage/procarray.h"
     109             : #include "tcop/tcopprot.h"
     110             : #include "utils/builtins.h"
     111             : #include "utils/catcache.h"
     112             : #include "utils/dynahash.h"
     113             : #include "utils/datum.h"
     114             : #include "utils/fmgroids.h"
     115             : #include "utils/guc.h"
     116             : #include "utils/inval.h"
     117             : #include "utils/lsyscache.h"
     118             : #include "utils/memutils.h"
     119             : #include "utils/rel.h"
     120             : #include "utils/syscache.h"
     121             : #include "utils/timeout.h"
     122             : 
     123             : #define NAPTIME_PER_CYCLE 1000  /* max sleep time between cycles, in milliseconds (1s) */
     124             : 
     125             : typedef struct FlushPosition /* one local/remote LSN pair that can be linked into a dlist */
     126             : {
     127             :     dlist_node  node;           /* list link; NOTE(review): presumably chained on lsn_mapping -- confirm */
     128             :     XLogRecPtr  local_end;      /* NOTE(review): presumably the local WAL position paired with remote_end -- confirm */
     129             :     XLogRecPtr  remote_end;     /* NOTE(review): presumably the end LSN on the remote (publisher) side -- confirm */
     130             : } FlushPosition;
     131             : 
     132             : static dlist_head lsn_mapping = DLIST_STATIC_INIT(lsn_mapping); /* statically initialized list head; NOTE(review): presumably holds FlushPosition nodes -- confirm */
     133             : 
     134             : typedef struct SlotErrCallbackArg /* state read by slot_store_error_callback() */
     135             : {
     136             :     LogicalRepRelMapEntry *rel; /* relation map entry of the tuple being stored */
     137             :     int         local_attnum;   /* 0-based local attribute index, or -1 while no column is being converted */
     138             :     int         remote_attnum;  /* index into the remote relation's column arrays, or -1 when not set */
     139             : } SlotErrCallbackArg;
     140             : 
     141             : /*
     142             :  * Entry of the stream xid hash (xidhash), keyed by xid. Whenever we see a new
     143             :  * xid we create this entry, create the streaming file along with it, and store
     144             :  * the fileset handle. The subxact file is created iff there is any subxact info
     145             :  * under this xid. Subsequent streams for the same xid look the entry up to get
     146             :  * the corresponding fileset handles; keeping them in a hash makes that fast.
     147             :  */
     148             : typedef struct StreamXidHash
     149             : {
     150             :     TransactionId xid;          /* xid is the hash key and must be first */
     151             :     SharedFileSet *stream_fileset;  /* shared file set for stream data */
     152             :     SharedFileSet *subxact_fileset; /* shared file set for subxact info */
     153             : } StreamXidHash;
     154             : 
     155             : static MemoryContext ApplyMessageContext = NULL; /* context that ensure_transaction() switches into */
     156             : MemoryContext ApplyContext = NULL;
     157             : 
     158             : /* per stream context for streaming transactions */
     159             : static MemoryContext LogicalStreamingContext = NULL;
     160             : 
     161             : WalReceiverConn *wrconn = NULL;
     162             : 
     163             : Subscription *MySubscription = NULL;
     164             : bool        MySubscriptionValid = false;
     165             : 
     166             : bool        in_remote_transaction = false;
     167             : static XLogRecPtr remote_final_lsn = InvalidXLogRecPtr; /* compared against rel->statelsn in should_apply_changes_for_rel() */
     168             : 
     169             : /* fields valid only when processing streamed transaction */
     170             : bool        in_streamed_transaction = false; /* tested by handle_streamed_transaction() to decide whether to spool */
     171             : 
     172             : static TransactionId stream_xid = InvalidTransactionId; /* asserted valid whenever a change is spooled */
     173             : 
     174             : /*
     175             :  * Hash table for storing the streaming xid information along with shared file
     176             :  * set for streaming and subxact files.  Entries are of type StreamXidHash.
     177             :  */
     178             : static HTAB *xidhash = NULL;
     179             : 
     180             : /* BufFile handle of the current streaming file; asserted non-NULL whenever a change is spooled */
     181             : static BufFile *stream_fd = NULL;
     182             : 
     183             : typedef struct SubXactInfo /* element type of ApplySubXactData.subxacts */
     184             : {
     185             :     TransactionId xid;          /* XID of the subxact */
     186             :     int         fileno;         /* file number in the buffile */
     187             :     off_t       offset;         /* offset in the file */
     188             : } SubXactInfo;
     189             : 
     190             : /* Sub-transaction data for the current streaming transaction (single instance: subxact_data) */
     191             : typedef struct ApplySubXactData
     192             : {
     193             :     uint32      nsubxacts;      /* number of sub-transactions */
     194             :     uint32      nsubxacts_max;  /* current capacity of subxacts */
     195             :     TransactionId subxact_last; /* xid of the last sub-transaction */
     196             :     SubXactInfo *subxacts;      /* sub-xact offset in changes file */
     197             : } ApplySubXactData;
     198             : 
     199             : static ApplySubXactData subxact_data = {0, 0, InvalidTransactionId, NULL}; /* starts empty: zero counts, no last xid, no array */
     200             : 
     201             : static inline void subxact_filename(char *path, Oid subid, TransactionId xid); /* NOTE(review): 'path' is a caller-supplied buffer; required size not visible here */
     202             : static inline void changes_filename(char *path, Oid subid, TransactionId xid);
     203             : 
     204             : /*
     205             :  * Tracking of subtransactions of a given toplevel (streamed) transaction.
     206             :  */
     207             : static void subxact_info_write(Oid subid, TransactionId xid);
     208             : static void subxact_info_read(Oid subid, TransactionId xid);
     209             : static void subxact_info_add(TransactionId xid);
     210             : static inline void cleanup_subxact_info(void);
     211             : 
     212             : /*
     213             :  * Serialize and deserialize changes for a toplevel transaction (stream files).
     214             :  */
     215             : static void stream_cleanup_files(Oid subid, TransactionId xid);
     216             : static void stream_open_file(Oid subid, TransactionId xid, bool first);
     217             : static void stream_write_change(char action, StringInfo s);
     218             : static void stream_close_file(void);
     219             : 
     220             : static void send_feedback(XLogRecPtr recvpos, bool force, bool requestReply);
     221             : 
     222             : static void store_flush_position(XLogRecPtr remote_lsn);
     223             : 
     224             : static void maybe_reread_subscription(void);
     225             : 
     226             : /* forward declaration; needed because of stream_commit */
     227             : static void apply_dispatch(StringInfo s);
     228             : 
     229             : static void apply_handle_insert_internal(ResultRelInfo *relinfo,
     230             :                                          EState *estate, TupleTableSlot *remoteslot);
     231             : static void apply_handle_update_internal(ResultRelInfo *relinfo,
     232             :                                          EState *estate, TupleTableSlot *remoteslot,
     233             :                                          LogicalRepTupleData *newtup,
     234             :                                          LogicalRepRelMapEntry *relmapentry);
     235             : static void apply_handle_delete_internal(ResultRelInfo *relinfo, EState *estate,
     236             :                                          TupleTableSlot *remoteslot,
     237             :                                          LogicalRepRelation *remoterel);
     238             : static bool FindReplTupleInLocalRel(EState *estate, Relation localrel,
     239             :                                     LogicalRepRelation *remoterel,
     240             :                                     TupleTableSlot *remoteslot,
     241             :                                     TupleTableSlot **localslot);
     242             : static void apply_handle_tuple_routing(ResultRelInfo *relinfo,
     243             :                                        EState *estate,
     244             :                                        TupleTableSlot *remoteslot,
     245             :                                        LogicalRepTupleData *newtup,
     246             :                                        LogicalRepRelMapEntry *relmapentry,
     247             :                                        CmdType operation);
     248             : 
     249             : static int apply_spooled_messages(TransactionId xid, XLogRecPtr lsn);
     250             : 
     251             : /*
     252             :  * Decide whether this worker should apply changes for the given relation.
     253             :  * Returns true if changes should be applied, false if they should be skipped.
     254             :  * This is mainly needed for initial relation data sync as that runs in
     255             :  * separate worker process running in parallel and we need some way to skip
     256             :  * changes coming to the main apply worker during the sync of a table.
     257             :  *
     258             :  * Note we need to do smaller or equals comparison for SYNCDONE state because
     259             :  * it might hold position of end of initial slot consistent point WAL
     260             :  * record + 1 (ie start of next record) and next record can be COMMIT of
     261             :  * transaction we are now processing (which is what we set remote_final_lsn
     262             :  * to in apply_handle_begin).
     263             :  */
     264             : static bool
     265      227766 : should_apply_changes_for_rel(LogicalRepRelMapEntry *rel)
     266             : {
     267      227766 :     if (am_tablesync_worker())
     268           0 :         return MyLogicalRepWorker->relid == rel->localreloid; /* tablesync worker: only the relation whose OID equals this worker's relid */
     269             :     else
     270      455540 :         return (rel->state == SUBREL_STATE_READY || /* otherwise: relation is READY, ... */
     271          40 :                 (rel->state == SUBREL_STATE_SYNCDONE && /* ... or SYNCDONE with its state LSN ... */
     272           8 :                  rel->statelsn <= remote_final_lsn)); /* ... not beyond remote_final_lsn */
     273             : }
     274             : 
     275             : /*
     276             :  * Make sure a local transaction is in progress, starting one if needed.
     277             :  * Returns true if a transaction was started here, false if one already existed.
     278             :  * Also switches to ApplyMessageContext as necessary.
     279             :  */
     280             : static bool
     281      228454 : ensure_transaction(void)
     282             : {
     283      228454 :     if (IsTransactionState())
     284             :     {
     285      227376 :         SetCurrentStatementStartTimestamp();
     286             : 
     287      227376 :         if (CurrentMemoryContext != ApplyMessageContext) /* already inside a transaction: only fix up the memory context */
     288           0 :             MemoryContextSwitchTo(ApplyMessageContext);
     289             : 
     290      227376 :         return false;
     291             :     }
     292             : 
     293        1078 :     SetCurrentStatementStartTimestamp();
     294        1078 :     StartTransactionCommand();
     295             : 
     296        1078 :     maybe_reread_subscription(); /* NOTE(review): hit counts drop after this call, so it presumably may not return -- confirm */
     297             : 
     298        1074 :     MemoryContextSwitchTo(ApplyMessageContext);
     299        1074 :     return true;
     300             : }
     301             : 
     302             : /*
     303             :  * Spool a change that belongs to a streamed transaction.
     304             :  *
     305             :  * If in streaming mode (receiving a block of streamed transaction), we
     306             :  * simply redirect it to a file for the proper toplevel transaction.
     307             :  * In that case the leading 4-byte XID is consumed from the message 's'.
     308             :  * Returns true for streamed transactions, false otherwise (regular mode).
     309             :  */
     310             : static bool
     311      476874 : handle_streamed_transaction(const char action, StringInfo s)
     312             : {
     313             :     TransactionId xid;
     314             : 
     315             :     /* not in streaming mode: the message is left untouched for the caller */
     316      476874 :     if (!in_streamed_transaction)
     317      228036 :         return false;
     318             : 
     319      248838 :     Assert(stream_fd != NULL);
     320      248838 :     Assert(TransactionIdIsValid(stream_xid));
     321             : 
     322             :     /*
     323             :      * We should have received XID of the subxact as the first part of the
     324             :      * message, so extract it.
     325             :      */
     326      248838 :     xid = pq_getmsgint(s, 4);
     327             : 
     328      248838 :     Assert(TransactionIdIsValid(xid));
     329             : 
     330             :     /* Add the new subxact to the array (unless already there). */
     331      248838 :     subxact_info_add(xid);
     332             : 
     333             :     /* write the change, tagged with its action code, to the current file */
     334      248838 :     stream_write_change(action, s);
     335             : 
     336      248838 :     return true;
     337             : }
     338             : 
     339             : /*
     340             :  * Build an executor state for 'rel', for evaluation of constraint expressions,
     341             :  * indexes and triggers.  The range table holds one entry, for rel->localrel.
     342             :  * Also calls AfterTriggerBeginQuery() so that AFTER triggers can be caught.
     343             :  * This is based on similar code in copy.c
     344             :  */
     345             : static EState *
     346      227702 : create_estate_for_relation(LogicalRepRelMapEntry *rel)
     347             : {
     348             :     EState     *estate;
     349             :     RangeTblEntry *rte;
     350             : 
     351      227702 :     estate = CreateExecutorState();
     352             : 
     353      227702 :     rte = makeNode(RangeTblEntry); /* single range table entry describing the local relation */
     354      227702 :     rte->rtekind = RTE_RELATION;
     355      227702 :     rte->relid = RelationGetRelid(rel->localrel);
     356      227702 :     rte->relkind = rel->localrel->rd_rel->relkind;
     357      227702 :     rte->rellockmode = AccessShareLock;
     358      227702 :     ExecInitRangeTable(estate, list_make1(rte));
     359             : 
     360      227702 :     estate->es_output_cid = GetCurrentCommandId(true);
     361             : 
     362             :     /* Prepare to catch AFTER triggers. */
     363      227702 :     AfterTriggerBeginQuery();
     364             : 
     365      227702 :     return estate;
     366             : }
     367             : 
     368             : /*
     369             :  * Evaluate the column defaults of local columns that have no matching remote
     370             :  * column, and store the results in 'slot'.
     371             :  *
     372             :  * This allows us to support tables which have more columns on the downstream
     373             :  * than on the upstream.
     374             :  */
     375             : static void
     376      102826 : slot_fill_defaults(LogicalRepRelMapEntry *rel, EState *estate,
     377             :                    TupleTableSlot *slot)
     378             : {
     379      102826 :     TupleDesc   desc = RelationGetDescr(rel->localrel);
     380      102826 :     int         num_phys_attrs = desc->natts;
     381             :     int         i;
     382             :     int         attnum,
     383      102826 :                 num_defaults = 0;
     384             :     int        *defmap; /* local (0-based) attribute index for each collected default */
     385             :     ExprState **defexprs; /* initialized default expression for each collected default */
     386             :     ExprContext *econtext;
     387             : 
     388      102826 :     econtext = GetPerTupleExprContext(estate);
     389             : 
     390             :     /* We got all the data via replication, no need to evaluate anything. */
     391      102826 :     if (num_phys_attrs == rel->remoterel.natts)
     392      103594 :         return;
     393             : 
     394      102058 :     defmap = (int *) palloc(num_phys_attrs * sizeof(int));
     395      102058 :     defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
     396             : 
     397      102058 :     Assert(rel->attrmap->maplen == num_phys_attrs);
     398      537176 :     for (attnum = 0; attnum < num_phys_attrs; attnum++)
     399             :     {
     400             :         Expr       *defexpr;
     401             : 
     402      435118 :         if (TupleDescAttr(desc, attnum)->attisdropped || TupleDescAttr(desc, attnum)->attgenerated) /* dropped and generated columns get no default here */
     403           4 :             continue;
     404             : 
     405      435114 :         if (rel->attrmap->attnums[attnum] >= 0) /* mapped to a remote column: nothing to fill in */
     406      229104 :             continue;
     407             : 
     408      206010 :         defexpr = (Expr *) build_column_default(rel->localrel, attnum + 1); /* attnum is 0-based here; the callee is given attnum + 1 */
     409             : 
     410      206010 :         if (defexpr != NULL)
     411             :         {
     412             :             /* Run the expression through planner */
     413      170042 :             defexpr = expression_planner(defexpr);
     414             : 
     415             :             /* Initialize executable expression (NOTE(review): original said "in copycontext", wording apparently inherited from copy.c -- confirm the context) */
     416      170042 :             defexprs[num_defaults] = ExecInitExpr(defexpr, NULL);
     417      170042 :             defmap[num_defaults] = attnum;
     418      170042 :             num_defaults++;
     419             :         }
     420             : 
     421             :     }
     422             : 
     423      272100 :     for (i = 0; i < num_defaults; i++) /* evaluate each collected default straight into the slot's value/isnull arrays */
     424      340084 :         slot->tts_values[defmap[i]] =
     425      170042 :             ExecEvalExpr(defexprs[i], econtext, &slot->tts_isnull[defmap[i]]);
     426             : }
     427             : 
     428             : /*
     429             :  * Error context callback for column value conversion failures: adds the remote relation, column, and remote/local type names. 'arg' is a SlotErrCallbackArg.
     430             :  */
     431             : static void
     432           0 : slot_store_error_callback(void *arg)
     433             : {
     434           0 :     SlotErrCallbackArg *errarg = (SlotErrCallbackArg *) arg;
     435             :     LogicalRepRelMapEntry *rel;
     436             :     char       *remotetypname;
     437             :     Oid         remotetypoid,
     438             :                 localtypoid;
     439             : 
     440             :     /* Nothing to do if remote attribute number is not set (no column is currently being converted) */
     441           0 :     if (errarg->remote_attnum < 0)
     442           0 :         return;
     443             : 
     444           0 :     rel = errarg->rel;
     445           0 :     remotetypoid = rel->remoterel.atttyps[errarg->remote_attnum];
     446             : 
     447             :     /* Fetch remote type name from the LogicalRepTypMap cache */
     448           0 :     remotetypname = logicalrep_typmap_gettypname(remotetypoid);
     449             : 
     450             :     /* Fetch local type OID from the local sys cache; local_attnum is 0-based, hence the + 1 */
     451           0 :     localtypoid = get_atttype(rel->localreloid, errarg->local_attnum + 1);
     452             : 
     453           0 :     errcontext("processing remote data for replication target relation \"%s.%s\" column \"%s\", "
     454             :                "remote type %s, local type %s",
     455             :                rel->remoterel.nspname, rel->remoterel.relname,
     456           0 :                rel->remoterel.attnames[errarg->remote_attnum],
     457             :                remotetypname,
     458             :                format_type_be(localtypoid));
     459             : }
     460             : 
     461             : /*
     462             :  * Fill 'slot' with a virtual tuple built from the remote tuple 'tupleData'.
     463             :  * Dropped columns and columns with no remote counterpart are set to NULL.
     464             :  * Incoming data can be either text or binary format.
     465             :  */
     466             : static void
     467      227702 : slot_store_data(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
     468             :                 LogicalRepTupleData *tupleData)
     469             : {
     470      227702 :     int         natts = slot->tts_tupleDescriptor->natts;
     471             :     int         i;
     472             :     SlotErrCallbackArg errarg;
     473             :     ErrorContextCallback errcallback;
     474             : 
     475      227702 :     ExecClearTuple(slot);
     476             : 
     477             :     /* Push callback + info on the error context stack */
     478      227702 :     errarg.rel = rel;
     479      227702 :     errarg.local_attnum = -1;
     480      227702 :     errarg.remote_attnum = -1; /* -1 makes the callback a no-op until a column is being converted */
     481      227702 :     errcallback.callback = slot_store_error_callback;
     482      227702 :     errcallback.arg = (void *) &errarg;
     483      227702 :     errcallback.previous = error_context_stack;
     484      227702 :     error_context_stack = &errcallback;
     485             : 
     486             :     /* Call the "in" function for each non-dropped, non-null attribute */
     487      227702 :     Assert(natts == rel->attrmap->maplen);
     488     1161872 :     for (i = 0; i < natts; i++)
     489             :     {
     490      934170 :         Form_pg_attribute att = TupleDescAttr(slot->tts_tupleDescriptor, i);
     491      934170 :         int         remoteattnum = rel->attrmap->attnums[i]; /* negative when local column i has no remote counterpart */
     492             : 
     493      934170 :         if (!att->attisdropped && remoteattnum >= 0)
     494      479710 :         {
     495      479710 :             StringInfo  colvalue = &tupleData->colvalues[remoteattnum];
     496             : 
     497      479710 :             Assert(remoteattnum < tupleData->ncols);
     498             : 
     499      479710 :             errarg.local_attnum = i; /* tell the error callback which column is being converted */
     500      479710 :             errarg.remote_attnum = remoteattnum;
     501             : 
     502      479710 :             if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_TEXT)
     503             :             {
     504             :                 Oid         typinput;
     505             :                 Oid         typioparam;
     506             : 
     507      350828 :                 getTypeInputInfo(att->atttypid, &typinput, &typioparam); /* text format: use the local type's input function */
     508      701656 :                 slot->tts_values[i] =
     509      350828 :                     OidInputFunctionCall(typinput, colvalue->data,
     510             :                                          typioparam, att->atttypmod);
     511      350828 :                 slot->tts_isnull[i] = false;
     512             :             }
     513      128882 :             else if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_BINARY)
     514             :             {
     515             :                 Oid         typreceive;
     516             :                 Oid         typioparam;
     517             : 
     518             :                 /*
     519             :                  * In some code paths we may be asked to re-parse the same
     520             :                  * tuple data.  Reset the StringInfo's cursor so that works.
     521             :                  */
     522       66738 :                 colvalue->cursor = 0;
     523             : 
     524       66738 :                 getTypeBinaryInputInfo(att->atttypid, &typreceive, &typioparam); /* binary format: use the local type's receive function */
     525      133476 :                 slot->tts_values[i] =
     526       66738 :                     OidReceiveFunctionCall(typreceive, colvalue,
     527             :                                            typioparam, att->atttypmod);
     528             : 
     529             :                 /* Trouble if it didn't eat the whole buffer */
     530       66738 :                 if (colvalue->cursor != colvalue->len)
     531           0 :                     ereport(ERROR,
     532             :                             (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
     533             :                              errmsg("incorrect binary data format in logical replication column %d",
     534             :                                     remoteattnum + 1)));
     535       66738 :                 slot->tts_isnull[i] = false;
     536             :             }
     537             :             else
     538             :             {
     539             :                 /*
     540             :                  * NULL value from remote.  (We don't expect to see
     541             :                  * LOGICALREP_COLUMN_UNCHANGED here, but if we do, treat it as
     542             :                  * NULL.)
     543             :                  */
     544       62144 :                 slot->tts_values[i] = (Datum) 0;
     545       62144 :                 slot->tts_isnull[i] = true;
     546             :             }
     547             : 
     548      479710 :             errarg.local_attnum = -1; /* conversion of this column is done: disarm the error callback again */
     549      479710 :             errarg.remote_attnum = -1;
     550             :         }
     551             :         else
     552             :         {
     553             :             /*
     554             :              * We assign NULL to dropped attributes and missing values
     555             :              * (missing values should be later filled using
     556             :              * slot_fill_defaults).
     557             :              */
     558      454460 :             slot->tts_values[i] = (Datum) 0;
     559      454460 :             slot->tts_isnull[i] = true;
     560             :         }
     561             :     }
     562             : 
     563             :     /* Pop the error context stack */
     564      227702 :     error_context_stack = errcallback.previous;
     565             : 
     566      227702 :     ExecStoreVirtualTuple(slot);
     567      227702 : }
     568             : 
     569             : /*
     570             :  * Replace updated columns with data from the LogicalRepTupleData struct.
     571             :  * This is somewhat similar to heap_modify_tuple but also runs the type's
     572             :  * text input or binary receive function on the received column data.
     573             :  *
     574             :  * "slot" is filled with a copy of the tuple in "srcslot"; columns marked
     575             :  * LOGICALREP_COLUMN_UNCHANGED, or with a negative attrmap entry, stay as-is.
     576             :  *
     577             :  * Caution: unreplaced pass-by-ref columns in "slot" will point into the
     578             :  * storage for "srcslot".  This is OK for current usage, but someday we may
     579             :  * need to materialize "slot" at the end to make it independent of "srcslot".
     580             :  */
     581             : static void
     582       62776 : slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot,
     583             :                  LogicalRepRelMapEntry *rel,
     584             :                  LogicalRepTupleData *tupleData)
     585             : {
     586       62776 :     int         natts = slot->tts_tupleDescriptor->natts;
     587             :     int         i;
     588             :     SlotErrCallbackArg errarg;
     589             :     ErrorContextCallback errcallback;
     590             : 
     591             :     /* "slot" will be filled as a virtual tuple, so clear it first */
     592       62776 :     ExecClearTuple(slot);
     593             : 
     594             :     /*
     595             :      * Copy all the column data from srcslot, so that we'll have valid values
     596             :      * for unreplaced columns.
     597             :      */
     598       62776 :     Assert(natts == srcslot->tts_tupleDescriptor->natts);
     599       62776 :     slot_getallattrs(srcslot);
     600       62776 :     memcpy(slot->tts_values, srcslot->tts_values, natts * sizeof(Datum));
     601       62776 :     memcpy(slot->tts_isnull, srcslot->tts_isnull, natts * sizeof(bool));
     602             : 
     603             :     /* For error reporting, push callback + info on the error context stack */
     604       62776 :     errarg.rel = rel;
     605       62776 :     errarg.local_attnum = -1;
     606       62776 :     errarg.remote_attnum = -1;
     607       62776 :     errcallback.callback = slot_store_error_callback;
     608       62776 :     errcallback.arg = (void *) &errarg;
     609       62776 :     errcallback.previous = error_context_stack;
     610       62776 :     error_context_stack = &errcallback;
     611             : 
     612             :     /* Convert each replaced attribute with its input or receive function */
     613       62776 :     Assert(natts == rel->attrmap->maplen);
     614      313368 :     for (i = 0; i < natts; i++)
     615             :     {
     616      250592 :         Form_pg_attribute att = TupleDescAttr(slot->tts_tupleDescriptor, i);
     617      250592 :         int         remoteattnum = rel->attrmap->attnums[i];
     618             : 
     619      250592 :         if (remoteattnum < 0)
     620      125080 :             continue;
     621             : 
     622      125512 :         Assert(remoteattnum < tupleData->ncols);
     623             : 
     624      125512 :         if (tupleData->colstatus[remoteattnum] != LOGICALREP_COLUMN_UNCHANGED)
     625             :         {
     626      125506 :             StringInfo  colvalue = &tupleData->colvalues[remoteattnum];
     627             : 
     628      125506 :             errarg.local_attnum = i;
     629      125506 :             errarg.remote_attnum = remoteattnum;
     630             : 
     631      125506 :             if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_TEXT)
     632             :             {
     633             :                 Oid         typinput;
     634             :                 Oid         typioparam;
     635             : 
     636       82060 :                 getTypeInputInfo(att->atttypid, &typinput, &typioparam);
     637      164120 :                 slot->tts_values[i] =
     638       82060 :                     OidInputFunctionCall(typinput, colvalue->data,
     639             :                                          typioparam, att->atttypmod);
     640       82060 :                 slot->tts_isnull[i] = false;
     641             :             }
     642       43446 :             else if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_BINARY)
     643             :             {
     644             :                 Oid         typreceive;
     645             :                 Oid         typioparam;
     646             : 
     647             :                 /*
     648             :                  * In some code paths we may be asked to re-parse the same
     649             :                  * tuple data.  Reset the StringInfo's cursor so that works.
     650             :                  */
     651       43362 :                 colvalue->cursor = 0;
     652             : 
     653       43362 :                 getTypeBinaryInputInfo(att->atttypid, &typreceive, &typioparam);
     654       86724 :                 slot->tts_values[i] =
     655       43362 :                     OidReceiveFunctionCall(typreceive, colvalue,
     656             :                                            typioparam, att->atttypmod);
     657             : 
     658             :                 /* Trouble if the receive function left unread bytes behind */
     659       43362 :                 if (colvalue->cursor != colvalue->len)
     660           0 :                     ereport(ERROR,
     661             :                             (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
     662             :                              errmsg("incorrect binary data format in logical replication column %d",
     663             :                                     remoteattnum + 1)));
     664       43362 :                 slot->tts_isnull[i] = false;
     665             :             }
     666             :             else
     667             :             {
     668             :                 /* must be LOGICALREP_COLUMN_NULL */
     669          84 :                 slot->tts_values[i] = (Datum) 0;
     670          84 :                 slot->tts_isnull[i] = true;
     671             :             }
     672             : 
     673      125506 :             errarg.local_attnum = -1;
     674      125506 :             errarg.remote_attnum = -1;
     675             :         }
     676             :     }
     677             : 
     678             :     /* Pop the error context stack */
     679       62776 :     error_context_stack = errcallback.previous;
     680             : 
     681             :     /* And finally, declare that "slot" contains a valid virtual tuple */
     682       62776 :     ExecStoreVirtualTuple(slot);
     683       62776 : }
     684             : 
     685             : /*
     686             :  * Handle BEGIN message: note the final LSN and enter remote-transaction state.
     687             :  */
     688             : static void
     689         498 : apply_handle_begin(StringInfo s)
     690             : {
     691             :     LogicalRepBeginData begin_data;
     692             : 
     693         498 :     logicalrep_read_begin(s, &begin_data);
     694             : 
     695         498 :     remote_final_lsn = begin_data.final_lsn;
     696             : 
     697         498 :     in_remote_transaction = true;
     698             : 
     699         498 :     pgstat_report_activity(STATE_RUNNING, NULL);
     700         498 : }
     701             : 
     702             : /*
     703             :  * Handle COMMIT message: commit the local transaction, if one is open.
     704             :  *
     705             :  * TODO: support tracking of multiple origins
     706             :  */
     707             : static void
     708         486 : apply_handle_commit(StringInfo s)
     709             : {
     710             :     LogicalRepCommitData commit_data;
     711             : 
     712         486 :     logicalrep_read_commit(s, &commit_data);
     713             : 
     714         486 :     Assert(commit_data.commit_lsn == remote_final_lsn);
     715             : 
     716             :     /* The tablesync worker runs in a single transaction; don't commit it. */
     717         486 :     if (IsTransactionState() && !am_tablesync_worker())
     718             :     {
     719             :         /*
     720             :          * Update origin state so we can restart streaming from correct
     721             :          * position in case of crash.
     722             :          */
     723         368 :         replorigin_session_origin_lsn = commit_data.end_lsn;
     724         368 :         replorigin_session_origin_timestamp = commit_data.committime;
     725             : 
     726         368 :         CommitTransactionCommand();
     727             : 
     728         368 :         pgstat_report_stat(false);
     729             : 
     730         368 :         store_flush_position(commit_data.end_lsn);
     731             :     }
     732             :     else
     733             :     {
     734             :         /* Process any invalidation messages that might have accumulated. */
     735         118 :         AcceptInvalidationMessages();
     736         118 :         maybe_reread_subscription();
     737             :     }
     738             : 
     739         486 :     in_remote_transaction = false;
     740             : 
     741             :     /* Process any tables that are being synchronized in parallel. */
     742         486 :     process_syncing_tables(commit_data.end_lsn);
     743             : 
     744         486 :     pgstat_report_activity(STATE_IDLE, NULL);
     745         486 : }
     746             : 
     747             : /*
     748             :  * Called from apply_handle_prepare to apply PREPARE TRANSACTION with the GID in the message.
     749             :  */
     750             : static void
     751           6 : apply_handle_prepare_txn(LogicalRepPrepareData * prepare_data)
     752             : {
     753           6 :     Assert(prepare_data->prepare_lsn == remote_final_lsn);
     754             : 
     755             :     /* The tablesync worker runs in a single transaction; don't prepare it. */
     756           6 :     if (IsTransactionState() && !am_tablesync_worker())
     757             :     {
     758             :         /* Open a transaction block; presumably needed for the PREPARE below */
     759           6 :         BeginTransactionBlock();
     760           6 :         CommitTransactionCommand();
     761           6 :         StartTransactionCommand();
     762             : 
     763             :         /*
     764             :          * Update origin state so we can restart streaming from correct
     765             :          * position in case of crash.
     766             :          */
     767           6 :         replorigin_session_origin_lsn = prepare_data->end_lsn;
     768           6 :         replorigin_session_origin_timestamp = prepare_data->preparetime;
     769             : 
     770           6 :         PrepareTransactionBlock(prepare_data->gid);
     771           6 :         CommitTransactionCommand();
     772           6 :         pgstat_report_stat(false);
     773             : 
     774           6 :         store_flush_position(prepare_data->end_lsn);
     775             :     }
     776             :     else
     777             :     {
     778             :         /* Process any invalidation messages that might have accumulated. */
     779           0 :         AcceptInvalidationMessages();
     780           0 :         maybe_reread_subscription();
     781             :     }
     782             : 
     783           6 :     in_remote_transaction = false;
     784             : 
     785             :     /* Process any tables that are being synchronized in parallel. */
     786           6 :     process_syncing_tables(prepare_data->end_lsn);
     787             : 
     788           6 :     pgstat_report_activity(STATE_IDLE, NULL);
     789           6 : }
     790             : 
     791             : /*
     792             :  * Called from apply_handle_prepare to apply COMMIT PREPARED to the previously
     793             :  * prepared transaction identified by the GID in the message.
     794             :  */
     795             : static void
     796          16 : apply_handle_commit_prepared_txn(LogicalRepPrepareData * prepare_data)
     797             : {
     798             :     /* No transaction is open when COMMIT PREPARED arrives, so start one */
     799          16 :     ensure_transaction();
     800             : 
     801             :     /*
     802             :      * Update origin state so we can restart streaming from correct position
     803             :      * in case of crash.
     804             :      */
     805          16 :     replorigin_session_origin_lsn = prepare_data->end_lsn;
     806          16 :     replorigin_session_origin_timestamp = prepare_data->preparetime;
     807             : 
     808          16 :     FinishPreparedTransaction(prepare_data->gid, true);
     809          16 :     CommitTransactionCommand();
     810          16 :     pgstat_report_stat(false);
     811             : 
     812          16 :     store_flush_position(prepare_data->end_lsn);
     813          16 :     in_remote_transaction = false;
     814             : 
     815             :     /* Process any tables that are being synchronized in parallel. */
     816          16 :     process_syncing_tables(prepare_data->end_lsn);
     817             : 
     818          16 :     pgstat_report_activity(STATE_IDLE, NULL);
     819          16 : }
     820             : 
     821             : /*
     822             :  * Called from apply_handle_prepare to apply ROLLBACK PREPARED to a previously
     823             :  * prepared transaction, if one with the message's GID exists locally.
     824             :  */
     825             : static void
     826           4 : apply_handle_rollback_prepared_txn(LogicalRepPrepareData * prepare_data)
     827             : {
     828             :     /*
     829             :      * Update origin state so we can restart streaming from correct position
     830             :      * in case of crash.
     831             :      */
     832           4 :     replorigin_session_origin_lsn = prepare_data->end_lsn;
     833           4 :     replorigin_session_origin_timestamp = prepare_data->preparetime;
     834             : 
     835             :     /*
     836             :      * The prepared transaction may have been aborted while it was still
     837             :      * being decoded.  In that case decoding stops and the transaction is
     838             :      * aborted immediately, yet the ROLLBACK PREPARED message still reaches
     839             :      * the subscriber.  So it is OK if no prepared transaction with this GID
     840             :      * exists locally; the rollback itself is then simply skipped.
     841             :      */
     842           4 :     if (LookupGXact(prepare_data->gid))
     843             :     {
     844             :         /* No transaction is open when ROLLBACK PREPARED arrives; start one */
     845           4 :         ensure_transaction();
     846           4 :         FinishPreparedTransaction(prepare_data->gid, false);
     847           4 :         CommitTransactionCommand();
     848             :     }
     849             : 
     850           4 :     pgstat_report_stat(false);
     851             : 
     852           4 :     store_flush_position(prepare_data->end_lsn);
     853           4 :     in_remote_transaction = false;
     854             : 
     855             :     /* Process any tables that are being synchronized in parallel. */
     856           4 :     process_syncing_tables(prepare_data->end_lsn);
     857             : 
     858           4 :     pgstat_report_activity(STATE_IDLE, NULL);
     859           4 : }
     860             : 
     861             : /*
     862             :  * Handle PREPARE message: dispatch on the prepare type read from the message.
     863             :  */
     864             : static void
     865          26 : apply_handle_prepare(StringInfo s)
     866             : {
     867             :     LogicalRepPrepareData prepare_data;
     868             : 
     869          26 :     logicalrep_read_prepare(s, &prepare_data);
     870             : 
     871          26 :     switch (prepare_data.prepare_type)
     872             :     {
     873             :         case LOGICALREP_IS_PREPARE:
     874           6 :             apply_handle_prepare_txn(&prepare_data);
     875           6 :             break;
     876             : 
     877             :         case LOGICALREP_IS_COMMIT_PREPARED:
     878          16 :             apply_handle_commit_prepared_txn(&prepare_data);
     879          16 :             break;
     880             : 
     881             :         case LOGICALREP_IS_ROLLBACK_PREPARED:
     882           4 :             apply_handle_rollback_prepared_txn(&prepare_data);
     883           4 :             break;
     884             : 
     885             :         default:
     886           0 :             ereport(ERROR,
     887             :                     (errcode(ERRCODE_PROTOCOL_VIOLATION),
     888             :                      errmsg("unexpected type of prepare message: %d",
     889             :                             prepare_data.prepare_type)));
     890             :     }
     891          26 : }
     892             : 
     893             : /*
     894             :  * Handle STREAM PREPARE.
     895             :  *
     896             :  * Logic is in two parts:
     897             :  * 1. Replay all the spooled operations
     898             :  * 2. Mark the transaction as prepared
     899             :  */
     900             : static void
     901          12 : apply_handle_stream_prepare(StringInfo s)
     902             : {
     903          12 :     int nchanges = 0;
     904             :     LogicalRepPrepareData prepare_data;
     905             :     TransactionId xid;
     906             : 
     907          12 :     Assert(!in_streamed_transaction);
     908             : 
     909          12 :     xid = logicalrep_read_stream_prepare(s, &prepare_data);
     910          12 :     elog(DEBUG1, "received prepare for streamed transaction %u", xid);
     911             : 
     912             :     /*
     913             :      * This should be a PREPARE only. The COMMIT PREPARED and ROLLBACK PREPARED
     914             :      * for streaming are handled by the non-streaming APIs.
     915             :      */
     916          12 :     Assert(prepare_data.prepare_type == LOGICALREP_IS_PREPARE);
     917             : 
     918             :     /*
     919             :      * ========================================
     920             :      * 1. Replay all the spooled operations.
     921             :      * - Same as what apply_handle_stream_commit does for a non-two-phase commit.
     922             :      * ========================================
     923             :      */
     924             : 
     925          12 :     ensure_transaction();
     926             : 
     927          12 :     nchanges = apply_spooled_messages(xid, prepare_data.prepare_lsn);
     928             : 
     929             :     /*
     930             :      * ========================================
     931             :      * 2. Mark the transaction as prepared.
     932             :      * - Same as what apply_handle_prepare_txn does for a non-streamed prepare.
     933             :      * ========================================
     934             :      */
     935          12 :     BeginTransactionBlock();
     936          12 :     CommitTransactionCommand();
     937          12 :     StartTransactionCommand();
     938             : 
     939             :     /*
     940             :      * Update origin state so we can restart streaming from correct position
     941             :      * in case of crash.
     942             :      */
     943          12 :     replorigin_session_origin_lsn = prepare_data.end_lsn;
     944          12 :     replorigin_session_origin_timestamp = prepare_data.preparetime;
     945             : 
     946          12 :     PrepareTransactionBlock(prepare_data.gid);
     947          12 :     CommitTransactionCommand();
     948             : 
     949          12 :     pgstat_report_stat(false);
     950             : 
     951          12 :     store_flush_position(prepare_data.end_lsn);
     952             : 
     953          12 :     elog(DEBUG1, "apply_handle_stream_prepare: replayed %d (all) changes.", nchanges);
     954             : 
     955          12 :     in_remote_transaction = false;
     956             : 
     957             :     /* Process any tables that are being synchronized in parallel. */
     958          12 :     process_syncing_tables(prepare_data.end_lsn);
     959             : 
     960             :     /* unlink the files with serialized changes and subxact info */
     961          12 :     stream_cleanup_files(MyLogicalRepWorker->subid, xid);
     962             : 
     963          12 :     pgstat_report_activity(STATE_IDLE, NULL);
     964          12 : }
     965             : 
     966             : /*
     967             :  * Handle ORIGIN message: only checks that it arrives at a permitted point.
     968             :  *
     969             :  * TODO: support tracking of multiple origins
     970             :  */
     971             : static void
     972           0 : apply_handle_origin(StringInfo s)
     973             : {
     974             :     /*
     975             :      * ORIGIN message can only come inside streaming transaction or inside
     976             :      * remote transaction and before any actual writes.
     977             :      */
     978           0 :     if (!in_streamed_transaction &&
     979           0 :         (!in_remote_transaction ||
     980           0 :          (IsTransactionState() && !am_tablesync_worker())))
     981           0 :         ereport(ERROR,
     982             :                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
     983             :                  errmsg("ORIGIN message sent out of order")));
     984           0 : }
     985             : 
     986             : /*
     987             :  * Handle STREAM START message: open the spool file for the streamed xact.
     988             :  */
     989             : static void
     990         614 : apply_handle_stream_start(StringInfo s)
     991             : {
     992             :     bool        first_segment;
     993             :     HASHCTL     hash_ctl;
     994             : 
     995         614 :     Assert(!in_streamed_transaction);
     996             : 
     997             :     /*
     998             :      * Start a transaction on stream start, this transaction will be committed
     999             :      * on the stream stop. We need the transaction for handling the buffile,
    1000             :      * used for serializing the streaming data and subxact info.
    1001             :      */
    1002         614 :     ensure_transaction();
    1003             : 
    1004             :     /* notify handle methods we're processing a streamed transaction */
    1005         614 :     in_streamed_transaction = true;
    1006             : 
    1007             :     /* extract XID of the top-level transaction */
    1008         614 :     stream_xid = logicalrep_read_stream_start(s, &first_segment);
    1009             : 
    1010             :     /*
    1011             :      * Initialize the xidhash table if we haven't yet.  It is used for the
    1012             :      * whole life of the apply worker, so create it in ApplyContext.  The key
    1013             :      * is a binary TransactionId, hence HASH_BLOBS (not the string default).
    1014             :      */
    1015         614 :     if (xidhash == NULL)
    1016             :     {
    1017          16 :         hash_ctl.keysize = sizeof(TransactionId);
    1018          16 :         hash_ctl.entrysize = sizeof(StreamXidHash);
    1019          16 :         hash_ctl.hcxt = ApplyContext;
    1020          16 :         xidhash = hash_create("StreamXidHash", 1024, &hash_ctl,
    1021             :                               HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    1022             :     }
    1023             : 
    1024             :     /* open the spool file for this transaction */
    1025         614 :     stream_open_file(MyLogicalRepWorker->subid, stream_xid, first_segment);
    1026             : 
    1027             :     /* if this is not the first segment, open existing subxact file */
    1028         614 :     if (!first_segment)
    1029         570 :         subxact_info_read(MyLogicalRepWorker->subid, stream_xid);
    1030             : 
    1031         614 :     pgstat_report_activity(STATE_RUNNING, NULL);
    1032         614 : }
    1033             : 
    1034             : /*
    1035             :  * Handle STREAM STOP message: close the spool file and commit.
    1036             :  */
    1037             : static void
    1038         614 : apply_handle_stream_stop(StringInfo s)
    1039             : {
    1040         614 :     Assert(in_streamed_transaction);
    1041             : 
    1042             :     /*
    1043             :      * Serialize information about subxacts for the toplevel transaction,
    1044             :      * then close the file with serialized changes.
    1045             :      */
    1046         614 :     subxact_info_write(MyLogicalRepWorker->subid, stream_xid);
    1047         614 :     stream_close_file();
    1048             : 
    1049             :     /* We must be in a valid transaction state */
    1050         614 :     Assert(IsTransactionState());
    1051             : 
    1052             :     /* Commit the per-stream transaction */
    1053         614 :     CommitTransactionCommand();
    1054             : 
    1055         614 :     in_streamed_transaction = false;
    1056             : 
    1057             :     /* Reset per-stream context */
    1058         614 :     MemoryContextReset(LogicalStreamingContext);
    1059             : 
    1060         614 :     pgstat_report_activity(STATE_IDLE, NULL);
    1061         614 : }
    1062             : 
    1063             : /*
    1064             :  * Handle STREAM ABORT message, for a toplevel xact or a single subxact.
    1065             :  */
    1066             : static void
    1067          26 : apply_handle_stream_abort(StringInfo s)
    1068             : {
    1069             :     TransactionId xid;
    1070             :     TransactionId subxid;
    1071             : 
    1072          26 :     Assert(!in_streamed_transaction);
    1073             : 
    1074          26 :     logicalrep_read_stream_abort(s, &xid, &subxid);
    1075             : 
    1076             :     /*
    1077             :      * If the two XIDs are the same, it's in fact abort of toplevel xact, so
    1078             :      * just delete the files with serialized info.
    1079             :      */
    1080          26 :     if (xid == subxid)
    1081           2 :         stream_cleanup_files(MyLogicalRepWorker->subid, xid);
    1082             :     else
    1083             :     {
    1084             :         /*
    1085             :          * OK, so it's a subxact. We need to read the subxact file for the
    1086             :          * toplevel transaction, determine the offset tracked for the subxact,
    1087             :          * and truncate the file with changes. We also discard the entry for
    1088             :          * this subxact and the entries of all subxacts recorded after it.
    1089             :          *
    1090             :          * We intentionally scan the array from the tail, because we're likely
    1091             :          * aborting a change for the most recent subtransactions.
    1092             :          *
    1093             :          * We can't use the binary search here as subxact XIDs won't
    1094             :          * necessarily arrive in sorted order; consider the case where we have
    1095             :          * released the savepoint for multiple subtransactions and then
    1096             :          * performed rollback to savepoint for one of the earlier
    1097             :          * subtransactions.
    1098             :          */
    1099             : 
    1100             :         int64       i;
    1101             :         int64       subidx;
    1102             :         BufFile    *fd;
    1103          24 :         bool        found = false;
    1104             :         char        path[MAXPGPATH];
    1105             :         StreamXidHash *ent;
    1106             : 
    1107          24 :         subidx = -1;
    1108          24 :         ensure_transaction();
    1109          24 :         subxact_info_read(MyLogicalRepWorker->subid, xid);
    1110             : 
    1111          28 :         for (i = subxact_data.nsubxacts; i > 0; i--)
    1112             :         {
    1113          20 :             if (subxact_data.subxacts[i - 1].xid == subxid)
    1114             :             {
    1115          16 :                 subidx = (i - 1);
    1116          16 :                 found = true;
    1117          16 :                 break;
    1118             :             }
    1119             :         }
    1120             : 
    1121             :         /*
    1122             :          * If it's an empty sub-transaction then we will not find the subxid
    1123             :          * here, so just clean up the subxact info, commit and return.
    1124             :          */
    1125          24 :         if (!found)
    1126             :         {
    1127             :             /* Cleanup the subxact info */
    1128           8 :             cleanup_subxact_info();
    1129           8 :             CommitTransactionCommand();
    1130          34 :             return;
    1131             :         }
    1132             : 
    1133          16 :         Assert((subidx >= 0) && (subidx < subxact_data.nsubxacts));
    1134             : 
    1135          16 :         ent = (StreamXidHash *) hash_search(xidhash,
    1136             :                                             (void *) &xid,
    1137             :                                             HASH_FIND,
    1138             :                                             &found);
    1139          16 :         Assert(found);
    1140             : 
    1141             :         /* open the changes file */
    1142          16 :         changes_filename(path, MyLogicalRepWorker->subid, xid);
    1143          16 :         fd = BufFileOpenShared(ent->stream_fileset, path, O_RDWR);
    1144             : 
    1145             :         /* OK, truncate the file at the position recorded for this subxact */
    1146          16 :         BufFileTruncateShared(fd, subxact_data.subxacts[subidx].fileno,
    1147          16 :                               subxact_data.subxacts[subidx].offset);
    1148          16 :         BufFileClose(fd);
    1149             : 
    1150             :         /* discard this subxact and the subxacts added later */
    1151          16 :         subxact_data.nsubxacts = subidx;
    1152             : 
    1153             :         /* write the updated subxact list */
    1154          16 :         subxact_info_write(MyLogicalRepWorker->subid, xid);
    1155          16 :         CommitTransactionCommand();
    1156             :     }
    1157             : }
    1158             : 
    1159             : /*
    1160             :  * Common spoolfile processing.
    1161             :  * Returns how many changes were applied.
                     :  *
                     :  * Replays the changes file of streamed transaction 'xid' for the current
                     :  * subscription.  The file is opened read-only through the fileset stored
                     :  * in the transaction's xidhash entry, and every record found in it is
                     :  * handed to apply_dispatch().  On disk each record is an int length
                     :  * followed by that many bytes of message data.
                     :  *
                     :  * 'lsn' is stored in remote_final_lsn, and in_remote_transaction is set
                     :  * to true; this function does not clear the flag again.
                     :  *
                     :  * Reading zero bytes where a length word is expected ends the loop.  A
                     :  * short read of the length word or of the record body is reported with
                     :  * ereport(ERROR).
                     :  *
                     :  * NOTE(review): the record length is only checked with Assert(len > 0);
                     :  * presumably a corrupted length would reach repalloc() unchecked in a
                     :  * build without assertions -- confirm whether an explicit error is
                     :  * wanted here.
    1162             :  */
    1163             : static int
    1164          40 : apply_spooled_messages(TransactionId xid, XLogRecPtr lsn)
    1165             : {
    1166             :     StringInfoData s2;
    1167             :     int         nchanges;
    1168             :     char        path[MAXPGPATH];
    1169          40 :     char       *buffer = NULL;
    1170             :     bool        found;
    1171             :     StreamXidHash *ent;
    1172             :     MemoryContext oldcxt;
    1173             :     BufFile    *fd;
    1174             : 
    1175             :     /*
    1176             :      * Allocate file handle and memory required to process all the messages in
    1177             :      * TopTransactionContext to avoid them getting reset after each message is
    1178             :      * processed.
    1179             :      */
    1180          40 :     oldcxt = MemoryContextSwitchTo(TopTransactionContext);
    1181             : 
    1182             :     /* open the spool file for the committed transaction */
    1183          40 :     changes_filename(path, MyLogicalRepWorker->subid, xid);
    1184          40 :     elog(DEBUG1, "replaying changes from file \"%s\"", path);
    1185          40 :     ent = (StreamXidHash *) hash_search(xidhash,
    1186             :                                         (void *) &xid,
    1187             :                                         HASH_FIND,
    1188             :                                         &found);
    1189          40 :     Assert(found);
    1190          40 :     fd = BufFileOpenShared(ent->stream_fileset, path, O_RDONLY);
    1191             : 
    1192          40 :     buffer = palloc(BLCKSZ);
    1193          40 :     initStringInfo(&s2);
    1194             : 
    1195          40 :     MemoryContextSwitchTo(oldcxt);
    1196             : 
    1197          40 :     remote_final_lsn = lsn;
    1198             : 
    1199             :     /*
    1200             :      * Make sure the handle apply_dispatch methods are aware we're in a remote
    1201             :      * transaction.
    1202             :      */
    1203          40 :     in_remote_transaction = true;
    1204          40 :     pgstat_report_activity(STATE_RUNNING, NULL);
    1205             : 
    1206             :     /*
    1207             :      * Read the entries one by one and pass them through the same logic as in
    1208             :      * apply_dispatch.
    1209             :      */
    1210          40 :     nchanges = 0;
    1211             :     while (true)
    1212             :     {
    1213             :         int         nbytes;
    1214             :         int         len;
    1215             : 
    1216      226192 :         CHECK_FOR_INTERRUPTS();
    1217             : 
    1218             :         /* read length of the on-disk record */
    1219      226192 :         nbytes = BufFileRead(fd, &len, sizeof(len));
    1220             : 
    1221             :         /* have we reached end of the file? */
    1222      226192 :         if (nbytes == 0)
    1223          40 :             break;
    1224             : 
    1225             :         /* do we have a correct length? */
    1226      226152 :         if (nbytes != sizeof(len))
    1227           0 :             ereport(ERROR,
    1228             :                     (errcode_for_file_access(),
    1229             :                      errmsg("could not read from streaming transaction's changes file \"%s\": %m",
    1230             :                             path)));
    1231             : 
    1232      226152 :         Assert(len > 0);
    1233             : 
    1234             :         /* make sure we have sufficiently large buffer */
    1235      226152 :         buffer = repalloc(buffer, len);
    1236             : 
    1237             :         /* and finally read the data into the buffer */
    1238      226152 :         if (BufFileRead(fd, buffer, len) != len)
    1239           0 :             ereport(ERROR,
    1240             :                     (errcode_for_file_access(),
    1241             :                      errmsg("could not read from streaming transaction's changes file \"%s\": %m",
    1242             :                             path)));
    1243             : 
    1244             :         /* copy the buffer to the stringinfo and call apply_dispatch */
    1245      226152 :         resetStringInfo(&s2);
    1246      226152 :         appendBinaryStringInfo(&s2, buffer, len);
    1247             : 
    1248             :         /* Ensure we are reading the data into our memory context. */
    1249      226152 :         oldcxt = MemoryContextSwitchTo(ApplyMessageContext);
    1250             : 
    1251      226152 :         apply_dispatch(&s2);
    1252             : 
    1253      226152 :         MemoryContextReset(ApplyMessageContext);
    1254             : 
    1255      226152 :         MemoryContextSwitchTo(oldcxt);
    1256             : 
    1257      226152 :         nchanges++;
    1258             : 
    1259      226152 :         if (nchanges % 1000 == 0)   /* progress message every 1000 changes */
    1260         214 :             elog(DEBUG1, "replayed %d changes from file '%s'",
    1261             :                  nchanges, path);
    1262      226152 :     }
    1263             : 
    1264          40 :     BufFileClose(fd);
    1265             : 
    1266          40 :     pfree(buffer);
    1267          40 :     pfree(s2.data);
    1268             : 
    1269          40 :     elog(DEBUG1, "replayed %d (all) changes from file \"%s\"",
    1270             :          nchanges, path);
    1271             : 
    1272          40 :     return nchanges;
    1273             : }
    1274             : 
    1275             : /*
    1276             :  * Handle STREAM COMMIT message.
                     :  *
                     :  * Reads the transaction id and commit data from 's', makes sure a local
                     :  * transaction is in progress, and replays the transaction's spooled
                     :  * changes with apply_spooled_messages().  It then records the remote end
                     :  * LSN and commit time as the session's replication origin state, commits
                     :  * the local transaction and stores the flush position.
                     :  *
                     :  * Afterwards in_remote_transaction is cleared, table synchronization is
                     :  * processed for the commit's end LSN, the files belonging to the
                     :  * streamed transaction are cleaned up, and the worker reports itself
                     :  * idle.
                     :  *
                     :  * Must not be called while in_streamed_transaction is set (asserted).
    1277             :  */
    1278             : static void
    1279          28 : apply_handle_stream_commit(StringInfo s)
    1280             : {
    1281             :     TransactionId xid;
    1282             :     LogicalRepCommitData commit_data;
    1283          28 :     int nchanges = 0;
    1284             : 
    1285          28 :     Assert(!in_streamed_transaction);
    1286             : 
    1287          28 :     xid = logicalrep_read_stream_commit(s, &commit_data);
    1288             : 
    1289          28 :     elog(DEBUG1, "received commit for streamed transaction %u", xid);
    1290             : 
    1291          28 :     ensure_transaction();
    1292             : 
    1293          28 :     nchanges = apply_spooled_messages(xid, commit_data.commit_lsn);
    1294             : 
    1295             :     /*
    1296             :      * Update origin state so we can restart streaming from correct position
    1297             :      * in case of crash.
    1298             :      */
    1299          28 :     replorigin_session_origin_lsn = commit_data.end_lsn;
    1300          28 :     replorigin_session_origin_timestamp = commit_data.committime;
    1301             : 
    1302          28 :     CommitTransactionCommand();
    1303          28 :     pgstat_report_stat(false);
    1304             : 
    1305          28 :     store_flush_position(commit_data.end_lsn);
    1306             : 
    1307          28 :     elog(DEBUG1, "apply_handle_stream_commit: replayed %d (all) changes.", nchanges);
    1308             : 
    1309          28 :     in_remote_transaction = false;
    1310             : 
    1311             :     /* Process any tables that are being synchronized in parallel. */
    1312          28 :     process_syncing_tables(commit_data.end_lsn);
    1313             : 
    1314             :     /* unlink the files with serialized changes and subxact info */
    1315          28 :     stream_cleanup_files(MyLogicalRepWorker->subid, xid);
    1316             : 
    1317          28 :     pgstat_report_activity(STATE_IDLE, NULL);
    1318          28 : }
    1319             : 
    1320             : /*
    1321             :  * Handle RELATION message.
    1322             :  *
    1323             :  * Note we don't do validation against local schema here. The validation
    1324             :  * against local schema is postponed until first change for given relation
    1325             :  * comes as we only care about it when applying changes for it anyway and we
    1326             :  * do less locking this way.
                     :  *
                     :  * If handle_streamed_transaction() returns true for the message (tagged
                     :  * 'R'), nothing more is done here; presumably the message was taken over
                     :  * as part of a streamed transaction.  Otherwise the relation description
                     :  * is read from 's' and the relation map is updated with it.
    1327             :  */
    1328             : static void
    1329         308 : apply_handle_relation(StringInfo s)
    1330             : {
    1331             :     LogicalRepRelation *rel;
    1332             : 
    1333         308 :     if (handle_streamed_transaction('R', s))
    1334         368 :         return;
    1335             : 
    1336         248 :     rel = logicalrep_read_rel(s);
    1337         248 :     logicalrep_relmap_update(rel);
    1338             : }
    1339             : 
    1340             : /*
    1341             :  * Handle TYPE message.
    1342             :  *
    1343             :  * Note we don't do local mapping here, that's done when the type is
    1344             :  * actually used.
    1345             :  */
    1346             : static void
    1347          32 : apply_handle_type(StringInfo s)
    1348             : {
    1349             :     LogicalRepTyp typ;
    1350             : 
    1351          32 :     if (handle_streamed_transaction('Y', s))
    1352          32 :         return;
    1353             : 
    1354          32 :     logicalrep_read_typ(s, &typ);
    1355          32 :     logicalrep_typmap_update(&typ);
    1356             : }
    1357             : 
    1358             : /*
    1359             :  * Get replica identity index or if it is not defined a primary key.
    1360             :  *
    1361             :  * If neither is defined, returns InvalidOid
    1362             :  */
    1363             : static Oid
    1364      124878 : GetRelationIdentityOrPK(Relation rel)
    1365             : {
    1366             :     Oid         idxoid;
    1367             : 
    1368      124878 :     idxoid = RelationGetReplicaIndex(rel);
    1369             : 
    1370      124878 :     if (!OidIsValid(idxoid))
    1371         246 :         idxoid = RelationGetPrimaryKeyIndex(rel);
    1372             : 
    1373      124878 :     return idxoid;
    1374             : }
    1375             : 
    1376             : /*
    1377             :  * Handle INSERT message.
                     :  *
                     :  * Returns immediately if handle_streamed_transaction() returns true for
                     :  * the message (tagged 'I').  Otherwise a local transaction is ensured,
                     :  * the new tuple is read from 's' and the target relation is opened with
                     :  * RowExclusiveLock.  If should_apply_changes_for_rel() says no, the
                     :  * relation is closed (releasing that lock) and the change is skipped.
                     :  *
                     :  * For an applicable relation, the remote tuple is stored in a virtual
                     :  * slot and defaults are filled in, both in the executor's per-tuple
                     :  * memory context, while an active snapshot is pushed.  Partitioned
                     :  * target tables go through apply_handle_tuple_routing(); all others
                     :  * through apply_handle_insert_internal().  Afterwards queued AFTER
                     :  * triggers are handled, executor state is released, the relation is
                     :  * closed with NoLock and the command counter is incremented.
    1378             :  */
    1379             : 
    1380             : static void
    1381      224744 : apply_handle_insert(StringInfo s)
    1382             : {
    1383             :     ResultRelInfo *resultRelInfo;
    1384             :     LogicalRepRelMapEntry *rel;
    1385             :     LogicalRepTupleData newtup;
    1386             :     LogicalRepRelId relid;
    1387             :     EState     *estate;
    1388             :     TupleTableSlot *remoteslot;
    1389             :     MemoryContext oldctx;
    1390             : 
    1391      224744 :     if (handle_streamed_transaction('I', s))
    1392      243800 :         return;
    1393             : 
    1394      102856 :     ensure_transaction();
    1395             : 
    1396      102852 :     relid = logicalrep_read_insert(s, &newtup);
    1397      102852 :     rel = logicalrep_rel_open(relid, RowExclusiveLock);
    1398      102850 :     if (!should_apply_changes_for_rel(rel))
    1399             :     {
    1400             :         /*
    1401             :          * The relation can't become interesting in the middle of the
    1402             :          * transaction so it's safe to unlock it.
    1403             :          */
    1404          24 :         logicalrep_rel_close(rel, RowExclusiveLock);
    1405          24 :         return;
    1406             :     }
    1407             : 
    1408             :     /* Initialize the executor state. */
    1409      102826 :     estate = create_estate_for_relation(rel);
    1410      102826 :     remoteslot = ExecInitExtraTupleSlot(estate,
    1411      102826 :                                         RelationGetDescr(rel->localrel),
    1412             :                                         &TTSOpsVirtual);
    1413      102826 :     resultRelInfo = makeNode(ResultRelInfo);
    1414      102826 :     InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0);
    1415             : 
    1416             :     /* Input functions may need an active snapshot, so get one */
    1417      102826 :     PushActiveSnapshot(GetTransactionSnapshot());
    1418             : 
    1419             :     /* Process and store remote tuple in the slot */
    1420      102826 :     oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
    1421      102826 :     slot_store_data(remoteslot, rel, &newtup);
    1422      102826 :     slot_fill_defaults(rel, estate, remoteslot);
    1423      102826 :     MemoryContextSwitchTo(oldctx);
    1424             : 
    1425             :     /* For a partitioned table, insert the tuple into a partition. */
    1426      102826 :     if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    1427          36 :         apply_handle_tuple_routing(resultRelInfo, estate,
    1428             :                                    remoteslot, NULL, rel, CMD_INSERT);
    1429             :     else
    1430      102790 :         apply_handle_insert_internal(resultRelInfo, estate,
    1431             :                                      remoteslot);
    1432             : 
    1433      102826 :     PopActiveSnapshot();
    1434             : 
    1435             :     /* Handle queued AFTER triggers. */
    1436      102826 :     AfterTriggerEndQuery(estate);
    1437             : 
    1438      102826 :     ExecResetTupleTable(estate->es_tupleTable, false);
    1439      102826 :     FreeExecutorState(estate);
    1440             : 
    1441      102826 :     logicalrep_rel_close(rel, NoLock);
    1442             : 
    1443      102826 :     CommandCounterIncrement();
    1444             : }
    1445             : 
    1446             : /* Workhorse for apply_handle_insert()
                     :  *
                     :  * Opens the indexes of 'relinfo', inserts the tuple held in 'remoteslot'
                     :  * with ExecSimpleRelationInsert(), and closes the indexes again.
                     :  */
    1447             : static void
    1448      102828 : apply_handle_insert_internal(ResultRelInfo *relinfo,
    1449             :                              EState *estate, TupleTableSlot *remoteslot)
    1450             : {
    1451      102828 :     ExecOpenIndices(relinfo, false);
    1452             : 
    1453             :     /* Do the insert. */
    1454      102828 :     ExecSimpleRelationInsert(relinfo, estate, remoteslot);
    1455             : 
    1456             :     /* Cleanup. */
    1457      102828 :     ExecCloseIndices(relinfo);
    1458      102828 : }
    1459             : 
    1460             : /*
    1461             :  * Check if the logical replication relation is updatable and throw
    1462             :  * appropriate error if it isn't.
                     :  *
                     :  * Returns quietly when rel->updatable is set.  Otherwise one of two
                     :  * errors is raised, chosen by whether the local relation has a replica
                     :  * identity index or primary key: if it has one, the publisher is blamed
                     :  * for not sending the replica identity column; if it has none, the
                     :  * missing local index / REPLICA IDENTITY FULL is reported.  The second
                     :  * ereport() relies on ereport(ERROR) in the first branch not returning.
    1463             :  */
    1464             : static void
    1465      124876 : check_relation_updatable(LogicalRepRelMapEntry *rel)
    1466             : {
    1467             :     /* Updatable, no error. */
    1468      124876 :     if (rel->updatable)
    1469      249752 :         return;
    1470             : 
    1471             :     /*
    1472             :      * We are in error mode so it's fine this is somewhat slow. It's better to
    1473             :      * give user correct error.
    1474             :      */
    1475           0 :     if (OidIsValid(GetRelationIdentityOrPK(rel->localrel)))
    1476             :     {
    1477           0 :         ereport(ERROR,
    1478             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1479             :                  errmsg("publisher did not send replica identity column "
    1480             :                         "expected by the logical replication target relation \"%s.%s\"",
    1481             :                         rel->remoterel.nspname, rel->remoterel.relname)));
    1482             :     }
    1483             : 
    1484           0 :     ereport(ERROR,
    1485             :             (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1486             :              errmsg("logical replication target relation \"%s.%s\" has "
    1487             :                     "neither REPLICA IDENTITY index nor PRIMARY "
    1488             :                     "KEY and published relation does not have "
    1489             :                     "REPLICA IDENTITY FULL",
    1490             :                     rel->remoterel.nspname, rel->remoterel.relname)));
    1491             : }
    1492             : 
    1493             : /*
    1494             :  * Handle UPDATE message.
    1495             :  *
                     :  * Returns immediately if handle_streamed_transaction() returns true for
                     :  * the message (tagged 'U').  Otherwise a local transaction is ensured,
                     :  * the old tuple (if the message carries one) and the new tuple are read
                     :  * from 's', and the target relation is opened with RowExclusiveLock.
                     :  * Relations for which should_apply_changes_for_rel() says no are closed
                     :  * (releasing that lock) and skipped; for the others
                     :  * check_relation_updatable() must pass before any work is done.
                     :  *
                     :  * The search tuple stored in the slot is the old tuple when one was sent
                     :  * and the new tuple otherwise.  Partitioned target tables go through
                     :  * apply_handle_tuple_routing(); all others through
                     :  * apply_handle_update_internal().  Afterwards queued AFTER triggers are
                     :  * handled, executor state is released, the relation is closed with
                     :  * NoLock and the command counter is incremented.
                     :  *
    1496             :  * TODO: FDW support
    1497             :  */
    1498             : static void
    1499      128020 : apply_handle_update(StringInfo s)
    1500             : {
    1501             :     ResultRelInfo *resultRelInfo;
    1502             :     LogicalRepRelMapEntry *rel;
    1503             :     LogicalRepRelId relid;
    1504             :     EState     *estate;
    1505             :     LogicalRepTupleData oldtup;
    1506             :     LogicalRepTupleData newtup;
    1507             :     bool        has_oldtup;
    1508             :     TupleTableSlot *remoteslot;
    1509             :     RangeTblEntry *target_rte;
    1510             :     MemoryContext oldctx;
    1511             : 
    1512      128020 :     if (handle_streamed_transaction('U', s))
    1513      130488 :         return;
    1514             : 
    1515       62776 :     ensure_transaction();
    1516             : 
    1517       62776 :     relid = logicalrep_read_update(s, &has_oldtup, &oldtup,
    1518             :                                    &newtup);
    1519       62776 :     rel = logicalrep_rel_open(relid, RowExclusiveLock);
    1520       62776 :     if (!should_apply_changes_for_rel(rel))
    1521             :     {
    1522             :         /*
    1523             :          * The relation can't become interesting in the middle of the
    1524             :          * transaction so it's safe to unlock it.
    1525             :          */
    1526           0 :         logicalrep_rel_close(rel, RowExclusiveLock);
    1527           0 :         return;
    1528             :     }
    1529             : 
    1530             :     /* Check if we can do the update. */
    1531       62776 :     check_relation_updatable(rel);
    1532             : 
    1533             :     /* Initialize the executor state. */
    1534       62776 :     estate = create_estate_for_relation(rel);
    1535       62776 :     remoteslot = ExecInitExtraTupleSlot(estate,
    1536       62776 :                                         RelationGetDescr(rel->localrel),
    1537             :                                         &TTSOpsVirtual);
    1538       62776 :     resultRelInfo = makeNode(ResultRelInfo);
    1539       62776 :     InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0);
    1540             : 
    1541             :     /*
    1542             :      * Populate updatedCols so that per-column triggers can fire.  This could
    1543             :      * include more columns than were actually changed on the publisher
    1544             :      * because the logical replication protocol doesn't contain that
    1545             :      * information.  But it would for example exclude columns that only exist
    1546             :      * on the subscriber, since we are not touching those.
                     :      *
                     :      * A local column is added when it is not dropped, has a remote
                     :      * counterpart in the attribute map (attnum >= 0), and the new tuple
                     :      * does not mark that remote column as unchanged.
    1547             :      */
    1548       62776 :     target_rte = list_nth(estate->es_range_table, 0);
    1549      313368 :     for (int i = 0; i < remoteslot->tts_tupleDescriptor->natts; i++)
    1550             :     {
    1551      250592 :         Form_pg_attribute att = TupleDescAttr(remoteslot->tts_tupleDescriptor, i);
    1552      250592 :         int         remoteattnum = rel->attrmap->attnums[i];
    1553             : 
    1554      250592 :         if (!att->attisdropped && remoteattnum >= 0)
    1555             :         {
    1556      125512 :             Assert(remoteattnum < newtup.ncols);
    1557      125512 :             if (newtup.colstatus[remoteattnum] != LOGICALREP_COLUMN_UNCHANGED)
    1558      125506 :                 target_rte->updatedCols =
    1559      125506 :                     bms_add_member(target_rte->updatedCols,
    1560             :                                    i + 1 - FirstLowInvalidHeapAttributeNumber);
    1561             :         }
    1562             :     }
    1563             : 
    1564       62776 :     fill_extraUpdatedCols(target_rte, RelationGetDescr(rel->localrel));
    1565             : 
    1566       62776 :     PushActiveSnapshot(GetTransactionSnapshot());
    1567             : 
    1568             :     /* Build the search tuple. */
    1569       62776 :     oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
    1570       62776 :     slot_store_data(remoteslot, rel,
    1571             :                     has_oldtup ? &oldtup : &newtup);
    1572       62776 :     MemoryContextSwitchTo(oldctx);
    1573             : 
    1574             :     /* For a partitioned table, apply update to correct partition. */
    1575       62776 :     if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    1576          10 :         apply_handle_tuple_routing(resultRelInfo, estate,
    1577             :                                    remoteslot, &newtup, rel, CMD_UPDATE);
    1578             :     else
    1579       62766 :         apply_handle_update_internal(resultRelInfo, estate,
    1580             :                                      remoteslot, &newtup, rel);
    1581             : 
    1582       62776 :     PopActiveSnapshot();
    1583             : 
    1584             :     /* Handle queued AFTER triggers. */
    1585       62776 :     AfterTriggerEndQuery(estate);
    1586             : 
    1587       62776 :     ExecResetTupleTable(estate->es_tupleTable, false);
    1588       62776 :     FreeExecutorState(estate);
    1589             : 
    1590       62776 :     logicalrep_rel_close(rel, NoLock);
    1591             : 
    1592       62776 :     CommandCounterIncrement();
    1593             : }
    1594             : 
    1595             : /* Workhorse for apply_handle_update()
                     :  *
                     :  * 'remoteslot' holds the search tuple on entry.  The matching local
                     :  * tuple is looked up with FindReplTupleInLocalRel() and 'remoteslot' is
                     :  * then cleared.  When a local tuple is found, the replacement tuple is
                     :  * built into 'remoteslot' from the local tuple and 'newtup' by
                     :  * slot_modify_data() (in the per-tuple memory context) and applied with
                     :  * ExecSimpleRelationUpdate().  When none is found, only a DEBUG1 message
                     :  * is emitted and the change is otherwise ignored.
                     :  */
    1596             : static void
    1597       62766 : apply_handle_update_internal(ResultRelInfo *relinfo,
    1598             :                              EState *estate, TupleTableSlot *remoteslot,
    1599             :                              LogicalRepTupleData *newtup,
    1600             :                              LogicalRepRelMapEntry *relmapentry)
    1601             : {
    1602       62766 :     Relation    localrel = relinfo->ri_RelationDesc;
    1603             :     EPQState    epqstate;
    1604             :     TupleTableSlot *localslot;
    1605             :     bool        found;
    1606             :     MemoryContext oldctx;
    1607             : 
    1608       62766 :     EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
    1609       62766 :     ExecOpenIndices(relinfo, false);
    1610             : 
    1611       62766 :     found = FindReplTupleInLocalRel(estate, localrel,
    1612             :                                     &relmapentry->remoterel,
    1613             :                                     remoteslot, &localslot);
    1614       62766 :     ExecClearTuple(remoteslot);
    1615             : 
    1616             :     /*
    1617             :      * Tuple found.
    1618             :      *
    1619             :      * Note this will fail if there are other conflicting unique indexes.
    1620             :      */
    1621       62766 :     if (found)
    1622             :     {
    1623             :         /* Process and store remote tuple in the slot */
    1624       62766 :         oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
    1625       62766 :         slot_modify_data(remoteslot, localslot, relmapentry, newtup);
    1626       62766 :         MemoryContextSwitchTo(oldctx);
    1627             : 
    1628       62766 :         EvalPlanQualSetSlot(&epqstate, remoteslot);
    1629             : 
    1630             :         /* Do the actual update. */
    1631       62766 :         ExecSimpleRelationUpdate(relinfo, estate, &epqstate, localslot,
    1632             :                                  remoteslot);
    1633             :     }
    1634             :     else
    1635             :     {
    1636             :         /*
    1637             :          * The tuple to be updated could not be found.
    1638             :          *
    1639             :          * TODO what to do here, change the log level to LOG perhaps?
    1640             :          */
    1641           0 :         elog(DEBUG1,
    1642             :              "logical replication did not find row for update "
    1643             :              "in replication target relation \"%s\"",
    1644             :              RelationGetRelationName(localrel));
    1645             :     }
    1646             : 
    1647             :     /* Cleanup. */
    1648       62766 :     ExecCloseIndices(relinfo);
    1649       62766 :     EvalPlanQualEnd(&epqstate);
    1650       62766 : }
    1651             : 
    1652             : /*
    1653             :  * Handle DELETE message.
    1654             :  *
                     :  * Returns immediately if handle_streamed_transaction() returns true for
                     :  * the message (tagged 'D').  Otherwise a local transaction is ensured,
                     :  * the old tuple is read from 's' and the target relation is opened with
                     :  * RowExclusiveLock.  Relations for which should_apply_changes_for_rel()
                     :  * says no are closed (releasing that lock) and skipped; for the others
                     :  * check_relation_updatable() must pass before any work is done.
                     :  *
                     :  * The old tuple is stored in a virtual slot as the search tuple.
                     :  * Partitioned target tables go through apply_handle_tuple_routing(); all
                     :  * others through apply_handle_delete_internal().  Afterwards queued
                     :  * AFTER triggers are handled, executor state is released, the relation
                     :  * is closed with NoLock and the command counter is incremented.
                     :  *
    1655             :  * TODO: FDW support
    1656             :  */
    1657             : static void
    1658      123746 : apply_handle_delete(StringInfo s)
    1659             : {
    1660             :     ResultRelInfo *resultRelInfo;
    1661             :     LogicalRepRelMapEntry *rel;
    1662             :     LogicalRepTupleData oldtup;
    1663             :     LogicalRepRelId relid;
    1664             :     EState     *estate;
    1665             :     TupleTableSlot *remoteslot;
    1666             :     MemoryContext oldctx;
    1667             : 
    1668      123746 :     if (handle_streamed_transaction('D', s))
    1669      123292 :         return;
    1670             : 
    1671       62100 :     ensure_transaction();
    1672             : 
    1673       62100 :     relid = logicalrep_read_delete(s, &oldtup);
    1674       62100 :     rel = logicalrep_rel_open(relid, RowExclusiveLock);
    1675       62100 :     if (!should_apply_changes_for_rel(rel))
    1676             :     {
    1677             :         /*
    1678             :          * The relation can't become interesting in the middle of the
    1679             :          * transaction so it's safe to unlock it.
    1680             :          */
    1681           0 :         logicalrep_rel_close(rel, RowExclusiveLock);
    1682           0 :         return;
    1683             :     }
    1684             : 
    1685             :     /* Check if we can do the delete. */
    1686       62100 :     check_relation_updatable(rel);
    1687             : 
    1688             :     /* Initialize the executor state. */
    1689       62100 :     estate = create_estate_for_relation(rel);
    1690       62100 :     remoteslot = ExecInitExtraTupleSlot(estate,
    1691       62100 :                                         RelationGetDescr(rel->localrel),
    1692             :                                         &TTSOpsVirtual);
    1693       62100 :     resultRelInfo = makeNode(ResultRelInfo);
    1694       62100 :     InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0);
    1695             : 
    1696       62100 :     PushActiveSnapshot(GetTransactionSnapshot());
    1697             : 
    1698             :     /* Build the search tuple. */
    1699       62100 :     oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
    1700       62100 :     slot_store_data(remoteslot, rel, &oldtup);
    1701       62100 :     MemoryContextSwitchTo(oldctx);
    1702             : 
    1703             :     /* For a partitioned table, apply delete to correct partition. */
    1704       62100 :     if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    1705          24 :         apply_handle_tuple_routing(resultRelInfo, estate,
    1706             :                                    remoteslot, NULL, rel, CMD_DELETE);
    1707             :     else
    1708       62076 :         apply_handle_delete_internal(resultRelInfo, estate,
    1709             :                                      remoteslot, &rel->remoterel);
    1710             : 
    1711       62100 :     PopActiveSnapshot();
    1712             : 
    1713             :     /* Handle queued AFTER triggers. */
    1714       62100 :     AfterTriggerEndQuery(estate);
    1715             : 
    1716       62100 :     ExecResetTupleTable(estate->es_tupleTable, false);
    1717       62100 :     FreeExecutorState(estate);
    1718             : 
    1719       62100 :     logicalrep_rel_close(rel, NoLock);
    1720             : 
    1721       62100 :     CommandCounterIncrement();
    1722             : }
    1723             : 
    1724             : /* Workhorse for apply_handle_delete()
                     :  *
                     :  * 'remoteslot' holds the search tuple.  The matching local tuple is
                     :  * looked up with FindReplTupleInLocalRel(); when found it is deleted
                     :  * with ExecSimpleRelationDelete(), otherwise only a DEBUG1 message is
                     :  * emitted and the change is otherwise ignored.  Indexes opened here and
                     :  * the EvalPlanQual state are released before returning in either case.
                     :  */
    1725             : static void
    1726       62102 : apply_handle_delete_internal(ResultRelInfo *relinfo, EState *estate,
    1727             :                              TupleTableSlot *remoteslot,
    1728             :                              LogicalRepRelation *remoterel)
    1729             : {
    1730       62102 :     Relation    localrel = relinfo->ri_RelationDesc;
    1731             :     EPQState    epqstate;
    1732             :     TupleTableSlot *localslot;
    1733             :     bool        found;
    1734             : 
    1735       62102 :     EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
    1736       62102 :     ExecOpenIndices(relinfo, false);
    1737             : 
    1738       62102 :     found = FindReplTupleInLocalRel(estate, localrel, remoterel,
    1739             :                                     remoteslot, &localslot);
    1740             : 
    1741             :     /* If found delete it. */
    1742       62102 :     if (found)
    1743             :     {
    1744       62102 :         EvalPlanQualSetSlot(&epqstate, localslot);
    1745             : 
    1746             :         /* Do the actual delete. */
    1747       62102 :         ExecSimpleRelationDelete(relinfo, estate, &epqstate, localslot);
    1748             :     }
    1749             :     else
    1750             :     {
    1751             :         /* The tuple to be deleted could not be found. */
    1752           0 :         elog(DEBUG1,
    1753             :              "logical replication could not find row for delete "
    1754             :              "in replication target relation \"%s\"",
    1755             :              RelationGetRelationName(localrel));
    1756             :     }
    1757             : 
    1758             :     /* Cleanup. */
    1759       62102 :     ExecCloseIndices(relinfo);
    1760       62102 :     EvalPlanQualEnd(&epqstate);
    1761       62102 : }
    1762             : 
    1763             : /*
    1764             :  * Try to find a tuple received from the publication side (in 'remoteslot') in
    1765             :  * the corresponding local relation using either replica identity index,
    1766             :  * primary key or if needed, sequential scan.
    1767             :  *
    1768             :  * Local tuple, if found, is returned in '*localslot'.
    1769             :  */
    1770             : static bool
    1771      124878 : FindReplTupleInLocalRel(EState *estate, Relation localrel,
    1772             :                         LogicalRepRelation *remoterel,
    1773             :                         TupleTableSlot *remoteslot,
    1774             :                         TupleTableSlot **localslot)
    1775             : {
    1776             :     Oid         idxoid;
    1777             :     bool        found;
    1778             : 
    1779      124878 :     *localslot = table_slot_create(localrel, &estate->es_tupleTable);
    1780             : 
    1781      124878 :     idxoid = GetRelationIdentityOrPK(localrel);
    1782      124878 :     Assert(OidIsValid(idxoid) ||
    1783             :            (remoterel->replident == REPLICA_IDENTITY_FULL));
    1784             : 
    1785      124878 :     if (OidIsValid(idxoid))
    1786      124634 :         found = RelationFindReplTupleByIndex(localrel, idxoid,
    1787             :                                              LockTupleExclusive,
    1788             :                                              remoteslot, *localslot);
    1789             :     else
    1790         244 :         found = RelationFindReplTupleSeq(localrel, LockTupleExclusive,
    1791             :                                          remoteslot, *localslot);
    1792             : 
    1793      124878 :     return found;
    1794             : }
    1795             : 
    1796             : /*
    1797             :  * This handles insert, update, delete on a partitioned table.
    1798             :  */
    1799             : static void
    1800          70 : apply_handle_tuple_routing(ResultRelInfo *relinfo,
    1801             :                            EState *estate,
    1802             :                            TupleTableSlot *remoteslot,
    1803             :                            LogicalRepTupleData *newtup,
    1804             :                            LogicalRepRelMapEntry *relmapentry,
    1805             :                            CmdType operation)
    1806             : {
    1807          70 :     Relation    parentrel = relinfo->ri_RelationDesc;
    1808          70 :     ModifyTableState *mtstate = NULL;
    1809          70 :     PartitionTupleRouting *proute = NULL;
    1810             :     ResultRelInfo *partrelinfo;
    1811             :     Relation    partrel;
    1812             :     TupleTableSlot *remoteslot_part;
    1813             :     TupleConversionMap *map;
    1814             :     MemoryContext oldctx;
    1815             : 
    1816             :     /* ModifyTableState is needed for ExecFindPartition(). */
    1817          70 :     mtstate = makeNode(ModifyTableState);
    1818          70 :     mtstate->ps.plan = NULL;
    1819          70 :     mtstate->ps.state = estate;
    1820          70 :     mtstate->operation = operation;
    1821          70 :     mtstate->resultRelInfo = relinfo;
    1822          70 :     proute = ExecSetupPartitionTupleRouting(estate, mtstate, parentrel);
    1823             : 
    1824             :     /*
    1825             :      * Find the partition to which the "search tuple" belongs.
    1826             :      */
    1827          70 :     Assert(remoteslot != NULL);
    1828          70 :     oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
    1829          70 :     partrelinfo = ExecFindPartition(mtstate, relinfo, proute,
    1830             :                                     remoteslot, estate);
    1831          70 :     Assert(partrelinfo != NULL);
    1832          70 :     partrel = partrelinfo->ri_RelationDesc;
    1833             : 
    1834             :     /*
    1835             :      * To perform any of the operations below, the tuple must match the
    1836             :      * partition's rowtype. Convert if needed or just copy, using a dedicated
    1837             :      * slot to store the tuple in any case.
    1838             :      */
    1839          70 :     remoteslot_part = partrelinfo->ri_PartitionTupleSlot;
    1840          70 :     if (remoteslot_part == NULL)
    1841          22 :         remoteslot_part = table_slot_create(partrel, &estate->es_tupleTable);
    1842          70 :     map = partrelinfo->ri_RootToPartitionMap;
    1843          70 :     if (map != NULL)
    1844          48 :         remoteslot_part = execute_attr_map_slot(map->attrMap, remoteslot,
    1845             :                                                 remoteslot_part);
    1846             :     else
    1847             :     {
    1848          22 :         remoteslot_part = ExecCopySlot(remoteslot_part, remoteslot);
    1849          22 :         slot_getallattrs(remoteslot_part);
    1850             :     }
    1851          70 :     MemoryContextSwitchTo(oldctx);
    1852             : 
    1853          70 :     switch (operation)
    1854             :     {
    1855             :         case CMD_INSERT:
    1856          36 :             apply_handle_insert_internal(partrelinfo, estate,
    1857             :                                          remoteslot_part);
    1858          36 :             break;
    1859             : 
    1860             :         case CMD_DELETE:
    1861          24 :             apply_handle_delete_internal(partrelinfo, estate,
    1862             :                                          remoteslot_part,
    1863             :                                          &relmapentry->remoterel);
    1864          24 :             break;
    1865             : 
    1866             :         case CMD_UPDATE:
    1867             : 
    1868             :             /*
    1869             :              * For UPDATE, depending on whether or not the updated tuple
    1870             :              * satisfies the partition's constraint, perform a simple UPDATE
    1871             :              * of the partition or move the updated tuple into a different
    1872             :              * suitable partition.
    1873             :              */
    1874             :             {
    1875          10 :                 AttrMap    *attrmap = map ? map->attrMap : NULL;
    1876             :                 LogicalRepRelMapEntry *part_entry;
    1877             :                 TupleTableSlot *localslot;
    1878             :                 ResultRelInfo *partrelinfo_new;
    1879             :                 bool        found;
    1880             : 
    1881          10 :                 part_entry = logicalrep_partition_open(relmapentry, partrel,
    1882             :                                                        attrmap);
    1883             : 
    1884             :                 /* Get the matching local tuple from the partition. */
    1885          10 :                 found = FindReplTupleInLocalRel(estate, partrel,
    1886             :                                                 &part_entry->remoterel,
    1887             :                                                 remoteslot_part, &localslot);
    1888             : 
    1889          10 :                 oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
    1890          10 :                 if (found)
    1891             :                 {
    1892             :                     /* Apply the update.  */
    1893          10 :                     slot_modify_data(remoteslot_part, localslot,
    1894             :                                      part_entry,
    1895             :                                      newtup);
    1896          10 :                     MemoryContextSwitchTo(oldctx);
    1897             :                 }
    1898             :                 else
    1899             :                 {
    1900             :                     /*
    1901             :                      * The tuple to be updated could not be found.
    1902             :                      *
    1903             :                      * TODO what to do here, change the log level to LOG
    1904             :                      * perhaps?
    1905             :                      */
    1906           0 :                     elog(DEBUG1,
    1907             :                          "logical replication did not find row for update "
    1908             :                          "in replication target relation \"%s\"",
    1909             :                          RelationGetRelationName(partrel));
    1910             :                 }
    1911             : 
    1912             :                 /*
    1913             :                  * Does the updated tuple still satisfy the current
    1914             :                  * partition's constraint?
    1915             :                  */
    1916          20 :                 if (!partrel->rd_rel->relispartition ||
    1917          10 :                     ExecPartitionCheck(partrelinfo, remoteslot_part, estate,
    1918             :                                        false))
    1919           8 :                 {
    1920             :                     /*
    1921             :                      * Yes, so simply UPDATE the partition.  We don't call
    1922             :                      * apply_handle_update_internal() here, which would
    1923             :                      * normally do the following work, to avoid repeating some
    1924             :                      * work already done above to find the local tuple in the
    1925             :                      * partition.
    1926             :                      */
    1927             :                     EPQState    epqstate;
    1928             : 
    1929           8 :                     EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
    1930           8 :                     ExecOpenIndices(partrelinfo, false);
    1931             : 
    1932           8 :                     EvalPlanQualSetSlot(&epqstate, remoteslot_part);
    1933           8 :                     ExecSimpleRelationUpdate(partrelinfo, estate, &epqstate,
    1934             :                                              localslot, remoteslot_part);
    1935           8 :                     ExecCloseIndices(partrelinfo);
    1936           8 :                     EvalPlanQualEnd(&epqstate);
    1937             :                 }
    1938             :                 else
    1939             :                 {
    1940             :                     /* Move the tuple into the new partition. */
    1941             : 
    1942             :                     /*
    1943             :                      * New partition will be found using tuple routing, which
    1944             :                      * can only occur via the parent table.  We might need to
    1945             :                      * convert the tuple to the parent's rowtype.  Note that
    1946             :                      * this is the tuple found in the partition, not the
    1947             :                      * original search tuple received by this function.
    1948             :                      */
    1949           2 :                     if (map)
    1950             :                     {
    1951           2 :                         TupleConversionMap *PartitionToRootMap =
    1952           2 :                         convert_tuples_by_name(RelationGetDescr(partrel),
    1953             :                                                RelationGetDescr(parentrel));
    1954             : 
    1955           2 :                         remoteslot =
    1956           2 :                             execute_attr_map_slot(PartitionToRootMap->attrMap,
    1957             :                                                   remoteslot_part, remoteslot);
    1958             :                     }
    1959             :                     else
    1960             :                     {
    1961           0 :                         remoteslot = ExecCopySlot(remoteslot, remoteslot_part);
    1962           0 :                         slot_getallattrs(remoteslot);
    1963             :                     }
    1964             : 
    1965             : 
    1966             :                     /* Find the new partition. */
    1967           2 :                     oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
    1968           2 :                     partrelinfo_new = ExecFindPartition(mtstate, relinfo,
    1969             :                                                         proute, remoteslot,
    1970             :                                                         estate);
    1971           2 :                     MemoryContextSwitchTo(oldctx);
    1972           2 :                     Assert(partrelinfo_new != partrelinfo);
    1973             : 
    1974             :                     /* DELETE old tuple found in the old partition. */
    1975           2 :                     apply_handle_delete_internal(partrelinfo, estate,
    1976             :                                                  localslot,
    1977             :                                                  &relmapentry->remoterel);
    1978             : 
    1979             :                     /* INSERT new tuple into the new partition. */
    1980             : 
    1981             :                     /*
    1982             :                      * Convert the replacement tuple to match the destination
    1983             :                      * partition rowtype.
    1984             :                      */
    1985           2 :                     oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
    1986           2 :                     partrel = partrelinfo_new->ri_RelationDesc;
    1987           2 :                     remoteslot_part = partrelinfo_new->ri_PartitionTupleSlot;
    1988           2 :                     if (remoteslot_part == NULL)
    1989           2 :                         remoteslot_part = table_slot_create(partrel,
    1990             :                                                             &estate->es_tupleTable);
    1991           2 :                     map = partrelinfo_new->ri_RootToPartitionMap;
    1992           2 :                     if (map != NULL)
    1993             :                     {
    1994           0 :                         remoteslot_part = execute_attr_map_slot(map->attrMap,
    1995             :                                                                 remoteslot,
    1996             :                                                                 remoteslot_part);
    1997             :                     }
    1998             :                     else
    1999             :                     {
    2000           2 :                         remoteslot_part = ExecCopySlot(remoteslot_part,
    2001             :                                                        remoteslot);
    2002           2 :                         slot_getallattrs(remoteslot);
    2003             :                     }
    2004           2 :                     MemoryContextSwitchTo(oldctx);
    2005           2 :                     apply_handle_insert_internal(partrelinfo_new, estate,
    2006             :                                                  remoteslot_part);
    2007             :                 }
    2008             :             }
    2009          10 :             break;
    2010             : 
    2011             :         default:
    2012           0 :             elog(ERROR, "unrecognized CmdType: %d", (int) operation);
    2013             :             break;
    2014             :     }
    2015             : 
    2016          70 :     ExecCleanupTupleRouting(mtstate, proute);
    2017          70 : }
    2018             : 
    2019             : /*
    2020             :  * Handle TRUNCATE message.
    2021             :  *
    2022             :  * TODO: FDW support
    2023             :  */
    2024             : static void
    2025          24 : apply_handle_truncate(StringInfo s)
    2026             : {
    2027          24 :     bool        cascade = false;
    2028          24 :     bool        restart_seqs = false;
    2029          24 :     List       *remote_relids = NIL;
    2030          24 :     List       *remote_rels = NIL;
    2031          24 :     List       *rels = NIL;
    2032          24 :     List       *part_rels = NIL;
    2033          24 :     List       *relids = NIL;
    2034          24 :     List       *relids_logged = NIL;
    2035             :     ListCell   *lc;
    2036             : 
    2037          24 :     if (handle_streamed_transaction('T', s))
    2038          24 :         return;
    2039             : 
    2040          24 :     ensure_transaction();
    2041             : 
    2042          24 :     remote_relids = logicalrep_read_truncate(s, &cascade, &restart_seqs);
    2043             : 
    2044          64 :     foreach(lc, remote_relids)
    2045             :     {
    2046          40 :         LogicalRepRelId relid = lfirst_oid(lc);
    2047             :         LogicalRepRelMapEntry *rel;
    2048             : 
    2049          40 :         rel = logicalrep_rel_open(relid, RowExclusiveLock);
    2050          40 :         if (!should_apply_changes_for_rel(rel))
    2051             :         {
    2052             :             /*
    2053             :              * The relation can't become interesting in the middle of the
    2054             :              * transaction so it's safe to unlock it.
    2055             :              */
    2056           0 :             logicalrep_rel_close(rel, RowExclusiveLock);
    2057           0 :             continue;
    2058             :         }
    2059             : 
    2060          40 :         remote_rels = lappend(remote_rels, rel);
    2061          40 :         rels = lappend(rels, rel->localrel);
    2062          40 :         relids = lappend_oid(relids, rel->localreloid);
    2063          40 :         if (RelationIsLogicallyLogged(rel->localrel))
    2064          40 :             relids_logged = lappend_oid(relids_logged, rel->localreloid);
    2065             : 
    2066             :         /*
    2067             :          * Truncate partitions if we got a message to truncate a partitioned
    2068             :          * table.
    2069             :          */
    2070          40 :         if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    2071             :         {
    2072             :             ListCell   *child;
    2073           8 :             List       *children = find_all_inheritors(rel->localreloid,
    2074             :                                                        RowExclusiveLock,
    2075             :                                                        NULL);
    2076             : 
    2077          30 :             foreach(child, children)
    2078             :             {
    2079          22 :                 Oid         childrelid = lfirst_oid(child);
    2080             :                 Relation    childrel;
    2081             : 
    2082          22 :                 if (list_member_oid(relids, childrelid))
    2083           8 :                     continue;
    2084             : 
    2085             :                 /* find_all_inheritors already got lock */
    2086          14 :                 childrel = table_open(childrelid, NoLock);
    2087             : 
    2088             :                 /*
    2089             :                  * Ignore temp tables of other backends.  See similar code in
    2090             :                  * ExecuteTruncate().
    2091             :                  */
    2092          14 :                 if (RELATION_IS_OTHER_TEMP(childrel))
    2093             :                 {
    2094           0 :                     table_close(childrel, RowExclusiveLock);
    2095           0 :                     continue;
    2096             :                 }
    2097             : 
    2098          14 :                 rels = lappend(rels, childrel);
    2099          14 :                 part_rels = lappend(part_rels, childrel);
    2100          14 :                 relids = lappend_oid(relids, childrelid);
    2101             :                 /* Log this relation only if needed for logical decoding */
    2102          14 :                 if (RelationIsLogicallyLogged(childrel))
    2103          14 :                     relids_logged = lappend_oid(relids_logged, childrelid);
    2104             :             }
    2105             :         }
    2106             :     }
    2107             : 
    2108             :     /*
    2109             :      * Even if we used CASCADE on the upstream primary we explicitly default
    2110             :      * to replaying changes without further cascading. This might be later
    2111             :      * changeable with a user specified option.
    2112             :      */
    2113          24 :     ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
    2114             : 
    2115          64 :     foreach(lc, remote_rels)
    2116             :     {
    2117          40 :         LogicalRepRelMapEntry *rel = lfirst(lc);
    2118             : 
    2119          40 :         logicalrep_rel_close(rel, NoLock);
    2120             :     }
    2121          38 :     foreach(lc, part_rels)
    2122             :     {
    2123          14 :         Relation    rel = lfirst(lc);
    2124             : 
    2125          14 :         table_close(rel, NoLock);
    2126             :     }
    2127             : 
    2128          24 :     CommandCounterIncrement();
    2129             : }
    2130             : 
    2131             : 
    2132             : /*
    2133             :  * Logical replication protocol message dispatcher.
    2134             :  */
    2135             : static void
    2136      479178 : apply_dispatch(StringInfo s)
    2137             : {
    2138      479178 :     char        action = pq_getmsgbyte(s);
    2139             : 
    2140      479178 :     switch (action)
    2141             :     {
    2142             :             /* BEGIN */
    2143             :         case 'B':
    2144         498 :             apply_handle_begin(s);
    2145         498 :             break;
    2146             :             /* COMMIT/ABORT */
    2147             :         case 'C':
    2148         486 :             apply_handle_commit(s);
    2149         486 :             break;
    2150             :             /* PREPARE and [COMMIT|ROLLBACK] PREPARED */
    2151             :         case 'P':
    2152          26 :             apply_handle_prepare(s);
    2153          26 :             break;
    2154             :             /* INSERT */
    2155             :         case 'I':
    2156      224744 :             apply_handle_insert(s);
    2157      224738 :             break;
    2158             :             /* UPDATE */
    2159             :         case 'U':
    2160      128020 :             apply_handle_update(s);
    2161      128020 :             break;
    2162             :             /* DELETE */
    2163             :         case 'D':
    2164      123746 :             apply_handle_delete(s);
    2165      123746 :             break;
    2166             :             /* TRUNCATE */
    2167             :         case 'T':
    2168          24 :             apply_handle_truncate(s);
    2169          24 :             break;
    2170             :             /* RELATION */
    2171             :         case 'R':
    2172         308 :             apply_handle_relation(s);
    2173         308 :             break;
    2174             :             /* TYPE */
    2175             :         case 'Y':
    2176          32 :             apply_handle_type(s);
    2177          32 :             break;
    2178             :             /* ORIGIN */
    2179             :         case 'O':
    2180           0 :             apply_handle_origin(s);
    2181           0 :             break;
    2182             :             /* STREAM START */
    2183             :         case 'S':
    2184         614 :             apply_handle_stream_start(s);
    2185         614 :             break;
    2186             :             /* STREAM END */
    2187             :         case 'E':
    2188         614 :             apply_handle_stream_stop(s);
    2189         614 :             break;
    2190             :             /* STREAM ABORT */
    2191             :         case 'A':
    2192          26 :             apply_handle_stream_abort(s);
    2193          26 :             break;
    2194             :             /* STREAM COMMIT */
    2195             :         case 'c':
    2196          28 :             apply_handle_stream_commit(s);
    2197          28 :             break;
    2198             :             /* STREAM PREPARE */
    2199             :         case 'p':
    2200          12 :             apply_handle_stream_prepare(s);
    2201          12 :             break;
    2202             :         default:
    2203           0 :             ereport(ERROR,
    2204             :                     (errcode(ERRCODE_PROTOCOL_VIOLATION),
    2205             :                      errmsg("invalid logical replication message type \"%c\"", action)));
    2206             :     }
    2207      479172 : }
    2208             : 
    2209             : /*
    2210             :  * Figure out which write/flush positions to report to the walsender process.
    2211             :  *
    2212             :  * We can't simply report back the last LSN the walsender sent us because the
    2213             :  * local transaction might not yet be flushed to disk locally. Instead we
    2214             :  * build a list that associates local with remote LSNs for every commit. When
    2215             :  * reporting back the flush position to the sender we iterate that list and
    2216             :  * check which entries on it are already locally flushed. Those we can report
    2217             :  * as having been flushed.
    2218             :  *
    2219             :  * The have_pending_txes is true if there are outstanding transactions that
    2220             :  * need to be flushed.
    2221             :  */
    2222             : static void
    2223       66192 : get_flush_position(XLogRecPtr *write, XLogRecPtr *flush,
    2224             :                    bool *have_pending_txes)
    2225             : {
    2226             :     dlist_mutable_iter iter;
    2227       66192 :     XLogRecPtr  local_flush = GetFlushRecPtr();
    2228             : 
    2229       66192 :     *write = InvalidXLogRecPtr;
    2230       66192 :     *flush = InvalidXLogRecPtr;
    2231             : 
    2232       66582 :     dlist_foreach_modify(iter, &lsn_mapping)
    2233             :     {
    2234       32882 :         FlushPosition *pos =
    2235       32882 :         dlist_container(FlushPosition, node, iter.cur);
    2236             : 
    2237       32882 :         *write = pos->remote_end;
    2238             : 
    2239       32882 :         if (pos->local_end <= local_flush)
    2240             :         {
    2241         390 :             *flush = pos->remote_end;
    2242         390 :             dlist_delete(iter.cur);
    2243         390 :             pfree(pos);
    2244             :         }
    2245             :         else
    2246             :         {
    2247             :             /*
    2248             :              * Don't want to uselessly iterate over the rest of the list which
    2249             :              * could potentially be long. Instead get the last element and
    2250             :              * grab the write position from there.
    2251             :              */
    2252       32492 :             pos = dlist_tail_element(FlushPosition, node,
    2253             :                                      &lsn_mapping);
    2254       32492 :             *write = pos->remote_end;
    2255       32492 :             *have_pending_txes = true;
    2256       64984 :             return;
    2257             :         }
    2258             :     }
    2259             : 
    2260       33700 :     *have_pending_txes = !dlist_is_empty(&lsn_mapping);
    2261             : }
    2262             : 
    2263             : /*
    2264             :  * Store current remote/local lsn pair in the tracking list.
    2265             :  */
    2266             : static void
    2267         434 : store_flush_position(XLogRecPtr remote_lsn)
    2268             : {
    2269             :     FlushPosition *flushpos;
    2270             : 
    2271             :     /* Need to do this in permanent context */
    2272         434 :     MemoryContextSwitchTo(ApplyContext);
    2273             : 
    2274             :     /* Track commit lsn  */
    2275         434 :     flushpos = (FlushPosition *) palloc(sizeof(FlushPosition));
    2276         434 :     flushpos->local_end = XactLastCommitEnd;
    2277         434 :     flushpos->remote_end = remote_lsn;
    2278             : 
    2279         434 :     dlist_push_tail(&lsn_mapping, &flushpos->node);
    2280         434 :     MemoryContextSwitchTo(ApplyMessageContext);
    2281         434 : }
    2282             : 
    2283             : 
    2284             : /* Update statistics of the worker. */
    2285             : static void
    2286      257434 : UpdateWorkerStats(XLogRecPtr last_lsn, TimestampTz send_time, bool reply)
    2287             : {
    2288      257434 :     MyLogicalRepWorker->last_lsn = last_lsn;
    2289      257434 :     MyLogicalRepWorker->last_send_time = send_time;
    2290      257434 :     MyLogicalRepWorker->last_recv_time = GetCurrentTimestamp();
    2291      257434 :     if (reply)
    2292             :     {
    2293        4408 :         MyLogicalRepWorker->reply_lsn = last_lsn;
    2294        4408 :         MyLogicalRepWorker->reply_time = send_time;
    2295             :     }
    2296      257434 : }
    2297             : 
    2298             : /*
    2299             :  * Apply main loop: receive messages, dispatch them, send feedback and wait for more data until the stream ends. 'last_received' is the initial received LSN and is advanced as messages arrive.
    2300             :  */
    2301             : static void
    2302         198 : LogicalRepApplyLoop(XLogRecPtr last_received)
    2303             : {
    2304         198 :     TimestampTz last_recv_timestamp = GetCurrentTimestamp();
    2305         198 :     bool        ping_sent = false;
    2306             :     TimeLineID  tli;
    2307             : 
    2308             :     /*
    2309             :      * Init the ApplyMessageContext which we clean up after each replication
    2310             :      * protocol message.
    2311             :      */
    2312         198 :     ApplyMessageContext = AllocSetContextCreate(ApplyContext,
    2313             :                                                 "ApplyMessageContext",
    2314             :                                                 ALLOCSET_DEFAULT_SIZES);
    2315             : 
    2316             :     /*
    2317             :      * This memory context is used for per-stream data when the streaming mode
    2318             :      * is enabled. This context is reset on each stream stop.
    2319             :      */
    2320         198 :     LogicalStreamingContext = AllocSetContextCreate(ApplyContext,
    2321             :                                                     "LogicalStreamingContext",
    2322             :                                                     ALLOCSET_DEFAULT_SIZES);
    2323             : 
    2324             :     /* mark as idle, before starting to loop */
    2325         198 :     pgstat_report_activity(STATE_IDLE, NULL);
    2326             : 
    2327             :     /* This outer loop iterates once per wait. */
    2328             :     for (;;)
    2329             :     {
    2330       61770 :         pgsocket    fd = PGINVALID_SOCKET;
    2331             :         int         rc;
    2332             :         int         len;
    2333       61770 :         char       *buf = NULL;
    2334       61770 :         bool        endofstream = false;
    2335             :         long        wait_time;
    2336             : 
    2337       61770 :         CHECK_FOR_INTERRUPTS();
    2338             : 
    2339       61770 :         MemoryContextSwitchTo(ApplyMessageContext);
    2340             : 
    2341       61770 :         len = walrcv_receive(wrconn, &buf, &fd);
    2342             : 
    2343       61758 :         if (len != 0)
    2344             :         {
    2345             :             /* Loop to process all available data (without blocking). */
    2346             :             for (;;)
    2347             :             {
    2348      318960 :                 CHECK_FOR_INTERRUPTS();
    2349             : 
    2350      318960 :                 if (len == 0)
    2351             :                 {
    2352       61520 :                     break;
    2353             :                 }
    2354      257440 :                 else if (len < 0)
    2355             :                 {
    2356           4 :                     ereport(LOG,
    2357             :                             (errmsg("data stream from publisher has ended")));
    2358           4 :                     endofstream = true;
    2359           4 :                     break;
    2360             :                 }
    2361             :                 else
    2362             :                 {
    2363             :                     int         c;
    2364             :                     StringInfoData s;
    2365             : 
    2366             :                     /* Reset timeout. */
    2367      257436 :                     last_recv_timestamp = GetCurrentTimestamp();
    2368      257436 :                     ping_sent = false;
    2369             : 
    2370             :                     /* Ensure we are reading the data into our memory context. */
    2371      257436 :                     MemoryContextSwitchTo(ApplyMessageContext);
    2372             : 
    2373      257436 :                     s.data = buf;
    2374      257436 :                     s.len = len;
    2375      257436 :                     s.cursor = 0;
    2376      257436 :                     s.maxlen = -1;
    2377             : 
    2378      257436 :                     c = pq_getmsgbyte(&s);
    2379             : 
    2380      257436 :                     if (c == 'w')   /* header carries start LSN, end LSN and send time; the rest goes to apply_dispatch() */
    2381             :                     {
    2382             :                         XLogRecPtr  start_lsn;
    2383             :                         XLogRecPtr  end_lsn;
    2384             :                         TimestampTz send_time;
    2385             : 
    2386      253026 :                         start_lsn = pq_getmsgint64(&s);
    2387      253026 :                         end_lsn = pq_getmsgint64(&s);
    2388      253026 :                         send_time = pq_getmsgint64(&s);
    2389             : 
    2390      253026 :                         if (last_received < start_lsn)
    2391      235492 :                             last_received = start_lsn;
    2392             : 
    2393      253026 :                         if (last_received < end_lsn)
    2394           0 :                             last_received = end_lsn;
    2395             : 
    2396      253026 :                         UpdateWorkerStats(last_received, send_time, false);
    2397             : 
    2398      253026 :                         apply_dispatch(&s);
    2399             :                     }
    2400        4410 :                     else if (c == 'k')  /* carries end LSN, timestamp and a reply-requested flag; answered with feedback */
    2401             :                     {
    2402             :                         XLogRecPtr  end_lsn;
    2403             :                         TimestampTz timestamp;
    2404             :                         bool        reply_requested;
    2405             : 
    2406        4410 :                         end_lsn = pq_getmsgint64(&s);
    2407        4410 :                         timestamp = pq_getmsgint64(&s);
    2408        4410 :                         reply_requested = pq_getmsgbyte(&s);
    2409             : 
    2410        4410 :                         if (last_received < end_lsn)
    2411         296 :                             last_received = end_lsn;
    2412             : 
    2413        4410 :                         send_feedback(last_received, reply_requested, false);
    2414        4408 :                         UpdateWorkerStats(last_received, timestamp, true);
    2415             :                     }
    2416             :                     /* other message types are purposefully ignored */
    2417             : 
    2418      257428 :                     MemoryContextReset(ApplyMessageContext);
    2419             :                 }
    2420             : 
    2421      257428 :                 len = walrcv_receive(wrconn, &buf, &fd);
    2422      257428 :             }
    2423             :         }
    2424             : 
    2425             :         /* confirm all writes so far */
    2426       61750 :         send_feedback(last_received, false, false);
    2427             : 
    2428       61750 :         if (!in_remote_transaction && !in_streamed_transaction)
    2429             :         {
    2430             :             /*
    2431             :              * If we didn't get any transactions for a while there might be
    2432             :              * unconsumed invalidation messages in the queue, consume them
    2433             :              * now.
    2434             :              */
    2435        5034 :             AcceptInvalidationMessages();
    2436        5034 :             maybe_reread_subscription();
    2437             : 
    2438             :             /* Process any table synchronization changes. */
    2439        5026 :             process_syncing_tables(last_received);
    2440             :         }
    2441             : 
    2442             :         /* Cleanup the memory. */
    2443       61620 :         MemoryContextResetAndDeleteChildren(ApplyMessageContext);
    2444       61620 :         MemoryContextSwitchTo(TopMemoryContext);
    2445             : 
    2446             :         /* Check if we need to exit the streaming loop. */
    2447       61620 :         if (endofstream)
    2448           4 :             break;
    2449             : 
    2450             :         /*
    2451             :          * Wait for more data or latch.  If we have unflushed transactions,
    2452             :          * wake up after WalWriterDelay to see if they've been flushed yet (in
    2453             :          * which case we should send a feedback message).  Otherwise, there's
    2454             :          * no particular urgency about waking up unless we get data or a
    2455             :          * signal.
    2456             :          */
    2457       61616 :         if (!dlist_is_empty(&lsn_mapping))
    2458       28706 :             wait_time = WalWriterDelay;
    2459             :         else
    2460       32910 :             wait_time = NAPTIME_PER_CYCLE;
    2461             : 
    2462       61616 :         rc = WaitLatchOrSocket(MyLatch,
    2463             :                                WL_SOCKET_READABLE | WL_LATCH_SET |
    2464             :                                WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
    2465             :                                fd, wait_time,
    2466             :                                WAIT_EVENT_LOGICAL_APPLY_MAIN);
    2467             : 
    2468       61616 :         if (rc & WL_LATCH_SET)
    2469             :         {
    2470         204 :             ResetLatch(MyLatch);
    2471         204 :             CHECK_FOR_INTERRUPTS();
    2472             :         }
    2473             : 
    2474       61572 :         if (ConfigReloadPending)
    2475             :         {
    2476           0 :             ConfigReloadPending = false;
    2477           0 :             ProcessConfigFile(PGC_SIGHUP);
    2478             :         }
    2479             : 
    2480       61572 :         if (rc & WL_TIMEOUT)
    2481             :         {
    2482             :             /*
    2483             :              * We didn't receive anything new. If we haven't heard anything
    2484             :              * from the server for more than wal_receiver_timeout / 2, ping
    2485             :              * the server. Also, if it's been longer than
    2486             :              * wal_receiver_status_interval since the last update we sent,
    2487             :              * send a status update to the publisher anyway, to report any
    2488             :              * progress in applying changes.
    2489             :              */
    2490          32 :             bool        requestReply = false;
    2491             : 
    2492             :             /*
    2493             :              * Check if time since last receive from the publisher has reached
    2494             :              * the configured limit (wal_receiver_timeout).
    2495             :              */
    2496          32 :             if (wal_receiver_timeout > 0)
    2497             :             {
    2498          32 :                 TimestampTz now = GetCurrentTimestamp();
    2499             :                 TimestampTz timeout;
    2500             : 
    2501          32 :                 timeout =
    2502          32 :                     TimestampTzPlusMilliseconds(last_recv_timestamp,
    2503             :                                                 wal_receiver_timeout);
    2504             : 
    2505          32 :                 if (now >= timeout)
    2506           0 :                     ereport(ERROR,
    2507             :                             (errmsg("terminating logical replication worker due to timeout")));
    2508             : 
    2509             :                 /* Check to see if it's time for a ping (only one ping per quiet period). */
    2510          32 :                 if (!ping_sent)
    2511             :                 {
    2512          32 :                     timeout = TimestampTzPlusMilliseconds(last_recv_timestamp,
    2513             :                                                           (wal_receiver_timeout / 2));
    2514          32 :                     if (now >= timeout)
    2515             :                     {
    2516           0 :                         requestReply = true;
    2517           0 :                         ping_sent = true;
    2518             :                     }
    2519             :                 }
    2520             :             }
    2521             : 
    2522          32 :             send_feedback(last_received, requestReply, requestReply);
    2523             :         }
    2524       61572 :     }
    2525             : 
    2526             :     /* All done */
    2527           4 :     walrcv_endstreaming(wrconn, &tli);
    2528           0 : }
    2529             : 
    2530             : /*
    2531             :  * Send a Standby Status Update message to server.
    2532             :  *
    2533             :  * 'recvpos' is the latest LSN we've received data to; 'force' is set if we need
    2534             :  * to send a response to avoid timeouts; 'requestReply' is written into the message as the replyRequested flag.
    2535             :  */
    2536             : static void
    2537       66192 : send_feedback(XLogRecPtr recvpos, bool force, bool requestReply)
    2538             : {
    2539             :     static StringInfo reply_message = NULL;
    2540             :     static TimestampTz send_time = 0;
    2541             : 
    2542             :     static XLogRecPtr last_recvpos = InvalidXLogRecPtr;
    2543             :     static XLogRecPtr last_writepos = InvalidXLogRecPtr;
    2544             :     static XLogRecPtr last_flushpos = InvalidXLogRecPtr;
    2545             : 
    2546             :     XLogRecPtr  writepos;
    2547             :     XLogRecPtr  flushpos;
    2548             :     TimestampTz now;
    2549             :     bool        have_pending_txes;
    2550             : 
    2551             :     /*
    2552             :      * If the user doesn't want status to be reported to the publisher, be
    2553             :      * sure to exit before doing anything at all.
    2554             :      */
    2555       66192 :     if (!force && wal_receiver_status_interval <= 0)
    2556       32770 :         return;
    2557             : 
    2558             :     /* It's legal to not pass a recvpos; never go below the last position we sent */
    2559       66192 :     if (recvpos < last_recvpos)
    2560           0 :         recvpos = last_recvpos;
    2561             : 
    2562       66192 :     get_flush_position(&writepos, &flushpos, &have_pending_txes);
    2563             : 
    2564             :     /*
    2565             :      * No outstanding transactions to flush, we can report the latest received
    2566             :      * position. This is important for synchronous replication.
    2567             :      */
    2568       66192 :     if (!have_pending_txes)
    2569       33700 :         flushpos = writepos = recvpos;
    2570             : 
    2571       66192 :     if (writepos < last_writepos)
    2572           0 :         writepos = last_writepos;
    2573             : 
    2574       66192 :     if (flushpos < last_flushpos)
    2575       32452 :         flushpos = last_flushpos;
    2576             : 
    2577       66192 :     now = GetCurrentTimestamp();
    2578             : 
    2579             :     /* if we've already reported everything and the status interval has not elapsed, we're good */
    2580      129040 :     if (!force &&
    2581       95642 :         writepos == last_writepos &&
    2582       65566 :         flushpos == last_flushpos &&
    2583       32772 :         !TimestampDifferenceExceeds(send_time, now,
    2584             :                                     wal_receiver_status_interval * 1000))
    2585       32770 :         return;
    2586       33422 :     send_time = now;
    2587             : 
    2588       33422 :     if (!reply_message)
    2589             :     {
    2590         198 :         MemoryContext oldctx = MemoryContextSwitchTo(ApplyContext);
    2591             : 
    2592         198 :         reply_message = makeStringInfo();
    2593         198 :         MemoryContextSwitchTo(oldctx);
    2594             :     }
    2595             :     else
    2596       33224 :         resetStringInfo(reply_message);
    2597             : 
    2598       33422 :     pq_sendbyte(reply_message, 'r');
    2599       33422 :     pq_sendint64(reply_message, recvpos);   /* write */
    2600       33422 :     pq_sendint64(reply_message, flushpos);  /* flush */
    2601       33422 :     pq_sendint64(reply_message, writepos);  /* apply */
    2602       33422 :     pq_sendint64(reply_message, now);   /* sendTime */
    2603       33422 :     pq_sendbyte(reply_message, requestReply);   /* replyRequested */
    2604             : 
    2605       33422 :     elog(DEBUG2, "sending feedback (force %d) to recv %X/%X, write %X/%X, flush %X/%X",
    2606             :          force,
    2607             :          (uint32) (recvpos >> 32), (uint32) recvpos,
    2608             :          (uint32) (writepos >> 32), (uint32) writepos,
    2609             :          (uint32) (flushpos >> 32), (uint32) flushpos
    2610             :         );
    2611             : 
    2612       33422 :     walrcv_send(wrconn, reply_message->data, reply_message->len);
    2613             : 
    2614       33420 :     if (recvpos > last_recvpos)
    2615       30058 :         last_recvpos = recvpos;
    2616       33420 :     if (writepos > last_writepos)
    2617       30054 :         last_writepos = writepos;
    2618       33420 :     if (flushpos > last_flushpos)
    2619       29748 :         last_flushpos = flushpos;
    2620             : }
    2621             : 
    2622             : /*
    2623             :  * Reread subscription info if needed. Most changes will cause the worker to exit.
    2624             :  */
    2625             : static void
    2626        6230 : maybe_reread_subscription(void)
    2627             : {
    2628             :     MemoryContext oldctx;
    2629             :     Subscription *newsub;
    2630        6230 :     bool        started_tx = false;
    2631             : 
    2632             :     /* When cache state is valid there is nothing to do here. */
    2633        6230 :     if (MySubscriptionValid)
    2634       12424 :         return;
    2635             : 
    2636             :     /* This function might be called inside or outside of transaction. */
    2637          24 :     if (!IsTransactionState())
    2638             :     {
    2639          18 :         StartTransactionCommand();
    2640          18 :         started_tx = true;
    2641             :     }
    2642             : 
    2643             :     /* Ensure allocations in permanent context. */
    2644          24 :     oldctx = MemoryContextSwitchTo(ApplyContext);
    2645             : 
    2646          24 :     newsub = GetSubscription(MyLogicalRepWorker->subid, true);
    2647             : 
    2648             :     /*
    2649             :      * Exit if the subscription was removed. This normally should not happen
    2650             :      * as the worker gets killed during DROP SUBSCRIPTION.
    2651             :      */
    2652          24 :     if (!newsub)
    2653             :     {
    2654           0 :         ereport(LOG,
    2655             :                 (errmsg("logical replication apply worker for subscription \"%s\" will "
    2656             :                         "stop because the subscription was removed",
    2657             :                         MySubscription->name)));
    2658             : 
    2659           0 :         proc_exit(0);
    2660             :     }
    2661             : 
    2662             :     /*
    2663             :      * Exit if the subscription was disabled. This normally should not happen
    2664             :      * as the worker gets killed during ALTER SUBSCRIPTION ... DISABLE.
    2665             :      */
    2666          24 :     if (!newsub->enabled)
    2667             :     {
    2668           0 :         ereport(LOG,
    2669             :                 (errmsg("logical replication apply worker for subscription \"%s\" will "
    2670             :                         "stop because the subscription was disabled",
    2671             :                         MySubscription->name)));
    2672             : 
    2673           0 :         proc_exit(0);
    2674             :     }
    2675             : 
    2676             :     /* !slotname should never happen when enabled is true. */
    2677          24 :     Assert(newsub->slotname);
    2678             : 
    2679             :     /*
    2680             :      * Exit if any parameter that affects the remote connection was changed
    2681             :      * (conninfo, name, slot name, binary, stream or the publication list).
    2682             :      */
    2683          46 :     if (strcmp(newsub->conninfo, MySubscription->conninfo) != 0 ||
    2684          42 :         strcmp(newsub->name, MySubscription->name) != 0 ||
    2685          40 :         strcmp(newsub->slotname, MySubscription->slotname) != 0 ||
    2686          34 :         newsub->binary != MySubscription->binary ||
    2687          28 :         newsub->stream != MySubscription->stream ||
    2688          14 :         !equal(newsub->publications, MySubscription->publications))
    2689             :     {
    2690          12 :         ereport(LOG,
    2691             :                 (errmsg("logical replication apply worker for subscription \"%s\" will restart because of a parameter change",
    2692             :                         MySubscription->name)));
    2693             : 
    2694          12 :         proc_exit(0);
    2695             :     }
    2696             : 
    2697             :     /* Also check for other changes that should never happen. */
    2698          12 :     if (newsub->dbid != MySubscription->dbid)
    2699             :     {
    2700           0 :         elog(ERROR, "subscription %u changed unexpectedly",
    2701             :              MyLogicalRepWorker->subid);
    2702             :     }
    2703             : 
    2704             :     /* Clean old subscription info and switch to new one. */
    2705          12 :     FreeSubscription(MySubscription);
    2706          12 :     MySubscription = newsub;
    2707             : 
    2708          12 :     MemoryContextSwitchTo(oldctx);
    2709             : 
    2710             :     /* Change synchronous commit according to the user's wishes */
    2711          12 :     SetConfigOption("synchronous_commit", MySubscription->synccommit,
    2712             :                     PGC_BACKEND, PGC_S_OVERRIDE);
    2713             : 
    2714          12 :     if (started_tx)
    2715          10 :         CommitTransactionCommand();
    2716             : 
    2717          12 :     MySubscriptionValid = true;
    2718             : }
    2719             : 
    2720             : /*
    2721             :  * Callback from subscription syscache invalidation: clears MySubscriptionValid; the arguments are not used.
    2722             :  */
    2723             : static void
    2724          24 : subscription_change_cb(Datum arg, int cacheid, uint32 hashvalue)
    2725             : {
    2726          24 :     MySubscriptionValid = false;
    2727          24 : }
    2728             : 
    2729             : /*
    2730             :  * subxact_info_write
    2731             :  *    Store information about subxacts for a toplevel transaction.
    2732             :  *
    2733             :  * For each subxact we store the offset of its first change in the main file.
    2734             :  * The file is always over-written as a whole.
    2735             :  *
    2736             :  * XXX We should only store subxacts that were not aborted yet.
    2737             :  */
    2738             : static void
    2739         630 : subxact_info_write(Oid subid, TransactionId xid)
    2740             : {
    2741             :     char        path[MAXPGPATH];
    2742             :     bool        found;
    2743             :     Size        len;
    2744             :     StreamXidHash *ent;
    2745             :     BufFile    *fd;
    2746             : 
    2747         630 :     Assert(TransactionIdIsValid(xid));
    2748             : 
    2749             :     /* find the xid entry in the xidhash */
    2750         630 :     ent = (StreamXidHash *) hash_search(xidhash,
    2751             :                                         (void *) &xid,
    2752             :                                         HASH_FIND,
    2753             :                                         &found);
    2754             :     /* we must have found the entry for its top transaction by this time */
    2755         630 :     Assert(found);
    2756             : 
    2757             :     /*
    2758             :      * If there is no subtransaction then there is nothing to do, but if we
    2759             :      * already have a subxact file then delete it.
    2760             :      */
    2761         630 :     if (subxact_data.nsubxacts == 0)
    2762             :     {
    2763         548 :         if (ent->subxact_fileset)
    2764             :         {
    2765           6 :             cleanup_subxact_info();
    2766           6 :             SharedFileSetDeleteAll(ent->subxact_fileset);
    2767           6 :             pfree(ent->subxact_fileset);
    2768           6 :             ent->subxact_fileset = NULL;
    2769             :         }
    2770        1178 :         return;
    2771             :     }
    2772             : 
    2773          82 :     subxact_filename(path, subid, xid);
    2774             : 
    2775             :     /*
    2776             :      * Create the subxact file if it is not already created, otherwise open
    2777             :      * the existing file.
    2778             :      */
    2779          82 :     if (ent->subxact_fileset == NULL)
    2780             :     {
    2781             :         MemoryContext oldctx;
    2782             : 
    2783             :         /*
    2784             :          * We need to maintain shared fileset across multiple stream
    2785             :          * start/stop calls.  So, need to allocate it in a persistent context.
    2786             :          */
    2787          14 :         oldctx = MemoryContextSwitchTo(ApplyContext);
    2788          14 :         ent->subxact_fileset = palloc(sizeof(SharedFileSet));
    2789          14 :         SharedFileSetInit(ent->subxact_fileset, NULL);
    2790          14 :         MemoryContextSwitchTo(oldctx);
    2791             : 
    2792          14 :         fd = BufFileCreateShared(ent->subxact_fileset, path);
    2793             :     }
    2794             :     else
    2795          68 :         fd = BufFileOpenShared(ent->subxact_fileset, path, O_RDWR);
    2796             : 
    2797          82 :     len = sizeof(SubXactInfo) * subxact_data.nsubxacts;
    2798             : 
    2799             :     /* Write the subxact count followed by the array of subxact info */
    2800          82 :     BufFileWrite(fd, &subxact_data.nsubxacts, sizeof(subxact_data.nsubxacts));
    2801          82 :     BufFileWrite(fd, subxact_data.subxacts, len);
    2802             : 
    2803          82 :     BufFileClose(fd);
    2804             : 
    2805             :     /* free the memory allocated for subxact info */
    2806          82 :     cleanup_subxact_info();
    2807             : }
    2808             : 
    2809             : /*
    2810             :  * subxact_info_read
    2811             :  *    Restore information about subxacts of a streamed transaction.
    2812             :  *
    2813             :  * Read information about subxacts into the structure subxact_data so that it
    2814             :  * can be used later; subxact_data must be empty on entry (see the asserts).
    2815             :  */
    2816             : static void
    2817         594 : subxact_info_read(Oid subid, TransactionId xid)
    2818             : {
    2819             :     char        path[MAXPGPATH];
    2820             :     bool        found;
    2821             :     Size        len;
    2822             :     BufFile    *fd;
    2823             :     StreamXidHash *ent;
    2824             :     MemoryContext oldctx;
    2825             : 
    2826         594 :     Assert(TransactionIdIsValid(xid));
    2827         594 :     Assert(!subxact_data.subxacts);
    2828         594 :     Assert(subxact_data.nsubxacts == 0);
    2829         594 :     Assert(subxact_data.nsubxacts_max == 0);
    2830             : 
    2831             :     /* Find the stream xid entry in the xidhash. NOTE(review): neither 'found' nor 'ent' is checked before 'ent' is dereferenced below. */
    2832         594 :     ent = (StreamXidHash *) hash_search(xidhash,
    2833             :                                         (void *) &xid,
    2834             :                                         HASH_FIND,
    2835             :                                         &found);
    2836             : 
    2837             :     /*
    2838             :      * If subxact_fileset is not valid, that means we don't have any subxact
    2839             :      * info
    2840             :      */
    2841         594 :     if (ent->subxact_fileset == NULL)
    2842        1112 :         return;
    2843             : 
    2844          76 :     subxact_filename(path, subid, xid);
    2845             : 
    2846          76 :     fd = BufFileOpenShared(ent->subxact_fileset, path, O_RDONLY);
    2847             : 
    2848             :     /* read number of subxact items */
    2849          76 :     if (BufFileRead(fd, &subxact_data.nsubxacts,
    2850             :                     sizeof(subxact_data.nsubxacts)) !=
    2851             :         sizeof(subxact_data.nsubxacts))
    2852           0 :         ereport(ERROR,
    2853             :                 (errcode_for_file_access(),
    2854             :                  errmsg("could not read from streaming transaction's subxact file \"%s\": %m",
    2855             :                         path)));
    2856             : 
    2857          76 :     len = sizeof(SubXactInfo) * subxact_data.nsubxacts;
    2858             : 
    2859             :     /* we keep the maximum as a power of 2 */
    2860          76 :     subxact_data.nsubxacts_max = 1 << my_log2(subxact_data.nsubxacts);
    2861             : 
    2862             :     /*
    2863             :      * Allocate subxact information in the logical streaming context. We need
    2864             :      * this information during the complete stream so that we can add the sub
    2865             :      * transaction info to this. On stream stop we will flush this information
    2866             :      * to the subxact file and reset the logical streaming context.
    2867             :      */
    2868          76 :     oldctx = MemoryContextSwitchTo(LogicalStreamingContext);
    2869          76 :     subxact_data.subxacts = palloc(subxact_data.nsubxacts_max *
    2870             :                                    sizeof(SubXactInfo));
    2871          76 :     MemoryContextSwitchTo(oldctx);
    2872             : 
    2873          76 :     if ((len > 0) && ((BufFileRead(fd, subxact_data.subxacts, len)) != len))
    2874           0 :         ereport(ERROR,
    2875             :                 (errcode_for_file_access(),
    2876             :                  errmsg("could not read from streaming transaction's subxact file \"%s\": %m",
    2877             :                         path)));
    2878             : 
    2879          76 :     BufFileClose(fd);
    2880             : }
    2881             : 
    2882             : /*
    2883             :  * subxact_info_add
    2884             :  *    Remember where subxact 'xid' starts: its XID plus the current (fileno, offset) of stream_fd.
    2885             :  */
    2886             : static void
    2887      248838 : subxact_info_add(TransactionId xid)
    2888             : {
    2889      248838 :     SubXactInfo *subxacts = subxact_data.subxacts;
    2890             :     int64       i;
    2891             : 
    2892             :     /* We must have a valid top level stream xid and a stream fd. */
    2893      248838 :     Assert(TransactionIdIsValid(stream_xid));
    2894      248838 :     Assert(stream_fd != NULL);
    2895             : 
    2896             :     /*
    2897             :      * The toplevel transaction itself is not a subxact, so it is never added.
    2898             :      */
    2899      248838 :     if (stream_xid == xid)
    2900      223208 :         return;
    2901             : 
    2902             :     /*
    2903             :      * In most cases we're checking the same subxact as we've already seen in
    2904             :      * the last call; that call already dealt with it, so there is nothing to do.
    2905             :      */
    2906       25630 :     if (subxact_data.subxact_last == xid)
    2907       25536 :         return;
    2908             : 
    2909             :     /* OK, remember we're processing this XID. */
    2910          94 :     subxact_data.subxact_last = xid;
    2911             : 
    2912             :     /*
    2913             :      * Check if the transaction is already present in the array of subxact. We
    2914             :      * intentionally scan the array from the tail, because we're likely adding
    2915             :      * a change for the most recent subtransactions.
    2916             :      *
    2917             :      * XXX Can we rely on the subxact XIDs arriving in sorted order? That
    2918             :      * would allow us to use binary search here.
    2919             :      */
    2920         132 :     for (i = subxact_data.nsubxacts; i > 0; i--)
    2921             :     {
    2922             :         /* found, so we're done */
    2923          96 :         if (subxacts[i - 1].xid == xid)
    2924          58 :             return;
    2925             :     }
    2926             : 
    2927             :     /* New subxact: allocate the array (128 entries) on first use, or double it when full, then append. */
    2928          36 :     if (subxact_data.nsubxacts == 0)
    2929             :     {
    2930             :         MemoryContext oldctx;
    2931             : 
    2932          14 :         subxact_data.nsubxacts_max = 128;
    2933             : 
    2934             :         /*
    2935             :          * Allocate the array in the per-stream context (LogicalStreamingContext).
    2936             :          * NOTE(review): any array already held in subxact_data.subxacts is not freed here; confirm that is intended.
    2937             :          */
    2938          14 :         oldctx = MemoryContextSwitchTo(LogicalStreamingContext);
    2939          14 :         subxacts = palloc(subxact_data.nsubxacts_max * sizeof(SubXactInfo));
    2940          14 :         MemoryContextSwitchTo(oldctx);
    2941             :     }
    2942          22 :     else if (subxact_data.nsubxacts == subxact_data.nsubxacts_max)
    2943             :     {
    2944          20 :         subxact_data.nsubxacts_max *= 2;
    2945          20 :         subxacts = repalloc(subxacts,
    2946          20 :                             subxact_data.nsubxacts_max * sizeof(SubXactInfo));
    2947             :     }
    2948             : 
    2949          36 :     subxacts[subxact_data.nsubxacts].xid = xid;
    2950             : 
    2951             :     /*
    2952             :      * Get the current position (file number and offset) of the stream file
    2953             :      * and store it as the starting position of this subxact.
    2954             :      */
    2955          72 :     BufFileTell(stream_fd,
    2956          36 :                 &subxacts[subxact_data.nsubxacts].fileno,
    2957          36 :                 &subxacts[subxact_data.nsubxacts].offset);
    2958             : 
    2959          36 :     subxact_data.nsubxacts++;
    2960          36 :     subxact_data.subxacts = subxacts;
    2961             : }
    2962             : 
    2963             : /* Format the subxact-info file name "<subid>-<xid>.subxacts" into path (writes at most MAXPGPATH bytes). */
    2964             : static inline void
    2965         166 : subxact_filename(char *path, Oid subid, TransactionId xid)
    2966             : {
    2967         166 :     snprintf(path, MAXPGPATH, "%u-%u.subxacts", subid, xid);
    2968         166 : }
    2969             : 
    2970             : /* Format the serialized-changes file name "<subid>-<xid>.changes" into path (writes at most MAXPGPATH bytes). */
    2971             : static inline void
    2972         712 : changes_filename(char *path, Oid subid, TransactionId xid)
    2973             : {
    2974         712 :     snprintf(path, MAXPGPATH, "%u-%u.changes", subid, xid);
    2975         712 : }
    2976             : 
    2977             : /*
    2978             :  * stream_cleanup_files
    2979             :  *    Cleanup files for a subscription / toplevel transaction.
    2980             :  *
    2981             :  * Removes the xidhash entry for 'xid', then deletes and frees its changes
    2982             :  * fileset and, if one was created, its subxact fileset.
    2983             :  */
    2984             : static void
    2985          42 : stream_cleanup_files(Oid subid, TransactionId xid)
    2986             : {
    2987             :     char        path[MAXPGPATH];
    2988             :     StreamXidHash *ent;
    2989             : 
    2990             :     /* Remove the xid entry from the stream xid hash. NOTE(review): ent is still dereferenced below, after HASH_REMOVE; looking it up first and removing it last looks safer - confirm. */
    2991          42 :     ent = (StreamXidHash *) hash_search(xidhash,
    2992             :                                         (void *) &xid,
    2993             :                                         HASH_REMOVE,
    2994             :                                         NULL);
    2995             :     /* By this time we must have created the transaction entry */
    2996          42 :     Assert(ent != NULL);
    2997             : 
    2998             :     /* Delete the change file and release the stream fileset memory (path is formatted here but not used by the calls below) */
    2999          42 :     changes_filename(path, subid, xid);
    3000          42 :     SharedFileSetDeleteAll(ent->stream_fileset);
    3001          42 :     pfree(ent->stream_fileset);
    3002          42 :     ent->stream_fileset = NULL;
    3003             : 
    3004             :     /* Delete the subxact file and release the memory, if it exists */
    3005          42 :     if (ent->subxact_fileset)
    3006             :     {
    3007           8 :         subxact_filename(path, subid, xid);
    3008           8 :         SharedFileSetDeleteAll(ent->subxact_fileset);
    3009           8 :         pfree(ent->subxact_fileset);
    3010           8 :         ent->subxact_fileset = NULL;
    3011             :     }
    3012          42 : }
    3013             : 
    3014             : /*
    3015             :  * stream_open_file
    3016             :  *    Open a file that we'll use to serialize changes for a toplevel
    3017             :  * transaction.
    3018             :  *
    3019             :  * Open a file for streamed changes from the toplevel transaction identified
    3020             :  * by 'xid' and leave it in stream_fd. If it's the first chunk of streamed
    3021             :  * changes for this transaction, initialize the shared fileset and create the
    3022             :  * buffile, otherwise open the previously created file.
    3023             :  *
    3024             :  * This can only be called at the beginning of a "streaming" block, i.e.
    3025             :  * between stream_start/stream_stop messages from the upstream.
    3026             :  */
    3027             : static void
    3028         614 : stream_open_file(Oid subid, TransactionId xid, bool first_segment)
    3029             : {
    3030             :     char        path[MAXPGPATH];
    3031             :     bool        found;
    3032             :     MemoryContext oldcxt;
    3033             :     StreamXidHash *ent;
    3034             : 
    3035         614 :     Assert(in_streamed_transaction);
    3036         614 :     Assert(OidIsValid(subid));
    3037         614 :     Assert(TransactionIdIsValid(xid));
    3038         614 :     Assert(stream_fd == NULL);
    3039             : 
    3040             :     /* Create or find the xid entry in the xidhash. NOTE(review): two action values are OR'ed below; confirm a single HASH_ENTER is what is intended. */
    3041         614 :     ent = (StreamXidHash *) hash_search(xidhash,
    3042             :                                         (void *) &xid,
    3043             :                                         HASH_ENTER | HASH_FIND,
    3044             :                                         &found);
    3045         614 :     Assert(first_segment || found);
    3046         614 :     changes_filename(path, subid, xid);
    3047         614 :     elog(DEBUG1, "opening file \"%s\" for streamed changes", path);
    3048             : 
    3049             :     /*
    3050             :      * Create/open the buffiles under the logical streaming context so that we
    3051             :      * have those files until stream stop.
    3052             :      */
    3053         614 :     oldcxt = MemoryContextSwitchTo(LogicalStreamingContext);
    3054             : 
    3055             :     /*
    3056             :      * If this is the first streamed segment, the file must not exist, so make
    3057             :      * sure we're the ones creating it. Otherwise just open the existing file
    3058             :      * read-write and position at its end.
    3059             :      */
    3060         614 :     if (first_segment)
    3061             :     {
    3062             :         MemoryContext savectx;
    3063             :         SharedFileSet *fileset;
    3064             : 
    3065             :         /*
    3066             :          * The shared fileset must survive across multiple stream start/stop
    3067             :          * calls, so allocate it in the persistent ApplyContext.
    3068             :          */
    3069          44 :         savectx = MemoryContextSwitchTo(ApplyContext);
    3070          44 :         fileset = palloc(sizeof(SharedFileSet));
    3071             : 
    3072          44 :         SharedFileSetInit(fileset, NULL);
    3073          44 :         MemoryContextSwitchTo(savectx);
    3074             : 
    3075          44 :         stream_fd = BufFileCreateShared(fileset, path);
    3076             : 
    3077             :         /* Remember the fileset for the next stream of the same transaction */
    3078          44 :         ent->xid = xid;
    3079          44 :         ent->stream_fileset = fileset;
    3080          44 :         ent->subxact_fileset = NULL;
    3081             :     }
    3082             :     else
    3083             :     {
    3084             :         /*
    3085             :          * Open the file and seek to the end of the file because we always
    3086             :          * append to the changes file.
    3087             :          */
    3088         570 :         stream_fd = BufFileOpenShared(ent->stream_fileset, path, O_RDWR);
    3089         570 :         BufFileSeek(stream_fd, 0, 0, SEEK_END);
    3090             :     }
    3091             : 
    3092         614 :     MemoryContextSwitchTo(oldcxt);
    3093         614 : }
    3094             : 
    3095             : /*
    3096             :  * stream_close_file
    3097             :  *    Close the currently open changes file and reset stream_fd and stream_xid.
    3098             :  *
    3099             :  * This can only be called at the end of a streaming block, i.e. at stream_stop
    3100             :  * message from the upstream.
    3101             :  */
    3102             : static void
    3103         614 : stream_close_file(void)
    3104             : {
    3105         614 :     Assert(in_streamed_transaction);
    3106         614 :     Assert(TransactionIdIsValid(stream_xid));
    3107         614 :     Assert(stream_fd != NULL);
    3108             : 
    3109         614 :     BufFileClose(stream_fd);
    3110             : 
    3111         614 :     stream_xid = InvalidTransactionId;
    3112         614 :     stream_fd = NULL;
    3113         614 : }
    3114             : 
    3115             : /*
    3116             :  * stream_write_change
    3117             :  *    Serialize a change to a file for the current toplevel transaction.
    3118             :  *
    3119             :  * Each record is: an int length (counting the action byte and the contents,
    3120             :  * but not the length field itself), the action code (identifying the message
    3121             :  * type), and the message contents from s->cursor onwards (without the subxact TransactionId value).
    3122             :  */
    3123             : static void
    3124      248838 : stream_write_change(char action, StringInfo s)
    3125             : {
    3126             :     int         len;
    3127             : 
    3128      248838 :     Assert(in_streamed_transaction);
    3129      248838 :     Assert(TransactionIdIsValid(stream_xid));
    3130      248838 :     Assert(stream_fd != NULL);
    3131             : 
    3132             :     /* total on-disk size, including the action type character */
    3133      248838 :     len = (s->len - s->cursor) + sizeof(char);
    3134             : 
    3135             :     /* first write the size */
    3136      248838 :     BufFileWrite(stream_fd, &len, sizeof(len));
    3137             : 
    3138             :     /* then the action */
    3139      248838 :     BufFileWrite(stream_fd, &action, sizeof(action));
    3140             : 
    3141             :     /* and finally the unread remainder of the buffer (len is reused as the payload size) */
    3142      248838 :     len = (s->len - s->cursor);
    3143             : 
    3144      248838 :     BufFileWrite(stream_fd, &s->data[s->cursor], len);
    3145      248838 : }
    3146             : 
    3147             : /*
    3148             :  * Free the subxact array, if allocated, and reset all subxact_data fields. NOTE(review): declared with "()" rather than "(void)".
    3149             :  */
    3150             : static inline void
    3151          96 : cleanup_subxact_info()
    3152             : {
    3153          96 :     if (subxact_data.subxacts)
    3154          90 :         pfree(subxact_data.subxacts);
    3155             : 
    3156          96 :     subxact_data.subxacts = NULL;
    3157          96 :     subxact_data.subxact_last = InvalidTransactionId;
    3158          96 :     subxact_data.nsubxacts = 0;
    3159          96 :     subxact_data.nsubxacts_max = 0;
    3160          96 : }
    3161             : 
    3162             : /* Logical Replication Apply worker entry point (also used by table synchronization workers); main_arg carries the worker slot number. */
    3163             : void
    3164         210 : ApplyWorkerMain(Datum main_arg)
    3165             : {
    3166         210 :     int         worker_slot = DatumGetInt32(main_arg);
    3167             :     MemoryContext oldctx;
    3168             :     char        originname[NAMEDATALEN];
    3169             :     XLogRecPtr  origin_startpos;
    3170             :     char       *myslotname;
    3171             :     WalRcvStreamOptions options;
    3172             : 
    3173             :     /* Attach to slot */
    3174         210 :     logicalrep_worker_attach(worker_slot);
    3175             : 
    3176             :     /* Setup signal handling */
    3177         210 :     pqsignal(SIGHUP, SignalHandlerForConfigReload);
    3178         210 :     pqsignal(SIGTERM, die);
    3179         210 :     BackgroundWorkerUnblockSignals();
    3180             : 
    3181             :     /*
    3182             :      * We don't currently need any ResourceOwner in a walreceiver process, but
    3183             :      * if we did, we could call CreateAuxProcessResourceOwner here. NOTE(review): this is the apply worker, not a walreceiver; confirm the wording.
    3184             :      */
    3185             : 
    3186             :     /* Initialise the send/receive/reply timestamps to a sane starting value (the current time) */
    3187         420 :     MyLogicalRepWorker->last_send_time = MyLogicalRepWorker->last_recv_time =
    3188         210 :         MyLogicalRepWorker->reply_time = GetCurrentTimestamp();
    3189             : 
    3190             :     /* Load the libpq-specific functions */
    3191         210 :     load_file("libpqwalreceiver", false);
    3192             : 
    3193             :     /* Run as replica session replication role. */
    3194         210 :     SetConfigOption("session_replication_role", "replica",
    3195             :                     PGC_SUSET, PGC_S_OVERRIDE);
    3196             : 
    3197             :     /* Connect to our database. */
    3198         210 :     BackgroundWorkerInitializeConnectionByOid(MyLogicalRepWorker->dbid,
    3199         210 :                                               MyLogicalRepWorker->userid,
    3200             :                                               0);
    3201             : 
    3202             :     /*
    3203             :      * Set always-secure search path, so malicious users can't redirect user
    3204             :      * code (e.g. pg_index.indexprs).
    3205             :      */
    3206         210 :     SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
    3207             : 
    3208             :     /* Load the subscription into persistent memory context. */
    3209         210 :     ApplyContext = AllocSetContextCreate(TopMemoryContext,
    3210             :                                          "ApplyContext",
    3211             :                                          ALLOCSET_DEFAULT_SIZES);
    3212         210 :     StartTransactionCommand();
    3213         210 :     oldctx = MemoryContextSwitchTo(ApplyContext);
    3214             : 
    3215         210 :     MySubscription = GetSubscription(MyLogicalRepWorker->subid, true);
    3216         210 :     if (!MySubscription)
    3217             :     {
    3218           0 :         ereport(LOG,
    3219             :                 (errmsg("logical replication apply worker for subscription %u will not "
    3220             :                         "start because the subscription was removed during startup",
    3221             :                         MyLogicalRepWorker->subid)));
    3222           0 :         proc_exit(0);
    3223             :     }
    3224             : 
    3225         210 :     MySubscriptionValid = true;
    3226         210 :     MemoryContextSwitchTo(oldctx);
    3227             : 
    3228         210 :     if (!MySubscription->enabled)
    3229             :     {
    3230           0 :         ereport(LOG,
    3231             :                 (errmsg("logical replication apply worker for subscription \"%s\" will not "
    3232             :                         "start because the subscription was disabled during startup",
    3233             :                         MySubscription->name)));
    3234             : 
    3235           0 :         proc_exit(0);
    3236             :     }
    3237             : 
    3238             :     /* Setup synchronous commit according to the user's wishes */
    3239         210 :     SetConfigOption("synchronous_commit", MySubscription->synccommit,
    3240             :                     PGC_BACKEND, PGC_S_OVERRIDE);
    3241             : 
    3242             :     /* Keep us informed about subscription changes. */
    3243         210 :     CacheRegisterSyscacheCallback(SUBSCRIPTIONOID,
    3244             :                                   subscription_change_cb,
    3245             :                                   (Datum) 0);
    3246             : 
    3247         210 :     if (am_tablesync_worker())
    3248         126 :         ereport(LOG,
    3249             :                 (errmsg("logical replication table synchronization worker for subscription \"%s\", table \"%s\" has started",
    3250             :                         MySubscription->name, get_rel_name(MyLogicalRepWorker->relid))));
    3251             :     else
    3252          84 :         ereport(LOG,
    3253             :                 (errmsg("logical replication apply worker for subscription \"%s\" has started",
    3254             :                         MySubscription->name)));
    3255             : 
    3256         210 :     CommitTransactionCommand();
    3257             : 
    3258             :     /* Connect to the origin and start the replication. */
    3259         210 :     elog(DEBUG1, "connecting to publisher using connection string \"%s\"",
    3260             :          MySubscription->conninfo);
    3261             : 
    3262         210 :     if (am_tablesync_worker())
    3263             :     {
    3264             :         char       *syncslotname;
    3265             : 
    3266             :         /* This is table synchronization worker, call initial sync. */
    3267         126 :         syncslotname = LogicalRepSyncTableStart(&origin_startpos);
    3268             : 
    3269             :         /* allocate slot name in long-lived context */
    3270         122 :         myslotname = MemoryContextStrdup(ApplyContext, syncslotname);
    3271             : 
    3272         122 :         pfree(syncslotname);
    3273             :     }
    3274             :     else
    3275             :     {
    3276             :         /* This is main apply worker */
    3277             :         RepOriginId originid;
    3278             :         TimeLineID  startpointTLI;
    3279             :         char       *err;
    3280             : 
    3281          84 :         myslotname = MySubscription->slotname;
    3282             : 
    3283             :         /*
    3284             :          * This shouldn't happen if the subscription is enabled, but guard
    3285             :          * against DDL bugs or manual catalog changes.  (libpqwalreceiver will
    3286             :          * crash if slot is NULL.)
    3287             :          */
    3288          84 :         if (!myslotname)
    3289           0 :             ereport(ERROR,
    3290             :                     (errmsg("subscription has no replication slot set")));
    3291             : 
    3292             :         /* Setup replication origin tracking (origin name is "pg_<subscription oid>", created if missing). */
    3293          84 :         StartTransactionCommand();
    3294          84 :         snprintf(originname, sizeof(originname), "pg_%u", MySubscription->oid);
    3295          84 :         originid = replorigin_by_name(originname, true);
    3296          84 :         if (!OidIsValid(originid))
    3297           0 :             originid = replorigin_create(originname);
    3298          84 :         replorigin_session_setup(originid);
    3299          84 :         replorigin_session_origin = originid;
    3300          84 :         origin_startpos = replorigin_session_get_progress(false);
    3301          84 :         CommitTransactionCommand();
    3302             : 
    3303          84 :         wrconn = walrcv_connect(MySubscription->conninfo, true, MySubscription->name,
    3304             :                                 &err);
    3305          84 :         if (wrconn == NULL)
    3306           8 :             ereport(ERROR,
    3307             :                     (errmsg("could not connect to the publisher: %s", err)));
    3308             : 
    3309             :         /*
    3310             :          * We don't really use the output of identify_system for anything, but
    3311             :          * it does some initializations on the upstream so let's still call it.
    3312             :          */
    3313          76 :         (void) walrcv_identify_system(wrconn, &startpointTLI);
    3314             :     }
    3315             : 
    3316             :     /*
    3317             :      * Setup callback for syscache so that we know when something changes in
    3318             :      * the subscription relation state.
    3319             :      */
    3320         198 :     CacheRegisterSyscacheCallback(SUBSCRIPTIONRELMAP,
    3321             :                                   invalidate_syncing_table_states,
    3322             :                                   (Datum) 0);
    3323             : 
    3324             :     /* Build logical replication streaming options; the streaming protocol version is requested only from servers >= 140000. */
    3325         198 :     options.logical = true;
    3326         198 :     options.startpoint = origin_startpos;
    3327         198 :     options.slotname = myslotname;
    3328         198 :     options.proto.logical.proto_version =
    3329         198 :         walrcv_server_version(wrconn) >= 140000 ?
    3330             :         LOGICALREP_PROTO_STREAM_VERSION_NUM : LOGICALREP_PROTO_VERSION_NUM;
    3331         198 :     options.proto.logical.publication_names = MySubscription->publications;
    3332         198 :     options.proto.logical.binary = MySubscription->binary;
    3333         198 :     options.proto.logical.streaming = MySubscription->stream;
    3334             : 
    3335             :     /* Start normal logical streaming replication. */
    3336         198 :     walrcv_startstreaming(wrconn, &options);
    3337             : 
    3338             :     /* Run the main loop. */
    3339         198 :     LogicalRepApplyLoop(origin_startpos);
    3340             : 
    3341           0 :     proc_exit(0);
    3342             : }
    3343             : 
    3344             : /*
    3345             :  * Is current process a logical replication worker? True iff MyLogicalRepWorker is set (non-NULL).
    3346             :  */
    3347             : bool
    3348         126 : IsLogicalWorker(void)
    3349             : {
    3350         126 :     return MyLogicalRepWorker != NULL;
    3351             : }

Generated by: LCOV version 1.14