Line data Source code
1 : /*-------------------------------------------------------------------------
2 : * worker.c
3 : * PostgreSQL logical replication worker (apply)
4 : *
5 : * Copyright (c) 2016-2020, PostgreSQL Global Development Group
6 : *
7 : * IDENTIFICATION
8 : * src/backend/replication/logical/worker.c
9 : *
10 : * NOTES
11 : * This file contains the worker which applies logical changes as they come
12 : * from remote logical replication stream.
13 : *
14 : * The main worker (apply) is started by logical replication worker
15 : * launcher for every enabled subscription in a database. It uses
16 : * walsender protocol to communicate with publisher.
17 : *
18 : * This module includes server facing code and shares libpqwalreceiver
19 : * module with walreceiver for providing the libpq specific functionality.
20 : *
21 : *
22 : * STREAMED TRANSACTIONS
23 : * ---------------------
24 : * Streamed transactions (large transactions exceeding a memory limit on the
25 : * upstream) are not applied immediately, but instead, the data is written
26 : * to temporary files and then applied at once when the final commit arrives.
27 : *
28 : * Unlike the regular (non-streamed) case, handling streamed transactions has
29 : * to handle aborts of both the toplevel transaction and subtransactions. This
30 : * is achieved by tracking offsets for subtransactions, which is then used
31 : * to truncate the file with serialized changes.
32 : *
33 : * The files are placed in tmp file directory by default, and the filenames
34 : * include both the XID of the toplevel transaction and OID of the
35 : * subscription. This is necessary so that different workers processing a
36 : * remote transaction with the same XID do not interfere.
37 : *
38 : * We use BufFiles instead of using normal temporary files because (a) the
39 : * BufFile infrastructure supports temporary files that exceed the OS file size
40 : * limit, (b) provides automatic cleanup on error and (c) lets these files
41 : * survive across local transactions, so that they can be opened at stream
42 : * start and closed at stream stop. We decided to use SharedFileSet
43 : * infrastructure as without that it deletes the files on the closure of the
44 : * file and if we decide to keep stream files open across the start/stop stream
45 : * then it will consume a lot of memory (more than 8K for each BufFile and
46 : * there could be multiple such BufFiles as the subscriber could receive
47 : * multiple start/stop streams for different transactions before getting the
48 : * commit). Moreover, if we don't use SharedFileSet then we also need to invent
49 : * a new way to pass filenames to BufFile APIs so that we are allowed to open
50 : * the file we desired across multiple stream-open calls for the same
51 : * transaction.
52 : *-------------------------------------------------------------------------
53 : */
54 :
55 : #include "postgres.h"
56 :
57 : #include <sys/stat.h>
58 : #include <unistd.h>
59 :
60 : #include "access/table.h"
61 : #include "access/tableam.h"
62 : #include "access/xact.h"
63 : #include "access/xlog_internal.h"
64 : #include "catalog/catalog.h"
65 : #include "catalog/namespace.h"
66 : #include "catalog/partition.h"
67 : #include "catalog/pg_inherits.h"
68 : #include "catalog/pg_subscription.h"
69 : #include "catalog/pg_subscription_rel.h"
70 : #include "catalog/pg_tablespace.h"
71 : #include "commands/tablecmds.h"
72 : #include "commands/tablespace.h"
73 : #include "commands/trigger.h"
74 : #include "executor/executor.h"
75 : #include "executor/execPartition.h"
76 : #include "executor/nodeModifyTable.h"
77 : #include "funcapi.h"
78 : #include "libpq/pqformat.h"
79 : #include "libpq/pqsignal.h"
80 : #include "mb/pg_wchar.h"
81 : #include "miscadmin.h"
82 : #include "nodes/makefuncs.h"
83 : #include "optimizer/optimizer.h"
84 : #include "parser/analyze.h"
85 : #include "parser/parse_relation.h"
86 : #include "pgstat.h"
87 : #include "postmaster/bgworker.h"
88 : #include "postmaster/interrupt.h"
89 : #include "postmaster/postmaster.h"
90 : #include "postmaster/walwriter.h"
91 : #include "replication/decode.h"
92 : #include "replication/logical.h"
93 : #include "replication/logicalproto.h"
94 : #include "replication/logicalrelation.h"
95 : #include "replication/logicalworker.h"
96 : #include "replication/origin.h"
97 : #include "replication/reorderbuffer.h"
98 : #include "replication/snapbuild.h"
99 : #include "replication/walreceiver.h"
100 : #include "replication/worker_internal.h"
101 : #include "rewrite/rewriteHandler.h"
102 : #include "storage/buffile.h"
103 : #include "storage/bufmgr.h"
104 : #include "storage/fd.h"
105 : #include "storage/ipc.h"
106 : #include "storage/lmgr.h"
107 : #include "storage/proc.h"
108 : #include "storage/procarray.h"
109 : #include "tcop/tcopprot.h"
110 : #include "utils/builtins.h"
111 : #include "utils/catcache.h"
112 : #include "utils/dynahash.h"
113 : #include "utils/datum.h"
114 : #include "utils/fmgroids.h"
115 : #include "utils/guc.h"
116 : #include "utils/inval.h"
117 : #include "utils/lsyscache.h"
118 : #include "utils/memutils.h"
119 : #include "utils/rel.h"
120 : #include "utils/syscache.h"
121 : #include "utils/timeout.h"
122 :
123 : #define NAPTIME_PER_CYCLE 1000 /* max sleep time between cycles (1s) */
124 :
125 : typedef struct FlushPosition
126 : {
127 : dlist_node node;
128 : XLogRecPtr local_end;
129 : XLogRecPtr remote_end;
130 : } FlushPosition;
131 :
132 : static dlist_head lsn_mapping = DLIST_STATIC_INIT(lsn_mapping);
133 :
134 : typedef struct SlotErrCallbackArg
135 : {
136 : LogicalRepRelMapEntry *rel;
137 : int local_attnum;
138 : int remote_attnum;
139 : } SlotErrCallbackArg;
140 :
141 : /*
142 : * Stream xid hash entry. Whenever we see a new xid we create this entry in the
143 : * xidhash and along with it create the streaming file and store the fileset handle.
144 : * The subxact file is created iff there is any subxact info under this xid. This
145 : * entry is used on the subsequent streams for the xid to get the corresponding
146 : * fileset handles, so storing them in hash makes the search faster.
147 : */
148 : typedef struct StreamXidHash
149 : {
150 : TransactionId xid; /* xid is the hash key and must be first */
151 : SharedFileSet *stream_fileset; /* shared file set for stream data */
152 : SharedFileSet *subxact_fileset; /* shared file set for subxact info */
153 : } StreamXidHash;
154 :
155 : static MemoryContext ApplyMessageContext = NULL;
156 : MemoryContext ApplyContext = NULL;
157 :
158 : /* per stream context for streaming transactions */
159 : static MemoryContext LogicalStreamingContext = NULL;
160 :
161 : WalReceiverConn *wrconn = NULL;
162 :
163 : Subscription *MySubscription = NULL;
164 : bool MySubscriptionValid = false;
165 :
166 : bool in_remote_transaction = false;
167 : static XLogRecPtr remote_final_lsn = InvalidXLogRecPtr;
168 :
169 : /* fields valid only when processing streamed transaction */
170 : bool in_streamed_transaction = false;
171 :
172 : static TransactionId stream_xid = InvalidTransactionId;
173 :
174 : /*
175 : * Hash table for storing the streaming xid information along with shared file
176 : * set for streaming and subxact files.
177 : */
178 : static HTAB *xidhash = NULL;
179 :
180 : /* BufFile handle of the current streaming file */
181 : static BufFile *stream_fd = NULL;
182 :
183 : typedef struct SubXactInfo
184 : {
185 : TransactionId xid; /* XID of the subxact */
186 : int fileno; /* file number in the buffile */
187 : off_t offset; /* offset in the file */
188 : } SubXactInfo;
189 :
190 : /* Sub-transaction data for the current streaming transaction */
191 : typedef struct ApplySubXactData
192 : {
193 : uint32 nsubxacts; /* number of sub-transactions */
194 : uint32 nsubxacts_max; /* current capacity of subxacts */
195 : TransactionId subxact_last; /* xid of the last sub-transaction */
196 : SubXactInfo *subxacts; /* sub-xact offset in changes file */
197 : } ApplySubXactData;
198 :
199 : static ApplySubXactData subxact_data = {0, 0, InvalidTransactionId, NULL};
200 :
201 : static inline void subxact_filename(char *path, Oid subid, TransactionId xid);
202 : static inline void changes_filename(char *path, Oid subid, TransactionId xid);
203 :
204 : /*
205 : * Information about subtransactions of a given toplevel transaction.
206 : */
207 : static void subxact_info_write(Oid subid, TransactionId xid);
208 : static void subxact_info_read(Oid subid, TransactionId xid);
209 : static void subxact_info_add(TransactionId xid);
210 : static inline void cleanup_subxact_info(void);
211 :
212 : /*
213 : * Serialize and deserialize changes for a toplevel transaction.
214 : */
215 : static void stream_cleanup_files(Oid subid, TransactionId xid);
216 : static void stream_open_file(Oid subid, TransactionId xid, bool first);
217 : static void stream_write_change(char action, StringInfo s);
218 : static void stream_close_file(void);
219 :
220 : static void send_feedback(XLogRecPtr recvpos, bool force, bool requestReply);
221 :
222 : static void store_flush_position(XLogRecPtr remote_lsn);
223 :
224 : static void maybe_reread_subscription(void);
225 :
226 : /* prototype needed because of stream_commit */
227 : static void apply_dispatch(StringInfo s);
228 :
229 : static void apply_handle_insert_internal(ResultRelInfo *relinfo,
230 : EState *estate, TupleTableSlot *remoteslot);
231 : static void apply_handle_update_internal(ResultRelInfo *relinfo,
232 : EState *estate, TupleTableSlot *remoteslot,
233 : LogicalRepTupleData *newtup,
234 : LogicalRepRelMapEntry *relmapentry);
235 : static void apply_handle_delete_internal(ResultRelInfo *relinfo, EState *estate,
236 : TupleTableSlot *remoteslot,
237 : LogicalRepRelation *remoterel);
238 : static bool FindReplTupleInLocalRel(EState *estate, Relation localrel,
239 : LogicalRepRelation *remoterel,
240 : TupleTableSlot *remoteslot,
241 : TupleTableSlot **localslot);
242 : static void apply_handle_tuple_routing(ResultRelInfo *relinfo,
243 : EState *estate,
244 : TupleTableSlot *remoteslot,
245 : LogicalRepTupleData *newtup,
246 : LogicalRepRelMapEntry *relmapentry,
247 : CmdType operation);
248 :
249 : static int apply_spooled_messages(TransactionId xid, XLogRecPtr lsn);
250 :
251 : /*
252 : * Should this worker apply changes for given relation.
253 : *
254 : * This is mainly needed for initial relation data sync as that runs in
255 : * separate worker process running in parallel and we need some way to skip
256 : * changes coming to the main apply worker during the sync of a table.
257 : *
258 : * Note we need to do smaller or equals comparison for SYNCDONE state because
259 : * it might hold position of end of initial slot consistent point WAL
260 : * record + 1 (ie start of next record) and next record can be COMMIT of
261 : * transaction we are now processing (which is what we set remote_final_lsn
262 : * to in apply_handle_begin).
263 : */
264 : static bool
265 227766 : should_apply_changes_for_rel(LogicalRepRelMapEntry *rel)
266 : {
267 227766 : if (am_tablesync_worker())
268 0 : return MyLogicalRepWorker->relid == rel->localreloid;
269 : else
270 455540 : return (rel->state == SUBREL_STATE_READY ||
271 40 : (rel->state == SUBREL_STATE_SYNCDONE &&
272 8 : rel->statelsn <= remote_final_lsn));
273 : }
274 :
275 : /*
276 : * Make sure that we started local transaction.
277 : *
278 : * Also switches to ApplyMessageContext as necessary.
279 : */
280 : static bool
281 228454 : ensure_transaction(void)
282 : {
283 228454 : if (IsTransactionState())
284 : {
285 227376 : SetCurrentStatementStartTimestamp();
286 :
287 227376 : if (CurrentMemoryContext != ApplyMessageContext)
288 0 : MemoryContextSwitchTo(ApplyMessageContext);
289 :
290 227376 : return false;
291 : }
292 :
293 1078 : SetCurrentStatementStartTimestamp();
294 1078 : StartTransactionCommand();
295 :
296 1078 : maybe_reread_subscription();
297 :
298 1074 : MemoryContextSwitchTo(ApplyMessageContext);
299 1074 : return true;
300 : }
301 :
302 : /*
303 : * Handle streamed transactions.
304 : *
305 : * If in streaming mode (receiving a block of streamed transaction), we
306 : * simply redirect it to a file for the proper toplevel transaction.
307 : *
308 : * Returns true for streamed transactions, false otherwise (regular mode).
309 : */
310 : static bool
311 476874 : handle_streamed_transaction(const char action, StringInfo s)
312 : {
313 : TransactionId xid;
314 :
315 : /* not in streaming mode */
316 476874 : if (!in_streamed_transaction)
317 228036 : return false;
318 :
319 248838 : Assert(stream_fd != NULL);
320 248838 : Assert(TransactionIdIsValid(stream_xid));
321 :
322 : /*
323 : * We should have received XID of the subxact as the first part of the
324 : * message, so extract it.
325 : */
326 248838 : xid = pq_getmsgint(s, 4);
327 :
328 248838 : Assert(TransactionIdIsValid(xid));
329 :
330 : /* Add the new subxact to the array (unless already there). */
331 248838 : subxact_info_add(xid);
332 :
333 : /* write the change to the current file */
334 248838 : stream_write_change(action, s);
335 :
336 248838 : return true;
337 : }
338 :
339 : /*
340 : * Executor state preparation for evaluation of constraint expressions,
341 : * indexes and triggers.
342 : *
343 : * This is based on similar code in copy.c
344 : */
345 : static EState *
346 227702 : create_estate_for_relation(LogicalRepRelMapEntry *rel)
347 : {
348 : EState *estate;
349 : RangeTblEntry *rte;
350 :
351 227702 : estate = CreateExecutorState();
352 :
353 227702 : rte = makeNode(RangeTblEntry);
354 227702 : rte->rtekind = RTE_RELATION;
355 227702 : rte->relid = RelationGetRelid(rel->localrel);
356 227702 : rte->relkind = rel->localrel->rd_rel->relkind;
357 227702 : rte->rellockmode = AccessShareLock;
358 227702 : ExecInitRangeTable(estate, list_make1(rte));
359 :
360 227702 : estate->es_output_cid = GetCurrentCommandId(true);
361 :
362 : /* Prepare to catch AFTER triggers. */
363 227702 : AfterTriggerBeginQuery();
364 :
365 227702 : return estate;
366 : }
367 :
368 : /*
369 : * Executes default values for columns for which we can't map to remote
370 : * relation columns.
371 : *
372 : * This allows us to support tables which have more columns on the downstream
373 : * than on the upstream.
374 : */
375 : static void
376 102826 : slot_fill_defaults(LogicalRepRelMapEntry *rel, EState *estate,
377 : TupleTableSlot *slot)
378 : {
379 102826 : TupleDesc desc = RelationGetDescr(rel->localrel);
380 102826 : int num_phys_attrs = desc->natts;
381 : int i;
382 : int attnum,
383 102826 : num_defaults = 0;
384 : int *defmap;
385 : ExprState **defexprs;
386 : ExprContext *econtext;
387 :
388 102826 : econtext = GetPerTupleExprContext(estate);
389 :
390 : /* We got all the data via replication, no need to evaluate anything. */
391 102826 : if (num_phys_attrs == rel->remoterel.natts)
392 103594 : return;
393 :
394 102058 : defmap = (int *) palloc(num_phys_attrs * sizeof(int));
395 102058 : defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
396 :
397 102058 : Assert(rel->attrmap->maplen == num_phys_attrs);
398 537176 : for (attnum = 0; attnum < num_phys_attrs; attnum++)
399 : {
400 : Expr *defexpr;
401 :
402 435118 : if (TupleDescAttr(desc, attnum)->attisdropped || TupleDescAttr(desc, attnum)->attgenerated)
403 4 : continue;
404 :
405 435114 : if (rel->attrmap->attnums[attnum] >= 0)
406 229104 : continue;
407 :
408 206010 : defexpr = (Expr *) build_column_default(rel->localrel, attnum + 1);
409 :
410 206010 : if (defexpr != NULL)
411 : {
412 : /* Run the expression through planner */
413 170042 : defexpr = expression_planner(defexpr);
414 :
415 : /* Initialize executable expression in copycontext */
416 170042 : defexprs[num_defaults] = ExecInitExpr(defexpr, NULL);
417 170042 : defmap[num_defaults] = attnum;
418 170042 : num_defaults++;
419 : }
420 :
421 : }
422 :
423 272100 : for (i = 0; i < num_defaults; i++)
424 340084 : slot->tts_values[defmap[i]] =
425 170042 : ExecEvalExpr(defexprs[i], econtext, &slot->tts_isnull[defmap[i]]);
426 : }
427 :
428 : /*
429 : * Error callback to give more context info about type conversion failure.
430 : */
431 : static void
432 0 : slot_store_error_callback(void *arg)
433 : {
434 0 : SlotErrCallbackArg *errarg = (SlotErrCallbackArg *) arg;
435 : LogicalRepRelMapEntry *rel;
436 : char *remotetypname;
437 : Oid remotetypoid,
438 : localtypoid;
439 :
440 : /* Nothing to do if remote attribute number is not set */
441 0 : if (errarg->remote_attnum < 0)
442 0 : return;
443 :
444 0 : rel = errarg->rel;
445 0 : remotetypoid = rel->remoterel.atttyps[errarg->remote_attnum];
446 :
447 : /* Fetch remote type name from the LogicalRepTypMap cache */
448 0 : remotetypname = logicalrep_typmap_gettypname(remotetypoid);
449 :
450 : /* Fetch local type OID from the local sys cache */
451 0 : localtypoid = get_atttype(rel->localreloid, errarg->local_attnum + 1);
452 :
453 0 : errcontext("processing remote data for replication target relation \"%s.%s\" column \"%s\", "
454 : "remote type %s, local type %s",
455 : rel->remoterel.nspname, rel->remoterel.relname,
456 0 : rel->remoterel.attnames[errarg->remote_attnum],
457 : remotetypname,
458 : format_type_be(localtypoid));
459 : }
460 :
461 : /*
462 : * Store tuple data into slot.
463 : *
464 : * Incoming data can be either text or binary format.
465 : */
466 : static void
467 227702 : slot_store_data(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
468 : LogicalRepTupleData *tupleData)
469 : {
470 227702 : int natts = slot->tts_tupleDescriptor->natts;
471 : int i;
472 : SlotErrCallbackArg errarg;
473 : ErrorContextCallback errcallback;
474 :
475 227702 : ExecClearTuple(slot);
476 :
477 : /* Push callback + info on the error context stack */
478 227702 : errarg.rel = rel;
479 227702 : errarg.local_attnum = -1;
480 227702 : errarg.remote_attnum = -1;
481 227702 : errcallback.callback = slot_store_error_callback;
482 227702 : errcallback.arg = (void *) &errarg;
483 227702 : errcallback.previous = error_context_stack;
484 227702 : error_context_stack = &errcallback;
485 :
486 : /* Call the "in" function for each non-dropped, non-null attribute */
487 227702 : Assert(natts == rel->attrmap->maplen);
488 1161872 : for (i = 0; i < natts; i++)
489 : {
490 934170 : Form_pg_attribute att = TupleDescAttr(slot->tts_tupleDescriptor, i);
491 934170 : int remoteattnum = rel->attrmap->attnums[i];
492 :
493 934170 : if (!att->attisdropped && remoteattnum >= 0)
494 479710 : {
495 479710 : StringInfo colvalue = &tupleData->colvalues[remoteattnum];
496 :
497 479710 : Assert(remoteattnum < tupleData->ncols);
498 :
499 479710 : errarg.local_attnum = i;
500 479710 : errarg.remote_attnum = remoteattnum;
501 :
502 479710 : if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_TEXT)
503 : {
504 : Oid typinput;
505 : Oid typioparam;
506 :
507 350828 : getTypeInputInfo(att->atttypid, &typinput, &typioparam);
508 701656 : slot->tts_values[i] =
509 350828 : OidInputFunctionCall(typinput, colvalue->data,
510 : typioparam, att->atttypmod);
511 350828 : slot->tts_isnull[i] = false;
512 : }
513 128882 : else if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_BINARY)
514 : {
515 : Oid typreceive;
516 : Oid typioparam;
517 :
518 : /*
519 : * In some code paths we may be asked to re-parse the same
520 : * tuple data. Reset the StringInfo's cursor so that works.
521 : */
522 66738 : colvalue->cursor = 0;
523 :
524 66738 : getTypeBinaryInputInfo(att->atttypid, &typreceive, &typioparam);
525 133476 : slot->tts_values[i] =
526 66738 : OidReceiveFunctionCall(typreceive, colvalue,
527 : typioparam, att->atttypmod);
528 :
529 : /* Trouble if it didn't eat the whole buffer */
530 66738 : if (colvalue->cursor != colvalue->len)
531 0 : ereport(ERROR,
532 : (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
533 : errmsg("incorrect binary data format in logical replication column %d",
534 : remoteattnum + 1)));
535 66738 : slot->tts_isnull[i] = false;
536 : }
537 : else
538 : {
539 : /*
540 : * NULL value from remote. (We don't expect to see
541 : * LOGICALREP_COLUMN_UNCHANGED here, but if we do, treat it as
542 : * NULL.)
543 : */
544 62144 : slot->tts_values[i] = (Datum) 0;
545 62144 : slot->tts_isnull[i] = true;
546 : }
547 :
548 479710 : errarg.local_attnum = -1;
549 479710 : errarg.remote_attnum = -1;
550 : }
551 : else
552 : {
553 : /*
554 : * We assign NULL to dropped attributes and missing values
555 : * (missing values should be later filled using
556 : * slot_fill_defaults).
557 : */
558 454460 : slot->tts_values[i] = (Datum) 0;
559 454460 : slot->tts_isnull[i] = true;
560 : }
561 : }
562 :
563 : /* Pop the error context stack */
564 227702 : error_context_stack = errcallback.previous;
565 :
566 227702 : ExecStoreVirtualTuple(slot);
567 227702 : }
568 :
569 : /*
570 : * Replace updated columns with data from the LogicalRepTupleData struct.
571 : * This is somewhat similar to heap_modify_tuple but also calls the type
572 : * input functions on the user data.
573 : *
574 : * "slot" is filled with a copy of the tuple in "srcslot", replacing
575 : * columns provided in "tupleData" and leaving others as-is.
576 : *
577 : * Caution: unreplaced pass-by-ref columns in "slot" will point into the
578 : * storage for "srcslot". This is OK for current usage, but someday we may
579 : * need to materialize "slot" at the end to make it independent of "srcslot".
580 : */
581 : static void
582 62776 : slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot,
583 : LogicalRepRelMapEntry *rel,
584 : LogicalRepTupleData *tupleData)
585 : {
586 62776 : int natts = slot->tts_tupleDescriptor->natts;
587 : int i;
588 : SlotErrCallbackArg errarg;
589 : ErrorContextCallback errcallback;
590 :
591 : /* We'll fill "slot" with a virtual tuple, so we must start with ... */
592 62776 : ExecClearTuple(slot);
593 :
594 : /*
595 : * Copy all the column data from srcslot, so that we'll have valid values
596 : * for unreplaced columns.
597 : */
598 62776 : Assert(natts == srcslot->tts_tupleDescriptor->natts);
599 62776 : slot_getallattrs(srcslot);
600 62776 : memcpy(slot->tts_values, srcslot->tts_values, natts * sizeof(Datum));
601 62776 : memcpy(slot->tts_isnull, srcslot->tts_isnull, natts * sizeof(bool));
602 :
603 : /* For error reporting, push callback + info on the error context stack */
604 62776 : errarg.rel = rel;
605 62776 : errarg.local_attnum = -1;
606 62776 : errarg.remote_attnum = -1;
607 62776 : errcallback.callback = slot_store_error_callback;
608 62776 : errcallback.arg = (void *) &errarg;
609 62776 : errcallback.previous = error_context_stack;
610 62776 : error_context_stack = &errcallback;
611 :
612 : /* Call the "in" function for each replaced attribute */
613 62776 : Assert(natts == rel->attrmap->maplen);
614 313368 : for (i = 0; i < natts; i++)
615 : {
616 250592 : Form_pg_attribute att = TupleDescAttr(slot->tts_tupleDescriptor, i);
617 250592 : int remoteattnum = rel->attrmap->attnums[i];
618 :
619 250592 : if (remoteattnum < 0)
620 125080 : continue;
621 :
622 125512 : Assert(remoteattnum < tupleData->ncols);
623 :
624 125512 : if (tupleData->colstatus[remoteattnum] != LOGICALREP_COLUMN_UNCHANGED)
625 : {
626 125506 : StringInfo colvalue = &tupleData->colvalues[remoteattnum];
627 :
628 125506 : errarg.local_attnum = i;
629 125506 : errarg.remote_attnum = remoteattnum;
630 :
631 125506 : if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_TEXT)
632 : {
633 : Oid typinput;
634 : Oid typioparam;
635 :
636 82060 : getTypeInputInfo(att->atttypid, &typinput, &typioparam);
637 164120 : slot->tts_values[i] =
638 82060 : OidInputFunctionCall(typinput, colvalue->data,
639 : typioparam, att->atttypmod);
640 82060 : slot->tts_isnull[i] = false;
641 : }
642 43446 : else if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_BINARY)
643 : {
644 : Oid typreceive;
645 : Oid typioparam;
646 :
647 : /*
648 : * In some code paths we may be asked to re-parse the same
649 : * tuple data. Reset the StringInfo's cursor so that works.
650 : */
651 43362 : colvalue->cursor = 0;
652 :
653 43362 : getTypeBinaryInputInfo(att->atttypid, &typreceive, &typioparam);
654 86724 : slot->tts_values[i] =
655 43362 : OidReceiveFunctionCall(typreceive, colvalue,
656 : typioparam, att->atttypmod);
657 :
658 : /* Trouble if it didn't eat the whole buffer */
659 43362 : if (colvalue->cursor != colvalue->len)
660 0 : ereport(ERROR,
661 : (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
662 : errmsg("incorrect binary data format in logical replication column %d",
663 : remoteattnum + 1)));
664 43362 : slot->tts_isnull[i] = false;
665 : }
666 : else
667 : {
668 : /* must be LOGICALREP_COLUMN_NULL */
669 84 : slot->tts_values[i] = (Datum) 0;
670 84 : slot->tts_isnull[i] = true;
671 : }
672 :
673 125506 : errarg.local_attnum = -1;
674 125506 : errarg.remote_attnum = -1;
675 : }
676 : }
677 :
678 : /* Pop the error context stack */
679 62776 : error_context_stack = errcallback.previous;
680 :
681 : /* And finally, declare that "slot" contains a valid virtual tuple */
682 62776 : ExecStoreVirtualTuple(slot);
683 62776 : }
684 :
685 : /*
686 : * Handle BEGIN message.
687 : */
688 : static void
689 498 : apply_handle_begin(StringInfo s)
690 : {
691 : LogicalRepBeginData begin_data;
692 :
693 498 : logicalrep_read_begin(s, &begin_data);
694 :
695 498 : remote_final_lsn = begin_data.final_lsn;
696 :
697 498 : in_remote_transaction = true;
698 :
699 498 : pgstat_report_activity(STATE_RUNNING, NULL);
700 498 : }
701 :
702 : /*
703 : * Handle COMMIT message.
704 : *
705 : * TODO, support tracking of multiple origins
706 : */
707 : static void
708 486 : apply_handle_commit(StringInfo s)
709 : {
710 : LogicalRepCommitData commit_data;
711 :
712 486 : logicalrep_read_commit(s, &commit_data);
713 :
714 486 : Assert(commit_data.commit_lsn == remote_final_lsn);
715 :
716 : /* The synchronization worker runs in single transaction. */
717 486 : if (IsTransactionState() && !am_tablesync_worker())
718 : {
719 : /*
720 : * Update origin state so we can restart streaming from correct
721 : * position in case of crash.
722 : */
723 368 : replorigin_session_origin_lsn = commit_data.end_lsn;
724 368 : replorigin_session_origin_timestamp = commit_data.committime;
725 :
726 368 : CommitTransactionCommand();
727 :
728 368 : pgstat_report_stat(false);
729 :
730 368 : store_flush_position(commit_data.end_lsn);
731 : }
732 : else
733 : {
734 : /* Process any invalidation messages that might have accumulated. */
735 118 : AcceptInvalidationMessages();
736 118 : maybe_reread_subscription();
737 : }
738 :
739 486 : in_remote_transaction = false;
740 :
741 : /* Process any tables that are being synchronized in parallel. */
742 486 : process_syncing_tables(commit_data.end_lsn);
743 :
744 486 : pgstat_report_activity(STATE_IDLE, NULL);
745 486 : }
746 :
747 : /*
748 : * Called from apply_handle_prepare to handle a PREPARE TRANSACTION.
749 : */
750 : static void
751 6 : apply_handle_prepare_txn(LogicalRepPrepareData * prepare_data)
752 : {
753 6 : Assert(prepare_data->prepare_lsn == remote_final_lsn);
754 :
755 : /* The synchronization worker runs in single transaction. */
756 6 : if (IsTransactionState() && !am_tablesync_worker())
757 : {
758 : /* End the earlier transaction and start a new one */
759 6 : BeginTransactionBlock();
760 6 : CommitTransactionCommand();
761 6 : StartTransactionCommand();
762 :
763 : /*
764 : * Update origin state so we can restart streaming from correct
765 : * position in case of crash.
766 : */
767 6 : replorigin_session_origin_lsn = prepare_data->end_lsn;
768 6 : replorigin_session_origin_timestamp = prepare_data->preparetime;
769 :
770 6 : PrepareTransactionBlock(prepare_data->gid);
771 6 : CommitTransactionCommand();
772 6 : pgstat_report_stat(false);
773 :
774 6 : store_flush_position(prepare_data->end_lsn);
775 : }
776 : else
777 : {
778 : /* Process any invalidation messages that might have accumulated. */
779 0 : AcceptInvalidationMessages();
780 0 : maybe_reread_subscription();
781 : }
782 :
783 6 : in_remote_transaction = false;
784 :
785 : /* Process any tables that are being synchronized in parallel. */
786 6 : process_syncing_tables(prepare_data->end_lsn);
787 :
788 6 : pgstat_report_activity(STATE_IDLE, NULL);
789 6 : }
790 :
791 : /*
792 : * Called from apply_handle_prepare to handle a COMMIT PREPARED of a previously
793 : * PREPARED transaction.
794 : */
795 : static void
796 16 : apply_handle_commit_prepared_txn(LogicalRepPrepareData * prepare_data)
797 : {
798 : /* there is no transaction when COMMIT PREPARED is called */
799 16 : ensure_transaction();
800 :
801 : /*
802 : * Update origin state so we can restart streaming from correct position
803 : * in case of crash.
804 : */
805 16 : replorigin_session_origin_lsn = prepare_data->end_lsn;
806 16 : replorigin_session_origin_timestamp = prepare_data->preparetime;
807 :
808 16 : FinishPreparedTransaction(prepare_data->gid, true);
809 16 : CommitTransactionCommand();
810 16 : pgstat_report_stat(false);
811 :
812 16 : store_flush_position(prepare_data->end_lsn);
813 16 : in_remote_transaction = false;
814 :
815 : /* Process any tables that are being synchronized in parallel. */
816 16 : process_syncing_tables(prepare_data->end_lsn);
817 :
818 16 : pgstat_report_activity(STATE_IDLE, NULL);
819 16 : }
820 :
821 : /*
822 : * Called from apply_handle_prepare to handle a ROLLBACK PREPARED of a previously
823 : * PREPARED TRANSACTION.
824 : */
825 : static void
826 4 : apply_handle_rollback_prepared_txn(LogicalRepPrepareData * prepare_data)
827 : {
828 : /*
829 : * Update origin state so we can restart streaming from correct position
830 : * in case of crash.
831 : */
832 4 : replorigin_session_origin_lsn = prepare_data->end_lsn;
833 4 : replorigin_session_origin_timestamp = prepare_data->preparetime;
834 :
835 : /*
836 : * During logical decoding, on the apply side, it's possible that a
837 : * prepared transaction got aborted while decoding. In that case, we stop
838 : * the decoding and abort the transaction immediately. However the
839 : * ROLLBACK prepared processing still reaches the subscriber. In that case
840 : * it's ok to have a missing gid
841 : */
842 4 : if (LookupGXact(prepare_data->gid))
843 : {
844 : /* there is no transaction when ABORT/ROLLBACK PREPARED is called */
845 4 : ensure_transaction();
846 4 : FinishPreparedTransaction(prepare_data->gid, false);
847 4 : CommitTransactionCommand();
848 : }
849 :
850 4 : pgstat_report_stat(false);
851 :
852 4 : store_flush_position(prepare_data->end_lsn);
853 4 : in_remote_transaction = false;
854 :
855 : /* Process any tables that are being synchronized in parallel. */
856 4 : process_syncing_tables(prepare_data->end_lsn);
857 :
858 4 : pgstat_report_activity(STATE_IDLE, NULL);
859 4 : }
860 :
861 : /*
862 : * Handle PREPARE message.
863 : */
864 : static void
865 26 : apply_handle_prepare(StringInfo s)
866 : {
867 : LogicalRepPrepareData prepare_data;
868 :
869 26 : logicalrep_read_prepare(s, &prepare_data);
870 :
871 26 : switch (prepare_data.prepare_type)
872 : {
873 : case LOGICALREP_IS_PREPARE:
874 6 : apply_handle_prepare_txn(&prepare_data);
875 6 : break;
876 :
877 : case LOGICALREP_IS_COMMIT_PREPARED:
878 16 : apply_handle_commit_prepared_txn(&prepare_data);
879 16 : break;
880 :
881 : case LOGICALREP_IS_ROLLBACK_PREPARED:
882 4 : apply_handle_rollback_prepared_txn(&prepare_data);
883 4 : break;
884 :
885 : default:
886 0 : ereport(ERROR,
887 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
888 : errmsg("unexpected type of prepare message: %d",
889 : prepare_data.prepare_type)));
890 : }
891 26 : }
892 :
893 : /*
894 : * Handle STREAM PREPARE.
895 : *
896 : * Logic is in two parts:
897 : * 1. Replay all the spooled operations
898 : * 2. Mark the transaction as prepared
899 : */
900 : static void
901 12 : apply_handle_stream_prepare(StringInfo s)
902 : {
903 12 : int nchanges = 0;
904 : LogicalRepPrepareData prepare_data;
905 : TransactionId xid;
906 :
907 12 : Assert(!in_streamed_transaction);
908 :
909 12 : xid = logicalrep_read_stream_prepare(s, &prepare_data);
910 12 : elog(DEBUG1, "received prepare for streamed transaction %u", xid);
911 :
912 : /*
913 : * This should be a PREPARE only. The COMMIT PREPARED and ROLLBACK PREPARED
914 : * for streaming are handled by the non-streaming APIs.
915 : */
916 12 : Assert(prepare_data.prepare_type == LOGICALREP_IS_PREPARE);
917 :
918 : /*
919 : * ========================================
920 : * 1. Replay all the spooled operations
921 : * - This code is same as what apply_handle_stream_commit does for NON two-phase stream commit
922 : * ========================================
923 : */
924 :
925 12 : ensure_transaction();
926 :
927 12 : nchanges = apply_spooled_messages(xid, prepare_data.prepare_lsn);
928 :
929 : /*
930 : * ========================================
931 : * 2. Mark the transaction as prepared.
932 : * - This code is same as what apply_handle_prepare_txn does for two-phase prepare of the non-streamed tx
933 : * ========================================
934 : */
935 12 : BeginTransactionBlock();
936 12 : CommitTransactionCommand();
937 12 : StartTransactionCommand();
938 :
939 : /*
940 : * Update origin state so we can restart streaming from correct position
941 : * in case of crash.
942 : */
943 12 : replorigin_session_origin_lsn = prepare_data.end_lsn;
944 12 : replorigin_session_origin_timestamp = prepare_data.preparetime;
945 :
946 12 : PrepareTransactionBlock(prepare_data.gid);
947 12 : CommitTransactionCommand();
948 :
949 12 : pgstat_report_stat(false);
950 :
951 12 : store_flush_position(prepare_data.end_lsn);
952 :
953 12 : elog(DEBUG1, "apply_handle_stream_prepare_txn: replayed %d (all) changes.", nchanges);
954 :
955 12 : in_remote_transaction = false;
956 :
957 : /* Process any tables that are being synchronized in parallel. */
958 12 : process_syncing_tables(prepare_data.end_lsn);
959 :
960 : /* unlink the files with serialized changes and subxact info */
961 12 : stream_cleanup_files(MyLogicalRepWorker->subid, xid);
962 :
963 12 : pgstat_report_activity(STATE_IDLE, NULL);
964 12 : }
965 :
966 : /*
967 : * Handle ORIGIN message.
968 : *
969 : * TODO, support tracking of multiple origins
970 : */
971 : static void
972 0 : apply_handle_origin(StringInfo s)
973 : {
974 : /*
975 : * ORIGIN message can only come inside streaming transaction or inside
976 : * remote transaction and before any actual writes.
977 : */
978 0 : if (!in_streamed_transaction &&
979 0 : (!in_remote_transaction ||
980 0 : (IsTransactionState() && !am_tablesync_worker())))
981 0 : ereport(ERROR,
982 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
983 : errmsg("ORIGIN message sent out of order")));
984 0 : }
985 :
986 : /*
987 : * Handle STREAM START message.
988 : */
989 : static void
990 614 : apply_handle_stream_start(StringInfo s)
991 : {
992 : bool first_segment;
993 : HASHCTL hash_ctl;
994 :
995 614 : Assert(!in_streamed_transaction);
996 :
997 : /*
998 : * Start a transaction on stream start, this transaction will be committed
999 : * on the stream stop. We need the transaction for handling the buffile,
1000 : * used for serializing the streaming data and subxact info.
1001 : */
1002 614 : ensure_transaction();
1003 :
1004 : /* notify handle methods we're processing a remote transaction */
1005 614 : in_streamed_transaction = true;
1006 :
1007 : /* extract XID of the top-level transaction */
1008 614 : stream_xid = logicalrep_read_stream_start(s, &first_segment);
1009 :
1010 : /*
1011 : * Initialize the xidhash table if we haven't yet. This will be used for
1012 : * the entire duration of the apply worker so create it in permanent
1013 : * context.
1014 : */
1015 614 : if (xidhash == NULL)
1016 : {
1017 16 : hash_ctl.keysize = sizeof(TransactionId);
1018 16 : hash_ctl.entrysize = sizeof(StreamXidHash);
1019 16 : hash_ctl.hcxt = ApplyContext;
1020 16 : xidhash = hash_create("StreamXidHash", 1024, &hash_ctl,
1021 : HASH_ELEM | HASH_CONTEXT);
1022 : }
1023 :
1024 : /* open the spool file for this transaction */
1025 614 : stream_open_file(MyLogicalRepWorker->subid, stream_xid, first_segment);
1026 :
1027 : /* if this is not the first segment, open existing subxact file */
1028 614 : if (!first_segment)
1029 570 : subxact_info_read(MyLogicalRepWorker->subid, stream_xid);
1030 :
1031 614 : pgstat_report_activity(STATE_RUNNING, NULL);
1032 614 : }
1033 :
1034 : /*
1035 : * Handle STREAM STOP message.
1036 : */
1037 : static void
1038 614 : apply_handle_stream_stop(StringInfo s)
1039 : {
1040 614 : Assert(in_streamed_transaction);
1041 :
1042 : /*
1043 : * Close the file with serialized changes, and serialize information about
1044 : * subxacts for the toplevel transaction.
1045 : */
1046 614 : subxact_info_write(MyLogicalRepWorker->subid, stream_xid);
1047 614 : stream_close_file();
1048 :
1049 : /* We must be in a valid transaction state */
1050 614 : Assert(IsTransactionState());
1051 :
1052 : /* Commit the per-stream transaction */
1053 614 : CommitTransactionCommand();
1054 :
1055 614 : in_streamed_transaction = false;
1056 :
1057 : /* Reset per-stream context */
1058 614 : MemoryContextReset(LogicalStreamingContext);
1059 :
1060 614 : pgstat_report_activity(STATE_IDLE, NULL);
1061 614 : }
1062 :
1063 : /*
1064 : * Handle STREAM abort message.
1065 : */
1066 : static void
1067 26 : apply_handle_stream_abort(StringInfo s)
1068 : {
1069 : TransactionId xid;
1070 : TransactionId subxid;
1071 :
1072 26 : Assert(!in_streamed_transaction);
1073 :
1074 26 : logicalrep_read_stream_abort(s, &xid, &subxid);
1075 :
1076 : /*
1077 : * If the two XIDs are the same, it's in fact abort of toplevel xact, so
1078 : * just delete the files with serialized info.
1079 : */
1080 26 : if (xid == subxid)
1081 2 : stream_cleanup_files(MyLogicalRepWorker->subid, xid);
1082 : else
1083 : {
1084 : /*
1085 : * OK, so it's a subxact. We need to read the subxact file for the
1086 : * toplevel transaction, determine the offset tracked for the subxact,
1087 : * and truncate the file with changes. We also remove the subxacts
1088 : * with higher offsets (or rather higher XIDs).
1089 : *
1090 : * We intentionally scan the array from the tail, because we're likely
1091 : * aborting a change for the most recent subtransactions.
1092 : *
1093 : * We can't use the binary search here as subxact XIDs won't
1094 : * necessarily arrive in sorted order, consider the case where we have
1095 : * released the savepoint for multiple subtransactions and then
1096 : * performed rollback to savepoint for one of the earlier
1097 : * sub-transaction.
1098 : */
1099 :
1100 : int64 i;
1101 : int64 subidx;
1102 : BufFile *fd;
1103 24 : bool found = false;
1104 : char path[MAXPGPATH];
1105 : StreamXidHash *ent;
1106 :
1107 24 : subidx = -1;
1108 24 : ensure_transaction();
1109 24 : subxact_info_read(MyLogicalRepWorker->subid, xid);
1110 :
1111 28 : for (i = subxact_data.nsubxacts; i > 0; i--)
1112 : {
1113 20 : if (subxact_data.subxacts[i - 1].xid == subxid)
1114 : {
1115 16 : subidx = (i - 1);
1116 16 : found = true;
1117 16 : break;
1118 : }
1119 : }
1120 :
1121 : /*
1122 : * If it's an empty sub-transaction then we will not find the subxid
1123 : * here so just cleanup the subxact info and return.
1124 : */
1125 24 : if (!found)
1126 : {
1127 : /* Cleanup the subxact info */
1128 8 : cleanup_subxact_info();
1129 8 : CommitTransactionCommand();
1130 34 : return;
1131 : }
1132 :
1133 16 : Assert((subidx >= 0) && (subidx < subxact_data.nsubxacts));
1134 :
1135 16 : ent = (StreamXidHash *) hash_search(xidhash,
1136 : (void *) &xid,
1137 : HASH_FIND,
1138 : &found);
1139 16 : Assert(found);
1140 :
1141 : /* open the changes file */
1142 16 : changes_filename(path, MyLogicalRepWorker->subid, xid);
1143 16 : fd = BufFileOpenShared(ent->stream_fileset, path, O_RDWR);
1144 :
1145 : /* OK, truncate the file at the right offset */
1146 16 : BufFileTruncateShared(fd, subxact_data.subxacts[subidx].fileno,
1147 16 : subxact_data.subxacts[subidx].offset);
1148 16 : BufFileClose(fd);
1149 :
1150 : /* discard the subxacts added later */
1151 16 : subxact_data.nsubxacts = subidx;
1152 :
1153 : /* write the updated subxact list */
1154 16 : subxact_info_write(MyLogicalRepWorker->subid, xid);
1155 16 : CommitTransactionCommand();
1156 : }
1157 : }
1158 :
1159 : /*
1160 : * Common spoolfile processing.
1161 : * Returns how many changes were applied.
1162 : */
1163 : static int
1164 40 : apply_spooled_messages(TransactionId xid, XLogRecPtr lsn)
1165 : {
1166 : StringInfoData s2;
1167 : int nchanges;
1168 : char path[MAXPGPATH];
1169 40 : char *buffer = NULL;
1170 : bool found;
1171 : StreamXidHash *ent;
1172 : MemoryContext oldcxt;
1173 : BufFile *fd;
1174 :
1175 : /*
1176 : * Allocate file handle and memory required to process all the messages in
1177 : * TopTransactionContext to avoid them getting reset after each message is
1178 : * processed.
1179 : */
1180 40 : oldcxt = MemoryContextSwitchTo(TopTransactionContext);
1181 :
1182 : /* open the spool file for the committed transaction */
1183 40 : changes_filename(path, MyLogicalRepWorker->subid, xid);
1184 40 : elog(DEBUG1, "replaying changes from file \"%s\"", path);
1185 40 : ent = (StreamXidHash *) hash_search(xidhash,
1186 : (void *) &xid,
1187 : HASH_FIND,
1188 : &found);
1189 40 : Assert(found);
1190 40 : fd = BufFileOpenShared(ent->stream_fileset, path, O_RDONLY);
1191 :
1192 40 : buffer = palloc(BLCKSZ);
1193 40 : initStringInfo(&s2);
1194 :
1195 40 : MemoryContextSwitchTo(oldcxt);
1196 :
1197 40 : remote_final_lsn = lsn;
1198 :
1199 : /*
1200 : * Make sure the handle apply_dispatch methods are aware we're in a remote
1201 : * transaction.
1202 : */
1203 40 : in_remote_transaction = true;
1204 40 : pgstat_report_activity(STATE_RUNNING, NULL);
1205 :
1206 : /*
1207 : * Read the entries one by one and pass them through the same logic as in
1208 : * apply_dispatch.
1209 : */
1210 40 : nchanges = 0;
1211 : while (true)
1212 : {
1213 : int nbytes;
1214 : int len;
1215 :
1216 226192 : CHECK_FOR_INTERRUPTS();
1217 :
1218 : /* read length of the on-disk record */
1219 226192 : nbytes = BufFileRead(fd, &len, sizeof(len));
1220 :
1221 : /* have we reached end of the file? */
1222 226192 : if (nbytes == 0)
1223 40 : break;
1224 :
1225 : /* do we have a correct length? */
1226 226152 : if (nbytes != sizeof(len))
1227 0 : ereport(ERROR,
1228 : (errcode_for_file_access(),
1229 : errmsg("could not read from streaming transaction's changes file \"%s\": %m",
1230 : path)));
1231 :
1232 226152 : Assert(len > 0);
1233 :
1234 : /* make sure we have sufficiently large buffer */
1235 226152 : buffer = repalloc(buffer, len);
1236 :
1237 : /* and finally read the data into the buffer */
1238 226152 : if (BufFileRead(fd, buffer, len) != len)
1239 0 : ereport(ERROR,
1240 : (errcode_for_file_access(),
1241 : errmsg("could not read from streaming transaction's changes file \"%s\": %m",
1242 : path)));
1243 :
1244 : /* copy the buffer to the stringinfo and call apply_dispatch */
1245 226152 : resetStringInfo(&s2);
1246 226152 : appendBinaryStringInfo(&s2, buffer, len);
1247 :
1248 : /* Ensure we are reading the data into our memory context. */
1249 226152 : oldcxt = MemoryContextSwitchTo(ApplyMessageContext);
1250 :
1251 226152 : apply_dispatch(&s2);
1252 :
1253 226152 : MemoryContextReset(ApplyMessageContext);
1254 :
1255 226152 : MemoryContextSwitchTo(oldcxt);
1256 :
1257 226152 : nchanges++;
1258 :
1259 226152 : if (nchanges % 1000 == 0)
1260 214 : elog(DEBUG1, "replayed %d changes from file '%s'",
1261 : nchanges, path);
1262 226152 : }
1263 :
1264 40 : BufFileClose(fd);
1265 :
1266 40 : pfree(buffer);
1267 40 : pfree(s2.data);
1268 :
1269 40 : elog(DEBUG1, "replayed %d (all) changes from file \"%s\"",
1270 : nchanges, path);
1271 :
1272 40 : return nchanges;
1273 : }
1274 :
1275 : /*
1276 : * Handle STREAM COMMIT message.
1277 : */
1278 : static void
1279 28 : apply_handle_stream_commit(StringInfo s)
1280 : {
1281 : TransactionId xid;
1282 : LogicalRepCommitData commit_data;
1283 28 : int nchanges = 0;
1284 :
1285 28 : Assert(!in_streamed_transaction);
1286 :
1287 28 : xid = logicalrep_read_stream_commit(s, &commit_data);
1288 :
1289 28 : elog(DEBUG1, "received commit for streamed transaction %u", xid);
1290 :
1291 28 : ensure_transaction();
1292 :
1293 28 : nchanges = apply_spooled_messages(xid, commit_data.commit_lsn);
1294 :
1295 : /*
1296 : * Update origin state so we can restart streaming from correct position
1297 : * in case of crash.
1298 : */
1299 28 : replorigin_session_origin_lsn = commit_data.end_lsn;
1300 28 : replorigin_session_origin_timestamp = commit_data.committime;
1301 :
1302 28 : CommitTransactionCommand();
1303 28 : pgstat_report_stat(false);
1304 :
1305 28 : store_flush_position(commit_data.end_lsn);
1306 :
1307 28 : elog(DEBUG1, "apply_handle_stream_commit: replayed %d (all) changes.", nchanges);
1308 :
1309 28 : in_remote_transaction = false;
1310 :
1311 : /* Process any tables that are being synchronized in parallel. */
1312 28 : process_syncing_tables(commit_data.end_lsn);
1313 :
1314 : /* unlink the files with serialized changes and subxact info */
1315 28 : stream_cleanup_files(MyLogicalRepWorker->subid, xid);
1316 :
1317 28 : pgstat_report_activity(STATE_IDLE, NULL);
1318 28 : }
1319 :
1320 : /*
1321 : * Handle RELATION message.
1322 : *
1323 : * Note we don't do validation against local schema here. The validation
1324 : * against local schema is postponed until first change for given relation
1325 : * comes as we only care about it when applying changes for it anyway and we
1326 : * do less locking this way.
1327 : */
1328 : static void
1329 308 : apply_handle_relation(StringInfo s)
1330 : {
1331 : LogicalRepRelation *rel;
1332 :
1333 308 : if (handle_streamed_transaction('R', s))
1334 368 : return;
1335 :
1336 248 : rel = logicalrep_read_rel(s);
1337 248 : logicalrep_relmap_update(rel);
1338 : }
1339 :
1340 : /*
1341 : * Handle TYPE message.
1342 : *
1343 : * Note we don't do local mapping here, that's done when the type is
1344 : * actually used.
1345 : */
1346 : static void
1347 32 : apply_handle_type(StringInfo s)
1348 : {
1349 : LogicalRepTyp typ;
1350 :
1351 32 : if (handle_streamed_transaction('Y', s))
1352 32 : return;
1353 :
1354 32 : logicalrep_read_typ(s, &typ);
1355 32 : logicalrep_typmap_update(&typ);
1356 : }
1357 :
1358 : /*
1359 : * Get replica identity index or if it is not defined a primary key.
1360 : *
1361 : * If neither is defined, returns InvalidOid
1362 : */
1363 : static Oid
1364 124878 : GetRelationIdentityOrPK(Relation rel)
1365 : {
1366 : Oid idxoid;
1367 :
1368 124878 : idxoid = RelationGetReplicaIndex(rel);
1369 :
1370 124878 : if (!OidIsValid(idxoid))
1371 246 : idxoid = RelationGetPrimaryKeyIndex(rel);
1372 :
1373 124878 : return idxoid;
1374 : }
1375 :
1376 : /*
1377 : * Handle INSERT message.
1378 : */
1379 :
1380 : static void
1381 224744 : apply_handle_insert(StringInfo s)
1382 : {
1383 : ResultRelInfo *resultRelInfo;
1384 : LogicalRepRelMapEntry *rel;
1385 : LogicalRepTupleData newtup;
1386 : LogicalRepRelId relid;
1387 : EState *estate;
1388 : TupleTableSlot *remoteslot;
1389 : MemoryContext oldctx;
1390 :
1391 224744 : if (handle_streamed_transaction('I', s))
1392 243800 : return;
1393 :
1394 102856 : ensure_transaction();
1395 :
1396 102852 : relid = logicalrep_read_insert(s, &newtup);
1397 102852 : rel = logicalrep_rel_open(relid, RowExclusiveLock);
1398 102850 : if (!should_apply_changes_for_rel(rel))
1399 : {
1400 : /*
1401 : * The relation can't become interesting in the middle of the
1402 : * transaction so it's safe to unlock it.
1403 : */
1404 24 : logicalrep_rel_close(rel, RowExclusiveLock);
1405 24 : return;
1406 : }
1407 :
1408 : /* Initialize the executor state. */
1409 102826 : estate = create_estate_for_relation(rel);
1410 102826 : remoteslot = ExecInitExtraTupleSlot(estate,
1411 102826 : RelationGetDescr(rel->localrel),
1412 : &TTSOpsVirtual);
1413 102826 : resultRelInfo = makeNode(ResultRelInfo);
1414 102826 : InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0);
1415 :
1416 : /* Input functions may need an active snapshot, so get one */
1417 102826 : PushActiveSnapshot(GetTransactionSnapshot());
1418 :
1419 : /* Process and store remote tuple in the slot */
1420 102826 : oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
1421 102826 : slot_store_data(remoteslot, rel, &newtup);
1422 102826 : slot_fill_defaults(rel, estate, remoteslot);
1423 102826 : MemoryContextSwitchTo(oldctx);
1424 :
1425 : /* For a partitioned table, insert the tuple into a partition. */
1426 102826 : if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1427 36 : apply_handle_tuple_routing(resultRelInfo, estate,
1428 : remoteslot, NULL, rel, CMD_INSERT);
1429 : else
1430 102790 : apply_handle_insert_internal(resultRelInfo, estate,
1431 : remoteslot);
1432 :
1433 102826 : PopActiveSnapshot();
1434 :
1435 : /* Handle queued AFTER triggers. */
1436 102826 : AfterTriggerEndQuery(estate);
1437 :
1438 102826 : ExecResetTupleTable(estate->es_tupleTable, false);
1439 102826 : FreeExecutorState(estate);
1440 :
1441 102826 : logicalrep_rel_close(rel, NoLock);
1442 :
1443 102826 : CommandCounterIncrement();
1444 : }
1445 :
1446 : /* Workhorse for apply_handle_insert() */
1447 : static void
1448 102828 : apply_handle_insert_internal(ResultRelInfo *relinfo,
1449 : EState *estate, TupleTableSlot *remoteslot)
1450 : {
1451 102828 : ExecOpenIndices(relinfo, false);
1452 :
1453 : /* Do the insert. */
1454 102828 : ExecSimpleRelationInsert(relinfo, estate, remoteslot);
1455 :
1456 : /* Cleanup. */
1457 102828 : ExecCloseIndices(relinfo);
1458 102828 : }
1459 :
1460 : /*
1461 : * Check if the logical replication relation is updatable and throw
1462 : * appropriate error if it isn't.
1463 : */
1464 : static void
1465 124876 : check_relation_updatable(LogicalRepRelMapEntry *rel)
1466 : {
1467 : /* Updatable, no error. */
1468 124876 : if (rel->updatable)
1469 249752 : return;
1470 :
1471 : /*
1472 : * We are in error mode so it's fine this is somewhat slow. It's better to
1473 : * give user correct error.
1474 : */
1475 0 : if (OidIsValid(GetRelationIdentityOrPK(rel->localrel)))
1476 : {
1477 0 : ereport(ERROR,
1478 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1479 : errmsg("publisher did not send replica identity column "
1480 : "expected by the logical replication target relation \"%s.%s\"",
1481 : rel->remoterel.nspname, rel->remoterel.relname)));
1482 : }
1483 :
1484 0 : ereport(ERROR,
1485 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1486 : errmsg("logical replication target relation \"%s.%s\" has "
1487 : "neither REPLICA IDENTITY index nor PRIMARY "
1488 : "KEY and published relation does not have "
1489 : "REPLICA IDENTITY FULL",
1490 : rel->remoterel.nspname, rel->remoterel.relname)));
1491 : }
1492 :
1493 : /*
1494 : * Handle UPDATE message.
1495 : *
1496 : * TODO: FDW support
1497 : */
1498 : static void
1499 128020 : apply_handle_update(StringInfo s)
1500 : {
1501 : ResultRelInfo *resultRelInfo;
1502 : LogicalRepRelMapEntry *rel;
1503 : LogicalRepRelId relid;
1504 : EState *estate;
1505 : LogicalRepTupleData oldtup;
1506 : LogicalRepTupleData newtup;
1507 : bool has_oldtup;
1508 : TupleTableSlot *remoteslot;
1509 : RangeTblEntry *target_rte;
1510 : MemoryContext oldctx;
1511 :
1512 128020 : if (handle_streamed_transaction('U', s))
1513 130488 : return;
1514 :
1515 62776 : ensure_transaction();
1516 :
1517 62776 : relid = logicalrep_read_update(s, &has_oldtup, &oldtup,
1518 : &newtup);
1519 62776 : rel = logicalrep_rel_open(relid, RowExclusiveLock);
1520 62776 : if (!should_apply_changes_for_rel(rel))
1521 : {
1522 : /*
1523 : * The relation can't become interesting in the middle of the
1524 : * transaction so it's safe to unlock it.
1525 : */
1526 0 : logicalrep_rel_close(rel, RowExclusiveLock);
1527 0 : return;
1528 : }
1529 :
1530 : /* Check if we can do the update. */
1531 62776 : check_relation_updatable(rel);
1532 :
1533 : /* Initialize the executor state. */
1534 62776 : estate = create_estate_for_relation(rel);
1535 62776 : remoteslot = ExecInitExtraTupleSlot(estate,
1536 62776 : RelationGetDescr(rel->localrel),
1537 : &TTSOpsVirtual);
1538 62776 : resultRelInfo = makeNode(ResultRelInfo);
1539 62776 : InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0);
1540 :
1541 : /*
1542 : * Populate updatedCols so that per-column triggers can fire. This could
1543 : * include more columns than were actually changed on the publisher
1544 : * because the logical replication protocol doesn't contain that
1545 : * information. But it would for example exclude columns that only exist
1546 : * on the subscriber, since we are not touching those.
1547 : */
1548 62776 : target_rte = list_nth(estate->es_range_table, 0);
1549 313368 : for (int i = 0; i < remoteslot->tts_tupleDescriptor->natts; i++)
1550 : {
1551 250592 : Form_pg_attribute att = TupleDescAttr(remoteslot->tts_tupleDescriptor, i);
1552 250592 : int remoteattnum = rel->attrmap->attnums[i];
1553 :
1554 250592 : if (!att->attisdropped && remoteattnum >= 0)
1555 : {
1556 125512 : Assert(remoteattnum < newtup.ncols);
1557 125512 : if (newtup.colstatus[remoteattnum] != LOGICALREP_COLUMN_UNCHANGED)
1558 125506 : target_rte->updatedCols =
1559 125506 : bms_add_member(target_rte->updatedCols,
1560 : i + 1 - FirstLowInvalidHeapAttributeNumber);
1561 : }
1562 : }
1563 :
1564 62776 : fill_extraUpdatedCols(target_rte, RelationGetDescr(rel->localrel));
1565 :
1566 62776 : PushActiveSnapshot(GetTransactionSnapshot());
1567 :
1568 : /* Build the search tuple. */
1569 62776 : oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
1570 62776 : slot_store_data(remoteslot, rel,
1571 : has_oldtup ? &oldtup : &newtup);
1572 62776 : MemoryContextSwitchTo(oldctx);
1573 :
1574 : /* For a partitioned table, apply update to correct partition. */
1575 62776 : if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1576 10 : apply_handle_tuple_routing(resultRelInfo, estate,
1577 : remoteslot, &newtup, rel, CMD_UPDATE);
1578 : else
1579 62766 : apply_handle_update_internal(resultRelInfo, estate,
1580 : remoteslot, &newtup, rel);
1581 :
1582 62776 : PopActiveSnapshot();
1583 :
1584 : /* Handle queued AFTER triggers. */
1585 62776 : AfterTriggerEndQuery(estate);
1586 :
1587 62776 : ExecResetTupleTable(estate->es_tupleTable, false);
1588 62776 : FreeExecutorState(estate);
1589 :
1590 62776 : logicalrep_rel_close(rel, NoLock);
1591 :
1592 62776 : CommandCounterIncrement();
1593 : }
1594 :
1595 : /* Workhorse for apply_handle_update() */
1596 : static void
1597 62766 : apply_handle_update_internal(ResultRelInfo *relinfo,
1598 : EState *estate, TupleTableSlot *remoteslot,
1599 : LogicalRepTupleData *newtup,
1600 : LogicalRepRelMapEntry *relmapentry)
1601 : {
1602 62766 : Relation localrel = relinfo->ri_RelationDesc;
1603 : EPQState epqstate;
1604 : TupleTableSlot *localslot;
1605 : bool found;
1606 : MemoryContext oldctx;
1607 :
1608 62766 : EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
1609 62766 : ExecOpenIndices(relinfo, false);
1610 :
1611 62766 : found = FindReplTupleInLocalRel(estate, localrel,
1612 : &relmapentry->remoterel,
1613 : remoteslot, &localslot);
1614 62766 : ExecClearTuple(remoteslot);
1615 :
1616 : /*
1617 : * Tuple found.
1618 : *
1619 : * Note this will fail if there are other conflicting unique indexes.
1620 : */
1621 62766 : if (found)
1622 : {
1623 : /* Process and store remote tuple in the slot */
1624 62766 : oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
1625 62766 : slot_modify_data(remoteslot, localslot, relmapentry, newtup);
1626 62766 : MemoryContextSwitchTo(oldctx);
1627 :
1628 62766 : EvalPlanQualSetSlot(&epqstate, remoteslot);
1629 :
1630 : /* Do the actual update. */
1631 62766 : ExecSimpleRelationUpdate(relinfo, estate, &epqstate, localslot,
1632 : remoteslot);
1633 : }
1634 : else
1635 : {
1636 : /*
1637 : * The tuple to be updated could not be found.
1638 : *
1639 : * TODO what to do here, change the log level to LOG perhaps?
1640 : */
1641 0 : elog(DEBUG1,
1642 : "logical replication did not find row for update "
1643 : "in replication target relation \"%s\"",
1644 : RelationGetRelationName(localrel));
1645 : }
1646 :
1647 : /* Cleanup. */
1648 62766 : ExecCloseIndices(relinfo);
1649 62766 : EvalPlanQualEnd(&epqstate);
1650 62766 : }
1651 :
1652 : /*
1653 : * Handle DELETE message.
1654 : *
1655 : * TODO: FDW support
1656 : */
1657 : static void
1658 123746 : apply_handle_delete(StringInfo s)
1659 : {
1660 : ResultRelInfo *resultRelInfo;
1661 : LogicalRepRelMapEntry *rel;
1662 : LogicalRepTupleData oldtup;
1663 : LogicalRepRelId relid;
1664 : EState *estate;
1665 : TupleTableSlot *remoteslot;
1666 : MemoryContext oldctx;
1667 :
1668 123746 : if (handle_streamed_transaction('D', s))
1669 123292 : return;
1670 :
1671 62100 : ensure_transaction();
1672 :
1673 62100 : relid = logicalrep_read_delete(s, &oldtup);
1674 62100 : rel = logicalrep_rel_open(relid, RowExclusiveLock);
1675 62100 : if (!should_apply_changes_for_rel(rel))
1676 : {
1677 : /*
1678 : * The relation can't become interesting in the middle of the
1679 : * transaction so it's safe to unlock it.
1680 : */
1681 0 : logicalrep_rel_close(rel, RowExclusiveLock);
1682 0 : return;
1683 : }
1684 :
1685 : /* Check if we can do the delete. */
1686 62100 : check_relation_updatable(rel);
1687 :
1688 : /* Initialize the executor state. */
1689 62100 : estate = create_estate_for_relation(rel);
1690 62100 : remoteslot = ExecInitExtraTupleSlot(estate,
1691 62100 : RelationGetDescr(rel->localrel),
1692 : &TTSOpsVirtual);
1693 62100 : resultRelInfo = makeNode(ResultRelInfo);
1694 62100 : InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0);
1695 :
1696 62100 : PushActiveSnapshot(GetTransactionSnapshot());
1697 :
1698 : /* Build the search tuple. */
1699 62100 : oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
1700 62100 : slot_store_data(remoteslot, rel, &oldtup);
1701 62100 : MemoryContextSwitchTo(oldctx);
1702 :
1703 : /* For a partitioned table, apply delete to correct partition. */
1704 62100 : if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1705 24 : apply_handle_tuple_routing(resultRelInfo, estate,
1706 : remoteslot, NULL, rel, CMD_DELETE);
1707 : else
1708 62076 : apply_handle_delete_internal(resultRelInfo, estate,
1709 : remoteslot, &rel->remoterel);
1710 :
1711 62100 : PopActiveSnapshot();
1712 :
1713 : /* Handle queued AFTER triggers. */
1714 62100 : AfterTriggerEndQuery(estate);
1715 :
1716 62100 : ExecResetTupleTable(estate->es_tupleTable, false);
1717 62100 : FreeExecutorState(estate);
1718 :
1719 62100 : logicalrep_rel_close(rel, NoLock);
1720 :
1721 62100 : CommandCounterIncrement();
1722 : }
1723 :
1724 : /* Workhorse for apply_handle_delete() */
1725 : static void
1726 62102 : apply_handle_delete_internal(ResultRelInfo *relinfo, EState *estate,
1727 : TupleTableSlot *remoteslot,
1728 : LogicalRepRelation *remoterel)
1729 : {
1730 62102 : Relation localrel = relinfo->ri_RelationDesc;
1731 : EPQState epqstate;
1732 : TupleTableSlot *localslot;
1733 : bool found;
1734 :
1735 62102 : EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
1736 62102 : ExecOpenIndices(relinfo, false);
1737 :
1738 62102 : found = FindReplTupleInLocalRel(estate, localrel, remoterel,
1739 : remoteslot, &localslot);
1740 :
1741 : /* If found delete it. */
1742 62102 : if (found)
1743 : {
1744 62102 : EvalPlanQualSetSlot(&epqstate, localslot);
1745 :
1746 : /* Do the actual delete. */
1747 62102 : ExecSimpleRelationDelete(relinfo, estate, &epqstate, localslot);
1748 : }
1749 : else
1750 : {
1751 : /* The tuple to be deleted could not be found. */
1752 0 : elog(DEBUG1,
1753 : "logical replication could not find row for delete "
1754 : "in replication target relation \"%s\"",
1755 : RelationGetRelationName(localrel));
1756 : }
1757 :
1758 : /* Cleanup. */
1759 62102 : ExecCloseIndices(relinfo);
1760 62102 : EvalPlanQualEnd(&epqstate);
1761 62102 : }
1762 :
1763 : /*
1764 : * Try to find a tuple received from the publication side (in 'remoteslot') in
1765 : * the corresponding local relation using either replica identity index,
1766 : * primary key or if needed, sequential scan.
1767 : *
1768 : * Local tuple, if found, is returned in '*localslot'.
1769 : */
1770 : static bool
1771 124878 : FindReplTupleInLocalRel(EState *estate, Relation localrel,
1772 : LogicalRepRelation *remoterel,
1773 : TupleTableSlot *remoteslot,
1774 : TupleTableSlot **localslot)
1775 : {
1776 : Oid idxoid;
1777 : bool found;
1778 :
1779 124878 : *localslot = table_slot_create(localrel, &estate->es_tupleTable);
1780 :
1781 124878 : idxoid = GetRelationIdentityOrPK(localrel);
1782 124878 : Assert(OidIsValid(idxoid) ||
1783 : (remoterel->replident == REPLICA_IDENTITY_FULL));
1784 :
1785 124878 : if (OidIsValid(idxoid))
1786 124634 : found = RelationFindReplTupleByIndex(localrel, idxoid,
1787 : LockTupleExclusive,
1788 : remoteslot, *localslot);
1789 : else
1790 244 : found = RelationFindReplTupleSeq(localrel, LockTupleExclusive,
1791 : remoteslot, *localslot);
1792 :
1793 124878 : return found;
1794 : }
1795 :
1796 : /*
1797 : * This handles insert, update, delete on a partitioned table.
1798 : */
1799 : static void
1800 70 : apply_handle_tuple_routing(ResultRelInfo *relinfo,
1801 : EState *estate,
1802 : TupleTableSlot *remoteslot,
1803 : LogicalRepTupleData *newtup,
1804 : LogicalRepRelMapEntry *relmapentry,
1805 : CmdType operation)
1806 : {
1807 70 : Relation parentrel = relinfo->ri_RelationDesc;
1808 70 : ModifyTableState *mtstate = NULL;
1809 70 : PartitionTupleRouting *proute = NULL;
1810 : ResultRelInfo *partrelinfo;
1811 : Relation partrel;
1812 : TupleTableSlot *remoteslot_part;
1813 : TupleConversionMap *map;
1814 : MemoryContext oldctx;
1815 :
1816 : /* ModifyTableState is needed for ExecFindPartition(). */
1817 70 : mtstate = makeNode(ModifyTableState);
1818 70 : mtstate->ps.plan = NULL;
1819 70 : mtstate->ps.state = estate;
1820 70 : mtstate->operation = operation;
1821 70 : mtstate->resultRelInfo = relinfo;
1822 70 : proute = ExecSetupPartitionTupleRouting(estate, mtstate, parentrel);
1823 :
1824 : /*
1825 : * Find the partition to which the "search tuple" belongs.
1826 : */
1827 70 : Assert(remoteslot != NULL);
1828 70 : oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
1829 70 : partrelinfo = ExecFindPartition(mtstate, relinfo, proute,
1830 : remoteslot, estate);
1831 70 : Assert(partrelinfo != NULL);
1832 70 : partrel = partrelinfo->ri_RelationDesc;
1833 :
1834 : /*
1835 : * To perform any of the operations below, the tuple must match the
1836 : * partition's rowtype. Convert if needed or just copy, using a dedicated
1837 : * slot to store the tuple in any case.
1838 : */
1839 70 : remoteslot_part = partrelinfo->ri_PartitionTupleSlot;
1840 70 : if (remoteslot_part == NULL)
1841 22 : remoteslot_part = table_slot_create(partrel, &estate->es_tupleTable);
1842 70 : map = partrelinfo->ri_RootToPartitionMap;
1843 70 : if (map != NULL)
1844 48 : remoteslot_part = execute_attr_map_slot(map->attrMap, remoteslot,
1845 : remoteslot_part);
1846 : else
1847 : {
1848 22 : remoteslot_part = ExecCopySlot(remoteslot_part, remoteslot);
1849 22 : slot_getallattrs(remoteslot_part);
1850 : }
1851 70 : MemoryContextSwitchTo(oldctx);
1852 :
1853 70 : switch (operation)
1854 : {
1855 : case CMD_INSERT:
1856 36 : apply_handle_insert_internal(partrelinfo, estate,
1857 : remoteslot_part);
1858 36 : break;
1859 :
1860 : case CMD_DELETE:
1861 24 : apply_handle_delete_internal(partrelinfo, estate,
1862 : remoteslot_part,
1863 : &relmapentry->remoterel);
1864 24 : break;
1865 :
1866 : case CMD_UPDATE:
1867 :
1868 : /*
1869 : * For UPDATE, depending on whether or not the updated tuple
1870 : * satisfies the partition's constraint, perform a simple UPDATE
1871 : * of the partition or move the updated tuple into a different
1872 : * suitable partition.
1873 : */
1874 : {
1875 10 : AttrMap *attrmap = map ? map->attrMap : NULL;
1876 : LogicalRepRelMapEntry *part_entry;
1877 : TupleTableSlot *localslot;
1878 : ResultRelInfo *partrelinfo_new;
1879 : bool found;
1880 :
1881 10 : part_entry = logicalrep_partition_open(relmapentry, partrel,
1882 : attrmap);
1883 :
1884 : /* Get the matching local tuple from the partition. */
1885 10 : found = FindReplTupleInLocalRel(estate, partrel,
1886 : &part_entry->remoterel,
1887 : remoteslot_part, &localslot);
1888 :
1889 10 : oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
1890 10 : if (found)
1891 : {
1892 : /* Apply the update. */
1893 10 : slot_modify_data(remoteslot_part, localslot,
1894 : part_entry,
1895 : newtup);
1896 10 : MemoryContextSwitchTo(oldctx);
1897 : }
1898 : else
1899 : {
1900 : /*
1901 : * The tuple to be updated could not be found.
1902 : *
1903 : * TODO what to do here, change the log level to LOG
1904 : * perhaps?
1905 : */
1906 0 : elog(DEBUG1,
1907 : "logical replication did not find row for update "
1908 : "in replication target relation \"%s\"",
1909 : RelationGetRelationName(partrel));
1910 : }
1911 :
1912 : /*
1913 : * Does the updated tuple still satisfy the current
1914 : * partition's constraint?
1915 : */
1916 20 : if (!partrel->rd_rel->relispartition ||
1917 10 : ExecPartitionCheck(partrelinfo, remoteslot_part, estate,
1918 : false))
1919 8 : {
1920 : /*
1921 : * Yes, so simply UPDATE the partition. We don't call
1922 : * apply_handle_update_internal() here, which would
1923 : * normally do the following work, to avoid repeating some
1924 : * work already done above to find the local tuple in the
1925 : * partition.
1926 : */
1927 : EPQState epqstate;
1928 :
1929 8 : EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
1930 8 : ExecOpenIndices(partrelinfo, false);
1931 :
1932 8 : EvalPlanQualSetSlot(&epqstate, remoteslot_part);
1933 8 : ExecSimpleRelationUpdate(partrelinfo, estate, &epqstate,
1934 : localslot, remoteslot_part);
1935 8 : ExecCloseIndices(partrelinfo);
1936 8 : EvalPlanQualEnd(&epqstate);
1937 : }
1938 : else
1939 : {
1940 : /* Move the tuple into the new partition. */
1941 :
1942 : /*
1943 : * New partition will be found using tuple routing, which
1944 : * can only occur via the parent table. We might need to
1945 : * convert the tuple to the parent's rowtype. Note that
1946 : * this is the tuple found in the partition, not the
1947 : * original search tuple received by this function.
1948 : */
1949 2 : if (map)
1950 : {
1951 2 : TupleConversionMap *PartitionToRootMap =
1952 2 : convert_tuples_by_name(RelationGetDescr(partrel),
1953 : RelationGetDescr(parentrel));
1954 :
1955 2 : remoteslot =
1956 2 : execute_attr_map_slot(PartitionToRootMap->attrMap,
1957 : remoteslot_part, remoteslot);
1958 : }
1959 : else
1960 : {
1961 0 : remoteslot = ExecCopySlot(remoteslot, remoteslot_part);
1962 0 : slot_getallattrs(remoteslot);
1963 : }
1964 :
1965 :
1966 : /* Find the new partition. */
1967 2 : oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
1968 2 : partrelinfo_new = ExecFindPartition(mtstate, relinfo,
1969 : proute, remoteslot,
1970 : estate);
1971 2 : MemoryContextSwitchTo(oldctx);
1972 2 : Assert(partrelinfo_new != partrelinfo);
1973 :
1974 : /* DELETE old tuple found in the old partition. */
1975 2 : apply_handle_delete_internal(partrelinfo, estate,
1976 : localslot,
1977 : &relmapentry->remoterel);
1978 :
1979 : /* INSERT new tuple into the new partition. */
1980 :
1981 : /*
1982 : * Convert the replacement tuple to match the destination
1983 : * partition rowtype.
1984 : */
1985 2 : oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
1986 2 : partrel = partrelinfo_new->ri_RelationDesc;
1987 2 : remoteslot_part = partrelinfo_new->ri_PartitionTupleSlot;
1988 2 : if (remoteslot_part == NULL)
1989 2 : remoteslot_part = table_slot_create(partrel,
1990 : &estate->es_tupleTable);
1991 2 : map = partrelinfo_new->ri_RootToPartitionMap;
1992 2 : if (map != NULL)
1993 : {
1994 0 : remoteslot_part = execute_attr_map_slot(map->attrMap,
1995 : remoteslot,
1996 : remoteslot_part);
1997 : }
1998 : else
1999 : {
2000 2 : remoteslot_part = ExecCopySlot(remoteslot_part,
2001 : remoteslot);
2002 2 : slot_getallattrs(remoteslot);
2003 : }
2004 2 : MemoryContextSwitchTo(oldctx);
2005 2 : apply_handle_insert_internal(partrelinfo_new, estate,
2006 : remoteslot_part);
2007 : }
2008 : }
2009 10 : break;
2010 :
2011 : default:
2012 0 : elog(ERROR, "unrecognized CmdType: %d", (int) operation);
2013 : break;
2014 : }
2015 :
2016 70 : ExecCleanupTupleRouting(mtstate, proute);
2017 70 : }
2018 :
2019 : /*
2020 : * Handle TRUNCATE message.
2021 : *
2022 : * TODO: FDW support
2023 : */
2024 : static void
2025 24 : apply_handle_truncate(StringInfo s)
2026 : {
2027 24 : bool cascade = false;
2028 24 : bool restart_seqs = false;
2029 24 : List *remote_relids = NIL;
2030 24 : List *remote_rels = NIL;
2031 24 : List *rels = NIL;
2032 24 : List *part_rels = NIL;
2033 24 : List *relids = NIL;
2034 24 : List *relids_logged = NIL;
2035 : ListCell *lc;
2036 :
2037 24 : if (handle_streamed_transaction('T', s))
2038 24 : return;
2039 :
2040 24 : ensure_transaction();
2041 :
2042 24 : remote_relids = logicalrep_read_truncate(s, &cascade, &restart_seqs);
2043 :
2044 64 : foreach(lc, remote_relids)
2045 : {
2046 40 : LogicalRepRelId relid = lfirst_oid(lc);
2047 : LogicalRepRelMapEntry *rel;
2048 :
2049 40 : rel = logicalrep_rel_open(relid, RowExclusiveLock);
2050 40 : if (!should_apply_changes_for_rel(rel))
2051 : {
2052 : /*
2053 : * The relation can't become interesting in the middle of the
2054 : * transaction so it's safe to unlock it.
2055 : */
2056 0 : logicalrep_rel_close(rel, RowExclusiveLock);
2057 0 : continue;
2058 : }
2059 :
2060 40 : remote_rels = lappend(remote_rels, rel);
2061 40 : rels = lappend(rels, rel->localrel);
2062 40 : relids = lappend_oid(relids, rel->localreloid);
2063 40 : if (RelationIsLogicallyLogged(rel->localrel))
2064 40 : relids_logged = lappend_oid(relids_logged, rel->localreloid);
2065 :
2066 : /*
2067 : * Truncate partitions if we got a message to truncate a partitioned
2068 : * table.
2069 : */
2070 40 : if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2071 : {
2072 : ListCell *child;
2073 8 : List *children = find_all_inheritors(rel->localreloid,
2074 : RowExclusiveLock,
2075 : NULL);
2076 :
2077 30 : foreach(child, children)
2078 : {
2079 22 : Oid childrelid = lfirst_oid(child);
2080 : Relation childrel;
2081 :
2082 22 : if (list_member_oid(relids, childrelid))
2083 8 : continue;
2084 :
2085 : /* find_all_inheritors already got lock */
2086 14 : childrel = table_open(childrelid, NoLock);
2087 :
2088 : /*
2089 : * Ignore temp tables of other backends. See similar code in
2090 : * ExecuteTruncate().
2091 : */
2092 14 : if (RELATION_IS_OTHER_TEMP(childrel))
2093 : {
2094 0 : table_close(childrel, RowExclusiveLock);
2095 0 : continue;
2096 : }
2097 :
2098 14 : rels = lappend(rels, childrel);
2099 14 : part_rels = lappend(part_rels, childrel);
2100 14 : relids = lappend_oid(relids, childrelid);
2101 : /* Log this relation only if needed for logical decoding */
2102 14 : if (RelationIsLogicallyLogged(childrel))
2103 14 : relids_logged = lappend_oid(relids_logged, childrelid);
2104 : }
2105 : }
2106 : }
2107 :
2108 : /*
2109 : * Even if we used CASCADE on the upstream primary we explicitly default
2110 : * to replaying changes without further cascading. This might be later
2111 : * changeable with a user specified option.
2112 : */
2113 24 : ExecuteTruncateGuts(rels, relids, relids_logged, DROP_RESTRICT, restart_seqs);
2114 :
2115 64 : foreach(lc, remote_rels)
2116 : {
2117 40 : LogicalRepRelMapEntry *rel = lfirst(lc);
2118 :
2119 40 : logicalrep_rel_close(rel, NoLock);
2120 : }
2121 38 : foreach(lc, part_rels)
2122 : {
2123 14 : Relation rel = lfirst(lc);
2124 :
2125 14 : table_close(rel, NoLock);
2126 : }
2127 :
2128 24 : CommandCounterIncrement();
2129 : }
2130 :
2131 :
2132 : /*
2133 : * Logical replication protocol message dispatcher.
2134 : */
2135 : static void
2136 479178 : apply_dispatch(StringInfo s)
2137 : {
2138 479178 : char action = pq_getmsgbyte(s);
2139 :
2140 479178 : switch (action)
2141 : {
2142 : /* BEGIN */
2143 : case 'B':
2144 498 : apply_handle_begin(s);
2145 498 : break;
2146 : /* COMMIT/ABORT */
2147 : case 'C':
2148 486 : apply_handle_commit(s);
2149 486 : break;
2150 : /* PREPARE and [COMMIT|ROLLBACK] PREPARED */
2151 : case 'P':
2152 26 : apply_handle_prepare(s);
2153 26 : break;
2154 : /* INSERT */
2155 : case 'I':
2156 224744 : apply_handle_insert(s);
2157 224738 : break;
2158 : /* UPDATE */
2159 : case 'U':
2160 128020 : apply_handle_update(s);
2161 128020 : break;
2162 : /* DELETE */
2163 : case 'D':
2164 123746 : apply_handle_delete(s);
2165 123746 : break;
2166 : /* TRUNCATE */
2167 : case 'T':
2168 24 : apply_handle_truncate(s);
2169 24 : break;
2170 : /* RELATION */
2171 : case 'R':
2172 308 : apply_handle_relation(s);
2173 308 : break;
2174 : /* TYPE */
2175 : case 'Y':
2176 32 : apply_handle_type(s);
2177 32 : break;
2178 : /* ORIGIN */
2179 : case 'O':
2180 0 : apply_handle_origin(s);
2181 0 : break;
2182 : /* STREAM START */
2183 : case 'S':
2184 614 : apply_handle_stream_start(s);
2185 614 : break;
2186 : /* STREAM END */
2187 : case 'E':
2188 614 : apply_handle_stream_stop(s);
2189 614 : break;
2190 : /* STREAM ABORT */
2191 : case 'A':
2192 26 : apply_handle_stream_abort(s);
2193 26 : break;
2194 : /* STREAM COMMIT */
2195 : case 'c':
2196 28 : apply_handle_stream_commit(s);
2197 28 : break;
2198 : /* STREAM PREPARE */
2199 : case 'p':
2200 12 : apply_handle_stream_prepare(s);
2201 12 : break;
2202 : default:
2203 0 : ereport(ERROR,
2204 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
2205 : errmsg("invalid logical replication message type \"%c\"", action)));
2206 : }
2207 479172 : }
2208 :
2209 : /*
2210 : * Figure out which write/flush positions to report to the walsender process.
2211 : *
2212 : * We can't simply report back the last LSN the walsender sent us because the
2213 : * local transaction might not yet be flushed to disk locally. Instead we
2214 : * build a list that associates local with remote LSNs for every commit. When
2215 : * reporting back the flush position to the sender we iterate that list and
2216 : * check which entries on it are already locally flushed. Those we can report
2217 : * as having been flushed.
2218 : *
2219 : * The have_pending_txes is true if there are outstanding transactions that
2220 : * need to be flushed.
2221 : */
2222 : static void
2223 66192 : get_flush_position(XLogRecPtr *write, XLogRecPtr *flush,
2224 : bool *have_pending_txes)
2225 : {
2226 : dlist_mutable_iter iter;
2227 66192 : XLogRecPtr local_flush = GetFlushRecPtr();
2228 :
2229 66192 : *write = InvalidXLogRecPtr;
2230 66192 : *flush = InvalidXLogRecPtr;
2231 :
2232 66582 : dlist_foreach_modify(iter, &lsn_mapping)
2233 : {
2234 32882 : FlushPosition *pos =
2235 32882 : dlist_container(FlushPosition, node, iter.cur);
2236 :
2237 32882 : *write = pos->remote_end;
2238 :
2239 32882 : if (pos->local_end <= local_flush)
2240 : {
2241 390 : *flush = pos->remote_end;
2242 390 : dlist_delete(iter.cur);
2243 390 : pfree(pos);
2244 : }
2245 : else
2246 : {
2247 : /*
2248 : * Don't want to uselessly iterate over the rest of the list which
2249 : * could potentially be long. Instead get the last element and
2250 : * grab the write position from there.
2251 : */
2252 32492 : pos = dlist_tail_element(FlushPosition, node,
2253 : &lsn_mapping);
2254 32492 : *write = pos->remote_end;
2255 32492 : *have_pending_txes = true;
2256 64984 : return;
2257 : }
2258 : }
2259 :
2260 33700 : *have_pending_txes = !dlist_is_empty(&lsn_mapping);
2261 : }
2262 :
2263 : /*
2264 : * Store current remote/local lsn pair in the tracking list.
2265 : */
2266 : static void
2267 434 : store_flush_position(XLogRecPtr remote_lsn)
2268 : {
2269 : FlushPosition *flushpos;
2270 :
2271 : /* Need to do this in permanent context */
2272 434 : MemoryContextSwitchTo(ApplyContext);
2273 :
2274 : /* Track commit lsn */
2275 434 : flushpos = (FlushPosition *) palloc(sizeof(FlushPosition));
2276 434 : flushpos->local_end = XactLastCommitEnd;
2277 434 : flushpos->remote_end = remote_lsn;
2278 :
2279 434 : dlist_push_tail(&lsn_mapping, &flushpos->node);
2280 434 : MemoryContextSwitchTo(ApplyMessageContext);
2281 434 : }
2282 :
2283 :
2284 : /* Update statistics of the worker. */
2285 : static void
2286 257434 : UpdateWorkerStats(XLogRecPtr last_lsn, TimestampTz send_time, bool reply)
2287 : {
2288 257434 : MyLogicalRepWorker->last_lsn = last_lsn;
2289 257434 : MyLogicalRepWorker->last_send_time = send_time;
2290 257434 : MyLogicalRepWorker->last_recv_time = GetCurrentTimestamp();
2291 257434 : if (reply)
2292 : {
2293 4408 : MyLogicalRepWorker->reply_lsn = last_lsn;
2294 4408 : MyLogicalRepWorker->reply_time = send_time;
2295 : }
2296 257434 : }
2297 :
2298 : /*
2299 : * Apply main loop.
2300 : */
2301 : static void
2302 198 : LogicalRepApplyLoop(XLogRecPtr last_received)
2303 : {
2304 198 : TimestampTz last_recv_timestamp = GetCurrentTimestamp();
2305 198 : bool ping_sent = false;
2306 : TimeLineID tli;
2307 :
2308 : /*
2309 : * Init the ApplyMessageContext which we clean up after each replication
2310 : * protocol message.
2311 : */
2312 198 : ApplyMessageContext = AllocSetContextCreate(ApplyContext,
2313 : "ApplyMessageContext",
2314 : ALLOCSET_DEFAULT_SIZES);
2315 :
2316 : /*
2317 : * This memory context is used for per-stream data when the streaming mode
2318 : * is enabled. This context is reset on each stream stop.
2319 : */
2320 198 : LogicalStreamingContext = AllocSetContextCreate(ApplyContext,
2321 : "LogicalStreamingContext",
2322 : ALLOCSET_DEFAULT_SIZES);
2323 :
2324 : /* mark as idle, before starting to loop */
2325 198 : pgstat_report_activity(STATE_IDLE, NULL);
2326 :
2327 : /* This outer loop iterates once per wait. */
2328 : for (;;)
2329 : {
2330 61770 : pgsocket fd = PGINVALID_SOCKET;
2331 : int rc;
2332 : int len;
2333 61770 : char *buf = NULL;
2334 61770 : bool endofstream = false;
2335 : long wait_time;
2336 :
2337 61770 : CHECK_FOR_INTERRUPTS();
2338 :
2339 61770 : MemoryContextSwitchTo(ApplyMessageContext);
2340 :
2341 61770 : len = walrcv_receive(wrconn, &buf, &fd);
2342 :
2343 61758 : if (len != 0)
2344 : {
2345 : /* Loop to process all available data (without blocking). */
2346 : for (;;)
2347 : {
2348 318960 : CHECK_FOR_INTERRUPTS();
2349 :
2350 318960 : if (len == 0)
2351 : {
2352 61520 : break;
2353 : }
2354 257440 : else if (len < 0)
2355 : {
2356 4 : ereport(LOG,
2357 : (errmsg("data stream from publisher has ended")));
2358 4 : endofstream = true;
2359 4 : break;
2360 : }
2361 : else
2362 : {
2363 : int c;
2364 : StringInfoData s;
2365 :
2366 : /* Reset timeout. */
2367 257436 : last_recv_timestamp = GetCurrentTimestamp();
2368 257436 : ping_sent = false;
2369 :
2370 : /* Ensure we are reading the data into our memory context. */
2371 257436 : MemoryContextSwitchTo(ApplyMessageContext);
2372 :
2373 257436 : s.data = buf;
2374 257436 : s.len = len;
2375 257436 : s.cursor = 0;
2376 257436 : s.maxlen = -1;
2377 :
2378 257436 : c = pq_getmsgbyte(&s);
2379 :
2380 257436 : if (c == 'w')
2381 : {
2382 : XLogRecPtr start_lsn;
2383 : XLogRecPtr end_lsn;
2384 : TimestampTz send_time;
2385 :
2386 253026 : start_lsn = pq_getmsgint64(&s);
2387 253026 : end_lsn = pq_getmsgint64(&s);
2388 253026 : send_time = pq_getmsgint64(&s);
2389 :
2390 253026 : if (last_received < start_lsn)
2391 235492 : last_received = start_lsn;
2392 :
2393 253026 : if (last_received < end_lsn)
2394 0 : last_received = end_lsn;
2395 :
2396 253026 : UpdateWorkerStats(last_received, send_time, false);
2397 :
2398 253026 : apply_dispatch(&s);
2399 : }
2400 4410 : else if (c == 'k')
2401 : {
2402 : XLogRecPtr end_lsn;
2403 : TimestampTz timestamp;
2404 : bool reply_requested;
2405 :
2406 4410 : end_lsn = pq_getmsgint64(&s);
2407 4410 : timestamp = pq_getmsgint64(&s);
2408 4410 : reply_requested = pq_getmsgbyte(&s);
2409 :
2410 4410 : if (last_received < end_lsn)
2411 296 : last_received = end_lsn;
2412 :
2413 4410 : send_feedback(last_received, reply_requested, false);
2414 4408 : UpdateWorkerStats(last_received, timestamp, true);
2415 : }
2416 : /* other message types are purposefully ignored */
2417 :
2418 257428 : MemoryContextReset(ApplyMessageContext);
2419 : }
2420 :
2421 257428 : len = walrcv_receive(wrconn, &buf, &fd);
2422 257428 : }
2423 : }
2424 :
2425 : /* confirm all writes so far */
2426 61750 : send_feedback(last_received, false, false);
2427 :
2428 61750 : if (!in_remote_transaction && !in_streamed_transaction)
2429 : {
2430 : /*
2431 : * If we didn't get any transactions for a while there might be
2432 : * unconsumed invalidation messages in the queue, consume them
2433 : * now.
2434 : */
2435 5034 : AcceptInvalidationMessages();
2436 5034 : maybe_reread_subscription();
2437 :
2438 : /* Process any table synchronization changes. */
2439 5026 : process_syncing_tables(last_received);
2440 : }
2441 :
2442 : /* Cleanup the memory. */
2443 61620 : MemoryContextResetAndDeleteChildren(ApplyMessageContext);
2444 61620 : MemoryContextSwitchTo(TopMemoryContext);
2445 :
2446 : /* Check if we need to exit the streaming loop. */
2447 61620 : if (endofstream)
2448 4 : break;
2449 :
2450 : /*
2451 : * Wait for more data or latch. If we have unflushed transactions,
2452 : * wake up after WalWriterDelay to see if they've been flushed yet (in
2453 : * which case we should send a feedback message). Otherwise, there's
2454 : * no particular urgency about waking up unless we get data or a
2455 : * signal.
2456 : */
2457 61616 : if (!dlist_is_empty(&lsn_mapping))
2458 28706 : wait_time = WalWriterDelay;
2459 : else
2460 32910 : wait_time = NAPTIME_PER_CYCLE;
2461 :
2462 61616 : rc = WaitLatchOrSocket(MyLatch,
2463 : WL_SOCKET_READABLE | WL_LATCH_SET |
2464 : WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
2465 : fd, wait_time,
2466 : WAIT_EVENT_LOGICAL_APPLY_MAIN);
2467 :
2468 61616 : if (rc & WL_LATCH_SET)
2469 : {
2470 204 : ResetLatch(MyLatch);
2471 204 : CHECK_FOR_INTERRUPTS();
2472 : }
2473 :
2474 61572 : if (ConfigReloadPending)
2475 : {
2476 0 : ConfigReloadPending = false;
2477 0 : ProcessConfigFile(PGC_SIGHUP);
2478 : }
2479 :
2480 61572 : if (rc & WL_TIMEOUT)
2481 : {
2482 : /*
2483 : * We didn't receive anything new. If we haven't heard anything
2484 : * from the server for more than wal_receiver_timeout / 2, ping
2485 : * the server. Also, if it's been longer than
2486 : * wal_receiver_status_interval since the last update we sent,
2487 : * send a status update to the primary anyway, to report any
2488 : * progress in applying WAL.
2489 : */
2490 32 : bool requestReply = false;
2491 :
2492 : /*
2493 : * Check if time since last receive from standby has reached the
2494 : * configured limit.
2495 : */
2496 32 : if (wal_receiver_timeout > 0)
2497 : {
2498 32 : TimestampTz now = GetCurrentTimestamp();
2499 : TimestampTz timeout;
2500 :
2501 32 : timeout =
2502 32 : TimestampTzPlusMilliseconds(last_recv_timestamp,
2503 : wal_receiver_timeout);
2504 :
2505 32 : if (now >= timeout)
2506 0 : ereport(ERROR,
2507 : (errmsg("terminating logical replication worker due to timeout")));
2508 :
2509 : /* Check to see if it's time for a ping. */
2510 32 : if (!ping_sent)
2511 : {
2512 32 : timeout = TimestampTzPlusMilliseconds(last_recv_timestamp,
2513 : (wal_receiver_timeout / 2));
2514 32 : if (now >= timeout)
2515 : {
2516 0 : requestReply = true;
2517 0 : ping_sent = true;
2518 : }
2519 : }
2520 : }
2521 :
2522 32 : send_feedback(last_received, requestReply, requestReply);
2523 : }
2524 61572 : }
2525 :
2526 : /* All done */
2527 4 : walrcv_endstreaming(wrconn, &tli);
2528 0 : }
2529 :
2530 : /*
2531 : * Send a Standby Status Update message to server.
2532 : *
2533 : * 'recvpos' is the latest LSN we've received data to, force is set if we need
2534 : * to send a response to avoid timeouts.
2535 : */
2536 : static void
2537 66192 : send_feedback(XLogRecPtr recvpos, bool force, bool requestReply)
2538 : {
2539 : static StringInfo reply_message = NULL;
2540 : static TimestampTz send_time = 0;
2541 :
2542 : static XLogRecPtr last_recvpos = InvalidXLogRecPtr;
2543 : static XLogRecPtr last_writepos = InvalidXLogRecPtr;
2544 : static XLogRecPtr last_flushpos = InvalidXLogRecPtr;
2545 :
2546 : XLogRecPtr writepos;
2547 : XLogRecPtr flushpos;
2548 : TimestampTz now;
2549 : bool have_pending_txes;
2550 :
2551 : /*
2552 : * If the user doesn't want status to be reported to the publisher, be
2553 : * sure to exit before doing anything at all.
2554 : */
2555 66192 : if (!force && wal_receiver_status_interval <= 0)
2556 32770 : return;
2557 :
2558 : /* It's legal to not pass a recvpos */
2559 66192 : if (recvpos < last_recvpos)
2560 0 : recvpos = last_recvpos;
2561 :
2562 66192 : get_flush_position(&writepos, &flushpos, &have_pending_txes);
2563 :
2564 : /*
2565 : * No outstanding transactions to flush, we can report the latest received
2566 : * position. This is important for synchronous replication.
2567 : */
2568 66192 : if (!have_pending_txes)
2569 33700 : flushpos = writepos = recvpos;
2570 :
2571 66192 : if (writepos < last_writepos)
2572 0 : writepos = last_writepos;
2573 :
2574 66192 : if (flushpos < last_flushpos)
2575 32452 : flushpos = last_flushpos;
2576 :
2577 66192 : now = GetCurrentTimestamp();
2578 :
2579 : /* if we've already reported everything we're good */
2580 129040 : if (!force &&
2581 95642 : writepos == last_writepos &&
2582 65566 : flushpos == last_flushpos &&
2583 32772 : !TimestampDifferenceExceeds(send_time, now,
2584 : wal_receiver_status_interval * 1000))
2585 32770 : return;
2586 33422 : send_time = now;
2587 :
2588 33422 : if (!reply_message)
2589 : {
2590 198 : MemoryContext oldctx = MemoryContextSwitchTo(ApplyContext);
2591 :
2592 198 : reply_message = makeStringInfo();
2593 198 : MemoryContextSwitchTo(oldctx);
2594 : }
2595 : else
2596 33224 : resetStringInfo(reply_message);
2597 :
2598 33422 : pq_sendbyte(reply_message, 'r');
2599 33422 : pq_sendint64(reply_message, recvpos); /* write */
2600 33422 : pq_sendint64(reply_message, flushpos); /* flush */
2601 33422 : pq_sendint64(reply_message, writepos); /* apply */
2602 33422 : pq_sendint64(reply_message, now); /* sendTime */
2603 33422 : pq_sendbyte(reply_message, requestReply); /* replyRequested */
2604 :
2605 33422 : elog(DEBUG2, "sending feedback (force %d) to recv %X/%X, write %X/%X, flush %X/%X",
2606 : force,
2607 : (uint32) (recvpos >> 32), (uint32) recvpos,
2608 : (uint32) (writepos >> 32), (uint32) writepos,
2609 : (uint32) (flushpos >> 32), (uint32) flushpos
2610 : );
2611 :
2612 33422 : walrcv_send(wrconn, reply_message->data, reply_message->len);
2613 :
2614 33420 : if (recvpos > last_recvpos)
2615 30058 : last_recvpos = recvpos;
2616 33420 : if (writepos > last_writepos)
2617 30054 : last_writepos = writepos;
2618 33420 : if (flushpos > last_flushpos)
2619 29748 : last_flushpos = flushpos;
2620 : }
2621 :
2622 : /*
2623 : * Reread subscription info if needed. Most changes will be exit.
2624 : */
2625 : static void
2626 6230 : maybe_reread_subscription(void)
2627 : {
2628 : MemoryContext oldctx;
2629 : Subscription *newsub;
2630 6230 : bool started_tx = false;
2631 :
2632 : /* When cache state is valid there is nothing to do here. */
2633 6230 : if (MySubscriptionValid)
2634 12424 : return;
2635 :
2636 : /* This function might be called inside or outside of transaction. */
2637 24 : if (!IsTransactionState())
2638 : {
2639 18 : StartTransactionCommand();
2640 18 : started_tx = true;
2641 : }
2642 :
2643 : /* Ensure allocations in permanent context. */
2644 24 : oldctx = MemoryContextSwitchTo(ApplyContext);
2645 :
2646 24 : newsub = GetSubscription(MyLogicalRepWorker->subid, true);
2647 :
2648 : /*
2649 : * Exit if the subscription was removed. This normally should not happen
2650 : * as the worker gets killed during DROP SUBSCRIPTION.
2651 : */
2652 24 : if (!newsub)
2653 : {
2654 0 : ereport(LOG,
2655 : (errmsg("logical replication apply worker for subscription \"%s\" will "
2656 : "stop because the subscription was removed",
2657 : MySubscription->name)));
2658 :
2659 0 : proc_exit(0);
2660 : }
2661 :
2662 : /*
2663 : * Exit if the subscription was disabled. This normally should not happen
2664 : * as the worker gets killed during ALTER SUBSCRIPTION ... DISABLE.
2665 : */
2666 24 : if (!newsub->enabled)
2667 : {
2668 0 : ereport(LOG,
2669 : (errmsg("logical replication apply worker for subscription \"%s\" will "
2670 : "stop because the subscription was disabled",
2671 : MySubscription->name)));
2672 :
2673 0 : proc_exit(0);
2674 : }
2675 :
2676 : /* !slotname should never happen when enabled is true. */
2677 24 : Assert(newsub->slotname);
2678 :
2679 : /*
2680 : * Exit if any parameter that affects the remote connection was changed.
2681 : * The launcher will start a new worker.
2682 : */
2683 46 : if (strcmp(newsub->conninfo, MySubscription->conninfo) != 0 ||
2684 42 : strcmp(newsub->name, MySubscription->name) != 0 ||
2685 40 : strcmp(newsub->slotname, MySubscription->slotname) != 0 ||
2686 34 : newsub->binary != MySubscription->binary ||
2687 28 : newsub->stream != MySubscription->stream ||
2688 14 : !equal(newsub->publications, MySubscription->publications))
2689 : {
2690 12 : ereport(LOG,
2691 : (errmsg("logical replication apply worker for subscription \"%s\" will restart because of a parameter change",
2692 : MySubscription->name)));
2693 :
2694 12 : proc_exit(0);
2695 : }
2696 :
2697 : /* Check for other changes that should never happen too. */
2698 12 : if (newsub->dbid != MySubscription->dbid)
2699 : {
2700 0 : elog(ERROR, "subscription %u changed unexpectedly",
2701 : MyLogicalRepWorker->subid);
2702 : }
2703 :
2704 : /* Clean old subscription info and switch to new one. */
2705 12 : FreeSubscription(MySubscription);
2706 12 : MySubscription = newsub;
2707 :
2708 12 : MemoryContextSwitchTo(oldctx);
2709 :
2710 : /* Change synchronous commit according to the user's wishes */
2711 12 : SetConfigOption("synchronous_commit", MySubscription->synccommit,
2712 : PGC_BACKEND, PGC_S_OVERRIDE);
2713 :
2714 12 : if (started_tx)
2715 10 : CommitTransactionCommand();
2716 :
2717 12 : MySubscriptionValid = true;
2718 : }
2719 :
2720 : /*
2721 : * Callback from subscription syscache invalidation.
2722 : */
2723 : static void
2724 24 : subscription_change_cb(Datum arg, int cacheid, uint32 hashvalue)
2725 : {
2726 24 : MySubscriptionValid = false;
2727 24 : }
2728 :
2729 : /*
2730 : * subxact_info_write
2731 : * Store information about subxacts for a toplevel transaction.
2732 : *
2733 : * For each subxact we store offset of it's first change in the main file.
2734 : * The file is always over-written as a whole.
2735 : *
2736 : * XXX We should only store subxacts that were not aborted yet.
2737 : */
2738 : static void
2739 630 : subxact_info_write(Oid subid, TransactionId xid)
2740 : {
2741 : char path[MAXPGPATH];
2742 : bool found;
2743 : Size len;
2744 : StreamXidHash *ent;
2745 : BufFile *fd;
2746 :
2747 630 : Assert(TransactionIdIsValid(xid));
2748 :
2749 : /* find the xid entry in the xidhash */
2750 630 : ent = (StreamXidHash *) hash_search(xidhash,
2751 : (void *) &xid,
2752 : HASH_FIND,
2753 : &found);
2754 : /* we must found the entry for its top transaction by this time */
2755 630 : Assert(found);
2756 :
2757 : /*
2758 : * If there is no subtransaction then nothing to do, but if already have
2759 : * subxact file then delete that.
2760 : */
2761 630 : if (subxact_data.nsubxacts == 0)
2762 : {
2763 548 : if (ent->subxact_fileset)
2764 : {
2765 6 : cleanup_subxact_info();
2766 6 : SharedFileSetDeleteAll(ent->subxact_fileset);
2767 6 : pfree(ent->subxact_fileset);
2768 6 : ent->subxact_fileset = NULL;
2769 : }
2770 1178 : return;
2771 : }
2772 :
2773 82 : subxact_filename(path, subid, xid);
2774 :
2775 : /*
2776 : * Create the subxact file if it not already created, otherwise open the
2777 : * existing file.
2778 : */
2779 82 : if (ent->subxact_fileset == NULL)
2780 : {
2781 : MemoryContext oldctx;
2782 :
2783 : /*
2784 : * We need to maintain shared fileset across multiple stream
2785 : * start/stop calls. So, need to allocate it in a persistent context.
2786 : */
2787 14 : oldctx = MemoryContextSwitchTo(ApplyContext);
2788 14 : ent->subxact_fileset = palloc(sizeof(SharedFileSet));
2789 14 : SharedFileSetInit(ent->subxact_fileset, NULL);
2790 14 : MemoryContextSwitchTo(oldctx);
2791 :
2792 14 : fd = BufFileCreateShared(ent->subxact_fileset, path);
2793 : }
2794 : else
2795 68 : fd = BufFileOpenShared(ent->subxact_fileset, path, O_RDWR);
2796 :
2797 82 : len = sizeof(SubXactInfo) * subxact_data.nsubxacts;
2798 :
2799 : /* Write the subxact count and subxact info */
2800 82 : BufFileWrite(fd, &subxact_data.nsubxacts, sizeof(subxact_data.nsubxacts));
2801 82 : BufFileWrite(fd, subxact_data.subxacts, len);
2802 :
2803 82 : BufFileClose(fd);
2804 :
2805 : /* free the memory allocated for subxact info */
2806 82 : cleanup_subxact_info();
2807 : }
2808 :
2809 : /*
2810 : * subxact_info_read
2811 : * Restore information about subxacts of a streamed transaction.
2812 : *
2813 : * Read information about subxacts into the structure subxact_data that can be
2814 : * used later.
2815 : */
2816 : static void
2817 594 : subxact_info_read(Oid subid, TransactionId xid)
2818 : {
2819 : char path[MAXPGPATH];
2820 : bool found;
2821 : Size len;
2822 : BufFile *fd;
2823 : StreamXidHash *ent;
2824 : MemoryContext oldctx;
2825 :
2826 594 : Assert(TransactionIdIsValid(xid));
2827 594 : Assert(!subxact_data.subxacts);
2828 594 : Assert(subxact_data.nsubxacts == 0);
2829 594 : Assert(subxact_data.nsubxacts_max == 0);
2830 :
2831 : /* Find the stream xid entry in the xidhash */
2832 594 : ent = (StreamXidHash *) hash_search(xidhash,
2833 : (void *) &xid,
2834 : HASH_FIND,
2835 : &found);
2836 :
2837 : /*
2838 : * If subxact_fileset is not valid that mean we don't have any subxact
2839 : * info
2840 : */
2841 594 : if (ent->subxact_fileset == NULL)
2842 1112 : return;
2843 :
2844 76 : subxact_filename(path, subid, xid);
2845 :
2846 76 : fd = BufFileOpenShared(ent->subxact_fileset, path, O_RDONLY);
2847 :
2848 : /* read number of subxact items */
2849 76 : if (BufFileRead(fd, &subxact_data.nsubxacts,
2850 : sizeof(subxact_data.nsubxacts)) !=
2851 : sizeof(subxact_data.nsubxacts))
2852 0 : ereport(ERROR,
2853 : (errcode_for_file_access(),
2854 : errmsg("could not read from streaming transaction's subxact file \"%s\": %m",
2855 : path)));
2856 :
2857 76 : len = sizeof(SubXactInfo) * subxact_data.nsubxacts;
2858 :
2859 : /* we keep the maximum as a power of 2 */
2860 76 : subxact_data.nsubxacts_max = 1 << my_log2(subxact_data.nsubxacts);
2861 :
2862 : /*
2863 : * Allocate subxact information in the logical streaming context. We need
2864 : * this information during the complete stream so that we can add the sub
2865 : * transaction info to this. On stream stop we will flush this information
2866 : * to the subxact file and reset the logical streaming context.
2867 : */
2868 76 : oldctx = MemoryContextSwitchTo(LogicalStreamingContext);
2869 76 : subxact_data.subxacts = palloc(subxact_data.nsubxacts_max *
2870 : sizeof(SubXactInfo));
2871 76 : MemoryContextSwitchTo(oldctx);
2872 :
2873 76 : if ((len > 0) && ((BufFileRead(fd, subxact_data.subxacts, len)) != len))
2874 0 : ereport(ERROR,
2875 : (errcode_for_file_access(),
2876 : errmsg("could not read from streaming transaction's subxact file \"%s\": %m",
2877 : path)));
2878 :
2879 76 : BufFileClose(fd);
2880 : }
2881 :
2882 : /*
2883 : * subxact_info_add
2884 : * Add information about a subxact (offset in the main file).
2885 : */
2886 : static void
2887 248838 : subxact_info_add(TransactionId xid)
2888 : {
2889 248838 : SubXactInfo *subxacts = subxact_data.subxacts;
2890 : int64 i;
2891 :
2892 : /* We must have a valid top level stream xid and a stream fd. */
2893 248838 : Assert(TransactionIdIsValid(stream_xid));
2894 248838 : Assert(stream_fd != NULL);
2895 :
2896 : /*
2897 : * If the XID matches the toplevel transaction, we don't want to add it.
2898 : */
2899 248838 : if (stream_xid == xid)
2900 223208 : return;
2901 :
2902 : /*
2903 : * In most cases we're checking the same subxact as we've already seen in
2904 : * the last call, so make sure to ignore it (this change comes later).
2905 : */
2906 25630 : if (subxact_data.subxact_last == xid)
2907 25536 : return;
2908 :
2909 : /* OK, remember we're processing this XID. */
2910 94 : subxact_data.subxact_last = xid;
2911 :
2912 : /*
2913 : * Check if the transaction is already present in the array of subxact. We
2914 : * intentionally scan the array from the tail, because we're likely adding
2915 : * a change for the most recent subtransactions.
2916 : *
2917 : * XXX Can we rely on the subxact XIDs arriving in sorted order? That
2918 : * would allow us to use binary search here.
2919 : */
2920 132 : for (i = subxact_data.nsubxacts; i > 0; i--)
2921 : {
2922 : /* found, so we're done */
2923 96 : if (subxacts[i - 1].xid == xid)
2924 58 : return;
2925 : }
2926 :
2927 : /* This is a new subxact, so we need to add it to the array. */
2928 36 : if (subxact_data.nsubxacts == 0)
2929 : {
2930 : MemoryContext oldctx;
2931 :
2932 14 : subxact_data.nsubxacts_max = 128;
2933 :
2934 : /*
2935 : * Allocate this memory for subxacts in per-stream context, see
2936 : * subxact_info_read.
2937 : */
2938 14 : oldctx = MemoryContextSwitchTo(LogicalStreamingContext);
2939 14 : subxacts = palloc(subxact_data.nsubxacts_max * sizeof(SubXactInfo));
2940 14 : MemoryContextSwitchTo(oldctx);
2941 : }
2942 22 : else if (subxact_data.nsubxacts == subxact_data.nsubxacts_max)
2943 : {
2944 20 : subxact_data.nsubxacts_max *= 2;
2945 20 : subxacts = repalloc(subxacts,
2946 20 : subxact_data.nsubxacts_max * sizeof(SubXactInfo));
2947 : }
2948 :
2949 36 : subxacts[subxact_data.nsubxacts].xid = xid;
2950 :
2951 : /*
2952 : * Get the current offset of the stream file and store it as offset of
2953 : * this subxact.
2954 : */
2955 72 : BufFileTell(stream_fd,
2956 36 : &subxacts[subxact_data.nsubxacts].fileno,
2957 36 : &subxacts[subxact_data.nsubxacts].offset);
2958 :
2959 36 : subxact_data.nsubxacts++;
2960 36 : subxact_data.subxacts = subxacts;
2961 : }
2962 :
2963 : /* format filename for file containing the info about subxacts */
2964 : static inline void
2965 166 : subxact_filename(char *path, Oid subid, TransactionId xid)
2966 : {
2967 166 : snprintf(path, MAXPGPATH, "%u-%u.subxacts", subid, xid);
2968 166 : }
2969 :
2970 : /* format filename for file containing serialized changes */
2971 : static inline void
2972 712 : changes_filename(char *path, Oid subid, TransactionId xid)
2973 : {
2974 712 : snprintf(path, MAXPGPATH, "%u-%u.changes", subid, xid);
2975 712 : }
2976 :
2977 : /*
2978 : * stream_cleanup_files
2979 : * Cleanup files for a subscription / toplevel transaction.
2980 : *
2981 : * Remove files with serialized changes and subxact info for a particular
2982 : * toplevel transaction. Each subscription has a separate set of files.
2983 : */
2984 : static void
2985 42 : stream_cleanup_files(Oid subid, TransactionId xid)
2986 : {
2987 : char path[MAXPGPATH];
2988 : StreamXidHash *ent;
2989 :
2990 : /* Remove the xid entry from the stream xid hash */
2991 42 : ent = (StreamXidHash *) hash_search(xidhash,
2992 : (void *) &xid,
2993 : HASH_REMOVE,
2994 : NULL);
2995 : /* By this time we must have created the transaction entry */
2996 42 : Assert(ent != NULL);
2997 :
2998 : /* Delete the change file and release the stream fileset memory */
2999 42 : changes_filename(path, subid, xid);
3000 42 : SharedFileSetDeleteAll(ent->stream_fileset);
3001 42 : pfree(ent->stream_fileset);
3002 42 : ent->stream_fileset = NULL;
3003 :
3004 : /* Delete the subxact file and release the memory, if it exist */
3005 42 : if (ent->subxact_fileset)
3006 : {
3007 8 : subxact_filename(path, subid, xid);
3008 8 : SharedFileSetDeleteAll(ent->subxact_fileset);
3009 8 : pfree(ent->subxact_fileset);
3010 8 : ent->subxact_fileset = NULL;
3011 : }
3012 42 : }
3013 :
3014 : /*
3015 : * stream_open_file
3016 : * Open a file that we'll use to serialize changes for a toplevel
3017 : * transaction.
3018 : *
3019 : * Open a file for streamed changes from a toplevel transaction identified
3020 : * by stream_xid (global variable). If it's the first chunk of streamed
3021 : * changes for this transaction, initialize the shared fileset and create the
3022 : * buffile, otherwise open the previously created file.
3023 : *
3024 : * This can only be called at the beginning of a "streaming" block, i.e.
3025 : * between stream_start/stream_stop messages from the upstream.
3026 : */
3027 : static void
3028 614 : stream_open_file(Oid subid, TransactionId xid, bool first_segment)
3029 : {
3030 : char path[MAXPGPATH];
3031 : bool found;
3032 : MemoryContext oldcxt;
3033 : StreamXidHash *ent;
3034 :
3035 614 : Assert(in_streamed_transaction);
3036 614 : Assert(OidIsValid(subid));
3037 614 : Assert(TransactionIdIsValid(xid));
3038 614 : Assert(stream_fd == NULL);
3039 :
3040 : /* create or find the xid entry in the xidhash */
3041 614 : ent = (StreamXidHash *) hash_search(xidhash,
3042 : (void *) &xid,
3043 : HASH_ENTER | HASH_FIND,
3044 : &found);
3045 614 : Assert(first_segment || found);
3046 614 : changes_filename(path, subid, xid);
3047 614 : elog(DEBUG1, "opening file \"%s\" for streamed changes", path);
3048 :
3049 : /*
3050 : * Create/open the buffiles under the logical streaming context so that we
3051 : * have those files until stream stop.
3052 : */
3053 614 : oldcxt = MemoryContextSwitchTo(LogicalStreamingContext);
3054 :
3055 : /*
3056 : * If this is the first streamed segment, the file must not exist, so make
3057 : * sure we're the ones creating it. Otherwise just open the file for
3058 : * writing, in append mode.
3059 : */
3060 614 : if (first_segment)
3061 : {
3062 : MemoryContext savectx;
3063 : SharedFileSet *fileset;
3064 :
3065 : /*
3066 : * We need to maintain shared fileset across multiple stream
3067 : * start/stop calls. So, need to allocate it in a persistent context.
3068 : */
3069 44 : savectx = MemoryContextSwitchTo(ApplyContext);
3070 44 : fileset = palloc(sizeof(SharedFileSet));
3071 :
3072 44 : SharedFileSetInit(fileset, NULL);
3073 44 : MemoryContextSwitchTo(savectx);
3074 :
3075 44 : stream_fd = BufFileCreateShared(fileset, path);
3076 :
3077 : /* Remember the fileset for the next stream of the same transaction */
3078 44 : ent->xid = xid;
3079 44 : ent->stream_fileset = fileset;
3080 44 : ent->subxact_fileset = NULL;
3081 : }
3082 : else
3083 : {
3084 : /*
3085 : * Open the file and seek to the end of the file because we always
3086 : * append the changes file.
3087 : */
3088 570 : stream_fd = BufFileOpenShared(ent->stream_fileset, path, O_RDWR);
3089 570 : BufFileSeek(stream_fd, 0, 0, SEEK_END);
3090 : }
3091 :
3092 614 : MemoryContextSwitchTo(oldcxt);
3093 614 : }
3094 :
3095 : /*
3096 : * stream_close_file
3097 : * Close the currently open file with streamed changes.
3098 : *
3099 : * This can only be called at the end of a streaming block, i.e. at stream_stop
3100 : * message from the upstream.
3101 : */
3102 : static void
3103 614 : stream_close_file(void)
3104 : {
3105 614 : Assert(in_streamed_transaction);
3106 614 : Assert(TransactionIdIsValid(stream_xid));
3107 614 : Assert(stream_fd != NULL);
3108 :
3109 614 : BufFileClose(stream_fd);
3110 :
3111 614 : stream_xid = InvalidTransactionId;
3112 614 : stream_fd = NULL;
3113 614 : }
3114 :
3115 : /*
3116 : * stream_write_change
3117 : * Serialize a change to a file for the current toplevel transaction.
3118 : *
3119 : * The change is serialized in a simple format, with length (not including
3120 : * the length), action code (identifying the message type) and message
3121 : * contents (without the subxact TransactionId value).
3122 : */
3123 : static void
3124 248838 : stream_write_change(char action, StringInfo s)
3125 : {
3126 : int len;
3127 :
3128 248838 : Assert(in_streamed_transaction);
3129 248838 : Assert(TransactionIdIsValid(stream_xid));
3130 248838 : Assert(stream_fd != NULL);
3131 :
3132 : /* total on-disk size, including the action type character */
3133 248838 : len = (s->len - s->cursor) + sizeof(char);
3134 :
3135 : /* first write the size */
3136 248838 : BufFileWrite(stream_fd, &len, sizeof(len));
3137 :
3138 : /* then the action */
3139 248838 : BufFileWrite(stream_fd, &action, sizeof(action));
3140 :
3141 : /* and finally the remaining part of the buffer (after the XID) */
3142 248838 : len = (s->len - s->cursor);
3143 :
3144 248838 : BufFileWrite(stream_fd, &s->data[s->cursor], len);
3145 248838 : }
3146 :
3147 : /*
3148 : * Cleanup the memory for subxacts and reset the related variables.
3149 : */
3150 : static inline void
3151 96 : cleanup_subxact_info()
3152 : {
3153 96 : if (subxact_data.subxacts)
3154 90 : pfree(subxact_data.subxacts);
3155 :
3156 96 : subxact_data.subxacts = NULL;
3157 96 : subxact_data.subxact_last = InvalidTransactionId;
3158 96 : subxact_data.nsubxacts = 0;
3159 96 : subxact_data.nsubxacts_max = 0;
3160 96 : }
3161 :
3162 : /* Logical Replication Apply worker entry point */
3163 : void
3164 210 : ApplyWorkerMain(Datum main_arg)
3165 : {
3166 210 : int worker_slot = DatumGetInt32(main_arg);
3167 : MemoryContext oldctx;
3168 : char originname[NAMEDATALEN];
3169 : XLogRecPtr origin_startpos;
3170 : char *myslotname;
3171 : WalRcvStreamOptions options;
3172 :
3173 : /* Attach to slot */
3174 210 : logicalrep_worker_attach(worker_slot);
3175 :
3176 : /* Setup signal handling */
3177 210 : pqsignal(SIGHUP, SignalHandlerForConfigReload);
3178 210 : pqsignal(SIGTERM, die);
3179 210 : BackgroundWorkerUnblockSignals();
3180 :
3181 : /*
3182 : * We don't currently need any ResourceOwner in a walreceiver process, but
3183 : * if we did, we could call CreateAuxProcessResourceOwner here.
3184 : */
3185 :
3186 : /* Initialise stats to a sanish value */
3187 420 : MyLogicalRepWorker->last_send_time = MyLogicalRepWorker->last_recv_time =
3188 210 : MyLogicalRepWorker->reply_time = GetCurrentTimestamp();
3189 :
3190 : /* Load the libpq-specific functions */
3191 210 : load_file("libpqwalreceiver", false);
3192 :
3193 : /* Run as replica session replication role. */
3194 210 : SetConfigOption("session_replication_role", "replica",
3195 : PGC_SUSET, PGC_S_OVERRIDE);
3196 :
3197 : /* Connect to our database. */
3198 210 : BackgroundWorkerInitializeConnectionByOid(MyLogicalRepWorker->dbid,
3199 210 : MyLogicalRepWorker->userid,
3200 : 0);
3201 :
3202 : /*
3203 : * Set always-secure search path, so malicious users can't redirect user
3204 : * code (e.g. pg_index.indexprs).
3205 : */
3206 210 : SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
3207 :
3208 : /* Load the subscription into persistent memory context. */
3209 210 : ApplyContext = AllocSetContextCreate(TopMemoryContext,
3210 : "ApplyContext",
3211 : ALLOCSET_DEFAULT_SIZES);
3212 210 : StartTransactionCommand();
3213 210 : oldctx = MemoryContextSwitchTo(ApplyContext);
3214 :
3215 210 : MySubscription = GetSubscription(MyLogicalRepWorker->subid, true);
3216 210 : if (!MySubscription)
3217 : {
3218 0 : ereport(LOG,
3219 : (errmsg("logical replication apply worker for subscription %u will not "
3220 : "start because the subscription was removed during startup",
3221 : MyLogicalRepWorker->subid)));
3222 0 : proc_exit(0);
3223 : }
3224 :
3225 210 : MySubscriptionValid = true;
3226 210 : MemoryContextSwitchTo(oldctx);
3227 :
3228 210 : if (!MySubscription->enabled)
3229 : {
3230 0 : ereport(LOG,
3231 : (errmsg("logical replication apply worker for subscription \"%s\" will not "
3232 : "start because the subscription was disabled during startup",
3233 : MySubscription->name)));
3234 :
3235 0 : proc_exit(0);
3236 : }
3237 :
3238 : /* Setup synchronous commit according to the user's wishes */
3239 210 : SetConfigOption("synchronous_commit", MySubscription->synccommit,
3240 : PGC_BACKEND, PGC_S_OVERRIDE);
3241 :
3242 : /* Keep us informed about subscription changes. */
3243 210 : CacheRegisterSyscacheCallback(SUBSCRIPTIONOID,
3244 : subscription_change_cb,
3245 : (Datum) 0);
3246 :
3247 210 : if (am_tablesync_worker())
3248 126 : ereport(LOG,
3249 : (errmsg("logical replication table synchronization worker for subscription \"%s\", table \"%s\" has started",
3250 : MySubscription->name, get_rel_name(MyLogicalRepWorker->relid))));
3251 : else
3252 84 : ereport(LOG,
3253 : (errmsg("logical replication apply worker for subscription \"%s\" has started",
3254 : MySubscription->name)));
3255 :
3256 210 : CommitTransactionCommand();
3257 :
3258 : /* Connect to the origin and start the replication. */
3259 210 : elog(DEBUG1, "connecting to publisher using connection string \"%s\"",
3260 : MySubscription->conninfo);
3261 :
3262 210 : if (am_tablesync_worker())
3263 : {
3264 : char *syncslotname;
3265 :
3266 : /* This is table synchronization worker, call initial sync. */
3267 126 : syncslotname = LogicalRepSyncTableStart(&origin_startpos);
3268 :
3269 : /* allocate slot name in long-lived context */
3270 122 : myslotname = MemoryContextStrdup(ApplyContext, syncslotname);
3271 :
3272 122 : pfree(syncslotname);
3273 : }
3274 : else
3275 : {
3276 : /* This is main apply worker */
3277 : RepOriginId originid;
3278 : TimeLineID startpointTLI;
3279 : char *err;
3280 :
3281 84 : myslotname = MySubscription->slotname;
3282 :
3283 : /*
3284 : * This shouldn't happen if the subscription is enabled, but guard
3285 : * against DDL bugs or manual catalog changes. (libpqwalreceiver will
3286 : * crash if slot is NULL.)
3287 : */
3288 84 : if (!myslotname)
3289 0 : ereport(ERROR,
3290 : (errmsg("subscription has no replication slot set")));
3291 :
3292 : /* Setup replication origin tracking. */
3293 84 : StartTransactionCommand();
3294 84 : snprintf(originname, sizeof(originname), "pg_%u", MySubscription->oid);
3295 84 : originid = replorigin_by_name(originname, true);
3296 84 : if (!OidIsValid(originid))
3297 0 : originid = replorigin_create(originname);
3298 84 : replorigin_session_setup(originid);
3299 84 : replorigin_session_origin = originid;
3300 84 : origin_startpos = replorigin_session_get_progress(false);
3301 84 : CommitTransactionCommand();
3302 :
3303 84 : wrconn = walrcv_connect(MySubscription->conninfo, true, MySubscription->name,
3304 : &err);
3305 84 : if (wrconn == NULL)
3306 8 : ereport(ERROR,
3307 : (errmsg("could not connect to the publisher: %s", err)));
3308 :
3309 : /*
3310 : * We don't really use the output identify_system for anything but it
3311 : * does some initializations on the upstream so let's still call it.
3312 : */
3313 76 : (void) walrcv_identify_system(wrconn, &startpointTLI);
3314 : }
3315 :
3316 : /*
3317 : * Setup callback for syscache so that we know when something changes in
3318 : * the subscription relation state.
3319 : */
3320 198 : CacheRegisterSyscacheCallback(SUBSCRIPTIONRELMAP,
3321 : invalidate_syncing_table_states,
3322 : (Datum) 0);
3323 :
3324 : /* Build logical replication streaming options. */
3325 198 : options.logical = true;
3326 198 : options.startpoint = origin_startpos;
3327 198 : options.slotname = myslotname;
3328 198 : options.proto.logical.proto_version =
3329 198 : walrcv_server_version(wrconn) >= 140000 ?
3330 : LOGICALREP_PROTO_STREAM_VERSION_NUM : LOGICALREP_PROTO_VERSION_NUM;
3331 198 : options.proto.logical.publication_names = MySubscription->publications;
3332 198 : options.proto.logical.binary = MySubscription->binary;
3333 198 : options.proto.logical.streaming = MySubscription->stream;
3334 :
3335 : /* Start normal logical streaming replication. */
3336 198 : walrcv_startstreaming(wrconn, &options);
3337 :
3338 : /* Run the main loop. */
3339 198 : LogicalRepApplyLoop(origin_startpos);
3340 :
3341 0 : proc_exit(0);
3342 : }
3343 :
3344 : /*
3345 : * Is current process a logical replication worker?
3346 : */
3347 : bool
3348 126 : IsLogicalWorker(void)
3349 : {
3350 126 : return MyLogicalRepWorker != NULL;
3351 : }
|