Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pgoutput.c
4 : * Logical Replication output plugin
5 : *
6 : * Copyright (c) 2012-2020, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/replication/pgoutput/pgoutput.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "postgres.h"
14 :
15 : #include "access/tupconvert.h"
16 : #include "catalog/partition.h"
17 : #include "catalog/pg_publication.h"
18 : #include "commands/defrem.h"
19 : #include "fmgr.h"
20 : #include "replication/logical.h"
21 : #include "replication/logicalproto.h"
22 : #include "replication/origin.h"
23 : #include "replication/pgoutput.h"
24 : #include "utils/int8.h"
25 : #include "utils/inval.h"
26 : #include "utils/lsyscache.h"
27 : #include "utils/memutils.h"
28 : #include "utils/syscache.h"
29 : #include "utils/varlena.h"
30 :
31 260 : PG_MODULE_MAGIC;
32 :
33 : extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
34 :
35 : static void pgoutput_startup(LogicalDecodingContext *ctx,
36 : OutputPluginOptions *opt, bool is_init);
37 : static void pgoutput_shutdown(LogicalDecodingContext *ctx);
38 : static void pgoutput_begin_txn(LogicalDecodingContext *ctx,
39 : ReorderBufferTXN *txn);
40 : static void pgoutput_commit_txn(LogicalDecodingContext *ctx,
41 : ReorderBufferTXN *txn, XLogRecPtr commit_lsn);
42 : static void pgoutput_change(LogicalDecodingContext *ctx,
43 : ReorderBufferTXN *txn, Relation rel,
44 : ReorderBufferChange *change);
45 : static void pgoutput_truncate(LogicalDecodingContext *ctx,
46 : ReorderBufferTXN *txn, int nrelations, Relation relations[],
47 : ReorderBufferChange *change);
48 : static bool pgoutput_origin_filter(LogicalDecodingContext *ctx,
49 : RepOriginId origin_id);
50 : static void pgoutput_prepare_txn(LogicalDecodingContext *ctx,
51 : ReorderBufferTXN *txn, XLogRecPtr prepare_lsn);
52 : static void pgoutput_commit_prepared_txn(LogicalDecodingContext *ctx,
53 : ReorderBufferTXN *txn, XLogRecPtr prepare_lsn);
54 : static void pgoutput_rollback_prepared_txn(LogicalDecodingContext *ctx,
55 : ReorderBufferTXN *txn, XLogRecPtr prepare_lsn);
56 : static void pgoutput_stream_start(struct LogicalDecodingContext *ctx,
57 : ReorderBufferTXN *txn);
58 : static void pgoutput_stream_stop(struct LogicalDecodingContext *ctx,
59 : ReorderBufferTXN *txn);
60 : static void pgoutput_stream_abort(struct LogicalDecodingContext *ctx,
61 : ReorderBufferTXN *txn,
62 : XLogRecPtr abort_lsn);
63 : static void pgoutput_stream_commit(struct LogicalDecodingContext *ctx,
64 : ReorderBufferTXN *txn,
65 : XLogRecPtr commit_lsn);
66 : static void pgoutput_stream_prepare_txn(LogicalDecodingContext *ctx,
67 : ReorderBufferTXN *txn, XLogRecPtr prepare_lsn);
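 : /*
 : * publications_valid is cleared by the publication syscache callback; when
 : * it is false, the publication list in PGOutputData is reloaded on next
 : * use. in_streaming is true while we are inside a streamed chunk of a
 : * transaction, i.e. between the stream start and stream stop callbacks.
 : */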
68 : static bool publications_valid;
69 : static bool in_streaming;
70 :
71 : static List *LoadPublications(List *pubnames);
72 : static void publication_invalidation_cb(Datum arg, int cacheid,
73 : uint32 hashvalue);
74 : static void send_relation_and_attrs(Relation relation, TransactionId xid,
75 : LogicalDecodingContext *ctx);
76 :
77 : /*
78 : * Entry in the map used to remember which relation schemas we sent.
79 : *
80 : * The schema_sent flag determines if the current schema record was already
81 : * sent to the subscriber (in which case we don't need to send it again).
82 : *
83 : * The schema cache on the downstream side is, however, updated only at
84 : * commit time, and with streamed transactions the commit order may differ
85 : * from the order in which the transactions are sent. Also, the (sub)
86 : * transactions might get aborted, so we need to send the schema for each
87 : * (sub) transaction so that we don't lose the schema information on abort.
88 : * To handle this, we maintain the list of xids (streamed_txns) for which we
89 : * have already sent the schema.
90 : *
91 : * For partitions, 'pubactions' considers not only the table's own
92 : * publications, but also those of all of its ancestors.
93 : */
94 : typedef struct RelationSyncEntry
95 : {
96 : Oid relid; /* relation oid */
97 :
98 : /*
99 : * Did we send the schema? If ancestor relid is set, its schema must also
100 : * have been sent for this to be true.
101 : */
102 : bool schema_sent;
103 : List *streamed_txns; /* streamed toplevel transactions with this
104 : * schema */
105 :
106 : bool replicate_valid;
107 : PublicationActions pubactions;
108 :
109 : /*
110 : * OID of the relation to publish changes as. For a partition, this may
111 : * be set to one of its ancestors whose schema will be used when
112 : * replicating changes, if publish_via_partition_root is set for the
113 : * publication.
114 : */
115 : Oid publish_as_relid;
116 :
117 : /*
118 : * Map used when replicating using an ancestor's schema to convert tuples
119 : * from the partition's type to the ancestor's; NULL if publish_as_relid is
120 : * the same as 'relid' or if unnecessary because the partition and the
121 : * ancestor have identical TupleDescs.
122 : */
123 : TupleConversionMap *map;
124 : } RelationSyncEntry;
125 :
126 : /* Map used to remember which relation schemas we sent. */
127 : static HTAB *RelationSyncCache = NULL;
128 :
129 : static void init_rel_sync_cache(MemoryContext decoding_context);
130 : static void cleanup_rel_sync_cache(TransactionId xid, bool is_commit);
131 : static RelationSyncEntry *get_rel_sync_entry(PGOutputData *data, Oid relid);
132 : static void rel_sync_cache_relation_cb(Datum arg, Oid relid);
133 : static void rel_sync_cache_publication_cb(Datum arg, int cacheid,
134 : uint32 hashvalue);
135 : static void set_schema_sent_in_streamed_txn(RelationSyncEntry *entry,
136 : TransactionId xid);
137 : static bool get_schema_sent_in_streamed_txn(RelationSyncEntry *entry,
138 : TransactionId xid);
139 :
140 : /*
141 : * Specify output plugin callbacks
142 : */
143 : void
144 388 : _PG_output_plugin_init(OutputPluginCallbacks *cb)
145 : {
146 : AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit);
147 :
148 388 : cb->startup_cb = pgoutput_startup;
149 388 : cb->begin_cb = pgoutput_begin_txn;
150 388 : cb->change_cb = pgoutput_change;
151 388 : cb->truncate_cb = pgoutput_truncate;
152 388 : cb->commit_cb = pgoutput_commit_txn;
153 :
154 388 : cb->prepare_cb = pgoutput_prepare_txn;
155 388 : cb->commit_prepared_cb = pgoutput_commit_prepared_txn;
156 388 : cb->rollback_prepared_cb = pgoutput_rollback_prepared_txn;
157 388 : cb->filter_by_origin_cb = pgoutput_origin_filter;
158 388 : cb->shutdown_cb = pgoutput_shutdown;
159 :
160 : /* transaction streaming */
161 388 : cb->stream_start_cb = pgoutput_stream_start;
162 388 : cb->stream_stop_cb = pgoutput_stream_stop;
163 388 : cb->stream_abort_cb = pgoutput_stream_abort;
164 388 : cb->stream_commit_cb = pgoutput_stream_commit;
165 388 : cb->stream_change_cb = pgoutput_change;
166 388 : cb->stream_truncate_cb = pgoutput_truncate;
167 : /* transaction streaming - two-phase commit */
168 388 : cb->stream_prepare_cb = pgoutput_stream_prepare_txn;
169 388 : }
170 :
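 : /*
 : * Parse the output plugin options passed by the client (proto_version,
 : * publication_names, binary, streaming). Specifying an option more than
 : * once, or an option we don't recognize, raises an error.
 : */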
171 : static void
172 196 : parse_output_parameters(List *options, uint32 *protocol_version,
173 : List **publication_names, bool *binary,
174 : bool *enable_streaming)
175 : {
176 : ListCell *lc;
177 196 : bool protocol_version_given = false;
178 196 : bool publication_names_given = false;
179 196 : bool binary_option_given = false;
180 196 : bool streaming_given = false;
181 :
182 196 : *binary = false;
183 :
184 630 : foreach(lc, options)
185 : {
186 434 : DefElem *defel = (DefElem *) lfirst(lc);
187 :
188 434 : Assert(defel->arg == NULL || IsA(defel->arg, String));
189 :
190 : /* Check each param, whether or not we recognize it */
191 434 : if (strcmp(defel->defname, "proto_version") == 0)
192 : {
193 : int64 parsed;
194 :
195 196 : if (protocol_version_given)
196 0 : ereport(ERROR,
197 : (errcode(ERRCODE_SYNTAX_ERROR),
198 : errmsg("conflicting or redundant options")));
199 196 : protocol_version_given = true;
200 :
201 196 : if (!scanint8(strVal(defel->arg), true, &parsed))
202 0 : ereport(ERROR,
203 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
204 : errmsg("invalid proto_version")));
205 :
206 196 : if (parsed > PG_UINT32_MAX || parsed < 0)
207 0 : ereport(ERROR,
208 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
209 : errmsg("proto_version \"%s\" out of range",
210 : strVal(defel->arg))));
211 :
212 196 : *protocol_version = (uint32) parsed;
213 : }
214 238 : else if (strcmp(defel->defname, "publication_names") == 0)
215 : {
216 196 : if (publication_names_given)
217 0 : ereport(ERROR,
218 : (errcode(ERRCODE_SYNTAX_ERROR),
219 : errmsg("conflicting or redundant options")));
220 196 : publication_names_given = true;
221 :
222 196 : if (!SplitIdentifierString(strVal(defel->arg), ',',
223 : publication_names))
224 0 : ereport(ERROR,
225 : (errcode(ERRCODE_INVALID_NAME),
226 : errmsg("invalid publication_names syntax")));
227 : }
228 42 : else if (strcmp(defel->defname, "binary") == 0)
229 : {
230 10 : if (binary_option_given)
231 0 : ereport(ERROR,
232 : (errcode(ERRCODE_SYNTAX_ERROR),
233 : errmsg("conflicting or redundant options")));
234 10 : binary_option_given = true;
235 :
236 10 : *binary = defGetBoolean(defel);
237 : }
238 32 : else if (strcmp(defel->defname, "streaming") == 0)
239 : {
240 32 : if (streaming_given)
241 0 : ereport(ERROR,
242 : (errcode(ERRCODE_SYNTAX_ERROR),
243 : errmsg("conflicting or redundant options")));
244 32 : streaming_given = true;
245 :
246 32 : *enable_streaming = defGetBoolean(defel);
247 : }
248 : else
249 0 : elog(ERROR, "unrecognized pgoutput option: %s", defel->defname);
250 : }
251 196 : }
252 :
253 : /*
254 : * Initialize this plugin
255 : */
256 : static void
257 388 : pgoutput_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
258 : bool is_init)
259 : {
260 388 : bool enable_streaming = false;
261 388 : PGOutputData *data = palloc0(sizeof(PGOutputData));
262 :
263 : /* Create our memory context for private allocations. */
264 388 : data->context = AllocSetContextCreate(ctx->context,
265 : "logical replication output context",
266 : ALLOCSET_DEFAULT_SIZES);
267 :
268 388 : ctx->output_plugin_private = data;
269 :
270 : /* This plugin uses binary protocol. */
271 388 : opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT;
272 :
273 : /*
274 : * This is replication start and not slot initialization.
275 : *
276 : * Parse and validate options passed by the client.
277 : */
278 388 : if (!is_init)
279 : {
280 : /* Parse the params and ERROR if we see any we don't recognize */
281 196 : parse_output_parameters(ctx->output_plugin_options,
282 : &data->protocol_version,
283 : &data->publication_names,
284 : &data->binary,
285 : &enable_streaming);
286 :
287 : /* Check if we support requested protocol */
288 196 : if (data->protocol_version > LOGICALREP_PROTO_MAX_VERSION_NUM)
289 0 : ereport(ERROR,
290 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
291 : errmsg("client sent proto_version=%d but we only support protocol %d or lower",
292 : data->protocol_version, LOGICALREP_PROTO_MAX_VERSION_NUM)));
293 :
294 196 : if (data->protocol_version < LOGICALREP_PROTO_MIN_VERSION_NUM)
295 0 : ereport(ERROR,
296 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
297 : errmsg("client sent proto_version=%d but we only support protocol %d or higher",
298 : data->protocol_version, LOGICALREP_PROTO_MIN_VERSION_NUM)));
299 :
300 196 : if (list_length(data->publication_names) < 1)
301 0 : ereport(ERROR,
302 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
303 : errmsg("publication_names parameter missing")));
304 :
305 : /*
306 : * Decide whether to enable streaming. It is disabled by default, in
307 : * which case we just update the flag in decoding context. Otherwise
308 : * we only allow it with sufficient version of the protocol, and when
309 : * the output plugin supports it.
310 : */
311 196 : if (!enable_streaming)
312 164 : ctx->streaming = false;
313 32 : else if (data->protocol_version < LOGICALREP_PROTO_STREAM_VERSION_NUM)
314 0 : ereport(ERROR,
315 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
316 : errmsg("requested proto_version=%d does not support streaming, need %d or higher",
317 : data->protocol_version, LOGICALREP_PROTO_STREAM_VERSION_NUM)));
318 32 : else if (!ctx->streaming)
319 0 : ereport(ERROR,
320 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
321 : errmsg("streaming requested, but not supported by output plugin")));
322 :
323 : /* Also remember we're currently not streaming any transaction. */
324 196 : in_streaming = false;
325 :
326 : /* Init publication state. */
327 196 : data->publications = NIL;
328 196 : publications_valid = false;
329 196 : CacheRegisterSyscacheCallback(PUBLICATIONOID,
330 : publication_invalidation_cb,
331 : (Datum) 0);
332 :
333 : /* Initialize relation schema cache. */
334 196 : init_rel_sync_cache(CacheMemoryContext);
335 : }
336 : else
337 : {
338 : /* Disable the streaming during the slot initialization mode. */
339 192 : ctx->streaming = false;
340 : }
341 388 : }
342 :
343 : /*
344 : * BEGIN callback
345 : */
346 : static void
347 352 : pgoutput_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
348 : {
349 352 : bool send_replication_origin = txn->origin_id != InvalidRepOriginId;
350 :
351 352 : OutputPluginPrepareWrite(ctx, !send_replication_origin);
352 352 : logicalrep_write_begin(ctx->out, txn);
353 :
354 352 : if (send_replication_origin)
355 : {
356 : char *origin;
357 :
358 : /* Message boundary */
359 0 : OutputPluginWrite(ctx, false);
360 0 : OutputPluginPrepareWrite(ctx, true);
361 :
362 : /*----------
363 : * XXX: which behaviour do we want here?
364 : *
365 : * Alternatives:
366 : * - don't send origin message if origin name not found
367 : * (that's what we do now)
368 : * - throw error - that will break replication, not good
369 : * - send some special "unknown" origin
370 : *----------
371 : */
372 0 : if (replorigin_by_oid(txn->origin_id, true, &origin))
373 0 : logicalrep_write_origin(ctx->out, origin, txn->origin_lsn);
374 : }
375 :
376 352 : OutputPluginWrite(ctx, true);
377 352 : }
378 :
379 : /*
380 : * COMMIT callback
381 : */
382 : static void
383 338 : pgoutput_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
384 : XLogRecPtr commit_lsn)
385 : {
386 338 : OutputPluginUpdateProgress(ctx);
387 :
388 338 : OutputPluginPrepareWrite(ctx, true);
389 338 : logicalrep_write_commit(ctx->out, txn, commit_lsn);
390 338 : OutputPluginWrite(ctx, true);
391 338 : }
392 :
393 : /*
394 : * PREPARE callback
395 : */
396 : static void
397 14 : pgoutput_prepare_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
398 : XLogRecPtr prepare_lsn)
399 : {
400 14 : OutputPluginUpdateProgress(ctx);
401 :
402 14 : OutputPluginPrepareWrite(ctx, true);
403 14 : logicalrep_write_prepare(ctx->out, txn, prepare_lsn);
404 14 : OutputPluginWrite(ctx, true);
405 14 : }
406 :
407 : /*
408 : * COMMIT PREPARED callback
409 : */
410 : static void
411 20 : pgoutput_commit_prepared_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
412 : XLogRecPtr prepare_lsn)
413 : {
414 20 : OutputPluginUpdateProgress(ctx);
415 :
416 20 : OutputPluginPrepareWrite(ctx, true);
417 20 : logicalrep_write_prepare(ctx->out, txn, prepare_lsn);
418 20 : OutputPluginWrite(ctx, true);
419 20 : }
420 :
421 : /*
422 : * ROLLBACK PREPARED callback
423 : */
424 : static void
425 16 : pgoutput_rollback_prepared_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
426 : XLogRecPtr prepare_lsn)
427 : {
428 16 : OutputPluginUpdateProgress(ctx);
429 :
430 16 : OutputPluginPrepareWrite(ctx, true);
431 16 : logicalrep_write_prepare(ctx->out, txn, prepare_lsn);
432 16 : OutputPluginWrite(ctx, true);
433 16 : }
434 :
435 : /*
436 : * Write the current schema of the relation and its ancestor (if any) if not
437 : * done yet.
438 : */
439 : static void
440 319778 : maybe_send_schema(LogicalDecodingContext *ctx,
441 : ReorderBufferTXN *txn, ReorderBufferChange *change,
442 : Relation relation, RelationSyncEntry *relentry)
443 : {
444 : bool schema_sent;
445 319778 : TransactionId xid = InvalidTransactionId;
446 319778 : TransactionId topxid = InvalidTransactionId;
447 :
448 : /*
449 : * Remember the XID of the (sub)transaction for the change. We don't care
450 : * whether it's a top-level transaction or not (we have already sent that
451 : * XID at the start of the current streaming block).
452 : *
453 : * If we're not in a streaming block, just use InvalidTransactionId and
454 : * the write methods will not include it.
455 : */
456 319778 : if (in_streaming)
457 318282 : xid = change->txn->xid;
458 :
459 319778 : if (change->txn->toptxn)
460 25618 : topxid = change->txn->toptxn->xid;
461 : else
462 294160 : topxid = xid;
463 :
464 : /*
465 : * Do we need to send the schema? We do track streamed transactions
466 : * separately, because those may be applied later (and the regular
467 : * transactions won't see their effects until then) and in an order that
468 : * we don't know at this point.
469 : *
470 : * XXX There is a scope of optimization here. Currently, we always send
471 : * the schema first time in a streaming transaction but we can probably
472 : * avoid that by checking 'relentry->schema_sent' flag. However, before
473 : * doing that we need to study its impact on the case where we have a mix
474 : * of streaming and non-streaming transactions.
475 : */
476 319778 : if (in_streaming)
477 318282 : schema_sent = get_schema_sent_in_streamed_txn(relentry, topxid);
478 : else
479 1496 : schema_sent = relentry->schema_sent;
480 :
481 319778 : if (schema_sent)
482 639362 : return;
483 :
484 : /* If needed, send the ancestor's schema first. */
485 194 : if (relentry->publish_as_relid != RelationGetRelid(relation))
486 : {
487 0 : Relation ancestor = RelationIdGetRelation(relentry->publish_as_relid);
488 0 : TupleDesc indesc = RelationGetDescr(relation);
489 0 : TupleDesc outdesc = RelationGetDescr(ancestor);
490 : MemoryContext oldctx;
491 :
492 : /* Map must live as long as the session does. */
493 0 : oldctx = MemoryContextSwitchTo(CacheMemoryContext);
494 0 : relentry->map = convert_tuples_by_name(CreateTupleDescCopy(indesc),
495 : CreateTupleDescCopy(outdesc));
496 0 : MemoryContextSwitchTo(oldctx);
497 0 : send_relation_and_attrs(ancestor, xid, ctx);
498 0 : RelationClose(ancestor);
499 : }
500 :
501 194 : send_relation_and_attrs(relation, xid, ctx);
502 :
503 194 : if (in_streaming)
504 68 : set_schema_sent_in_streamed_txn(relentry, topxid);
505 : else
506 126 : relentry->schema_sent = true;
507 : }
508 :
509 : /*
510 : * Sends a relation
511 : */
512 : static void
513 194 : send_relation_and_attrs(Relation relation, TransactionId xid,
514 : LogicalDecodingContext *ctx)
515 : {
516 194 : TupleDesc desc = RelationGetDescr(relation);
517 : int i;
518 :
519 : /*
520 : * Write out type info if needed. We do that only for user-created types.
521 : * We use FirstGenbkiObjectId as the cutoff, so that we only consider
522 : * objects with hand-assigned OIDs to be "built in", not for instance any
523 : * function or type defined in the information_schema. This is important
524 : * because only hand-assigned OIDs can be expected to remain stable across
525 : * major versions.
526 : */
527 622 : for (i = 0; i < desc->natts; i++)
528 : {
529 428 : Form_pg_attribute att = TupleDescAttr(desc, i);
530 :
531 428 : if (att->attisdropped || att->attgenerated)
532 2 : continue;
533 :
534 426 : if (att->atttypid < FirstGenbkiObjectId)
535 394 : continue;
536 :
537 32 : OutputPluginPrepareWrite(ctx, false);
538 32 : logicalrep_write_typ(ctx->out, xid, att->atttypid);
539 32 : OutputPluginWrite(ctx, false);
540 : }
541 :
542 194 : OutputPluginPrepareWrite(ctx, false);
543 194 : logicalrep_write_rel(ctx->out, xid, relation);
544 194 : OutputPluginWrite(ctx, false);
545 194 : }
546 :
547 : /*
548 : * Sends the decoded DML over wire.
549 : *
550 : * This is called both in streaming and non-streaming modes.
551 : */
552 : static void
553 321956 : pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
554 : Relation relation, ReorderBufferChange *change)
555 : {
556 321956 : PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
557 : MemoryContext old;
558 : RelationSyncEntry *relentry;
559 321956 : TransactionId xid = InvalidTransactionId;
560 :
561 321956 : if (!is_publishable_relation(relation))
562 4 : return;
563 :
564 : /*
565 : * Remember the xid for the change in streaming mode. We need to send the
566 : * xid with each change in streaming mode so that the subscriber can
567 : * associate the change with the transaction and, on abort, discard the
568 : * corresponding changes.
569 : */
570 321952 : if (in_streaming)
571 318282 : xid = change->txn->xid;
572 :
573 321952 : relentry = get_rel_sync_entry(data, RelationGetRelid(relation));
574 :
575 : /* First check the table filter */
576 321952 : switch (change->action)
577 : {
578 : case REORDER_BUFFER_CHANGE_INSERT:
579 162714 : if (!relentry->pubactions.pubinsert)
580 0 : return;
581 162714 : break;
582 : case REORDER_BUFFER_CHANGE_UPDATE:
583 85548 : if (!relentry->pubactions.pubupdate)
584 80 : return;
585 85468 : break;
586 : case REORDER_BUFFER_CHANGE_DELETE:
587 73690 : if (!relentry->pubactions.pubdelete)
588 2104 : return;
589 71586 : break;
590 : default:
591 0 : Assert(false);
592 : }
593 :
594 : /* Avoid leaking memory by using and resetting our own context */
595 319768 : old = MemoryContextSwitchTo(data->context);
596 :
597 319768 : maybe_send_schema(ctx, txn, change, relation, relentry);
598 :
599 : /* Send the data */
600 319768 : switch (change->action)
601 : {
602 : case REORDER_BUFFER_CHANGE_INSERT:
603 : {
604 162714 : HeapTuple tuple = &change->data.tp.newtuple->tuple;
605 :
606 : /* Switch relation if publishing via root. */
607 162714 : if (relentry->publish_as_relid != RelationGetRelid(relation))
608 : {
609 0 : Assert(relation->rd_rel->relispartition);
610 0 : relation = RelationIdGetRelation(relentry->publish_as_relid);
611 : /* Convert tuple if needed. */
612 0 : if (relentry->map)
613 0 : tuple = execute_attr_map_tuple(tuple, relentry->map);
614 : }
615 :
616 162714 : OutputPluginPrepareWrite(ctx, true);
617 162714 : logicalrep_write_insert(ctx->out, xid, relation, tuple,
618 162714 : data->binary);
619 162714 : OutputPluginWrite(ctx, true);
620 162714 : break;
621 : }
622 : case REORDER_BUFFER_CHANGE_UPDATE:
623 : {
624 170936 : HeapTuple oldtuple = change->data.tp.oldtuple ?
625 85468 : &change->data.tp.oldtuple->tuple : NULL;
626 85468 : HeapTuple newtuple = &change->data.tp.newtuple->tuple;
627 :
628 : /* Switch relation if publishing via root. */
629 85468 : if (relentry->publish_as_relid != RelationGetRelid(relation))
630 : {
631 0 : Assert(relation->rd_rel->relispartition);
632 0 : relation = RelationIdGetRelation(relentry->publish_as_relid);
633 : /* Convert tuples if needed. */
634 0 : if (relentry->map)
635 : {
636 0 : oldtuple = execute_attr_map_tuple(oldtuple, relentry->map);
637 0 : newtuple = execute_attr_map_tuple(newtuple, relentry->map);
638 : }
639 : }
640 :
641 85468 : OutputPluginPrepareWrite(ctx, true);
642 85468 : logicalrep_write_update(ctx->out, xid, relation, oldtuple,
643 85468 : newtuple, data->binary);
644 85468 : OutputPluginWrite(ctx, true);
645 85466 : break;
646 : }
647 : case REORDER_BUFFER_CHANGE_DELETE:
648 71586 : if (change->data.tp.oldtuple)
649 : {
650 71586 : HeapTuple oldtuple = &change->data.tp.oldtuple->tuple;
651 :
652 : /* Switch relation if publishing via root. */
653 71586 : if (relentry->publish_as_relid != RelationGetRelid(relation))
654 : {
655 0 : Assert(relation->rd_rel->relispartition);
656 0 : relation = RelationIdGetRelation(relentry->publish_as_relid);
657 : /* Convert tuple if needed. */
658 0 : if (relentry->map)
659 0 : oldtuple = execute_attr_map_tuple(oldtuple, relentry->map);
660 : }
661 :
662 71586 : OutputPluginPrepareWrite(ctx, true);
663 71586 : logicalrep_write_delete(ctx->out, xid, relation, oldtuple,
664 71586 : data->binary);
665 71586 : OutputPluginWrite(ctx, true);
666 : }
667 : else
668 0 : elog(DEBUG1, "didn't send DELETE change because of missing oldtuple");
669 71582 : break;
670 : default:
671 0 : Assert(false);
672 : }
673 :
674 : /* Cleanup */
675 319762 : MemoryContextSwitchTo(old);
676 319762 : MemoryContextReset(data->context);
677 : }
678 :
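 : /*
 : * TRUNCATE callback. Send a single truncate message covering the truncated
 : * relations that are actually published, skipping partitions whose changes
 : * are published via their root table.
 : */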
679 : static void
680 14 : pgoutput_truncate(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
681 : int nrelations, Relation relations[], ReorderBufferChange *change)
682 : {
683 14 : PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
684 : MemoryContext old;
685 : RelationSyncEntry *relentry;
686 : int i;
687 : int nrelids;
688 : Oid *relids;
689 14 : TransactionId xid = InvalidTransactionId;
690 :
691 : /* Remember the xid for the change in streaming mode. See pgoutput_change. */
692 14 : if (in_streaming)
693 0 : xid = change->txn->xid;
694 :
695 14 : old = MemoryContextSwitchTo(data->context);
696 :
697 14 : relids = palloc0(nrelations * sizeof(Oid));
698 14 : nrelids = 0;
699 :
700 36 : for (i = 0; i < nrelations; i++)
701 : {
702 22 : Relation relation = relations[i];
703 22 : Oid relid = RelationGetRelid(relation);
704 :
705 22 : if (!is_publishable_relation(relation))
706 0 : continue;
707 :
708 22 : relentry = get_rel_sync_entry(data, relid);
709 :
710 22 : if (!relentry->pubactions.pubtruncate)
711 12 : continue;
712 :
713 : /*
714 : * Don't send partitions if the publication wants to send only the
715 : * root tables through it.
716 : */
717 18 : if (relation->rd_rel->relispartition &&
718 8 : relentry->publish_as_relid != relid)
719 0 : continue;
720 :
721 10 : relids[nrelids++] = relid;
722 10 : maybe_send_schema(ctx, txn, change, relation, relentry);
723 : }
724 :
725 14 : if (nrelids > 0)
726 : {
727 6 : OutputPluginPrepareWrite(ctx, true);
728 12 : logicalrep_write_truncate(ctx->out,
729 : xid,
730 : nrelids,
731 : relids,
732 6 : change->data.truncate.cascade,
733 6 : change->data.truncate.restart_seqs);
734 6 : OutputPluginWrite(ctx, true);
735 : }
736 :
737 14 : MemoryContextSwitchTo(old);
738 14 : MemoryContextReset(data->context);
739 14 : }
740 :
741 : /*
742 : * Currently we always forward.
743 : */
744 : static bool
745 603210 : pgoutput_origin_filter(LogicalDecodingContext *ctx,
746 : RepOriginId origin_id)
747 : {
748 603210 : return false;
749 : }
750 :
751 : /*
752 : * Shutdown the output plugin.
753 : *
754 : * Note that we don't need to clean up data->context: it is a child context
755 : * of ctx->context, so it will be cleaned up by the logical decoding machinery.
756 : */
757 : static void
758 314 : pgoutput_shutdown(LogicalDecodingContext *ctx)
759 : {
760 314 : if (RelationSyncCache)
761 : {
762 122 : hash_destroy(RelationSyncCache);
763 122 : RelationSyncCache = NULL;
764 : }
765 314 : }
766 :
767 : /*
768 : * Load publications from the list of publication names.
769 : */
770 : static List *
771 66 : LoadPublications(List *pubnames)
772 : {
773 66 : List *result = NIL;
774 : ListCell *lc;
775 :
776 134 : foreach(lc, pubnames)
777 : {
778 68 : char *pubname = (char *) lfirst(lc);
779 68 : Publication *pub = GetPublicationByName(pubname, false);
780 :
781 68 : result = lappend(result, pub);
782 : }
783 :
784 66 : return result;
785 : }
786 :
787 : /*
788 : * Publication cache invalidation callback.
789 : */
790 : static void
791 32 : publication_invalidation_cb(Datum arg, int cacheid, uint32 hashvalue)
792 : {
793 32 : publications_valid = false;
794 :
795 : /*
796 : * Also invalidate per-relation cache so that next time the filtering info
797 : * is checked it will be updated with the new publication settings.
798 : */
799 32 : rel_sync_cache_publication_cb(arg, cacheid, hashvalue);
800 32 : }
801 :
802 : /*
803 : * START STREAM callback
804 : */
805 : static void
806 802 : pgoutput_stream_start(struct LogicalDecodingContext *ctx,
807 : ReorderBufferTXN *txn)
808 : {
809 802 : bool send_replication_origin = txn->origin_id != InvalidRepOriginId;
810 :
811 : /* we can't nest streaming of transactions */
812 802 : Assert(!in_streaming);
813 :
814 : /*
815 : * If we already sent the first stream for this transaction then don't
816 : * send the origin id in the subsequent streams.
817 : */
818 802 : if (rbtxn_is_streamed(txn))
819 748 : send_replication_origin = false;
820 :
821 802 : OutputPluginPrepareWrite(ctx, !send_replication_origin);
822 802 : logicalrep_write_stream_start(ctx->out, txn->xid, !rbtxn_is_streamed(txn));
823 :
824 802 : if (send_replication_origin)
825 : {
826 : char *origin;
827 :
828 : /* Message boundary */
829 0 : OutputPluginWrite(ctx, false);
830 0 : OutputPluginPrepareWrite(ctx, true);
831 :
832 0 : if (replorigin_by_oid(txn->origin_id, true, &origin))
833 0 : logicalrep_write_origin(ctx->out, origin, InvalidXLogRecPtr);
834 : }
835 :
836 802 : OutputPluginWrite(ctx, true);
837 :
838 : /* we're streaming a chunk of transaction now */
839 802 : in_streaming = true;
840 802 : }
841 :
842 : /*
843 : * STOP STREAM callback
844 : */
845 : static void
846 796 : pgoutput_stream_stop(struct LogicalDecodingContext *ctx,
847 : ReorderBufferTXN *txn)
848 : {
849 : /* we should be streaming a transaction */
850 796 : Assert(in_streaming);
851 :
852 796 : OutputPluginPrepareWrite(ctx, true);
853 796 : logicalrep_write_stream_stop(ctx->out);
854 796 : OutputPluginWrite(ctx, true);
855 :
856 : /* we've stopped streaming a transaction */
857 796 : in_streaming = false;
858 796 : }
859 :
860 : /*
861 : * Notify downstream to discard the streamed transaction (along with all
862 : * its subtransactions, if it's a toplevel transaction).
863 : */
864 : static void
865 26 : pgoutput_stream_abort(struct LogicalDecodingContext *ctx,
866 : ReorderBufferTXN *txn,
867 : XLogRecPtr abort_lsn)
868 : {
869 : ReorderBufferTXN *toptxn;
870 :
871 : /*
872 : * The abort should happen outside streaming block, even for streamed
873 : * transactions. The transaction has to be marked as streamed, though.
874 : */
875 26 : Assert(!in_streaming);
876 :
877 : /* determine the toplevel transaction */
878 26 : toptxn = (txn->toptxn) ? txn->toptxn : txn;
879 :
880 26 : Assert(rbtxn_is_streamed(toptxn));
881 :
882 26 : OutputPluginPrepareWrite(ctx, true);
883 26 : logicalrep_write_stream_abort(ctx->out, toptxn->xid, txn->xid);
884 26 : OutputPluginWrite(ctx, true);
885 :
886 26 : cleanup_rel_sync_cache(toptxn->xid, false);
887 26 : }
888 :
889 : /*
890 : * Notify downstream to apply the streamed transaction (along with all
891 : * its subtransactions).
892 : */
893 : static void
894 28 : pgoutput_stream_commit(struct LogicalDecodingContext *ctx,
895 : ReorderBufferTXN *txn,
896 : XLogRecPtr commit_lsn)
897 : {
898 : /*
899 : * The commit should happen outside streaming block, even for streamed
900 : * transactions. The transaction has to be marked as streamed, though.
901 : */
902 28 : Assert(!in_streaming);
903 28 : Assert(rbtxn_is_streamed(txn));
904 :
905 28 : OutputPluginUpdateProgress(ctx);
906 :
907 28 : OutputPluginPrepareWrite(ctx, true);
908 28 : logicalrep_write_stream_commit(ctx->out, txn, commit_lsn);
909 28 : OutputPluginWrite(ctx, true);
910 :
911 28 : cleanup_rel_sync_cache(txn->xid, true);
912 28 : }
913 :
914 : /*
915 : * PREPARE callback (for streaming two-phase commit).
916 : *
917 : * Notify the downstream to prepare the transaction.
918 : */
919 : static void
920 18 : pgoutput_stream_prepare_txn(LogicalDecodingContext *ctx,
921 : ReorderBufferTXN *txn,
922 : XLogRecPtr prepare_lsn)
923 : {
924 18 : Assert(rbtxn_is_streamed(txn));
925 :
926 18 : OutputPluginUpdateProgress(ctx);
927 18 : OutputPluginPrepareWrite(ctx, true);
928 18 : logicalrep_write_stream_prepare(ctx->out, txn, prepare_lsn);
929 18 : OutputPluginWrite(ctx, true);
930 18 : }
931 :
932 : /*
933 : * Initialize the relation schema sync cache for a decoding session.
934 : *
935 : * The hash table is destroyed at the end of a decoding session. While
936 : * relcache invalidations still exist and will still be invoked, they
937 : * will just see the null hash table global and take no action.
938 : */
939 : static void
940 196 : init_rel_sync_cache(MemoryContext cachectx)
941 : {
942 : HASHCTL ctl;
943 : MemoryContext old_ctxt;
944 :
945 196 : if (RelationSyncCache != NULL)
946 196 : return;
947 :
948 : /* Make a new hash table for the cache */
949 196 : MemSet(&ctl, 0, sizeof(ctl));
950 196 : ctl.keysize = sizeof(Oid);
951 196 : ctl.entrysize = sizeof(RelationSyncEntry);
952 196 : ctl.hcxt = cachectx;
953 :
954 196 : old_ctxt = MemoryContextSwitchTo(cachectx);
955 196 : RelationSyncCache = hash_create("logical replication output relation cache",
956 : 128, &ctl,
957 : HASH_ELEM | HASH_CONTEXT | HASH_BLOBS);
958 196 : (void) MemoryContextSwitchTo(old_ctxt);
959 :
960 196 : Assert(RelationSyncCache != NULL);
961 :
962 196 : CacheRegisterRelcacheCallback(rel_sync_cache_relation_cb, (Datum) 0);
963 196 : CacheRegisterSyscacheCallback(PUBLICATIONRELMAP,
964 : rel_sync_cache_publication_cb,
965 : (Datum) 0);
966 : }
967 :
968 : /*
969 : * We expect a relatively small number of streamed transactions, so a linear scan of the list is cheap.
970 : */
971 : static bool
972 318282 : get_schema_sent_in_streamed_txn(RelationSyncEntry *entry, TransactionId xid)
973 : {
974 : ListCell *lc;
975 :
976 633924 : foreach(lc, entry->streamed_txns)
977 : {
978 633856 : if (xid == (uint32) lfirst_int(lc))
979 318214 : return true;
980 : }
981 :
982 68 : return false;
983 : }
984 :
985 : /*
986 : * Add the xid in the rel sync entry for which we have already sent the schema
987 : * of the relation.
988 : */
989 : static void
990 68 : set_schema_sent_in_streamed_txn(RelationSyncEntry *entry, TransactionId xid)
991 : {
992 : MemoryContext oldctx;
993 :
994 68 : oldctx = MemoryContextSwitchTo(CacheMemoryContext);
995 :
996 68 : entry->streamed_txns = lappend_int(entry->streamed_txns, xid);
997 :
998 68 : MemoryContextSwitchTo(oldctx);
999 68 : }
1000 :
1001 : /*
1002 : * Find or create entry in the relation schema cache.
1003 : *
1004 : * This looks up publications that the given relation is directly or
1005 : * indirectly part of (the latter if it's really the relation's ancestor that
1006 : * is part of a publication) and fills up the found entry with the information
1007 : * about which operations to publish and whether to use an ancestor's schema
1008 : * when publishing.
1009 : */
1010 : static RelationSyncEntry *
1011 321974 : get_rel_sync_entry(PGOutputData *data, Oid relid)
1012 : {
1013 : RelationSyncEntry *entry;
1014 321974 : bool am_partition = get_rel_relispartition(relid);
1015 321974 : char relkind = get_rel_relkind(relid);
1016 : bool found;
1017 : MemoryContext oldctx;
1018 :
1019 321974 : Assert(RelationSyncCache != NULL);
1020 :
1021 : /* Find cached relation info, creating if not found */
1022 321974 : entry = (RelationSyncEntry *) hash_search(RelationSyncCache,
1023 : (void *) &relid,
1024 : HASH_ENTER, &found);
1025 321974 : Assert(entry != NULL);
1026 :
1027 : /* Not found means schema wasn't sent */
1028 321974 : if (!found)
1029 : {
1030 : /* immediately make a new entry valid enough to satisfy callbacks */
1031 130 : entry->schema_sent = false;
1032 130 : entry->streamed_txns = NIL;
1033 130 : entry->replicate_valid = false;
1034 130 : entry->pubactions.pubinsert = entry->pubactions.pubupdate =
1035 130 : entry->pubactions.pubdelete = entry->pubactions.pubtruncate = false;
1036 130 : entry->publish_as_relid = InvalidOid;
1037 : }
1038 :
1039 : /* Validate the entry */
1040 321974 : if (!entry->replicate_valid)
1041 : {
1042 132 : List *pubids = GetRelationPublications(relid);
1043 : ListCell *lc;
1044 132 : Oid publish_as_relid = relid;
1045 :
1046 : /* Reload publications if needed before use. */
1047 132 : if (!publications_valid)
1048 : {
1049 66 : oldctx = MemoryContextSwitchTo(CacheMemoryContext);
1050 66 : if (data->publications)
1051 4 : list_free_deep(data->publications);
1052 :
1053 66 : data->publications = LoadPublications(data->publication_names);
1054 66 : MemoryContextSwitchTo(oldctx);
1055 66 : publications_valid = true;
1056 : }
1057 :
1058 : /*
1059 : * Build the publication cache. We can't use the one provided by the
1060 : * relcache, as the relcache considers all publications the given relation
1061 : * is in, but here we only need the ones that the subscriber requested.
1062 : */
1063 158 : foreach(lc, data->publications)
1064 : {
1065 134 : Publication *pub = lfirst(lc);
1066 134 : bool publish = false;
1067 :
1068 134 : if (pub->alltables)
1069 : {
1070 60 : publish = true;
1071 60 : if (pub->pubviaroot && am_partition)
1072 0 : publish_as_relid = llast_oid(get_partition_ancestors(relid));
1073 : }
1074 :
1075 134 : if (!publish)
1076 : {
1077 74 : bool ancestor_published = false;
1078 :
1079 : /*
1080 : * For a partition, check if any of the ancestors are
1081 : * published. If so, note down the topmost ancestor that is
1082 : * published via this publication, which will be used as the
1083 : * relation via which to publish the partition's changes.
1084 : */
1085 74 : if (am_partition)
1086 : {
1087 6 : List *ancestors = get_partition_ancestors(relid);
1088 : ListCell *lc2;
1089 :
1090 : /*
1091 : * Find the "topmost" ancestor that is in this
1092 : * publication.
1093 : */
1094 12 : foreach(lc2, ancestors)
1095 : {
1096 6 : Oid ancestor = lfirst_oid(lc2);
1097 :
1098 6 : if (list_member_oid(GetRelationPublications(ancestor),
1099 : pub->oid))
1100 : {
1101 6 : ancestor_published = true;
1102 6 : if (pub->pubviaroot)
1103 0 : publish_as_relid = ancestor;
1104 : }
1105 : }
1106 : }
1107 :
1108 74 : if (list_member_oid(pubids, pub->oid) || ancestor_published)
1109 62 : publish = true;
1110 : }
1111 :
1112 : /*
1113 : * Don't publish changes for partitioned tables, because
1114 : * publishing those of its partitions suffices, unless partition
1115 : * changes won't be published due to pubviaroot being set.
1116 : */
1117 134 : if (publish &&
1118 2 : (relkind != RELKIND_PARTITIONED_TABLE || pub->pubviaroot))
1119 : {
1120 120 : entry->pubactions.pubinsert |= pub->pubactions.pubinsert;
1121 120 : entry->pubactions.pubupdate |= pub->pubactions.pubupdate;
1122 120 : entry->pubactions.pubdelete |= pub->pubactions.pubdelete;
1123 120 : entry->pubactions.pubtruncate |= pub->pubactions.pubtruncate;
1124 : }
1125 :
1126 242 : if (entry->pubactions.pubinsert && entry->pubactions.pubupdate &&
1127 216 : entry->pubactions.pubdelete && entry->pubactions.pubtruncate)
1128 108 : break;
1129 : }
1130 :
1131 132 : list_free(pubids);
1132 :
1133 132 : entry->publish_as_relid = publish_as_relid;
1134 132 : entry->replicate_valid = true;
1135 : }
1136 :
1137 321974 : return entry;
1138 : }
1139 :
1140 : /*
1141 : * Cleanup list of streamed transactions and update the schema_sent flag.
1142 : *
1143 : * When a streamed transaction commits or aborts, we need to remove the
1144 : * toplevel XID from the schema cache. If the transaction aborted, the
1145 : * subscriber will simply throw away the schema records we streamed, so
1146 : * we don't need to do anything else.
1147 : *
1148 : * If the transaction is committed, the subscriber will update the relation
1149 : * cache - so tweak the schema_sent flag accordingly.
1150 : */
1151 : static void
1152 54 : cleanup_rel_sync_cache(TransactionId xid, bool is_commit)
1153 : {
1154 : HASH_SEQ_STATUS hash_seq;
1155 : RelationSyncEntry *entry;
1156 : ListCell *lc;
1157 :
1158 54 : Assert(RelationSyncCache != NULL);
1159 :
1160 54 : hash_seq_init(&hash_seq, RelationSyncCache);
1161 162 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
1162 : {
1163 : /*
1164 : * We can set the schema_sent flag for an entry that has the committed
1165 : * xid in its list, as that ensures the subscriber already has the
1166 : * corresponding schema and we don't need to send it again unless there
1167 : * is an invalidation for that relation.
1168 : */
1169 74 : foreach(lc, entry->streamed_txns)
1170 : {
1171 48 : if (xid == (uint32) lfirst_int(lc))
1172 : {
1173 28 : if (is_commit)
1174 20 : entry->schema_sent = true;
1175 :
1176 28 : entry->streamed_txns =
1177 28 : foreach_delete_current(entry->streamed_txns, lc);
1178 28 : break;
1179 : }
1180 : }
1181 : }
1182 54 : }
1183 :
1184 : /*
1185 : * Relcache invalidation callback
1186 : */
1187 : static void
1188 1042 : rel_sync_cache_relation_cb(Datum arg, Oid relid)
1189 : {
1190 : RelationSyncEntry *entry;
1191 :
1192 : /*
1193 : * We can get here if the plugin was used via the SQL interface, as the
1194 : * RelationSyncCache is destroyed when the decoding finishes, but there
1195 : * is no way to unregister the relcache invalidation callback.
1196 : */
1197 1042 : if (RelationSyncCache == NULL)
1198 1042 : return;
1199 :
1200 : /*
1201 : * Nobody keeps pointers to entries in this hash table around outside
1202 : * logical decoding callback calls - but invalidation events can come in
1203 : * *during* a callback if we access the relcache in the callback. Because
1204 : * of that we must mark the cache entry as invalid but not remove it from
1205 : * the hash while it could still be referenced, then prune it at a later
1206 : * safe point.
1207 : *
1208 : * Getting invalidations for relations that aren't in the table is
1209 : * entirely normal, since there's no way to unregister for an invalidation
1210 : * event. So we don't care if it's found or not.
1211 : */
1212 1042 : entry = (RelationSyncEntry *) hash_search(RelationSyncCache, &relid,
1213 : HASH_FIND, NULL);
1214 :
1215 : /*
1216 : * Reset schema sent status as the relation definition may have changed.
1217 : */
1218 1042 : if (entry != NULL)
1219 : {
1220 238 : entry->schema_sent = false;
1221 238 : list_free(entry->streamed_txns);
1222 238 : entry->streamed_txns = NIL;
1223 : }
1224 : }
1225 :
1226 : /*
1227 : * Publication relation map syscache invalidation callback
1228 : */
1229 : static void
1230 68 : rel_sync_cache_publication_cb(Datum arg, int cacheid, uint32 hashvalue)
1231 : {
1232 : HASH_SEQ_STATUS status;
1233 : RelationSyncEntry *entry;
1234 :
1235 : /*
1236 : * We can get here if the plugin was used via the SQL interface, as the
1237 : * RelationSyncCache is destroyed when the decoding finishes, but there
1238 : * is no way to unregister the invalidation callbacks.
1239 : */
1240 68 : if (RelationSyncCache == NULL)
1241 68 : return;
1242 :
1243 : /*
1244 : * There is no way to find which entry in our cache the hash belongs to so
1245 : * mark the whole cache as invalid.
1246 : */
1247 68 : hash_seq_init(&status, RelationSyncCache);
1248 328 : while ((entry = (RelationSyncEntry *) hash_seq_search(&status)) != NULL)
1249 192 : entry->replicate_valid = false;
1250 : }
|