/*-------------------------------------------------------------------------
 * launcher.c
 *	  PostgreSQL logical replication worker launcher process
 *
 * Copyright (c) 2016-2020, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/replication/logical/launcher.c
 *
 * NOTES
 *	  This module contains the logical replication worker launcher which
 *	  uses the background worker infrastructure to start the logical
 *	  replication workers for every enabled subscription.
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup.h"
#include "access/htup_details.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_subscription_rel.h"
#include "funcapi.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/bgworker.h"
#include "postmaster/fork_process.h"
#include "postmaster/interrupt.h"
#include "postmaster/postmaster.h"
#include "replication/logicallauncher.h"
#include "replication/logicalworker.h"
#include "replication/slot.h"
#include "replication/walreceiver.h"
#include "replication/worker_internal.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/procsignal.h"
#include "tcop/tcopprot.h"
#include "utils/memutils.h"
#include "utils/pg_lsn.h"
#include "utils/ps_status.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"

/* max sleep time between cycles (3min) */
#define DEFAULT_NAPTIME_PER_CYCLE 180000L

int max_logical_replication_workers = 4;
int max_sync_workers_per_subscription = 2;

LogicalRepWorker *MyLogicalRepWorker = NULL;

typedef struct LogicalRepCtxStruct
{
    /* Supervisor process. */
    pid_t launcher_pid;

    /* Background workers. */
    LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER];
} LogicalRepCtxStruct;

LogicalRepCtxStruct *LogicalRepCtx;

typedef struct LogicalRepWorkerId
{
    Oid subid;
    Oid relid;
} LogicalRepWorkerId;

typedef struct StopWorkersData
{
    int nestDepth;              /* Sub-transaction nest level */
    List *workers;              /* List of LogicalRepWorkerId */
    struct StopWorkersData *parent; /* This need not be an immediate
                                     * subtransaction parent */
} StopWorkersData;

/*
 * Stack of StopWorkersData elements. Each stack element contains the workers
 * to be stopped for that subtransaction.
 */
static StopWorkersData *on_commit_stop_workers = NULL;
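
/*
 * Illustrative trace (not part of the original file; subscription/relation
 * OIDs are made up): how the stack evolves when nested subtransactions
 * request worker stops via logicalrep_worker_stop_at_commit() below.
 *
 *   BEGIN;                              -- nestDepth 1
 *     stop_at_commit(s1, r1)            -- push {depth 1, [s1/r1]}
 *     SAVEPOINT a;                      -- nestDepth 2
 *       stop_at_commit(s1, r2)          -- push {depth 2, [s1/r2]}
 *     RELEASE a;  -- AtEOSubXact_ApplyLauncher() merges [s1/r2] into depth 1
 *   COMMIT;       -- AtEOXact_ApplyLauncher() stops both workers
 */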

static void ApplyLauncherWakeup(void);
static void logicalrep_launcher_onexit(int code, Datum arg);
static void logicalrep_worker_onexit(int code, Datum arg);
static void logicalrep_worker_detach(void);
static void logicalrep_worker_cleanup(LogicalRepWorker *worker);

static bool on_commit_launcher_wakeup = false;

Datum pg_stat_get_subscription(PG_FUNCTION_ARGS);


/*
 * Load the list of subscriptions.
 *
 * Only the fields interesting for worker start/stop functions are filled for
 * each subscription.
 */
static List *
get_subscription_list(void)
{
    List *res = NIL;
    Relation rel;
    TableScanDesc scan;
    HeapTuple tup;
    MemoryContext resultcxt;

    /* This is the context that we will allocate our output data in */
    resultcxt = CurrentMemoryContext;

    /*
     * Start a transaction so we can access pg_subscription, and get a
     * snapshot.  We don't have a use for the snapshot itself, but we're
     * interested in the secondary effect that it sets RecentGlobalXmin.
     * (This is critical for anything that reads heap pages, because HOT may
     * decide to prune them even if the process doesn't attempt to modify any
     * tuples.)
     *
     * FIXME: This comment is inaccurate / the code buggy. A snapshot that is
     * not pushed/active does not reliably prevent HOT pruning (->xmin could
     * e.g. be cleared when cache invalidations are processed).
     */
    StartTransactionCommand();
    (void) GetTransactionSnapshot();

    rel = table_open(SubscriptionRelationId, AccessShareLock);
    scan = table_beginscan_catalog(rel, 0, NULL);

    while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
    {
        Form_pg_subscription subform = (Form_pg_subscription) GETSTRUCT(tup);
        Subscription *sub;
        MemoryContext oldcxt;

        /*
         * Allocate our results in the caller's context, not the
         * transaction's. We do this inside the loop, and restore the original
         * context at the end, so that leaky things like heap_getnext() are
         * not called in a potentially long-lived context.
         */
        oldcxt = MemoryContextSwitchTo(resultcxt);

        sub = (Subscription *) palloc0(sizeof(Subscription));
        sub->oid = subform->oid;
        sub->dbid = subform->subdbid;
        sub->owner = subform->subowner;
        sub->enabled = subform->subenabled;
        sub->name = pstrdup(NameStr(subform->subname));
        /* We don't fill fields we are not interested in. */

        res = lappend(res, sub);
        MemoryContextSwitchTo(oldcxt);
    }

    table_endscan(scan);
    table_close(rel, AccessShareLock);

    CommitTransactionCommand();

    return res;
}
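
/*
 * The per-row context switch in the loop above is a general idiom: copy the
 * data you want to keep while in the caller's context, then switch back so
 * that scan-internal allocations die with the transaction context.  A
 * minimal stand-alone sketch (the helper name is made up for illustration):
 */
#if 0
static List *
collect_tuples(TableScanDesc scan, MemoryContext resultcxt)
{
    List *result = NIL;
    HeapTuple tup;

    while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
    {
        /* Copy the tuple only while in the long-lived result context. */
        MemoryContext oldcxt = MemoryContextSwitchTo(resultcxt);

        result = lappend(result, heap_copytuple(tup));
        MemoryContextSwitchTo(oldcxt);
    }

    return result;
}
#endif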

/*
 * Wait for a background worker to start up and attach to the shmem context.
 *
 * This is only needed for cleaning up the shared memory in case the worker
 * fails to attach.
 */
static void
WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
                               uint16 generation,
                               BackgroundWorkerHandle *handle)
{
    BgwHandleStatus status;
    int rc;

    for (;;)
    {
        pid_t pid;

        CHECK_FOR_INTERRUPTS();

        LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);

        /* Worker either died or has started; no need to do anything. */
        if (!worker->in_use || worker->proc)
        {
            LWLockRelease(LogicalRepWorkerLock);
            return;
        }

        LWLockRelease(LogicalRepWorkerLock);

        /* Check if worker has died before attaching, and clean up after it. */
        status = GetBackgroundWorkerPid(handle, &pid);

        if (status == BGWH_STOPPED)
        {
            LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
            /* Ensure that this was indeed the worker we waited for. */
            if (generation == worker->generation)
                logicalrep_worker_cleanup(worker);
            LWLockRelease(LogicalRepWorkerLock);
            return;
        }

        /*
         * We need timeout because we generally don't get notified via latch
         * about the worker attach. But we don't expect to have to wait long.
         */
        rc = WaitLatch(MyLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                       10L, WAIT_EVENT_BGWORKER_STARTUP);

        if (rc & WL_LATCH_SET)
        {
            ResetLatch(MyLatch);
            CHECK_FOR_INTERRUPTS();
        }
    }
}

/*
 * Walks the workers array and searches for one that matches given
 * subscription id and relid.
 */
LogicalRepWorker *
logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
{
    int i;
    LogicalRepWorker *res = NULL;

    Assert(LWLockHeldByMe(LogicalRepWorkerLock));

    /* Search for attached worker for a given subscription id. */
    for (i = 0; i < max_logical_replication_workers; i++)
    {
        LogicalRepWorker *w = &LogicalRepCtx->workers[i];

        if (w->in_use && w->subid == subid && w->relid == relid &&
            (!only_running || w->proc))
        {
            res = w;
            break;
        }
    }

    return res;
}
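
/*
 * Callers must hold LogicalRepWorkerLock while calling the function above
 * and while using the returned pointer, since the slot can be recycled as
 * soon as the lock is released.  Typical usage, mirroring
 * ApplyLauncherMain() below:
 */
#if 0
    LogicalRepWorker *w;

    LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
    w = logicalrep_worker_find(sub->oid, InvalidOid, false);
    LWLockRelease(LogicalRepWorkerLock);

    if (w == NULL)
        logicalrep_worker_launch(sub->dbid, sub->oid, sub->name,
                                 sub->owner, InvalidOid);
#endif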

/*
 * Similar to logicalrep_worker_find(), but returns a list of all workers for
 * the subscription, instead of just one.
 */
List *
logicalrep_workers_find(Oid subid, bool only_running)
{
    int i;
    List *res = NIL;

    Assert(LWLockHeldByMe(LogicalRepWorkerLock));

    /* Search for attached workers for a given subscription id. */
    for (i = 0; i < max_logical_replication_workers; i++)
    {
        LogicalRepWorker *w = &LogicalRepCtx->workers[i];

        if (w->in_use && w->subid == subid && (!only_running || w->proc))
            res = lappend(res, w);
    }

    return res;
}

/*
 * Start new apply background worker, if possible.
 */
void
logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid,
                         Oid relid)
{
    BackgroundWorker bgw;
    BackgroundWorkerHandle *bgw_handle;
    uint16 generation;
    int i;
    int slot = 0;
    LogicalRepWorker *worker = NULL;
    int nsyncworkers;
    TimestampTz now;

    ereport(DEBUG1,
            (errmsg("starting logical replication worker for subscription \"%s\"",
                    subname)));

    /* Report this after the initial starting message for consistency. */
    if (max_replication_slots == 0)
        ereport(ERROR,
                (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
                 errmsg("cannot start logical replication workers when max_replication_slots = 0")));

    /*
     * We need to do the modification of the shared memory under lock so that
     * we have consistent view.
     */
    LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);

retry:
    /* Find unused worker slot. */
    for (i = 0; i < max_logical_replication_workers; i++)
    {
        LogicalRepWorker *w = &LogicalRepCtx->workers[i];

        if (!w->in_use)
        {
            worker = w;
            slot = i;
            break;
        }
    }

    nsyncworkers = logicalrep_sync_worker_count(subid);

    now = GetCurrentTimestamp();

    /*
     * If we didn't find a free slot, try to do garbage collection.  The
     * reason we do this is because if some worker failed to start up and its
     * parent has crashed while waiting, the in_use state was never cleared.
     */
    if (worker == NULL || nsyncworkers >= max_sync_workers_per_subscription)
    {
        bool did_cleanup = false;

        for (i = 0; i < max_logical_replication_workers; i++)
        {
            LogicalRepWorker *w = &LogicalRepCtx->workers[i];

            /*
             * If the worker was marked in use but didn't manage to attach in
             * time, clean it up.
             */
            if (w->in_use && !w->proc &&
                TimestampDifferenceExceeds(w->launch_time, now,
                                           wal_receiver_timeout))
            {
                elog(WARNING,
                     "logical replication worker for subscription %u took too long to start; canceled",
                     w->subid);

                logicalrep_worker_cleanup(w);
                did_cleanup = true;
            }
        }

        if (did_cleanup)
            goto retry;
    }

    /*
     * If we reached the sync worker limit per subscription, just exit
     * silently as we might get here because of an otherwise harmless race
     * condition.
     */
    if (nsyncworkers >= max_sync_workers_per_subscription)
    {
        LWLockRelease(LogicalRepWorkerLock);
        return;
    }

    /*
     * However if there are no more free worker slots, inform user about it
     * before exiting.
     */
    if (worker == NULL)
    {
        LWLockRelease(LogicalRepWorkerLock);
        ereport(WARNING,
                (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
                 errmsg("out of logical replication worker slots"),
                 errhint("You might need to increase max_logical_replication_workers.")));
        return;
    }

    /* Prepare the worker slot. */
    worker->launch_time = now;
    worker->in_use = true;
    worker->generation++;
    worker->proc = NULL;
    worker->dbid = dbid;
    worker->userid = userid;
    worker->subid = subid;
    worker->relid = relid;
    worker->relstate = SUBREL_STATE_UNKNOWN;
    worker->relstate_lsn = InvalidXLogRecPtr;
    worker->last_lsn = InvalidXLogRecPtr;
    TIMESTAMP_NOBEGIN(worker->last_send_time);
    TIMESTAMP_NOBEGIN(worker->last_recv_time);
    worker->reply_lsn = InvalidXLogRecPtr;
    TIMESTAMP_NOBEGIN(worker->reply_time);

    /* Before releasing lock, remember generation for future identification. */
    generation = worker->generation;

    LWLockRelease(LogicalRepWorkerLock);

    /* Register the new dynamic worker. */
    memset(&bgw, 0, sizeof(bgw));
    bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
        BGWORKER_BACKEND_DATABASE_CONNECTION;
    bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
    snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
    snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyWorkerMain");
    if (OidIsValid(relid))
        snprintf(bgw.bgw_name, BGW_MAXLEN,
                 "logical replication worker for subscription %u sync %u", subid, relid);
    else
        snprintf(bgw.bgw_name, BGW_MAXLEN,
                 "logical replication worker for subscription %u", subid);
    snprintf(bgw.bgw_type, BGW_MAXLEN, "logical replication worker");

    bgw.bgw_restart_time = BGW_NEVER_RESTART;
    bgw.bgw_notify_pid = MyProcPid;
    bgw.bgw_main_arg = Int32GetDatum(slot);

    if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
    {
        /* Failed to start worker, so clean up the worker slot. */
        LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
        Assert(generation == worker->generation);
        logicalrep_worker_cleanup(worker);
        LWLockRelease(LogicalRepWorkerLock);

        ereport(WARNING,
                (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
                 errmsg("out of background worker slots"),
                 errhint("You might need to increase max_worker_processes.")));
        return;
    }

    /* Now wait until it attaches. */
    WaitForReplicationWorkerAttach(worker, generation, bgw_handle);
}
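
/*
 * The generation counter set above is what makes later slot checks safe: a
 * slot can be recycled for a different worker whenever the lock is not held,
 * so code that re-acquires the lock must compare the remembered generation
 * before touching the slot, as WaitForReplicationWorkerAttach() and the
 * registration-failure path above both do:
 */
#if 0
    LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
    if (worker->generation == generation)
        logicalrep_worker_cleanup(worker); /* still ours; safe to reset */
    LWLockRelease(LogicalRepWorkerLock);
#endif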

/*
 * Stop the logical replication worker for subid/relid, if any, and wait until
 * it detaches from the slot.
 */
void
logicalrep_worker_stop(Oid subid, Oid relid)
{
    LogicalRepWorker *worker;
    uint16 generation;

    LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);

    worker = logicalrep_worker_find(subid, relid, false);

    /* No worker, nothing to do. */
    if (!worker)
    {
        LWLockRelease(LogicalRepWorkerLock);
        return;
    }

    /*
     * Remember which generation was our worker so we can check if what we see
     * is still the same one.
     */
    generation = worker->generation;

    /*
     * If we found a worker but it does not have proc set then it is still
     * starting up; wait for it to finish starting and then kill it.
     */
    while (worker->in_use && !worker->proc)
    {
        int rc;

        LWLockRelease(LogicalRepWorkerLock);

        /* Wait a bit --- we don't expect to have to wait long. */
        rc = WaitLatch(MyLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                       10L, WAIT_EVENT_BGWORKER_STARTUP);

        if (rc & WL_LATCH_SET)
        {
            ResetLatch(MyLatch);
            CHECK_FOR_INTERRUPTS();
        }

        /* Recheck worker status. */
        LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);

        /*
         * Check whether the worker slot is no longer used, which would mean
         * that the worker has exited, or whether the worker generation is
         * different, meaning that a different worker has taken the slot.
         */
        if (!worker->in_use || worker->generation != generation)
        {
            LWLockRelease(LogicalRepWorkerLock);
            return;
        }

        /* Worker has assigned proc, so it has started. */
        if (worker->proc)
            break;
    }

    /* Now terminate the worker ... */
    kill(worker->proc->pid, SIGTERM);

    /* ... and wait for it to die. */
    for (;;)
    {
        int rc;

        /* is it gone? */
        if (!worker->proc || worker->generation != generation)
            break;

        LWLockRelease(LogicalRepWorkerLock);

        /* Wait a bit --- we don't expect to have to wait long. */
        rc = WaitLatch(MyLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                       10L, WAIT_EVENT_BGWORKER_SHUTDOWN);

        if (rc & WL_LATCH_SET)
        {
            ResetLatch(MyLatch);
            CHECK_FOR_INTERRUPTS();
        }

        LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
    }

    LWLockRelease(LogicalRepWorkerLock);
}

/*
 * Request that the worker for the specified sub/rel be stopped on commit.
 */
void
logicalrep_worker_stop_at_commit(Oid subid, Oid relid)
{
    int nestDepth = GetCurrentTransactionNestLevel();
    LogicalRepWorkerId *wid;
    MemoryContext oldctx;

    /* Make sure we store the info in a context that survives until commit. */
    oldctx = MemoryContextSwitchTo(TopTransactionContext);

    /* Check that previous transactions were properly cleaned up. */
    Assert(on_commit_stop_workers == NULL ||
           nestDepth >= on_commit_stop_workers->nestDepth);

    /*
     * Push a new stack element if we don't already have one for the current
     * nestDepth.
     */
    if (on_commit_stop_workers == NULL ||
        nestDepth > on_commit_stop_workers->nestDepth)
    {
        StopWorkersData *newdata = palloc(sizeof(StopWorkersData));

        newdata->nestDepth = nestDepth;
        newdata->workers = NIL;
        newdata->parent = on_commit_stop_workers;
        on_commit_stop_workers = newdata;
    }

    /*
     * Finally add the new worker to the worker list of the current
     * subtransaction.
     */
    wid = palloc(sizeof(LogicalRepWorkerId));
    wid->subid = subid;
    wid->relid = relid;
    on_commit_stop_workers->workers =
        lappend(on_commit_stop_workers->workers, wid);

    MemoryContextSwitchTo(oldctx);
}

/*
 * Wake up (using latch) any logical replication worker for specified sub/rel.
 */
void
logicalrep_worker_wakeup(Oid subid, Oid relid)
{
    LogicalRepWorker *worker;

    LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);

    worker = logicalrep_worker_find(subid, relid, true);

    if (worker)
        logicalrep_worker_wakeup_ptr(worker);

    LWLockRelease(LogicalRepWorkerLock);
}

/*
 * Wake up (using latch) the specified logical replication worker.
 *
 * Caller must hold lock, else worker->proc could change under us.
 */
void
logicalrep_worker_wakeup_ptr(LogicalRepWorker *worker)
{
    Assert(LWLockHeldByMe(LogicalRepWorkerLock));

    SetLatch(&worker->proc->procLatch);
}

/*
 * Attach to a slot.
 */
void
logicalrep_worker_attach(int slot)
{
    /* Block concurrent access. */
    LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);

    Assert(slot >= 0 && slot < max_logical_replication_workers);
    MyLogicalRepWorker = &LogicalRepCtx->workers[slot];

    if (!MyLogicalRepWorker->in_use)
    {
        LWLockRelease(LogicalRepWorkerLock);
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("logical replication worker slot %d is empty, cannot attach",
                        slot)));
    }

    if (MyLogicalRepWorker->proc)
    {
        LWLockRelease(LogicalRepWorkerLock);
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("logical replication worker slot %d is already used by "
                        "another worker, cannot attach", slot)));
    }

    MyLogicalRepWorker->proc = MyProc;
    before_shmem_exit(logicalrep_worker_onexit, (Datum) 0);

    LWLockRelease(LogicalRepWorkerLock);
}
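
/*
 * The slot number travels to the worker through bgw_main_arg (see
 * logicalrep_worker_launch() above), so a worker entry point begins roughly
 * like this sketch (the real code lives in ApplyWorkerMain() in worker.c):
 */
#if 0
void
ApplyWorkerMain(Datum main_arg)
{
    int worker_slot = DatumGetInt32(main_arg);

    /* Attach to our slot; MyLogicalRepWorker now points at it. */
    logicalrep_worker_attach(worker_slot);

    /* ... remaining worker initialization and apply loop ... */
}
#endif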

/*
 * Detach the worker (cleans up the worker info).
 */
static void
logicalrep_worker_detach(void)
{
    /* Block concurrent access. */
    LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);

    logicalrep_worker_cleanup(MyLogicalRepWorker);

    LWLockRelease(LogicalRepWorkerLock);
}

/*
 * Clean up worker info.
 */
static void
logicalrep_worker_cleanup(LogicalRepWorker *worker)
{
    Assert(LWLockHeldByMeInMode(LogicalRepWorkerLock, LW_EXCLUSIVE));

    worker->in_use = false;
    worker->proc = NULL;
    worker->dbid = InvalidOid;
    worker->userid = InvalidOid;
    worker->subid = InvalidOid;
    worker->relid = InvalidOid;
}

/*
 * Cleanup function for logical replication launcher.
 *
 * Called on logical replication launcher exit.
 */
static void
logicalrep_launcher_onexit(int code, Datum arg)
{
    LogicalRepCtx->launcher_pid = 0;
}

/*
 * Cleanup function.
 *
 * Called on logical replication worker exit.
 */
static void
logicalrep_worker_onexit(int code, Datum arg)
{
    /* Disconnect gracefully from the remote side. */
    if (wrconn)
        walrcv_disconnect(wrconn);

    logicalrep_worker_detach();

    ApplyLauncherWakeup();
}

/*
 * Count the number of registered (not necessarily running) sync workers
 * for a subscription.
 */
int
logicalrep_sync_worker_count(Oid subid)
{
    int i;
    int res = 0;

    Assert(LWLockHeldByMe(LogicalRepWorkerLock));

    /* Search for attached worker for a given subscription id. */
    for (i = 0; i < max_logical_replication_workers; i++)
    {
        LogicalRepWorker *w = &LogicalRepCtx->workers[i];

        if (w->subid == subid && OidIsValid(w->relid))
            res++;
    }

    return res;
}

/*
 * ApplyLauncherShmemSize
 *		Compute space needed for replication launcher shared memory
 */
Size
ApplyLauncherShmemSize(void)
{
    Size size;

    /*
     * Need the fixed struct and the array of LogicalRepWorker.
     */
    size = sizeof(LogicalRepCtxStruct);
    size = MAXALIGN(size);
    size = add_size(size, mul_size(max_logical_replication_workers,
                                   sizeof(LogicalRepWorker)));
    return size;
}
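
/*
 * For example, with the default max_logical_replication_workers = 4 this
 * requests MAXALIGN(sizeof(LogicalRepCtxStruct)) plus
 * 4 * sizeof(LogicalRepWorker) bytes.  add_size()/mul_size() are used
 * instead of plain + and * so that an overflow raises an error rather than
 * silently under-allocating shared memory.
 */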

/*
 * ApplyLauncherRegister
 *		Register a background worker running the logical replication launcher.
 */
void
ApplyLauncherRegister(void)
{
    BackgroundWorker bgw;

    if (max_logical_replication_workers == 0)
        return;

    memset(&bgw, 0, sizeof(bgw));
    bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
        BGWORKER_BACKEND_DATABASE_CONNECTION;
    bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
    snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
    snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyLauncherMain");
    snprintf(bgw.bgw_name, BGW_MAXLEN,
             "logical replication launcher");
    snprintf(bgw.bgw_type, BGW_MAXLEN,
             "logical replication launcher");
    bgw.bgw_restart_time = 5;
    bgw.bgw_notify_pid = 0;
    bgw.bgw_main_arg = (Datum) 0;

    RegisterBackgroundWorker(&bgw);
}
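
/*
 * Note the contrast with logicalrep_worker_launch() above: the launcher is
 * registered statically at postmaster startup with RegisterBackgroundWorker()
 * and is restarted after 5 seconds if it dies, whereas the per-subscription
 * workers are registered at runtime with RegisterDynamicBackgroundWorker()
 * and use BGW_NEVER_RESTART; the launcher itself decides when to relaunch
 * them.
 */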

/*
 * ApplyLauncherShmemInit
 *		Allocate and initialize replication launcher shared memory
 */
void
ApplyLauncherShmemInit(void)
{
    bool found;

    LogicalRepCtx = (LogicalRepCtxStruct *)
        ShmemInitStruct("Logical Replication Launcher Data",
                        ApplyLauncherShmemSize(),
                        &found);

    if (!found)
    {
        int slot;

        memset(LogicalRepCtx, 0, ApplyLauncherShmemSize());

        /* Initialize memory and spin locks for each worker slot. */
        for (slot = 0; slot < max_logical_replication_workers; slot++)
        {
            LogicalRepWorker *worker = &LogicalRepCtx->workers[slot];

            memset(worker, 0, sizeof(LogicalRepWorker));
            SpinLockInit(&worker->relmutex);
        }
    }
}

/*
 * Check whether current transaction has manipulated logical replication
 * workers.
 */
bool
XactManipulatesLogicalReplicationWorkers(void)
{
    return (on_commit_stop_workers != NULL);
}

/*
 * Wakeup the launcher on commit if requested.
 */
void
AtEOXact_ApplyLauncher(bool isCommit)
{
    Assert(on_commit_stop_workers == NULL ||
           (on_commit_stop_workers->nestDepth == 1 &&
            on_commit_stop_workers->parent == NULL));

    if (isCommit)
    {
        ListCell *lc;

        if (on_commit_stop_workers != NULL)
        {
            List *workers = on_commit_stop_workers->workers;

            foreach(lc, workers)
            {
                LogicalRepWorkerId *wid = lfirst(lc);

                logicalrep_worker_stop(wid->subid, wid->relid);
            }
        }

        if (on_commit_launcher_wakeup)
            ApplyLauncherWakeup();
    }

    /*
     * No need to pfree on_commit_stop_workers. It was allocated in
     * transaction memory context, which is going to be cleaned soon.
     */
    on_commit_stop_workers = NULL;
    on_commit_launcher_wakeup = false;
}

/*
 * On commit, merge the current on_commit_stop_workers list into the
 * immediate parent, if present.
 * On rollback, discard the current on_commit_stop_workers list.
 * Pop the stack.
 */
void
AtEOSubXact_ApplyLauncher(bool isCommit, int nestDepth)
{
    StopWorkersData *parent;

    /* Exit immediately if there's no work to do at this level. */
    if (on_commit_stop_workers == NULL ||
        on_commit_stop_workers->nestDepth < nestDepth)
        return;

    Assert(on_commit_stop_workers->nestDepth == nestDepth);

    parent = on_commit_stop_workers->parent;

    if (isCommit)
    {
        /*
         * If the upper stack element is not an immediate parent
         * subtransaction, just decrement the notional nesting depth without
         * doing any real work. Else, we need to merge the current workers
         * list into the parent.
         */
        if (!parent || parent->nestDepth < nestDepth - 1)
        {
            on_commit_stop_workers->nestDepth--;
            return;
        }

        parent->workers =
            list_concat(parent->workers, on_commit_stop_workers->workers);
    }
    else
    {
        /*
         * Abandon everything that was done at this nesting level. Explicitly
         * free memory to avoid a transaction-lifespan leak.
         */
        list_free_deep(on_commit_stop_workers->workers);
    }

    /*
     * We have taken care of the current subtransaction workers list for both
     * abort and commit, so we are ready to pop the stack.
     */
    pfree(on_commit_stop_workers);
    on_commit_stop_workers = parent;
}

/*
 * Request wakeup of the launcher on commit of the transaction.
 *
 * This is used to signal the launcher to stop sleeping and process the
 * subscriptions when the current transaction commits. Should be used when a
 * new tuple was added to the pg_subscription catalog.
 */
void
ApplyLauncherWakeupAtCommit(void)
{
    if (!on_commit_launcher_wakeup)
        on_commit_launcher_wakeup = true;
}

static void
ApplyLauncherWakeup(void)
{
    if (LogicalRepCtx->launcher_pid != 0)
        kill(LogicalRepCtx->launcher_pid, SIGUSR1);
}

/*
 * Main loop for the apply launcher process.
 */
void
ApplyLauncherMain(Datum main_arg)
{
    TimestampTz last_start_time = 0;

    ereport(DEBUG1,
            (errmsg("logical replication launcher started")));

    before_shmem_exit(logicalrep_launcher_onexit, (Datum) 0);

    Assert(LogicalRepCtx->launcher_pid == 0);
    LogicalRepCtx->launcher_pid = MyProcPid;

    /* Establish signal handlers. */
    pqsignal(SIGHUP, SignalHandlerForConfigReload);
    pqsignal(SIGTERM, die);
    BackgroundWorkerUnblockSignals();

    /*
     * Establish connection to nailed catalogs (we only ever access
     * pg_subscription).
     */
    BackgroundWorkerInitializeConnection(NULL, NULL, 0);

    /* Enter main loop */
    for (;;)
    {
        int rc;
        List *sublist;
        ListCell *lc;
        MemoryContext subctx;
        MemoryContext oldctx;
        TimestampTz now;
        long wait_time = DEFAULT_NAPTIME_PER_CYCLE;

        CHECK_FOR_INTERRUPTS();

        now = GetCurrentTimestamp();

        /* Limit the start retry to once per wal_retrieve_retry_interval */
        if (TimestampDifferenceExceeds(last_start_time, now,
                                       wal_retrieve_retry_interval))
        {
            /* Use temporary context for the subscription list and worker info. */
            subctx = AllocSetContextCreate(TopMemoryContext,
                                           "Logical Replication Launcher sublist",
                                           ALLOCSET_DEFAULT_SIZES);
            oldctx = MemoryContextSwitchTo(subctx);

            /* Search for subscriptions to start or stop. */
            sublist = get_subscription_list();

            /* Start the missing workers for enabled subscriptions. */
            foreach(lc, sublist)
            {
                Subscription *sub = (Subscription *) lfirst(lc);
                LogicalRepWorker *w;

                if (!sub->enabled)
                    continue;

                LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
                w = logicalrep_worker_find(sub->oid, InvalidOid, false);
                LWLockRelease(LogicalRepWorkerLock);

                if (w == NULL)
                {
                    last_start_time = now;
                    wait_time = wal_retrieve_retry_interval;

                    logicalrep_worker_launch(sub->dbid, sub->oid, sub->name,
                                             sub->owner, InvalidOid);
                }
            }

            /* Switch back to original memory context. */
            MemoryContextSwitchTo(oldctx);
            /* Clean the temporary memory. */
            MemoryContextDelete(subctx);
        }
        else
        {
            /*
             * The wait in the previous cycle was interrupted less than
             * wal_retrieve_retry_interval after the last worker was started,
             * which usually means a worker crashed; retry after
             * wal_retrieve_retry_interval again.
             */
            wait_time = wal_retrieve_retry_interval;
        }

        /* Wait for more work. */
        rc = WaitLatch(MyLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                       wait_time,
                       WAIT_EVENT_LOGICAL_LAUNCHER_MAIN);

        if (rc & WL_LATCH_SET)
        {
            ResetLatch(MyLatch);
            CHECK_FOR_INTERRUPTS();
        }

        if (ConfigReloadPending)
        {
            ConfigReloadPending = false;
            ProcessConfigFile(PGC_SIGHUP);
        }
    }

    /* Not reachable */
}

/*
 * Is current process the logical replication launcher?
 */
bool
IsLogicalLauncher(void)
{
    return LogicalRepCtx->launcher_pid == MyProcPid;
}

/*
 * Returns state of the subscriptions.
 */
Datum
pg_stat_get_subscription(PG_FUNCTION_ARGS)
{
#define PG_STAT_GET_SUBSCRIPTION_COLS 8
    Oid subid = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
    int i;
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    TupleDesc tupdesc;
    Tuplestorestate *tupstore;
    MemoryContext per_query_ctx;
    MemoryContext oldcontext;

    /* check to see if caller supports us returning a tuplestore */
    if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("set-valued function called in context that cannot accept a set")));
    if (!(rsinfo->allowedModes & SFRM_Materialize))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("materialize mode required, but it is not allowed in this context")));

    /* Build a tuple descriptor for our result type */
    if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
    oldcontext = MemoryContextSwitchTo(per_query_ctx);

    tupstore = tuplestore_begin_heap(true, false, work_mem);
    rsinfo->returnMode = SFRM_Materialize;
    rsinfo->setResult = tupstore;
    rsinfo->setDesc = tupdesc;

    MemoryContextSwitchTo(oldcontext);

    /* Make sure we get consistent view of the workers. */
    LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);

    for (i = 0; i < max_logical_replication_workers; i++)
    {
        /* for each row */
        Datum values[PG_STAT_GET_SUBSCRIPTION_COLS];
        bool nulls[PG_STAT_GET_SUBSCRIPTION_COLS];
        int worker_pid;
        LogicalRepWorker worker;

        memcpy(&worker, &LogicalRepCtx->workers[i],
               sizeof(LogicalRepWorker));
        if (!worker.proc || !IsBackendPid(worker.proc->pid))
            continue;

        if (OidIsValid(subid) && worker.subid != subid)
            continue;

        worker_pid = worker.proc->pid;

        MemSet(values, 0, sizeof(values));
        MemSet(nulls, 0, sizeof(nulls));

        values[0] = ObjectIdGetDatum(worker.subid);
        if (OidIsValid(worker.relid))
            values[1] = ObjectIdGetDatum(worker.relid);
        else
            nulls[1] = true;
        values[2] = Int32GetDatum(worker_pid);
        if (XLogRecPtrIsInvalid(worker.last_lsn))
            nulls[3] = true;
        else
            values[3] = LSNGetDatum(worker.last_lsn);
        if (worker.last_send_time == 0)
            nulls[4] = true;
        else
            values[4] = TimestampTzGetDatum(worker.last_send_time);
        if (worker.last_recv_time == 0)
            nulls[5] = true;
        else
            values[5] = TimestampTzGetDatum(worker.last_recv_time);
        if (XLogRecPtrIsInvalid(worker.reply_lsn))
            nulls[6] = true;
        else
            values[6] = LSNGetDatum(worker.reply_lsn);
        if (worker.reply_time == 0)
            nulls[7] = true;
        else
            values[7] = TimestampTzGetDatum(worker.reply_time);

        tuplestore_putvalues(tupstore, tupdesc, values, nulls);

        /*
         * If only a single subscription was requested, and we found it,
         * break.
         */
        if (OidIsValid(subid))
            break;
    }

    LWLockRelease(LogicalRepWorkerLock);

    /* clean up and return the tuplestore */
    tuplestore_donestoring(tupstore);

    return (Datum) 0;
}
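
/*
 * This function backs the pg_stat_subscription system view, which joins the
 * result against pg_subscription for the subscription name.  A typical query
 * (usage sketch):
 *
 *   SELECT subid, subname, pid, received_lsn, latest_end_lsn
 *   FROM pg_stat_subscription;
 */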