Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * walreceiver.c
4 : *
5 : * The WAL receiver process (walreceiver) is new as of Postgres 9.0. It
6 : * is the process in the standby server that takes charge of receiving
7 : * XLOG records from a primary server during streaming replication.
8 : *
9 : * When the startup process determines that it's time to start streaming,
10 : * it instructs postmaster to start walreceiver. Walreceiver first connects
11 : * to the primary server (it will be served by a walsender process
12 : * in the primary server), and then keeps receiving XLOG records and
13 : * writing them to the disk as long as the connection is alive. As XLOG
14 : * records are received and flushed to disk, it updates the
15 : * WalRcv->flushedUpto variable in shared memory, to inform the startup
16 : * process of how far it can proceed with XLOG replay.
17 : *
18 : * A WAL receiver cannot directly load GUC parameters used when establishing
19 : * its connection to the primary. Instead it relies on parameter values
20 : * that are passed down by the startup process when streaming is requested.
21 : * This applies, for example, to the replication slot and the connection
22 : * string to be used for the connection with the primary.
23 : *
24 : * If the primary server ends streaming, but doesn't disconnect, walreceiver
25 : * goes into "waiting" mode, and waits for the startup process to give new
26 : * instructions. The startup process will treat that the same as
27 : * disconnection, and will rescan the archive/pg_wal directory. But when the
28 : * startup process wants to try streaming replication again, it will just
29 : * nudge the existing walreceiver process that's waiting, instead of launching
30 : * a new one.
31 : *
32 : * Normal termination is by SIGTERM, which instructs the walreceiver to
33 : * exit(0). Emergency termination is by SIGQUIT; like any postmaster child
34 : * process, the walreceiver will simply abort and exit on SIGQUIT. A close
35 : * of the connection and a FATAL error are treated not as a crash but as
36 : * normal operation.
37 : *
38 : * This file contains the server-facing parts of walreceiver. The libpq-
39 : * specific parts are in the libpqwalreceiver module. It's loaded
40 : * dynamically to avoid linking the server with libpq.
41 : *
42 : * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group
43 : *
44 : *
45 : * IDENTIFICATION
46 : * src/backend/replication/walreceiver.c
47 : *
48 : *-------------------------------------------------------------------------
49 : */
50 : #include "postgres.h"
51 :
52 : #include <unistd.h>
53 :
54 : #include "access/htup_details.h"
55 : #include "access/timeline.h"
56 : #include "access/transam.h"
57 : #include "access/xlog_internal.h"
58 : #include "access/xlogarchive.h"
59 : #include "catalog/pg_authid.h"
60 : #include "catalog/pg_type.h"
61 : #include "common/ip.h"
62 : #include "funcapi.h"
63 : #include "libpq/pqformat.h"
64 : #include "libpq/pqsignal.h"
65 : #include "miscadmin.h"
66 : #include "pgstat.h"
67 : #include "postmaster/interrupt.h"
68 : #include "replication/walreceiver.h"
69 : #include "replication/walsender.h"
70 : #include "storage/ipc.h"
71 : #include "storage/pmsignal.h"
72 : #include "storage/procarray.h"
73 : #include "storage/procsignal.h"
74 : #include "utils/acl.h"
75 : #include "utils/builtins.h"
76 : #include "utils/guc.h"
77 : #include "utils/pg_lsn.h"
78 : #include "utils/ps_status.h"
79 : #include "utils/resowner.h"
80 : #include "utils/timestamp.h"
81 :
82 :
83 : /*
84 : * GUC variables. (Other variables that affect walreceiver are in xlog.c
85 : * because they're passed down from the startup process, for better
86 : * synchronization.)
87 : */
88 : int wal_receiver_status_interval;
89 : int wal_receiver_timeout;
90 : bool hot_standby_feedback;
91 :
92 : /* libpqwalreceiver connection */
93 : static WalReceiverConn *wrconn = NULL;
94 : WalReceiverFunctionsType *WalReceiverFunctions = NULL;
95 :
96 : #define NAPTIME_PER_CYCLE 100 /* max sleep time between cycles (100ms) */
97 :
98 : /*
99 : * These variables are used similarly to openLogFile/SegNo,
100 : * but for walreceiver to write the XLOG. recvFileTLI is the TimeLineID
101 : * corresponding the filename of recvFile.
102 : */
103 : static int recvFile = -1;
104 : static TimeLineID recvFileTLI = 0;
105 : static XLogSegNo recvSegNo = 0;
106 :
107 : /*
108 : * Flags set by interrupt handlers of walreceiver for later service in the
109 : * main loop.
110 : */
111 : static volatile sig_atomic_t got_SIGHUP = false;
112 : static volatile sig_atomic_t got_SIGTERM = false;
113 :
114 : /*
115 : * LogstreamResult indicates the byte positions that we have already
116 : * written/fsynced.
117 : */
118 : static struct
119 : {
120 : XLogRecPtr Write; /* last byte + 1 written out in the standby */
121 : XLogRecPtr Flush; /* last byte + 1 flushed in the standby */
122 : } LogstreamResult;
123 :
124 : static StringInfoData reply_message;
125 : static StringInfoData incoming_message;
126 :
127 : /* Prototypes for private functions */
128 : static void WalRcvFetchTimeLineHistoryFiles(TimeLineID first, TimeLineID last);
129 : static void WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI);
130 : static void WalRcvDie(int code, Datum arg);
131 : static void XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len);
132 : static void XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr);
133 : static void XLogWalRcvFlush(bool dying);
134 : static void XLogWalRcvSendReply(bool force, bool requestReply);
135 : static void XLogWalRcvSendHSFeedback(bool immed);
136 : static void ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime);
137 :
138 : /* Signal handlers */
139 : static void WalRcvSigHupHandler(SIGNAL_ARGS);
140 : static void WalRcvShutdownHandler(SIGNAL_ARGS);
141 :
142 :
143 : /*
144 : * Process any interrupts the walreceiver process may have received.
145 : * This should be called any time the process's latch has become set.
146 : *
147 : * Currently, only SIGTERM is of interest. We can't just exit(1) within the
148 : * SIGTERM signal handler, because the signal might arrive in the middle of
149 : * some critical operation, like while we're holding a spinlock. Instead, the
150 : * signal handler sets a flag variable as well as setting the process's latch.
151 : * We must check the flag (by calling ProcessWalRcvInterrupts) anytime the
152 : * latch has become set. Operations that could block for a long time, such as
153 : * reading from a remote server, must pay attention to the latch too; see
154 : * libpqrcv_PQgetResult for example.
155 : */
156 : void
157 700 : ProcessWalRcvInterrupts(void)
158 : {
159 : /*
160 : * Although walreceiver interrupt handling doesn't use the same scheme as
161 : * regular backends, call CHECK_FOR_INTERRUPTS() to make sure we receive
162 : * any incoming signals on Win32, and also to make sure we process any
163 : * barrier events.
164 : */
165 700 : CHECK_FOR_INTERRUPTS();
166 :
167 700 : if (got_SIGTERM)
168 : {
169 0 : ereport(FATAL,
170 : (errcode(ERRCODE_ADMIN_SHUTDOWN),
171 : errmsg("terminating walreceiver process due to administrator command")));
172 : }
173 700 : }
174 :
175 :
176 : /* Main entry point for walreceiver process */
177 : void
178 0 : WalReceiverMain(void)
179 : {
180 : char conninfo[MAXCONNINFO];
181 : char *tmp_conninfo;
182 : char slotname[NAMEDATALEN];
183 : bool is_temp_slot;
184 : XLogRecPtr startpoint;
185 : TimeLineID startpointTLI;
186 : TimeLineID primaryTLI;
187 : bool first_stream;
188 0 : WalRcvData *walrcv = WalRcv;
189 : TimestampTz last_recv_timestamp;
190 : TimestampTz now;
191 : bool ping_sent;
192 : char *err;
193 0 : char *sender_host = NULL;
194 0 : int sender_port = 0;
195 :
196 : /*
197 : * WalRcv should be set up already (if we are a backend, we inherit this
198 : * by fork() or EXEC_BACKEND mechanism from the postmaster).
199 : */
200 0 : Assert(walrcv != NULL);
201 :
202 0 : now = GetCurrentTimestamp();
203 :
204 : /*
205 : * Mark walreceiver as running in shared memory.
206 : *
207 : * Do this as early as possible, so that if we fail later on, we'll set
208 : * state to STOPPED. If we die before this, the startup process will keep
209 : * waiting for us to start up, until it times out.
210 : */
211 0 : SpinLockAcquire(&walrcv->mutex);
212 0 : Assert(walrcv->pid == 0);
213 0 : switch (walrcv->walRcvState)
214 : {
215 : case WALRCV_STOPPING:
216 : /* If we've already been requested to stop, don't start up. */
217 0 : walrcv->walRcvState = WALRCV_STOPPED;
218 : /* fall through */
219 :
220 : case WALRCV_STOPPED:
221 0 : SpinLockRelease(&walrcv->mutex);
222 0 : proc_exit(1);
223 : break;
224 :
225 : case WALRCV_STARTING:
226 : /* The usual case */
227 0 : break;
228 :
229 : case WALRCV_WAITING:
230 : case WALRCV_STREAMING:
231 : case WALRCV_RESTARTING:
232 : default:
233 : /* Shouldn't happen */
234 0 : SpinLockRelease(&walrcv->mutex);
235 0 : elog(PANIC, "walreceiver still running according to shared memory state");
236 : }
237 : /* Advertise our PID so that the startup process can kill us */
238 0 : walrcv->pid = MyProcPid;
239 0 : walrcv->walRcvState = WALRCV_STREAMING;
240 :
241 : /* Fetch information required to start streaming */
242 0 : walrcv->ready_to_display = false;
243 0 : strlcpy(conninfo, (char *) walrcv->conninfo, MAXCONNINFO);
244 0 : strlcpy(slotname, (char *) walrcv->slotname, NAMEDATALEN);
245 0 : is_temp_slot = walrcv->is_temp_slot;
246 0 : startpoint = walrcv->receiveStart;
247 0 : startpointTLI = walrcv->receiveStartTLI;
248 :
249 : /*
250 : * At most one of is_temp_slot and slotname can be set; otherwise,
251 : * RequestXLogStreaming messed up.
252 : */
253 0 : Assert(!is_temp_slot || (slotname[0] == '\0'));
254 :
255 : /* Initialise to a sanish value */
256 0 : walrcv->lastMsgSendTime =
257 0 : walrcv->lastMsgReceiptTime = walrcv->latestWalEndTime = now;
258 :
259 : /* Report the latch to use to awaken this process */
260 0 : walrcv->latch = &MyProc->procLatch;
261 :
262 0 : SpinLockRelease(&walrcv->mutex);
263 :
264 0 : pg_atomic_init_u64(&WalRcv->writtenUpto, 0);
265 :
266 : /* Arrange to clean up at walreceiver exit */
267 0 : on_shmem_exit(WalRcvDie, 0);
268 :
269 : /* Properly accept or ignore signals the postmaster might send us */
270 0 : pqsignal(SIGHUP, WalRcvSigHupHandler); /* set flag to read config file */
271 0 : pqsignal(SIGINT, SIG_IGN);
272 0 : pqsignal(SIGTERM, WalRcvShutdownHandler); /* request shutdown */
273 : /* SIGQUIT handler was already set up by InitPostmasterChild */
274 0 : pqsignal(SIGALRM, SIG_IGN);
275 0 : pqsignal(SIGPIPE, SIG_IGN);
276 0 : pqsignal(SIGUSR1, procsignal_sigusr1_handler);
277 0 : pqsignal(SIGUSR2, SIG_IGN);
278 :
279 : /* Reset some signals that are accepted by postmaster but not here */
280 0 : pqsignal(SIGCHLD, SIG_DFL);
281 :
282 : /* Load the libpq-specific functions */
283 0 : load_file("libpqwalreceiver", false);
284 0 : if (WalReceiverFunctions == NULL)
285 0 : elog(ERROR, "libpqwalreceiver didn't initialize correctly");
286 :
287 : /* Unblock signals (they were blocked when the postmaster forked us) */
288 0 : PG_SETMASK(&UnBlockSig);
289 :
290 : /* Establish the connection to the primary for XLOG streaming */
291 0 : wrconn = walrcv_connect(conninfo, false, cluster_name[0] ? cluster_name : "walreceiver", &err);
292 0 : if (!wrconn)
293 0 : ereport(ERROR,
294 : (errmsg("could not connect to the primary server: %s", err)));
295 :
296 : /*
297 : * Save user-visible connection string. This clobbers the original
298 : * conninfo, for security. Also save host and port of the sender server
299 : * this walreceiver is connected to.
300 : */
301 0 : tmp_conninfo = walrcv_get_conninfo(wrconn);
302 0 : walrcv_get_senderinfo(wrconn, &sender_host, &sender_port);
303 0 : SpinLockAcquire(&walrcv->mutex);
304 0 : memset(walrcv->conninfo, 0, MAXCONNINFO);
305 0 : if (tmp_conninfo)
306 0 : strlcpy((char *) walrcv->conninfo, tmp_conninfo, MAXCONNINFO);
307 :
308 0 : memset(walrcv->sender_host, 0, NI_MAXHOST);
309 0 : if (sender_host)
310 0 : strlcpy((char *) walrcv->sender_host, sender_host, NI_MAXHOST);
311 :
312 0 : walrcv->sender_port = sender_port;
313 0 : walrcv->ready_to_display = true;
314 0 : SpinLockRelease(&walrcv->mutex);
315 :
316 0 : if (tmp_conninfo)
317 0 : pfree(tmp_conninfo);
318 :
319 0 : if (sender_host)
320 0 : pfree(sender_host);
321 :
322 0 : first_stream = true;
323 : for (;;)
324 : {
325 : char *primary_sysid;
326 : char standby_sysid[32];
327 : WalRcvStreamOptions options;
328 :
329 : /*
330 : * Check that we're connected to a valid server using the
331 : * IDENTIFY_SYSTEM replication command.
332 : */
333 0 : primary_sysid = walrcv_identify_system(wrconn, &primaryTLI);
334 :
335 0 : snprintf(standby_sysid, sizeof(standby_sysid), UINT64_FORMAT,
336 : GetSystemIdentifier());
337 0 : if (strcmp(primary_sysid, standby_sysid) != 0)
338 : {
339 0 : ereport(ERROR,
340 : (errmsg("database system identifier differs between the primary and standby"),
341 : errdetail("The primary's identifier is %s, the standby's identifier is %s.",
342 : primary_sysid, standby_sysid)));
343 : }
344 :
345 : /*
346 : * Confirm that the current timeline of the primary is the same or
347 : * ahead of ours.
348 : */
349 0 : if (primaryTLI < startpointTLI)
350 0 : ereport(ERROR,
351 : (errmsg("highest timeline %u of the primary is behind recovery timeline %u",
352 : primaryTLI, startpointTLI)));
353 :
354 : /*
355 : * Get any missing history files. We do this always, even when we're
356 : * not interested in that timeline, so that if we're promoted to
357 : * become the primary later on, we don't select the same timeline that
358 : * was already used in the current primary. This isn't bullet-proof -
359 : * you'll need some external software to manage your cluster if you
360 : * need to ensure that a unique timeline id is chosen in every case,
361 : * but let's avoid the confusion of timeline id collisions where we
362 : * can.
363 : */
364 0 : WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);
365 :
366 : /*
367 : * Create temporary replication slot if requested, and update slot
368 : * name in shared memory. (Note the slot name cannot already be set
369 : * in this case.)
370 : */
371 0 : if (is_temp_slot)
372 : {
373 0 : snprintf(slotname, sizeof(slotname),
374 : "pg_walreceiver_%lld",
375 0 : (long long int) walrcv_get_backend_pid(wrconn));
376 :
377 0 : walrcv_create_slot(wrconn, slotname, true, 0, NULL);
378 :
379 0 : SpinLockAcquire(&walrcv->mutex);
380 0 : strlcpy(walrcv->slotname, slotname, NAMEDATALEN);
381 0 : SpinLockRelease(&walrcv->mutex);
382 : }
383 :
384 : /*
385 : * Start streaming.
386 : *
387 : * We'll try to start at the requested starting point and timeline,
388 : * even if it's different from the server's latest timeline. In case
389 : * we've already reached the end of the old timeline, the server will
390 : * finish the streaming immediately, and we will go back to await
391 : * orders from the startup process. If recovery_target_timeline is
392 : * 'latest', the startup process will scan pg_wal and find the new
393 : * history file, bump recovery target timeline, and ask us to restart
394 : * on the new timeline.
395 : */
396 0 : options.logical = false;
397 0 : options.startpoint = startpoint;
398 0 : options.slotname = slotname[0] != '\0' ? slotname : NULL;
399 0 : options.proto.physical.startpointTLI = startpointTLI;
400 0 : ThisTimeLineID = startpointTLI;
401 0 : if (walrcv_startstreaming(wrconn, &options))
402 : {
403 0 : if (first_stream)
404 0 : ereport(LOG,
405 : (errmsg("started streaming WAL from primary at %X/%X on timeline %u",
406 : (uint32) (startpoint >> 32), (uint32) startpoint,
407 : startpointTLI)));
408 : else
409 0 : ereport(LOG,
410 : (errmsg("restarted WAL streaming at %X/%X on timeline %u",
411 : (uint32) (startpoint >> 32), (uint32) startpoint,
412 : startpointTLI)));
413 0 : first_stream = false;
414 :
415 : /* Initialize LogstreamResult and buffers for processing messages */
416 0 : LogstreamResult.Write = LogstreamResult.Flush = GetXLogReplayRecPtr(NULL);
417 0 : initStringInfo(&reply_message);
418 0 : initStringInfo(&incoming_message);
419 :
420 : /* Initialize the last recv timestamp */
421 0 : last_recv_timestamp = GetCurrentTimestamp();
422 0 : ping_sent = false;
423 :
424 : /* Loop until end-of-streaming or error */
425 : for (;;)
426 : {
427 : char *buf;
428 : int len;
429 0 : bool endofwal = false;
430 0 : pgsocket wait_fd = PGINVALID_SOCKET;
431 : int rc;
432 :
433 : /*
434 : * Exit walreceiver if we're not in recovery. This should not
435 : * happen, but cross-check the status here.
436 : */
437 0 : if (!RecoveryInProgress())
438 0 : ereport(FATAL,
439 : (errmsg("cannot continue WAL streaming, recovery has already ended")));
440 :
441 : /* Process any requests or signals received recently */
442 0 : ProcessWalRcvInterrupts();
443 :
444 0 : if (got_SIGHUP)
445 : {
446 0 : got_SIGHUP = false;
447 0 : ProcessConfigFile(PGC_SIGHUP);
448 0 : XLogWalRcvSendHSFeedback(true);
449 : }
450 :
451 : /* See if we can read data immediately */
452 0 : len = walrcv_receive(wrconn, &buf, &wait_fd);
453 0 : if (len != 0)
454 : {
455 : /*
456 : * Process the received data, and any subsequent data we
457 : * can read without blocking.
458 : */
459 : for (;;)
460 : {
461 0 : if (len > 0)
462 : {
463 : /*
464 : * Something was received from primary, so reset
465 : * timeout
466 : */
467 0 : last_recv_timestamp = GetCurrentTimestamp();
468 0 : ping_sent = false;
469 0 : XLogWalRcvProcessMsg(buf[0], &buf[1], len - 1);
470 : }
471 0 : else if (len == 0)
472 0 : break;
473 0 : else if (len < 0)
474 : {
475 0 : ereport(LOG,
476 : (errmsg("replication terminated by primary server"),
477 : errdetail("End of WAL reached on timeline %u at %X/%X.",
478 : startpointTLI,
479 : (uint32) (LogstreamResult.Write >> 32), (uint32) LogstreamResult.Write)));
480 0 : endofwal = true;
481 0 : break;
482 : }
483 0 : len = walrcv_receive(wrconn, &buf, &wait_fd);
484 0 : }
485 :
486 : /* Let the primary know that we received some data. */
487 0 : XLogWalRcvSendReply(false, false);
488 :
489 : /*
490 : * If we've written some records, flush them to disk and
491 : * let the startup process and primary server know about
492 : * them.
493 : */
494 0 : XLogWalRcvFlush(false);
495 : }
496 :
497 : /* Check if we need to exit the streaming loop. */
498 0 : if (endofwal)
499 0 : break;
500 :
501 : /*
502 : * Ideally we would reuse a WaitEventSet object repeatedly
503 : * here to avoid the overheads of WaitLatchOrSocket on epoll
504 : * systems, but we can't be sure that libpq (or any other
505 : * walreceiver implementation) has the same socket (even if
506 : * the fd is the same number, it may have been closed and
507 : * reopened since the last time). In future, if there is a
508 : * function for removing sockets from WaitEventSet, then we
509 : * could add and remove just the socket each time, potentially
510 : * avoiding some system calls.
511 : */
512 0 : Assert(wait_fd != PGINVALID_SOCKET);
513 0 : rc = WaitLatchOrSocket(walrcv->latch,
514 : WL_EXIT_ON_PM_DEATH | WL_SOCKET_READABLE |
515 : WL_TIMEOUT | WL_LATCH_SET,
516 : wait_fd,
517 : NAPTIME_PER_CYCLE,
518 : WAIT_EVENT_WAL_RECEIVER_MAIN);
519 0 : if (rc & WL_LATCH_SET)
520 : {
521 0 : ResetLatch(walrcv->latch);
522 0 : ProcessWalRcvInterrupts();
523 :
524 0 : if (walrcv->force_reply)
525 : {
526 : /*
527 : * The recovery process has asked us to send apply
528 : * feedback now. Make sure the flag is really set to
529 : * false in shared memory before sending the reply, so
530 : * we don't miss a new request for a reply.
531 : */
532 0 : walrcv->force_reply = false;
533 0 : pg_memory_barrier();
534 0 : XLogWalRcvSendReply(true, false);
535 : }
536 : }
537 0 : if (rc & WL_TIMEOUT)
538 : {
539 : /*
540 : * We didn't receive anything new. If we haven't heard
541 : * anything from the server for more than
542 : * wal_receiver_timeout / 2, ping the server. Also, if
543 : * it's been longer than wal_receiver_status_interval
544 : * since the last update we sent, send a status update to
545 : * the primary anyway, to report any progress in applying
546 : * WAL.
547 : */
548 0 : bool requestReply = false;
549 :
550 : /*
551 : * Check if time since last receive from standby has
552 : * reached the configured limit.
553 : */
554 0 : if (wal_receiver_timeout > 0)
555 : {
556 0 : TimestampTz now = GetCurrentTimestamp();
557 : TimestampTz timeout;
558 :
559 0 : timeout =
560 0 : TimestampTzPlusMilliseconds(last_recv_timestamp,
561 : wal_receiver_timeout);
562 :
563 0 : if (now >= timeout)
564 0 : ereport(ERROR,
565 : (errmsg("terminating walreceiver due to timeout")));
566 :
567 : /*
568 : * We didn't receive anything new, for half of
569 : * receiver replication timeout. Ping the server.
570 : */
571 0 : if (!ping_sent)
572 : {
573 0 : timeout = TimestampTzPlusMilliseconds(last_recv_timestamp,
574 : (wal_receiver_timeout / 2));
575 0 : if (now >= timeout)
576 : {
577 0 : requestReply = true;
578 0 : ping_sent = true;
579 : }
580 : }
581 : }
582 :
583 0 : XLogWalRcvSendReply(requestReply, requestReply);
584 0 : XLogWalRcvSendHSFeedback(false);
585 : }
586 0 : }
587 :
588 : /*
589 : * The backend finished streaming. Exit streaming COPY-mode from
590 : * our side, too.
591 : */
592 0 : walrcv_endstreaming(wrconn, &primaryTLI);
593 :
594 : /*
595 : * If the server had switched to a new timeline that we didn't
596 : * know about when we began streaming, fetch its timeline history
597 : * file now.
598 : */
599 0 : WalRcvFetchTimeLineHistoryFiles(startpointTLI, primaryTLI);
600 : }
601 : else
602 0 : ereport(LOG,
603 : (errmsg("primary server contains no more WAL on requested timeline %u",
604 : startpointTLI)));
605 :
606 : /*
607 : * End of WAL reached on the requested timeline. Close the last
608 : * segment, and await for new orders from the startup process.
609 : */
610 0 : if (recvFile >= 0)
611 : {
612 : char xlogfname[MAXFNAMELEN];
613 :
614 0 : XLogWalRcvFlush(false);
615 0 : XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size);
616 0 : if (close(recvFile) != 0)
617 0 : ereport(PANIC,
618 : (errcode_for_file_access(),
619 : errmsg("could not close log segment %s: %m",
620 : xlogfname)));
621 :
622 : /*
623 : * Create .done file forcibly to prevent the streamed segment from
624 : * being archived later.
625 : */
626 0 : if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
627 0 : XLogArchiveForceDone(xlogfname);
628 : else
629 0 : XLogArchiveNotify(xlogfname);
630 : }
631 0 : recvFile = -1;
632 :
633 0 : elog(DEBUG1, "walreceiver ended streaming and awaits new instructions");
634 0 : WalRcvWaitForStartPosition(&startpoint, &startpointTLI);
635 0 : }
636 : /* not reached */
637 : }
638 :
639 : /*
640 : * Wait for startup process to set receiveStart and receiveStartTLI.
641 : */
642 : static void
643 0 : WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
644 : {
645 0 : WalRcvData *walrcv = WalRcv;
646 : int state;
647 :
648 0 : SpinLockAcquire(&walrcv->mutex);
649 0 : state = walrcv->walRcvState;
650 0 : if (state != WALRCV_STREAMING)
651 : {
652 0 : SpinLockRelease(&walrcv->mutex);
653 0 : if (state == WALRCV_STOPPING)
654 0 : proc_exit(0);
655 : else
656 0 : elog(FATAL, "unexpected walreceiver state");
657 : }
658 0 : walrcv->walRcvState = WALRCV_WAITING;
659 0 : walrcv->receiveStart = InvalidXLogRecPtr;
660 0 : walrcv->receiveStartTLI = 0;
661 0 : SpinLockRelease(&walrcv->mutex);
662 :
663 0 : set_ps_display("idle");
664 :
665 : /*
666 : * nudge startup process to notice that we've stopped streaming and are
667 : * now waiting for instructions.
668 : */
669 0 : WakeupRecovery();
670 : for (;;)
671 : {
672 0 : ResetLatch(walrcv->latch);
673 :
674 0 : ProcessWalRcvInterrupts();
675 :
676 0 : SpinLockAcquire(&walrcv->mutex);
677 0 : Assert(walrcv->walRcvState == WALRCV_RESTARTING ||
678 : walrcv->walRcvState == WALRCV_WAITING ||
679 : walrcv->walRcvState == WALRCV_STOPPING);
680 0 : if (walrcv->walRcvState == WALRCV_RESTARTING)
681 : {
682 : /*
683 : * No need to handle changes in primary_conninfo or
684 : * primary_slotname here. Startup process will signal us to
685 : * terminate in case those change.
686 : */
687 0 : *startpoint = walrcv->receiveStart;
688 0 : *startpointTLI = walrcv->receiveStartTLI;
689 0 : walrcv->walRcvState = WALRCV_STREAMING;
690 0 : SpinLockRelease(&walrcv->mutex);
691 0 : break;
692 : }
693 0 : if (walrcv->walRcvState == WALRCV_STOPPING)
694 : {
695 : /*
696 : * We should've received SIGTERM if the startup process wants us
697 : * to die, but might as well check it here too.
698 : */
699 0 : SpinLockRelease(&walrcv->mutex);
700 0 : exit(1);
701 : }
702 0 : SpinLockRelease(&walrcv->mutex);
703 :
704 0 : (void) WaitLatch(walrcv->latch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0,
705 : WAIT_EVENT_WAL_RECEIVER_WAIT_START);
706 0 : }
707 :
708 0 : if (update_process_title)
709 : {
710 : char activitymsg[50];
711 :
712 0 : snprintf(activitymsg, sizeof(activitymsg), "restarting at %X/%X",
713 0 : (uint32) (*startpoint >> 32),
714 0 : (uint32) *startpoint);
715 0 : set_ps_display(activitymsg);
716 : }
717 0 : }
718 :
719 : /*
720 : * Fetch any missing timeline history files between 'first' and 'last'
721 : * (inclusive) from the server.
722 : */
723 : static void
724 0 : WalRcvFetchTimeLineHistoryFiles(TimeLineID first, TimeLineID last)
725 : {
726 : TimeLineID tli;
727 :
728 0 : for (tli = first; tli <= last; tli++)
729 : {
730 : /* there's no history file for timeline 1 */
731 0 : if (tli != 1 && !existsTimeLineHistory(tli))
732 : {
733 : char *fname;
734 : char *content;
735 : int len;
736 : char expectedfname[MAXFNAMELEN];
737 :
738 0 : ereport(LOG,
739 : (errmsg("fetching timeline history file for timeline %u from primary server",
740 : tli)));
741 :
742 0 : walrcv_readtimelinehistoryfile(wrconn, tli, &fname, &content, &len);
743 :
744 : /*
745 : * Check that the filename on the primary matches what we
746 : * calculated ourselves. This is just a sanity check, it should
747 : * always match.
748 : */
749 0 : TLHistoryFileName(expectedfname, tli);
750 0 : if (strcmp(fname, expectedfname) != 0)
751 0 : ereport(ERROR,
752 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
753 : errmsg_internal("primary reported unexpected file name for timeline history file of timeline %u",
754 : tli)));
755 :
756 : /*
757 : * Write the file to pg_wal.
758 : */
759 0 : writeTimeLineHistoryFile(tli, content, len);
760 :
761 : /*
762 : * Mark the streamed history file as ready for archiving
763 : * if archive_mode is always.
764 : */
765 0 : if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
766 0 : XLogArchiveForceDone(fname);
767 : else
768 0 : XLogArchiveNotify(fname);
769 :
770 0 : pfree(fname);
771 0 : pfree(content);
772 : }
773 : }
774 0 : }
775 :
776 : /*
777 : * Mark us as STOPPED in shared memory at exit.
778 : */
779 : static void
780 0 : WalRcvDie(int code, Datum arg)
781 : {
782 0 : WalRcvData *walrcv = WalRcv;
783 :
784 : /* Ensure that all WAL records received are flushed to disk */
785 0 : XLogWalRcvFlush(true);
786 :
787 : /* Mark ourselves inactive in shared memory */
788 0 : SpinLockAcquire(&walrcv->mutex);
789 0 : Assert(walrcv->walRcvState == WALRCV_STREAMING ||
790 : walrcv->walRcvState == WALRCV_RESTARTING ||
791 : walrcv->walRcvState == WALRCV_STARTING ||
792 : walrcv->walRcvState == WALRCV_WAITING ||
793 : walrcv->walRcvState == WALRCV_STOPPING);
794 0 : Assert(walrcv->pid == MyProcPid);
795 0 : walrcv->walRcvState = WALRCV_STOPPED;
796 0 : walrcv->pid = 0;
797 0 : walrcv->ready_to_display = false;
798 0 : walrcv->latch = NULL;
799 0 : SpinLockRelease(&walrcv->mutex);
800 :
801 : /* Terminate the connection gracefully. */
802 0 : if (wrconn != NULL)
803 0 : walrcv_disconnect(wrconn);
804 :
805 : /* Wake up the startup process to notice promptly that we're gone */
806 0 : WakeupRecovery();
807 0 : }
808 :
809 : /* SIGHUP: set flag to re-read config file at next convenient time */
810 : static void
811 0 : WalRcvSigHupHandler(SIGNAL_ARGS)
812 : {
813 0 : got_SIGHUP = true;
814 0 : }
815 :
816 :
817 : /* SIGTERM: set flag for ProcessWalRcvInterrupts */
818 : static void
819 0 : WalRcvShutdownHandler(SIGNAL_ARGS)
820 : {
821 0 : int save_errno = errno;
822 :
823 0 : got_SIGTERM = true;
824 :
825 0 : if (WalRcv->latch)
826 0 : SetLatch(WalRcv->latch);
827 :
828 0 : errno = save_errno;
829 0 : }
830 :
831 : /*
832 : * Accept the message from XLOG stream, and process it.
833 : */
834 : static void
835 0 : XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len)
836 : {
837 : int hdrlen;
838 : XLogRecPtr dataStart;
839 : XLogRecPtr walEnd;
840 : TimestampTz sendTime;
841 : bool replyRequested;
842 :
843 0 : resetStringInfo(&incoming_message);
844 :
845 0 : switch (type)
846 : {
847 : case 'w': /* WAL records */
848 : {
849 : /* copy message to StringInfo */
850 0 : hdrlen = sizeof(int64) + sizeof(int64) + sizeof(int64);
851 0 : if (len < hdrlen)
852 0 : ereport(ERROR,
853 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
854 : errmsg_internal("invalid WAL message received from primary")));
855 0 : appendBinaryStringInfo(&incoming_message, buf, hdrlen);
856 :
857 : /* read the fields */
858 0 : dataStart = pq_getmsgint64(&incoming_message);
859 0 : walEnd = pq_getmsgint64(&incoming_message);
860 0 : sendTime = pq_getmsgint64(&incoming_message);
861 0 : ProcessWalSndrMessage(walEnd, sendTime);
862 :
863 0 : buf += hdrlen;
864 0 : len -= hdrlen;
865 0 : XLogWalRcvWrite(buf, len, dataStart);
866 0 : break;
867 : }
868 : case 'k': /* Keepalive */
869 : {
870 : /* copy message to StringInfo */
871 0 : hdrlen = sizeof(int64) + sizeof(int64) + sizeof(char);
872 0 : if (len != hdrlen)
873 0 : ereport(ERROR,
874 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
875 : errmsg_internal("invalid keepalive message received from primary")));
876 0 : appendBinaryStringInfo(&incoming_message, buf, hdrlen);
877 :
878 : /* read the fields */
879 0 : walEnd = pq_getmsgint64(&incoming_message);
880 0 : sendTime = pq_getmsgint64(&incoming_message);
881 0 : replyRequested = pq_getmsgbyte(&incoming_message);
882 :
883 0 : ProcessWalSndrMessage(walEnd, sendTime);
884 :
885 : /* If the primary requested a reply, send one immediately */
886 0 : if (replyRequested)
887 0 : XLogWalRcvSendReply(true, false);
888 0 : break;
889 : }
890 : default:
891 0 : ereport(ERROR,
892 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
893 : errmsg_internal("invalid replication message type %d",
894 : type)));
895 : }
896 0 : }
897 :
898 : /*
899 : * Write XLOG data to disk.
900 : */
901 : static void
902 0 : XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
903 : {
904 : int startoff;
905 : int byteswritten;
906 :
907 0 : while (nbytes > 0)
908 : {
909 : int segbytes;
910 :
911 0 : if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo, wal_segment_size))
912 : {
913 : bool use_existent;
914 :
915 : /*
916 : * fsync() and close current file before we switch to next one. We
917 : * would otherwise have to reopen this file to fsync it later
918 : */
919 0 : if (recvFile >= 0)
920 : {
921 : char xlogfname[MAXFNAMELEN];
922 :
923 0 : XLogWalRcvFlush(false);
924 :
925 0 : XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size);
926 :
927 : /*
928 : * XLOG segment files will be re-read by recovery in startup
929 : * process soon, so we don't advise the OS to release cache
930 : * pages associated with the file like XLogFileClose() does.
931 : */
932 0 : if (close(recvFile) != 0)
933 0 : ereport(PANIC,
934 : (errcode_for_file_access(),
935 : errmsg("could not close log segment %s: %m",
936 : xlogfname)));
937 :
938 : /*
939 : * Create .done file forcibly to prevent the streamed segment
940 : * from being archived later.
941 : */
942 0 : if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
943 0 : XLogArchiveForceDone(xlogfname);
944 : else
945 0 : XLogArchiveNotify(xlogfname);
946 : }
947 0 : recvFile = -1;
948 :
949 : /* Create/use new log file */
950 0 : XLByteToSeg(recptr, recvSegNo, wal_segment_size);
951 0 : use_existent = true;
952 0 : recvFile = XLogFileInit(recvSegNo, &use_existent, true);
953 0 : recvFileTLI = ThisTimeLineID;
954 : }
955 :
956 : /* Calculate the start offset of the received logs */
957 0 : startoff = XLogSegmentOffset(recptr, wal_segment_size);
958 :
959 0 : if (startoff + nbytes > wal_segment_size)
960 0 : segbytes = wal_segment_size - startoff;
961 : else
962 0 : segbytes = nbytes;
963 :
964 : /* OK to write the logs */
965 0 : errno = 0;
966 :
967 0 : byteswritten = pg_pwrite(recvFile, buf, segbytes, (off_t) startoff);
968 0 : if (byteswritten <= 0)
969 : {
970 : char xlogfname[MAXFNAMELEN];
971 : int save_errno;
972 :
973 : /* if write didn't set errno, assume no disk space */
974 0 : if (errno == 0)
975 0 : errno = ENOSPC;
976 :
977 0 : save_errno = errno;
978 0 : XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size);
979 0 : errno = save_errno;
980 0 : ereport(PANIC,
981 : (errcode_for_file_access(),
982 : errmsg("could not write to log segment %s "
983 : "at offset %u, length %lu: %m",
984 : xlogfname, startoff, (unsigned long) segbytes)));
985 : }
986 :
987 : /* Update state for write */
988 0 : recptr += byteswritten;
989 :
990 0 : nbytes -= byteswritten;
991 0 : buf += byteswritten;
992 :
993 0 : LogstreamResult.Write = recptr;
994 : }
995 :
996 : /* Update shared-memory status */
997 0 : pg_atomic_write_u64(&WalRcv->writtenUpto, LogstreamResult.Write);
998 0 : }
999 :
1000 : /*
1001 : * Flush the log to disk.
1002 : *
1003 : * If we're in the midst of dying, it's unwise to do anything that might throw
1004 : * an error, so we skip sending a reply in that case.
1005 : */
1006 : static void
1007 0 : XLogWalRcvFlush(bool dying)
1008 : {
1009 0 : if (LogstreamResult.Flush < LogstreamResult.Write)
1010 : {
1011 0 : WalRcvData *walrcv = WalRcv;
1012 :
1013 0 : issue_xlog_fsync(recvFile, recvSegNo);
1014 :
1015 0 : LogstreamResult.Flush = LogstreamResult.Write;
1016 :
1017 : /* Update shared-memory status */
1018 0 : SpinLockAcquire(&walrcv->mutex);
1019 0 : if (walrcv->flushedUpto < LogstreamResult.Flush)
1020 : {
1021 0 : walrcv->latestChunkStart = walrcv->flushedUpto;
1022 0 : walrcv->flushedUpto = LogstreamResult.Flush;
1023 0 : walrcv->receivedTLI = ThisTimeLineID;
1024 : }
1025 0 : SpinLockRelease(&walrcv->mutex);
1026 :
1027 : /* Signal the startup process and walsender that new WAL has arrived */
1028 0 : WakeupRecovery();
1029 0 : if (AllowCascadeReplication())
1030 0 : WalSndWakeup();
1031 :
1032 : /* Report XLOG streaming progress in PS display */
1033 0 : if (update_process_title)
1034 : {
1035 : char activitymsg[50];
1036 :
1037 0 : snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X",
1038 0 : (uint32) (LogstreamResult.Write >> 32),
1039 0 : (uint32) LogstreamResult.Write);
1040 0 : set_ps_display(activitymsg);
1041 : }
1042 :
1043 : /* Also let the primary know that we made some progress */
1044 0 : if (!dying)
1045 : {
1046 0 : XLogWalRcvSendReply(false, false);
1047 0 : XLogWalRcvSendHSFeedback(false);
1048 : }
1049 : }
1050 0 : }
1051 :
1052 : /*
1053 : * Send reply message to primary, indicating our current WAL locations, oldest
1054 : * xmin and the current time.
1055 : *
1056 : * If 'force' is not set, the message is only sent if enough time has
1057 : * passed since last status update to reach wal_receiver_status_interval.
1058 : * If wal_receiver_status_interval is disabled altogether and 'force' is
1059 : * false, this is a no-op.
1060 : *
1061 : * If 'requestReply' is true, requests the server to reply immediately upon
1062 : * receiving this message. This is used for heartbeats, when approaching
1063 : * wal_receiver_timeout.
1064 : */
1065 : static void
1066 0 : XLogWalRcvSendReply(bool force, bool requestReply)
1067 : {
1068 : static XLogRecPtr writePtr = 0;
1069 : static XLogRecPtr flushPtr = 0;
1070 : XLogRecPtr applyPtr;
1071 : static TimestampTz sendTime = 0;
1072 : TimestampTz now;
1073 :
1074 : /*
1075 : * If the user doesn't want status to be reported to the primary, be sure
1076 : * to exit before doing anything at all.
1077 : */
1078 0 : if (!force && wal_receiver_status_interval <= 0)
1079 0 : return;
1080 :
1081 : /* Get current timestamp. */
1082 0 : now = GetCurrentTimestamp();
1083 :
1084 : /*
1085 : * We can compare the write and flush positions to the last message we
1086 : * sent without taking any lock, but the apply position requires a spin
1087 : * lock, so we don't check that unless something else has changed or 10
1088 : * seconds have passed. This means that the apply WAL location will
1089 : * appear, from the primary's point of view, to lag slightly, but since
1090 : * this is only for reporting purposes and only on idle systems, that's
1091 : * probably OK.
1092 : */
1093 0 : if (!force
1094 0 : && writePtr == LogstreamResult.Write
1095 0 : && flushPtr == LogstreamResult.Flush
1096 0 : && !TimestampDifferenceExceeds(sendTime, now,
1097 : wal_receiver_status_interval * 1000))
1098 0 : return;
1099 0 : sendTime = now;
1100 :
1101 : /* Construct a new message */
1102 0 : writePtr = LogstreamResult.Write;
1103 0 : flushPtr = LogstreamResult.Flush;
1104 0 : applyPtr = GetXLogReplayRecPtr(NULL);
1105 :
1106 0 : resetStringInfo(&reply_message);
1107 0 : pq_sendbyte(&reply_message, 'r');
1108 0 : pq_sendint64(&reply_message, writePtr);
1109 0 : pq_sendint64(&reply_message, flushPtr);
1110 0 : pq_sendint64(&reply_message, applyPtr);
1111 0 : pq_sendint64(&reply_message, GetCurrentTimestamp());
1112 0 : pq_sendbyte(&reply_message, requestReply ? 1 : 0);
1113 :
1114 : /* Send it */
1115 0 : elog(DEBUG2, "sending write %X/%X flush %X/%X apply %X/%X%s",
1116 : (uint32) (writePtr >> 32), (uint32) writePtr,
1117 : (uint32) (flushPtr >> 32), (uint32) flushPtr,
1118 : (uint32) (applyPtr >> 32), (uint32) applyPtr,
1119 : requestReply ? " (reply requested)" : "");
1120 :
1121 0 : walrcv_send(wrconn, reply_message.data, reply_message.len);
1122 : }
1123 :
1124 : /*
1125 : * Send hot standby feedback message to primary, plus the current time,
1126 : * in case they don't have a watch.
1127 : *
1128 : * If the user disables feedback, send one final message to tell sender
1129 : * to forget about the xmin on this standby. We also send this message
1130 : * on first connect because a previous connection might have set xmin
1131 : * on a replication slot. (If we're not using a slot it's harmless to
1132 : * send a feedback message explicitly setting InvalidTransactionId).
1133 : */
1134 : static void
1135 0 : XLogWalRcvSendHSFeedback(bool immed)
1136 : {
1137 : TimestampTz now;
1138 : FullTransactionId nextFullXid;
1139 : TransactionId nextXid;
1140 : uint32 xmin_epoch,
1141 : catalog_xmin_epoch;
1142 : TransactionId xmin,
1143 : catalog_xmin;
1144 : static TimestampTz sendTime = 0;
1145 :
1146 : /* initially true so we always send at least one feedback message */
1147 : static bool primary_has_standby_xmin = true;
1148 :
1149 : /*
1150 : * If the user doesn't want status to be reported to the primary, be sure
1151 : * to exit before doing anything at all.
1152 : */
1153 0 : if ((wal_receiver_status_interval <= 0 || !hot_standby_feedback) &&
1154 0 : !primary_has_standby_xmin)
1155 0 : return;
1156 :
1157 : /* Get current timestamp. */
1158 0 : now = GetCurrentTimestamp();
1159 :
1160 0 : if (!immed)
1161 : {
1162 : /*
1163 : * Send feedback at most once per wal_receiver_status_interval.
1164 : */
1165 0 : if (!TimestampDifferenceExceeds(sendTime, now,
1166 : wal_receiver_status_interval * 1000))
1167 0 : return;
1168 0 : sendTime = now;
1169 : }
1170 :
1171 : /*
1172 : * If Hot Standby is not yet accepting connections there is nothing to
1173 : * send. Check this after the interval has expired to reduce number of
1174 : * calls.
1175 : *
1176 : * Bailing out here also ensures that we don't send feedback until we've
1177 : * read our own replication slot state, so we don't tell the primary to
1178 : * discard needed xmin or catalog_xmin from any slots that may exist on
1179 : * this replica.
1180 : */
1181 0 : if (!HotStandbyActive())
1182 0 : return;
1183 :
1184 : /*
1185 : * Make the expensive call to get the oldest xmin once we are certain
1186 : * everything else has been checked.
1187 : */
1188 0 : if (hot_standby_feedback)
1189 : {
1190 0 : GetReplicationHorizons(&xmin, &catalog_xmin);
1191 : }
1192 : else
1193 : {
1194 0 : xmin = InvalidTransactionId;
1195 0 : catalog_xmin = InvalidTransactionId;
1196 : }
1197 :
1198 : /*
1199 : * Get epoch and adjust if nextXid and oldestXmin are different sides of
1200 : * the epoch boundary.
1201 : */
1202 0 : nextFullXid = ReadNextFullTransactionId();
1203 0 : nextXid = XidFromFullTransactionId(nextFullXid);
1204 0 : xmin_epoch = EpochFromFullTransactionId(nextFullXid);
1205 0 : catalog_xmin_epoch = xmin_epoch;
1206 0 : if (nextXid < xmin)
1207 0 : xmin_epoch--;
1208 0 : if (nextXid < catalog_xmin)
1209 0 : catalog_xmin_epoch--;
1210 :
1211 0 : elog(DEBUG2, "sending hot standby feedback xmin %u epoch %u catalog_xmin %u catalog_xmin_epoch %u",
1212 : xmin, xmin_epoch, catalog_xmin, catalog_xmin_epoch);
1213 :
1214 : /* Construct the message and send it. */
1215 0 : resetStringInfo(&reply_message);
1216 0 : pq_sendbyte(&reply_message, 'h');
1217 0 : pq_sendint64(&reply_message, GetCurrentTimestamp());
1218 0 : pq_sendint32(&reply_message, xmin);
1219 0 : pq_sendint32(&reply_message, xmin_epoch);
1220 0 : pq_sendint32(&reply_message, catalog_xmin);
1221 0 : pq_sendint32(&reply_message, catalog_xmin_epoch);
1222 0 : walrcv_send(wrconn, reply_message.data, reply_message.len);
1223 0 : if (TransactionIdIsValid(xmin) || TransactionIdIsValid(catalog_xmin))
1224 0 : primary_has_standby_xmin = true;
1225 : else
1226 0 : primary_has_standby_xmin = false;
1227 : }
1228 :
1229 : /*
1230 : * Update shared memory status upon receiving a message from primary.
1231 : *
1232 : * 'walEnd' and 'sendTime' are the end-of-WAL and timestamp of the latest
1233 : * message, reported by primary.
1234 : */
1235 : static void
1236 0 : ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime)
1237 : {
1238 0 : WalRcvData *walrcv = WalRcv;
1239 :
1240 0 : TimestampTz lastMsgReceiptTime = GetCurrentTimestamp();
1241 :
1242 : /* Update shared-memory status */
1243 0 : SpinLockAcquire(&walrcv->mutex);
1244 0 : if (walrcv->latestWalEnd < walEnd)
1245 0 : walrcv->latestWalEndTime = sendTime;
1246 0 : walrcv->latestWalEnd = walEnd;
1247 0 : walrcv->lastMsgSendTime = sendTime;
1248 0 : walrcv->lastMsgReceiptTime = lastMsgReceiptTime;
1249 0 : SpinLockRelease(&walrcv->mutex);
1250 :
1251 0 : if (log_min_messages <= DEBUG2)
1252 : {
1253 : char *sendtime;
1254 : char *receipttime;
1255 : int applyDelay;
1256 :
1257 : /* Copy because timestamptz_to_str returns a static buffer */
1258 0 : sendtime = pstrdup(timestamptz_to_str(sendTime));
1259 0 : receipttime = pstrdup(timestamptz_to_str(lastMsgReceiptTime));
1260 0 : applyDelay = GetReplicationApplyDelay();
1261 :
1262 : /* apply delay is not available */
1263 0 : if (applyDelay == -1)
1264 0 : elog(DEBUG2, "sendtime %s receipttime %s replication apply delay (N/A) transfer latency %d ms",
1265 : sendtime,
1266 : receipttime,
1267 : GetReplicationTransferLatency());
1268 : else
1269 0 : elog(DEBUG2, "sendtime %s receipttime %s replication apply delay %d ms transfer latency %d ms",
1270 : sendtime,
1271 : receipttime,
1272 : applyDelay,
1273 : GetReplicationTransferLatency());
1274 :
1275 0 : pfree(sendtime);
1276 0 : pfree(receipttime);
1277 : }
1278 0 : }
1279 :
1280 : /*
1281 : * Wake up the walreceiver main loop.
1282 : *
1283 : * This is called by the startup process whenever interesting xlog records
1284 : * are applied, so that walreceiver can check if it needs to send an apply
1285 : * notification back to the primary which may be waiting in a COMMIT with
1286 : * synchronous_commit = remote_apply.
1287 : */
1288 : void
1289 0 : WalRcvForceReply(void)
1290 : {
1291 : Latch *latch;
1292 :
1293 0 : WalRcv->force_reply = true;
1294 : /* fetching the latch pointer might not be atomic, so use spinlock */
1295 0 : SpinLockAcquire(&WalRcv->mutex);
1296 0 : latch = WalRcv->latch;
1297 0 : SpinLockRelease(&WalRcv->mutex);
1298 0 : if (latch)
1299 0 : SetLatch(latch);
1300 0 : }
1301 :
1302 : /*
1303 : * Return a string constant representing the state. This is used
1304 : * in system functions and views, and should *not* be translated.
1305 : */
1306 : static const char *
1307 0 : WalRcvGetStateString(WalRcvState state)
1308 : {
1309 0 : switch (state)
1310 : {
1311 : case WALRCV_STOPPED:
1312 0 : return "stopped";
1313 : case WALRCV_STARTING:
1314 0 : return "starting";
1315 : case WALRCV_STREAMING:
1316 0 : return "streaming";
1317 : case WALRCV_WAITING:
1318 0 : return "waiting";
1319 : case WALRCV_RESTARTING:
1320 0 : return "restarting";
1321 : case WALRCV_STOPPING:
1322 0 : return "stopping";
1323 : }
1324 0 : return "UNKNOWN";
1325 : }
1326 :
1327 : /*
1328 : * Returns activity of WAL receiver, including pid, state and xlog locations
1329 : * received from the WAL sender of another server.
1330 : */
1331 : Datum
1332 0 : pg_stat_get_wal_receiver(PG_FUNCTION_ARGS)
1333 : {
1334 : TupleDesc tupdesc;
1335 : Datum *values;
1336 : bool *nulls;
1337 : int pid;
1338 : bool ready_to_display;
1339 : WalRcvState state;
1340 : XLogRecPtr receive_start_lsn;
1341 : TimeLineID receive_start_tli;
1342 : XLogRecPtr written_lsn;
1343 : XLogRecPtr flushed_lsn;
1344 : TimeLineID received_tli;
1345 : TimestampTz last_send_time;
1346 : TimestampTz last_receipt_time;
1347 : XLogRecPtr latest_end_lsn;
1348 : TimestampTz latest_end_time;
1349 : char sender_host[NI_MAXHOST];
1350 0 : int sender_port = 0;
1351 : char slotname[NAMEDATALEN];
1352 : char conninfo[MAXCONNINFO];
1353 :
1354 : /* Take a lock to ensure value consistency */
1355 0 : SpinLockAcquire(&WalRcv->mutex);
1356 0 : pid = (int) WalRcv->pid;
1357 0 : ready_to_display = WalRcv->ready_to_display;
1358 0 : state = WalRcv->walRcvState;
1359 0 : receive_start_lsn = WalRcv->receiveStart;
1360 0 : receive_start_tli = WalRcv->receiveStartTLI;
1361 0 : written_lsn = pg_atomic_read_u64(&WalRcv->writtenUpto);
1362 0 : flushed_lsn = WalRcv->flushedUpto;
1363 0 : received_tli = WalRcv->receivedTLI;
1364 0 : last_send_time = WalRcv->lastMsgSendTime;
1365 0 : last_receipt_time = WalRcv->lastMsgReceiptTime;
1366 0 : latest_end_lsn = WalRcv->latestWalEnd;
1367 0 : latest_end_time = WalRcv->latestWalEndTime;
1368 0 : strlcpy(slotname, (char *) WalRcv->slotname, sizeof(slotname));
1369 0 : strlcpy(sender_host, (char *) WalRcv->sender_host, sizeof(sender_host));
1370 0 : sender_port = WalRcv->sender_port;
1371 0 : strlcpy(conninfo, (char *) WalRcv->conninfo, sizeof(conninfo));
1372 0 : SpinLockRelease(&WalRcv->mutex);
1373 :
1374 : /*
1375 : * No WAL receiver (or not ready yet), just return a tuple with NULL
1376 : * values
1377 : */
1378 0 : if (pid == 0 || !ready_to_display)
1379 0 : PG_RETURN_NULL();
1380 :
1381 : /* determine result type */
1382 0 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1383 0 : elog(ERROR, "return type must be a row type");
1384 :
1385 0 : values = palloc0(sizeof(Datum) * tupdesc->natts);
1386 0 : nulls = palloc0(sizeof(bool) * tupdesc->natts);
1387 :
1388 : /* Fetch values */
1389 0 : values[0] = Int32GetDatum(pid);
1390 :
1391 0 : if (!is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_ALL_STATS))
1392 : {
1393 : /*
1394 : * Only superusers and members of pg_read_all_stats can see details.
1395 : * Other users only get the pid value to know whether it is a WAL
1396 : * receiver, but no details.
1397 : */
1398 0 : MemSet(&nulls[1], true, sizeof(bool) * (tupdesc->natts - 1));
1399 : }
1400 : else
1401 : {
1402 0 : values[1] = CStringGetTextDatum(WalRcvGetStateString(state));
1403 :
1404 0 : if (XLogRecPtrIsInvalid(receive_start_lsn))
1405 0 : nulls[2] = true;
1406 : else
1407 0 : values[2] = LSNGetDatum(receive_start_lsn);
1408 0 : values[3] = Int32GetDatum(receive_start_tli);
1409 0 : if (XLogRecPtrIsInvalid(written_lsn))
1410 0 : nulls[4] = true;
1411 : else
1412 0 : values[4] = LSNGetDatum(written_lsn);
1413 0 : if (XLogRecPtrIsInvalid(flushed_lsn))
1414 0 : nulls[5] = true;
1415 : else
1416 0 : values[5] = LSNGetDatum(flushed_lsn);
1417 0 : values[6] = Int32GetDatum(received_tli);
1418 0 : if (last_send_time == 0)
1419 0 : nulls[7] = true;
1420 : else
1421 0 : values[7] = TimestampTzGetDatum(last_send_time);
1422 0 : if (last_receipt_time == 0)
1423 0 : nulls[8] = true;
1424 : else
1425 0 : values[8] = TimestampTzGetDatum(last_receipt_time);
1426 0 : if (XLogRecPtrIsInvalid(latest_end_lsn))
1427 0 : nulls[9] = true;
1428 : else
1429 0 : values[9] = LSNGetDatum(latest_end_lsn);
1430 0 : if (latest_end_time == 0)
1431 0 : nulls[10] = true;
1432 : else
1433 0 : values[10] = TimestampTzGetDatum(latest_end_time);
1434 0 : if (*slotname == '\0')
1435 0 : nulls[11] = true;
1436 : else
1437 0 : values[11] = CStringGetTextDatum(slotname);
1438 0 : if (*sender_host == '\0')
1439 0 : nulls[12] = true;
1440 : else
1441 0 : values[12] = CStringGetTextDatum(sender_host);
1442 0 : if (sender_port == 0)
1443 0 : nulls[13] = true;
1444 : else
1445 0 : values[13] = Int32GetDatum(sender_port);
1446 0 : if (*conninfo == '\0')
1447 0 : nulls[14] = true;
1448 : else
1449 0 : values[14] = CStringGetTextDatum(conninfo);
1450 : }
1451 :
1452 : /* Returns the record as Datum */
1453 0 : PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
1454 : }
|