From 90c03545c09d29e9daf64e9151047bdd2a93348e Mon Sep 17 00:00:00 2001 From: Shlok Kyal Date: Tue, 9 Jan 2024 20:53:47 +0530 Subject: [PATCH v1] Restrict pg_subscriber to standby node Previously, pg_subscriber could be run on a normal backup cluster, and the command would get stuck. With this patch, pg_subscriber is restricted to run only on a standby server. Also add a timeout of 60 seconds so that the process ends if it gets stuck. --- src/bin/pg_basebackup/pg_subscriber.c | 59 ++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/src/bin/pg_basebackup/pg_subscriber.c b/src/bin/pg_basebackup/pg_subscriber.c index b96ce26ed7..25ef10b0e7 100644 --- a/src/bin/pg_basebackup/pg_subscriber.c +++ b/src/bin/pg_basebackup/pg_subscriber.c @@ -72,9 +72,13 @@ static void drop_subscription(PGconn *conn, LogicalRepInfo *dbinfo); static void set_replication_progress(PGconn *conn, LogicalRepInfo *dbinfo, const char *lsn); static void enable_subscription(PGconn *conn, LogicalRepInfo *dbinfo); +#define DEFAULT_WAIT 60 #define USEC_PER_SEC 1000000 +#define WAITS_PER_SEC 10 /* should divide USEC_PER_SEC evenly */ #define WAIT_INTERVAL 1 /* 1 second */ +static int wait_seconds = DEFAULT_WAIT; + /* Options */ static const char *progname; @@ -756,6 +760,9 @@ wait_for_end_recovery(const char *conninfo) PGconn *conn; PGresult *res; int status = POSTMASTER_STILL_STARTING; + int cnt; + int rc; + char *pg_ctl_cmd; pg_log_info("waiting the postmaster to reach the consistent state"); @@ -763,7 +770,7 @@ wait_for_end_recovery(const char *conninfo) if (conn == NULL) exit(1); - for (;;) + for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++) { bool in_recovery; @@ -796,11 +803,25 @@ wait_for_end_recovery(const char *conninfo) } /* Keep waiting.
*/ - pg_usleep(WAIT_INTERVAL * USEC_PER_SEC); + pg_usleep(USEC_PER_SEC / WAITS_PER_SEC); } disconnect_database(conn); + /* + * if timeout is reached exit the pg_subscriber and stop the standby node + */ + if (cnt >= wait_seconds * WAITS_PER_SEC) + { + pg_log_error("recovery timed out"); + + pg_ctl_cmd = psprintf("\"%s\" stop -D \"%s\" -s", pg_ctl_path, subscriber_dir); + rc = system(pg_ctl_cmd); + pg_ctl_status(pg_ctl_cmd, rc, 0); + + exit(1); + } + if (status == POSTMASTER_STILL_STARTING) { pg_log_error("server did not end recovery"); @@ -1160,6 +1181,7 @@ main(int argc, char **argv) struct stat statbuf; PGconn *conn; + PGresult *res; char *consistent_lsn; PQExpBuffer recoveryconfcontents = NULL; @@ -1167,6 +1189,7 @@ main(int argc, char **argv) char pidfile[MAXPGPATH]; int i; + bool in_recovery; pg_logging_init(argv[0]); pg_logging_set_level(PG_LOG_WARNING); @@ -1340,6 +1363,38 @@ main(int argc, char **argv) /* subscriber PID file. */ snprintf(pidfile, MAXPGPATH, "%s/postmaster.pid", subscriber_dir); + /* + * Exit the pg_subscriber if the node is not a standby server. + */ + conn = connect_database(dbinfo[0].subconninfo); + if (conn == NULL) + exit(1); + + res = PQexec(conn, "SELECT pg_catalog.pg_is_in_recovery()"); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + pg_log_error("could not obtain recovery progress"); + exit(1); + } + + if (PQntuples(res) != 1) + { + pg_log_error("unexpected result from pg_is_in_recovery function"); + exit(1); + } + + in_recovery = (strcmp(PQgetvalue(res, 0, 0), "t") == 0); + + if (!in_recovery) + { + pg_log_error("pg_subscriber is supported only on standby server"); + exit(1); + } + + PQclear(res); + disconnect_database(conn); + /* * Stop the subscriber if it is a standby server. Before executing the * transformation steps, make sure the subscriber is not running because -- 2.34.1