Re: Drop replslot after pgstat_shutdown cause assert coredump - Mailing list pgsql-hackers
| From | Kyotaro Horiguchi |
|---|---|
| Subject | Re: Drop replslot after pgstat_shutdown cause assert coredump |
| Date | |
| Msg-id | 20211022.114308.1275563786142184967.horikyota.ntt@gmail.com Whole thread Raw |
| In response to | Re: Drop replslot after pgstat_shutdown cause assert coredump (Kyotaro Horiguchi <horikyota.ntt@gmail.com>) |
| Responses |
Re: Drop replslot after pgstat_shutdown cause assert coredump
|
| List | pgsql-hackers |
I said:
> Considering the coming shared-memory based stats collector, pgstat
> must be shutdown before shared memory shutdown. Every operation that
> requires stats collector also must be shut down before the pgstat
> shutdown. A naive solution would be having before-pgstat-shutdown hook
> but I'm not sure it's the right direction.
For this particular issue, we can add an explicit initilization phase
of replication slot per backend, which simply registers before_shmem
callback. It would work fine unless we carelessly place the
initialization before pgstat_initialize() (not pgstat_init()) call.
(Honestly, I haven't been able to reproduce the issue itself for
myself yet..)
> > on_proc_exit_list (I'm not sure if this change is safe, though).
> > Or maybe pgstat logic for replication slot drop needs to be
> > overhauled.
>
> I think we don't want to lose the stats numbers of the to-be-dropped
> slot. So the slot-drop must happen before pgstat shutdown.
I haven't sought other similar issues. I'm going to check it if they,
if any, can be fixe the same way.
regards.
--
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index b7d0fbaefd..13762f82af 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -306,6 +306,8 @@ static bool pgstat_is_initialized = false;
static bool pgstat_is_shutdown = false;
#endif
+/* per-backend variable for assertion */
+bool pgstat_initialized PG_USED_FOR_ASSERTS_ONLY = false;
/* ----------
* Local function forward declarations
@@ -3036,6 +3038,7 @@ pgstat_initialize(void)
/* Set up a process-exit hook to clean up */
before_shmem_exit(pgstat_shutdown_hook, 0);
+ pgstat_initialized = true;
#ifdef USE_ASSERT_CHECKING
pgstat_is_initialized = true;
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 1c6c0c7ce2..e0430aefa9 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -46,6 +46,7 @@
#include "pgstat.h"
#include "replication/slot.h"
#include "storage/fd.h"
+#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/builtins.h"
@@ -160,6 +161,33 @@ ReplicationSlotsShmemInit(void)
}
}
+/*
+ * Exit hook to cleanup replication slots.
+ */
+static void
+ReplicationSlotShutdown(int code, Datum arg)
+{
+ /* Make sure active replication slots are released */
+ if (MyReplicationSlot != NULL)
+ ReplicationSlotRelease();
+
+ /* Also cleanup all the temporary slots. */
+ ReplicationSlotCleanup();
+}
+
+/*
+ * Initialize of replication slot facility per backend.
+ */
+void
+ReplicationSlotInit(void)
+{
+ if (max_replication_slots > 0)
+ {
+ assert_pgstat_initialized();
+ before_shmem_exit(ReplicationSlotShutdown, (Datum) 0);
+ }
+}
+
/*
* Check whether the passed slot name is valid and report errors at elevel.
*
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index b7d9da0aa9..b593ec8964 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -41,7 +41,6 @@
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
-#include "replication/slot.h"
#include "replication/syncrep.h"
#include "replication/walsender.h"
#include "storage/condition_variable.h"
@@ -847,13 +846,6 @@ ProcKill(int code, Datum arg)
/* Cancel any pending condition variable sleep, too */
ConditionVariableCancelSleep();
- /* Make sure active replication slots are released */
- if (MyReplicationSlot != NULL)
- ReplicationSlotRelease();
-
- /* Also cleanup all the temporary slots. */
- ReplicationSlotCleanup();
-
/*
* Detach from any lock group of which we are a member. If the leader
* exist before all other group members, its PGPROC will remain allocated
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 78bc64671e..dd83864b54 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -40,6 +40,7 @@
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/postmaster.h"
+#include "replication/slot.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
@@ -531,6 +532,12 @@ BaseInit(void)
*/
pgstat_initialize();
+ /*
+ * Initialize replication slot. This must be after pgstat_initialize() so
+ * that the cleanup happnes before the shutdown of pgstat facility.
+ */
+ ReplicationSlotInit();
+
/* Do local initialization of storage and buffer managers */
InitSync();
smgrinit();
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index bcd3588ea2..f06810c115 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -992,6 +992,14 @@ extern PgStat_Counter pgStatTransactionIdleTime;
*/
extern SessionEndType pgStatSessionEndCause;
+/*
+ * modules requires pgstat required to install their before-shmem hook after
+ * pgstat. This variable is used to make sure that.
+ */
+extern bool pgstat_initialized;
+#define assert_pgstat_initialized() Assert (pgstat_initialized);
+
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
index 53d773ccff..124d107662 100644
--- a/src/include/replication/slot.h
+++ b/src/include/replication/slot.h
@@ -193,6 +193,9 @@ extern PGDLLIMPORT int max_replication_slots;
extern Size ReplicationSlotsShmemSize(void);
extern void ReplicationSlotsShmemInit(void);
+/* per-backend initialization */
+extern void ReplicationSlotInit(void);
+
/* management of individual slots */
extern void ReplicationSlotCreate(const char *name, bool db_specific,
ReplicationSlotPersistency p, bool two_phase);
pgsql-hackers by date: