From e3d1f39138babfebd28a92f4dd8136eeb173d2c2 Mon Sep 17 00:00:00 2001 From: Jehan-Guillaume de Rorthais Date: Fri, 11 Jun 2021 18:17:47 +0200 Subject: [PATCH v1] PoC: Trace wait events to logfile when log_statement_stats=on --- src/backend/storage/lmgr/lwlock.c | 19 +++ src/backend/storage/lmgr/proc.c | 2 + src/backend/tcop/postgres.c | 48 ++++++- src/backend/utils/activity/wait_event.c | 158 ++++++++++++++++++++++++ src/backend/utils/adt/pgstatfuncs.c | 74 +++++++++++ src/include/catalog/pg_proc.dat | 9 ++ src/include/storage/lock.h | 1 + src/include/storage/lwlock.h | 1 + src/include/utils/wait_event.h | 77 ++++++++++++ 9 files changed, 387 insertions(+), 2 deletions(-) diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 55b9d7970e..e9a120805b 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -634,6 +634,25 @@ LWLockNewTrancheId(void) return result; } +/* + * Get a last tranche ID. + */ +int +LWLockGetLastTrancheId(void) +{ + int result; + int *LWLockCounter; + + Assert(!lock_named_request_allowed); + + LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int)); + SpinLockAcquire(ShmemLock); + result = *LWLockCounter; + SpinLockRelease(ShmemLock); + + return result; +} + /* * Register a dynamic tranche name in the lookup table of the current process. * diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 2575ea1ca0..52891c26a9 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -450,6 +450,8 @@ InitProcess(void) /* now that we have a proc, report wait events to shared memory */ pgstat_set_wait_event_storage(&MyProc->wait_event_info); + /* init wait event tracking structure*/ + pgstat_init_waitaccums(); /* * We might be reusing a semaphore that belonged to a failed process. So diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 8cea10c901..25500e8f76 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -207,7 +207,6 @@ static void log_disconnections(int code, Datum arg); static void enable_statement_timeout(void); static void disable_statement_timeout(void); - /* ---------------------------------------------------------------- * routines to obtain user input * ---------------------------------------------------------------- @@ -978,7 +977,10 @@ exec_simple_query(const char *query_string) * results because ResetUsage wasn't called. */ if (save_log_statement_stats) + { ResetUsage(); + pgstat_set_report_waits(); + } /* * Start up a transaction command. All queries generated by the @@ -1308,7 +1310,10 @@ exec_simple_query(const char *query_string) } if (save_log_statement_stats) + { ShowUsage("QUERY STATISTICS"); + pgstat_reset_report_waits(); + } TRACE_POSTGRESQL_QUERY_DONE(query_string); @@ -1346,7 +1351,10 @@ exec_parse_message(const char *query_string, /* string to execute */ set_ps_display("PARSE"); if (save_log_statement_stats) + { ResetUsage(); + pgstat_set_report_waits(); + } ereport(DEBUG2, (errmsg_internal("parse %s: %s", @@ -1567,7 +1575,10 @@ exec_parse_message(const char *query_string, /* string to execute */ } if (save_log_statement_stats) + { ShowUsage("PARSE MESSAGE STATISTICS"); + pgstat_reset_report_waits(); + } debug_query_string = NULL; } @@ -1637,7 +1648,10 @@ exec_bind_message(StringInfo input_message) set_ps_display("BIND"); if (save_log_statement_stats) + { ResetUsage(); + pgstat_set_report_waits(); + } /* * Start up a transaction command so we can call functions etc. (Note that @@ -2029,7 +2043,10 @@ exec_bind_message(StringInfo input_message) } if (save_log_statement_stats) + { ShowUsage("BIND MESSAGE STATISTICS"); + pgstat_reset_report_waits(); + } debug_query_string = NULL; } @@ -2122,7 +2139,10 @@ exec_execute_message(const char *portal_name, long max_rows) set_ps_display(GetCommandTagName(portal->commandTag)); if (save_log_statement_stats) + { ResetUsage(); + pgstat_set_report_waits(); + } BeginCommand(portal->commandTag, dest); @@ -2269,7 +2289,10 @@ exec_execute_message(const char *portal_name, long max_rows) } if (save_log_statement_stats) + { ShowUsage("EXECUTE MESSAGE STATISTICS"); + pgstat_reset_report_waits(); + } debug_query_string = NULL; } @@ -3914,7 +3937,6 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx, #endif } - /* ---------------------------------------------------------------- * PostgresMain * postgres main loop -- all backends, interactive or otherwise start here @@ -4795,10 +4817,13 @@ void ShowUsage(const char *title) { StringInfoData str; + WaitAccumEntry *entry = wa_events.events; struct timeval user, sys; struct timeval elapse_t; struct rusage r; + bool save_log_statement_stats = log_statement_stats; + int i; getrusage(RUSAGE_SELF, &r); gettimeofday(&elapse_t, NULL); @@ -4879,6 +4904,25 @@ ShowUsage(const char *title) r.ru_nvcsw, r.ru_nivcsw); #endif /* HAVE_GETRUSAGE */ + if (save_log_statement_stats) + { + for (i = 0; i < wa_events.num_events; i++, entry++) + { + const char *wait_event_type = NULL; + const char *wait_event = NULL; + + if (entry->calls < 1) + continue; + + wait_event_type = pgstat_get_wait_event_type(entry->wait_event_info); + wait_event = pgstat_get_wait_event(entry->wait_event_info); + + appendStringInfo(&str, "!\t%s/%s %lu calls, %lu us elapsed\n", + wait_event_type, wait_event, entry->calls, + INSTR_TIME_GET_MICROSEC(entry->times)); + } + } + /* remove trailing newline */ if (str.data[str.len - 1] == '\n') str.data[--str.len] = '\0'; diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c index 6baf67740c..61dd125341 100644 --- a/src/backend/utils/activity/wait_event.c +++ b/src/backend/utils/activity/wait_event.c @@ -32,11 +32,127 @@ static const char *pgstat_get_wait_client(WaitEventClient w); static const char *pgstat_get_wait_ipc(WaitEventIPC w); static const char *pgstat_get_wait_timeout(WaitEventTimeout w); static const char *pgstat_get_wait_io(WaitEventIO w); +static inline void accum_pgstat_report_wait_start(); +static inline void accum_pgstat_report_wait_end(); static uint32 local_my_wait_event_info; uint32 *my_wait_event_info = &local_my_wait_event_info; +WaitEvents wa_events; +instr_time waitEventStart; +report_wait_start_function my_pgstat_report_wait_start = standard_pgstat_report_wait_start; +report_wait_end_function my_pgstat_report_wait_end = standard_pgstat_report_wait_end; + +static WaitAccumEntry * +pgstat_get_wait_entry(uint32 wait_event_info) +{ + uint32 classId; + uint16 eventId; + WaitAccumEntry *entry = NULL; + + /* report process as not waiting. */ + if (wait_event_info == 0) + return NULL; + + classId = wait_event_info & 0xFF000000; + eventId = wait_event_info & 0x0000FFFF; + + switch (classId) + { + case PG_WAIT_LWLOCK: + entry = wa_events.wa_lwlock + eventId; + break; + case PG_WAIT_LOCK: + entry = wa_events.wa_lock + eventId; + break; + case PG_WAIT_BUFFER_PIN: + entry = wa_events.wa_buffer_pin + eventId; + break; + case PG_WAIT_ACTIVITY: + entry = wa_events.wa_activity + eventId; + break; + case PG_WAIT_CLIENT: + entry = wa_events.wa_client + eventId; + break; + // FIXME + // case PG_WAIT_EXTENSION: + // break; + case PG_WAIT_IPC: + entry = wa_events.wa_ipc + eventId; + break; + case PG_WAIT_TIMEOUT: + entry = wa_events.wa_timeout + eventId; + break; + case PG_WAIT_IO: + entry = wa_events.wa_io + eventId; + break; + default: + break; + } + + return entry; +} + +void +pgstat_init_waitaccums(void) +{ + WaitAccumEntry *event; + int num_lwlocks; + int i; + + num_lwlocks = LWLockGetLastTrancheId() +1; + wa_events.num_events = num_lwlocks + + PG_WAIT_ACTIVITY_NUM + + PG_WAIT_CLIENT_NUM + + PG_WAIT_IPC_NUM + + PG_WAIT_TIMEOUT_NUM + + PG_WAIT_IO_NUM + + LOCKTAG_NUM + + PG_WAIT_BUFFER_PIN_NUM; + + wa_events.events = MemoryContextAllocZero(TopMemoryContext, + wa_events.num_events + * sizeof(WaitAccumEntry)); + + event = wa_events.events; + + // FIXME: support extensions + + wa_events.wa_lwlock = event; + for (i = 0; i < num_lwlocks; i++, event++) + event->wait_event_info = PG_WAIT_LWLOCK|i; + + wa_events.wa_lock = event; + for (i = 0; i < LOCKTAG_NUM; i++, event++) + event->wait_event_info = PG_WAIT_LOCK|i; + + wa_events.wa_buffer_pin = event; + for (i = 0; i < PG_WAIT_BUFFER_PIN_NUM; i++, event++) + event->wait_event_info = PG_WAIT_BUFFER_PIN|i; + + wa_events.wa_activity = event; + for (i = 0; i < PG_WAIT_ACTIVITY_NUM; i++, event++) + event->wait_event_info = PG_WAIT_ACTIVITY|i; + + wa_events.wa_client = event; + for (i = 0; i < PG_WAIT_CLIENT_NUM; i++, event++) + event->wait_event_info = PG_WAIT_CLIENT|i; + + wa_events.wa_ipc = event; + for (i = 0; i < PG_WAIT_IPC_NUM; i++, event++) + event->wait_event_info = PG_WAIT_IPC|i; + + wa_events.wa_timeout = event; + for (i = 0; i < PG_WAIT_TIMEOUT_NUM; i++, event++) + event->wait_event_info = PG_WAIT_TIMEOUT|i; + + wa_events.wa_io = event; + for (i = 0; i < PG_WAIT_IO_NUM; i++, event++) + event->wait_event_info = PG_WAIT_IO|i; + + pgstat_reset_report_waits(); +} /* * Configure wait event reporting to report wait events to *wait_event_info. @@ -732,3 +848,45 @@ pgstat_get_wait_io(WaitEventIO w) return event_name; } + +static inline void +accum_pgstat_report_wait_start(uint32 wait_event_info) +{ + standard_pgstat_report_wait_start(wait_event_info); + + INSTR_TIME_SET_CURRENT(waitEventStart); +} + +static inline void +accum_pgstat_report_wait_end(void) +{ + WaitAccumEntry *entry; + instr_time diff; + + INSTR_TIME_SET_CURRENT(diff); + INSTR_TIME_SUBTRACT(diff, waitEventStart); + + entry = pgstat_get_wait_entry(*my_wait_event_info); + + if (entry) + { + entry->calls++; + INSTR_TIME_ADD(entry->times, diff); + } + + standard_pgstat_report_wait_end(); +} + +void +pgstat_set_report_waits(void) +{ + my_pgstat_report_wait_start = accum_pgstat_report_wait_start; + my_pgstat_report_wait_end = accum_pgstat_report_wait_end; +} + +void +pgstat_reset_report_waits(void) +{ + my_pgstat_report_wait_start = standard_pgstat_report_wait_start; + my_pgstat_report_wait_end = standard_pgstat_report_wait_end; +} diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 14056f5347..cf4db57bff 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -2380,3 +2380,77 @@ pg_stat_get_replication_slot(PG_FUNCTION_ARGS) /* Returns the record as Datum */ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); } + +Datum +pg_stat_get_waitaccum(PG_FUNCTION_ARGS) +{ +#define PG_STAT_GET_WAITACCUM_COLS 4 + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + WaitAccumEntry *entry = wa_events.events; + int i; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not " \ + "allowed in this context"))); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + MemoryContextSwitchTo(oldcontext); + + for (i = 0; i < wa_events.num_events; i++, entry++) + { + Datum values[PG_STAT_GET_WAITACCUM_COLS]; + bool nulls[PG_STAT_GET_WAITACCUM_COLS]; + const char *wait_event_type = NULL; + const char *wait_event = NULL; + + /* Initialise values and NULL flags arrays */ + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + /* Fill values and NULLs */ + { + uint32 raw_wait_event; + + raw_wait_event = UINT32_ACCESS_ONCE(entry->wait_event_info); + wait_event_type = pgstat_get_wait_event_type(raw_wait_event); + wait_event = pgstat_get_wait_event(raw_wait_event); + } + + values[0] = CStringGetTextDatum(wait_event_type); + + values[1] = CStringGetTextDatum(wait_event); + + values[2] = Int64GetDatum(entry->calls); + + values[3] = UInt64GetDatum(INSTR_TIME_GET_MICROSEC(entry->times)); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + + /* clean up and return the tuplestore */ + tuplestore_donestoring(tupstore); + + return (Datum) 0; +} diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index acbcae4607..b066863a97 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5284,6 +5284,15 @@ proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', proargnames => '{pid,datid,pid,usesysid,application_name,state,query,wait_event_type,wait_event,xact_start,query_start,backend_start,state_change,client_addr,client_hostname,client_port,backend_xid,backend_xmin,backend_type,ssl,sslversion,sslcipher,sslbits,ssl_client_dn,ssl_client_serial,ssl_issuer_dn,gss_auth,gss_princ,gss_enc,leader_pid,query_id}', prosrc => 'pg_stat_get_activity' }, +{ oid => '8316', + descr => 'statistics: accumulated wait event stats', + proname => 'pg_stat_get_waitaccum', prorows => '200', proisstrict => 'f', + proretset => 't', provolatile => 'v', proparallel => 'r', + prorettype => 'record', proargtypes => '', + proallargtypes => '{text,text,int8,int8}', + proargmodes => '{o,o,o,o}', + proargnames => '{wait_event_type,wait_event,calls,times}', + prosrc => 'pg_stat_get_waitaccum' }, { oid => '3318', descr => 'statistics: information about progress of backends running maintenance command', proname => 'pg_stat_get_progress_info', prorows => '100', proretset => 't', diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 9b2a421c32..885664fa54 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -152,6 +152,7 @@ typedef enum LockTagType } LockTagType; #define LOCKTAG_LAST_TYPE LOCKTAG_ADVISORY +#define LOCKTAG_NUM LOCKTAG_LAST_TYPE extern const char *const LockTagTypeNames[]; diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index a8f052e484..e6638b2c45 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -154,6 +154,7 @@ extern LWLockPadded *GetNamedLWLockTranche(const char *tranche_name); * registration in the main shared memory segment wouldn't work for that case. */ extern int LWLockNewTrancheId(void); +extern int LWLockGetLastTrancheId(void); extern void LWLockRegisterTranche(int tranche_id, const char *tranche_name); extern void LWLockInitialize(LWLock *lock, int tranche_id); diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h index 6c6ec2e711..9da4b714e2 100644 --- a/src/include/utils/wait_event.h +++ b/src/include/utils/wait_event.h @@ -10,6 +10,8 @@ #ifndef WAIT_EVENT_H #define WAIT_EVENT_H +#include "utils/memutils.h" +#include "portability/instr_time.h" /* ---------- * Wait Classes @@ -25,6 +27,40 @@ #define PG_WAIT_TIMEOUT 0x09000000U #define PG_WAIT_IO 0x0A000000U +/* ---------- + * WaitAccumEntry Entry in backend/background's per-wait_event_info hash table + * ---------- + */ +typedef struct WaitAccumEntry +{ + uint32 wait_event_info; + uint64 calls; + instr_time times; +} WaitAccumEntry; + +typedef struct WaitEvents +{ + uint32 num_events; + WaitAccumEntry *events; + WaitAccumEntry *wa_lwlock; + WaitAccumEntry *wa_lock; + WaitAccumEntry *wa_buffer_pin; + WaitAccumEntry *wa_activity; + WaitAccumEntry *wa_client; + // FIXME WaitAccumEntry *wa_extension; + WaitAccumEntry *wa_ipc; + WaitAccumEntry *wa_timeout; + WaitAccumEntry *wa_io; +} WaitEvents; + +extern WaitEvents wa_events; + +/* + * There's only one entry in PG_WAIT_BUFFER_PIN class: + * PG_WAIT_BUFFER_PIN itself + */ +#define PG_WAIT_BUFFER_PIN_NUM 1 + /* ---------- * Wait Events - Activity * @@ -50,6 +86,9 @@ typedef enum WAIT_EVENT_WAL_WRITER_MAIN } WaitEventActivity; +#define PG_WAIT_ACTIVITY_LAST_TYPE WAIT_EVENT_WAL_WRITER_MAIN +#define PG_WAIT_ACTIVITY_NUM ( WAIT_EVENT_WAL_WRITER_MAIN - PG_WAIT_ACTIVITY ) + /* ---------- * Wait Events - Client * @@ -70,6 +109,9 @@ typedef enum WAIT_EVENT_WAL_SENDER_WRITE_DATA, } WaitEventClient; +#define PG_WAIT_CLIENT_LAST_TYPE WAIT_EVENT_WAL_SENDER_WRITE_DATA +#define PG_WAIT_CLIENT_NUM ( PG_WAIT_CLIENT_LAST_TYPE - PG_WAIT_CLIENT ) + /* ---------- * Wait Events - IPC * @@ -128,6 +170,9 @@ typedef enum WAIT_EVENT_XACT_GROUP_UPDATE } WaitEventIPC; +#define PG_WAIT_IPC_LAST_TYPE WAIT_EVENT_XACT_GROUP_UPDATE +#define PG_WAIT_IPC_NUM ( PG_WAIT_IPC_LAST_TYPE - PG_WAIT_IPC ) + /* ---------- * Wait Events - Timeout * @@ -143,6 +188,9 @@ typedef enum WAIT_EVENT_VACUUM_DELAY } WaitEventTimeout; +#define PG_WAIT_TIMEOUT_LAST_TYPE WAIT_EVENT_VACUUM_DELAY +#define PG_WAIT_TIMEOUT_NUM ( PG_WAIT_TIMEOUT_LAST_TYPE - PG_WAIT_TIMEOUT ) + /* ---------- * Wait Events - IO * @@ -227,14 +275,30 @@ typedef enum WAIT_EVENT_LOGICAL_SUBXACT_WRITE } WaitEventIO; +#define PG_WAIT_IO_LAST_TYPE WAIT_EVENT_LOGICAL_SUBXACT_WRITE +#define PG_WAIT_IO_NUM ( PG_WAIT_IO_LAST_TYPE - PG_WAIT_IO ) + +/* track start time of current wait event */ +extern instr_time waitEventStart; + +void pgstat_init_waitaccums(void); extern const char *pgstat_get_wait_event(uint32 wait_event_info); extern const char *pgstat_get_wait_event_type(uint32 wait_event_info); static inline void pgstat_report_wait_start(uint32 wait_event_info); +static inline void standard_pgstat_report_wait_start(uint32 wait_event_info); static inline void pgstat_report_wait_end(void); +static inline void standard_pgstat_report_wait_end(void); +extern void pgstat_set_report_waits(void); +extern void pgstat_reset_report_waits(void); extern void pgstat_set_wait_event_storage(uint32 *wait_event_info); extern void pgstat_reset_wait_event_storage(void); +typedef void (*report_wait_start_function) (uint32 wait_event_info); +extern PGDLLIMPORT report_wait_start_function my_pgstat_report_wait_start; +typedef void (*report_wait_end_function) (); +extern PGDLLIMPORT report_wait_end_function my_pgstat_report_wait_end; + extern PGDLLIMPORT uint32 *my_wait_event_info; @@ -259,6 +323,12 @@ extern PGDLLIMPORT uint32 *my_wait_event_info; */ static inline void pgstat_report_wait_start(uint32 wait_event_info) +{ + (*my_pgstat_report_wait_start)(wait_event_info); +} + +static inline void +standard_pgstat_report_wait_start(uint32 wait_event_info) { /* * Since this is a four-byte field which is always read and written as @@ -275,6 +345,13 @@ pgstat_report_wait_start(uint32 wait_event_info) */ static inline void pgstat_report_wait_end(void) +{ + (*my_pgstat_report_wait_end)(); +} + + +static inline void +standard_pgstat_report_wait_end(void) { /* see pgstat_report_wait_start() */ *(volatile uint32 *) my_wait_event_info = 0; -- 2.20.1