From 9e527947d49be7715cb01addda2890ff54ed5c16 Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Sun, 5 Apr 2026 19:30:56 -0700 Subject: [PATCH v15 7/9] instrumentation: Optimize ExecProcNodeInstr instructions by inlining For most queries, the bulk of the overhead of EXPLAIN ANALYZE happens in ExecProcNodeInstr when starting/stopping instrumentation for that node. Previously each ExecProcNodeInstr would check which instrumentation options are active in the InstrStartNode/InstrStopNode calls, and do the corresponding work (timers, instrumentation stack, etc.). These conditionals being checked for each tuple being emitted add up, and cause non-optimal set of instructions to be generated by the compiler. Because we already have an existing mechanism to specify a function pointer when instrumentation is enabled, we can instead create specialized functions that are tailored to the instrumentation options enabled, and avoid conditionals on subsequent ExecProcNodeInstr calls. This results in the overhead for EXPLAIN (ANALYZE, TIMING OFF, BUFFERS OFF) for a stress test with a large COUNT(*) that does many ExecProcNode calls from ~ 20% on top of actual runtime to ~ 3%. When using BUFFERS ON the same query goes from ~ 20% to ~ 10% on top of actual runtime. Author: Lukas Fittl Reviewed-by: Zsolt Parragi Discussion: https://www.postgresql.org/message-id/flat/CAP53PkxFP7i7-wy98ZmEJ11edYq-RrPvJoa4kzGhBBjERA4Nyw%40mail.gmail.com#e8dfd018a07d7f8d41565a079d40c564 --- src/backend/executor/execProcnode.c | 22 +---- src/backend/executor/instrument.c | 144 ++++++++++++++++++++++++---- src/include/executor/instrument.h | 5 + 3 files changed, 130 insertions(+), 41 deletions(-) diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index f006931c94d..ac400670fea 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -121,7 +121,6 @@ #include "nodes/nodeFuncs.h" static TupleTableSlot *ExecProcNodeFirst(PlanState *node); -static TupleTableSlot *ExecProcNodeInstr(PlanState *node); static bool ExecShutdownNode_walker(PlanState *node, void *context); static bool ExecFinalizeNodeInstrumentation_walker(PlanState *node, void *context); static bool ExecFinalizeWorkerInstrumentation_walker(PlanState *node, void *context); @@ -466,7 +465,7 @@ ExecProcNodeFirst(PlanState *node) * have ExecProcNode() directly call the relevant function from now on. */ if (node->instrument) - node->ExecProcNode = ExecProcNodeInstr; + node->ExecProcNode = InstrNodeSetupExecProcNode(node->instrument); else node->ExecProcNode = node->ExecProcNodeReal; @@ -474,25 +473,6 @@ ExecProcNodeFirst(PlanState *node) } -/* - * ExecProcNode wrapper that performs instrumentation calls. By keeping - * this a separate function, we avoid overhead in the normal case where - * no instrumentation is wanted. - */ -static TupleTableSlot * -ExecProcNodeInstr(PlanState *node) -{ - TupleTableSlot *result; - - InstrStartNode(node->instrument); - - result = node->ExecProcNodeReal(node); - - InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0); - - return result; -} - /* ---------------------------------------------------------------- * MultiExecProcNode diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index cfedbad0eba..5a17be9aa53 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -66,19 +66,25 @@ InstrInitOptions(Instrumentation *instr, int instrument_options) instr->need_timer = (instrument_options & INSTRUMENT_TIMER) != 0; } -void -InstrStart(Instrumentation *instr) +static inline void +InstrStartTimer(Instrumentation *instr) { - if (instr->need_timer) - { - if (!INSTR_TIME_IS_ZERO(instr->starttime)) - elog(ERROR, "InstrStart called twice in a row"); - else - INSTR_TIME_SET_CURRENT(instr->starttime); - } + Assert(INSTR_TIME_IS_ZERO(instr->starttime)); - if (instr->need_stack) - InstrPushStack(instr); + INSTR_TIME_SET_CURRENT(instr->starttime); +} + +static inline void +InstrStopTimer(Instrumentation *instr, instr_time *accum_time) +{ + instr_time endtime; + + Assert(!INSTR_TIME_IS_ZERO(instr->starttime)); + + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_ACCUM_DIFF(*accum_time, endtime, instr->starttime); + + INSTR_TIME_SET_ZERO(instr->starttime); } /* @@ -88,18 +94,13 @@ InstrStart(Instrumentation *instr) static inline void InstrStopCommon(Instrumentation *instr, instr_time *accum_time) { - instr_time endtime; - /* update the time only if the timer was requested */ if (instr->need_timer) { if (INSTR_TIME_IS_ZERO(instr->starttime)) elog(ERROR, "InstrStop called without start"); - INSTR_TIME_SET_CURRENT(endtime); - INSTR_TIME_ACCUM_DIFF(*accum_time, endtime, instr->starttime); - - INSTR_TIME_SET_ZERO(instr->starttime); + InstrStopTimer(instr, accum_time); } /* pop the stack, unless InstrStopFinalize previously cleaned up */ @@ -107,6 +108,16 @@ InstrStopCommon(Instrumentation *instr, instr_time *accum_time) InstrPopStack(instr); } +void +InstrStart(Instrumentation *instr) +{ + if (instr->need_timer) + InstrStartTimer(instr); + + if (instr->need_stack) + InstrPushStack(instr); +} + void InstrStop(Instrumentation *instr) { @@ -398,15 +409,14 @@ InstrInitNode(NodeInstrumentation *instr, int instrument_options, bool async_mod instr->async_mode = async_mode; } -/* Entry to a plan node */ +/* Entry to a plan node. If you modify this, check InstrNodeSetupExecProcNode. */ void InstrStartNode(NodeInstrumentation *instr) { InstrStart(&instr->instr); } - -/* Exit from a plan node */ +/* Exit from a plan node. If you modify this, check InstrNodeSetupExecProcNode. */ void InstrStopNode(NodeInstrumentation *instr, double nTuples) { @@ -495,6 +505,100 @@ InstrAggNode(NodeInstrumentation *dst, NodeInstrumentation *add) InstrAccumStack(&dst->instr, &add->instr); } +/* + * Specialized handling of instrumented ExecProcNode + * + * These functions are equivalent to running ExecProcNodeReal wrapped in + * InstrStartNode and InstrStopNode, but avoid the conditionals in the hot path + * by checking the instrumentation options when the ExecProcNode pointer gets + * first set, and then using a special-purpose function for each. This results + * in a more optimized set of compiled instructions. + */ + +#include "executor/tuptable.h" +#include "nodes/execnodes.h" + +/* Simplified pop: restore saved state instead of re-deriving from array */ +static inline void +InstrPopStackTo(Instrumentation *prev) +{ + Assert(instr_stack.stack_size > 0); + Assert(instr_stack.stack_size > 1 ? instr_stack.entries[instr_stack.stack_size - 2] == prev : &instr_top == prev); + instr_stack.entries[instr_stack.stack_size - 1]->on_stack = false; + instr_stack.stack_size--; + instr_stack.current = prev; +} + +static pg_attribute_always_inline TupleTableSlot * +ExecProcNodeInstr(PlanState *node, bool need_timer, bool need_stack) +{ + NodeInstrumentation *instr = node->instrument; + Instrumentation *prev = instr_stack.current; + TupleTableSlot *result; + + if (need_stack) + InstrPushStack(&instr->instr); + if (need_timer) + InstrStartTimer(&instr->instr); + + result = node->ExecProcNodeReal(node); + + if (need_timer) + InstrStopTimer(&instr->instr, &instr->counter); + if (need_stack) + InstrPopStackTo(prev); + + instr->running = true; + if (!TupIsNull(result)) + instr->tuplecount += 1.0; + + return result; +} + +static TupleTableSlot * +ExecProcNodeInstrFull(PlanState *node) +{ + return ExecProcNodeInstr(node, true, true); +} + +static TupleTableSlot * +ExecProcNodeInstrRowsStackOnly(PlanState *node) +{ + return ExecProcNodeInstr(node, false, true); +} + +static TupleTableSlot * +ExecProcNodeInstrRowsTimerOnly(PlanState *node) +{ + return ExecProcNodeInstr(node, true, false); +} + +static TupleTableSlot * +ExecProcNodeInstrRowsOnly(PlanState *node) +{ + return ExecProcNodeInstr(node, false, false); +} + +/* + * Returns an ExecProcNode wrapper that performs instrumentation calls, + * tailored to the instrumentation options enabled for the node. + */ +ExecProcNodeMtd +InstrNodeSetupExecProcNode(NodeInstrumentation *instr) +{ + bool need_timer = instr->instr.need_timer; + bool need_stack = instr->instr.need_stack; + + if (need_timer && need_stack) + return ExecProcNodeInstrFull; + else if (need_stack) + return ExecProcNodeInstrRowsStackOnly; + else if (need_timer) + return ExecProcNodeInstrRowsTimerOnly; + else + return ExecProcNodeInstrRowsOnly; +} + /* Trigger instrumentation handling */ TriggerInstrumentation * InstrAllocTrigger(QueryInstrumentation *qinstr, int instrument_options, int n) diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index 5bb698d686d..bd481afd0de 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -300,6 +300,11 @@ extern void InstrUpdateTupleCount(NodeInstrumentation *instr, double nTuples); extern void InstrEndLoop(NodeInstrumentation *instr); extern void InstrAggNode(NodeInstrumentation *dst, NodeInstrumentation *add); +typedef struct TupleTableSlot TupleTableSlot; +typedef struct PlanState PlanState; +typedef TupleTableSlot *(*ExecProcNodeMtd) (PlanState *pstate); +extern ExecProcNodeMtd InstrNodeSetupExecProcNode(NodeInstrumentation *instr); + extern TriggerInstrumentation *InstrAllocTrigger(QueryInstrumentation *qinstr, int instrument_options, int n); extern void InstrStartTrigger(TriggerInstrumentation *tginstr); -- 2.47.1