From 7b2e31cc1a11444ab20b045ddc4052a48f83602c Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Sat, 7 Mar 2026 17:52:24 -0800 Subject: [PATCH v11 7/9] instrumentation: Optimize ExecProcNodeInstr instructions by inlining For most queries, the bulk of the overhead of EXPLAIN ANALYZE happens in ExecProcNodeInstr when starting/stopping instrumentation for that node. Previously each ExecProcNodeInstr would check which instrumentation options are active in the InstrStartNode/InstrStopNode calls, and do the corresponding work (timers, instrumentation stack, etc.). These conditionals being checked for each tuple being emitted add up, and cause non-optimal set of instructions to be generated by the compiler. Because we already have an existing mechanism to specify a function pointer when instrumentation is enabled, we can instead create specialized functions that are tailored to the instrumentation options enabled, and avoid conditionals on subsequent ExecProcNodeInstr calls. This results in the overhead for EXPLAIN (ANALYZE, TIMING OFF, BUFFERS OFF) for a stress test with a large COUNT(*) that does many ExecProcNode calls from ~ 20% on top of actual runtime to ~ 3%. When using BUFFERS ON the same query goes from ~ 20% to ~ 10% on top of actual runtime. Author: Lukas Fittl Reviewed-by: Zsolt Parragi Discussion: https://www.postgresql.org/message-id/flat/CAP53PkxFP7i7-wy98ZmEJ11edYq-RrPvJoa4kzGhBBjERA4Nyw%40mail.gmail.com#e8dfd018a07d7f8d41565a079d40c564 --- src/backend/executor/execProcnode.c | 22 +--- src/backend/executor/instrument.c | 198 ++++++++++++++++++++-------- src/include/executor/instrument.h | 5 + 3 files changed, 148 insertions(+), 77 deletions(-) diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 21ad1b04a57..9f5698063f0 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -121,7 +121,6 @@ #include "nodes/nodeFuncs.h" static TupleTableSlot *ExecProcNodeFirst(PlanState *node); -static TupleTableSlot *ExecProcNodeInstr(PlanState *node); static bool ExecShutdownNode_walker(PlanState *node, void *context); static bool ExecRememberNodeInstrumentation_walker(PlanState *node, void *context); static bool ExecFinalizeNodeInstrumentation_walker(PlanState *node, void *context); @@ -465,7 +464,7 @@ ExecProcNodeFirst(PlanState *node) * have ExecProcNode() directly call the relevant function from now on. */ if (node->instrument) - node->ExecProcNode = ExecProcNodeInstr; + node->ExecProcNode = InstrNodeSetupExecProcNode(node->instrument); else node->ExecProcNode = node->ExecProcNodeReal; @@ -473,25 +472,6 @@ ExecProcNodeFirst(PlanState *node) } -/* - * ExecProcNode wrapper that performs instrumentation calls. By keeping - * this a separate function, we avoid overhead in the normal case where - * no instrumentation is wanted. - */ -static TupleTableSlot * -ExecProcNodeInstr(PlanState *node) -{ - TupleTableSlot *result; - - InstrStartNode(node->instrument); - - result = node->ExecProcNodeReal(node); - - InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0); - - return result; -} - /* ---------------------------------------------------------------- * MultiExecProcNode diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index 09d5ffe8651..4ea807e295f 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -59,29 +59,20 @@ InstrInitOptions(Instrumentation *instr, int instrument_options) instr->need_timer = (instrument_options & INSTRUMENT_TIMER) != 0; } -void -InstrStart(Instrumentation *instr) +static inline void +InstrStartTimer(Instrumentation *instr) { - if (instr->need_timer) - { - if (!INSTR_TIME_IS_ZERO(instr->starttime)) - elog(ERROR, "InstrStart called twice in a row"); - else - INSTR_TIME_SET_CURRENT(instr->starttime); - } + Assert(INSTR_TIME_IS_ZERO(instr->starttime)); - if (instr->need_stack) - InstrPushStack(instr); + INSTR_TIME_SET_CURRENT(instr->starttime); } -static void +static inline void InstrStopTimer(Instrumentation *instr) { instr_time endtime; - /* let's update the time only if the timer was requested */ - if (INSTR_TIME_IS_ZERO(instr->starttime)) - elog(ERROR, "InstrStop called without start"); + Assert(!INSTR_TIME_IS_ZERO(instr->starttime)); INSTR_TIME_SET_CURRENT(endtime); INSTR_TIME_ACCUM_DIFF(instr->total, endtime, instr->starttime); @@ -89,6 +80,16 @@ InstrStopTimer(Instrumentation *instr) INSTR_TIME_SET_ZERO(instr->starttime); } +void +InstrStart(Instrumentation *instr) +{ + if (instr->need_timer) + InstrStartTimer(instr); + + if (instr->need_stack) + InstrPushStack(instr); +} + void InstrStop(Instrumentation *instr) { @@ -372,65 +373,57 @@ InstrInitNode(NodeInstrumentation *instr, int instrument_options) InstrInitOptions(&instr->instr, instrument_options); } -/* Entry to a plan node */ +/* Entry to a plan node. If you modify this, check InstrNodeSetupExecProcNode. */ void InstrStartNode(NodeInstrumentation *instr) { InstrStart(&instr->instr); } - -/* Exit from a plan node */ -void -InstrStopNode(NodeInstrumentation *instr, double nTuples) +/* + * Updates the node instrumentation time counter. + * + * Note this is different from InstrStop because total is only updated in + * InstrEndLoop. We need the separate counter variable because we need to + * calculate start-up time for the first tuple in each cycle, and then + * accumulate it together. + */ +static inline void +InstrStopNodeTimer(NodeInstrumentation *instr) { - double save_tuplecount = instr->tuplecount; instr_time endtime; - /* count the returned tuples */ - instr->tuplecount += nTuples; + Assert(!INSTR_TIME_IS_ZERO(instr->instr.starttime)); + + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_ACCUM_DIFF(instr->counter, endtime, instr->instr.starttime); + INSTR_TIME_SET_ZERO(instr->instr.starttime); /* - * Update the time only if the timer was requested. + * Is this the first tuple of this cycle? * - * Note this is different from InstrStop because total is only updated in - * InstrEndLoop. We need the separate counter variable because we need to - * calculate start-up time for the first tuple in each cycle, and then - * accumulate it together. + * In async mode, if the plan node hadn't emitted any tuples before, this + * might be the first tuple */ - if (instr->instr.need_timer) - { - if (INSTR_TIME_IS_ZERO(instr->instr.starttime)) - elog(ERROR, "InstrStopNode called without start"); - - INSTR_TIME_SET_CURRENT(endtime); - INSTR_TIME_ACCUM_DIFF(instr->counter, endtime, instr->instr.starttime); + if (!instr->running || (instr->async_mode && instr->tuplecount < 1.0)) + instr->firsttuple = instr->counter; +} - INSTR_TIME_SET_ZERO(instr->instr.starttime); - } +/* Exit from a plan node. If you modify this, check InstrNodeSetupExecProcNode. */ +void +InstrStopNode(NodeInstrumentation *instr, double nTuples) +{ + if (instr->instr.need_timer) + InstrStopNodeTimer(instr); - /* - * Only pop the stack, accumulation runs in - * ExecFinalizeNodeInstrumentation - */ + /* Only pop the stack, accumulation runs in InstrFinalizeNode */ if (instr->instr.need_stack) InstrPopStack(&instr->instr); - /* Is this the first tuple of this cycle? */ - if (!instr->running) - { - instr->running = true; - instr->firsttuple = instr->counter; - } - else - { - /* - * In async mode, if the plan node hadn't emitted any tuples before, - * this might be the first tuple - */ - if (instr->async_mode && save_tuplecount < 1.0) - instr->firsttuple = instr->counter; - } + instr->running = true; + + /* count the returned tuples */ + instr->tuplecount += nTuples; } /* Update tuple count */ @@ -495,6 +488,99 @@ InstrAggNode(NodeInstrumentation *dst, NodeInstrumentation *add) InstrAccumStack(&dst->instr, &add->instr); } +/* + * Specialized handling of instrumented ExecProcNode + * + * These functions are equivalent to running ExecProcNodeReal wrapped in + * InstrStartNode and InstrStopNode, but avoid the conditionals in the hot path + * by checking the instrumentation options when the ExecProcNode pointer gets + * first set, and then using a special-purpose function for each. This results + * in a more optimized set of compiled instructions. + */ + +#include "executor/tuptable.h" +#include "nodes/execnodes.h" + +/* Simplified pop: restore saved state instead of re-deriving from array */ +static inline void +InstrPopStackTo(Instrumentation *prev) +{ + Assert(instr_stack.stack_size > 0); + Assert(instr_stack.stack_size > 1 ? instr_stack.entries[instr_stack.stack_size - 2] == prev : &instr_top == prev); + instr_stack.stack_size--; + instr_stack.current = prev; +} + +static inline TupleTableSlot * +ExecProcNodeInstr(PlanState *node, bool need_timer, bool need_stack) +{ + NodeInstrumentation *instr = node->instrument; + Instrumentation *prev = instr_stack.current; + TupleTableSlot *result; + + if (need_stack) + InstrPushStack(&instr->instr); + if (need_timer) + InstrStartTimer(&instr->instr); + + result = node->ExecProcNodeReal(node); + + if (need_timer) + InstrStopNodeTimer(instr); + if (need_stack) + InstrPopStackTo(prev); + + instr->running = true; + if (!TupIsNull(result)) + instr->tuplecount += 1.0; + + return result; +} + +static TupleTableSlot * +ExecProcNodeInstrFull(PlanState *node) +{ + return ExecProcNodeInstr(node, true, true); +} + +static TupleTableSlot * +ExecProcNodeInstrRowsStackOnly(PlanState *node) +{ + return ExecProcNodeInstr(node, false, true); +} + +static TupleTableSlot * +ExecProcNodeInstrRowsTimerOnly(PlanState *node) +{ + return ExecProcNodeInstr(node, true, false); +} + +static TupleTableSlot * +ExecProcNodeInstrRowsOnly(PlanState *node) +{ + return ExecProcNodeInstr(node, false, false); +} + +/* + * Returns an ExecProcNode wrapper that performs instrumentation calls, + * tailored to the instrumentation options enabled for the node. + */ +ExecProcNodeMtd +InstrNodeSetupExecProcNode(NodeInstrumentation *instr) +{ + bool need_timer = instr->instr.need_timer; + bool need_stack = instr->instr.need_stack; + + if (need_timer && need_stack) + return ExecProcNodeInstrFull; + else if (need_stack) + return ExecProcNodeInstrRowsStackOnly; + else if (need_timer) + return ExecProcNodeInstrRowsTimerOnly; + else + return ExecProcNodeInstrRowsOnly; +} + /* Trigger instrumentation handling */ TriggerInstrumentation * InstrAllocTrigger(QueryInstrumentation *qinstr, int n) diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index b30a15bc027..cad052a3a90 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -294,6 +294,11 @@ extern void InstrUpdateTupleCount(NodeInstrumentation *instr, double nTuples); extern void InstrEndLoop(NodeInstrumentation *instr); extern void InstrAggNode(NodeInstrumentation *dst, NodeInstrumentation *add); +typedef struct TupleTableSlot TupleTableSlot; +typedef struct PlanState PlanState; +typedef TupleTableSlot *(*ExecProcNodeMtd) (PlanState *pstate); +extern ExecProcNodeMtd InstrNodeSetupExecProcNode(NodeInstrumentation *instr); + extern TriggerInstrumentation *InstrAllocTrigger(QueryInstrumentation *qinstr, int n); extern void InstrStartTrigger(QueryInstrumentation *qinstr, TriggerInstrumentation *tginstr); -- 2.47.1