From f2f6ef266bdaa2670f08e061bf6a94f121be76d5 Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Tue, 7 Apr 2026 12:32:36 -0700 Subject: [PATCH v17a] instrumentation: Move ExecProcNodeInstr to allow inlining This moves the implementation of ExecProcNodeInstr, the ExecProcNode variant that gets used when instrumentation is on, to be defined in instrument.c instead of execProcnode.c, and marks functions it uses as inline. This allows compilers to generate an optimized implementation, and shows a 2 to 5% reduction in instrumentation overhead for queries that move lots of rows. Author: Lukas Fittl Suggested-by: Andres Freund Reviewed-by: Discussion: https://www.postgresql.org/message-id/flat/CAP53PkzdBK8VJ1fS4AZ481LgMN8f9mJiC39ZRHqkFUSYq6KWmg@mail.gmail.com --- src/include/executor/executor.h | 7 ++++++ src/backend/executor/execProcnode.c | 20 ---------------- src/backend/executor/instrument.c | 37 ++++++++++++++++++++++++----- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 491c4886506..6980c6dceda 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -303,6 +303,13 @@ extern void ExecEndNode(PlanState *node); extern void ExecShutdownNode(PlanState *node); extern void ExecSetTupleBound(int64 tuples_needed, PlanState *child_node); +/* + * ExecProcNodeInstr() is implemented in instrument.c, as that allows for + * inlining of the instrumentation functions, but thematically it ought to be + * in execProcnode.c. 
+ */ +extern TupleTableSlot *ExecProcNodeInstr(PlanState *node); + /* ---------------------------------------------------------------- * ExecProcNode diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 132fe37ef60..7c4c66e323f 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -121,7 +121,6 @@ #include "nodes/nodeFuncs.h" static TupleTableSlot *ExecProcNodeFirst(PlanState *node); -static TupleTableSlot *ExecProcNodeInstr(PlanState *node); static bool ExecShutdownNode_walker(PlanState *node, void *context); @@ -471,25 +470,6 @@ ExecProcNodeFirst(PlanState *node) } -/* - * ExecProcNode wrapper that performs instrumentation calls. By keeping - * this a separate function, we avoid overhead in the normal case where - * no instrumentation is wanted. - */ -static TupleTableSlot * -ExecProcNodeInstr(PlanState *node) -{ - TupleTableSlot *result; - - InstrStartNode(node->instrument); - - result = node->ExecProcNodeReal(node); - - InstrStopNode(node->instrument, TupIsNull(result) ? 
0.0 : 1.0); - - return result; -} - /* ---------------------------------------------------------------- * MultiExecProcNode diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index 4c3aec7fdee..ffbcd572133 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -15,7 +15,10 @@ #include +#include "executor/executor.h" #include "executor/instrument.h" +#include "executor/tuptable.h" +#include "nodes/execnodes.h" #include "portability/instr_time.h" #include "utils/guc_hooks.h" @@ -46,7 +49,7 @@ InstrInitOptions(Instrumentation *instr, int instrument_options) instr->need_timer = (instrument_options & INSTRUMENT_TIMER) != 0; } -void +inline void InstrStart(Instrumentation *instr) { if (instr->need_timer) @@ -125,14 +128,14 @@ InstrInitNode(NodeInstrumentation *instr, int instrument_options, bool async_mod } /* Entry to a plan node */ -void +inline void InstrStartNode(NodeInstrumentation *instr) { InstrStart(&instr->instr); } /* Exit from a plan node */ -void +inline void InstrStopNode(NodeInstrumentation *instr, double nTuples) { double save_tuplecount = instr->tuplecount; @@ -166,6 +169,28 @@ InstrStopNode(NodeInstrumentation *instr, double nTuples) } } +/* + * ExecProcNode wrapper that performs instrumentation calls. By keeping + * this a separate function, we avoid overhead in the normal case where + * no instrumentation is wanted. + * + * This is implemented in instrument.c as all the functions it calls directly + * are here, allowing them to be inlined even when not using LTO. + */ +TupleTableSlot * +ExecProcNodeInstr(PlanState *node) +{ + TupleTableSlot *result; + + InstrStartNode(node->instrument); + + result = node->ExecProcNodeReal(node); + + InstrStopNode(node->instrument, TupIsNull(result) ? 
0.0 : 1.0); + + return result; +} + /* Update tuple count */ void InstrUpdateTupleCount(NodeInstrumentation *instr, double nTuples) @@ -298,7 +323,7 @@ BufferUsageAdd(BufferUsage *dst, const BufferUsage *add) } /* dst += add - sub */ -void +inline void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub) @@ -328,7 +353,7 @@ BufferUsageAccumDiff(BufferUsage *dst, } /* helper functions for WAL usage accumulation */ -static void +static inline void WalUsageAdd(WalUsage *dst, WalUsage *add) { dst->wal_bytes += add->wal_bytes; @@ -338,7 +363,7 @@ WalUsageAdd(WalUsage *dst, WalUsage *add) dst->wal_buffers_full += add->wal_buffers_full; } -void +inline void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub) { dst->wal_bytes += add->wal_bytes - sub->wal_bytes; -- 2.53.0.1.gb2826b52eb