From 09b8c12569671525de5741e77710a905b1bbd1a0 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Thu, 26 Sep 2019 11:23:43 -0700 Subject: [PATCH v1 2/2] Don't generate EEOP_*_FETCHSOME operations for slots know to be virtual. That avoids unnecessary work during both interpreted and JIT compiled expression evaluation. Author: Soumyadeep Chakraborty, Andres Freund Discussion: https://postgr.es/m/CAE-ML+9OKSN71+mHtfMD-L24oDp8dGTfaVjDU6U+j+FNAW5kRQ@mail.gmail.com --- src/backend/executor/execExpr.c | 43 ++++++--- src/backend/executor/execExprInterp.c | 133 +++++++++++++++++++++++++- src/backend/jit/llvm/llvmjit_expr.c | 6 +- 3 files changed, 160 insertions(+), 22 deletions(-) diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 39442f8866f..1ff13d1079a 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -65,7 +65,7 @@ static void ExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args, static void ExecInitExprSlots(ExprState *state, Node *node); static void ExecPushExprSlots(ExprState *state, LastAttnumInfo *info); static bool get_last_attnums_walker(Node *node, LastAttnumInfo *info); -static void ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op); +static bool ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op); static void ExecInitWholeRowVar(ExprEvalStep *scratch, Var *variable, ExprState *state); static void ExecInitSubscriptingRef(ExprEvalStep *scratch, @@ -2285,8 +2285,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info) scratch.d.fetch.fixed = false; scratch.d.fetch.kind = NULL; scratch.d.fetch.known_desc = NULL; - ExecComputeSlotInfo(state, &scratch); - ExprEvalPushStep(state, &scratch); + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); } if (info->last_outer > 0) { @@ -2295,8 +2295,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info) scratch.d.fetch.fixed = false; scratch.d.fetch.kind = NULL; scratch.d.fetch.known_desc = NULL; - ExecComputeSlotInfo(state, &scratch); - ExprEvalPushStep(state, &scratch); + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); } if (info->last_scan > 0) { @@ -2305,8 +2305,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info) scratch.d.fetch.fixed = false; scratch.d.fetch.kind = NULL; scratch.d.fetch.known_desc = NULL; - ExecComputeSlotInfo(state, &scratch); - ExprEvalPushStep(state, &scratch); + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); } } @@ -2364,14 +2364,21 @@ get_last_attnums_walker(Node *node, LastAttnumInfo *info) * The goal is to determine whether a slot is 'fixed', that is, every * evaluation of the expression will have the same type of slot, with an * equivalent descriptor. + * + * Returns true if the the deforming step is required, false otherwise. */ -static void +static bool ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op) { PlanState *parent = state->parent; TupleDesc desc = NULL; const TupleTableSlotOps *tts_ops = NULL; bool isfixed = false; + ExprEvalOp opcode = op->opcode; + + Assert(opcode == EEOP_INNER_FETCHSOME || + opcode == EEOP_OUTER_FETCHSOME || + opcode == EEOP_SCAN_FETCHSOME); if (op->d.fetch.known_desc != NULL) { @@ -2383,7 +2390,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op) { isfixed = false; } - else if (op->opcode == EEOP_INNER_FETCHSOME) + else if (opcode == EEOP_INNER_FETCHSOME) { PlanState *is = innerPlanState(parent); @@ -2402,7 +2409,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op) desc = ExecGetResultType(is); } } - else if (op->opcode == EEOP_OUTER_FETCHSOME) + else if (opcode == EEOP_OUTER_FETCHSOME) { PlanState *os = outerPlanState(parent); @@ -2421,7 +2428,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op) desc = ExecGetResultType(os); } } - else if (op->opcode == EEOP_SCAN_FETCHSOME) + else if (opcode == EEOP_SCAN_FETCHSOME) { desc = parent->scandesc; @@ -2444,6 +2451,12 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op) op->d.fetch.kind = NULL; op->d.fetch.known_desc = NULL; } + + /* if the slot is known to always virtual we never need to deform */ + if (op->d.fetch.fixed && op->d.fetch.kind == &TTSOpsVirtual) + return false; + + return true; } /* @@ -3358,16 +3371,16 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, scratch.d.fetch.fixed = false; scratch.d.fetch.known_desc = ldesc; scratch.d.fetch.kind = lops; - ExecComputeSlotInfo(state, &scratch); - ExprEvalPushStep(state, &scratch); + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); scratch.opcode = EEOP_OUTER_FETCHSOME; scratch.d.fetch.last_var = maxatt; scratch.d.fetch.fixed = false; scratch.d.fetch.known_desc = rdesc; scratch.d.fetch.kind = rops; - ExecComputeSlotInfo(state, &scratch); - ExprEvalPushStep(state, &scratch); + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); /* * Start comparing at the last field (least significant sort key). That's diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index e876160a0e7..ccea030cd70 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -160,6 +160,12 @@ static Datum ExecJustAssignOuterVar(ExprState *state, ExprContext *econtext, boo static Datum ExecJustAssignScanVar(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustApplyFuncToCase(ExprState *state, ExprContext *econtext, bool *isnull); static Datum ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustAssignInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); +static Datum ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull); /* @@ -255,11 +261,45 @@ ExecReadyInterpretedExpr(ExprState *state) return; } } - else if (state->steps_len == 2 && - state->steps[0].opcode == EEOP_CONST) + else if (state->steps_len == 2) { - state->evalfunc_private = (void *) ExecJustConst; - return; + ExprEvalOp step0 = state->steps[0].opcode; + + if (step0 == EEOP_CONST) + { + state->evalfunc_private = (void *) ExecJustConst; + return; + } + else if (step0 == EEOP_INNER_VAR) + { + state->evalfunc_private = (void *) ExecJustInnerVarVirt; + return; + } + else if (step0 == EEOP_OUTER_VAR) + { + state->evalfunc_private = (void *) ExecJustOuterVarVirt; + return; + } + else if (step0 == EEOP_SCAN_VAR) + { + state->evalfunc_private = (void *) ExecJustScanVarVirt; + return; + } + else if (step0 == EEOP_ASSIGN_INNER_VAR) + { + state->evalfunc_private = (void *) ExecJustAssignInnerVarVirt; + return; + } + else if (step0 == EEOP_ASSIGN_OUTER_VAR) + { + state->evalfunc_private = (void *) ExecJustAssignOuterVarVirt; + return; + } + else if (step0 == EEOP_ASSIGN_SCAN_VAR) + { + state->evalfunc_private = (void *) ExecJustAssignScanVarVirt; + return; + } } #if defined(EEO_USE_COMPUTED_GOTO) @@ -2096,6 +2136,91 @@ ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull) return op->d.constval.value; } +/* implementation of ExecJust(Inner|Outer|Scan)VarVirt */ +static pg_attribute_always_inline Datum +ExecJustVarVirtImpl(ExprState *state, TupleTableSlot *slot, bool *isnull) +{ + ExprEvalStep *op = &state->steps[0]; + int attnum = op->d.var.attnum; + + /* + * As it is guaranteed that a virtual slot is used, there never is a need + * to perform tuple deforming (nor would it be possible). Therefore + * execExpr.c has not emitted a EEOP_*_FETCHSOME step. Verify, as much as + * possible, that that determination was accurate. + */ + Assert(slot->tts_ops == &TTSOpsVirtual); + Assert(TTS_FIXED(slot)); + Assert(attnum >= 0 && attnum < slot->tts_nvalid); + + *isnull = slot->tts_isnull[attnum]; + + return slot->tts_values[attnum]; +} + +/* Like ExecJustInnerVar, optimized for virtual slots */ +static Datum +ExecJustInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarVirtImpl(state, econtext->ecxt_innertuple, isnull); +} + +/* Like ExecJustOuterVar, optimized for virtual slots */ +static Datum +ExecJustOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarVirtImpl(state, econtext->ecxt_outertuple, isnull); +} + +/* Like ExecJustScanVar, optimized for virtual slots */ +static Datum +ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustVarVirtImpl(state, econtext->ecxt_scantuple, isnull); +} + +/* implementation of ExecJustAssign(Inner|Outer|Scan)VarVirt */ +static pg_attribute_always_inline Datum +ExecJustAssignVarVirtImpl(ExprState *state, TupleTableSlot *inslot, bool *isnull) +{ + ExprEvalStep *op = &state->steps[0]; + int attnum = op->d.assign_var.attnum; + int resultnum = op->d.assign_var.resultnum; + TupleTableSlot *outslot = state->resultslot; + + /* see ExecJustVarVirtImpl for comments */ + + Assert(inslot->tts_ops == &TTSOpsVirtual); + Assert(TTS_FIXED(inslot)); + Assert(attnum >= 0 && attnum < inslot->tts_nvalid); + + outslot->tts_values[resultnum] = inslot->tts_values[attnum]; + outslot->tts_isnull[resultnum] = inslot->tts_isnull[attnum]; + + return 0; +} + +/* Like ExecJustAssignInnerVar, optimized for virtual slots */ +static Datum +ExecJustAssignInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustAssignVarVirtImpl(state, econtext->ecxt_innertuple, isnull); +} + +/* Like ExecJustAssignOuterVar, optimized for virtual slots */ +static Datum +ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustAssignVarVirtImpl(state, econtext->ecxt_outertuple, isnull); +} + +/* Like ExecJustAssignScanVar, optimized for virtual slots */ +static Datum +ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull) +{ + return ExecJustAssignVarVirtImpl(state, econtext->ecxt_scantuple, isnull); +} + #if defined(EEO_USE_COMPUTED_GOTO) /* * Comparator used when building address->opcode lookup table for diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index 30133634c70..d09324637b9 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -287,6 +287,9 @@ llvm_compile_expr(ExprState *state) if (op->d.fetch.fixed) tts_ops = op->d.fetch.kind; + /* step should not have been generated */ + Assert(tts_ops != &TTSOpsVirtual); + if (opcode == EEOP_INNER_FETCHSOME) v_slot = v_innerslot; else if (opcode == EEOP_OUTER_FETCHSOME) @@ -297,9 +300,6 @@ llvm_compile_expr(ExprState *state) /* * Check if all required attributes are available, or * whether deforming is required. - * - * TODO: skip nvalid check if slot is fixed and known to - * be a virtual slot. */ v_nvalid = l_load_struct_gep(b, v_slot, -- 2.23.0.162.gf1d4a28250