From 73aa4192fec1e8388873941d9072a0ea5c80ee96 Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Sun, 1 Apr 2018 01:20:08 +1300 Subject: [PATCH v18 4/5] Allow MergeAppend's subnodes to be pruned during execution Already supported for Append nodes, this commit allows partition pruning to occur in MergeAppend using values which are only known during execution. --- src/backend/executor/nodeMergeAppend.c | 136 ++++++++++++++++++++---- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/outfuncs.c | 2 + src/backend/nodes/readfuncs.c | 1 + src/backend/optimizer/plan/createplan.c | 34 ++++++ src/include/nodes/execnodes.h | 9 ++ src/include/nodes/plannodes.h | 4 + src/test/regress/expected/partition_prune.out | 145 ++++++++++++++++++++++++++ src/test/regress/sql/partition_prune.sql | 41 ++++++++ 9 files changed, 351 insertions(+), 22 deletions(-) diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c index 118f4ef07d..d9cf911f4f 100644 --- a/src/backend/executor/nodeMergeAppend.c +++ b/src/backend/executor/nodeMergeAppend.c @@ -39,6 +39,7 @@ #include "postgres.h" #include "executor/execdebug.h" +#include "executor/execPartition.h" #include "executor/nodeMergeAppend.h" #include "lib/binaryheap.h" #include "miscadmin.h" @@ -65,8 +66,9 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) { MergeAppendState *mergestate = makeNode(MergeAppendState); PlanState **mergeplanstates; + Bitmapset *validsubplans; int nplans; - int i; + int i, j; ListCell *lc; /* check for unsupported flags */ @@ -78,19 +80,89 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) */ ExecLockNonLeafAppendTables(node->partitioned_rels, estate); - /* - * Set up empty vector of subplan states - */ - nplans = list_length(node->mergeplans); - - mergeplanstates = (PlanState **) palloc0(nplans * sizeof(PlanState *)); - /* * create new MergeAppendState for our node */ mergestate->ps.plan = (Plan *) node; mergestate->ps.state = 
estate; mergestate->ps.ExecProcNode = ExecMergeAppend; + mergestate->ms_noopscan = false; + + /* If run-time partition pruning is enabled, then set that up now */ + if (node->part_prune_infos != NIL) + { + PartitionPruning *partprune; + + ExecAssignExprContext(estate, &mergestate->ps); + + partprune = ExecSetupPartitionPruning(&mergestate->ps, + node->part_prune_infos); + + /* + * When there are external params matching the partition key we may be + * able to prune away MergeAppend subplans now. + */ + if (!bms_is_empty(partprune->extparams)) + { + /* Determine which subplans match the external params */ + validsubplans = ExecFindInitialMatchingSubPlans(partprune, + list_length(node->mergeplans)); + + /* + * If no subplans match the given parameters then we must handle + * this case in a special way. The problem here is that code in + * explain.c requires a MergeAppend to have at least one subplan + * in order for it to properly determine the Vars in that + * subplan's targetlist. We sidestep this issue by just + * initializing the first subplan, but we set a noop flag so that + * we never actually bother scanning it. + */ + if (bms_is_empty(validsubplans)) + { + mergestate->ms_noopscan = true; + + /* Mark the first as valid so that it's initialized below */ + validsubplans = bms_make_singleton(0); + } + + nplans = bms_num_members(validsubplans); + } + else + { + /* We'll need to initialize all subplans */ + nplans = list_length(node->mergeplans); + validsubplans = bms_add_range(NULL, 0, nplans - 1); + } + + /* + * If there are no exec params then no further pruning can be done, we + * can just set the valid subplans to all remaining subplans. + * Otherwise we set the valid subplans to NULL so that they can be + * determined during actual execution. 
+ */ + if (bms_is_empty(partprune->execparams)) + mergestate->ms_valid_subplans = bms_add_range(NULL, 0, nplans - 1); + else + mergestate->ms_valid_subplans = NULL; + + + mergestate->partition_pruning = partprune; + + } + else + { + nplans = list_length(node->mergeplans); + + /* + * When run-time partition pruning is not enabled we can just mark + * all subplans as valid. + */ + mergestate->ms_valid_subplans = validsubplans = + bms_add_range(NULL, 0, nplans - 1); + mergestate->partition_pruning = NULL; + } + + mergeplanstates = (PlanState **) palloc(nplans * sizeof(PlanState *)); mergestate->mergeplans = mergeplanstates; mergestate->ms_nplans = nplans; @@ -101,26 +173,23 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) /* * Miscellaneous initialization * - * MergeAppend plans don't have expression contexts because they never - * call ExecQual or ExecProject. - */ - - /* * MergeAppend nodes do have Result slots, which hold pointers to tuples, * so we have to initialize them. */ ExecInitResultTupleSlotTL(estate, &mergestate->ps); /* - * call ExecInitNode on each of the plans to be executed and save the - * results into the array "mergeplans". + * call ExecInitNode on each of the valid plans to be executed and save + * the results into the mergeplanstates array. 
*/ - i = 0; + j = i = 0; foreach(lc, node->mergeplans) { - Plan *initNode = (Plan *) lfirst(lc); - - mergeplanstates[i] = ExecInitNode(initNode, estate, eflags); + if (bms_is_member(i, validsubplans)) + { + Plan *initNode = (Plan *) lfirst(lc); + mergeplanstates[j++] = ExecInitNode(initNode, estate, eflags); + } i++; } @@ -178,11 +247,21 @@ ExecMergeAppend(PlanState *pstate) if (!node->ms_initialized) { + /* Handle the case for when all subplans were pruned */ + if (node->ms_noopscan) + return ExecClearTuple(node->ps.ps_ResultTupleSlot); + + /* Determine minimum set of matching partitions, if not already set */ + if (node->ms_valid_subplans == NULL) + node->ms_valid_subplans = + ExecFindMatchingSubPlans(node->partition_pruning); + /* - * First time through: pull the first tuple from each subplan, and set - * up the heap. + * First time through: pull the first tuple from each valid subplan, + * and set up the heap. */ - for (i = 0; i < node->ms_nplans; i++) + i = -1; + while ((i = bms_next_member(node->ms_valid_subplans, i)) >= 0) { node->ms_slots[i] = ExecProcNode(node->mergeplans[i]); if (!TupIsNull(node->ms_slots[i])) @@ -295,6 +374,19 @@ ExecReScanMergeAppend(MergeAppendState *node) { int i; + /* + * If any of the parameters being used for partition pruning have changed, + * then we'd better unset the valid subplans so that they are reselected + * for the new parameter values. 
+ */ + if (node->partition_pruning && + bms_overlap(node->ps.chgParam, + node->partition_pruning->execparams)) + { + bms_free(node->ms_valid_subplans); + node->ms_valid_subplans = NULL; + } + for (i = 0; i < node->ms_nplans; i++) { PlanState *subnode = node->mergeplans[i]; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index b67c5b86ab..9f2fd52a0b 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -273,6 +273,7 @@ _copyMergeAppend(const MergeAppend *from) COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool)); + COPY_NODE_FIELD(part_prune_infos); return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 3a88ccbea9..cace730efa 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -434,6 +434,8 @@ _outMergeAppend(StringInfo str, const MergeAppend *node) appendStringInfoString(str, " :nullsFirst"); for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %s", booltostr(node->nullsFirst[i])); + + WRITE_NODE_FIELD(part_prune_infos); } static void diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 1632c13ce9..31fed08627 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1673,6 +1673,7 @@ _readMergeAppend(void) READ_OID_ARRAY(sortOperators, local_node->numCols); READ_OID_ARRAY(collations, local_node->numCols); READ_BOOL_ARRAY(nullsFirst, local_node->numCols); + READ_NODE_FIELD(part_prune_infos); READ_DONE(); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index f498952118..090b22224c 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1140,6 +1140,8 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) List *pathkeys = 
best_path->path.pathkeys; List *subplans = NIL; ListCell *subpaths; + RelOptInfo *rel = best_path->path.parent; + List *partpruneinfos = NIL; /* * We don't have the actual creation of the MergeAppend node split out @@ -1225,8 +1227,40 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) subplans = lappend(subplans, subplan); } + if (rel->reloptkind == RELOPT_BASEREL && + best_path->partitioned_rels != NIL) + { + List *prunequal; + + prunequal = extract_actual_clauses(rel->baserestrictinfo, false); + + if (best_path->path.param_info) + { + + List *prmquals = best_path->path.param_info->ppi_clauses; + + prmquals = extract_actual_clauses(prmquals, false); + prmquals = (List *) replace_nestloop_params(root, + (Node *) prmquals); + + prunequal = list_concat(prunequal, prmquals); + } + + /* + * If any quals exist, then these may be useful to allow us to perform + * further partition pruning during execution. We'll generate a + * PartitionPruneInfo for each partitioned rel to store these quals + * and allow translation of partition indexes into subpath indexes. + */ + if (prunequal != NIL) + partpruneinfos = make_partition_pruneinfo(root, + best_path->partitioned_rels, NIL, + best_path->subpaths, prunequal); + } + node->partitioned_rels = best_path->partitioned_rels; node->mergeplans = subplans; + node->part_prune_infos = partpruneinfos; return (Plan *) node; } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 3888674389..681779d42f 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1092,6 +1092,12 @@ struct AppendState * slots current output tuple of each subplan * heap heap of active tuples * initialized true if we have fetched first tuple from each subplan + * noopscan true if partition pruning proved that none of the + * mergeplans can contain a record to satisfy this query. 
+ * partition_pruning details required to allow partitions to be + * eliminated from the scan, or NULL if not possible. + * ms_valid_subplans for runtime pruning, valid mergeplans indexes to + * scan. * ---------------- */ typedef struct MergeAppendState @@ -1104,6 +1110,9 @@ typedef struct MergeAppendState TupleTableSlot **ms_slots; /* array of length ms_nplans */ struct binaryheap *ms_heap; /* binary heap of slot indices */ bool ms_initialized; /* are subplans started? */ + bool ms_noopscan; /* true if no subplans need to be scanned */ + struct PartitionPruning *partition_pruning; + Bitmapset *ms_valid_subplans; } MergeAppendState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index e33799d919..c90c763a86 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -276,6 +276,10 @@ typedef struct MergeAppend Oid *sortOperators; /* OIDs of operators to sort them by */ Oid *collations; /* OIDs of collations */ bool *nullsFirst; /* NULLS FIRST/LAST directions */ + /* + * Mapping details for run-time subplan pruning, one per partitioned_rels + */ + List *part_prune_infos; } MergeAppend; /* ---------------- diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index ef8537f3ac..bb54924578 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -2652,3 +2652,148 @@ select * from boolp where a = (select value from boolvalues where not value); (9 rows) drop table boolp; +-- +-- Test run-time pruning of MergeAppend subnodes +-- +set enable_seqscan = off; +set enable_sort = off; +create table ma_test (a int) partition by range (a); +create table ma_test_p1 partition of ma_test for values from (0) to (10); +create table ma_test_p2 partition of ma_test for values from (10) to (20); +create table ma_test_p3 partition of ma_test for values from (20) to (30); +insert into ma_test select x from 
generate_series(0,29) t(x); +create index on ma_test (a); +analyze ma_test; +prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a; +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +explain (analyze, costs off, summary off, timing off) execute mt_q1(15); + QUERY PLAN +------------------------------------------------------------------------------------ + Merge Append (actual rows=2 loops=1) + Sort Key: ma_test_p2.a + Subplans Pruned: 1 + -> Index Only Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=1 loops=1) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Rows Removed by Filter: 4 + Heap Fetches: 5 + -> Index Only Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Rows Removed by Filter: 9 + Heap Fetches: 10 +(13 rows) + +execute mt_q1(15); + a +---- + 15 + 25 +(2 rows) + +explain (analyze, costs off, summary off, timing off) execute mt_q1(25); + QUERY PLAN +------------------------------------------------------------------------------------ + Merge Append (actual rows=1 loops=1) + Sort Key: ma_test_p3.a + Subplans Pruned: 2 + -> Index Only Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Rows Removed by Filter: 4 + Heap Fetches: 5 +(8 rows) + +execute mt_q1(25); + a +---- + 25 +(1 row) + +-- Ensure MergeAppend behaves correctly when no subplans match +explain (analyze, costs off, summary off, timing off) execute mt_q1(35); + QUERY PLAN +----------------------------------------------------------------------------- + Merge Append (actual rows=0 loops=1) + Sort Key: ma_test_p1.a + 
Subplans Pruned: 2 + -> Index Only Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Heap Fetches: 0 +(7 rows) + +execute mt_q1(35); + a +--- +(0 rows) + +deallocate mt_q1; +-- ensure initplan params properly prune partitions +explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- + Merge Append (actual rows=20 loops=1) + Sort Key: ma_test_p1.a + InitPlan 2 (returns $1) + -> Result (actual rows=1 loops=1) + InitPlan 1 (returns $0) + -> Limit (actual rows=1 loops=1) + -> Index Only Scan using ma_test_p2_a_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1) + Index Cond: (a IS NOT NULL) + Heap Fetches: 1 + -> Index Only Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) + Index Cond: (a >= $1) + Heap Fetches: 0 + -> Index Only Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=10 loops=1) + Index Cond: (a >= $1) + Heap Fetches: 10 + -> Index Only Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=10 loops=1) + Index Cond: (a >= $1) + Heap Fetches: 10 +(18 rows) + +reset enable_seqscan; +reset enable_sort; +drop table ma_test; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index dc1ae6d975..e87cf52536 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -620,3 +620,44 @@ explain (analyze, costs off, summary off, timing off) select * from boolp where a = (select value from boolvalues where not value); drop table boolp; + +-- +-- Test run-time pruning of MergeAppend subnodes +-- +set enable_seqscan = off; +set enable_sort = off; +create table ma_test (a int) partition by range (a); +create table ma_test_p1 partition of ma_test for values from (0) to (10); +create table ma_test_p2 partition of 
ma_test for values from (10) to (20); +create table ma_test_p3 partition of ma_test for values from (20) to (30); +insert into ma_test select x from generate_series(0,29) t(x); +create index on ma_test (a); + +analyze ma_test; +prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a; + +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); + +explain (analyze, costs off, summary off, timing off) execute mt_q1(15); +execute mt_q1(15); +explain (analyze, costs off, summary off, timing off) execute mt_q1(25); +execute mt_q1(25); +-- Ensure MergeAppend behaves correctly when no subplans match +explain (analyze, costs off, summary off, timing off) execute mt_q1(35); +execute mt_q1(35); + +deallocate mt_q1; + +-- ensure initplan params properly prune partitions +explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a; + +reset enable_seqscan; +reset enable_sort; + +drop table ma_test; -- 2.16.2.windows.1