From ab1549156e5231c8d9d303a0570eb6358d159371 Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Fri, 6 Apr 2018 00:36:33 +1200 Subject: [PATCH v20 4/5] Allow MergeAppend's subnodes to be pruned during execution Already supported for Append nodes, this commit allows partition pruning to occur in MergeAppend using values which are only known during execution. --- src/backend/executor/nodeMergeAppend.c | 136 ++++++++++++++++++++---- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/outfuncs.c | 2 + src/backend/nodes/readfuncs.c | 1 + src/backend/optimizer/plan/createplan.c | 34 ++++++ src/include/nodes/execnodes.h | 9 ++ src/include/nodes/plannodes.h | 5 + src/test/regress/expected/partition_prune.out | 145 ++++++++++++++++++++++++++ src/test/regress/sql/partition_prune.sql | 41 ++++++++ 9 files changed, 353 insertions(+), 21 deletions(-) diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c index 118f4ef07d..b44a38f53a 100644 --- a/src/backend/executor/nodeMergeAppend.c +++ b/src/backend/executor/nodeMergeAppend.c @@ -39,6 +39,7 @@ #include "postgres.h" #include "executor/execdebug.h" +#include "executor/execPartition.h" #include "executor/nodeMergeAppend.h" #include "lib/binaryheap.h" #include "miscadmin.h" @@ -65,8 +66,10 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) { MergeAppendState *mergestate = makeNode(MergeAppendState); PlanState **mergeplanstates; + Bitmapset *validsubplans; int nplans; - int i; + int i, + j; ListCell *lc; /* check for unsupported flags */ @@ -78,19 +81,89 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) */ ExecLockNonLeafAppendTables(node->partitioned_rels, estate); - /* - * Set up empty vector of subplan states - */ - nplans = list_length(node->mergeplans); - - mergeplanstates = (PlanState **) palloc0(nplans * sizeof(PlanState *)); - /* * create new MergeAppendState for our node */ mergestate->ps.plan = (Plan *) node; mergestate->ps.state = 
estate; mergestate->ps.ExecProcNode = ExecMergeAppend; + mergestate->ms_noopscan = false; + + /* If run-time partition pruning is enabled, then set that up now */ + if (node->part_prune_infos != NIL) + { + PartitionPruning *partprune; + + ExecAssignExprContext(estate, &mergestate->ps); + + partprune = ExecSetupPartitionPruning(&mergestate->ps, + node->part_prune_infos); + + /* + * When there are external params matching the partition key we may be + * able to prune away MergeAppend subplans now. + */ + if (!bms_is_empty(partprune->extparams)) + { + /* Determine which subplans match the external params */ + validsubplans = ExecFindInitialMatchingSubPlans(partprune, + list_length(node->mergeplans)); + + /* + * If no subplans match the given parameters then we must handle + * this case in a special way. The problem here is that code in + * explain.c requires a MergeAppend to have at least one subplan + * in order for it to properly determine the Vars in that + * subplan's targetlist. We sidestep this issue by just + * initializing the first subplan, but we set a noop flag so that + * we never actually bother scanning it. + */ + if (bms_is_empty(validsubplans)) + { + mergestate->ms_noopscan = true; + + /* Mark the first as valid so that it's initialized below */ + validsubplans = bms_make_singleton(0); + } + + nplans = bms_num_members(validsubplans); + } + else + { + /* We'll need to initialize all subplans */ + nplans = list_length(node->mergeplans); + validsubplans = bms_add_range(NULL, 0, nplans - 1); + } + + /* + * If there are no exec params then no further pruning can be done, we + * can just set the valid subplans to all remaining subplans. + * Otherwise we set the valid subplans to NULL so that they can be + * determined during actual execution. 
+ */ + if (bms_is_empty(partprune->execparams)) + mergestate->ms_valid_subplans = bms_add_range(NULL, 0, nplans - 1); + else + mergestate->ms_valid_subplans = NULL; + + + mergestate->partition_pruning = partprune; + + } + else + { + nplans = list_length(node->mergeplans); + + /* + * When run-time partition pruning is not enabled we can just mark all + * subplans as valid. + */ + mergestate->ms_valid_subplans = validsubplans = + bms_add_range(NULL, 0, nplans - 1); + mergestate->partition_pruning = NULL; + } + + mergeplanstates = (PlanState **) palloc(nplans * sizeof(PlanState *)); mergestate->mergeplans = mergeplanstates; mergestate->ms_nplans = nplans; @@ -101,26 +174,24 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) /* * Miscellaneous initialization * - * MergeAppend plans don't have expression contexts because they never - * call ExecQual or ExecProject. - */ - - /* * MergeAppend nodes do have Result slots, which hold pointers to tuples, * so we have to initialize them. */ ExecInitResultTupleSlotTL(estate, &mergestate->ps); /* - * call ExecInitNode on each of the plans to be executed and save the - * results into the array "mergeplans". + * call ExecInitNode on each of the valid plans to be executed and save + * the results into the mergeplanstates array. 
*/ - i = 0; + j = i = 0; foreach(lc, node->mergeplans) { - Plan *initNode = (Plan *) lfirst(lc); + if (bms_is_member(i, validsubplans)) + { + Plan *initNode = (Plan *) lfirst(lc); - mergeplanstates[i] = ExecInitNode(initNode, estate, eflags); + mergeplanstates[j++] = ExecInitNode(initNode, estate, eflags); + } i++; } @@ -178,11 +249,21 @@ ExecMergeAppend(PlanState *pstate) if (!node->ms_initialized) { + /* Handle the case for when all subplans were pruned */ + if (node->ms_noopscan) + return ExecClearTuple(node->ps.ps_ResultTupleSlot); + + /* Determine minimum set of matching partitions, if not already set */ + if (node->ms_valid_subplans == NULL) + node->ms_valid_subplans = + ExecFindMatchingSubPlans(node->partition_pruning); + /* - * First time through: pull the first tuple from each subplan, and set - * up the heap. + * First time through: pull the first tuple from each valid subplan, + * and set up the heap. */ - for (i = 0; i < node->ms_nplans; i++) + i = -1; + while ((i = bms_next_member(node->ms_valid_subplans, i)) >= 0) { node->ms_slots[i] = ExecProcNode(node->mergeplans[i]); if (!TupIsNull(node->ms_slots[i])) @@ -295,6 +376,19 @@ ExecReScanMergeAppend(MergeAppendState *node) { int i; + /* + * If any of the parameters being used for partition pruning have changed, + * then we'd better unset the valid subplans so that they are reselected + * for the new parameter values. 
+ */ + if (node->partition_pruning && + bms_overlap(node->ps.chgParam, + node->partition_pruning->execparams)) + { + bms_free(node->ms_valid_subplans); + node->ms_valid_subplans = NULL; + } + for (i = 0; i < node->ms_nplans; i++) { PlanState *subnode = node->mergeplans[i]; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index ee4c70aef8..739a023965 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -276,6 +276,7 @@ _copyMergeAppend(const MergeAppend *from) COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool)); + COPY_NODE_FIELD(part_prune_infos); return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index e22ad4d4dd..e31b6a9c33 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -450,6 +450,8 @@ _outMergeAppend(StringInfo str, const MergeAppend *node) appendStringInfoString(str, " :nullsFirst"); for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %s", booltostr(node->nullsFirst[i])); + + WRITE_NODE_FIELD(part_prune_infos); } static void diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 2e22da6c4f..5bf3d28c51 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1697,6 +1697,7 @@ _readMergeAppend(void) READ_OID_ARRAY(sortOperators, local_node->numCols); READ_OID_ARRAY(collations, local_node->numCols); READ_BOOL_ARRAY(nullsFirst, local_node->numCols); + READ_NODE_FIELD(part_prune_infos); READ_DONE(); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 7560be9522..4e54fe6d25 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1144,6 +1144,8 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) List *pathkeys = 
best_path->path.pathkeys; List *subplans = NIL; ListCell *subpaths; + RelOptInfo *rel = best_path->path.parent; + List *partpruneinfos = NIL; /* * We don't have the actual creation of the MergeAppend node split out @@ -1229,8 +1231,40 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) subplans = lappend(subplans, subplan); } + if (rel->reloptkind == RELOPT_BASEREL && + best_path->partitioned_rels != NIL) + { + List *prunequal; + + prunequal = extract_actual_clauses(rel->baserestrictinfo, false); + + if (best_path->path.param_info) + { + + List *prmquals = best_path->path.param_info->ppi_clauses; + + prmquals = extract_actual_clauses(prmquals, false); + prmquals = (List *) replace_nestloop_params(root, + (Node *) prmquals); + + prunequal = list_concat(prunequal, prmquals); + } + + /* + * If any quals exist, then these may be useful to allow us to perform + * further partition pruning during execution. We'll generate a + * PartitionPruneInfo for each partitioned rel to store these quals + * and allow translation of partition indexes into subpath indexes. + */ + if (prunequal != NIL) + partpruneinfos = make_partition_pruneinfo(root, + best_path->partitioned_rels, NIL, + best_path->subpaths, prunequal); + } + node->partitioned_rels = best_path->partitioned_rels; node->mergeplans = subplans; + node->part_prune_infos = partpruneinfos; return (Plan *) node; } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index b63c0c5329..1986abaa9c 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1155,6 +1155,12 @@ struct AppendState * slots current output tuple of each subplan * heap heap of active tuples * initialized true if we have fetched first tuple from each subplan + * noopscan true if partition pruning proved that none of the + * mergeplans can contain a record to satisfy this query. 
+ * partition_pruning details required to allow partitions to be + * eliminated from the scan, or NULL if not possible. + * ms_valid_subplans for runtime pruning, valid mergeplans indexes to + * scan. * ---------------- */ typedef struct MergeAppendState @@ -1167,6 +1173,9 @@ typedef struct MergeAppendState TupleTableSlot **ms_slots; /* array of length ms_nplans */ struct binaryheap *ms_heap; /* binary heap of slot indices */ bool ms_initialized; /* are subplans started? */ + bool ms_noopscan; /* true if no subplans need scanning */ + struct PartitionPruning *partition_pruning; + Bitmapset *ms_valid_subplans; } MergeAppendState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index c3e5c2c79f..a7dbd31466 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -280,6 +280,11 @@ typedef struct MergeAppend Oid *sortOperators; /* OIDs of operators to sort them by */ Oid *collations; /* OIDs of collations */ bool *nullsFirst; /* NULLS FIRST/LAST directions */ + + /* + * Mapping details for run-time subplan pruning, one per partitioned_rels + */ + List *part_prune_infos; } MergeAppend; /* ---------------- diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 0a2517125c..792924fe0b 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -2651,3 +2651,148 @@ select * from boolp where a = (select value from boolvalues where not value); (9 rows) drop table boolp; +-- +-- Test run-time pruning of MergeAppend subnodes +-- +set enable_seqscan = off; +set enable_sort = off; +create table ma_test (a int) partition by range (a); +create table ma_test_p1 partition of ma_test for values from (0) to (10); +create table ma_test_p2 partition of ma_test for values from (10) to (20); +create table ma_test_p3 partition of ma_test for values from (20) to (30); +insert into ma_test select x from 
generate_series(0,29) t(x); +create index on ma_test (a); +analyze ma_test; +prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a; +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +explain (analyze, costs off, summary off, timing off) execute mt_q1(15); + QUERY PLAN +------------------------------------------------------------------------------------ + Merge Append (actual rows=2 loops=1) + Sort Key: ma_test_p2.a + Subplans Pruned: 1 + -> Index Only Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=1 loops=1) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Rows Removed by Filter: 4 + Heap Fetches: 5 + -> Index Only Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Rows Removed by Filter: 9 + Heap Fetches: 10 +(13 rows) + +execute mt_q1(15); + a +---- + 15 + 25 +(2 rows) + +explain (analyze, costs off, summary off, timing off) execute mt_q1(25); + QUERY PLAN +------------------------------------------------------------------------------------ + Merge Append (actual rows=1 loops=1) + Sort Key: ma_test_p3.a + Subplans Pruned: 2 + -> Index Only Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Rows Removed by Filter: 4 + Heap Fetches: 5 +(8 rows) + +execute mt_q1(25); + a +---- + 25 +(1 row) + +-- Ensure MergeAppend behaves correctly when no subplans match +explain (analyze, costs off, summary off, timing off) execute mt_q1(35); + QUERY PLAN +----------------------------------------------------------------------------- + Merge Append (actual rows=0 loops=1) + Sort Key: ma_test_p1.a + 
Subplans Pruned: 2 + -> Index Only Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Heap Fetches: 0 +(7 rows) + +execute mt_q1(35); + a +--- +(0 rows) + +deallocate mt_q1; +-- ensure initplan params properly prune partitions +explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- + Merge Append (actual rows=20 loops=1) + Sort Key: ma_test_p1.a + InitPlan 2 (returns $1) + -> Result (actual rows=1 loops=1) + InitPlan 1 (returns $0) + -> Limit (actual rows=1 loops=1) + -> Index Only Scan using ma_test_p2_a_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1) + Index Cond: (a IS NOT NULL) + Heap Fetches: 1 + -> Index Only Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) + Index Cond: (a >= $1) + Heap Fetches: 0 + -> Index Only Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=10 loops=1) + Index Cond: (a >= $1) + Heap Fetches: 10 + -> Index Only Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=10 loops=1) + Index Cond: (a >= $1) + Heap Fetches: 10 +(18 rows) + +reset enable_seqscan; +reset enable_sort; +drop table ma_test; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index eefbf32e93..6f389bed1f 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -618,3 +618,44 @@ explain (analyze, costs off, summary off, timing off) select * from boolp where a = (select value from boolvalues where not value); drop table boolp; + +-- +-- Test run-time pruning of MergeAppend subnodes +-- +set enable_seqscan = off; +set enable_sort = off; +create table ma_test (a int) partition by range (a); +create table ma_test_p1 partition of ma_test for values from (0) to (10); +create table ma_test_p2 partition of 
ma_test for values from (10) to (20); +create table ma_test_p3 partition of ma_test for values from (20) to (30); +insert into ma_test select x from generate_series(0,29) t(x); +create index on ma_test (a); + +analyze ma_test; +prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a; + +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); + +explain (analyze, costs off, summary off, timing off) execute mt_q1(15); +execute mt_q1(15); +explain (analyze, costs off, summary off, timing off) execute mt_q1(25); +execute mt_q1(25); +-- Ensure MergeAppend behaves correctly when no subplans match +explain (analyze, costs off, summary off, timing off) execute mt_q1(35); +execute mt_q1(35); + +deallocate mt_q1; + +-- ensure initplan params properly prune partitions +explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a; + +reset enable_seqscan; +reset enable_sort; + +drop table ma_test; -- 2.16.2.windows.1