From a56cb787bb302d8bc1a2f53bf1e82bc1cb37fb01 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 28 Jul 2024 16:36:34 +0300 Subject: [PATCH v29 2/2] Teach bitmap scan about transforming OR clauses to ANY expression Now, (expr op C1) OR (expr op C2) ... are transformet to expr op ANY(ARRAY[C1, C2, ...]) during matching a clause to index. This commit teaches bitmap scan planning to take advantage of this transformation. The similar clauses are grouped together before considering bitmap-OR. Groups of similar clauses are matched to indexes both together and one-by-one. --- src/backend/optimizer/path/indxpath.c | 266 ++++++++++++++++++++- src/test/regress/expected/create_index.out | 28 +-- src/test/regress/expected/join.out | 56 +++-- src/tools/pgindent/typedefs.list | 1 + 4 files changed, 304 insertions(+), 47 deletions(-) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 4fd0bbce2ce..f697b95db39 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -1177,6 +1177,248 @@ build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel, return result; } +typedef struct +{ + int indexnum; + int colnum; + Oid opno; + int argindex; +} OrArgIndexMatch; + +static int +or_arg_index_match_cmp(const void *a, const void *b) +{ + const OrArgIndexMatch *match_a = (const OrArgIndexMatch *) a; + const OrArgIndexMatch *match_b = (const OrArgIndexMatch *) b; + + if (match_a->indexnum < match_b->indexnum) + return -1; + else if (match_a->indexnum > match_b->indexnum) + return 1; + + if (match_a->colnum < match_b->colnum) + return -1; + else if (match_a->colnum > match_b->colnum) + return 1; + + if (match_a->opno < match_b->opno) + return -1; + else if (match_a->opno > match_b->opno) + return 1; + + if (match_a->argindex < match_b->argindex) + return -1; + else if (match_a->argindex > match_b->argindex) + return 1; + + return 0; +} + +/* + * Group clauses "col op const" belonging to the single index clause into + * dedicated ORs. + */ +static List * +group_or_args(PlannerInfo *root, RelOptInfo *rel, + RestrictInfo *rinfo, List *orargs) +{ + int n = list_length(orargs); + int i; + int group_start; + OrArgIndexMatch *matches; + ListCell *lc; + ListCell *lc2; + List *result = NIL; + + /* + * Find corresponding index column for each clause in target form. + */ + i = -1; + matches = (OrArgIndexMatch *) palloc(sizeof(OrArgIndexMatch) * n); + foreach(lc, orargs) + { + Node *arg = lfirst(lc); + RestrictInfo *argrinfo; + OpExpr *clause; + Oid opno; + Node *leftop, + *rightop; + Node *nconst_expr; + int indexnum; + int colnum; + + i++; + matches[i].argindex = i; + matches[i].indexnum = -1; + matches[i].colnum = -1; + matches[i].opno = InvalidOid; + + if (!IsA(arg, RestrictInfo)) + continue; + + argrinfo = castNode(RestrictInfo, arg); + + if (!IsA(argrinfo->clause, OpExpr)) + continue; + + clause = (OpExpr *) argrinfo->clause; + opno = clause->opno; + + leftop = get_leftop(clause); + if (IsA(leftop, RelabelType)) + leftop = (Node *) ((RelabelType *) leftop)->arg; + + rightop = get_rightop(clause); + if (IsA(rightop, RelabelType)) + rightop = (Node *) ((RelabelType *) rightop)->arg; + + if (IsA(leftop, Const) || IsA(leftop, Param)) + { + opno = get_commutator(opno); + + if (!OidIsValid(opno)) + { + /* commutator doesn't exist, we can't reverse the order */ + continue; + } + + nconst_expr = rightop; + } + else if (IsA(rightop, Const) || IsA(rightop, Param)) + { + nconst_expr = leftop; + } + else + { + continue; + } + + indexnum = 0; + foreach(lc2, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc2); + + /* Ignore index if it doesn't support bitmap scans */ + if (!index->amhasgetbitmap) + continue; + + for (colnum = 0; colnum < index->nkeycolumns; colnum++) + { + if (match_index_to_operand(nconst_expr, colnum, index)) + { + matches[i].indexnum = indexnum; + matches[i].colnum = colnum; + matches[i].opno = opno; + } + } + indexnum++; + } + } + + /* Sort clauses to make similar clauses go together */ + pg_qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp); + + /* Group similar clauses */ + group_start = 0; + for (i = 1; i <= n; i++) + { + if (group_start >= 0 && + (i == n || + matches[i].indexnum != matches[group_start].indexnum || + matches[i].colnum != matches[group_start].colnum || + matches[i].opno != matches[group_start].opno || + matches[i].indexnum == -1)) + { + if (i - group_start == 1) + { + result = lappend(result, list_nth(orargs, matches[group_start].argindex)); + } + else + { + List *args = NIL; + RestrictInfo *subrinfo = makeNode(RestrictInfo); + int j; + + for (j = group_start; j < i; j++) + args = lappend(args, list_nth(orargs, matches[j].argindex)); + + *subrinfo = *rinfo; + subrinfo->clause = make_orclause(args); + subrinfo->orclause = subrinfo->clause; + result = lappend(result, subrinfo); + } + + group_start = i; + } + } + return result; +} + +/* + * Generate bitmap paths for group produced by group_or_args() + */ +static List * +make_bitmap_paths_for_or_group(PlannerInfo *root, RelOptInfo *rel, + RestrictInfo *ri, List *other_clauses) +{ + List *jointlist = NIL; + List *splitlist = NIL; + ListCell *lc; + List *orargs; + List *args = ((BoolExpr *) ri->clause)->args; + Cost jointcost = 0.0, + splitcost = 0.0; + Path *bitmapqual; + List *indlist; + + /* + * First, try to match the whole group to the one index. + */ + orargs = list_make1(ri); + indlist = build_paths_for_OR(root, rel, + orargs, + other_clauses); + if (indlist != NIL) + { + bitmapqual = choose_bitmap_and(root, rel, indlist); + jointcost = bitmapqual->total_cost; + jointlist = list_make1(bitmapqual); + } + + /* + * Also try to match all containing clauses one-by-one. + */ + foreach(lc, args) + { + orargs = list_make1(lfirst(lc)); + + indlist = build_paths_for_OR(root, rel, + orargs, + other_clauses); + + if (indlist == NIL) + { + splitlist = NIL; + break; + } + + bitmapqual = choose_bitmap_and(root, rel, indlist); + splitcost += bitmapqual->total_cost; + splitlist = lappend(splitlist, bitmapqual); + } + + /* + * Pick the best option. + */ + if (splitlist == NIL) + return jointlist; + else if (jointlist == NIL) + return splitlist; + else + return (jointcost < splitcost) ? jointlist : splitlist; +} + + /* * generate_bitmap_or_paths * Look through the list of clauses to find OR clauses, and generate @@ -1207,6 +1449,7 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, List *pathlist; Path *bitmapqual; ListCell *j; + List *grouped; /* Ignore RestrictInfos that aren't ORs */ if (!restriction_is_or_clause(rinfo)) @@ -1217,7 +1460,8 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, * the OR, else we can't use it. */ pathlist = NIL; - foreach(j, ((BoolExpr *) rinfo->orclause)->args) + grouped = group_or_args(root, rel, rinfo, ((BoolExpr *) rinfo->orclause)->args); + foreach(j, grouped) { Node *orarg = (Node *) lfirst(j); List *indlist; @@ -1237,12 +1481,28 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, andargs, all_clauses)); } + else if (restriction_is_or_clause(castNode(RestrictInfo, orarg))) + { + RestrictInfo *ri = castNode(RestrictInfo, orarg); + + indlist = make_bitmap_paths_for_or_group(root, rel, ri, list_delete(all_clauses, rinfo)); + + if (indlist == NIL) + { + pathlist = NIL; + break; + } + else + { + pathlist = list_concat(pathlist, indlist); + continue; + } + } else { RestrictInfo *ri = castNode(RestrictInfo, orarg); List *orargs; - Assert(!restriction_is_or_clause(ri)); orargs = list_make1(ri); indlist = build_paths_for_OR(root, rel, @@ -2878,7 +3138,7 @@ match_orclause_to_indexcol(PlannerInfo *root, if (!OidIsValid(opno)) { /* commutator doesn't exist, we can't reverse the order */ - return false; + return NULL; } nconst_expr = rightop; diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index c2b25936c8c..c6feef03810 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -1982,25 +1982,24 @@ SELECT count(*) FROM tenk1 EXPLAIN (COSTS OFF) SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- Aggregate -> Bitmap Heap Scan on tenk1 - Recheck Cond: (((hundred = 42) AND ((thousand = 42) OR (thousand = 99) OR (tenthous < 2))) OR (thousand = 41)) + Recheck Cond: (((hundred = 42) AND ((thousand = ANY ('{42,99}'::integer[])) OR (tenthous < 2))) OR (thousand = 41)) + Filter: (((hundred = 42) AND ((thousand = 42) OR (thousand = 99) OR (tenthous < 2))) OR (thousand = 41)) -> BitmapOr -> BitmapAnd -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = 42) -> BitmapOr -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = 42) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = 99) + Index Cond: (thousand = ANY ('{42,99}'::integer[])) -> Bitmap Index Scan on tenk1_thous_tenthous Index Cond: (tenthous < 2) -> Bitmap Index Scan on tenk1_thous_tenthous Index Cond: (thousand = 41) -(16 rows) +(15 rows) SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41; @@ -2012,22 +2011,21 @@ SELECT count(*) FROM tenk1 EXPLAIN (COSTS OFF) SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2); - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------ Aggregate -> Bitmap Heap Scan on tenk1 - Recheck Cond: ((hundred = 42) AND ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2)))) + Recheck Cond: ((hundred = 42) AND (((thousand = 99) AND (tenthous = 2)) OR (thousand = ANY ('{42,41}'::integer[])))) + Filter: ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2))) -> BitmapAnd -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = 42) -> BitmapOr - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = 42) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = 41) -> Bitmap Index Scan on tenk1_thous_tenthous Index Cond: ((thousand = 99) AND (tenthous = 2)) -(13 rows) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = ANY ('{42,41}'::integer[])) +(12 rows) SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index abe98ff3c53..d26a93831d5 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4226,20 +4226,20 @@ select * from tenk1 a join tenk1 b on Nested Loop Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4))) -> Bitmap Heap Scan on tenk1 b - Recheck Cond: ((unique1 = 2) OR (hundred = 4)) + Recheck Cond: ((hundred = 4) OR (unique1 = 2)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 2) -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = 4) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 2) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique1 = 1) OR (unique2 = 3)) + Recheck Cond: ((unique2 = 3) OR (unique1 = 1)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 1) -> Bitmap Index Scan on tenk1_unique2 Index Cond: (unique2 = 3) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) (17 rows) explain (costs off) @@ -4253,12 +4253,12 @@ select * from tenk1 a join tenk1 b on Filter: ((unique1 = 2) OR (ten = 4)) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique1 = 1) OR (unique2 = 3)) + Recheck Cond: ((unique2 = 3) OR (unique1 = 1)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 1) -> Bitmap Index Scan on tenk1_unique2 Index Cond: (unique2 = 3) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) (12 rows) explain (costs off) @@ -4270,21 +4270,21 @@ select * from tenk1 a join tenk1 b on Nested Loop Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4))) -> Bitmap Heap Scan on tenk1 b - Recheck Cond: ((unique1 = 2) OR (hundred = 4)) + Recheck Cond: ((hundred = 4) OR (unique1 = 2)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 2) -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = 4) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 2) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique1 = 1) OR (unique2 = ANY ('{3,7}'::integer[]))) + Recheck Cond: ((unique2 = ANY ('{3,7}'::integer[])) OR (unique1 = 1)) Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 1) -> Bitmap Index Scan on tenk1_unique2 Index Cond: (unique2 = ANY ('{3,7}'::integer[])) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) (18 rows) explain (costs off) @@ -4296,21 +4296,21 @@ select * from tenk1 a join tenk1 b on Nested Loop Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4))) -> Bitmap Heap Scan on tenk1 b - Recheck Cond: ((unique1 = 2) OR (hundred = 4)) + Recheck Cond: ((hundred = 4) OR (unique1 = 2)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 2) -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = 4) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 2) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique1 = 1) OR (unique2 = ANY ('{3,7}'::integer[]))) + Recheck Cond: ((unique2 = ANY ('{3,7}'::integer[])) OR (unique1 = 1)) Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 1) -> Bitmap Index Scan on tenk1_unique2 Index Cond: (unique2 = ANY ('{3,7}'::integer[])) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) (18 rows) explain (costs off) @@ -4324,18 +4324,16 @@ select * from tenk1 a join tenk1 b on -> Seq Scan on tenk1 b -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR (unique2 = ANY ('{3,7}'::integer[]))) + Recheck Cond: ((unique2 = ANY ('{3,7}'::integer[])) OR (unique1 = ANY ('{3,1}'::integer[])) OR (unique1 < 20)) Filter: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 < 20) - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 3) - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 1) -> Bitmap Index Scan on tenk1_unique2 Index Cond: (unique2 = ANY ('{3,7}'::integer[])) -(16 rows) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = ANY ('{3,1}'::integer[])) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 < 20) +(14 rows) -- -- test placement of movable quals in a parameterized join tree diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 992c80f9350..ba66983cf45 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1764,6 +1764,7 @@ OprCacheKey OprInfo OprProofCacheEntry OprProofCacheKey +OrArgIndexMatch OuterJoinClauseInfo OutputPluginCallbacks OutputPluginOptions -- 2.39.3 (Apple Git-145)