Re: Case expression pushdown - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: Case expression pushdown |
Date | |
Msg-id | 915605.1627592067@sss.pgh.pa.us Whole thread Raw |
In response to | Re: Case expression pushdown (Alexander Pyhalov <a.pyhalov@postgrespro.ru>) |
Responses |
Re: Case expression pushdown
|
List | pgsql-hackers |
Alexander Pyhalov <a.pyhalov@postgrespro.ru> writes: > [ 0001-Allow-pushing-CASE-expression-to-foreign-server-v7.patch ] I looked this over. It's better than before, but the collation handling is still not at all correct. We have to consider that a CASE's arg expression supplies the collation for a contained CaseTestExpr, otherwise we'll come to the wrong conclusions about whether "CASE foreignvar WHEN ..." is shippable, if the foreignvar is what's determining collation of the comparisons. This means that the CaseExpr level of recursion has to pass data down to the CaseTestExpr level. In the attached, I did that by adding an additional argument to foreign_expr_walker(). That's a bit invasive, but it's not awful. I thought about instead adding fields to the foreign_loc_cxt struct. But that seemed considerably messier in the end, because we'd then have some fields that are information sourced at one recursion level and some that are info sourced at another level. I also whacked the regression test cases around a lot. They seemed to spend a lot of time on irrelevant combinations, while failing to check the things that matter, namely whether collation-based pushdown decisions are made correctly. regards, tom lane diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index 31919fda8c..d0cbfa8ad9 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -116,7 +116,8 @@ typedef struct deparse_expr_cxt */ static bool foreign_expr_walker(Node *node, foreign_glob_cxt *glob_cxt, - foreign_loc_cxt *outer_cxt); + foreign_loc_cxt *outer_cxt, + foreign_loc_cxt *case_arg_cxt); static char *deparse_type_name(Oid type_oid, int32 typemod); /* @@ -158,6 +159,7 @@ static void deparseScalarArrayOpExpr(ScalarArrayOpExpr *node, static void deparseRelabelType(RelabelType *node, deparse_expr_cxt *context); static void deparseBoolExpr(BoolExpr *node, deparse_expr_cxt *context); static void deparseNullTest(NullTest *node, deparse_expr_cxt *context); +static void deparseCaseExpr(CaseExpr *node, deparse_expr_cxt *context); static void deparseArrayExpr(ArrayExpr *node, deparse_expr_cxt *context); static void printRemoteParam(int paramindex, Oid paramtype, int32 paramtypmod, deparse_expr_cxt *context); @@ -254,7 +256,7 @@ is_foreign_expr(PlannerInfo *root, glob_cxt.relids = baserel->relids; loc_cxt.collation = InvalidOid; loc_cxt.state = FDW_COLLATE_NONE; - if (!foreign_expr_walker((Node *) expr, &glob_cxt, &loc_cxt)) + if (!foreign_expr_walker((Node *) expr, &glob_cxt, &loc_cxt, NULL)) return false; /* @@ -283,6 +285,10 @@ is_foreign_expr(PlannerInfo *root, * * In addition, *outer_cxt is updated with collation information. * + * case_arg_cxt is NULL if this subexpression is not inside a CASE-with-arg. + * Otherwise, it points to the collation info derived from the arg expression, + * which must be consulted by any CaseTestExpr. + * * We must check that the expression contains only node types we can deparse, * that all types/functions/operators are safe to send (they are "shippable"), * and that all collations used in the expression derive from Vars of the @@ -294,7 +300,8 @@ is_foreign_expr(PlannerInfo *root, static bool foreign_expr_walker(Node *node, foreign_glob_cxt *glob_cxt, - foreign_loc_cxt *outer_cxt) + foreign_loc_cxt *outer_cxt, + foreign_loc_cxt *case_arg_cxt) { bool check_type = true; PgFdwRelationInfo *fpinfo; @@ -432,17 +439,17 @@ foreign_expr_walker(Node *node, * result, so do those first and reset inner_cxt afterwards. */ if (!foreign_expr_walker((Node *) sr->refupperindexpr, - glob_cxt, &inner_cxt)) + glob_cxt, &inner_cxt, case_arg_cxt)) return false; inner_cxt.collation = InvalidOid; inner_cxt.state = FDW_COLLATE_NONE; if (!foreign_expr_walker((Node *) sr->reflowerindexpr, - glob_cxt, &inner_cxt)) + glob_cxt, &inner_cxt, case_arg_cxt)) return false; inner_cxt.collation = InvalidOid; inner_cxt.state = FDW_COLLATE_NONE; if (!foreign_expr_walker((Node *) sr->refexpr, - glob_cxt, &inner_cxt)) + glob_cxt, &inner_cxt, case_arg_cxt)) return false; /* @@ -478,7 +485,7 @@ foreign_expr_walker(Node *node, * Recurse to input subexpressions. */ if (!foreign_expr_walker((Node *) fe->args, - glob_cxt, &inner_cxt)) + glob_cxt, &inner_cxt, case_arg_cxt)) return false; /* @@ -526,7 +533,7 @@ foreign_expr_walker(Node *node, * Recurse to input subexpressions. */ if (!foreign_expr_walker((Node *) oe->args, - glob_cxt, &inner_cxt)) + glob_cxt, &inner_cxt, case_arg_cxt)) return false; /* @@ -566,7 +573,7 @@ foreign_expr_walker(Node *node, * Recurse to input subexpressions. */ if (!foreign_expr_walker((Node *) oe->args, - glob_cxt, &inner_cxt)) + glob_cxt, &inner_cxt, case_arg_cxt)) return false; /* @@ -592,7 +599,7 @@ foreign_expr_walker(Node *node, * Recurse to input subexpression. */ if (!foreign_expr_walker((Node *) r->arg, - glob_cxt, &inner_cxt)) + glob_cxt, &inner_cxt, case_arg_cxt)) return false; /* @@ -619,7 +626,7 @@ foreign_expr_walker(Node *node, * Recurse to input subexpressions. */ if (!foreign_expr_walker((Node *) b->args, - glob_cxt, &inner_cxt)) + glob_cxt, &inner_cxt, case_arg_cxt)) return false; /* Output is always boolean and so noncollatable. */ @@ -635,7 +642,7 @@ foreign_expr_walker(Node *node, * Recurse to input subexpressions. */ if (!foreign_expr_walker((Node *) nt->arg, - glob_cxt, &inner_cxt)) + glob_cxt, &inner_cxt, case_arg_cxt)) return false; /* Output is always boolean and so noncollatable. */ @@ -643,6 +650,124 @@ foreign_expr_walker(Node *node, state = FDW_COLLATE_NONE; } break; + case T_CaseExpr: + { + CaseExpr *ce = (CaseExpr *) node; + foreign_loc_cxt arg_cxt; + foreign_loc_cxt tmp_cxt; + ListCell *lc; + + /* + * Recurse to CASE's arg expression, if any. Its collation + * has to be saved aside for use while examining CaseTestExprs + * within the WHEN expressions. + */ + arg_cxt.collation = InvalidOid; + arg_cxt.state = FDW_COLLATE_NONE; + if (ce->arg) + { + if (!foreign_expr_walker((Node *) ce->arg, + glob_cxt, &arg_cxt, case_arg_cxt)) + return false; + } + + /* Examine the CaseWhen subexpressions. */ + foreach(lc, ce->args) + { + CaseWhen *cw = lfirst_node(CaseWhen, lc); + Node *whenExpr = (Node *) cw->expr; + + if (ce->arg) + { + /* + * In a CASE-with-arg, the parser should have produced + * WHEN clauses of the form "CaseTestExpr = RHS", + * possibly with an implicit coercion inserted above + * the CaseTestExpr. However in an expression that's + * been through the optimizer, the WHEN clause could + * be almost anything (since the equality operator + * could have been expanded into an inline function). + * In such cases forbid pushdown, because + * deparseCaseExpr can't handle it. + */ + List *whenExprArgs; + + if (!IsA(whenExpr, OpExpr)) + return false; + + whenExprArgs = ((OpExpr *) whenExpr)->args; + if ((list_length(whenExprArgs) != 2) || + !IsA(strip_implicit_coercions(linitial(whenExprArgs)), CaseTestExpr)) + return false; + } + + /* + * Recurse to WHEN expression, passing down the arg info. + * Its collation doesn't affect the result (really, it + * should be boolean and thus not have a collation). + */ + tmp_cxt.collation = InvalidOid; + tmp_cxt.state = FDW_COLLATE_NONE; + if (!foreign_expr_walker((Node *) cw->expr, + glob_cxt, &tmp_cxt, &arg_cxt)) + return false; + + /* Recurse to THEN expression. */ + if (!foreign_expr_walker((Node *) cw->result, + glob_cxt, &inner_cxt, case_arg_cxt)) + return false; + } + + /* Recurse to ELSE expression. */ + if (!foreign_expr_walker((Node *) ce->defresult, + glob_cxt, &inner_cxt, case_arg_cxt)) + return false; + + /* + * Detect whether node is introducing a collation not derived + * from a foreign Var. (If so, we just mark it unsafe for now + * rather than immediately returning false, since the parent + * node might not care.) This is the same as for function + * nodes, except that the input collation is derived from only + * the THEN and ELSE subexpressions. + */ + collation = ce->casecollid; + if (collation == InvalidOid) + state = FDW_COLLATE_NONE; + else if (inner_cxt.state == FDW_COLLATE_SAFE && + collation == inner_cxt.collation) + state = FDW_COLLATE_SAFE; + else if (collation == DEFAULT_COLLATION_OID) + state = FDW_COLLATE_NONE; + else + state = FDW_COLLATE_UNSAFE; + } + break; + case T_CaseTestExpr: + { + CaseTestExpr *c = (CaseTestExpr *) node; + + /* Punt if we seem not to be inside a CASE arg WHEN. */ + if (!case_arg_cxt) + return false; + + /* + * Otherwise, any nondefault collation attached to the + * CaseTestExpr node must be derived from foreign Var(s) in + * the CASE arg. + */ + collation = c->collation; + if (collation == InvalidOid) + state = FDW_COLLATE_NONE; + else if (case_arg_cxt->state == FDW_COLLATE_SAFE && + collation == case_arg_cxt->collation) + state = FDW_COLLATE_SAFE; + else if (collation == DEFAULT_COLLATION_OID) + state = FDW_COLLATE_NONE; + else + state = FDW_COLLATE_UNSAFE; + } + break; case T_ArrayExpr: { ArrayExpr *a = (ArrayExpr *) node; @@ -651,7 +776,7 @@ foreign_expr_walker(Node *node, * Recurse to input subexpressions. */ if (!foreign_expr_walker((Node *) a->elements, - glob_cxt, &inner_cxt)) + glob_cxt, &inner_cxt, case_arg_cxt)) return false; /* @@ -681,7 +806,7 @@ foreign_expr_walker(Node *node, foreach(lc, l) { if (!foreign_expr_walker((Node *) lfirst(lc), - glob_cxt, &inner_cxt)) + glob_cxt, &inner_cxt, case_arg_cxt)) return false; } @@ -730,7 +855,8 @@ foreign_expr_walker(Node *node, n = (Node *) tle->expr; } - if (!foreign_expr_walker(n, glob_cxt, &inner_cxt)) + if (!foreign_expr_walker(n, + glob_cxt, &inner_cxt, case_arg_cxt)) return false; } @@ -765,7 +891,7 @@ foreign_expr_walker(Node *node, /* Check aggregate filter */ if (!foreign_expr_walker((Node *) agg->aggfilter, - glob_cxt, &inner_cxt)) + glob_cxt, &inner_cxt, case_arg_cxt)) return false; /* @@ -2456,6 +2582,9 @@ deparseExpr(Expr *node, deparse_expr_cxt *context) case T_NullTest: deparseNullTest((NullTest *) node, context); break; + case T_CaseExpr: + deparseCaseExpr((CaseExpr *) node, context); + break; case T_ArrayExpr: deparseArrayExpr((ArrayExpr *) node, context); break; @@ -3007,6 +3136,56 @@ deparseNullTest(NullTest *node, deparse_expr_cxt *context) } } +/* + * Deparse CASE expression + */ +static void +deparseCaseExpr(CaseExpr *node, deparse_expr_cxt *context) +{ + StringInfo buf = context->buf; + ListCell *lc; + + appendStringInfoString(buf, "(CASE"); + + /* If this is a CASE arg WHEN then emit the arg expression */ + if (node->arg != NULL) + { + appendStringInfoChar(buf, ' '); + deparseExpr(node->arg, context); + } + + /* Add each condition/result of the CASE clause */ + foreach(lc, node->args) + { + CaseWhen *whenclause = (CaseWhen *) lfirst(lc); + + /* WHEN */ + appendStringInfoString(buf, " WHEN "); + if (node->arg == NULL) /* CASE WHEN */ + deparseExpr(whenclause->expr, context); + else /* CASE arg WHEN */ + { + /* Ignore the CaseTestExpr and equality operator. */ + deparseExpr(lsecond(castNode(OpExpr, whenclause->expr)->args), + context); + } + + /* THEN */ + appendStringInfoString(buf, " THEN "); + deparseExpr(whenclause->result, context); + } + + /* add ELSE if present */ + if (node->defresult != NULL) + { + appendStringInfoString(buf, " ELSE "); + deparseExpr(node->defresult, context); + } + + /* append END */ + appendStringInfoString(buf, " END)"); +} + /* * Deparse ARRAY[...] construct. */ diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index ed25e7a743..d01b91efab 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -1067,6 +1067,96 @@ SELECT * FROM ft1 t1 WHERE t1.c1 === t1.c2 order by t1.c2 limit 1; 1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo (1 row) +-- Test CASE pushdown +EXPLAIN (VERBOSE, COSTS OFF) +SELECT c1,c2,c3 FROM ft2 WHERE CASE WHEN c1 > 990 THEN c1 END < 1000 ORDER BY c1; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on public.ft2 + Output: c1, c2, c3 + Remote SQL: SELECT "C 1", c2, c3 FROM "S 1"."T 1" WHERE (((CASE WHEN ("C 1" > 990) THEN "C 1" ELSE NULL::integer END)< 1000)) ORDER BY "C 1" ASC NULLS LAST +(3 rows) + +SELECT c1,c2,c3 FROM ft2 WHERE CASE WHEN c1 > 990 THEN c1 END < 1000 ORDER BY c1; + c1 | c2 | c3 +-----+----+------- + 991 | 1 | 00991 + 992 | 2 | 00992 + 993 | 3 | 00993 + 994 | 4 | 00994 + 995 | 5 | 00995 + 996 | 6 | 00996 + 997 | 7 | 00997 + 998 | 8 | 00998 + 999 | 9 | 00999 +(9 rows) + +-- Nested CASE +EXPLAIN (VERBOSE, COSTS OFF) +SELECT c1,c2,c3 FROM ft2 WHERE CASE CASE WHEN c2 > 0 THEN c2 END WHEN 100 THEN 601 WHEN c2 THEN c2 ELSE 0 END > 600 ORDERBY c1; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on public.ft2 + Output: c1, c2, c3 + Remote SQL: SELECT "C 1", c2, c3 FROM "S 1"."T 1" WHERE (((CASE (CASE WHEN (c2 > 0) THEN c2 ELSE NULL::integer END) WHEN100 THEN 601 WHEN c2 THEN c2 ELSE 0 END) > 600)) ORDER BY "C 1" ASC NULLS LAST +(3 rows) + +SELECT c1,c2,c3 FROM ft2 WHERE CASE CASE WHEN c2 > 0 THEN c2 END WHEN 100 THEN 601 WHEN c2 THEN c2 ELSE 0 END > 600 ORDERBY c1; + c1 | c2 | c3 +----+----+---- +(0 rows) + +-- CASE arg WHEN +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM ft1 WHERE c1 > (CASE mod(c1, 4) WHEN 0 THEN 1 WHEN 2 THEN 50 ELSE 100 END); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on public.ft1 + Output: c1, c2, c3, c4, c5, c6, c7, c8 + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" > (CASE mod("C 1", 4) WHEN 0 THEN1 WHEN 2 THEN 50 ELSE 100 END))) +(3 rows) + +-- CASE cannot be pushed down because of unshippable arg clause +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM ft1 WHERE c1 > (CASE random()::integer WHEN 0 THEN 1 WHEN 2 THEN 50 ELSE 100 END); + QUERY PLAN +----------------------------------------------------------------------------------------- + Foreign Scan on public.ft1 + Output: c1, c2, c3, c4, c5, c6, c7, c8 + Filter: (ft1.c1 > CASE (random())::integer WHEN 0 THEN 1 WHEN 2 THEN 50 ELSE 100 END) + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" +(4 rows) + +-- these are shippable +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM ft1 WHERE CASE c6 WHEN 'foo' THEN true ELSE c3 < 'bar' END; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on public.ft1 + Output: c1, c2, c3, c4, c5, c6, c7, c8 + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((CASE c6 WHEN 'foo'::text THEN true ELSE(c3 < 'bar'::text) END)) +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM ft1 WHERE CASE c3 WHEN c6 THEN true ELSE c3 < 'bar' END; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan on public.ft1 + Output: c1, c2, c3, c4, c5, c6, c7, c8 + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((CASE c3 WHEN c6 THEN true ELSE (c3 < 'bar'::text)END)) +(3 rows) + +-- but this is not because of collation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM ft1 WHERE CASE c3 COLLATE "C" WHEN c6 THEN true ELSE c3 < 'bar' END; + QUERY PLAN +------------------------------------------------------------------------------------- + Foreign Scan on public.ft1 + Output: c1, c2, c3, c4, c5, c6, c7, c8 + Filter: CASE (ft1.c3)::text WHEN ft1.c6 THEN true ELSE (ft1.c3 < 'bar'::text) END + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" +(4 rows) + -- =================================================================== -- JOIN queries -- =================================================================== diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 02a6b15a13..af7ad15c8f 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -408,6 +408,35 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE t1.c1 === t1.c2 order by t1.c2 limit 1; SELECT * FROM ft1 t1 WHERE t1.c1 === t1.c2 order by t1.c2 limit 1; +-- Test CASE pushdown +EXPLAIN (VERBOSE, COSTS OFF) +SELECT c1,c2,c3 FROM ft2 WHERE CASE WHEN c1 > 990 THEN c1 END < 1000 ORDER BY c1; +SELECT c1,c2,c3 FROM ft2 WHERE CASE WHEN c1 > 990 THEN c1 END < 1000 ORDER BY c1; + +-- Nested CASE +EXPLAIN (VERBOSE, COSTS OFF) +SELECT c1,c2,c3 FROM ft2 WHERE CASE CASE WHEN c2 > 0 THEN c2 END WHEN 100 THEN 601 WHEN c2 THEN c2 ELSE 0 END > 600 ORDERBY c1; + +SELECT c1,c2,c3 FROM ft2 WHERE CASE CASE WHEN c2 > 0 THEN c2 END WHEN 100 THEN 601 WHEN c2 THEN c2 ELSE 0 END > 600 ORDERBY c1; + +-- CASE arg WHEN +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM ft1 WHERE c1 > (CASE mod(c1, 4) WHEN 0 THEN 1 WHEN 2 THEN 50 ELSE 100 END); + +-- CASE cannot be pushed down because of unshippable arg clause +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM ft1 WHERE c1 > (CASE random()::integer WHEN 0 THEN 1 WHEN 2 THEN 50 ELSE 100 END); + +-- these are shippable +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM ft1 WHERE CASE c6 WHEN 'foo' THEN true ELSE c3 < 'bar' END; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM ft1 WHERE CASE c3 WHEN c6 THEN true ELSE c3 < 'bar' END; + +-- but this is not because of collation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM ft1 WHERE CASE c3 COLLATE "C" WHEN c6 THEN true ELSE c3 < 'bar' END; + -- =================================================================== -- JOIN queries -- ===================================================================
pgsql-hackers by date: