Add planner support function for starts_with() - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Add planner support function for starts_with() |
Date | |
Msg-id | 232599.1633800229@sss.pgh.pa.us Whole thread Raw |
Responses |
Re: Add planner support function for starts_with()
|
List | pgsql-hackers |
When starts_with() and the equivalent ^@ operator were added, they were plugged into the planner in only a rather half-baked way. Selectivity estimation got taught about the operator, but the other infrastructure associated with LIKE/regex matching wasn't updated. This causes these operators to be planned more stupidly than a functionally-equivalent LIKE/regex pattern [1]. With the (admittedly later) introduction of planner support functions, it's really quite easy to do better. The attached patch adds a planner support function for starts_with(), with these benefits: * A condition such as "textcol ^@ constant" can now use a regular btree index, not only an SP-GiST index, so long as the index's collation is C. (This works just like "textcol LIKE 'foo%'".) * "starts_with(textcol, constant)" can be optimized the same as "textcol ^@ constant". I also rejiggered match_pattern_prefix() a bit, with the effect that fixed-prefix LIKE and regex patterns are now more like starts_with() in another way: if you apply one to an SPGiST-indexed column, you'll get an index condition using ^@ rather than two index conditions with >= and <. That should be more efficient at runtime, though I didn't try to do any performance testing. regards, tom lane [1] https://www.postgresql.org/message-id/CADT4RqB13KQHOJqqQ%2BWXmYtJrukS2UiFdtfTvT-XA3qYLyB6Cw%40mail.gmail.com diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 241e6f0f59..988568825e 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -143,6 +143,14 @@ texticregexeq_support(PG_FUNCTION_ARGS) PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC)); } +Datum +text_starts_with_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Prefix)); +} + /* Common code for the above */ static Node * like_regex_support(Node *rawreq, Pattern_Type ptype) @@ -246,6 +254,7 @@ match_pattern_prefix(Node *leftop, Oid eqopr; Oid ltopr; Oid geopr; + Oid preopr = InvalidOid; bool collation_aware; Expr *expr; FmgrInfo ltproc; @@ -302,12 +311,20 @@ match_pattern_prefix(Node *leftop, switch (ldatatype) { case TEXTOID: - if (opfamily == TEXT_PATTERN_BTREE_FAM_OID || - opfamily == TEXT_SPGIST_FAM_OID) + if (opfamily == TEXT_PATTERN_BTREE_FAM_OID) + { + eqopr = TextEqualOperator; + ltopr = TextPatternLessOperator; + geopr = TextPatternGreaterEqualOperator; + collation_aware = false; + } + else if (opfamily == TEXT_SPGIST_FAM_OID) { eqopr = TextEqualOperator; ltopr = TextPatternLessOperator; geopr = TextPatternGreaterEqualOperator; + /* This opfamily has direct support for prefixing */ + preopr = TextPrefixOperator; collation_aware = false; } else @@ -360,20 +377,6 @@ match_pattern_prefix(Node *leftop, return NIL; } - /* - * If necessary, verify that the index's collation behavior is compatible. - * For an exact-match case, we don't have to be picky. Otherwise, insist - * that the index collation be "C". Note that here we are looking at the - * index's collation, not the expression's collation -- this test is *not* - * dependent on the LIKE/regex operator's collation. - */ - if (collation_aware) - { - if (!(pstatus == Pattern_Prefix_Exact || - lc_collate_is_c(indexcollation))) - return NIL; - } - /* * If necessary, coerce the prefix constant to the right type. The given * prefix constant is either text or bytea type, therefore the only case @@ -409,8 +412,31 @@ match_pattern_prefix(Node *leftop, } /* - * Otherwise, we have a nonempty required prefix of the values. - * + * Otherwise, we have a nonempty required prefix of the values. Some + * opclasses support prefix checks directly, otherwise we'll try to + * generate a range constraint. + */ + if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily)) + { + expr = make_opclause(preopr, BOOLOID, false, + (Expr *) leftop, (Expr *) prefix, + InvalidOid, indexcollation); + result = list_make1(expr); + return result; + } + + /* + * Since we need a range constraint, it's only going to work reliably if + * the index is collation-insensitive or has "C" collation. Note that + * here we are looking at the index's collation, not the expression's + * collation -- this test is *not* dependent on the LIKE/regex operator's + * collation. + */ + if (collation_aware && + !lc_collate_is_c(indexcollation)) + return NIL; + + /* * We can always say "x >= prefix". */ if (!op_in_opfamily(geopr, opfamily)) @@ -1165,7 +1191,6 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, case Pattern_Type_Prefix: /* Prefix type work is trivial. */ result = Pattern_Prefix_Partial; - *rest_selec = 1.0; /* all */ *prefix = makeConst(patt->consttype, patt->consttypmod, patt->constcollid, @@ -1175,6 +1200,8 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, patt->constlen), patt->constisnull, patt->constbyval); + if (rest_selec != NULL) + *rest_selec = 1.0; /* all */ break; default: elog(ERROR, "unrecognized ptype: %d", (int) ptype); diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat index 89c73acd68..0075a02f32 100644 --- a/src/include/catalog/pg_operator.dat +++ b/src/include/catalog/pg_operator.dat @@ -102,7 +102,7 @@ oprright => 'text', oprresult => 'bool', oprcom => '=(text,text)', oprnegate => '<>(text,text)', oprcode => 'texteq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' }, -{ oid => '3877', descr => 'starts with', +{ oid => '3877', oid_symbol => 'TextPrefixOperator', descr => 'starts with', oprname => '^@', oprleft => 'text', oprright => 'text', oprresult => 'bool', oprcode => 'starts_with', oprrest => 'prefixsel', oprjoin => 'prefixjoinsel' }, diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index d068d6532e..6412f369f1 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -167,8 +167,12 @@ proname => 'texteq', proleakproof => 't', prorettype => 'bool', proargtypes => 'text text', prosrc => 'texteq' }, { oid => '3696', - proname => 'starts_with', proleakproof => 't', prorettype => 'bool', - proargtypes => 'text text', prosrc => 'text_starts_with' }, + proname => 'starts_with', prosupport => 'text_starts_with_support', + proleakproof => 't', prorettype => 'bool', proargtypes => 'text text', + prosrc => 'text_starts_with' }, +{ oid => '8923', descr => 'planner support for text_starts_with', + proname => 'text_starts_with_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'text_starts_with_support' }, { oid => '68', proname => 'xideq', proleakproof => 't', prorettype => 'bool', proargtypes => 'xid xid', prosrc => 'xideq' }, diff --git a/src/test/regress/expected/create_index_spgist.out b/src/test/regress/expected/create_index_spgist.out index f4fb08a289..5c04df9c01 100644 --- a/src/test/regress/expected/create_index_spgist.out +++ b/src/test/regress/expected/create_index_spgist.out @@ -804,6 +804,22 @@ SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'; 2 (1 row) +EXPLAIN (COSTS OFF) +SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth'); + QUERY PLAN +------------------------------------------------------------ + Aggregate + -> Index Only Scan using sp_radix_ind on radix_text_tbl + Index Cond: (t ^@ 'Worth'::text) + Filter: starts_with(t, 'Worth'::text) +(4 rows) + +SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth'); + count +------- + 2 +(1 row) + -- Now check the results from bitmap indexscan SET enable_seqscan = OFF; SET enable_indexscan = OFF; @@ -1333,6 +1349,23 @@ SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'; 2 (1 row) +EXPLAIN (COSTS OFF) +SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth'); + QUERY PLAN +------------------------------------------------ + Aggregate + -> Bitmap Heap Scan on radix_text_tbl + Filter: starts_with(t, 'Worth'::text) + -> Bitmap Index Scan on sp_radix_ind + Index Cond: (t ^@ 'Worth'::text) +(5 rows) + +SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth'); + count +------- + 2 +(1 row) + RESET enable_seqscan; RESET enable_indexscan; RESET enable_bitmapscan; diff --git a/src/test/regress/sql/create_index_spgist.sql b/src/test/regress/sql/create_index_spgist.sql index b126dae629..660bfc6193 100644 --- a/src/test/regress/sql/create_index_spgist.sql +++ b/src/test/regress/sql/create_index_spgist.sql @@ -295,6 +295,10 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'; SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth'); +SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth'); + -- Now check the results from bitmap indexscan SET enable_seqscan = OFF; SET enable_indexscan = OFF; @@ -424,6 +428,10 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'; SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth'; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth'); +SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth'); + RESET enable_seqscan; RESET enable_indexscan; RESET enable_bitmapscan;
pgsql-hackers by date: