From e3ba1e16dba3813170c564b3f3653a83eee8d1f5 Mon Sep 17 00:00:00 2001 From: jian he Date: Thu, 16 Oct 2025 14:47:32 +0800 Subject: [PATCH v1 1/1] first try to refactor func-matching.sgml make it less clustered. Discussion: https://postgr.es/m/ --- doc/src/sgml/func/func-matching.sgml | 168 ++++++++++++++++----------- doc/src/sgml/func/func-string.sgml | 22 ++-- 2 files changed, 114 insertions(+), 76 deletions(-) diff --git a/doc/src/sgml/func/func-matching.sgml b/doc/src/sgml/func/func-matching.sgml index ebe0b22c8f6..4833304e99f 100644 --- a/doc/src/sgml/func/func-matching.sgml +++ b/doc/src/sgml/func/func-matching.sgml @@ -580,12 +580,22 @@ substring('foobar' similar '#"o_b#"%' escape '#') NULL + + POSIX Regular Expression Functions + + This section describes the available functions for pattern matching using POSIX regular expressions. + + + + substring + - The substring function with two parameters, - substring(string from - pattern), provides extraction of a - substring - that matches a POSIX regular expression pattern. It returns null if + The substring function provides extraction of a + substring that matches a POSIX regular expression pattern. It has the syntax: + +substring(string from pattern) text + + It returns null if there is no match, otherwise the first portion of the text that matched the pattern. But if the pattern contains any parentheses, the portion of the text that matched the first parenthesized subexpression (the @@ -604,16 +614,17 @@ substring('foobar' from 'o.b') oob substring('foobar' from 'o(.)b') o + + + regexp_count The regexp_count function counts the number of places where a POSIX regular expression pattern matches a string. - It has the syntax - regexp_count(string, - pattern - , start - , flags - ). 
+ It has the syntax: + +regexp_count(string, pattern , start , flags ) integer + pattern is searched for in string, normally from the beginning of the string, but if the start parameter is @@ -633,20 +644,19 @@ regexp_count('ABCABCAXYaxy', 'A.') 3 regexp_count('ABCABCAXYaxy', 'A.', 1, 'i') 4 + + + + regexp_instr The regexp_instr function returns the starting or ending position of the N'th match of a POSIX regular expression pattern to a string, or zero if there is no - such match. It has the syntax - regexp_instr(string, - pattern - , start - , N - , endoption - , flags - , subexpr - ). + such match. It has the syntax: + +regexp_instr(string, pattern , start , N , endoption , flags , subexpr ) integer + pattern is searched for in string, normally from the beginning of the string, but if the start parameter is @@ -682,14 +692,17 @@ regexp_instr(string=>'ABCDEFGHI', pattern=>'(c..)(...)', start=>1, "N"=>1, endop 6 + + + regexp_like The regexp_like function checks whether a match of a POSIX regular expression pattern occurs within a string, - returning boolean true or false. It has the syntax - regexp_like(string, - pattern - , flags ). + returning boolean true or false. It has the syntax: + +regexp_like(string, pattern , flags ) boolean + The flags parameter is an optional text string containing zero or more single-letter flags that change the function's behavior. Supported flags are described @@ -707,13 +720,17 @@ regexp_like('Hello World', 'world') false regexp_like('Hello World', 'world', 'i') true + + + regexp_match The regexp_match function returns a text array of matching substring(s) within the first match of a POSIX - regular expression pattern to a string. It has the syntax - regexp_match(string, - pattern , flags ). + regular expression pattern to a string. It has the syntax: + +regexp_match(string, pattern , flags ) text[] + If there is no match, the result is NULL. 
If a match is found, and the pattern contains no parenthesized subexpressions, then the result is a single-element text @@ -765,12 +782,19 @@ SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1]; + + + regexp_matches The regexp_matches function returns a set of text arrays of matching substring(s) within matches of a POSIX regular - expression pattern to a string. It has the same syntax as - regexp_match. + expression pattern to a string. The input arguments have the same meaning as for + regexp_match. + It has the syntax: + +regexp_matches(string, pattern , flags ) setof text[] + This function returns no rows if there is no match, one row if there is a match and the g flag is not given, or N rows if there are N matches and the g flag @@ -819,20 +843,18 @@ SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab; without a match, which is typically not the desired behavior. + + + regexp_replace The regexp_replace function provides substitution of new text for substrings that match POSIX regular expression patterns. - It has the syntax - regexp_replace(string, - pattern, replacement - , flags ) - or - regexp_replace(string, - pattern, replacement, - start - , N - , flags ). + It has the syntax: + +regexp_replace(string, pattern, replacement , flags ) text +regexp_replace(string, pattern, replacement, start , N, flags ) text + The source string is returned unchanged if there is no match to the pattern. If there is a match, the string is returned with the @@ -880,12 +902,16 @@ regexp_replace(string=>'A PostgreSQL function', pattern=>'a|e|i|o|u', replacemen A PostgrXSQL function + + + regexp_split_to_table The regexp_split_to_table function splits a string using a POSIX - regular expression pattern as a delimiter. It has the syntax - regexp_split_to_table(string, pattern - , flags ). 
It has the syntax: + +regexp_split_to_table(string, pattern , flags ) setof text + If there is no match to the pattern, the function returns the string. If there is at least one match, for each match it returns the text from the end of the last match (or the beginning of the string) @@ -897,15 +923,6 @@ regexp_replace(string=>'A PostgreSQL function', pattern=>'a|e|i|o|u', replacemen . - - The regexp_split_to_array function behaves the same as - regexp_split_to_table, except that regexp_split_to_array - returns its result as an array of text. It has the syntax - regexp_split_to_array(string, pattern - , flags ). - The parameters are the same as for regexp_split_to_table. - - Some examples: @@ -923,12 +940,6 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox jumps over the lazy d dog (9 rows) -SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', '\s+'); - regexp_split_to_array ------------------------------------------------ - {the,quick,brown,fox,jumps,over,the,lazy,dog} -(1 row) - SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo; foo ----- @@ -960,18 +971,43 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo; the other regexp functions, but is usually the most convenient behavior in practice. Other software systems such as Perl use similar definitions. + + + + regexp_split_to_array + + The regexp_split_to_array function behaves the same as + regexp_split_to_table, + except that regexp_split_to_array + returns its result as an array of text. It has the syntax: + +regexp_split_to_array(string, pattern , flags ) text[] + + The parameters are the same as for regexp_split_to_table. 
+ + + + Some examples: + +SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', '\s+'); + regexp_split_to_array +----------------------------------------------- + {the,quick,brown,fox,jumps,over,the,lazy,dog} +(1 row) + + + + + + regexp_substr The regexp_substr function returns the substring that matches a POSIX regular expression pattern, - or NULL if there is no match. It has the syntax - regexp_substr(string, - pattern - , start - , N - , flags - , subexpr - ). + or NULL if there is no match. It has the syntax: + +regexp_substr(string, pattern , start , N , flags , subexpr ) text + pattern is searched for in string, normally from the beginning of the string, but if the start parameter is @@ -1001,6 +1037,8 @@ regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2) FGH + + diff --git a/doc/src/sgml/func/func-string.sgml b/doc/src/sgml/func/func-string.sgml index 01cc94c234e..87823de35b0 100644 --- a/doc/src/sgml/func/func-string.sgml +++ b/doc/src/sgml/func/func-string.sgml @@ -431,7 +431,7 @@ Extracts the first substring matching POSIX regular expression; see - . + . substring('Thomas' from '...$') @@ -961,7 +961,7 @@ Returns the number of times the POSIX regular expression pattern matches in the string; see - . + . regexp_count('123456789012', '\d\d\d', 2) @@ -986,7 +986,7 @@ Returns the position within string where the N'th match of the POSIX regular expression pattern occurs, or zero if there is - no such match; see . + no such match; see . regexp_instr('ABCDEF', 'c(.)(..)', 1, 1, 0, 'i') @@ -1011,7 +1011,7 @@ Checks whether a match of the POSIX regular expression pattern occurs within string; see - . + . regexp_like('Hello World', 'world$', 'i') @@ -1031,7 +1031,7 @@ Returns substrings within the first match of the POSIX regular expression pattern to the string; see - . + . 
regexp_match('foobarbequebaz', '(bar)(beque)') @@ -1052,7 +1052,7 @@ expression pattern to the string, or substrings within all such matches if the g flag is used; - see . + see . regexp_matches('foobarbequebaz', 'ba.', 'g') @@ -1077,7 +1077,7 @@ Replaces the substring that is the first match to the POSIX regular expression pattern, or all such matches if the g flag is used; see - . + . regexp_replace('Thomas', '.[mN]a.', 'M') @@ -1100,7 +1100,7 @@ search beginning at the start'th character of string. If N is omitted, it defaults to 1. See - . + . regexp_replace('Thomas', '.', 'X', 3, 2) @@ -1123,7 +1123,7 @@ Splits string using a POSIX regular expression as the delimiter, producing an array of results; see - . + . regexp_split_to_array('hello world', '\s+') @@ -1142,7 +1142,7 @@ Splits string using a POSIX regular expression as the delimiter, producing a set of results; see - . + . regexp_split_to_table('hello world', '\s+') @@ -1171,7 +1171,7 @@ matches the N'th occurrence of the POSIX regular expression pattern, or NULL if there is no such match; see - . + . regexp_substr('ABCDEF', 'c(.)(..)', 1, 1, 'i') -- 2.34.1