From e3ba1e16dba3813170c564b3f3653a83eee8d1f5 Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Thu, 16 Oct 2025 14:47:32 +0800
Subject: [PATCH v1 1/1] first try to refactor func-matching.sgml

Make it less cluttered.

Discussion: https://postgr.es/m/
---
 doc/src/sgml/func/func-matching.sgml | 168 ++++++++++++++++-----------
 doc/src/sgml/func/func-string.sgml   |  22 ++--
 2 files changed, 114 insertions(+), 76 deletions(-)
diff --git a/doc/src/sgml/func/func-matching.sgml b/doc/src/sgml/func/func-matching.sgml
index ebe0b22c8f6..4833304e99f 100644
--- a/doc/src/sgml/func/func-matching.sgml
+++ b/doc/src/sgml/func/func-matching.sgml
@@ -580,12 +580,22 @@ substring('foobar' similar '#"o_b#"%' escape '#')    <lineannotation>NULL</linea
      greater detail below.
     </para>
 
+   <sect3 id="posix-regex-functions">
+    <title>POSIX Regular Expression Functions</title>
+   <para>
+    This section describes the available functions for pattern matching using POSIX regular expressions.
+   </para>
+
+   <sect4 id="function-substring">
+   <title>substring</title>
+
     <para>
-     The <function>substring</function> function with two parameters,
-     <function>substring(<replaceable>string</replaceable> from
-     <replaceable>pattern</replaceable>)</function>, provides extraction of a
-     substring
-     that matches a POSIX regular expression pattern.  It returns null if
+     The <function>substring</function> function provides extraction of a
+     substring that matches a POSIX regular expression pattern.  It has the syntax:
+<synopsis>
+substring(<replaceable>string</replaceable> from <replaceable>pattern</replaceable>) <returnvalue>text</returnvalue>
+</synopsis>
+     It returns null if
      there is no match, otherwise the first portion of the text that matched the
      pattern.  But if the pattern contains any parentheses, the portion
      of the text that matched the first parenthesized subexpression (the
@@ -604,16 +614,17 @@ substring('foobar' from 'o.b')     <lineannotation>oob</lineannotation>
 substring('foobar' from 'o(.)b')   <lineannotation>o</lineannotation>
 </programlisting>
    </para>
+ </sect4>
 
+   <sect4 id="function-regexp_count">
+   <title>regexp_count</title>
     <para>
      The <function>regexp_count</function> function counts the number of
      places where a POSIX regular expression pattern matches a string.
-     It has the syntax
-     <function>regexp_count</function>(<replaceable>string</replaceable>,
-     <replaceable>pattern</replaceable>
-     <optional>, <replaceable>start</replaceable>
-     <optional>, <replaceable>flags</replaceable>
-     </optional></optional>).
+     It has the syntax:
+<synopsis>
+regexp_count(<replaceable>string</replaceable>, <replaceable>pattern</replaceable> <optional>, <replaceable>start</replaceable> <optional>, <replaceable>flags</replaceable> </optional></optional>) <returnvalue>integer</returnvalue>
+</synopsis>
      <replaceable>pattern</replaceable> is searched for
      in <replaceable>string</replaceable>, normally from the beginning of
      the string, but if the <replaceable>start</replaceable> parameter is
@@ -633,20 +644,19 @@ regexp_count('ABCABCAXYaxy', 'A.')          <lineannotation>3</lineannotation>
 regexp_count('ABCABCAXYaxy', 'A.', 1, 'i')  <lineannotation>4</lineannotation>
 </programlisting>
     </para>
+    </sect4>
+
+   <sect4 id="functions-regexp_instr">
+   <title>regexp_instr</title>
 
     <para>
      The <function>regexp_instr</function> function returns the starting or
      ending position of the <replaceable>N</replaceable>'th match of a
      POSIX regular expression pattern to a string, or zero if there is no
-     such match.  It has the syntax
-     <function>regexp_instr</function>(<replaceable>string</replaceable>,
-     <replaceable>pattern</replaceable>
-     <optional>, <replaceable>start</replaceable>
-     <optional>, <replaceable>N</replaceable>
-     <optional>, <replaceable>endoption</replaceable>
-     <optional>, <replaceable>flags</replaceable>
-     <optional>, <replaceable>subexpr</replaceable>
-     </optional></optional></optional></optional></optional>).
+     such match.  It has the syntax:
+<synopsis>
+regexp_instr(<replaceable>string</replaceable>, <replaceable>pattern</replaceable> <optional>, <replaceable>start</replaceable> <optional>, <replaceable>N</replaceable> <optional>, <replaceable>endoption</replaceable> <optional>, <replaceable>flags</replaceable> <optional>, <replaceable>subexpr</replaceable> </optional></optional></optional></optional></optional>) <returnvalue>integer</returnvalue>
+</synopsis>
      <replaceable>pattern</replaceable> is searched for
      in <replaceable>string</replaceable>, normally from the beginning of
      the string, but if the <replaceable>start</replaceable> parameter is
@@ -682,14 +692,17 @@ regexp_instr(string=>'ABCDEFGHI', pattern=>'(c..)(...)', start=>1, "N"=>1, endop
                                    <lineannotation>6</lineannotation>
 </programlisting>
     </para>
+</sect4>
 
+   <sect4 id="function-regexp_like">
+   <title>regexp_like</title>
     <para>
      The <function>regexp_like</function> function checks whether a match
      of a POSIX regular expression pattern occurs within a string,
-     returning boolean true or false.  It has the syntax
-     <function>regexp_like</function>(<replaceable>string</replaceable>,
-     <replaceable>pattern</replaceable>
-     <optional>, <replaceable>flags</replaceable> </optional>).
+     returning boolean true or false.  It has the syntax:
+<synopsis>
+regexp_like(<replaceable>string</replaceable>, <replaceable>pattern</replaceable> <optional>, <replaceable>flags</replaceable> </optional>) <returnvalue>boolean</returnvalue>
+</synopsis>
      The <replaceable>flags</replaceable> parameter is an optional text
      string containing zero or more single-letter flags that change the
      function's behavior.  Supported flags are described
@@ -707,13 +720,17 @@ regexp_like('Hello World', 'world')       <lineannotation>false</lineannotation>
 regexp_like('Hello World', 'world', 'i')  <lineannotation>true</lineannotation>
 </programlisting>
     </para>
+</sect4>
 
+   <sect4 id="function-regexp_match">
+   <title>regexp_match</title>
     <para>
      The <function>regexp_match</function> function returns a text array of
      matching substring(s) within the first match of a POSIX
-     regular expression pattern to a string.  It has the syntax
-     <function>regexp_match</function>(<replaceable>string</replaceable>,
-     <replaceable>pattern</replaceable> <optional>, <replaceable>flags</replaceable> </optional>).
+     regular expression pattern to a string.  It has the syntax:
+<synopsis>
+regexp_match(<replaceable>string</replaceable>, <replaceable>pattern</replaceable> <optional>, <replaceable>flags</replaceable> </optional>) <returnvalue>text[]</returnvalue>
+</synopsis>
      If there is no match, the result is <literal>NULL</literal>.
      If a match is found, and the <replaceable>pattern</replaceable> contains no
      parenthesized subexpressions, then the result is a single-element text
@@ -765,12 +782,19 @@ SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1];
 </programlisting>
      </para>
     </tip>
+</sect4>
 
+   <sect4 id="function-regexp_matches">
+   <title>regexp_matches</title>
     <para>
      The <function>regexp_matches</function> function returns a set of text arrays
      of matching substring(s) within matches of a POSIX regular
-     expression pattern to a string.  It has the same syntax as
-     <function>regexp_match</function>.
+     expression pattern to a string.  Its arguments have the same meaning as for
+     <link linkend="function-regexp_match"><function>regexp_match</function></link>.
+     It has the syntax:
+<synopsis>
+regexp_matches(<replaceable>string</replaceable>, <replaceable>pattern</replaceable> <optional>, <replaceable>flags</replaceable> </optional>) <returnvalue>setof text[]</returnvalue>
+</synopsis>
      This function returns no rows if there is no match, one row if there is
      a match and the <literal>g</literal> flag is not given, or <replaceable>N</replaceable>
      rows if there are <replaceable>N</replaceable> matches and the <literal>g</literal> flag
@@ -819,20 +843,18 @@ SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab;
      without a match, which is typically not the desired behavior.
     </para>
    </tip>
+</sect4>
 
+   <sect4 id="function-regexp_replace">
+   <title>regexp_replace</title>
     <para>
      The <function>regexp_replace</function> function provides substitution of
      new text for substrings that match POSIX regular expression patterns.
-     It has the syntax
-     <function>regexp_replace</function>(<replaceable>string</replaceable>,
-     <replaceable>pattern</replaceable>, <replaceable>replacement</replaceable>
-     <optional>, <replaceable>flags</replaceable> </optional>)
-     or
-     <function>regexp_replace</function>(<replaceable>string</replaceable>,
-     <replaceable>pattern</replaceable>, <replaceable>replacement</replaceable>,
-     <replaceable>start</replaceable>
-     <optional>, <replaceable>N</replaceable>
-     <optional>, <replaceable>flags</replaceable> </optional></optional>).
+     It has the syntax:
+<synopsis>
+regexp_replace(<replaceable>string</replaceable>, <replaceable>pattern</replaceable>, <replaceable>replacement</replaceable> <optional>, <replaceable>flags</replaceable> </optional>) <returnvalue>text</returnvalue>
+regexp_replace(<replaceable>string</replaceable>, <replaceable>pattern</replaceable>, <replaceable>replacement</replaceable>, <replaceable>start</replaceable> <optional>, <replaceable>N</replaceable> <optional>, <replaceable>flags</replaceable> </optional></optional>) <returnvalue>text</returnvalue>
+</synopsis>
      The source <replaceable>string</replaceable> is returned unchanged if
      there is no match to the <replaceable>pattern</replaceable>.  If there is a
      match, the <replaceable>string</replaceable> is returned with the
@@ -880,12 +902,16 @@ regexp_replace(string=>'A PostgreSQL function', pattern=>'a|e|i|o|u', replacemen
                                    <lineannotation>A PostgrXSQL function</lineannotation>
 </programlisting>
    </para>
+</sect4>
 
+  <sect4 id="function-regexp_split_to_table">
+   <title>regexp_split_to_table</title>
     <para>
      The <function>regexp_split_to_table</function> function splits a string using a POSIX
-     regular expression pattern as a delimiter.  It has the syntax
-     <function>regexp_split_to_table</function>(<replaceable>string</replaceable>, <replaceable>pattern</replaceable>
-     <optional>, <replaceable>flags</replaceable> </optional>).
+     regular expression pattern as a delimiter.  It has the syntax:
+<synopsis>
+regexp_split_to_table(<replaceable>string</replaceable>, <replaceable>pattern</replaceable> <optional>, <replaceable>flags</replaceable> </optional>) <returnvalue>setof text</returnvalue>
+</synopsis>
      If there is no match to the <replaceable>pattern</replaceable>, the function returns the
      <replaceable>string</replaceable>.  If there is at least one match, for each match it returns
      the text from the end of the last match (or the beginning of the string)
@@ -897,15 +923,6 @@ regexp_replace(string=>'A PostgreSQL function', pattern=>'a|e|i|o|u', replacemen
      <xref linkend="posix-embedded-options-table"/>.
     </para>
 
-    <para>
-     The <function>regexp_split_to_array</function> function behaves the same as
-     <function>regexp_split_to_table</function>, except that <function>regexp_split_to_array</function>
-     returns its result as an array of <type>text</type>.  It has the syntax
-     <function>regexp_split_to_array</function>(<replaceable>string</replaceable>, <replaceable>pattern</replaceable>
-     <optional>, <replaceable>flags</replaceable> </optional>).
-     The parameters are the same as for <function>regexp_split_to_table</function>.
-    </para>
-
    <para>
     Some examples:
 <programlisting>
@@ -923,12 +940,6 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox jumps over the lazy d
  dog
 (9 rows)
 
-SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', '\s+');
-              regexp_split_to_array
------------------------------------------------
- {the,quick,brown,fox,jumps,over,the,lazy,dog}
-(1 row)
-
 SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo;
  foo
 -----
@@ -960,18 +971,43 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo;
     the other regexp functions, but is usually the most convenient behavior
     in practice.  Other software systems such as Perl use similar definitions.
    </para>
+</sect4>
+
+  <sect4 id="function-regexp_split_to_array">
+   <title>regexp_split_to_array</title>
+    <para>
+     The <function>regexp_split_to_array</function> function behaves the same as
+     <link linkend="function-regexp_split_to_table">regexp_split_to_table</link>,
+     except that <function>regexp_split_to_array</function>
+     returns its result as an array of <type>text</type>.  It has the syntax:
+<synopsis>
+regexp_split_to_array(<replaceable>string</replaceable>, <replaceable>pattern</replaceable> <optional>, <replaceable>flags</replaceable> </optional>) <returnvalue>text[]</returnvalue>
+</synopsis>
+     The parameters are the same as for <function>regexp_split_to_table</function>.
+    </para>
+
+   <para>
+    An example:
+<programlisting>
+SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', '\s+');
+              regexp_split_to_array
+-----------------------------------------------
+ {the,quick,brown,fox,jumps,over,the,lazy,dog}
+(1 row)
+</programlisting>
+</para>
+</sect4>
+
+   <sect4 id="function-regexp_substr">
+    <title>regexp_substr</title>
 
     <para>
      The <function>regexp_substr</function> function returns the substring
      that matches a POSIX regular expression pattern,
-     or <literal>NULL</literal> if there is no match.  It has the syntax
-     <function>regexp_substr</function>(<replaceable>string</replaceable>,
-     <replaceable>pattern</replaceable>
-     <optional>, <replaceable>start</replaceable>
-     <optional>, <replaceable>N</replaceable>
-     <optional>, <replaceable>flags</replaceable>
-     <optional>, <replaceable>subexpr</replaceable>
-     </optional></optional></optional></optional>).
+     or <literal>NULL</literal> if there is no match.  It has the syntax:
+<synopsis>
+regexp_substr(<replaceable>string</replaceable>, <replaceable>pattern</replaceable> <optional>, <replaceable>start</replaceable> <optional>, <replaceable>N</replaceable> <optional>, <replaceable>flags</replaceable> <optional>, <replaceable>subexpr</replaceable> </optional></optional></optional></optional>) <returnvalue>text</returnvalue>
+</synopsis>
      <replaceable>pattern</replaceable> is searched for
      in <replaceable>string</replaceable>, normally from the beginning of
      the string, but if the <replaceable>start</replaceable> parameter is
@@ -1001,6 +1037,8 @@ regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2)
                                    <lineannotation>FGH</lineannotation>
 </programlisting>
     </para>
+ </sect4>
+</sect3>
 
 <!-- derived from the re_syntax.n man page -->
 
diff --git a/doc/src/sgml/func/func-string.sgml b/doc/src/sgml/func/func-string.sgml
index 01cc94c234e..87823de35b0 100644
--- a/doc/src/sgml/func/func-string.sgml
+++ b/doc/src/sgml/func/func-string.sgml
@@ -431,7 +431,7 @@
        </para>
        <para>
         Extracts the first substring matching POSIX regular expression; see
-        <xref linkend="functions-posix-regexp"/>.
+        <xref linkend="function-substring"/>.
        </para>
        <para>
         <literal>substring('Thomas' from '...$')</literal>
@@ -961,7 +961,7 @@
         Returns the number of times the POSIX regular
         expression <parameter>pattern</parameter> matches in
         the <parameter>string</parameter>; see
-        <xref linkend="functions-posix-regexp"/>.
+        <xref linkend="function-regexp_count"/>.
        </para>
        <para>
         <literal>regexp_count('123456789012', '\d\d\d', 2)</literal>
@@ -986,7 +986,7 @@
         Returns the position within <parameter>string</parameter> where
         the <parameter>N</parameter>'th match of the POSIX regular
         expression <parameter>pattern</parameter> occurs, or zero if there is
-        no such match; see <xref linkend="functions-posix-regexp"/>.
+        no such match; see <xref linkend="functions-regexp_instr"/>.
        </para>
        <para>
         <literal>regexp_instr('ABCDEF', 'c(.)(..)', 1, 1, 0, 'i')</literal>
@@ -1011,7 +1011,7 @@
         Checks whether a match of the POSIX regular
         expression <parameter>pattern</parameter> occurs
         within <parameter>string</parameter>; see
-        <xref linkend="functions-posix-regexp"/>.
+        <xref linkend="function-regexp_like"/>.
        </para>
        <para>
         <literal>regexp_like('Hello World', 'world$', 'i')</literal>
@@ -1031,7 +1031,7 @@
         Returns substrings within the first match of the POSIX regular
         expression <parameter>pattern</parameter> to
         the <parameter>string</parameter>; see
-        <xref linkend="functions-posix-regexp"/>.
+        <xref linkend="function-regexp_match"/>.
        </para>
        <para>
         <literal>regexp_match('foobarbequebaz', '(bar)(beque)')</literal>
@@ -1052,7 +1052,7 @@
         expression <parameter>pattern</parameter> to
         the <parameter>string</parameter>, or substrings within all
         such matches if the <literal>g</literal> flag is used;
-        see <xref linkend="functions-posix-regexp"/>.
+        see <xref linkend="function-regexp_matches"/>.
        </para>
        <para>
         <literal>regexp_matches('foobarbequebaz', 'ba.', 'g')</literal>
@@ -1077,7 +1077,7 @@
         Replaces the substring that is the first match to the POSIX
         regular expression <parameter>pattern</parameter>, or all such
         matches if the <literal>g</literal> flag is used; see
-        <xref linkend="functions-posix-regexp"/>.
+        <xref linkend="function-regexp_replace"/>.
        </para>
        <para>
         <literal>regexp_replace('Thomas', '.[mN]a.', 'M')</literal>
@@ -1100,7 +1100,7 @@
         search beginning at the <parameter>start</parameter>'th character
         of <parameter>string</parameter>.  If <parameter>N</parameter> is
         omitted, it defaults to 1.  See
-        <xref linkend="functions-posix-regexp"/>.
+        <xref linkend="function-regexp_replace"/>.
        </para>
        <para>
         <literal>regexp_replace('Thomas', '.', 'X', 3, 2)</literal>
@@ -1123,7 +1123,7 @@
        <para>
         Splits <parameter>string</parameter> using a POSIX regular
         expression as the delimiter, producing an array of results; see
-        <xref linkend="functions-posix-regexp"/>.
+        <xref linkend="function-regexp_split_to_array"/>.
        </para>
        <para>
         <literal>regexp_split_to_array('hello world', '\s+')</literal>
@@ -1142,7 +1142,7 @@
        <para>
         Splits <parameter>string</parameter> using a POSIX regular
         expression as the delimiter, producing a set of results; see
-        <xref linkend="functions-posix-regexp"/>.
+        <xref linkend="function-regexp_split_to_table"/>.
        </para>
        <para>
         <literal>regexp_split_to_table('hello world', '\s+')</literal>
@@ -1171,7 +1171,7 @@
         matches the <parameter>N</parameter>'th occurrence of the POSIX
         regular expression <parameter>pattern</parameter>,
         or <literal>NULL</literal> if there is no such match; see
-        <xref linkend="functions-posix-regexp"/>.
+        <xref linkend="function-regexp_substr"/>.
        </para>
        <para>
         <literal>regexp_substr('ABCDEF', 'c(.)(..)', 1, 1, 'i')</literal>
-- 
2.34.1