From 6821e4e418429ac51035f5204262ced45012805d Mon Sep 17 00:00:00 2001 From: "David G. Johnston" Date: Tue, 11 Mar 2025 14:37:26 -0700 Subject: [PATCH] 0001 Introduce new processing.sgml chapter --- doc/src/sgml/filelist.sgml | 4 + doc/src/sgml/mvcc.sgml | 72 +- doc/src/sgml/parallel.sgml | 40 +- doc/src/sgml/perform.sgml | 1611 +---------------------- doc/src/sgml/postgres.sgml | 4 +- doc/src/sgml/processing-explain.sgml | 1096 +++++++++++++++ doc/src/sgml/processing-nodes.sgml | 118 ++ doc/src/sgml/processing-statistics.sgml | 464 +++++++ doc/src/sgml/processing.sgml | 34 + 9 files changed, 1798 insertions(+), 1645 deletions(-) create mode 100644 doc/src/sgml/processing-explain.sgml create mode 100644 doc/src/sgml/processing-nodes.sgml create mode 100644 doc/src/sgml/processing-statistics.sgml create mode 100644 doc/src/sgml/processing.sgml diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index f28a69e993..c0a7968da8 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -24,6 +24,10 @@ + + + + diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml index 049ee75a4b..f0c66c189e 100644 --- a/doc/src/sgml/mvcc.sgml +++ b/doc/src/sgml/mvcc.sgml @@ -1,6 +1,6 @@ - + Concurrency Control @@ -17,7 +17,7 @@ in this chapter. - + Introduction @@ -76,9 +76,9 @@ locks provide a mechanism for acquiring locks that are not tied to a single transaction. - + - + Transaction Isolation @@ -302,7 +302,7 @@ - + Read Committed Isolation Level @@ -475,9 +475,9 @@ COMMIT; that do complex queries and updates might require a more rigorously consistent view of the database than Read Committed mode provides. - + - + Repeatable Read Isolation Level @@ -595,9 +595,9 @@ ERROR: could not serialize access due to concurrent update behavior, Repeatable Read should now be requested. 
- + - + Serializable Isolation Level @@ -844,10 +844,10 @@ ERROR: could not serialize access due to read/write dependencies among transact with other systems that use a traditional locking technique. Please see for detailed information. - - + + - + Explicit Locking @@ -876,7 +876,7 @@ ERROR: could not serialize access due to read/write dependencies among transact manager subsystem, refer to . - + Table-Level Locks @@ -1236,9 +1236,9 @@ ERROR: could not serialize access due to read/write dependencies among transact - + - + Row-Level Locks @@ -1411,9 +1411,9 @@ ERROR: could not serialize access due to read/write dependencies among transact - + - + Page-Level Locks @@ -1424,9 +1424,9 @@ ERROR: could not serialize access due to read/write dependencies among transact page-level locks, but they are mentioned here for completeness. - + - + Deadlocks @@ -1508,9 +1508,9 @@ UPDATE accounts SET balance = balance - 100.00 WHERE acctnum = 22222; applications to hold transactions open for long periods of time (e.g., while waiting for user input). - + - + Advisory Locks @@ -1609,11 +1609,11 @@ SELECT pg_advisory_lock(q.id) FROM The functions provided to manipulate advisory locks are described in . - + - + - + Data Consistency Checks at the Application Level @@ -1651,7 +1651,7 @@ SELECT pg_advisory_lock(q.id) FROM break the cycle. - + Enforcing Consistency with Serializable Transactions @@ -1689,9 +1689,9 @@ SELECT pg_advisory_lock(q.id) FROM may want to use Repeatable Read and explicit locking on the primary. - + - + Enforcing Consistency with Explicit Blocking Locks @@ -1760,10 +1760,10 @@ SELECT pg_advisory_lock(q.id) FROM MERGE), so it is possible to obtain locks explicitly before the snapshot is frozen. - - + + - + Serialization Failure Handling @@ -1827,9 +1827,9 @@ SELECT pg_advisory_lock(q.id) FROM it may not be possible to make progress until the prepared transaction commits or rolls back. 
- + - + Caveats @@ -1869,9 +1869,9 @@ SELECT pg_advisory_lock(q.id) FROM rows representing concurrently created database objects, in the higher isolation levels. - + - + Locking and Indexes @@ -1943,5 +1943,5 @@ SELECT pg_advisory_lock(q.id) FROM non-scalar data, B-trees are not useful, and GiST, SP-GiST or GIN indexes should be used instead. - - + + diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml index 1ce9abf86f..7c808e447c 100644 --- a/doc/src/sgml/parallel.sgml +++ b/doc/src/sgml/parallel.sgml @@ -1,6 +1,6 @@ - + Parallel Query @@ -22,7 +22,7 @@ used so that users who wish to make use of it can understand what to expect. - + How Parallel Query Works @@ -103,9 +103,9 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; from the workers in whatever order is convenient, destroying any sort order that may have existed. - + - + When Can Parallel Query Be Used? @@ -242,9 +242,9 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; - + - + Parallel Plans @@ -262,7 +262,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; must be a parallel-aware scan. - + Parallel Scans @@ -304,9 +304,9 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; Other scan types, such as scans of non-btree indexes, may support parallel scans in the future. - + - + Parallel Joins @@ -348,9 +348,9 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; - + - + Parallel Aggregation PostgreSQL supports parallel aggregation by aggregating in @@ -390,9 +390,9 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; the query are also part of the parallel portion of the plan. - + - + Parallel Append @@ -438,9 +438,9 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; can be used to disable this feature. - + - + Parallel Plan Tips @@ -465,10 +465,10 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; performance characteristics of the plan. 
+ - - + Parallel Safety @@ -517,7 +517,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; - + Parallel Labeling for Functions and Aggregates @@ -585,8 +585,8 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; node. However, the planner does not do this. + + - - diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index be4b49f62b..387e7f0ec8 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -1,6 +1,6 @@ - + Performance Tips @@ -14,1569 +14,8 @@ and tuning PostgreSQL performance. - - Using <command>EXPLAIN</command> - - EXPLAIN - - - - query plan - - - - PostgreSQL devises a query - plan for each query it receives. Choosing the right - plan to match the query structure and the properties of the data - is absolutely critical for good performance, so the system includes - a complex planner that tries to choose good plans. - You can use the EXPLAIN command - to see what query plan the planner creates for any query. - Plan-reading is an art that requires some experience to master, - but this section attempts to cover the basics. - - - - Examples in this section are drawn from the regression test database - after doing a VACUUM ANALYZE, using v18 development sources. - You should be able to get similar results if you try the examples - yourself, but your estimated costs and row counts might vary slightly - because ANALYZE's statistics are random samples rather - than exact, and because costs are inherently somewhat platform-dependent. - - - - The examples use EXPLAIN's default text output - format, which is compact and convenient for humans to read. - If you want to feed EXPLAIN's output to a program for further - analysis, you should use one of its machine-readable output formats - (XML, JSON, or YAML) instead. - - - - <command>EXPLAIN</command> Basics - - - The structure of a query plan is a tree of plan nodes. 
- Nodes at the bottom level of the tree are scan nodes: they return raw rows - from a table. There are different types of scan nodes for different - table access methods: sequential scans, index scans, and bitmap index - scans. There are also non-table row sources, such as VALUES - clauses and set-returning functions in FROM, which have their - own scan node types. - If the query requires joining, aggregation, sorting, or other - operations on the raw rows, then there will be additional nodes - above the scan nodes to perform these operations. Again, - there is usually more than one possible way to do these operations, - so different node types can appear here too. The output - of EXPLAIN has one line for each node in the plan - tree, showing the basic node type plus the cost estimates that the planner - made for the execution of that plan node. Additional lines might appear, - indented from the node's summary line, - to show additional properties of the node. - The very first line (the summary line for the topmost - node) has the estimated total execution cost for the plan; it is this - number that the planner seeks to minimize. - - - - Here is a trivial example, just to show what the output looks like: - - -EXPLAIN SELECT * FROM tenk1; - - QUERY PLAN -------------------------------------------------------------- - Seq Scan on tenk1 (cost=0.00..445.00 rows=10000 width=244) - - - - - Since this query has no WHERE clause, it must scan all the - rows of the table, so the planner has chosen to use a simple sequential - scan plan. The numbers that are quoted in parentheses are (left - to right): - - - - - Estimated start-up cost. This is the time expended before the output - phase can begin, e.g., time to do the sorting in a sort node. - - - - - - Estimated total cost. This is stated on the assumption that the plan - node is run to completion, i.e., all available rows are retrieved. 
- In practice a node's parent node might stop short of reading all - available rows (see the LIMIT example below). - - - - - - Estimated number of rows output by this plan node. Again, the node - is assumed to be run to completion. - - - - - - Estimated average width of rows output by this plan node (in bytes). - - - - - - - The costs are measured in arbitrary units determined by the planner's - cost parameters (see ). - Traditional practice is to measure the costs in units of disk page - fetches; that is, is conventionally - set to 1.0 and the other cost parameters are set relative - to that. The examples in this section are run with the default cost - parameters. - - - - It's important to understand that the cost of an upper-level node includes - the cost of all its child nodes. It's also important to realize that - the cost only reflects things that the planner cares about. - In particular, the cost does not consider the time spent to convert - output values to text form or to transmit them to the client, which - could be important factors in the real elapsed time; but the planner - ignores those costs because it cannot change them by altering the - plan. (Every correct plan will output the same row set, we trust.) - - - - The rows value is a little tricky because it is - not the number of rows processed or scanned by the - plan node, but rather the number emitted by the node. This is often - less than the number scanned, as a result of filtering by any - WHERE-clause conditions that are being applied at the node. - Ideally the top-level rows estimate will approximate the number of rows - actually returned, updated, or deleted by the query. - - - - Returning to our example: - - -EXPLAIN SELECT * FROM tenk1; - - QUERY PLAN -------------------------------------------------------------- - Seq Scan on tenk1 (cost=0.00..445.00 rows=10000 width=244) - - - - - These numbers are derived very straightforwardly. 
If you do: - -SELECT relpages, reltuples FROM pg_class WHERE relname = 'tenk1'; - - you will find that tenk1 has 345 disk - pages and 10000 rows. The estimated cost is computed as (disk pages read * - seq_page_cost) + (rows scanned * - cpu_tuple_cost). By default, - seq_page_cost is 1.0 and cpu_tuple_cost is 0.01, - so the estimated cost is (345 * 1.0) + (10000 * 0.01) = 445. - - - - Now let's modify the query to add a WHERE condition: - - -EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 7000; - - QUERY PLAN ------------------------------------------------------------- - Seq Scan on tenk1 (cost=0.00..470.00 rows=7000 width=244) - Filter: (unique1 < 7000) - - - Notice that the EXPLAIN output shows the WHERE - clause being applied as a filter condition attached to the Seq - Scan plan node. This means that - the plan node checks the condition for each row it scans, and outputs - only the ones that pass the condition. - The estimate of output rows has been reduced because of the - WHERE clause. - However, the scan will still have to visit all 10000 rows, so the cost - hasn't decreased; in fact it has gone up a bit (by 10000 * cpu_operator_cost, to be exact) to reflect the extra CPU - time spent checking the WHERE condition. - - - - The actual number of rows this query would select is 7000, but the rows - estimate is only approximate. If you try to duplicate this experiment, - you may well get a slightly different estimate; moreover, it can - change after each ANALYZE command, because the - statistics produced by ANALYZE are taken from a - randomized sample of the table.
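As a sanity check on the arithmetic above, the two sequential-scan estimates can be reproduced outside the database. This is an illustrative sketch (not part of the patch), using the default planner cost settings and the tenk1 statistics quoted in the text:

```python
# Reproduce the planner's sequential-scan cost estimates using the
# default cost parameters and tenk1's statistics (345 pages, 10000 rows).
SEQ_PAGE_COST = 1.0       # default seq_page_cost
CPU_TUPLE_COST = 0.01     # default cpu_tuple_cost
CPU_OPERATOR_COST = 0.0025  # default cpu_operator_cost

pages, tuples = 345, 10000

# Plain "SELECT * FROM tenk1": pages read plus per-tuple CPU cost.
plain_cost = pages * SEQ_PAGE_COST + tuples * CPU_TUPLE_COST
print(plain_cost)  # 445.0

# With "WHERE unique1 < 7000": all 10000 rows are still scanned, and
# each one also pays one operator evaluation for the filter condition.
filtered_cost = plain_cost + tuples * CPU_OPERATOR_COST
print(filtered_cost)  # 470.0
```

The second figure shows why the filtered scan's cost goes up rather than down: the filter adds CPU work without reducing the pages or rows visited.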
- - - - Now, let's make the condition more restrictive: - - -EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 100; - - QUERY PLAN --------------------------------------------------------------------&zwsp;----------- - Bitmap Heap Scan on tenk1 (cost=5.06..224.98 rows=100 width=244) - Recheck Cond: (unique1 < 100) - -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) - Index Cond: (unique1 < 100) - - - Here the planner has decided to use a two-step plan: the child plan - node visits an index to find the locations of rows matching the index - condition, and then the upper plan node actually fetches those rows - from the table itself. Fetching rows separately is much more - expensive than reading them sequentially, but because not all the pages - of the table have to be visited, this is still cheaper than a sequential - scan. (The reason for using two plan levels is that the upper plan - node sorts the row locations identified by the index into physical order - before reading them, to minimize the cost of separate fetches. - The bitmap mentioned in the node names is the mechanism that - does the sorting.) - - - - Now let's add another condition to the WHERE clause: - - -EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 100 AND stringu1 = 'xxx'; - - QUERY PLAN --------------------------------------------------------------------&zwsp;----------- - Bitmap Heap Scan on tenk1 (cost=5.04..225.20 rows=1 width=244) - Recheck Cond: (unique1 < 100) - Filter: (stringu1 = 'xxx'::name) - -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) - Index Cond: (unique1 < 100) - - - The added condition stringu1 = 'xxx' reduces the - output row count estimate, but not the cost because we still have to visit - the same set of rows. That's because the stringu1 clause - cannot be applied as an index condition, since this index is only on - the unique1 column. Instead it is applied as a filter on - the rows retrieved using the index. 
Thus the cost has actually gone up - slightly to reflect this extra checking. - - - - In some cases the planner will prefer a simple index scan plan: - - -EXPLAIN SELECT * FROM tenk1 WHERE unique1 = 42; - - QUERY PLAN --------------------------------------------------------------------&zwsp;---------- - Index Scan using tenk1_unique1 on tenk1 (cost=0.29..8.30 rows=1 width=244) - Index Cond: (unique1 = 42) - - - In this type of plan the table rows are fetched in index order, which - makes them even more expensive to read, but there are so few that the - extra cost of sorting the row locations is not worth it. You'll most - often see this plan type for queries that fetch just a single row. It's - also often used for queries that have an ORDER BY condition - that matches the index order, because then no extra sorting step is needed - to satisfy the ORDER BY. In this example, adding - ORDER BY unique1 would use the same plan because the - index already implicitly provides the requested ordering. - - - - The planner may implement an ORDER BY clause in several - ways. The above example shows that such an ordering clause may be - implemented implicitly. 
The planner may also add an explicit - Sort step: - - -EXPLAIN SELECT * FROM tenk1 ORDER BY unique1; - - QUERY PLAN -------------------------------------------------------------------- - Sort (cost=1109.39..1134.39 rows=10000 width=244) - Sort Key: unique1 - -> Seq Scan on tenk1 (cost=0.00..445.00 rows=10000 width=244) - - - If a part of the plan guarantees an ordering on a prefix of the - required sort keys, then the planner may instead decide to use an - Incremental Sort step: - - -EXPLAIN SELECT * FROM tenk1 ORDER BY hundred, ten LIMIT 100; - - QUERY PLAN --------------------------------------------------------------------&zwsp;----------------------------- - Limit (cost=19.35..39.49 rows=100 width=244) - -> Incremental Sort (cost=19.35..2033.39 rows=10000 width=244) - Sort Key: hundred, ten - Presorted Key: hundred - -> Index Scan using tenk1_hundred on tenk1 (cost=0.29..1574.20 rows=10000 width=244) - - - Compared to regular sorts, sorting incrementally allows returning tuples - before the entire result set has been sorted, which particularly enables - optimizations with LIMIT queries. It may also reduce - memory usage and the likelihood of spilling sorts to disk, but it comes at - the cost of the increased overhead of splitting the result set into multiple - sorting batches. 
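The Limit node's total cost in the incremental-sort example can be derived from its child's numbers: the planner charges the child's startup cost plus a pro-rata share of the remaining run cost, scaled by the fraction of the child's rows the Limit expects to fetch. This is a simplified model for illustration, not the planner's full costing code:

```python
# Derive a Limit node's total cost from its child node's estimates:
# startup cost plus the fetched fraction of the child's run cost.
def limit_total_cost(child_startup, child_total, child_rows, limit_rows):
    fraction = limit_rows / child_rows
    return child_startup + fraction * (child_total - child_startup)

# Child: Incremental Sort (cost=19.35..2033.39 rows=10000); LIMIT 100.
cost = limit_total_cost(19.35, 2033.39, 10000, 100)
print(round(cost, 2))  # 39.49, matching the Limit node's total cost
```

The same interpolation explains the earlier LIMIT example, where the Limit node's cost was a fifth of its child's because it expected to fetch a fifth of the rows.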
- - - - If there are separate indexes on several of the columns referenced - in WHERE, the planner might choose to use an AND or OR - combination of the indexes: - - -EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000; - - QUERY PLAN --------------------------------------------------------------------&zwsp;------------------ - Bitmap Heap Scan on tenk1 (cost=25.07..60.11 rows=10 width=244) - Recheck Cond: ((unique1 < 100) AND (unique2 > 9000)) - -> BitmapAnd (cost=25.07..25.07 rows=10 width=0) - -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) - Index Cond: (unique1 < 100) - -> Bitmap Index Scan on tenk1_unique2 (cost=0.00..19.78 rows=999 width=0) - Index Cond: (unique2 > 9000) - - - But this requires visiting both indexes, so it's not necessarily a win - compared to using just one index and treating the other condition as - a filter. If you vary the ranges involved you'll see the plan change - accordingly. - - - - Here is an example showing the effects of LIMIT: - - -EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000 LIMIT 2; - - QUERY PLAN --------------------------------------------------------------------&zwsp;------------------ - Limit (cost=0.29..14.28 rows=2 width=244) - -> Index Scan using tenk1_unique2 on tenk1 (cost=0.29..70.27 rows=10 width=244) - Index Cond: (unique2 > 9000) - Filter: (unique1 < 100) - - - - - This is the same query as above, but we added a LIMIT so that - not all the rows need be retrieved, and the planner changed its mind about - what to do. Notice that the total cost and row count of the Index Scan - node are shown as if it were run to completion. However, the Limit node - is expected to stop after retrieving only a fifth of those rows, so its - total cost is only a fifth as much, and that's the actual estimated cost - of the query. 
This plan is preferred over adding a Limit node to the - previous plan because the Limit could not avoid paying the startup cost - of the bitmap scan, so the total cost would be something over 25 units - with that approach. - - - - Let's try joining two tables, using the columns we have been discussing: - - -EXPLAIN SELECT * -FROM tenk1 t1, tenk2 t2 -WHERE t1.unique1 < 10 AND t1.unique2 = t2.unique2; - - QUERY PLAN --------------------------------------------------------------------&zwsp;------------------- - Nested Loop (cost=4.65..118.50 rows=10 width=488) - -> Bitmap Heap Scan on tenk1 t1 (cost=4.36..39.38 rows=10 width=244) - Recheck Cond: (unique1 < 10) - -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..4.36 rows=10 width=0) - Index Cond: (unique1 < 10) - -> Index Scan using tenk2_unique2 on tenk2 t2 (cost=0.29..7.90 rows=1 width=244) - Index Cond: (unique2 = t1.unique2) - - - - - In this plan, we have a nested-loop join node with two table scans as - inputs, or children. The indentation of the node summary lines reflects - the plan tree structure. The join's first, or outer, child - is a bitmap scan similar to those we saw before. Its cost and row count - are the same as we'd get from SELECT ... WHERE unique1 < 10 - because we are - applying the WHERE clause unique1 < 10 - at that node. - The t1.unique2 = t2.unique2 clause is not relevant yet, - so it doesn't affect the row count of the outer scan. The nested-loop - join node will run its second, - or inner child once for each row obtained from the outer child. - Column values from the current outer row can be plugged into the inner - scan; here, the t1.unique2 value from the outer row is available, - so we get a plan and costs similar to what we saw above for a simple - SELECT ... WHERE t2.unique2 = constant case. - (The estimated cost is actually a bit lower than what was seen above, - as a result of caching that's expected to occur during the repeated - index scans on t2.) 
The - costs of the loop node are then set on the basis of the cost of the outer - scan, plus one repetition of the inner scan for each outer row (10 * 7.90, - here), plus a little CPU time for join processing. - - - - In this example the join's output row count is the same as the product - of the two scans' row counts, but that's not true in all cases because - there can be additional WHERE clauses that mention both tables - and so can only be applied at the join point, not to either input scan. - Here's an example: - - -EXPLAIN SELECT * -FROM tenk1 t1, tenk2 t2 -WHERE t1.unique1 < 10 AND t2.unique2 < 10 AND t1.hundred < t2.hundred; - - QUERY PLAN --------------------------------------------------------------------&zwsp;-------------------------- - Nested Loop (cost=4.65..49.36 rows=33 width=488) - Join Filter: (t1.hundred < t2.hundred) - -> Bitmap Heap Scan on tenk1 t1 (cost=4.36..39.38 rows=10 width=244) - Recheck Cond: (unique1 < 10) - -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..4.36 rows=10 width=0) - Index Cond: (unique1 < 10) - -> Materialize (cost=0.29..8.51 rows=10 width=244) - -> Index Scan using tenk2_unique2 on tenk2 t2 (cost=0.29..8.46 rows=10 width=244) - Index Cond: (unique2 < 10) - - - The condition t1.hundred < t2.hundred can't be - tested in the tenk2_unique2 index, so it's applied at the - join node. This reduces the estimated output row count of the join node, - but does not change either input scan. - - - - Notice that here the planner has chosen to materialize the inner - relation of the join, by putting a Materialize plan node atop it. This - means that the t2 index scan will be done just once, even - though the nested-loop join node needs to read that data ten times, once - for each row from the outer relation. The Materialize node saves the data - in memory as it's read, and then returns the data from memory on each - subsequent pass. 
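The nested-loop cost composition described above can be checked approximately against the first join example. The breakdown below is a rough illustration, not the planner's exact formula; the small per-row charge standing in for "a little CPU time for join processing" is an assumption:

```python
# Rough decomposition of the nested-loop join's estimated total cost
# (118.50) from the earlier example: the outer scan's total cost, plus
# one inner index scan per outer row, plus per-row join CPU cost.
outer_total = 39.38    # Bitmap Heap Scan on tenk1 t1, total cost
inner_total = 7.90     # Index Scan on tenk2 t2, cost per repetition
outer_rows = 10
cpu_tuple_cost = 0.01  # assumed stand-in for the join-processing charge

approx = outer_total + outer_rows * inner_total + outer_rows * cpu_tuple_cost
print(round(approx, 2))  # 118.48, close to the reported 118.50
```

The point is not the exact remainder but the structure: the inner scan's cost is multiplied by the expected number of outer rows, which is why nested loops become expensive quickly as the outer row count grows.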
- - - - When dealing with outer joins, you might see join plan nodes with both - Join Filter and plain Filter conditions attached. - Join Filter conditions come from the outer join's ON clause, - so a row that fails the Join Filter condition could still get emitted as - a null-extended row. But a plain Filter condition is applied after the - outer-join rules and so acts to remove rows unconditionally. In an inner - join there is no semantic difference between these types of filters. - - - - If we change the query's selectivity a bit, we might get a very different - join plan: - - -EXPLAIN SELECT * -FROM tenk1 t1, tenk2 t2 -WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2; - - QUERY PLAN --------------------------------------------------------------------&zwsp;----------------------- - Hash Join (cost=226.23..709.73 rows=100 width=488) - Hash Cond: (t2.unique2 = t1.unique2) - -> Seq Scan on tenk2 t2 (cost=0.00..445.00 rows=10000 width=244) - -> Hash (cost=224.98..224.98 rows=100 width=244) - -> Bitmap Heap Scan on tenk1 t1 (cost=5.06..224.98 rows=100 width=244) - Recheck Cond: (unique1 < 100) - -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) - Index Cond: (unique1 < 100) - - - - - Here, the planner has chosen to use a hash join, in which rows of one - table are entered into an in-memory hash table, after which the other - table is scanned and the hash table is probed for matches to each row. - Again note how the indentation reflects the plan structure: the bitmap - scan on tenk1 is the input to the Hash node, which constructs - the hash table. That's then returned to the Hash Join node, which reads - rows from its outer child plan and searches the hash table for each one. 
- - - - Another possible type of join is a merge join, illustrated here: - - -EXPLAIN SELECT * -FROM tenk1 t1, onek t2 -WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2; - - QUERY PLAN --------------------------------------------------------------------&zwsp;----------------------- - Merge Join (cost=0.56..233.49 rows=10 width=488) - Merge Cond: (t1.unique2 = t2.unique2) - -> Index Scan using tenk1_unique2 on tenk1 t1 (cost=0.29..643.28 rows=100 width=244) - Filter: (unique1 < 100) - -> Index Scan using onek_unique2 on onek t2 (cost=0.28..166.28 rows=1000 width=244) - - - - - Merge join requires its input data to be sorted on the join keys. In this - example each input is sorted by using an index scan to visit the rows - in the correct order; but a sequential scan and sort could also be used. - (Sequential-scan-and-sort frequently beats an index scan for sorting many rows, - because of the nonsequential disk access required by the index scan.) - - - - One way to look at variant plans is to force the planner to disregard - whatever strategy it thought was the cheapest, using the enable/disable - flags described in . - (This is a crude tool, but useful. See - also .) 
- For example, if we're unconvinced that merge join is the best join - type for the previous example, we could try - - -SET enable_mergejoin = off; - -EXPLAIN SELECT * -FROM tenk1 t1, onek t2 -WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2; - - QUERY PLAN --------------------------------------------------------------------&zwsp;----------------------- - Hash Join (cost=226.23..344.08 rows=10 width=488) - Hash Cond: (t2.unique2 = t1.unique2) - -> Seq Scan on onek t2 (cost=0.00..114.00 rows=1000 width=244) - -> Hash (cost=224.98..224.98 rows=100 width=244) - -> Bitmap Heap Scan on tenk1 t1 (cost=5.06..224.98 rows=100 width=244) - Recheck Cond: (unique1 < 100) - -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) - Index Cond: (unique1 < 100) - - - which shows that the planner thinks that hash join would be nearly 50% - more expensive than merge join for this case. - Of course, the next question is whether it's right about that. - We can investigate that using EXPLAIN ANALYZE, as - discussed below. - - - - When using the enable/disable flags to disable plan node types, many of - the flags only discourage the use of the corresponding plan node and don't - outright disallow the planner's ability to use the plan node type. This - is by design so that the planner still maintains the ability to form a - plan for a given query. When the resulting plan contains a disabled node, - the EXPLAIN output will indicate this fact. - - -SET enable_seqscan = off; -EXPLAIN SELECT * FROM unit; - - QUERY PLAN ---------------------------------------------------------- - Seq Scan on unit (cost=0.00..21.30 rows=1130 width=44) - Disabled: true - - - - - Because the unit table has no indexes, there is no - other means to read the table data, so the sequential scan is the only - option available to the query planner. - - - - - subplan - - Some query plans involve subplans, which arise - from sub-SELECTs in the original query. 
Such - queries can sometimes be transformed into ordinary join plans, but - when they cannot be, we get plans like: - - -EXPLAIN VERBOSE SELECT unique1 -FROM tenk1 t -WHERE t.ten < ALL (SELECT o.ten FROM onek o WHERE o.four = t.four); - - QUERY PLAN --------------------------------------------------------------------&zwsp;------ - Seq Scan on public.tenk1 t (cost=0.00..586095.00 rows=5000 width=4) - Output: t.unique1 - Filter: (ALL (t.ten < (SubPlan 1).col1)) - SubPlan 1 - -> Seq Scan on public.onek o (cost=0.00..116.50 rows=250 width=4) - Output: o.ten - Filter: (o.four = t.four) - - - This rather artificial example serves to illustrate a couple of - points: values from the outer plan level can be passed down into a - subplan (here, t.four is passed down) and the - results of the sub-select are available to the outer plan. Those - result values are shown by EXPLAIN with notations - like - (subplan_name).colN, - which refers to the N'th output column of - the sub-SELECT. - - - - - subplan - hashed - - In the example above, the ALL operator runs the - subplan again for each row of the outer query (which accounts for the - high estimated cost). Some queries can use a hashed - subplan to avoid that: - - -EXPLAIN SELECT * -FROM tenk1 t -WHERE t.unique1 NOT IN (SELECT o.unique1 FROM onek o); - - QUERY PLAN --------------------------------------------------------------------&zwsp;------------------------- - Seq Scan on tenk1 t (cost=61.77..531.77 rows=5000 width=244) - Filter: (NOT (ANY (unique1 = (hashed SubPlan 1).col1))) - SubPlan 1 - -> Index Only Scan using onek_unique1 on onek o (cost=0.28..59.27 rows=1000 width=4) -(4 rows) - - - Here, the subplan is run a single time and its output is loaded into - an in-memory hash table, which is then probed by the - outer ANY operator. This requires that the - sub-SELECT not reference any variables of the outer - query, and that the ANY's comparison operator be - amenable to hashing. 
- - - - - initplan - - If, in addition to not referencing any variables of the outer query, - the sub-SELECT cannot return more than one row, - it may instead be implemented as an initplan: - - -EXPLAIN VERBOSE SELECT unique1 -FROM tenk1 t1 WHERE t1.ten = (SELECT (random() * 10)::integer); - - QUERY PLAN -------------------------------------------------------------&zwsp;-------- - Seq Scan on public.tenk1 t1 (cost=0.02..470.02 rows=1000 width=4) - Output: t1.unique1 - Filter: (t1.ten = (InitPlan 1).col1) - InitPlan 1 - -> Result (cost=0.00..0.02 rows=1 width=4) - Output: ((random() * '10'::double precision))::integer - - - An initplan is run only once per execution of the outer plan, and its - results are saved for re-use in later rows of the outer plan. So in - this example random() is evaluated only once and - all the values of t1.ten are compared to the same - randomly-chosen integer. That's quite different from what would - happen without the sub-SELECT construct. - - - - - - <command>EXPLAIN ANALYZE</command> - - - It is possible to check the accuracy of the planner's estimates - by using EXPLAIN's ANALYZE option. With this - option, EXPLAIN actually executes the query, and then displays - the true row counts and true run time accumulated within each plan node, - along with the same estimates that a plain EXPLAIN - shows. 
For example, we might get a result like this: - - -EXPLAIN ANALYZE SELECT * -FROM tenk1 t1, tenk2 t2 -WHERE t1.unique1 < 10 AND t1.unique2 = t2.unique2; - - QUERY PLAN --------------------------------------------------------------------&zwsp;-------------------------------------------------------------- - Nested Loop (cost=4.65..118.50 rows=10 width=488) (actual time=0.017..0.051 rows=10.00 loops=1) - Buffers: shared hit=36 read=6 - -> Bitmap Heap Scan on tenk1 t1 (cost=4.36..39.38 rows=10 width=244) (actual time=0.009..0.017 rows=10.00 loops=1) - Recheck Cond: (unique1 < 10) - Heap Blocks: exact=10 - Buffers: shared hit=3 read=5 written=4 - -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..4.36 rows=10 width=0) (actual time=0.004..0.004 rows=10.00 loops=1) - Index Cond: (unique1 < 10) - Buffers: shared hit=2 - -> Index Scan using tenk2_unique2 on tenk2 t2 (cost=0.29..7.90 rows=1 width=244) (actual time=0.003..0.003 rows=1 loops=10) - Index Cond: (unique2 = t1.unique2) - Buffers: shared hit=24 read=6 - Planning: - Buffers: shared hit=15 dirtied=9 - Planning Time: 0.485 ms - Execution Time: 0.073 ms - - - Note that the actual time values are in milliseconds of - real time, whereas the cost estimates are expressed in - arbitrary units; so they are unlikely to match up. - The thing that's usually most important to look for is whether the - estimated row counts are reasonably close to reality. In this example - the estimates were all dead-on, but that's quite unusual in practice. - - - - In some query plans, it is possible for a subplan node to be executed more - than once. For example, the inner index scan will be executed once per - outer row in the above nested-loop plan. In such cases, the - loops value reports the - total number of executions of the node, and the actual time and rows - values shown are averages per-execution. This is done to make the numbers - comparable with the way that the cost estimates are shown. 
Multiply by - the loops value to get the total time actually spent in - the node. In the above example, we spent a total of 0.030 milliseconds - executing the index scans on tenk2. - - - - In some cases EXPLAIN ANALYZE shows additional execution - statistics beyond the plan node execution times and row counts. - For example, Sort and Hash nodes provide extra information: - - -EXPLAIN ANALYZE SELECT * -FROM tenk1 t1, tenk2 t2 -WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2 ORDER BY t1.fivethous; - - QUERY PLAN --------------------------------------------------------------------&zwsp;-------------------------------------------------------------------&zwsp;------ - Sort (cost=713.05..713.30 rows=100 width=488) (actual time=2.995..3.002 rows=100.00 loops=1) - Sort Key: t1.fivethous - Sort Method: quicksort Memory: 74kB - Buffers: shared hit=440 - -> Hash Join (cost=226.23..709.73 rows=100 width=488) (actual time=0.515..2.920 rows=100.00 loops=1) - Hash Cond: (t2.unique2 = t1.unique2) - Buffers: shared hit=437 - -> Seq Scan on tenk2 t2 (cost=0.00..445.00 rows=10000 width=244) (actual time=0.026..1.790 rows=10000.00 loops=1) - Buffers: shared hit=345 - -> Hash (cost=224.98..224.98 rows=100 width=244) (actual time=0.476..0.477 rows=100.00 loops=1) - Buckets: 1024 Batches: 1 Memory Usage: 35kB - Buffers: shared hit=92 - -> Bitmap Heap Scan on tenk1 t1 (cost=5.06..224.98 rows=100 width=244) (actual time=0.030..0.450 rows=100.00 loops=1) - Recheck Cond: (unique1 < 100) - Heap Blocks: exact=90 - Buffers: shared hit=92 - -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) (actual time=0.013..0.013 rows=100.00 loops=1) - Index Cond: (unique1 < 100) - Buffers: shared hit=2 - Planning: - Buffers: shared hit=12 - Planning Time: 0.187 ms - Execution Time: 3.036 ms - - - The Sort node shows the sort method used (in particular, whether the sort - was in-memory or on-disk) and the amount of memory or disk space needed. 
- The Hash node shows the number of hash buckets and batches as well as the - peak amount of memory used for the hash table. (If the number of batches - exceeds one, there will also be disk space usage involved, but that is not - shown.) - - - - Another type of extra information is the number of rows removed by a - filter condition: - - -EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE ten < 7; - - QUERY PLAN --------------------------------------------------------------------&zwsp;-------------------------------------- - Seq Scan on tenk1 (cost=0.00..470.00 rows=7000 width=244) (actual time=0.030..1.995 rows=7000.00 loops=1) - Filter: (ten < 7) - Rows Removed by Filter: 3000 - Buffers: shared hit=345 - Planning Time: 0.102 ms - Execution Time: 2.145 ms - - - These counts can be particularly valuable for filter conditions applied at - join nodes. The Rows Removed line only appears when at least - one scanned row, or potential join pair in the case of a join node, - is rejected by the filter condition. - - - - A case similar to filter conditions occurs with lossy - index scans. For example, consider this search for polygons containing a - specific point: - - -EXPLAIN ANALYZE SELECT * FROM polygon_tbl WHERE f1 @> polygon '(0.5,2.0)'; - - QUERY PLAN --------------------------------------------------------------------&zwsp;----------------------------------- - Seq Scan on polygon_tbl (cost=0.00..1.09 rows=1 width=85) (actual time=0.023..0.023 rows=0.00 loops=1) - Filter: (f1 @> '((0.5,2))'::polygon) - Rows Removed by Filter: 7 - Buffers: shared hit=1 - Planning Time: 0.039 ms - Execution Time: 0.033 ms - - - The planner thinks (quite correctly) that this sample table is too small - to bother with an index scan, so we have a plain sequential scan in which - all the rows got rejected by the filter condition. 
But if we force an - index scan to be used, we see: - - -SET enable_seqscan TO off; - -EXPLAIN ANALYZE SELECT * FROM polygon_tbl WHERE f1 @> polygon '(0.5,2.0)'; - - QUERY PLAN --------------------------------------------------------------------&zwsp;------------------------------------------------------- - Index Scan using gpolygonind on polygon_tbl (cost=0.13..8.15 rows=1 width=85) (actual time=0.074..0.074 rows=0.00 loops=1) - Index Cond: (f1 @> '((0.5,2))'::polygon) - Rows Removed by Index Recheck: 1 - Buffers: shared hit=1 - Planning Time: 0.039 ms - Execution Time: 0.098 ms - - - Here we can see that the index returned one candidate row, which was - then rejected by a recheck of the index condition. This happens because a - GiST index is lossy for polygon containment tests: it actually - returns the rows with polygons that overlap the target, and then we have - to do the exact containment test on those rows. - - - - EXPLAIN has a BUFFERS option which - provides additional detail about I/O operations performed during the - planning and execution of the given query. The buffer numbers displayed - show the count of the non-distinct buffers hit, read, dirtied, and written - for the given node and all of its child nodes. The - ANALYZE option implicitly enables the - BUFFERS option. 
If this - is undesired, BUFFERS may be explicitly disabled: - - -EXPLAIN (ANALYZE, BUFFERS OFF) SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000; - - QUERY PLAN --------------------------------------------------------------------&zwsp;-------------------------------------------------------------- - Bitmap Heap Scan on tenk1 (cost=25.07..60.11 rows=10 width=244) (actual time=0.105..0.114 rows=10.00 loops=1) - Recheck Cond: ((unique1 < 100) AND (unique2 > 9000)) - Heap Blocks: exact=10 - -> BitmapAnd (cost=25.07..25.07 rows=10 width=0) (actual time=0.100..0.101 rows=0.00 loops=1) - -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) (actual time=0.027..0.027 rows=100.00 loops=1) - Index Cond: (unique1 < 100) - -> Bitmap Index Scan on tenk1_unique2 (cost=0.00..19.78 rows=999 width=0) (actual time=0.070..0.070 rows=999.00 loops=1) - Index Cond: (unique2 > 9000) - Planning Time: 0.162 ms - Execution Time: 0.143 ms - - - - - Keep in mind that because EXPLAIN ANALYZE actually - runs the query, any side-effects will happen as usual, even though - whatever results the query might output are discarded in favor of - printing the EXPLAIN data. 
If you want to analyze a - data-modifying query without changing your tables, you can - roll the command back afterwards, for example: - - -BEGIN; - -EXPLAIN ANALYZE UPDATE tenk1 SET hundred = hundred + 1 WHERE unique1 < 100; - - QUERY PLAN --------------------------------------------------------------------&zwsp;------------------------------------------------------------- - Update on tenk1 (cost=5.06..225.23 rows=0 width=0) (actual time=1.634..1.635 rows=0.00 loops=1) - -> Bitmap Heap Scan on tenk1 (cost=5.06..225.23 rows=100 width=10) (actual time=0.065..0.141 rows=100.00 loops=1) - Recheck Cond: (unique1 < 100) - Heap Blocks: exact=90 - Buffers: shared hit=4 read=2 - -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) (actual time=0.031..0.031 rows=100.00 loops=1) - Index Cond: (unique1 < 100) - Buffers: shared read=2 - Planning Time: 0.151 ms - Execution Time: 1.856 ms - -ROLLBACK; - - - - - As seen in this example, when the query is an INSERT, - UPDATE, DELETE, or - MERGE command, the actual work of - applying the table changes is done by a top-level Insert, Update, - Delete, or Merge plan node. The plan nodes underneath this node perform - the work of locating the old rows and/or computing the new data. - So above, we see the same sort of bitmap table scan we've seen already, - and its output is fed to an Update node that stores the updated rows. - It's worth noting that although the data-modifying node can take a - considerable amount of run time (here, it's consuming the lion's share - of the time), the planner does not currently add anything to the cost - estimates to account for that work. That's because the work to be done is - the same for every correct query plan, so it doesn't affect planning - decisions. 
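Plain EXPLAIN, without the ANALYZE option, never executes the statement, so the shape of a data-modifying plan can also be inspected with no transaction wrapper at all. A minimal sketch against the same regression-test table:

```sql
-- No ANALYZE: the DELETE is planned but never executed,
-- so no rows are removed and no BEGIN/ROLLBACK is needed.
EXPLAIN DELETE FROM tenk1 WHERE unique1 < 100;
```

The cost estimates and the Delete-over-scan plan shape are shown, but without actual times or row counts.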
- - - - When an UPDATE, DELETE, or - MERGE command affects a partitioned table or - inheritance hierarchy, the output might look like this: - - -EXPLAIN UPDATE gtest_parent SET f1 = CURRENT_DATE WHERE f2 = 101; - - QUERY PLAN --------------------------------------------------------------------&zwsp;--------------------- - Update on gtest_parent (cost=0.00..3.06 rows=0 width=0) - Update on gtest_child gtest_parent_1 - Update on gtest_child2 gtest_parent_2 - Update on gtest_child3 gtest_parent_3 - -> Append (cost=0.00..3.06 rows=3 width=14) - -> Seq Scan on gtest_child gtest_parent_1 (cost=0.00..1.01 rows=1 width=14) - Filter: (f2 = 101) - -> Seq Scan on gtest_child2 gtest_parent_2 (cost=0.00..1.01 rows=1 width=14) - Filter: (f2 = 101) - -> Seq Scan on gtest_child3 gtest_parent_3 (cost=0.00..1.01 rows=1 width=14) - Filter: (f2 = 101) - - - In this example the Update node needs to consider three child tables, - but not the originally-mentioned partitioned table (since that never - stores any data). So there are three input - scanning subplans, one per table. For clarity, the Update node is - annotated to show the specific target tables that will be updated, in the - same order as the corresponding subplans. - - - - The Planning time shown by EXPLAIN - ANALYZE is the time it took to generate the query plan from the - parsed query and optimize it. It does not include parsing or rewriting. - - - - The Execution time shown by EXPLAIN - ANALYZE includes executor start-up and shut-down time, as well - as the time to run any triggers that are fired, but it does not include - parsing, rewriting, or planning time. - Time spent executing BEFORE triggers, if any, is included in - the time for the related Insert, Update, or Delete node; but time - spent executing AFTER triggers is not counted there because - AFTER triggers are fired after completion of the whole plan. - The total time spent in each trigger - (either BEFORE or AFTER) is also shown separately. 
-
- Note that deferred constraint triggers will not be executed
- until end of transaction and are thus not considered at all by
- EXPLAIN ANALYZE.
-
-
-
- The time shown for the top-level node does not include any time needed
- to convert the query's output data into displayable form or to send it
- to the client. While EXPLAIN ANALYZE will never
- send the data to the client, it can be told to convert the query's
- output data to displayable form and measure the time needed for that,
- by specifying the SERIALIZE option. That time will
- be shown separately, and it's also included in the
- total Execution time.
-
-
-
-
-
- Caveats
-
-
- There are two significant ways in which run times measured by
- EXPLAIN ANALYZE can deviate from normal execution of
- the same query. First, since no output rows are delivered to the client,
- network transmission costs are not included. I/O conversion costs are
- not included either unless SERIALIZE is specified.
- Second, the measurement overhead added by EXPLAIN
- ANALYZE can be significant, especially on machines with slow
- gettimeofday() operating-system calls. You can use the
- pg_test_timing tool to measure the overhead of timing
- on your system.
-
-
-
- EXPLAIN results should not be extrapolated to situations
- much different from the one you are actually testing; for example,
- results on a toy-sized table cannot be assumed to apply to large tables.
- The planner's cost estimates are not linear and so it might choose
- a different plan for a larger or smaller table. An extreme example
- is that on a table that only occupies one disk page, you'll nearly
- always get a sequential scan plan whether indexes are available or not.
- The planner realizes that it's going to take one disk page read to
- process the table in any case, so there's no value in expending additional
- page reads to look at an index. (We saw this happening in the
- polygon_tbl example above.)
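Returning to the measurement-overhead caveat above: when per-node timing itself is the dominant cost, actual row counts can still be collected with timing disabled. A sketch (output details vary by version):

```sql
-- Executes the query and reports actual row counts and loops,
-- but skips the per-node clock readings that make up most of
-- EXPLAIN ANALYZE's measurement overhead.
EXPLAIN (ANALYZE, TIMING OFF) SELECT * FROM tenk1 WHERE unique1 < 100;
```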
- - - - There are cases in which the actual and estimated values won't match up - well, but nothing is really wrong. One such case occurs when - plan node execution is stopped short by a LIMIT or similar - effect. For example, in the LIMIT query we used before, - - -EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000 LIMIT 2; - - QUERY PLAN --------------------------------------------------------------------&zwsp;------------------------------------------------------------ - Limit (cost=0.29..14.33 rows=2 width=244) (actual time=0.051..0.071 rows=2.00 loops=1) - Buffers: shared hit=16 - -> Index Scan using tenk1_unique2 on tenk1 (cost=0.29..70.50 rows=10 width=244) (actual time=0.051..0.070 rows=2.00 loops=1) - Index Cond: (unique2 > 9000) - Filter: (unique1 < 100) - Rows Removed by Filter: 287 - Buffers: shared hit=16 - Planning Time: 0.077 ms - Execution Time: 0.086 ms - - - the estimated cost and row count for the Index Scan node are shown as - though it were run to completion. But in reality the Limit node stopped - requesting rows after it got two, so the actual row count is only 2 and - the run time is less than the cost estimate would suggest. This is not - an estimation error, only a discrepancy in the way the estimates and true - values are displayed. - - - - Merge joins also have measurement artifacts that can confuse the unwary. - A merge join will stop reading one input if it's exhausted the other input - and the next key value in the one input is greater than the last key value - of the other input; in such a case there can be no more matches and so no - need to scan the rest of the first input. This results in not reading all - of one child, with results like those mentioned for LIMIT. - Also, if the outer (first) child contains rows with duplicate key values, - the inner (second) child is backed up and rescanned for the portion of its - rows matching that key value. 
EXPLAIN ANALYZE counts these - repeated emissions of the same inner rows as if they were real additional - rows. When there are many outer duplicates, the reported actual row count - for the inner child plan node can be significantly larger than the number - of rows that are actually in the inner relation. - - - - BitmapAnd and BitmapOr nodes always report their actual row counts as zero, - due to implementation limitations. - - - - Normally, EXPLAIN will display every plan node - created by the planner. However, there are cases where the executor - can determine that certain nodes need not be executed because they - cannot produce any rows, based on parameter values that were not - available at planning time. (Currently this can only happen for child - nodes of an Append or MergeAppend node that is scanning a partitioned - table.) When this happens, those plan nodes are omitted from - the EXPLAIN output and a Subplans - Removed: N annotation appears - instead. - - - - - - - Statistics Used by the Planner - - - statistics - of the planner - - - - Single-Column Statistics - - As we saw in the previous section, the query planner needs to estimate - the number of rows retrieved by a query in order to make good choices - of query plans. This section provides a quick look at the statistics - that the system uses for these estimates. - - - - One component of the statistics is the total number of entries in - each table and index, as well as the number of disk blocks occupied - by each table and index. This information is kept in the table - pg_class, - in the columns reltuples and - relpages. 
We can look at it with - queries similar to this one: - - -SELECT relname, relkind, reltuples, relpages -FROM pg_class -WHERE relname LIKE 'tenk1%'; - - relname | relkind | reltuples | relpages -----------------------+---------+-----------+---------- - tenk1 | r | 10000 | 345 - tenk1_hundred | i | 10000 | 11 - tenk1_thous_tenthous | i | 10000 | 30 - tenk1_unique1 | i | 10000 | 30 - tenk1_unique2 | i | 10000 | 30 -(5 rows) - - - Here we can see that tenk1 contains 10000 - rows, as do its indexes, but the indexes are (unsurprisingly) much - smaller than the table. - - - - For efficiency reasons, reltuples - and relpages are not updated on-the-fly, - and so they usually contain somewhat out-of-date values. - They are updated by VACUUM, ANALYZE, and a - few DDL commands such as CREATE INDEX. A VACUUM - or ANALYZE operation that does not scan the entire table - (which is commonly the case) will incrementally update the - reltuples count on the basis of the part - of the table it did scan, resulting in an approximate value. - In any case, the planner - will scale the values it finds in pg_class - to match the current physical table size, thus obtaining a closer - approximation. - - - - pg_statistic - - - - Most queries retrieve only a fraction of the rows in a table, due - to WHERE clauses that restrict the rows to be - examined. The planner thus needs to make an estimate of the - selectivity of WHERE clauses, that is, - the fraction of rows that match each condition in the - WHERE clause. The information used for this task is - stored in the - pg_statistic - system catalog. Entries in pg_statistic - are updated by the ANALYZE and VACUUM - ANALYZE commands, and are always approximate even when freshly - updated. - - - - pg_stats - - - - Rather than look at pg_statistic directly, - it's better to look at its view - pg_stats - when examining the statistics manually. pg_stats - is designed to be more easily readable. 
Furthermore, - pg_stats is readable by all, whereas - pg_statistic is only readable by a superuser. - (This prevents unprivileged users from learning something about - the contents of other people's tables from the statistics. The - pg_stats view is restricted to show only - rows about tables that the current user can read.) - For example, we might do: - - -SELECT attname, inherited, n_distinct, - array_to_string(most_common_vals, E'\n') as most_common_vals -FROM pg_stats -WHERE tablename = 'road'; - - attname | inherited | n_distinct | most_common_vals ----------+-----------+------------+------------------------------------ - name | f | -0.5681108 | I- 580 Ramp+ - | | | I- 880 Ramp+ - | | | Sp Railroad + - | | | I- 580 + - | | | I- 680 Ramp+ - | | | I- 80 Ramp+ - | | | 14th St + - | | | I- 880 + - | | | Mac Arthur Blvd+ - | | | Mission Blvd+ -... - name | t | -0.5125 | I- 580 Ramp+ - | | | I- 880 Ramp+ - | | | I- 580 + - | | | I- 680 Ramp+ - | | | I- 80 Ramp+ - | | | Sp Railroad + - | | | I- 880 + - | | | State Hwy 13 Ramp+ - | | | I- 80 + - | | | State Hwy 24 Ramp+ -... - thepath | f | 0 | - thepath | t | 0 | -(4 rows) - - - Note that two rows are displayed for the same column, one corresponding - to the complete inheritance hierarchy starting at the - road table (inherited=t), - and another one including only the road table itself - (inherited=f). - (For brevity, we have only shown the first ten most-common values for - the name column.) - - - - The amount of information stored in pg_statistic - by ANALYZE, in particular the maximum number of entries in the - most_common_vals and histogram_bounds - arrays for each column, can be set on a - column-by-column basis using the ALTER TABLE SET STATISTICS - command, or globally by setting the - configuration variable. - The default limit is presently 100 entries. 
Raising the limit
- might allow more accurate planner estimates to be made, particularly for
- columns with irregular data distributions, at the price of consuming
- more space in pg_statistic and slightly more
- time to compute the estimates. Conversely, a lower limit might be
- sufficient for columns with simple data distributions.
-
-
-
- Further details about the planner's use of statistics can be found in
- the chapter describing how the planner uses statistics.
-
-
-
-
- Extended Statistics
-
-
-
- It is common to see slow queries running bad execution plans because
- multiple columns used in the query clauses are correlated.
- The planner normally assumes that multiple conditions
- are independent of each other,
- an assumption that does not hold when column values are correlated.
- Regular statistics, because of their per-individual-column nature,
- cannot capture any knowledge about cross-column correlation.
- However, PostgreSQL has the ability to compute
- multivariate statistics, which can capture
- such information.
-
-
-
- Because the number of possible column combinations is very large,
- it's impractical to compute multivariate statistics automatically.
- Instead, extended statistics objects, more often
- called just statistics objects, can be created to instruct
- the server to obtain statistics across interesting sets of columns.
-
-
-
- Statistics objects are created using the
- CREATE STATISTICS command.
- Creation of such an object merely creates a catalog entry expressing
- interest in the statistics. Actual data collection is performed
- by ANALYZE (either a manual command, or background
- auto-analyze). The collected values can be examined in the
- pg_statistic_ext_data
- catalog.
-
-
-
- ANALYZE computes extended statistics based on the same
- sample of table rows that it takes for computing regular single-column
- statistics.
Since the sample size is increased by increasing the - statistics target for the table or any of its columns (as described in - the previous section), a larger statistics target will normally result in - more accurate extended statistics, as well as more time spent calculating - them. - - - - The following subsections describe the kinds of extended statistics - that are currently supported. - - - - Functional Dependencies - - - The simplest kind of extended statistics tracks functional - dependencies, a concept used in definitions of database normal forms. - We say that column b is functionally dependent on - column a if knowledge of the value of - a is sufficient to determine the value - of b, that is there are no two rows having the same value - of a but different values of b. - In a fully normalized database, functional dependencies should exist - only on primary keys and superkeys. However, in practice many data sets - are not fully normalized for various reasons; intentional - denormalization for performance reasons is a common example. - Even in a fully normalized database, there may be partial correlation - between some columns, which can be expressed as partial functional - dependency. - - - - The existence of functional dependencies directly affects the accuracy - of estimates in certain queries. If a query contains conditions on - both the independent and the dependent column(s), the - conditions on the dependent columns do not further reduce the result - size; but without knowledge of the functional dependency, the query - planner will assume that the conditions are independent, resulting - in underestimating the result size. - - - - To inform the planner about functional dependencies, ANALYZE - can collect measurements of cross-column dependency. 
Assessing the - degree of dependency between all sets of columns would be prohibitively - expensive, so data collection is limited to those groups of columns - appearing together in a statistics object defined with - the dependencies option. It is advisable to create - dependencies statistics only for column groups that are - strongly correlated, to avoid unnecessary overhead in both - ANALYZE and later query planning. - - - - Here is an example of collecting functional-dependency statistics: - -CREATE STATISTICS stts (dependencies) ON city, zip FROM zipcodes; - -ANALYZE zipcodes; - -SELECT stxname, stxkeys, stxddependencies - FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid) - WHERE stxname = 'stts'; - stxname | stxkeys | stxddependencies ----------+---------+------------------------------------------ - stts | 1 5 | {"1 => 5": 1.000000, "5 => 1": 0.423130} -(1 row) - - Here it can be seen that column 1 (zip code) fully determines column - 5 (city) so the coefficient is 1.0, while city only determines zip code - about 42% of the time, meaning that there are many cities (58%) that are - represented by more than a single ZIP code. - - - - When computing the selectivity for a query involving functionally - dependent columns, the planner adjusts the per-condition selectivity - estimates using the dependency coefficients so as not to produce - an underestimate. - - - - Limitations of Functional Dependencies - - - Functional dependencies are currently only applied when considering - simple equality conditions that compare columns to constant values, - and IN clauses with constant values. - They are not used to improve estimates for equality conditions - comparing two columns or comparing a column to an expression, nor for - range clauses, LIKE or any other type of condition. - - - - When estimating with functional dependencies, the planner assumes that - conditions on the involved columns are compatible and hence redundant. 
- If they are incompatible, the correct estimate would be zero rows, but - that possibility is not considered. For example, given a query like - -SELECT * FROM zipcodes WHERE city = 'San Francisco' AND zip = '94105'; - - the planner will disregard the city clause as not - changing the selectivity, which is correct. However, it will make - the same assumption about - -SELECT * FROM zipcodes WHERE city = 'San Francisco' AND zip = '90210'; - - even though there will really be zero rows satisfying this query. - Functional dependency statistics do not provide enough information - to conclude that, however. - - - - In many practical situations, this assumption is usually satisfied; - for example, there might be a GUI in the application that only allows - selecting compatible city and ZIP code values to use in a query. - But if that's not the case, functional dependencies may not be a viable - option. - - - - - - Multivariate N-Distinct Counts - - - Single-column statistics store the number of distinct values in each - column. Estimates of the number of distinct values when combining more - than one column (for example, for GROUP BY a, b) are - frequently wrong when the planner only has single-column statistical - data, causing it to select bad plans. - - - - To improve such estimates, ANALYZE can collect n-distinct - statistics for groups of columns. As before, it's impractical to do - this for every possible column grouping, so data is collected only for - those groups of columns appearing together in a statistics object - defined with the ndistinct option. Data will be collected - for each possible combination of two or more columns from the set of - listed columns. 
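The misestimates these statistics address arise in grouping plans such as the following sketch, which reuses the zipcodes table from the running example (the estimated number of groups, and hence the chosen plan, is what the statistics improve):

```sql
-- Without ndistinct statistics the planner multiplies the per-column
-- n_distinct estimates for city and state, which usually overestimates
-- the number of groups.
EXPLAIN SELECT city, state, count(*)
FROM zipcodes
GROUP BY city, state;
```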
-
-
-
- Continuing the previous example, the n-distinct counts in a
- table of ZIP codes might look like the following:
-
-CREATE STATISTICS stts2 (ndistinct) ON city, state, zip FROM zipcodes;
-
-ANALYZE zipcodes;
-
-SELECT stxkeys AS k, stxdndistinct AS nd
-  FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid)
-  WHERE stxname = 'stts2';
--[ RECORD 1 ]--------------------------------------------------------
-k  | 1 2 5
-nd | {"1, 2": 33178, "1, 5": 33178, "2, 5": 27435, "1, 2, 5": 33178}
-(1 row)
-
- This indicates that there are three combinations of columns that
- have 33178 distinct values: ZIP code and state; ZIP code and city;
- and ZIP code, city and state (the fact that they are all equal is
- expected given that ZIP code alone is unique in this table). On the
- other hand, the combination of city and state has only 27435 distinct
- values.
-
-
-
- It's advisable to create ndistinct statistics objects only
- on combinations of columns that are actually used for grouping, and
- for which misestimation of the number of groups is resulting in bad
- plans. Otherwise, the ANALYZE cycles are just wasted.
-
-
-
-
- Multivariate MCV Lists
-
-
- Another type of statistic stored for each column is the list of its
- most common values. This allows very accurate estimates for
- individual columns, but
- may result in significant misestimates for queries with conditions on
- multiple columns.
-
-
-
- To improve such estimates, ANALYZE can collect MCV
- lists on combinations of columns. Similarly to functional dependencies
- and n-distinct coefficients, it's impractical to do this for every
- possible column grouping. Even more so in this case, as the MCV list
- (unlike functional dependencies and n-distinct coefficients) does store
- the common column values. So data is collected only for those groups
- of columns appearing together in a statistics object defined with the
- mcv option.
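The estimates a multivariate MCV list improves are those for conjunctions of equality conditions on the covered columns. A sketch, again over the zipcodes table of the running example:

```sql
-- With only per-column statistics, the selectivities of the two
-- conditions are multiplied as if independent; a multivariate MCV
-- list can estimate the combination directly.
EXPLAIN SELECT * FROM zipcodes
WHERE city = 'Houston' AND state = 'TX';
```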
- - - - Continuing the previous example, the MCV list for a table of ZIP codes - might look like the following (unlike for simpler types of statistics, - a function is required for inspection of MCV contents): - - -CREATE STATISTICS stts3 (mcv) ON city, state FROM zipcodes; - -ANALYZE zipcodes; - -SELECT m.* FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid), - pg_mcv_list_items(stxdmcv) m WHERE stxname = 'stts3'; - - index | values | nulls | frequency | base_frequency --------+------------------------+-------+-----------+---------------- - 0 | {Washington, DC} | {f,f} | 0.003467 | 2.7e-05 - 1 | {Apo, AE} | {f,f} | 0.003067 | 1.9e-05 - 2 | {Houston, TX} | {f,f} | 0.002167 | 0.000133 - 3 | {El Paso, TX} | {f,f} | 0.002 | 0.000113 - 4 | {New York, NY} | {f,f} | 0.001967 | 0.000114 - 5 | {Atlanta, GA} | {f,f} | 0.001633 | 3.3e-05 - 6 | {Sacramento, CA} | {f,f} | 0.001433 | 7.8e-05 - 7 | {Miami, FL} | {f,f} | 0.0014 | 6e-05 - 8 | {Dallas, TX} | {f,f} | 0.001367 | 8.8e-05 - 9 | {Chicago, IL} | {f,f} | 0.001333 | 5.1e-05 - ... -(99 rows) - - This indicates that the most common combination of city and state is - Washington in DC, with actual frequency (in the sample) about 0.35%. - The base frequency of the combination (as computed from the simple - per-column frequencies) is only 0.0027%, resulting in two orders of - magnitude under-estimates. - - - - It's advisable to create MCV statistics objects only - on combinations of columns that are actually used in conditions together, - and for which misestimation of the number of groups is resulting in bad - plans. Otherwise, the ANALYZE and planning cycles - are just wasted. - - - - - - - + Controlling the Planner with Explicit <literal>JOIN</literal> Clauses @@ -1748,9 +187,9 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; differently if you are trying to fine-tune the trade-off between planning time and run time. 
- + - + Populating a Database @@ -1759,7 +198,7 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; this process as efficient as possible. - + Disable Autocommit @@ -1782,9 +221,9 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; point would be rolled back, so you won't be stuck with partially loaded data. - + - + Use <command>COPY</command> @@ -1827,9 +266,9 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; as all commands must write WAL otherwise. - + - + Remove Indexes @@ -1850,9 +289,9 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; afforded by the unique constraint will be lost while the index is missing. - + - + Remove Foreign Key Constraints @@ -1875,9 +314,9 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; the constraint isn't acceptable, the only other recourse may be to split up the load operation into smaller transactions. - + - + Increase <varname>maintenance_work_mem</varname> @@ -1888,9 +327,9 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; It won't do much for COPY itself, so this advice is only useful when you are using one or both of the above techniques. - + - + Increase <varname>max_wal_size</varname> @@ -1906,9 +345,9 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; data loads, the number of checkpoints that are required can be reduced. - + - + Disable WAL Archival and Streaming Replication @@ -1934,9 +373,9 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; can guarantee crash safety more cheaply by doing an fsync at the end than by writing WAL.) - + - + Run <command>ANALYZE</command> Afterwards @@ -1953,9 +392,9 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; and for more information. - + - + Some Notes about <application>pg_dump</application> @@ -2055,10 +494,10 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; and for more information. 
+ - - + Non-Durable Settings @@ -2129,6 +568,6 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse; - + - + diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index af476c82fc..1e4771e505 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -110,9 +110,7 @@ break is not needed in a wider output rendering. &typeconv; &indices; &textsearch; - &mvcc; - &perform; - ∥ + &processing; diff --git a/doc/src/sgml/processing-explain.sgml b/doc/src/sgml/processing-explain.sgml new file mode 100644 index 0000000000..ad61ff8b06 --- /dev/null +++ b/doc/src/sgml/processing-explain.sgml @@ -0,0 +1,1096 @@ + + Using <command>EXPLAIN</command> + + + EXPLAIN + + + + query plan + + + + PostgreSQL devises a query + plan for each query it receives. Choosing the right + plan to match the query structure and the properties of the data + is absolutely critical for good performance, so the system includes + a complex planner that tries to choose good plans. + You can use the EXPLAIN command + to see what query plan the planner creates for any query. + Plan-reading is an art that requires some experience to master, + but this section attempts to cover the basics. + + + + Examples in this section are drawn from the regression test database + after doing a VACUUM ANALYZE, using v18 development sources. + You should be able to get similar results if you try the examples + yourself, but your estimated costs and row counts might vary slightly + because ANALYZE's statistics are random samples rather + than exact, and because costs are inherently somewhat platform-dependent. + + + + The examples use EXPLAIN's default text output + format, which is compact and convenient for humans to read. + If you want to feed EXPLAIN's output to a program for further + analysis, you should use one of its machine-readable output formats + (XML, JSON, or YAML) instead. 
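For instance, a machine-readable plan can be requested as follows (a minimal sketch; the exact JSON emitted varies with server version and query):

```sql
-- Request the plan as JSON instead of the default text format;
-- FORMAT XML and FORMAT YAML are requested the same way.
EXPLAIN (FORMAT JSON) SELECT * FROM tenk1 WHERE unique1 < 100;
```

Programs that post-process plans should parse one of these formats rather than the human-oriented text layout, which is not guaranteed to stay stable across releases.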
+ + + + <command>EXPLAIN</command> Basics + + + The structure of a query plan is a tree of plan nodes. + Nodes at the bottom level of the tree are scan nodes: they return raw rows + from a table. There are different types of scan nodes for different + table access methods: sequential scans, index scans, and bitmap index + scans. There are also non-table row sources, such as VALUES + clauses and set-returning functions in FROM, which have their + own scan node types. + If the query requires joining, aggregation, sorting, or other + operations on the raw rows, then there will be additional nodes + above the scan nodes to perform these operations. Again, + there is usually more than one possible way to do these operations, + so different node types can appear here too. The output + of EXPLAIN has one line for each node in the plan + tree, showing the basic node type plus the cost estimates that the planner + made for the execution of that plan node. Additional lines might appear, + indented from the node's summary line, + to show additional properties of the node. + The very first line (the summary line for the topmost + node) has the estimated total execution cost for the plan; it is this + number that the planner seeks to minimize. + + + + Here is a trivial example, just to show what the output looks like: + + +EXPLAIN SELECT * FROM tenk1; + + QUERY PLAN +------------------------------------------------------------- + Seq Scan on tenk1 (cost=0.00..445.00 rows=10000 width=244) + + + + + Since this query has no WHERE clause, it must scan all the + rows of the table, so the planner has chosen to use a simple sequential + scan plan. The numbers that are quoted in parentheses are (left + to right): + + + + + Estimated start-up cost. This is the time expended before the output + phase can begin, e.g., time to do the sorting in a sort node. + + + + + + Estimated total cost. 
This is stated on the assumption that the plan + node is run to completion, i.e., all available rows are retrieved. + In practice a node's parent node might stop short of reading all + available rows (see the LIMIT example below). + + + + + + Estimated number of rows output by this plan node. Again, the node + is assumed to be run to completion. + + + + + + Estimated average width of rows output by this plan node (in bytes). + + + + + + + The costs are measured in arbitrary units determined by the planner's + cost parameters (see ). + Traditional practice is to measure the costs in units of disk page + fetches; that is, is conventionally + set to 1.0 and the other cost parameters are set relative + to that. The examples in this section are run with the default cost + parameters. + + + + It's important to understand that the cost of an upper-level node includes + the cost of all its child nodes. It's also important to realize that + the cost only reflects things that the planner cares about. + In particular, the cost does not consider the time spent to convert + output values to text form or to transmit them to the client, which + could be important factors in the real elapsed time; but the planner + ignores those costs because it cannot change them by altering the + plan. (Every correct plan will output the same row set, we trust.) + + + + The rows value is a little tricky because it is + not the number of rows processed or scanned by the + plan node, but rather the number emitted by the node. This is often + less than the number scanned, as a result of filtering by any + WHERE-clause conditions that are being applied at the node. + Ideally the top-level rows estimate will approximate the number of rows + actually returned, updated, or deleted by the query. 
+ + + + Returning to our example: + + +EXPLAIN SELECT * FROM tenk1; + + QUERY PLAN +------------------------------------------------------------- + Seq Scan on tenk1 (cost=0.00..445.00 rows=10000 width=244) + + + + + These numbers are derived very straightforwardly. If you do: + + +SELECT relpages, reltuples FROM pg_class WHERE relname = 'tenk1'; + + + you will find that tenk1 has 345 disk + pages and 10000 rows. The estimated cost is computed as (disk pages read * + ) + (rows scanned * + ). By default, + seq_page_cost is 1.0 and cpu_tuple_cost is 0.01, + so the estimated cost is (345 * 1.0) + (10000 * 0.01) = 445. + + + + Now let's modify the query to add a WHERE condition: + + +EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 7000; + + QUERY PLAN +------------------------------------------------------------ + Seq Scan on tenk1 (cost=0.00..470.00 rows=7000 width=244) + Filter: (unique1 < 7000) + + + Notice that the EXPLAIN output shows the WHERE + clause being applied as a filter condition attached to the Seq + Scan plan node. This means that + the plan node checks the condition for each row it scans, and outputs + only the ones that pass the condition. + The estimate of output rows has been reduced because of the + WHERE clause. + However, the scan will still have to visit all 10000 rows, so the cost + hasn't decreased; in fact it has gone up a bit (by 10000 * , to be exact) to reflect the extra CPU + time spent checking the WHERE condition. + + + + The actual number of rows this query would select is 7000, but the rows + estimate is only approximate. If you try to duplicate this experiment, + you may well get a slightly different estimate; moreover, it can + change after each ANALYZE command, because the + statistics produced by ANALYZE are taken from a + randomized sample of the table. 
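When the sampled statistics prove too coarse for a column that matters to your queries, the sample size for that column can be enlarged. A sketch, where the target value 500 is an arbitrary illustration:

```sql
-- Raise the per-column statistics target so ANALYZE samples more rows
-- and builds larger histograms and MCV lists for this column.
ALTER TABLE tenk1 ALTER COLUMN unique1 SET STATISTICS 500;

-- Re-collect statistics so the new target takes effect.
ANALYZE tenk1;

-- A target of -1 reverts the column to the system default.
ALTER TABLE tenk1 ALTER COLUMN unique1 SET STATISTICS -1;
```

Larger targets improve estimate accuracy at the cost of more time spent in ANALYZE and in planning.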
+ + + + Now, let's make the condition more restrictive: + + +EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 100; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;----------- + Bitmap Heap Scan on tenk1 (cost=5.06..224.98 rows=100 width=244) + Recheck Cond: (unique1 < 100) + -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) + Index Cond: (unique1 < 100) + + + Here the planner has decided to use a two-step plan: the child plan + node visits an index to find the locations of rows matching the index + condition, and then the upper plan node actually fetches those rows + from the table itself. Fetching rows separately is much more + expensive than reading them sequentially, but because not all the pages + of the table have to be visited, this is still cheaper than a sequential + scan. (The reason for using two plan levels is that the upper plan + node sorts the row locations identified by the index into physical order + before reading them, to minimize the cost of separate fetches. + The bitmap mentioned in the node names is the mechanism that + does the sorting.) + + + + Now let's add another condition to the WHERE clause: + + +EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 100 AND stringu1 = 'xxx'; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;----------- + Bitmap Heap Scan on tenk1 (cost=5.04..225.20 rows=1 width=244) + Recheck Cond: (unique1 < 100) + Filter: (stringu1 = 'xxx'::name) + -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) + Index Cond: (unique1 < 100) + + + The added condition stringu1 = 'xxx' reduces the + output row count estimate, but not the cost because we still have to visit + the same set of rows. That's because the stringu1 clause + cannot be applied as an index condition, since this index is only on + the unique1 column. Instead it is applied as a filter on + the rows retrieved using the index. 
Thus the cost has actually gone up + slightly to reflect this extra checking. + + + + In some cases the planner will prefer a simple index scan plan: + + +EXPLAIN SELECT * FROM tenk1 WHERE unique1 = 42; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;---------- + Index Scan using tenk1_unique1 on tenk1 (cost=0.29..8.30 rows=1 width=244) + Index Cond: (unique1 = 42) + + + In this type of plan the table rows are fetched in index order, which + makes them even more expensive to read, but there are so few that the + extra cost of sorting the row locations is not worth it. You'll most + often see this plan type for queries that fetch just a single row. It's + also often used for queries that have an ORDER BY condition + that matches the index order, because then no extra sorting step is needed + to satisfy the ORDER BY. In this example, adding + ORDER BY unique1 would use the same plan because the + index already implicitly provides the requested ordering. + + + + The planner may implement an ORDER BY clause in several + ways. The above example shows that such an ordering clause may be + implemented implicitly. 
The planner may also add an explicit + Sort step: + + +EXPLAIN SELECT * FROM tenk1 ORDER BY unique1; + + QUERY PLAN +------------------------------------------------------------------- + Sort (cost=1109.39..1134.39 rows=10000 width=244) + Sort Key: unique1 + -> Seq Scan on tenk1 (cost=0.00..445.00 rows=10000 width=244) + + + If a part of the plan guarantees an ordering on a prefix of the + required sort keys, then the planner may instead decide to use an + Incremental Sort step: + + +EXPLAIN SELECT * FROM tenk1 ORDER BY hundred, ten LIMIT 100; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;----------------------------- + Limit (cost=19.35..39.49 rows=100 width=244) + -> Incremental Sort (cost=19.35..2033.39 rows=10000 width=244) + Sort Key: hundred, ten + Presorted Key: hundred + -> Index Scan using tenk1_hundred on tenk1 (cost=0.29..1574.20 rows=10000 width=244) + + + Compared to regular sorts, sorting incrementally allows returning tuples + before the entire result set has been sorted, which particularly enables + optimizations with LIMIT queries. It may also reduce + memory usage and the likelihood of spilling sorts to disk, but it comes at + the cost of the increased overhead of splitting the result set into multiple + sorting batches. 
+ + + + If there are separate indexes on several of the columns referenced + in WHERE, the planner might choose to use an AND or OR + combination of the indexes: + + +EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;------------------ + Bitmap Heap Scan on tenk1 (cost=25.07..60.11 rows=10 width=244) + Recheck Cond: ((unique1 < 100) AND (unique2 > 9000)) + -> BitmapAnd (cost=25.07..25.07 rows=10 width=0) + -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) + Index Cond: (unique1 < 100) + -> Bitmap Index Scan on tenk1_unique2 (cost=0.00..19.78 rows=999 width=0) + Index Cond: (unique2 > 9000) + + + But this requires visiting both indexes, so it's not necessarily a win + compared to using just one index and treating the other condition as + a filter. If you vary the ranges involved you'll see the plan change + accordingly. + + + + Here is an example showing the effects of LIMIT: + + +EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000 LIMIT 2; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;------------------ + Limit (cost=0.29..14.28 rows=2 width=244) + -> Index Scan using tenk1_unique2 on tenk1 (cost=0.29..70.27 rows=10 width=244) + Index Cond: (unique2 > 9000) + Filter: (unique1 < 100) + + + + + This is the same query as above, but we added a LIMIT so that + not all the rows need be retrieved, and the planner changed its mind about + what to do. Notice that the total cost and row count of the Index Scan + node are shown as if it were run to completion. However, the Limit node + is expected to stop after retrieving only a fifth of those rows, so its + total cost is only a fifth as much, and that's the actual estimated cost + of the query. 
This plan is preferred over adding a Limit node to the + previous plan because the Limit could not avoid paying the startup cost + of the bitmap scan, so the total cost would be something over 25 units + with that approach. + + + + Let's try joining two tables, using the columns we have been discussing: + + +EXPLAIN SELECT * +FROM tenk1 t1, tenk2 t2 +WHERE t1.unique1 < 10 AND t1.unique2 = t2.unique2; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;------------------- + Nested Loop (cost=4.65..118.50 rows=10 width=488) + -> Bitmap Heap Scan on tenk1 t1 (cost=4.36..39.38 rows=10 width=244) + Recheck Cond: (unique1 < 10) + -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..4.36 rows=10 width=0) + Index Cond: (unique1 < 10) + -> Index Scan using tenk2_unique2 on tenk2 t2 (cost=0.29..7.90 rows=1 width=244) + Index Cond: (unique2 = t1.unique2) + + + + + In this plan, we have a nested-loop join node with two table scans as + inputs, or children. The indentation of the node summary lines reflects + the plan tree structure. The join's first, or outer, child + is a bitmap scan similar to those we saw before. Its cost and row count + are the same as we'd get from SELECT ... WHERE unique1 < 10 + because we are + applying the WHERE clause unique1 < 10 + at that node. + The t1.unique2 = t2.unique2 clause is not relevant yet, + so it doesn't affect the row count of the outer scan. The nested-loop + join node will run its second, + or inner child once for each row obtained from the outer child. + Column values from the current outer row can be plugged into the inner + scan; here, the t1.unique2 value from the outer row is available, + so we get a plan and costs similar to what we saw above for a simple + SELECT ... WHERE t2.unique2 = constant case. + (The estimated cost is actually a bit lower than what was seen above, + as a result of caching that's expected to occur during the repeated + index scans on t2.) 
The + costs of the loop node are then set on the basis of the cost of the outer + scan, plus one repetition of the inner scan for each outer row (10 * 7.90, + here), plus a little CPU time for join processing. + + + + In this example the join's output row count is the same as the product + of the two scans' row counts, but that's not true in all cases because + there can be additional WHERE clauses that mention both tables + and so can only be applied at the join point, not to either input scan. + Here's an example: + + +EXPLAIN SELECT * +FROM tenk1 t1, tenk2 t2 +WHERE t1.unique1 < 10 AND t2.unique2 < 10 AND t1.hundred < t2.hundred; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;-------------------------- + Nested Loop (cost=4.65..49.36 rows=33 width=488) + Join Filter: (t1.hundred < t2.hundred) + -> Bitmap Heap Scan on tenk1 t1 (cost=4.36..39.38 rows=10 width=244) + Recheck Cond: (unique1 < 10) + -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..4.36 rows=10 width=0) + Index Cond: (unique1 < 10) + -> Materialize (cost=0.29..8.51 rows=10 width=244) + -> Index Scan using tenk2_unique2 on tenk2 t2 (cost=0.29..8.46 rows=10 width=244) + Index Cond: (unique2 < 10) + + + The condition t1.hundred < t2.hundred can't be + tested in the tenk2_unique2 index, so it's applied at the + join node. This reduces the estimated output row count of the join node, + but does not change either input scan. + + + + Notice that here the planner has chosen to materialize the inner + relation of the join, by putting a Materialize plan node atop it. This + means that the t2 index scan will be done just once, even + though the nested-loop join node needs to read that data ten times, once + for each row from the outer relation. The Materialize node saves the data + in memory as it's read, and then returns the data from memory on each + subsequent pass. 
+ + + + When dealing with outer joins, you might see join plan nodes with both + Join Filter and plain Filter conditions attached. + Join Filter conditions come from the outer join's ON clause, + so a row that fails the Join Filter condition could still get emitted as + a null-extended row. But a plain Filter condition is applied after the + outer-join rules and so acts to remove rows unconditionally. In an inner + join there is no semantic difference between these types of filters. + + + + If we change the query's selectivity a bit, we might get a very different + join plan: + + +EXPLAIN SELECT * +FROM tenk1 t1, tenk2 t2 +WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;----------------------- + Hash Join (cost=226.23..709.73 rows=100 width=488) + Hash Cond: (t2.unique2 = t1.unique2) + -> Seq Scan on tenk2 t2 (cost=0.00..445.00 rows=10000 width=244) + -> Hash (cost=224.98..224.98 rows=100 width=244) + -> Bitmap Heap Scan on tenk1 t1 (cost=5.06..224.98 rows=100 width=244) + Recheck Cond: (unique1 < 100) + -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) + Index Cond: (unique1 < 100) + + + + + Here, the planner has chosen to use a hash join, in which rows of one + table are entered into an in-memory hash table, after which the other + table is scanned and the hash table is probed for matches to each row. + Again note how the indentation reflects the plan structure: the bitmap + scan on tenk1 is the input to the Hash node, which constructs + the hash table. That's then returned to the Hash Join node, which reads + rows from its outer child plan and searches the hash table for each one. 
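The distinction between Join Filter and plain Filter conditions discussed above for outer joins can be observed with a query along these lines (output omitted, since the chosen plan and costs vary; depending on the plan, the ON-clause condition may appear as a Join Filter while a WHERE condition on the nullable side appears as a plain Filter):

```sql
-- The ON-clause condition (t2.ten < 5) belongs to the outer join itself:
-- a t1 row whose matches all fail it is still emitted, null-extended.
-- Moving that condition into WHERE would instead remove such rows
-- unconditionally, after the outer-join rules have been applied.
EXPLAIN SELECT *
FROM tenk1 t1 LEFT JOIN tenk2 t2
  ON (t1.unique2 = t2.unique2 AND t2.ten < 5)
WHERE t1.unique1 < 10;
```

Comparing this plan against the same query with t2.ten < 5 moved into the WHERE clause is an instructive way to see the semantic difference the text describes.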
+ + + + Another possible type of join is a merge join, illustrated here: + + +EXPLAIN SELECT * +FROM tenk1 t1, onek t2 +WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;----------------------- + Merge Join (cost=0.56..233.49 rows=10 width=488) + Merge Cond: (t1.unique2 = t2.unique2) + -> Index Scan using tenk1_unique2 on tenk1 t1 (cost=0.29..643.28 rows=100 width=244) + Filter: (unique1 < 100) + -> Index Scan using onek_unique2 on onek t2 (cost=0.28..166.28 rows=1000 width=244) + + + + + Merge join requires its input data to be sorted on the join keys. In this + example each input is sorted by using an index scan to visit the rows + in the correct order; but a sequential scan and sort could also be used. + (Sequential-scan-and-sort frequently beats an index scan for sorting many rows, + because of the nonsequential disk access required by the index scan.) + + + + One way to look at variant plans is to force the planner to disregard + whatever strategy it thought was the cheapest, using the enable/disable + flags described in . + (This is a crude tool, but useful. See + also .) 
+ For example, if we're unconvinced that merge join is the best join + type for the previous example, we could try + + +SET enable_mergejoin = off; + +EXPLAIN SELECT * +FROM tenk1 t1, onek t2 +WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;----------------------- + Hash Join (cost=226.23..344.08 rows=10 width=488) + Hash Cond: (t2.unique2 = t1.unique2) + -> Seq Scan on onek t2 (cost=0.00..114.00 rows=1000 width=244) + -> Hash (cost=224.98..224.98 rows=100 width=244) + -> Bitmap Heap Scan on tenk1 t1 (cost=5.06..224.98 rows=100 width=244) + Recheck Cond: (unique1 < 100) + -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) + Index Cond: (unique1 < 100) + + + which shows that the planner thinks that hash join would be nearly 50% + more expensive than merge join for this case. + Of course, the next question is whether it's right about that. + We can investigate that using EXPLAIN ANALYZE, as + discussed below. + + + + When using the enable/disable flags to disable plan node types, many of + the flags only discourage the use of the corresponding plan node and don't + outright disallow the planner's ability to use the plan node type. This + is by design so that the planner still maintains the ability to form a + plan for a given query. When the resulting plan contains a disabled node, + the EXPLAIN output will indicate this fact. + + +SET enable_seqscan = off; +EXPLAIN SELECT * FROM unit; + + QUERY PLAN +--------------------------------------------------------- + Seq Scan on unit (cost=0.00..21.30 rows=1130 width=44) + Disabled: true + + + + + Because the unit table has no indexes, there is no + other means to read the table data, so the sequential scan is the only + option available to the query planner. + + + + + subplan + + Some query plans involve subplans, which arise + from sub-SELECTs in the original query. 
Such + queries can sometimes be transformed into ordinary join plans, but + when they cannot be, we get plans like: + + +EXPLAIN VERBOSE SELECT unique1 +FROM tenk1 t +WHERE t.ten < ALL (SELECT o.ten FROM onek o WHERE o.four = t.four); + + QUERY PLAN +-------------------------------------------------------------------&zwsp;------ + Seq Scan on public.tenk1 t (cost=0.00..586095.00 rows=5000 width=4) + Output: t.unique1 + Filter: (ALL (t.ten < (SubPlan 1).col1)) + SubPlan 1 + -> Seq Scan on public.onek o (cost=0.00..116.50 rows=250 width=4) + Output: o.ten + Filter: (o.four = t.four) + + + This rather artificial example serves to illustrate a couple of + points: values from the outer plan level can be passed down into a + subplan (here, t.four is passed down) and the + results of the sub-select are available to the outer plan. Those + result values are shown by EXPLAIN with notations + like + (subplan_name).colN, + which refers to the N'th output column of + the sub-SELECT. + + + + + subplan + hashed + + In the example above, the ALL operator runs the + subplan again for each row of the outer query (which accounts for the + high estimated cost). Some queries can use a hashed + subplan to avoid that: + + +EXPLAIN SELECT * +FROM tenk1 t +WHERE t.unique1 NOT IN (SELECT o.unique1 FROM onek o); + + QUERY PLAN +-------------------------------------------------------------------&zwsp;------------------------- + Seq Scan on tenk1 t (cost=61.77..531.77 rows=5000 width=244) + Filter: (NOT (ANY (unique1 = (hashed SubPlan 1).col1))) + SubPlan 1 + -> Index Only Scan using onek_unique1 on onek o (cost=0.28..59.27 rows=1000 width=4) +(4 rows) + + + Here, the subplan is run a single time and its output is loaded into + an in-memory hash table, which is then probed by the + outer ANY operator. This requires that the + sub-SELECT not reference any variables of the outer + query, and that the ANY's comparison operator be + amenable to hashing. 
+ + + + + initplan + + If, in addition to not referencing any variables of the outer query, + the sub-SELECT cannot return more than one row, + it may instead be implemented as an initplan: + + +EXPLAIN VERBOSE SELECT unique1 +FROM tenk1 t1 WHERE t1.ten = (SELECT (random() * 10)::integer); + + QUERY PLAN +------------------------------------------------------------&zwsp;-------- + Seq Scan on public.tenk1 t1 (cost=0.02..470.02 rows=1000 width=4) + Output: t1.unique1 + Filter: (t1.ten = (InitPlan 1).col1) + InitPlan 1 + -> Result (cost=0.00..0.02 rows=1 width=4) + Output: ((random() * '10'::double precision))::integer + + + An initplan is run only once per execution of the outer plan, and its + results are saved for re-use in later rows of the outer plan. So in + this example random() is evaluated only once and + all the values of t1.ten are compared to the same + randomly-chosen integer. That's quite different from what would + happen without the sub-SELECT construct. + + + + + + <command>EXPLAIN ANALYZE</command> + + + It is possible to check the accuracy of the planner's estimates + by using EXPLAIN's ANALYZE option. With this + option, EXPLAIN actually executes the query, and then displays + the true row counts and true run time accumulated within each plan node, + along with the same estimates that a plain EXPLAIN + shows. 
For example, we might get a result like this: + + +EXPLAIN ANALYZE SELECT * +FROM tenk1 t1, tenk2 t2 +WHERE t1.unique1 < 10 AND t1.unique2 = t2.unique2; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;-------------------------------------------------------------- + Nested Loop (cost=4.65..118.50 rows=10 width=488) (actual time=0.017..0.051 rows=10.00 loops=1) + Buffers: shared hit=36 read=6 + -> Bitmap Heap Scan on tenk1 t1 (cost=4.36..39.38 rows=10 width=244) (actual time=0.009..0.017 rows=10.00 loops=1) + Recheck Cond: (unique1 < 10) + Heap Blocks: exact=10 + Buffers: shared hit=3 read=5 written=4 + -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..4.36 rows=10 width=0) (actual time=0.004..0.004 rows=10.00 loops=1) + Index Cond: (unique1 < 10) + Buffers: shared hit=2 + -> Index Scan using tenk2_unique2 on tenk2 t2 (cost=0.29..7.90 rows=1 width=244) (actual time=0.003..0.003 rows=1 loops=10) + Index Cond: (unique2 = t1.unique2) + Buffers: shared hit=24 read=6 + Planning: + Buffers: shared hit=15 dirtied=9 + Planning Time: 0.485 ms + Execution Time: 0.073 ms + + + Note that the actual time values are in milliseconds of + real time, whereas the cost estimates are expressed in + arbitrary units; so they are unlikely to match up. + The thing that's usually most important to look for is whether the + estimated row counts are reasonably close to reality. In this example + the estimates were all dead-on, but that's quite unusual in practice. + + + + In some query plans, it is possible for a subplan node to be executed more + than once. For example, the inner index scan will be executed once per + outer row in the above nested-loop plan. In such cases, the + loops value reports the + total number of executions of the node, and the actual time and rows + values shown are averages per-execution. This is done to make the numbers + comparable with the way that the cost estimates are shown. 
Multiply by + the loops value to get the total time actually spent in + the node. In the above example, we spent a total of 0.030 milliseconds + executing the index scans on tenk2. + + + + In some cases EXPLAIN ANALYZE shows additional execution + statistics beyond the plan node execution times and row counts. + For example, Sort and Hash nodes provide extra information: + + +EXPLAIN ANALYZE SELECT * +FROM tenk1 t1, tenk2 t2 +WHERE t1.unique1 < 100 AND t1.unique2 = t2.unique2 ORDER BY t1.fivethous; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;-------------------------------------------------------------------&zwsp;------ + Sort (cost=713.05..713.30 rows=100 width=488) (actual time=2.995..3.002 rows=100.00 loops=1) + Sort Key: t1.fivethous + Sort Method: quicksort Memory: 74kB + Buffers: shared hit=440 + -> Hash Join (cost=226.23..709.73 rows=100 width=488) (actual time=0.515..2.920 rows=100.00 loops=1) + Hash Cond: (t2.unique2 = t1.unique2) + Buffers: shared hit=437 + -> Seq Scan on tenk2 t2 (cost=0.00..445.00 rows=10000 width=244) (actual time=0.026..1.790 rows=10000.00 loops=1) + Buffers: shared hit=345 + -> Hash (cost=224.98..224.98 rows=100 width=244) (actual time=0.476..0.477 rows=100.00 loops=1) + Buckets: 1024 Batches: 1 Memory Usage: 35kB + Buffers: shared hit=92 + -> Bitmap Heap Scan on tenk1 t1 (cost=5.06..224.98 rows=100 width=244) (actual time=0.030..0.450 rows=100.00 loops=1) + Recheck Cond: (unique1 < 100) + Heap Blocks: exact=90 + Buffers: shared hit=92 + -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) (actual time=0.013..0.013 rows=100.00 loops=1) + Index Cond: (unique1 < 100) + Buffers: shared hit=2 + Planning: + Buffers: shared hit=12 + Planning Time: 0.187 ms + Execution Time: 3.036 ms + + + The Sort node shows the sort method used (in particular, whether the sort + was in-memory or on-disk) and the amount of memory or disk space needed. 
+ The Hash node shows the number of hash buckets and batches as well as the + peak amount of memory used for the hash table. (If the number of batches + exceeds one, there will also be disk space usage involved, but that is not + shown.) + + + + Another type of extra information is the number of rows removed by a + filter condition: + + +EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE ten < 7; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;-------------------------------------- + Seq Scan on tenk1 (cost=0.00..470.00 rows=7000 width=244) (actual time=0.030..1.995 rows=7000.00 loops=1) + Filter: (ten < 7) + Rows Removed by Filter: 3000 + Buffers: shared hit=345 + Planning Time: 0.102 ms + Execution Time: 2.145 ms + + + These counts can be particularly valuable for filter conditions applied at + join nodes. The Rows Removed line only appears when at least + one scanned row, or potential join pair in the case of a join node, + is rejected by the filter condition. + + + + A case similar to filter conditions occurs with lossy + index scans. For example, consider this search for polygons containing a + specific point: + + +EXPLAIN ANALYZE SELECT * FROM polygon_tbl WHERE f1 @> polygon '(0.5,2.0)'; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;----------------------------------- + Seq Scan on polygon_tbl (cost=0.00..1.09 rows=1 width=85) (actual time=0.023..0.023 rows=0.00 loops=1) + Filter: (f1 @> '((0.5,2))'::polygon) + Rows Removed by Filter: 7 + Buffers: shared hit=1 + Planning Time: 0.039 ms + Execution Time: 0.033 ms + + + The planner thinks (quite correctly) that this sample table is too small + to bother with an index scan, so we have a plain sequential scan in which + all the rows got rejected by the filter condition. 
But if we force an + index scan to be used, we see: + + +SET enable_seqscan TO off; + +EXPLAIN ANALYZE SELECT * FROM polygon_tbl WHERE f1 @> polygon '(0.5,2.0)'; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;------------------------------------------------------- + Index Scan using gpolygonind on polygon_tbl (cost=0.13..8.15 rows=1 width=85) (actual time=0.074..0.074 rows=0.00 loops=1) + Index Cond: (f1 @> '((0.5,2))'::polygon) + Rows Removed by Index Recheck: 1 + Buffers: shared hit=1 + Planning Time: 0.039 ms + Execution Time: 0.098 ms + + + Here we can see that the index returned one candidate row, which was + then rejected by a recheck of the index condition. This happens because a + GiST index is lossy for polygon containment tests: it actually + returns the rows with polygons that overlap the target, and then we have + to do the exact containment test on those rows. + + + + EXPLAIN has a BUFFERS option which + provides additional detail about I/O operations performed during the + planning and execution of the given query. The buffer numbers displayed + show the count of the non-distinct buffers hit, read, dirtied, and written + for the given node and all of its child nodes. The + ANALYZE option implicitly enables the + BUFFERS option. 
If this + is undesired, BUFFERS may be explicitly disabled: + + +EXPLAIN (ANALYZE, BUFFERS OFF) SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;-------------------------------------------------------------- + Bitmap Heap Scan on tenk1 (cost=25.07..60.11 rows=10 width=244) (actual time=0.105..0.114 rows=10.00 loops=1) + Recheck Cond: ((unique1 < 100) AND (unique2 > 9000)) + Heap Blocks: exact=10 + -> BitmapAnd (cost=25.07..25.07 rows=10 width=0) (actual time=0.100..0.101 rows=0.00 loops=1) + -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) (actual time=0.027..0.027 rows=100.00 loops=1) + Index Cond: (unique1 < 100) + -> Bitmap Index Scan on tenk1_unique2 (cost=0.00..19.78 rows=999 width=0) (actual time=0.070..0.070 rows=999.00 loops=1) + Index Cond: (unique2 > 9000) + Planning Time: 0.162 ms + Execution Time: 0.143 ms + + + + + Keep in mind that because EXPLAIN ANALYZE actually + runs the query, any side-effects will happen as usual, even though + whatever results the query might output are discarded in favor of + printing the EXPLAIN data. 
If you want to analyze a + data-modifying query without changing your tables, you can + roll the command back afterwards, for example: + + +BEGIN; + +EXPLAIN ANALYZE UPDATE tenk1 SET hundred = hundred + 1 WHERE unique1 < 100; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;------------------------------------------------------------- + Update on tenk1 (cost=5.06..225.23 rows=0 width=0) (actual time=1.634..1.635 rows=0.00 loops=1) + -> Bitmap Heap Scan on tenk1 (cost=5.06..225.23 rows=100 width=10) (actual time=0.065..0.141 rows=100.00 loops=1) + Recheck Cond: (unique1 < 100) + Heap Blocks: exact=90 + Buffers: shared hit=4 read=2 + -> Bitmap Index Scan on tenk1_unique1 (cost=0.00..5.04 rows=100 width=0) (actual time=0.031..0.031 rows=100.00 loops=1) + Index Cond: (unique1 < 100) + Buffers: shared read=2 + Planning Time: 0.151 ms + Execution Time: 1.856 ms + +ROLLBACK; + + + + + As seen in this example, when the query is an INSERT, + UPDATE, DELETE, or + MERGE command, the actual work of + applying the table changes is done by a top-level Insert, Update, + Delete, or Merge plan node. The plan nodes underneath this node perform + the work of locating the old rows and/or computing the new data. + So above, we see the same sort of bitmap table scan we've seen already, + and its output is fed to an Update node that stores the updated rows. + It's worth noting that although the data-modifying node can take a + considerable amount of run time (here, it's consuming the lion's share + of the time), the planner does not currently add anything to the cost + estimates to account for that work. That's because the work to be done is + the same for every correct query plan, so it doesn't affect planning + decisions. 
+ + + + When an UPDATE, DELETE, or + MERGE command affects a partitioned table or + inheritance hierarchy, the output might look like this: + + +EXPLAIN UPDATE gtest_parent SET f1 = CURRENT_DATE WHERE f2 = 101; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;--------------------- + Update on gtest_parent (cost=0.00..3.06 rows=0 width=0) + Update on gtest_child gtest_parent_1 + Update on gtest_child2 gtest_parent_2 + Update on gtest_child3 gtest_parent_3 + -> Append (cost=0.00..3.06 rows=3 width=14) + -> Seq Scan on gtest_child gtest_parent_1 (cost=0.00..1.01 rows=1 width=14) + Filter: (f2 = 101) + -> Seq Scan on gtest_child2 gtest_parent_2 (cost=0.00..1.01 rows=1 width=14) + Filter: (f2 = 101) + -> Seq Scan on gtest_child3 gtest_parent_3 (cost=0.00..1.01 rows=1 width=14) + Filter: (f2 = 101) + + + In this example the Update node needs to consider three child tables, + but not the originally-mentioned partitioned table (since that never + stores any data). So there are three input + scanning subplans, one per table. For clarity, the Update node is + annotated to show the specific target tables that will be updated, in the + same order as the corresponding subplans. + + + + The Planning time shown by EXPLAIN + ANALYZE is the time it took to generate the query plan from the + parsed query and optimize it. It does not include parsing or rewriting. + + + + The Execution time shown by EXPLAIN + ANALYZE includes executor start-up and shut-down time, as well + as the time to run any triggers that are fired, but it does not include + parsing, rewriting, or planning time. + Time spent executing BEFORE triggers, if any, is included in + the time for the related Insert, Update, or Delete node; but time + spent executing AFTER triggers is not counted there because + AFTER triggers are fired after completion of the whole plan. + The total time spent in each trigger + (either BEFORE or AFTER) is also shown separately. 
+ Note that deferred constraint triggers will not be executed + until end of transaction and are thus not considered at all by + EXPLAIN ANALYZE. + + + + The time shown for the top-level node does not include any time needed + to convert the query's output data into displayable form or to send it + to the client. While EXPLAIN ANALYZE will never + send the data to the client, it can be told to convert the query's + output data to displayable form and measure the time needed for that, + by specifying the SERIALIZE option. That time will + be shown separately, and it's also included in the + total Execution time. + + + + + + Caveats + + + There are two significant ways in which run times measured by + EXPLAIN ANALYZE can deviate from normal execution of + the same query. First, since no output rows are delivered to the client, + network transmission costs are not included. I/O conversion costs are + not included either unless SERIALIZE is specified. + Second, the measurement overhead added by EXPLAIN + ANALYZE can be significant, especially on machines with slow + gettimeofday() operating-system calls. You can use the + tool to measure the overhead of timing + on your system. + + + + EXPLAIN results should not be extrapolated to situations + much different from the one you are actually testing; for example, + results on a toy-sized table cannot be assumed to apply to large tables. + The planner's cost estimates are not linear and so it might choose + a different plan for a larger or smaller table. An extreme example + is that on a table that only occupies one disk page, you'll nearly + always get a sequential scan plan whether indexes are available or not. + The planner realizes that it's going to take one disk page read to + process the table in any case, so there's no value in expending additional + page reads to look at an index. (We saw this happening in the + polygon_tbl example above.) 
+ + + + There are cases in which the actual and estimated values won't match up + well, but nothing is really wrong. One such case occurs when + plan node execution is stopped short by a LIMIT or similar + effect. For example, in the LIMIT query we used before, + + +EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000 LIMIT 2; + + QUERY PLAN +-------------------------------------------------------------------&zwsp;------------------------------------------------------------ + Limit (cost=0.29..14.33 rows=2 width=244) (actual time=0.051..0.071 rows=2.00 loops=1) + Buffers: shared hit=16 + -> Index Scan using tenk1_unique2 on tenk1 (cost=0.29..70.50 rows=10 width=244) (actual time=0.051..0.070 rows=2.00 loops=1) + Index Cond: (unique2 > 9000) + Filter: (unique1 < 100) + Rows Removed by Filter: 287 + Buffers: shared hit=16 + Planning Time: 0.077 ms + Execution Time: 0.086 ms + + + the estimated cost and row count for the Index Scan node are shown as + though it were run to completion. But in reality the Limit node stopped + requesting rows after it got two, so the actual row count is only 2 and + the run time is less than the cost estimate would suggest. This is not + an estimation error, only a discrepancy in the way the estimates and true + values are displayed. + + + + Merge joins also have measurement artifacts that can confuse the unwary. + A merge join will stop reading one input if it's exhausted the other input + and the next key value in the one input is greater than the last key value + of the other input; in such a case there can be no more matches and so no + need to scan the rest of the first input. This results in not reading all + of one child, with results like those mentioned for LIMIT. + Also, if the outer (first) child contains rows with duplicate key values, + the inner (second) child is backed up and rescanned for the portion of its + rows matching that key value. 
EXPLAIN ANALYZE counts these
+ repeated emissions of the same inner rows as if they were real additional
+ rows. When there are many outer duplicates, the reported actual row count
+ for the inner child plan node can be significantly larger than the number
+ of rows that are actually in the inner relation.
+ 
+ 
+ 
+ BitmapAnd and BitmapOr nodes always report their actual row counts as zero,
+ due to implementation limitations.
+ 
+ 
+ 
+ Normally, EXPLAIN will display every plan node
+ created by the planner. However, there are cases where the executor
+ can determine that certain nodes need not be executed because they
+ cannot produce any rows, based on parameter values that were not
+ available at planning time. (Currently this can only happen for child
+ nodes of an Append or MergeAppend node that is scanning a partitioned
+ table.) When this happens, those plan nodes are omitted from
+ the EXPLAIN output and a Subplans
+ Removed: N annotation appears
+ instead.
+ 
+ 
+ 
diff --git a/doc/src/sgml/processing-nodes.sgml b/doc/src/sgml/processing-nodes.sgml
new file mode 100644
index 0000000000..94a60ccc5f
--- /dev/null
+++ b/doc/src/sgml/processing-nodes.sgml
@@ -0,0 +1,118 @@
+ 
+ 
+ Processing Nodes
+ 
+ 
+ This section contains explanations and examples of how specific
+ SQL query features manifest as nodes in a query plan.
+ 
+ 
+ 
+ WindowAgg
+ 
+ 
+ This node computes window functions; it appears in the plan of any
+ query that contains them.
+ 
+ 
+ The following query uses multiple window functions and serves as a
+ concrete example when explaining the execution behavior of such
+ queries. The result is shown for reference, while
+ the EXPLAIN output is needed for the subsequent discussion.
+ A full understanding of EXPLAIN output is not necessary here; the pertinent
+ details are described below.
+ 
+ WindowAgg
+ -> Sort
+ Sort Key: "*VALUES*".column1
+ -> WindowAgg
+ -> Sort
+ Sort Key: "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+]]>
+ 
+ 
+ 
+ In the EXPLAIN output above, the bottom-most node is the unsorted, data-producing
+ values scan, which adds columns i and j to the output.
+ (In most real queries this would instead be, for example, GROUP BY results
+ or an ordinary join plus filter.)
+ Next, in order to add columns count_j and sum_j to the output,
+ the data is sorted on j to match their shared partition specification.
+ Next, the data is re-sorted using i so that count_i_noframe
+ can be added; and lastly the same ordered data is traversed again to add count_i_frame to the result.
+ (This follows the order written in the query, but the planner could
+ evaluate the passes in a different order.)
+ 
+ 
+ 
+ As seen in the EXPLAIN output, the presence of window functions results
+ in a stack of WindowAgg nodes with either an initial data producer plus explicit
+ sort node (as in the example) or a leaf node that produces ordered results
+ (e.g., an Index Only Scan).
+ Additional sort nodes (as in the example) may be added if there are multiple window
+ functions each specifying different orderings.
+ Each window function is associated with one WindowAgg node during planning,
+ though the same WindowAgg node may be used for multiple window functions.
+ (This is why there is only a single WindowAgg for column2/partition by j:
+ the sum_j and count_j columns share it.)
+ In particular, de-duplication of WindowAgg nodes happens during parsing and
+ planning (the latter taking into account any frame-clause optimizations the actual
+ window functions may allow). Note that while there is an implicit default window
+ frame for any OVER clause that does not specify one, for purposes of uniqueness the
+ absence of a frame clause and one specifying an equivalent frame explicitly are
+ considered different.
+ (This is why there are still two WindowAgg nodes for the two window
+ functions specifying partition by i.)
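+ 
+ For concreteness, a query of the general shape discussed above might look
+ like the following. (This is an illustrative sketch, not the exact example
+ query: the column names i and j and the output aliases match the
+ discussion here, but the VALUES data and the particular frame clause are
+ assumptions.)
+ 
+SELECT count(j) OVER (PARTITION BY j) AS count_j,
+       sum(j) OVER (PARTITION BY j) AS sum_j,
+       count(i) OVER (PARTITION BY i) AS count_i_noframe,
+       count(i) OVER (PARTITION BY i
+                      ROWS BETWEEN UNBOUNDED PRECEDING
+                           AND CURRENT ROW) AS count_i_frame
+FROM (VALUES (1, 1), (1, 2), (2, 1)) AS v(i, j);
+ 
+ Note that the explicit frame clause on count_i_frame is equivalent to the
+ implicit default frame on count_i_noframe, yet the two are still treated
+ as distinct for WindowAgg de-duplication purposes.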
+ 
+ 
+ 
+ When multiple window functions are used, the SQL standard requires that all
+ order-equivalent invocations see the same ordering of peer rows.
+ In practice this means that WindowAgg nodes corresponding to
+ specifications containing the same
+ PARTITION BY and ORDER BY
+ clauses in their window definitions will appear together in the stack,
+ without any intervening sort node, thereby guaranteeing that they see the
+ same sort ordering, even if the ORDER BY does not uniquely determine an ordering.
+ However, no guarantees are made about the evaluation of functions having
+ different PARTITION BY or ORDER BY specifications.
+ (In such cases a sort step is typically required between the passes of
+ window function evaluations, and the sort is not guaranteed to preserve
+ ordering of rows that its ORDER BY sees as equivalent.)
+ That said, the PostgreSQL planner does attempt to determine
+ whether, given two ORDER BY specifications, one is a prefix of the other,
+ in which case both window functions will see the ordering required for the more specific one.
+ 
+ 
+ 
+ Currently, a query without an ORDER BY clause, but containing one or more order-specifying
+ window functions (that is, not merely count(*) over ()), will still end
+ up producing sorted output, since the WindowAgg node will be the topmost
+ row-producing node and it usually outputs sorted data. It is not recommended to rely on this,
+ however. Use an explicit top-level ORDER BY clause if you want to be sure
+ the results are sorted in a particular way.
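+ 
+ For example, to be certain of the output ordering one might write a query
+ such as the following (an illustrative query; any row source works):
+ 
+SELECT x, sum(x) OVER (ORDER BY x) AS running_sum
+FROM generate_series(1, 5) AS t(x)
+ORDER BY x;
+ 
+ Here the window function's ORDER BY happens to match the top-level
+ ORDER BY, so no extra sort is needed, but only the top-level clause
+ guarantees the ordering of the result.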
+ + + + \ No newline at end of file diff --git a/doc/src/sgml/processing-statistics.sgml b/doc/src/sgml/processing-statistics.sgml new file mode 100644 index 0000000000..368551d990 --- /dev/null +++ b/doc/src/sgml/processing-statistics.sgml @@ -0,0 +1,464 @@ + + Statistics Used by the Planner + + + statistics + of the planner + + + + Single-Column Statistics + + As we saw in the previous section, the query planner needs to estimate + the number of rows retrieved by a query in order to make good choices + of query plans. This section provides a quick look at the statistics + that the system uses for these estimates. + + + + One component of the statistics is the total number of entries in + each table and index, as well as the number of disk blocks occupied + by each table and index. This information is kept in the table + pg_class, + in the columns reltuples and + relpages. We can look at it with + queries similar to this one: + + +SELECT relname, relkind, reltuples, relpages +FROM pg_class +WHERE relname LIKE 'tenk1%'; + + relname | relkind | reltuples | relpages +----------------------+---------+-----------+---------- + tenk1 | r | 10000 | 345 + tenk1_hundred | i | 10000 | 11 + tenk1_thous_tenthous | i | 10000 | 30 + tenk1_unique1 | i | 10000 | 30 + tenk1_unique2 | i | 10000 | 30 +(5 rows) + + + Here we can see that tenk1 contains 10000 + rows, as do its indexes, but the indexes are (unsurprisingly) much + smaller than the table. + + + + For efficiency reasons, reltuples + and relpages are not updated on-the-fly, + and so they usually contain somewhat out-of-date values. + They are updated by VACUUM, ANALYZE, and a + few DDL commands such as CREATE INDEX. A VACUUM + or ANALYZE operation that does not scan the entire table + (which is commonly the case) will incrementally update the + reltuples count on the basis of the part + of the table it did scan, resulting in an approximate value. 
+ In any case, the planner + will scale the values it finds in pg_class + to match the current physical table size, thus obtaining a closer + approximation. + + + + pg_statistic + + + + Most queries retrieve only a fraction of the rows in a table, due + to WHERE clauses that restrict the rows to be + examined. The planner thus needs to make an estimate of the + selectivity of WHERE clauses, that is, + the fraction of rows that match each condition in the + WHERE clause. The information used for this task is + stored in the + pg_statistic + system catalog. Entries in pg_statistic + are updated by the ANALYZE and VACUUM + ANALYZE commands, and are always approximate even when freshly + updated. + + + + pg_stats + + + + Rather than look at pg_statistic directly, + it's better to look at its view + pg_stats + when examining the statistics manually. pg_stats + is designed to be more easily readable. Furthermore, + pg_stats is readable by all, whereas + pg_statistic is only readable by a superuser. + (This prevents unprivileged users from learning something about + the contents of other people's tables from the statistics. The + pg_stats view is restricted to show only + rows about tables that the current user can read.) + For example, we might do: + + +SELECT attname, inherited, n_distinct, + array_to_string(most_common_vals, E'\n') as most_common_vals +FROM pg_stats +WHERE tablename = 'road'; + + attname | inherited | n_distinct | most_common_vals +---------+-----------+------------+------------------------------------ + name | f | -0.5681108 | I- 580 Ramp+ + | | | I- 880 Ramp+ + | | | Sp Railroad + + | | | I- 580 + + | | | I- 680 Ramp+ + | | | I- 80 Ramp+ + | | | 14th St + + | | | I- 880 + + | | | Mac Arthur Blvd+ + | | | Mission Blvd+ +... + name | t | -0.5125 | I- 580 Ramp+ + | | | I- 880 Ramp+ + | | | I- 580 + + | | | I- 680 Ramp+ + | | | I- 80 Ramp+ + | | | Sp Railroad + + | | | I- 880 + + | | | State Hwy 13 Ramp+ + | | | I- 80 + + | | | State Hwy 24 Ramp+ +... 
+ thepath | f | 0 | + thepath | t | 0 | +(4 rows) + + + Note that two rows are displayed for the same column, one corresponding + to the complete inheritance hierarchy starting at the + road table (inherited=t), + and another one including only the road table itself + (inherited=f). + (For brevity, we have only shown the first ten most-common values for + the name column.) + + + + The amount of information stored in pg_statistic + by ANALYZE, in particular the maximum number of entries in the + most_common_vals and histogram_bounds + arrays for each column, can be set on a + column-by-column basis using the ALTER TABLE SET STATISTICS + command, or globally by setting the + configuration variable. + The default limit is presently 100 entries. Raising the limit + might allow more accurate planner estimates to be made, particularly for + columns with irregular data distributions, at the price of consuming + more space in pg_statistic and slightly more + time to compute the estimates. Conversely, a lower limit might be + sufficient for columns with simple data distributions. + + + + Further details about the planner's use of statistics can be found in + . + + + + + Extended Statistics + + + statistics + of the planner + + + + correlation + in the query planner + + + + pg_statistic_ext + + + + pg_statistic_ext_data + + + + It is common to see slow queries running bad execution plans because + multiple columns used in the query clauses are correlated. + The planner normally assumes that multiple conditions + are independent of each other, + an assumption that does not hold when column values are correlated. + Regular statistics, because of their per-individual-column nature, + cannot capture any knowledge about cross-column correlation. + However, PostgreSQL has the ability to compute + multivariate statistics, which can capture + such information. 
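+ 
+ The estimation problem caused by correlated columns can be demonstrated
+ with a small experiment. (The table name t and the generated data here
+ are illustrative assumptions, not part of the regression database.)
+ 
+CREATE TABLE t (a int, b int);
+INSERT INTO t SELECT i % 100, i % 100 FROM generate_series(1, 10000) s(i);
+ANALYZE t;
+-- Each condition alone matches 1% of the rows.  Treating them as
+-- independent, the planner multiplies the selectivities and estimates
+-- about one row, although every row with a = 1 also has b = 1, so
+-- 100 rows actually match.
+EXPLAIN SELECT * FROM t WHERE a = 1 AND b = 1;
+ 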
+ 
+ 
+ 
+ Because the number of possible column combinations is very large,
+ it's impractical to compute multivariate statistics automatically.
+ Instead, extended statistics objects, more often
+ called just statistics objects, can be created to instruct
+ the server to obtain statistics across interesting sets of columns.
+ 
+ 
+ 
+ Statistics objects are created using the
+ CREATE STATISTICS command.
+ Creation of such an object merely creates a catalog entry expressing
+ interest in the statistics. Actual data collection is performed
+ by ANALYZE (either a manual command, or background
+ auto-analyze). The collected values can be examined in the
+ pg_statistic_ext_data
+ catalog.
+ 
+ 
+ 
+ ANALYZE computes extended statistics based on the same
+ sample of table rows that it takes for computing regular single-column
+ statistics. Since the sample size is increased by increasing the
+ statistics target for the table or any of its columns (as described in
+ the previous section), a larger statistics target will normally result in
+ more accurate extended statistics, as well as more time spent calculating
+ them.
+ 
+ 
+ 
+ The following subsections describe the kinds of extended statistics
+ that are currently supported.
+ 
+ 
+ 
+ Functional Dependencies
+ 
+ 
+ The simplest kind of extended statistics tracks functional
+ dependencies, a concept used in definitions of database normal forms.
+ We say that column b is functionally dependent on
+ column a if knowledge of the value of
+ a is sufficient to determine the value
+ of b, that is, there are no two rows having the same value
+ of a but different values of b.
+ In a fully normalized database, functional dependencies should exist
+ only on primary keys and superkeys. However, in practice many data sets
+ are not fully normalized for various reasons; intentional
+ denormalization for performance reasons is a common example.
+ Even in a fully normalized database, there may be partial correlation + between some columns, which can be expressed as partial functional + dependency. + + + + The existence of functional dependencies directly affects the accuracy + of estimates in certain queries. If a query contains conditions on + both the independent and the dependent column(s), the + conditions on the dependent columns do not further reduce the result + size; but without knowledge of the functional dependency, the query + planner will assume that the conditions are independent, resulting + in underestimating the result size. + + + + To inform the planner about functional dependencies, ANALYZE + can collect measurements of cross-column dependency. Assessing the + degree of dependency between all sets of columns would be prohibitively + expensive, so data collection is limited to those groups of columns + appearing together in a statistics object defined with + the dependencies option. It is advisable to create + dependencies statistics only for column groups that are + strongly correlated, to avoid unnecessary overhead in both + ANALYZE and later query planning. + + + + Here is an example of collecting functional-dependency statistics: + +CREATE STATISTICS stts (dependencies) ON city, zip FROM zipcodes; + +ANALYZE zipcodes; + +SELECT stxname, stxkeys, stxddependencies + FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid) + WHERE stxname = 'stts'; + stxname | stxkeys | stxddependencies +---------+---------+------------------------------------------ + stts | 1 5 | {"1 => 5": 1.000000, "5 => 1": 0.423130} +(1 row) + + Here it can be seen that column 1 (zip code) fully determines column + 5 (city) so the coefficient is 1.0, while city only determines zip code + about 42% of the time, meaning that there are many cities (58%) that are + represented by more than a single ZIP code. 
+ + + + When computing the selectivity for a query involving functionally + dependent columns, the planner adjusts the per-condition selectivity + estimates using the dependency coefficients so as not to produce + an underestimate. + + + + Limitations of Functional Dependencies + + + Functional dependencies are currently only applied when considering + simple equality conditions that compare columns to constant values, + and IN clauses with constant values. + They are not used to improve estimates for equality conditions + comparing two columns or comparing a column to an expression, nor for + range clauses, LIKE or any other type of condition. + + + + When estimating with functional dependencies, the planner assumes that + conditions on the involved columns are compatible and hence redundant. + If they are incompatible, the correct estimate would be zero rows, but + that possibility is not considered. For example, given a query like + +SELECT * FROM zipcodes WHERE city = 'San Francisco' AND zip = '94105'; + + the planner will disregard the city clause as not + changing the selectivity, which is correct. However, it will make + the same assumption about + +SELECT * FROM zipcodes WHERE city = 'San Francisco' AND zip = '90210'; + + even though there will really be zero rows satisfying this query. + Functional dependency statistics do not provide enough information + to conclude that, however. + + + + In many practical situations, this assumption is usually satisfied; + for example, there might be a GUI in the application that only allows + selecting compatible city and ZIP code values to use in a query. + But if that's not the case, functional dependencies may not be a viable + option. + + + + + + Multivariate N-Distinct Counts + + + Single-column statistics store the number of distinct values in each + column. 
Estimates of the number of distinct values when combining more
+ than one column (for example, for GROUP BY a, b) are
+ frequently wrong when the planner only has single-column statistical
+ data, causing it to select bad plans.
+ 
+ 
+ 
+ To improve such estimates, ANALYZE can collect n-distinct
+ statistics for groups of columns. As before, it's impractical to do
+ this for every possible column grouping, so data is collected only for
+ those groups of columns appearing together in a statistics object
+ defined with the ndistinct option. Data will be collected
+ for each possible combination of two or more columns from the set of
+ listed columns.
+ 
+ 
+ 
+ Continuing the previous example, the n-distinct counts in a
+ table of ZIP codes might look like the following:
+ 
+CREATE STATISTICS stts2 (ndistinct) ON city, state, zip FROM zipcodes;
+ 
+ANALYZE zipcodes;
+ 
+SELECT stxkeys AS k, stxdndistinct AS nd
+ FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid)
+ WHERE stxname = 'stts2';
+-[ RECORD 1 ]------------------------------------------------------&zwsp;--
+k | 1 2 5
+nd | {"1, 2": 33178, "1, 5": 33178, "2, 5": 27435, "1, 2, 5": 33178}
+(1 row)
+ 
+ This indicates that there are three combinations of columns that
+ have 33178 distinct values: ZIP code and state; ZIP code and city;
+ and ZIP code, city and state (the fact that they are all equal is
+ expected given that ZIP code alone is unique in this table). On the
+ other hand, the combination of city and state has only 27435 distinct
+ values.
+ 
+ 
+ 
+ It's advisable to create ndistinct statistics objects only
+ on combinations of columns that are actually used for grouping, and
+ for which misestimation of the number of groups is resulting in bad
+ plans. Otherwise, the ANALYZE cycles are just wasted.
+ 
+ 
+ 
+ 
+ Multivariate MCV Lists
+ 
+ 
+ Another type of statistic stored for each column is the most-common-value
+ list.
This allows very accurate estimates for individual columns, but + may result in significant misestimates for queries with conditions on + multiple columns. + + + + To improve such estimates, ANALYZE can collect MCV + lists on combinations of columns. Similarly to functional dependencies + and n-distinct coefficients, it's impractical to do this for every + possible column grouping. Even more so in this case, as the MCV list + (unlike functional dependencies and n-distinct coefficients) does store + the common column values. So data is collected only for those groups + of columns appearing together in a statistics object defined with the + mcv option. + + + + Continuing the previous example, the MCV list for a table of ZIP codes + might look like the following (unlike for simpler types of statistics, + a function is required for inspection of MCV contents): + + +CREATE STATISTICS stts3 (mcv) ON city, state FROM zipcodes; + +ANALYZE zipcodes; + +SELECT m.* FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid), + pg_mcv_list_items(stxdmcv) m WHERE stxname = 'stts3'; + + index | values | nulls | frequency | base_frequency +-------+------------------------+-------+-----------+---------------- + 0 | {Washington, DC} | {f,f} | 0.003467 | 2.7e-05 + 1 | {Apo, AE} | {f,f} | 0.003067 | 1.9e-05 + 2 | {Houston, TX} | {f,f} | 0.002167 | 0.000133 + 3 | {El Paso, TX} | {f,f} | 0.002 | 0.000113 + 4 | {New York, NY} | {f,f} | 0.001967 | 0.000114 + 5 | {Atlanta, GA} | {f,f} | 0.001633 | 3.3e-05 + 6 | {Sacramento, CA} | {f,f} | 0.001433 | 7.8e-05 + 7 | {Miami, FL} | {f,f} | 0.0014 | 6e-05 + 8 | {Dallas, TX} | {f,f} | 0.001367 | 8.8e-05 + 9 | {Chicago, IL} | {f,f} | 0.001333 | 5.1e-05 + ... +(99 rows) + + This indicates that the most common combination of city and state is + Washington in DC, with actual frequency (in the sample) about 0.35%. 
+ The base frequency of the combination (as computed from the simple
+ per-column frequencies) is only 0.0027%, so the combination would
+ otherwise be underestimated by two orders of magnitude.
+ 
+ 
+ 
+ It's advisable to create MCV statistics objects only
+ on combinations of columns that are actually used in conditions together,
+ and for which misestimation of the number of groups is resulting in bad
+ plans. Otherwise, the ANALYZE and planning cycles
+ are just wasted.
+ 
+ 
+ 
+ 
+ 
diff --git a/doc/src/sgml/processing.sgml b/doc/src/sgml/processing.sgml
new file mode 100644
index 0000000000..0d620c7289
--- /dev/null
+++ b/doc/src/sgml/processing.sgml
@@ -0,0 +1,34 @@
+ 
+ 
+ Command Processing
+ 
+ 
+ Pulling together everything else discussed in this part
+ on The SQL Language is the PostgreSQL
+ engine. This component receives SQL commands from the client,
+ parses them, ensuring that all of the objects referenced exist,
+ and then executes the command. For queries, there is another stage,
+ called the planner, that takes the declarative input and figures
+ out how to physically manipulate the data to produce the required
+ result as efficiently as possible. The plan created by the planner
+ can be seen by prefixing the query with EXPLAIN.
+ EXPLAIN, and other aspects of
+ command processing, are explained in the sections of this chapter.
+ Statistics, both built-in and user-defined, are used
+ by the planner to estimate the costs of different actions.
+ Parallel query covers how the engine can enlist additional worker
+ processes to perform portions of query execution in parallel. Concurrency
+ Control explains how multiple clients working on the same objects
+ have their efforts coordinated. And, finally, there is a section
+ with tips on how to write queries so they perform as efficiently as
+ possible.
+ 
+ 
+ &processing-explain;
+ &processing-statistics;
+ &parallel;
+ &mvcc;
+ &processing-nodes;
+ &perform;
+ 
+ -- 2.34.1