[HACKERS] Account for cost and selectivity of HAVING quals - Mailing list pgsql-hackers

From Tom Lane
Subject [HACKERS] Account for cost and selectivity of HAVING quals
Date
Msg-id 20968.1509486337@sss.pgh.pa.us
Whole thread Raw
Responses Re: [HACKERS] Account for cost and selectivity of HAVING quals  ("Tels" <nospam-pg-abuse@bloodgate.com>)
Re: [HACKERS] Account for cost and selectivity of HAVING quals  (Ashutosh Bapat <ashutosh.bapat@enterprisedb.com>)
List pgsql-hackers
Pursuant to the discussion at
https://www.postgresql.org/message-id/20171029112420.8920B5FB05@mx.zeyos.com
here's a patch to fix the planner so that eval costs and selectivity of
HAVING quals are factored into the appropriate plan node numbers.
Perhaps unsurprisingly, this doesn't change the results of any
existing regression tests.

It's slightly annoying that this approach will result in calculating
the eval costs and selectivity several times, once for each aggregation
plan type we consider.  I thought about inserting RestrictInfo nodes
into the havingQual so that those numbers could be cached, but that turned
out to break various code that doesn't expect to see such nodes there.
I'm not sure it's worth going to the trouble of fixing that; in the big
scheme of things, the redundant calculations likely don't cost much, since
we aren't going to have relevant statistics.

Comments?  If anyone wants to do a real review of this, I'm happy to stick
it into the upcoming CF; but without an expression of interest, I'll just
push it.  I don't think there's anything terribly controversial here.

            regards, tom lane

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index ce32b8a..98fb16e 100644
*** a/src/backend/optimizer/path/costsize.c
--- b/src/backend/optimizer/path/costsize.c
*************** void
*** 1874,1879 ****
--- 1874,1880 ----
  cost_agg(Path *path, PlannerInfo *root,
           AggStrategy aggstrategy, const AggClauseCosts *aggcosts,
           int numGroupCols, double numGroups,
+          List *quals,
           Cost input_startup_cost, Cost input_total_cost,
           double input_tuples)
  {
*************** cost_agg(Path *path, PlannerInfo *root,
*** 1955,1960 ****
--- 1956,1981 ----
          output_tuples = numGroups;
      }

+     /*
+      * If there are quals (HAVING quals), account for their cost and
+      * selectivity.
+      */
+     if (quals)
+     {
+         QualCost    qual_cost;
+
+         cost_qual_eval(&qual_cost, quals, root);
+         startup_cost += qual_cost.startup;
+         total_cost += qual_cost.startup + output_tuples * qual_cost.per_tuple;
+
+         output_tuples = clamp_row_est(output_tuples *
+                                       clauselist_selectivity(root,
+                                                              quals,
+                                                              0,
+                                                              JOIN_INNER,
+                                                              NULL));
+     }
+
      path->rows = output_tuples;
      path->startup_cost = startup_cost;
      path->total_cost = total_cost;
*************** cost_windowagg(Path *path, PlannerInfo *
*** 2040,2051 ****
--- 2061,2075 ----
  void
  cost_group(Path *path, PlannerInfo *root,
             int numGroupCols, double numGroups,
+            List *quals,
             Cost input_startup_cost, Cost input_total_cost,
             double input_tuples)
  {
+     double        output_tuples;
      Cost        startup_cost;
      Cost        total_cost;

+     output_tuples = numGroups;
      startup_cost = input_startup_cost;
      total_cost = input_total_cost;

*************** cost_group(Path *path, PlannerInfo *root
*** 2055,2061 ****
       */
      total_cost += cpu_operator_cost * input_tuples * numGroupCols;

!     path->rows = numGroups;
      path->startup_cost = startup_cost;
      path->total_cost = total_cost;
  }
--- 2079,2105 ----
       */
      total_cost += cpu_operator_cost * input_tuples * numGroupCols;

!     /*
!      * If there are quals (HAVING quals), account for their cost and
!      * selectivity.
!      */
!     if (quals)
!     {
!         QualCost    qual_cost;
!
!         cost_qual_eval(&qual_cost, quals, root);
!         startup_cost += qual_cost.startup;
!         total_cost += qual_cost.startup + output_tuples * qual_cost.per_tuple;
!
!         output_tuples = clamp_row_est(output_tuples *
!                                       clauselist_selectivity(root,
!                                                              quals,
!                                                              0,
!                                                              JOIN_INNER,
!                                                              NULL));
!     }
!
!     path->rows = output_tuples;
      path->startup_cost = startup_cost;
      path->total_cost = total_cost;
  }
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 1c84a2c..f620243 100644
*** a/src/backend/optimizer/prep/prepunion.c
--- b/src/backend/optimizer/prep/prepunion.c
*************** choose_hashed_setop(PlannerInfo *root, L
*** 977,982 ****
--- 977,983 ----
       */
      cost_agg(&hashed_p, root, AGG_HASHED, NULL,
               numGroupCols, dNumGroups,
+              NIL,
               input_path->startup_cost, input_path->total_cost,
               input_path->rows);

*************** choose_hashed_setop(PlannerInfo *root, L
*** 991,996 ****
--- 992,998 ----
                input_path->rows, input_path->pathtarget->width,
                0.0, work_mem, -1.0);
      cost_group(&sorted_p, root, numGroupCols, dNumGroups,
+                NIL,
                 sorted_p.startup_cost, sorted_p.total_cost,
                 input_path->rows);

diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 2d491eb..eafec2a 100644
*** a/src/backend/optimizer/util/pathnode.c
--- b/src/backend/optimizer/util/pathnode.c
*************** create_result_path(PlannerInfo *root, Re
*** 1374,1379 ****
--- 1374,1380 ----
      pathnode->path.startup_cost = target->cost.startup;
      pathnode->path.total_cost = target->cost.startup +
          cpu_tuple_cost + target->cost.per_tuple;
+     /* Add cost of qual, if any --- but we ignore its selectivity */
      if (resconstantqual)
      {
          QualCost    qual_cost;
*************** create_unique_path(PlannerInfo *root, Re
*** 1596,1601 ****
--- 1597,1603 ----
              cost_agg(&agg_path, root,
                       AGG_HASHED, NULL,
                       numCols, pathnode->path.rows,
+                      NIL,
                       subpath->startup_cost,
                       subpath->total_cost,
                       rel->rows);
*************** create_group_path(PlannerInfo *root,
*** 2592,2597 ****
--- 2594,2600 ----
      cost_group(&pathnode->path, root,
                 list_length(groupClause),
                 numGroups,
+                qual,
                 subpath->startup_cost, subpath->total_cost,
                 subpath->rows);

*************** create_agg_path(PlannerInfo *root,
*** 2709,2714 ****
--- 2712,2718 ----
      cost_agg(&pathnode->path, root,
               aggstrategy, aggcosts,
               list_length(groupClause), numGroups,
+              qual,
               subpath->startup_cost, subpath->total_cost,
               subpath->rows);

*************** create_groupingsets_path(PlannerInfo *ro
*** 2817,2822 ****
--- 2821,2827 ----
                       agg_costs,
                       numGroupCols,
                       rollup->numGroups,
+                      having_qual,
                       subpath->startup_cost,
                       subpath->total_cost,
                       subpath->rows);
*************** create_groupingsets_path(PlannerInfo *ro
*** 2840,2845 ****
--- 2845,2851 ----
                           agg_costs,
                           numGroupCols,
                           rollup->numGroups,
+                          having_qual,
                           0.0, 0.0,
                           subpath->rows);
                  if (!rollup->is_hashed)
*************** create_groupingsets_path(PlannerInfo *ro
*** 2863,2868 ****
--- 2869,2875 ----
                           agg_costs,
                           numGroupCols,
                           rollup->numGroups,
+                          having_qual,
                           sort_path.startup_cost,
                           sort_path.total_cost,
                           sort_path.rows);
*************** create_minmaxagg_path(PlannerInfo *root,
*** 2931,2936 ****
--- 2938,2952 ----
      pathnode->path.startup_cost = initplan_cost + target->cost.startup;
      pathnode->path.total_cost = initplan_cost + target->cost.startup +
          target->cost.per_tuple + cpu_tuple_cost;
+     /* Add cost of qual, if any --- but we ignore its selectivity */
+     if (quals)
+     {
+         QualCost    qual_cost;
+
+         cost_qual_eval(&qual_cost, quals, root);
+         pathnode->path.startup_cost += qual_cost.startup;
+         pathnode->path.total_cost += qual_cost.startup + qual_cost.per_tuple;
+     }

      return pathnode;
  }
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 306d923..6c2317d 100644
*** a/src/include/optimizer/cost.h
--- b/src/include/optimizer/cost.h
*************** extern void cost_material(Path *path,
*** 116,121 ****
--- 116,122 ----
  extern void cost_agg(Path *path, PlannerInfo *root,
           AggStrategy aggstrategy, const AggClauseCosts *aggcosts,
           int numGroupCols, double numGroups,
+          List *quals,
           Cost input_startup_cost, Cost input_total_cost,
           double input_tuples);
  extern void cost_windowagg(Path *path, PlannerInfo *root,
*************** extern void cost_windowagg(Path *path, P
*** 124,129 ****
--- 125,131 ----
                 double input_tuples);
  extern void cost_group(Path *path, PlannerInfo *root,
             int numGroupCols, double numGroups,
+            List *quals,
             Cost input_startup_cost, Cost input_total_cost,
             double input_tuples);
  extern void initial_cost_nestloop(PlannerInfo *root,

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

pgsql-hackers by date:

Previous
From: srielau
Date:
Subject: Re: [HACKERS] proposal: schema variables
Next
From: Rob McColl
Date:
Subject: Re: [HACKERS] PostgreSQL 10 parenthesized single-column updates can produce errors