Re: Correlation in cost_index() - Mailing list pgsql-hackers

From Manfred Koizar
Subject Re: Correlation in cost_index()
Date
Msg-id p57opu40mi9tn02bkgi95llmus7dmun3n9@4ax.com
Whole thread Raw
In response to Re: Correlation in cost_index()  ("scott.marlowe" <scott.marlowe@ihs.com>)
Responses Re: Correlation in cost_index()
List pgsql-hackers
On Wed, 2 Oct 2002 14:07:19 -0600 (MDT), "scott.marlowe"
<scott.marlowe@ihs.com> wrote:
>I'd certainly be willing to do some testing on my own data with them.
>Gotta patch?

Yes, see below.  Disclaimer:  Apart from "make; make check" this is
completely untested.  Use at your own risk.  Have fun!

Servus
 Manfred
diff -ruN ../base/src/backend/optimizer/path/costsize.c src/backend/optimizer/path/costsize.c
--- ../base/src/backend/optimizer/path/costsize.c    2002-07-04 18:04:57.000000000 +0200
+++ src/backend/optimizer/path/costsize.c    2002-10-03 09:53:06.000000000 +0200
@@ -72,6 +72,7 @@
 double        cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
 double        cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
 double        cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
+int            index_cost_algorithm = DEFAULT_INDEX_COST_ALGORITHM;

 Cost        disable_cost = 100000000.0;

@@ -213,8 +214,8 @@
     Cost        indexStartupCost;
     Cost        indexTotalCost;
     Selectivity indexSelectivity;
-    double        indexCorrelation,
-                csquared;
+    double        indexCorrelation;
+    Cost        IO_cost;
     Cost        min_IO_cost,
                 max_IO_cost;
     Cost        cpu_per_tuple;
@@ -329,13 +330,62 @@
     min_IO_cost = ceil(indexSelectivity * T);
     max_IO_cost = pages_fetched * random_page_cost;

-    /*
-     * Now interpolate based on estimated index order correlation to get
-     * total disk I/O cost for main table accesses.
-     */
-    csquared = indexCorrelation * indexCorrelation;
+    switch (index_cost_algorithm) {
+    case 1: {
+        /*
+        ** Use abs(correlation) for linear interpolation
+        */
+        double absC = fabs(indexCorrelation);
+
+        IO_cost = absC * min_IO_cost + (1 - absC) * max_IO_cost;
+    }
+
+    case 2: {
+        /*
+        ** First estimate number of pages and cost per page,
+        ** then multiply the results.  min_IO_cost is used for
+        ** min_pages, because seq_page_cost = 1.
+        */
+        double absC = fabs(indexCorrelation);
+
+        double estPages = absC * min_IO_cost + (1 - absC) * pages_fetched;
+        double estPCost = absC * 1 + (1 - absC) * random_page_cost;
+        IO_cost = estPages * estPCost;
+    }
+
+    case 3: {
+        /*
+        ** Interpolate based on independence squared, thus forcing the
+        ** result to be closer to min_IO_cost
+        */
+        double independence = 1 - fabs(indexCorrelation);
+        double isquared = independence * independence;
+
+        IO_cost = (1 - isquared) * min_IO_cost + isquared * max_IO_cost;
+    }
+
+    case 4: {
+        /*
+        ** Interpolate geometrically
+        */
+        double absC = fabs(indexCorrelation);
+
+        IO_cost = exp(absC * log(min_IO_cost) +
+                      (1 - absC) * log(max_IO_cost));
+    }
+
+    default: {
+        /*
+         * Interpolate based on estimated index order correlation
+         * to get total disk I/O cost for main table accesses.
+         */
+        double csquared = indexCorrelation * indexCorrelation;
+
+        IO_cost = max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
+    }
+    }

-    run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
+    run_cost += IO_cost;

     /*
      * Estimate CPU costs per tuple.
diff -ruN ../base/src/backend/utils/misc/guc.c src/backend/utils/misc/guc.c
--- ../base/src/backend/utils/misc/guc.c    2002-07-20 17:27:19.000000000 +0200
+++ src/backend/utils/misc/guc.c    2002-10-03 10:03:37.000000000 +0200
@@ -644,6 +644,11 @@
     },

     {
+        { "index_cost_algorithm", PGC_USERSET }, &index_cost_algorithm,
+        DEFAULT_INDEX_COST_ALGORITHM, 0, INT_MAX, NULL, NULL
+    },
+
+    {
         { NULL, 0 }, NULL, 0, 0, 0, NULL, NULL
     }
 };
diff -ruN ../base/src/include/optimizer/cost.h src/include/optimizer/cost.h
--- ../base/src/include/optimizer/cost.h    2002-06-21 02:12:29.000000000 +0200
+++ src/include/optimizer/cost.h    2002-10-03 09:56:28.000000000 +0200
@@ -24,6 +24,7 @@
 #define DEFAULT_CPU_TUPLE_COST    0.01
 #define DEFAULT_CPU_INDEX_TUPLE_COST 0.001
 #define DEFAULT_CPU_OPERATOR_COST  0.0025
+#define DEFAULT_INDEX_COST_ALGORITHM 3

 /* defaults for function attributes used for expensive function calculations */
 #define BYTE_PCT 100
@@ -43,6 +44,7 @@
 extern double cpu_tuple_cost;
 extern double cpu_index_tuple_cost;
 extern double cpu_operator_cost;
+extern int index_cost_algorithm;
 extern Cost disable_cost;
 extern bool enable_seqscan;
 extern bool enable_indexscan;

pgsql-hackers by date:

Previous
From: "Nigel J. Andrews"
Date:
Subject: anoncvs and diff
Next
From: Justin Clift
Date:
Subject: OT: Looking to Open Source the Flash training material