Re: Correlation in cost_index() - Mailing list pgsql-hackers
From | Manfred Koizar |
---|---|
Subject | Re: Correlation in cost_index() |
Date | |
Msg-id | p57opu40mi9tn02bkgi95llmus7dmun3n9@4ax.com Whole thread Raw |
In response to | Re: Correlation in cost_index() ("scott.marlowe" <scott.marlowe@ihs.com>) |
Responses |
Re: Correlation in cost_index()
|
List | pgsql-hackers |
On Wed, 2 Oct 2002 14:07:19 -0600 (MDT), "scott.marlowe" <scott.marlowe@ihs.com> wrote: >I'd certainly be willing to do some testing on my own data with them. >Gotta patch? Yes, see below. Disclaimer: Apart from "make; make check" this is completely untested. Use at your own risk. Have fun! Servus Manfred diff -ruN ../base/src/backend/optimizer/path/costsize.c src/backend/optimizer/path/costsize.c --- ../base/src/backend/optimizer/path/costsize.c 2002-07-04 18:04:57.000000000 +0200 +++ src/backend/optimizer/path/costsize.c 2002-10-03 09:53:06.000000000 +0200 @@ -72,6 +72,7 @@ double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST; double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST; double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST; +int index_cost_algorithm = DEFAULT_INDEX_COST_ALGORITHM; Cost disable_cost = 100000000.0; @@ -213,8 +214,8 @@ Cost indexStartupCost; Cost indexTotalCost; Selectivity indexSelectivity; - double indexCorrelation, - csquared; + double indexCorrelation; + Cost IO_cost; Cost min_IO_cost, max_IO_cost; Cost cpu_per_tuple; @@ -329,13 +330,62 @@ min_IO_cost = ceil(indexSelectivity * T); max_IO_cost = pages_fetched * random_page_cost; - /* - * Now interpolate based on estimated index order correlation to get - * total disk I/O cost for main table accesses. - */ - csquared = indexCorrelation * indexCorrelation; + switch (index_cost_algorithm) { + case 1: { + /* + ** Use abs(correlation) for linear interpolation + */ + double absC = fabs(indexCorrelation); + + IO_cost = absC * min_IO_cost + (1 - absC) * max_IO_cost; + } + + case 2: { + /* + ** First estimate number of pages and cost per page, + ** then multiply the results. min_IO_cost is used for + ** min_pages, because seq_page_cost = 1. + */ + double absC = fabs(indexCorrelation); + + double estPages = absC * min_IO_cost + (1 - absC) * pages_fetched; + double estPCost = absC * 1 + (1 - absC) * random_page_cost; + IO_cost = estPages * estPCost; + } + + case 3: { + /* + ** Interpolate based on independence squared, thus forcing the + ** result to be closer to min_IO_cost + */ + double independence = 1 - fabs(indexCorrelation); + double isquared = independence * independence; + + IO_cost = (1 - isquared) * min_IO_cost + isquared * max_IO_cost; + } + + case 4: { + /* + ** Interpolate geometrically + */ + double absC = fabs(indexCorrelation); + + IO_cost = exp(absC * log(min_IO_cost) + + (1 - absC) * log(max_IO_cost)); + } + + default: { + /* + * Interpolate based on estimated index order correlation + * to get total disk I/O cost for main table accesses. + */ + double csquared = indexCorrelation * indexCorrelation; + + IO_cost = max_IO_cost + csquared * (min_IO_cost - max_IO_cost); + } + } - run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost); + run_cost += IO_cost; /* * Estimate CPU costs per tuple. diff -ruN ../base/src/backend/utils/misc/guc.c src/backend/utils/misc/guc.c --- ../base/src/backend/utils/misc/guc.c 2002-07-20 17:27:19.000000000 +0200 +++ src/backend/utils/misc/guc.c 2002-10-03 10:03:37.000000000 +0200 @@ -644,6 +644,11 @@ }, { + { "index_cost_algorithm", PGC_USERSET }, &index_cost_algorithm, + DEFAULT_INDEX_COST_ALGORITHM, 0, INT_MAX, NULL, NULL + }, + + { { NULL, 0 }, NULL, 0, 0, 0, NULL, NULL } }; diff -ruN ../base/src/include/optimizer/cost.h src/include/optimizer/cost.h --- ../base/src/include/optimizer/cost.h 2002-06-21 02:12:29.000000000 +0200 +++ src/include/optimizer/cost.h 2002-10-03 09:56:28.000000000 +0200 @@ -24,6 +24,7 @@ #define DEFAULT_CPU_TUPLE_COST 0.01 #define DEFAULT_CPU_INDEX_TUPLE_COST 0.001 #define DEFAULT_CPU_OPERATOR_COST 0.0025 +#define DEFAULT_INDEX_COST_ALGORITHM 3 /* defaults for function attributes used for expensive function calculations */ #define BYTE_PCT 100 @@ -43,6 +44,7 @@ extern double cpu_tuple_cost; extern double cpu_index_tuple_cost; extern double cpu_operator_cost; +extern int index_cost_algorithm; extern Cost disable_cost; extern bool enable_seqscan; extern bool enable_indexscan;
pgsql-hackers by date: