I wrote:
> Dawid Kuroczko <> writes:
>> qnex=# EXPLAIN SELECT * FROM log NATURAL JOIN useragents LIMIT 1;
>> Limit (cost=15912.20..15912.31 rows=1 width=272)
>> > Hash Join (cost=15912.20..5328368.96 rows=47044336 width=272)
>> If I set enable_hashjoin=false:
>> qnex=# EXPLAIN ANALYZE SELECT * FROM log NATURAL LEFT JOIN useragents LIMIT 1;
>> Limit (cost=0.00..3.07 rows=1 width=272) (actual time=74.214..74.216
>> rows=1 loops=1)
>> > Nested Loop Left Join (cost=0.00..144295895.01 rows=47044336
>> width=272) (actual time=74.204..74.204 rows=1 loops=1)
> This is quite strange. The nestloop plan definitely should be preferred
> in the context of the LIMIT, considering that it has far lower estimated
> cost. And it is preferred in simple tests for me.
After a suitable period of contemplating my navel, I figured out
what is going on here: the total costs involved are large enough that
the stillfairlyhigh startup cost of the hash is disregarded by
compare_fuzzy_path_costs(), and so the nestloop is discarded as not
having any significant potential advantage in startup time.
I think that this refutes the original scheme of using the same fuzz
factor for both startup and total cost comparisons, and therefore
propose the attached patch.
Comments?
regards, tom lane
*** src/backend/optimizer/util/pathnode.c.orig Fri Jul 15 13:09:25 2005
 src/backend/optimizer/util/pathnode.c Fri Jul 22 12:08:25 2005
***************
*** 98,157 ****
static int
compare_fuzzy_path_costs(Path *path1, Path *path2, CostSelector criterion)
{
 Cost fuzz;

/*
! * The fuzz factor is set at one percent of the smaller total_cost,
! * but not less than 0.01 cost units (just in case total cost is
! * zero).
*
* XXX does this percentage need to be userconfigurable?
*/
 fuzz = Min(path1>total_cost, path2>total_cost) * 0.01;
 fuzz = Max(fuzz, 0.01);

if (criterion == STARTUP_COST)
{
! if (Abs(path1>startup_cost  path2>startup_cost) > fuzz)
! {
! if (path1>startup_cost < path2>startup_cost)
! return 1;
! else
! return +1;
! }
/*
* If paths have the same startup cost (not at all unlikely),
* order them by total cost.
*/
! if (Abs(path1>total_cost  path2>total_cost) > fuzz)
! {
! if (path1>total_cost < path2>total_cost)
! return 1;
! else
! return +1;
! }
}
else
{
! if (Abs(path1>total_cost  path2>total_cost) > fuzz)
! {
! if (path1>total_cost < path2>total_cost)
! return 1;
! else
! return +1;
! }
/*
* If paths have the same total cost, order them by startup cost.
*/
! if (Abs(path1>startup_cost  path2>startup_cost) > fuzz)
! {
! if (path1>startup_cost < path2>startup_cost)
! return 1;
! else
! return +1;
! }
}
return 0;
}
 98,138 
static int
compare_fuzzy_path_costs(Path *path1, Path *path2, CostSelector criterion)
{
/*
! * We use a fuzz factor of 1% of the smaller cost.
*
* XXX does this percentage need to be userconfigurable?
*/
if (criterion == STARTUP_COST)
{
! if (path1>startup_cost > path2>startup_cost * 1.01)
! return +1;
! if (path2>startup_cost > path1>startup_cost * 1.01)
! return 1;
/*
* If paths have the same startup cost (not at all unlikely),
* order them by total cost.
*/
! if (path1>total_cost > path2>total_cost * 1.01)
! return +1;
! if (path2>total_cost > path1>total_cost * 1.01)
! return 1;
}
else
{
! if (path1>total_cost > path2>total_cost * 1.01)
! return +1;
! if (path2>total_cost > path1>total_cost * 1.01)
! return 1;
/*
* If paths have the same total cost, order them by startup cost.
*/
! if (path1>startup_cost > path2>startup_cost * 1.01)
! return +1;
! if (path2>startup_cost > path1>startup_cost * 1.01)
! return 1;
}
return 0;
}