From ae8ddc0a25cbbdead422166a0f6223c235e6e807 Mon Sep 17 00:00:00 2001
From: Chengpeng Yan <chengpeng_yan@outlook.com>
Date: Sat, 14 Feb 2026 11:44:59 +0800
Subject: [PATCH v5 2/2] add a GUC goo_greedy_strategy to choose GOO greedy
 strategies

Add the goo_greedy_strategy GUC and extend GOO candidate comparison to
support three greedy strategies: result_size, cost, and selectivity.  Also
add a combined mode (the default), which runs each of these strategies in
turn and keeps the result with the lowest total cost.

Signed-off-by: Chengpeng Yan <chengpeng_yan@outlook.com>
---
 src/backend/optimizer/path/goo.c          | 184 ++++++++++++++++++++--
 src/backend/utils/misc/guc_parameters.dat |  10 ++
 src/backend/utils/misc/guc_tables.c       |   8 +
 src/include/optimizer/paths.h             |   9 ++
 src/test/regress/expected/goo.out         | 102 ++++++------
 src/test/regress/sql/goo.sql              |   4 +
 6 files changed, 260 insertions(+), 57 deletions(-)

diff --git a/src/backend/optimizer/path/goo.c b/src/backend/optimizer/path/goo.c
index e49a9f372ef..b7d4a931198 100644
--- a/src/backend/optimizer/path/goo.c
+++ b/src/backend/optimizer/path/goo.c
@@ -55,6 +55,7 @@
  * Configuration defaults.  These are exposed as GUCs in guc_tables.c.
  */
 bool		enable_goo_join_search = false;
+int			goo_greedy_strategy = GOO_GREEDY_STRATEGY_COMBINED;
 
 /*
  * Working state for a single GOO search invocation.
@@ -73,6 +74,7 @@ typedef struct GooState
 	MemoryContext cand_cxt;		/* per-iteration candidate storage */
 	MemoryContext scratch_cxt;	/* per-candidate speculative evaluation */
 	List	   *clumps;			/* remaining join components (RelOptInfo *) */
+	GooGreedyStrategy strategy; /* candidate comparison heuristic */
 
 	/*
 	 * "clumps" are similar to GEQO's concept (see geqo_eval.c): join
@@ -94,11 +96,22 @@ typedef struct GooCandidate
 {
 	RelOptInfo *left;			/* left input clump */
 	RelOptInfo *right;			/* right input clump */
+	double		result_size;	/* estimated result size in bytes */
 	Cost		total_cost;		/* total cost of cheapest path */
+	double		selectivity;	/* join selectivity (output/input rows) */
 	Relids		joinrelids;		/* relids covered by this join */
 }			GooCandidate;
 
-static GooState * goo_init_state(PlannerInfo *root, List *initial_rels);
+typedef struct GooStrategyResult
+{
+	RelOptInfo *result;			/* rel returned by the search, or NULL */
+	Cost		total_cost;		/* cost of result's cheapest total path */
+	List	   *join_rel_list;	/* root->join_rel_list left by this run */
+	struct HTAB *join_rel_hash; /* root->join_rel_hash left by this run */
+}			GooStrategyResult;
+
+static GooState * goo_init_state(PlannerInfo *root, List *initial_rels,
+								 GooGreedyStrategy strategy);
 static void goo_destroy_state(GooState * state);
 static RelOptInfo *goo_search_internal(GooState * state);
 static void goo_reset_probe_state(GooState * state, int saved_rel_len,
@@ -106,9 +119,15 @@ static void goo_reset_probe_state(GooState * state, int saved_rel_len,
 static GooCandidate * goo_build_candidate(GooState * state, RelOptInfo *left,
 										  RelOptInfo *right);
 static RelOptInfo *goo_commit_join(GooState * state, GooCandidate * cand);
-static bool goo_candidate_better(GooCandidate * a, GooCandidate * b);
+static bool goo_candidate_better(GooGreedyStrategy strategy,
+								 GooCandidate * a, GooCandidate * b);
 static bool goo_candidate_prunable(GooState * state, RelOptInfo *left,
 								   RelOptInfo *right);
+static const char *goo_strategy_name(GooGreedyStrategy strategy);
+static GooStrategyResult goo_run_strategy(PlannerInfo *root, List *initial_rels,
+										  List *base_join_rel_list,
+										  struct HTAB *base_hash,
+										  GooGreedyStrategy strategy);
 
 /*
  * goo_join_search
@@ -130,8 +149,63 @@ goo_join_search(PlannerInfo *root, int levels_needed,
 	int			base_rel_count;
 	struct HTAB *base_hash;
 
+	/* In COMBINED mode, run every strategy and keep the cheapest result */
+	if (goo_greedy_strategy == GOO_GREEDY_STRATEGY_COMBINED)
+	{
+		static const GooGreedyStrategy combined_strategies[] = {
+			GOO_GREEDY_STRATEGY_RESULT_SIZE,
+			GOO_GREEDY_STRATEGY_COST,
+			GOO_GREEDY_STRATEGY_SELECTIVITY
+		};
+		GooStrategyResult best_result = {0};
+		GooGreedyStrategy best_strategy = GOO_GREEDY_STRATEGY_COST;
+		List	   *base_join_rel_list;
+		bool		have_best = false;
+
+		base_join_rel_list = root->join_rel_list;
+		base_hash = root->join_rel_hash;
+
+		for (int i = 0; i < lengthof(combined_strategies); i++)
+		{
+			GooGreedyStrategy strategy = combined_strategies[i];
+			GooStrategyResult result;
+
+			result = goo_run_strategy(root, initial_rels,
+									  base_join_rel_list, base_hash,
+									  strategy);
+
+			if (result.result == NULL)
+				continue;		/* this strategy produced no join rel */
+
+			if (!have_best || result.total_cost < best_result.total_cost)
+			{
+				best_result = result;
+				best_strategy = strategy;
+				have_best = true;
+			}
+		}
+
+		/*
+		 * During development/testing, fail fast when every strategy fails.
+		 */
+		if (!have_best)
+			elog(ERROR, "GOO join search failed: all strategies exhausted without a valid join order");
+
+		/*
+		 * Adopt the winning strategy's join_rel_list and join_rel_hash.
+		 */
+		root->join_rel_list = best_result.join_rel_list;
+		root->join_rel_hash = best_result.join_rel_hash;
+
+		elog(DEBUG1, "GOO COMBINED mode: %s strategy chosen (cost: %.2f)",
+			 goo_strategy_name(best_strategy), best_result.total_cost);
+
+		return best_result.result;
+	}
+
+	/* Normal single-strategy mode */
 	/* Initialize search state and memory contexts */
-	state = goo_init_state(root, initial_rels);
+	state = goo_init_state(root, initial_rels, goo_greedy_strategy);
 
 	/*
 	 * Save initial state of join_rel_list and join_rel_hash so we can restore
@@ -172,7 +246,8 @@ goo_join_search(PlannerInfo *root, int levels_needed,
  * where we may evaluate hundreds or thousands of candidate joins.
  */
 static GooState *
-goo_init_state(PlannerInfo *root, List *initial_rels)
+goo_init_state(PlannerInfo *root, List *initial_rels,
+			   GooGreedyStrategy strategy)
 {
 	MemoryContext oldcxt;
 	GooState   *state;
@@ -183,6 +258,7 @@ goo_init_state(PlannerInfo *root, List *initial_rels)
 	state->root = root;
 	state->clumps = NIL;
 	state->prune_cartesian = false;
+	state->strategy = strategy;
 
 	/* Create the three-level memory context hierarchy */
 	state->goo_cxt = AllocSetContextCreate(root->planner_cxt, "GOOStateContext",
@@ -219,6 +295,42 @@ goo_destroy_state(GooState * state)
 	pfree(state);
 }
 
+/*
+ * goo_run_strategy
+ *		Run one greedy search with the given strategy on a private copy of
+ *		base_join_rel_list, capture the outcome, and point root back at
+ *		base_join_rel_list/base_hash before returning.
+ */
+static GooStrategyResult
+goo_run_strategy(PlannerInfo *root, List *initial_rels,
+				 List *base_join_rel_list, struct HTAB *base_hash,
+				 GooGreedyStrategy strategy)
+{
+	GooStrategyResult outcome = {0};
+	MemoryContext caller_cxt;
+	GooState   *search;
+
+	/* Give this run its own join_rel_list, allocated in planner_cxt. */
+	caller_cxt = MemoryContextSwitchTo(root->planner_cxt);
+	root->join_rel_list = list_copy(base_join_rel_list);
+	root->join_rel_hash = NULL;
+	MemoryContextSwitchTo(caller_cxt);
+
+	search = goo_init_state(root, initial_rels, strategy);
+	outcome.result = goo_search_internal(search);
+	if (outcome.result != NULL)
+		outcome.total_cost = outcome.result->cheapest_total_path->total_cost;
+
+	/* Capture what the run built, then point root back at the base state. */
+	outcome.join_rel_list = root->join_rel_list;
+	outcome.join_rel_hash = root->join_rel_hash;
+	goo_destroy_state(search);
+	root->join_rel_list = base_join_rel_list;
+	root->join_rel_hash = base_hash;
+
+	return outcome;
+}
+
 /*
  * goo_search_internal
  *		Main greedy search loop.
@@ -300,7 +412,8 @@ goo_search_internal(GooState * state)
 
 					/* Track the best candidate seen so far */
 					if (best_candidate == NULL ||
-						goo_candidate_better(cand, best_candidate))
+						goo_candidate_better(state->strategy,
+											 cand, best_candidate))
 						best_candidate = cand;
 				}
 			}
@@ -393,6 +506,8 @@ static GooCandidate * goo_build_candidate(GooState * state, RelOptInfo *left,
 	int			saved_rel_len;
 	struct HTAB *saved_hash;
 	RelOptInfo *joinrel;
+	double		join_rows;
+	double		result_size;
 	Cost		total_cost;
 	GooCandidate *cand;
 	bool		is_top_rel;
@@ -448,6 +563,9 @@ static GooCandidate * goo_build_candidate(GooState * state, RelOptInfo *left,
 		set_cheapest(grouped_rel);
 	}
 
+	join_rows = joinrel->rows;
+
+	result_size = join_rows * joinrel->reltarget->width;
 	total_cost = joinrel->cheapest_total_path->total_cost;
 
 	/*
@@ -466,7 +584,9 @@ static GooCandidate * goo_build_candidate(GooState * state, RelOptInfo *left,
 	cand = palloc(sizeof(GooCandidate));
 	cand->left = left;
 	cand->right = right;
+	cand->result_size = result_size;
 	cand->total_cost = total_cost;
+	cand->selectivity = join_rows / Max(left->rows * right->rows, 1.0);
 	cand->joinrelids = bms_union(left->relids, right->relids);
 	MemoryContextSwitchTo(oldcxt);
 
@@ -597,12 +717,56 @@ goo_commit_join(GooState * state, GooCandidate * cand)
  * Returns true if candidate 'a' should be preferred over candidate 'b'.
  */
 static bool
-goo_candidate_better(GooCandidate * a, GooCandidate * b)
+goo_candidate_better(GooGreedyStrategy strategy,
+					 GooCandidate * a, GooCandidate * b)
 {
-	if (a->total_cost < b->total_cost)
-		return true;
-	if (a->total_cost > b->total_cost)
-		return false;
+	switch (strategy)
+	{
+		case GOO_GREEDY_STRATEGY_COMBINED:
+			/* COMBINED is expanded into concrete strategies by the caller */
+			elog(ERROR, "goo_candidate_better should not be called in COMBINED mode");
+			return false;
+
+		case GOO_GREEDY_STRATEGY_RESULT_SIZE:
+			if (a->result_size < b->result_size)
+				return true;
+			if (a->result_size > b->result_size)
+				return false;
+			break;				/* tie: decided by relids below */
+
+		case GOO_GREEDY_STRATEGY_COST:
+			if (a->total_cost < b->total_cost)
+				return true;
+			if (a->total_cost > b->total_cost)
+				return false;
+			break;				/* tie: decided by relids below */
+
+		case GOO_GREEDY_STRATEGY_SELECTIVITY:
+		default:				/* any other value compares by selectivity */
+			if (a->selectivity < b->selectivity)
+				return true;
+			if (a->selectivity > b->selectivity)
+				return false;
+			break;				/* tie: decided by relids below */
+	}
 
 	return bms_compare(a->joinrelids, b->joinrelids) < 0;
 }
+
+static const char *
+goo_strategy_name(GooGreedyStrategy strategy)
+{
+	switch (strategy)
+	{
+		case GOO_GREEDY_STRATEGY_RESULT_SIZE:
+			return "RESULT_SIZE";
+		case GOO_GREEDY_STRATEGY_COST:
+			return "COST";
+		case GOO_GREEDY_STRATEGY_SELECTIVITY:
+			return "SELECTIVITY";
+		case GOO_GREEDY_STRATEGY_COMBINED:
+			return "COMBINED";
+	}
+
+	return "UNKNOWN";			/* value matched none of the cases above */
+}
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index 624dee632c3..c17c44ca4d0 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -1164,6 +1164,16 @@
   max => 'MAX_KILOBYTES',
 },
 
+# WIP: only for testing
+{ name => 'goo_greedy_strategy', type => 'enum', context => 'PGC_USERSET', group => 'QUERY_TUNING_GEQO',
+  short_desc => 'Selects the heuristic used by GOO to compare join candidates.',
+  long_desc => 'Valid values are result_size, cost, selectivity, and combined.',
+  flags => 'GUC_EXPLAIN',
+  variable => 'goo_greedy_strategy',
+  boot_val => 'GOO_GREEDY_STRATEGY_COMBINED',
+  options => 'goo_greedy_strategy_options',
+},
+
 { name => 'gss_accept_delegation', type => 'bool', context => 'PGC_SIGHUP', group => 'CONN_AUTH_AUTH',
   short_desc => 'Sets whether GSSAPI delegation should be accepted from the client.',
   variable => 'pg_gss_accept_delegation',
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 741fce8dede..3b0ba5e4064 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -412,6 +412,14 @@ static const struct config_enum_entry plan_cache_mode_options[] = {
 	{NULL, 0, false}
 };
 
+static const struct config_enum_entry goo_greedy_strategy_options[] = {
+	{"result_size", GOO_GREEDY_STRATEGY_RESULT_SIZE, false},
+	{"cost", GOO_GREEDY_STRATEGY_COST, false},
+	{"selectivity", GOO_GREEDY_STRATEGY_SELECTIVITY, false},
+	{"combined", GOO_GREEDY_STRATEGY_COMBINED, false},
+	{NULL, 0, false}			/* terminator, as in the other option tables */
+};
+
 static const struct config_enum_entry password_encryption_options[] = {
 	{"md5", PASSWORD_TYPE_MD5, false},
 	{"scram-sha-256", PASSWORD_TYPE_SCRAM_SHA_256, false},
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 734fa68884d..1782ec35066 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -16,11 +16,20 @@
 
 #include "nodes/pathnodes.h"
 
+typedef enum GooGreedyStrategy
+{
+	GOO_GREEDY_STRATEGY_RESULT_SIZE,	/* prefer smallest rows * width */
+	GOO_GREEDY_STRATEGY_COST,	/* prefer lowest total path cost */
+	GOO_GREEDY_STRATEGY_SELECTIVITY,	/* prefer lowest join selectivity */
+	GOO_GREEDY_STRATEGY_COMBINED	/* run all of the above, keep cheapest */
+}			GooGreedyStrategy;
+
 /*
  * allpaths.c
  */
 extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT bool enable_goo_join_search;
+extern PGDLLIMPORT int goo_greedy_strategy;
 extern PGDLLIMPORT bool enable_eager_aggregate;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT double min_eager_agg_group_size;
diff --git a/src/test/regress/expected/goo.out b/src/test/regress/expected/goo.out
index 3a935e626da..7f7436a971f 100644
--- a/src/test/regress/expected/goo.out
+++ b/src/test/regress/expected/goo.out
@@ -43,6 +43,13 @@ INSERT INTO t16 SELECT i, i FROM generate_series(1,10) i;
 INSERT INTO t17 SELECT i, i FROM generate_series(1,10) i;
 INSERT INTO t18 SELECT i, i FROM generate_series(1,10) i;
 ANALYZE;
+-- Verify combined is the default strategy.
+SHOW goo_greedy_strategy;
+ goo_greedy_strategy 
+---------------------
+ combined
+(1 row)
+
 --
 -- Basic 3-way join (sanity check)
 --
@@ -177,42 +184,42 @@ JOIN t12 ON t10.k = t12.k
 JOIN t13 ON t11.l = t13.l
 JOIN t14 ON t12.m = t14.m
 JOIN t15 ON t13.n = t15.n;
-                           QUERY PLAN                           
-----------------------------------------------------------------
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
  Aggregate
    ->  Hash Join
          Hash Cond: (t7.h = t9.h)
          ->  Hash Join
                Hash Cond: (t8.i = t10.i)
                ->  Hash Join
-                     Hash Cond: (t2.c = t4.c)
+                     Hash Cond: (t6.g = t8.g)
                      ->  Hash Join
-                           Hash Cond: (t3.b = t1.b)
+                           Hash Cond: (t5.f = t7.f)
                            ->  Hash Join
-                                 Hash Cond: (t5.f = t7.f)
+                                 Hash Cond: (t4.e = t6.e)
                                  ->  Hash Join
                                        Hash Cond: (t3.d = t5.d)
-                                       ->  Seq Scan on t3
+                                       ->  Hash Join
+                                             Hash Cond: (t2.c = t4.c)
+                                             ->  Hash Join
+                                                   Hash Cond: (t1.b = t3.b)
+                                                   ->  Hash Join
+                                                         Hash Cond: (t1.a = t2.a)
+                                                         ->  Seq Scan on t1
+                                                         ->  Hash
+                                                               ->  Seq Scan on t2
+                                                   ->  Hash
+                                                         ->  Seq Scan on t3
+                                             ->  Hash
+                                                   ->  Seq Scan on t4
                                        ->  Hash
                                              ->  Seq Scan on t5
                                  ->  Hash
-                                       ->  Seq Scan on t7
+                                       ->  Seq Scan on t6
                            ->  Hash
-                                 ->  Hash Join
-                                       Hash Cond: (t1.a = t2.a)
-                                       ->  Seq Scan on t1
-                                       ->  Hash
-                                             ->  Seq Scan on t2
+                                 ->  Seq Scan on t7
                      ->  Hash
-                           ->  Hash Join
-                                 Hash Cond: (t6.g = t8.g)
-                                 ->  Hash Join
-                                       Hash Cond: (t4.e = t6.e)
-                                       ->  Seq Scan on t4
-                                       ->  Hash
-                                             ->  Seq Scan on t6
-                                 ->  Hash
-                                       ->  Seq Scan on t8
+                           ->  Seq Scan on t8
                ->  Hash
                      ->  Hash Join
                            Hash Cond: (t12.m = t14.m)
@@ -279,18 +286,18 @@ WHERE t1.a = t2.a
 ----------------------------------------------
  Aggregate
    ->  Hash Join
-         Hash Cond: (t1.a = t3.b)
+         Hash Cond: (t1.a = t4.c)
          ->  Hash Join
-               Hash Cond: (t1.a = t2.a)
-               ->  Seq Scan on t1
-               ->  Hash
-                     ->  Seq Scan on t2
-         ->  Hash
+               Hash Cond: (t1.a = t3.b)
                ->  Hash Join
-                     Hash Cond: (t3.b = t4.c)
-                     ->  Seq Scan on t3
+                     Hash Cond: (t1.a = t2.a)
+                     ->  Seq Scan on t1
                      ->  Hash
-                           ->  Seq Scan on t4
+                           ->  Seq Scan on t2
+               ->  Hash
+                     ->  Seq Scan on t3
+         ->  Hash
+               ->  Seq Scan on t4
 (14 rows)
 
 --
@@ -601,30 +608,30 @@ JOIN chain1c ON chain1b.val = chain1c.id
 JOIN chain2a ON chain1a.val = chain2a.id  -- Cross-chain join
 JOIN chain2b ON chain2a.val = chain2b.id
 JOIN chain2c ON chain2b.val = chain2c.id;
-                           QUERY PLAN                            
------------------------------------------------------------------
+                              QUERY PLAN                               
+-----------------------------------------------------------------------
  Aggregate
    ->  Hash Join
-         Hash Cond: (chain1a.val = chain2a.id)
+         Hash Cond: (chain2b.val = chain2c.id)
          ->  Hash Join
-               Hash Cond: (chain1b.val = chain1c.id)
-               ->  Hash Join
-                     Hash Cond: (chain1a.id = chain1b.id)
-                     ->  Seq Scan on chain1a
-                     ->  Hash
-                           ->  Seq Scan on chain1b
-               ->  Hash
-                     ->  Seq Scan on chain1c
-         ->  Hash
+               Hash Cond: (chain2a.val = chain2b.id)
                ->  Hash Join
-                     Hash Cond: (chain2b.val = chain2c.id)
+                     Hash Cond: (chain1a.val = chain2a.id)
                      ->  Hash Join
-                           Hash Cond: (chain2a.val = chain2b.id)
-                           ->  Seq Scan on chain2a
+                           Hash Cond: (chain1b.id = chain1a.id)
+                           ->  Hash Join
+                                 Hash Cond: (chain1b.val = chain1c.id)
+                                 ->  Seq Scan on chain1b
+                                 ->  Hash
+                                       ->  Seq Scan on chain1c
                            ->  Hash
-                                 ->  Seq Scan on chain2b
+                                 ->  Seq Scan on chain1a
                      ->  Hash
-                           ->  Seq Scan on chain2c
+                           ->  Seq Scan on chain2a
+               ->  Hash
+                     ->  Seq Scan on chain2b
+         ->  Hash
+               ->  Seq Scan on chain2c
 (22 rows)
 
 --
@@ -698,3 +705,4 @@ DEALLOCATE goo_plan;
 DEALLOCATE standard_plan;
 RESET geqo_threshold;
 RESET enable_goo_join_search;
+RESET goo_greedy_strategy;
diff --git a/src/test/regress/sql/goo.sql b/src/test/regress/sql/goo.sql
index ab048d8e34e..1b2dff1d929 100644
--- a/src/test/regress/sql/goo.sql
+++ b/src/test/regress/sql/goo.sql
@@ -47,6 +47,9 @@ INSERT INTO t18 SELECT i, i FROM generate_series(1,10) i;
 
 ANALYZE;
 
+-- Verify combined is the default strategy.
+SHOW goo_greedy_strategy;
+
 --
 -- Basic 3-way join (sanity check)
 --
@@ -362,3 +365,4 @@ DEALLOCATE standard_plan;
 
 RESET geqo_threshold;
 RESET enable_goo_join_search;
+RESET goo_greedy_strategy;
-- 
2.50.1 (Apple Git-155)