From ae8ddc0a25cbbdead422166a0f6223c235e6e807 Mon Sep 17 00:00:00 2001 From: Chengpeng Yan Date: Sat, 14 Feb 2026 11:44:59 +0800 Subject: [PATCH v5 2/2] add a GUC goo_greedy_strategy to choose GOO greedy strategies Add goo_greedy_strategy and extend GOO candidate comparison to support result_size, cost, and selectivity strategies. Also add combined mode, which runs these strategies and keeps the lowest-cost result. Signed-off-by: Chengpeng Yan --- src/backend/optimizer/path/goo.c | 184 ++++++++++++++++++++-- src/backend/utils/misc/guc_parameters.dat | 10 ++ src/backend/utils/misc/guc_tables.c | 8 + src/include/optimizer/paths.h | 9 ++ src/test/regress/expected/goo.out | 102 ++++++------ src/test/regress/sql/goo.sql | 4 + 6 files changed, 260 insertions(+), 57 deletions(-) diff --git a/src/backend/optimizer/path/goo.c b/src/backend/optimizer/path/goo.c index e49a9f372ef..b7d4a931198 100644 --- a/src/backend/optimizer/path/goo.c +++ b/src/backend/optimizer/path/goo.c @@ -55,6 +55,7 @@ * Configuration defaults. These are exposed as GUCs in guc_tables.c. */ bool enable_goo_join_search = false; +int goo_greedy_strategy = GOO_GREEDY_STRATEGY_COMBINED; /* * Working state for a single GOO search invocation. 
@@ -73,6 +74,7 @@ typedef struct GooState
 	MemoryContext cand_cxt;		/* per-iteration candidate storage */
 	MemoryContext scratch_cxt;	/* per-candidate speculative evaluation */
 	List	   *clumps;			/* remaining join components (RelOptInfo *) */
+	GooGreedyStrategy strategy; /* candidate comparison heuristic */
 
 	/*
 	 * "clumps" are similar to GEQO's concept (see geqo_eval.c): join
@@ -94,11 +96,22 @@ typedef struct GooCandidate
 {
 	RelOptInfo *left;			/* left input clump */
 	RelOptInfo *right;			/* right input clump */
+	double		result_size;	/* estimated result size in bytes */
 	Cost		total_cost;		/* total cost of cheapest path */
+	double		selectivity;	/* join selectivity (output/input rows) */
 	Relids		joinrelids;		/* relids covered by this join */
 } GooCandidate;
 
-static GooState * goo_init_state(PlannerInfo *root, List *initial_rels);
+typedef struct GooStrategyResult
+{
+	RelOptInfo *result;			/* final join rel, NULL if no order was found */
+	Cost		total_cost;		/* total cost of result's cheapest path */
+	List	   *join_rel_list;	/* root->join_rel_list as left by this run */
+	struct HTAB *join_rel_hash; /* root->join_rel_hash as left by this run */
+} GooStrategyResult;
+
+static GooState * goo_init_state(PlannerInfo *root, List *initial_rels,
+								  GooGreedyStrategy strategy);
 static void goo_destroy_state(GooState * state);
 static RelOptInfo *goo_search_internal(GooState * state);
 static void goo_reset_probe_state(GooState * state, int saved_rel_len,
@@ -106,9 +119,15 @@ static void goo_reset_probe_state(GooState * state, int saved_rel_len,
 static GooCandidate * goo_build_candidate(GooState * state, RelOptInfo *left,
 										  RelOptInfo *right);
 static RelOptInfo *goo_commit_join(GooState * state, GooCandidate * cand);
-static bool goo_candidate_better(GooCandidate * a, GooCandidate * b);
+static bool goo_candidate_better(GooGreedyStrategy strategy,
+								 GooCandidate * a, GooCandidate * b);
 static bool goo_candidate_prunable(GooState * state, RelOptInfo *left,
 								   RelOptInfo *right);
+static const char *goo_strategy_name(GooGreedyStrategy strategy);
+static GooStrategyResult goo_run_strategy(PlannerInfo *root, List *initial_rels,
+										  List *base_join_rel_list,
+										  struct HTAB *base_hash,
+										  GooGreedyStrategy strategy);
 
 /*
  * goo_join_search
@@ -130,8 +149,63 @@ goo_join_search(PlannerInfo *root, int levels_needed,
 	int			base_rel_count;
 	struct HTAB *base_hash;
 
+	/* In COMBINED mode, run every strategy and keep the cheapest result */
+	if (goo_greedy_strategy == GOO_GREEDY_STRATEGY_COMBINED)
+	{
+		static const GooGreedyStrategy combined_strategies[] = {
+			GOO_GREEDY_STRATEGY_RESULT_SIZE,
+			GOO_GREEDY_STRATEGY_COST,
+			GOO_GREEDY_STRATEGY_SELECTIVITY
+		};
+		GooStrategyResult best_result = {0};
+		GooGreedyStrategy best_strategy = GOO_GREEDY_STRATEGY_COST;
+		List	   *base_join_rel_list;
+		bool		have_best = false;
+
+		base_join_rel_list = root->join_rel_list;
+		base_hash = root->join_rel_hash;
+
+		for (int i = 0; i < lengthof(combined_strategies); i++)
+		{
+			GooGreedyStrategy strategy = combined_strategies[i];
+			GooStrategyResult result;
+
+			result = goo_run_strategy(root, initial_rels,
+									  base_join_rel_list, base_hash,
+									  strategy);
+
+			if (result.result == NULL)
+				continue;
+
+			if (!have_best || result.total_cost < best_result.total_cost)
+			{
+				best_result = result;
+				best_strategy = strategy;
+				have_best = true;
+			}
+		}
+
+		/*
+		 * During development/testing, fail fast when every strategy fails.
+		 */
+		if (!have_best)
+			elog(ERROR, "GOO join search failed: all strategies exhausted without a valid join order");
+
+		/*
+		 * Install the join rel list/hash built by the lowest-cost strategy.
+		 */
+		root->join_rel_list = best_result.join_rel_list;
+		root->join_rel_hash = best_result.join_rel_hash;
+
+		elog(DEBUG1, "GOO COMBINED mode: %s strategy chosen (cost: %.2f)",
+			 goo_strategy_name(best_strategy), best_result.total_cost);
+
+		return best_result.result;
+	}
+
+	/* Normal single-strategy mode */
 	/* Initialize search state and memory contexts */
-	state = goo_init_state(root, initial_rels);
+	state = goo_init_state(root, initial_rels, goo_greedy_strategy);
 
 	/*
 	 * Save initial state of join_rel_list and join_rel_hash so we can restore
@@ -172,7 +246,8 @@ goo_join_search(PlannerInfo *root, int levels_needed,
  * where we may evaluate hundreds or thousands of candidate joins.
  */
 static GooState *
-goo_init_state(PlannerInfo *root, List *initial_rels)
+goo_init_state(PlannerInfo *root, List *initial_rels,
+			   GooGreedyStrategy strategy)
 {
 	MemoryContext oldcxt;
 	GooState   *state;
@@ -183,6 +258,7 @@ goo_init_state(PlannerInfo *root, List *initial_rels)
 	state->root = root;
 	state->clumps = NIL;
 	state->prune_cartesian = false;
+	state->strategy = strategy;
 
 	/* Create the three-level memory context hierarchy */
 	state->goo_cxt = AllocSetContextCreate(root->planner_cxt, "GOOStateContext",
@@ -219,6 +295,42 @@ goo_destroy_state(GooState * state)
 	pfree(state);
 }
 
+static GooStrategyResult
+goo_run_strategy(PlannerInfo *root, List *initial_rels,
+				 List *base_join_rel_list, struct HTAB *base_hash,
+				 GooGreedyStrategy strategy)
+{
+	GooStrategyResult result;
+	GooState   *state;
+	MemoryContext oldcxt;
+
+	result.result = NULL;
+	result.total_cost = 0;
+	result.join_rel_list = NIL;
+	result.join_rel_hash = NULL;
+	/* Each run starts from its own copy of the base list and no hash */
+	oldcxt = MemoryContextSwitchTo(root->planner_cxt);
+	root->join_rel_list = list_copy(base_join_rel_list);
+	root->join_rel_hash = NULL;
+	MemoryContextSwitchTo(oldcxt);
+
+	state = goo_init_state(root, initial_rels, strategy);
+	result.result = goo_search_internal(state);
+
+	if (result.result != NULL)
+		result.total_cost = result.result->cheapest_total_path->total_cost;
+
+	result.join_rel_list = root->join_rel_list;
+	result.join_rel_hash = root->join_rel_hash;
+
+	goo_destroy_state(state);
+	/* Hand the caller's list/hash back; this run's travel in "result" */
+	root->join_rel_list = base_join_rel_list;
+	root->join_rel_hash = base_hash;
+
+	return result;
+}
+
 /*
  * goo_search_internal
  *		Main greedy search loop.
@@ -300,7 +412,8 @@ goo_search_internal(GooState * state)
 
 			/* Track the best candidate seen so far */
 			if (best_candidate == NULL ||
-				goo_candidate_better(cand, best_candidate))
+				goo_candidate_better(state->strategy,
+									 cand, best_candidate))
 				best_candidate = cand;
 		}
 	}
@@ -393,6 +506,8 @@ static GooCandidate * goo_build_candidate(GooState * state, RelOptInfo *left,
 	int			saved_rel_len;
 	struct HTAB *saved_hash;
 	RelOptInfo *joinrel;
+	double		join_rows;
+	double		result_size;
 	Cost		total_cost;
 	GooCandidate *cand;
 	bool		is_top_rel;
@@ -448,6 +563,9 @@ static GooCandidate * goo_build_candidate(GooState * state, RelOptInfo *left,
 		set_cheapest(grouped_rel);
 	}
 
+	join_rows = joinrel->rows;
+	/* result size is the row estimate times the target list width */
+	result_size = join_rows * joinrel->reltarget->width;
 	total_cost = joinrel->cheapest_total_path->total_cost;
 
 	/*
@@ -466,7 +584,9 @@ static GooCandidate * goo_build_candidate(GooState * state, RelOptInfo *left,
 	cand = palloc(sizeof(GooCandidate));
 	cand->left = left;
 	cand->right = right;
+	cand->result_size = result_size;
 	cand->total_cost = total_cost;
+	cand->selectivity = join_rows / Max(left->rows * right->rows, 1.0);	/* no 0/0 = NaN when an input has zero rows */
 	cand->joinrelids = bms_union(left->relids, right->relids);
 
 	MemoryContextSwitchTo(oldcxt);
@@ -597,12 +717,56 @@ goo_commit_join(GooState * state, GooCandidate * cand)
  * Returns true if candidate 'a' should be preferred over candidate 'b'.
*/
 static bool
-goo_candidate_better(GooCandidate * a, GooCandidate * b)
+goo_candidate_better(GooGreedyStrategy strategy,
+					 GooCandidate * a, GooCandidate * b)
 {
-	if (a->total_cost < b->total_cost)
-		return true;
-	if (a->total_cost > b->total_cost)
-		return false;
+	switch (strategy)
+	{
+		case GOO_GREEDY_STRATEGY_COMBINED:
+			/* COMBINED is expanded by goo_join_search, never compared here */
+			elog(ERROR, "goo_candidate_better should not be called in COMBINED mode");
+			return false;
+
+		case GOO_GREEDY_STRATEGY_RESULT_SIZE:
+			if (a->result_size < b->result_size)
+				return true;
+			if (a->result_size > b->result_size)
+				return false;
+			break;
+
+		case GOO_GREEDY_STRATEGY_COST:
+			if (a->total_cost < b->total_cost)
+				return true;
+			if (a->total_cost > b->total_cost)
+				return false;
+			break;
+
+		case GOO_GREEDY_STRATEGY_SELECTIVITY:
+			/* no default case, so the compiler flags unhandled strategies */
+			if (a->selectivity < b->selectivity)
+				return true;
+			if (a->selectivity > b->selectivity)
+				return false;
+			break;
+	}
 
 	return bms_compare(a->joinrelids, b->joinrelids) < 0;
 }
+
+static const char *
+goo_strategy_name(GooGreedyStrategy strategy)
+{
+	switch (strategy)
+	{
+		case GOO_GREEDY_STRATEGY_RESULT_SIZE:
+			return "RESULT_SIZE";
+		case GOO_GREEDY_STRATEGY_COST:
+			return "COST";
+		case GOO_GREEDY_STRATEGY_SELECTIVITY:
+			return "SELECTIVITY";
+		case GOO_GREEDY_STRATEGY_COMBINED:
+			return "COMBINED";
+	}
+
+	return "UNKNOWN";
+}
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index 624dee632c3..c17c44ca4d0 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -1164,6 +1164,16 @@
   max => 'MAX_KILOBYTES',
 },
 
+# WIP: only for testing
+{ name => 'goo_greedy_strategy', type => 'enum', context => 'PGC_USERSET', group => 'QUERY_TUNING_GEQO',
+  short_desc => 'Selects the heuristic used by GOO to compare join candidates.',
+  long_desc => 'Valid values are result_size, cost, selectivity, and combined.',
+  flags => 'GUC_EXPLAIN',
+  variable => 'goo_greedy_strategy',
+  boot_val => 'GOO_GREEDY_STRATEGY_COMBINED',
+  options => 'goo_greedy_strategy_options',
+},
+
 { name => 'gss_accept_delegation', type => 'bool', context => 'PGC_SIGHUP', group => 'CONN_AUTH_AUTH',
   short_desc => 'Sets whether GSSAPI delegation should be accepted from the client.',
   variable => 'pg_gss_accept_delegation',
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 741fce8dede..3b0ba5e4064 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -412,6 +412,14 @@ static const struct config_enum_entry plan_cache_mode_options[] = {
 	{NULL, 0, false}
 };
 
+static const struct config_enum_entry goo_greedy_strategy_options[] = {
+	{"result_size", GOO_GREEDY_STRATEGY_RESULT_SIZE, false},
+	{"cost", GOO_GREEDY_STRATEGY_COST, false},
+	{"selectivity", GOO_GREEDY_STRATEGY_SELECTIVITY, false},
+	{"combined", GOO_GREEDY_STRATEGY_COMBINED, false},
+	{NULL, 0, false}
+};
+
 static const struct config_enum_entry password_encryption_options[] = {
 	{"md5", PASSWORD_TYPE_MD5, false},
 	{"scram-sha-256", PASSWORD_TYPE_SCRAM_SHA_256, false},
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 734fa68884d..1782ec35066 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -16,11 +16,20 @@
 
 #include "nodes/pathnodes.h"
 
+typedef enum GooGreedyStrategy
+{
+	GOO_GREEDY_STRATEGY_RESULT_SIZE,	/* prefer smallest estimated result */
+	GOO_GREEDY_STRATEGY_COST,	/* prefer lowest total path cost */
+	GOO_GREEDY_STRATEGY_SELECTIVITY,	/* prefer lowest join selectivity */
+	GOO_GREEDY_STRATEGY_COMBINED	/* run the three above, keep cheapest */
+} GooGreedyStrategy;
+
 /*
  * allpaths.c
  */
 extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT bool enable_goo_join_search;
+extern PGDLLIMPORT int goo_greedy_strategy;
 extern PGDLLIMPORT bool enable_eager_aggregate;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT double min_eager_agg_group_size;
diff --git a/src/test/regress/expected/goo.out b/src/test/regress/expected/goo.out
index 3a935e626da..7f7436a971f 100644
--- 
a/src/test/regress/expected/goo.out +++ b/src/test/regress/expected/goo.out @@ -43,6 +43,13 @@ INSERT INTO t16 SELECT i, i FROM generate_series(1,10) i; INSERT INTO t17 SELECT i, i FROM generate_series(1,10) i; INSERT INTO t18 SELECT i, i FROM generate_series(1,10) i; ANALYZE; +-- Verify combined is the default strategy. +SHOW goo_greedy_strategy; + goo_greedy_strategy +--------------------- + combined +(1 row) + -- -- Basic 3-way join (sanity check) -- @@ -177,42 +184,42 @@ JOIN t12 ON t10.k = t12.k JOIN t13 ON t11.l = t13.l JOIN t14 ON t12.m = t14.m JOIN t15 ON t13.n = t15.n; - QUERY PLAN ----------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Aggregate -> Hash Join Hash Cond: (t7.h = t9.h) -> Hash Join Hash Cond: (t8.i = t10.i) -> Hash Join - Hash Cond: (t2.c = t4.c) + Hash Cond: (t6.g = t8.g) -> Hash Join - Hash Cond: (t3.b = t1.b) + Hash Cond: (t5.f = t7.f) -> Hash Join - Hash Cond: (t5.f = t7.f) + Hash Cond: (t4.e = t6.e) -> Hash Join Hash Cond: (t3.d = t5.d) - -> Seq Scan on t3 + -> Hash Join + Hash Cond: (t2.c = t4.c) + -> Hash Join + Hash Cond: (t1.b = t3.b) + -> Hash Join + Hash Cond: (t1.a = t2.a) + -> Seq Scan on t1 + -> Hash + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t3 + -> Hash + -> Seq Scan on t4 -> Hash -> Seq Scan on t5 -> Hash - -> Seq Scan on t7 + -> Seq Scan on t6 -> Hash - -> Hash Join - Hash Cond: (t1.a = t2.a) - -> Seq Scan on t1 - -> Hash - -> Seq Scan on t2 + -> Seq Scan on t7 -> Hash - -> Hash Join - Hash Cond: (t6.g = t8.g) - -> Hash Join - Hash Cond: (t4.e = t6.e) - -> Seq Scan on t4 - -> Hash - -> Seq Scan on t6 - -> Hash - -> Seq Scan on t8 + -> Seq Scan on t8 -> Hash -> Hash Join Hash Cond: (t12.m = t14.m) @@ -279,18 +286,18 @@ WHERE t1.a = t2.a ---------------------------------------------- Aggregate -> Hash Join - Hash Cond: (t1.a = t3.b) + Hash Cond: (t1.a = t4.c) -> Hash Join - Hash Cond: (t1.a = t2.a) - -> Seq Scan on 
t1 - -> Hash - -> Seq Scan on t2 - -> Hash + Hash Cond: (t1.a = t3.b) -> Hash Join - Hash Cond: (t3.b = t4.c) - -> Seq Scan on t3 + Hash Cond: (t1.a = t2.a) + -> Seq Scan on t1 -> Hash - -> Seq Scan on t4 + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t3 + -> Hash + -> Seq Scan on t4 (14 rows) -- @@ -601,30 +608,30 @@ JOIN chain1c ON chain1b.val = chain1c.id JOIN chain2a ON chain1a.val = chain2a.id -- Cross-chain join JOIN chain2b ON chain2a.val = chain2b.id JOIN chain2c ON chain2b.val = chain2c.id; - QUERY PLAN ------------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------------- Aggregate -> Hash Join - Hash Cond: (chain1a.val = chain2a.id) + Hash Cond: (chain2b.val = chain2c.id) -> Hash Join - Hash Cond: (chain1b.val = chain1c.id) - -> Hash Join - Hash Cond: (chain1a.id = chain1b.id) - -> Seq Scan on chain1a - -> Hash - -> Seq Scan on chain1b - -> Hash - -> Seq Scan on chain1c - -> Hash + Hash Cond: (chain2a.val = chain2b.id) -> Hash Join - Hash Cond: (chain2b.val = chain2c.id) + Hash Cond: (chain1a.val = chain2a.id) -> Hash Join - Hash Cond: (chain2a.val = chain2b.id) - -> Seq Scan on chain2a + Hash Cond: (chain1b.id = chain1a.id) + -> Hash Join + Hash Cond: (chain1b.val = chain1c.id) + -> Seq Scan on chain1b + -> Hash + -> Seq Scan on chain1c -> Hash - -> Seq Scan on chain2b + -> Seq Scan on chain1a -> Hash - -> Seq Scan on chain2c + -> Seq Scan on chain2a + -> Hash + -> Seq Scan on chain2b + -> Hash + -> Seq Scan on chain2c (22 rows) -- @@ -698,3 +705,4 @@ DEALLOCATE goo_plan; DEALLOCATE standard_plan; RESET geqo_threshold; RESET enable_goo_join_search; +RESET goo_greedy_strategy; diff --git a/src/test/regress/sql/goo.sql b/src/test/regress/sql/goo.sql index ab048d8e34e..1b2dff1d929 100644 --- a/src/test/regress/sql/goo.sql +++ b/src/test/regress/sql/goo.sql @@ -47,6 +47,9 @@ INSERT INTO t18 SELECT i, i FROM generate_series(1,10) i; ANALYZE; +-- Verify combined is the default 
strategy. +SHOW goo_greedy_strategy; + -- -- Basic 3-way join (sanity check) -- @@ -362,3 +365,4 @@ DEALLOCATE standard_plan; RESET geqo_threshold; RESET enable_goo_join_search; +RESET goo_greedy_strategy; \ No newline at end of file -- 2.50.1 (Apple Git-155)