From cf539f322c91bbea79f444c746303bf49d6b4d70 Mon Sep 17 00:00:00 2001 From: Chengpeng Yan Date: Sun, 30 Nov 2025 14:05:10 +0800 Subject: [PATCH v1] Add GOO (Greedy Operator Ordering) join search as an alternative to GEQO Introduce a greedy join search algorithm (GOO) to handle large join problems. GOO builds join relations iteratively, maintaining a list of clumps (join components) and committing to the cheapest legal join at each step until only one clump remains. Signed-off-by: Chengpeng Yan --- src/backend/optimizer/path/Makefile | 1 + src/backend/optimizer/path/allpaths.c | 4 + src/backend/optimizer/path/goo.c | 601 +++++++++++++++++ src/backend/optimizer/path/meson.build | 1 + src/backend/utils/misc/guc_parameters.dat | 9 + src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/optimizer/goo.h | 23 + src/include/optimizer/paths.h | 1 + src/test/regress/expected/goo.out | 634 ++++++++++++++++++ src/test/regress/expected/sysviews.out | 3 +- src/test/regress/parallel_schedule | 9 +- src/test/regress/sql/goo.sql | 329 +++++++++ 12 files changed, 1612 insertions(+), 4 deletions(-) create mode 100644 src/backend/optimizer/path/goo.c create mode 100644 src/include/optimizer/goo.h create mode 100644 src/test/regress/expected/goo.out create mode 100644 src/test/regress/sql/goo.sql diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile index 1e199ff66f7..3bc825cd845 100644 --- a/src/backend/optimizer/path/Makefile +++ b/src/backend/optimizer/path/Makefile @@ -17,6 +17,7 @@ OBJS = \ clausesel.o \ costsize.o \ equivclass.o \ + goo.o \ indxpath.o \ joinpath.o \ joinrels.o \ diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 4c43fd0b19b..4574b1f44cc 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -35,6 +35,7 @@ #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/geqo.h" +#include "optimizer/goo.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -3845,6 +3846,9 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist) if (join_search_hook) return (*join_search_hook) (root, levels_needed, initial_rels); + /* WIP: for now use geqo_threshold for testing */ + else if (enable_goo_join_search && levels_needed >= geqo_threshold) + return goo_join_search(root, levels_needed, initial_rels); else if (enable_geqo && levels_needed >= geqo_threshold) return geqo(root, levels_needed, initial_rels); else diff --git a/src/backend/optimizer/path/goo.c b/src/backend/optimizer/path/goo.c new file mode 100644 index 00000000000..c6dc69e8432 --- /dev/null +++ b/src/backend/optimizer/path/goo.c @@ -0,0 +1,601 @@ +/*------------------------------------------------------------------------- + * + * goo.c + * Greedy operator ordering (GOO) join search for large join problems + * + * GOO is a deterministic greedy operator ordering algorithm that constructs + * join relations iteratively, always committing to the cheapest legal join at + * each step. The algorithm maintains a list of "clumps" (join components), + * initially one per base relation. At each iteration, it evaluates all legal + * pairs of clumps, selects the pair that produces the cheapest join according + * to the planner's cost model, and replaces those two clumps with the + * resulting joinrel. This continues until only one clump remains. + * + * ALGORITHM COMPLEXITY: + * + * Time Complexity: O(n^3) where n is the number of base relations. + * - The algorithm performs (n - 1) iterations, merging two clumps each time. + * - At iteration i, there are (n - i + 1) remaining clumps, requiring + * O((n-i)^2) pair evaluations to find the cheapest join. + * - Total: Sum of (n-i)^2 for i=1 to n-1 ≈ O(n^3) + * + * REFERENCES: + * + * This implementation is based on the algorithm described in: + * + * Leonidas Fegaras, "A New Heuristic for Optimizing Large Queries", + * Proceedings of the 9th International Conference on Database and Expert + * Systems Applications (DEXA '98), August 1998, Pages 726-735. + * https://dl.acm.org/doi/10.5555/648311.754892 + * + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/optimizer/path/goo.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" +#include "nodes/bitmapset.h" +#include "nodes/pathnodes.h" +#include "optimizer/geqo.h" +#include "optimizer/goo.h" +#include "optimizer/joininfo.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "utils/hsearch.h" +#include "utils/memutils.h" + +/* + * Configuration defaults. These are exposed as GUCs in guc_tables.c. + */ +bool enable_goo_join_search = false; + +/* + * Working state for a single GOO search invocation. + * + * This structure holds all the state needed during a greedy join order search. + * It manages three memory contexts with different lifetimes to avoid memory + * bloat during large join searches. + * + * TODO: Consider using the extension_state mechanism in PlannerInfo (similar + * to GEQO's approach) instead of passing GooState separately. + */ +typedef struct GooState +{ + PlannerInfo *root; /* global planner state */ + MemoryContext goo_cxt; /* long-lived (per-search) allocations */ + MemoryContext cand_cxt; /* per-iteration candidate storage */ + MemoryContext scratch_cxt; /* per-candidate speculative evaluation */ + List *clumps; /* remaining join components (RelOptInfo *) */ + + /* + * "clumps" are similar to GEQO's concept (see geqo_eval.c): join + * components that haven't been merged yet. Initially one per base + * relation, gradually merged until one remains. + */ + bool clause_pair_present; /* any clause-connected pair exists? */ +} GooState; + +/* + * Candidate join between two clumps. + * + * This structure holds the cost metrics extracted from a speculative joinrel + * evaluation. We create this lightweight structure in cand_cxt after discarding + * the actual joinrel from scratch_cxt, allowing us to compare many candidates + * without exhausting memory. + */ +typedef struct GooCandidate +{ + RelOptInfo *left; /* left input clump */ + RelOptInfo *right; /* right input clump */ + Cost total_cost; /* total cost of cheapest path */ +} GooCandidate; + +static GooState * goo_init_state(PlannerInfo *root, List *initial_rels); +static void goo_destroy_state(GooState * state); +static RelOptInfo *goo_search_internal(GooState * state); +static void goo_reset_probe_state(GooState * state, int saved_rel_len, + struct HTAB *saved_hash); +static GooCandidate * goo_build_candidate(GooState * state, RelOptInfo *left, + RelOptInfo *right); +static RelOptInfo *goo_commit_join(GooState * state, GooCandidate * cand); +static bool goo_candidate_better(GooCandidate * a, GooCandidate * b); +static bool goo_candidate_prunable(GooState * state, RelOptInfo *left, + RelOptInfo *right); + +/* + * goo_join_search + * Entry point for Greedy Operator Ordering join search algorithm. + * + * This function is called from make_rel_from_joinlist() when + * enable_goo_join_search is true and the number of relations meets or + * exceeds geqo_threshold. + * + * Returns the final RelOptInfo representing the join of all base relations, + * or errors out if no valid join order can be found. + */ +RelOptInfo * +goo_join_search(PlannerInfo *root, int levels_needed, + List *initial_rels) +{ + GooState *state; + RelOptInfo *result; + int base_rel_count; + struct HTAB *base_hash; + + /* Initialize search state and memory contexts */ + state = goo_init_state(root, initial_rels); + + /* + * Save initial state of join_rel_list and join_rel_hash so we can restore + * them if the search fails. + */ + base_rel_count = list_length(root->join_rel_list); + base_hash = root->join_rel_hash; + + /* Run the main greedy search loop */ + result = goo_search_internal(state); + + if (result == NULL) + { + /* Restore planner state before reporting error */ + root->join_rel_list = list_truncate(root->join_rel_list, base_rel_count); + root->join_rel_hash = base_hash; + elog(ERROR, "GOO join search failed to find a valid join order"); + } + + goo_destroy_state(state); + return result; +} + +/* + * goo_init_state + * Initialize per-search state and memory contexts. + * + * Creates the GooState structure and three memory contexts with different + * lifetimes: + * + * - goo_cxt: Lives for the entire search, holds the clumps list and state. + * - cand_cxt: Reset after each iteration, holds candidate structures during + * the comparison phase. + * - scratch_cxt: Reset after each candidate evaluation, holds speculative + * joinrels that are discarded before committing to a choice. + * + * The three-context design prevents memory bloat during large join searches + * where we may evaluate hundreds or thousands of candidate joins. + */ +static GooState * goo_init_state(PlannerInfo *root, List *initial_rels) +{ + MemoryContext oldcxt; + GooState *state; + + oldcxt = MemoryContextSwitchTo(root->planner_cxt); + + state = palloc(sizeof(GooState)); + state->root = root; + state->clumps = NIL; + state->clause_pair_present = false; + + /* Create the three-level memory context hierarchy */ + state->goo_cxt = AllocSetContextCreate(root->planner_cxt, "GOOStateContext", + ALLOCSET_DEFAULT_SIZES); + state->cand_cxt = AllocSetContextCreate(state->goo_cxt, "GOOCandidateContext", + ALLOCSET_SMALL_SIZES); + state->scratch_cxt = AllocSetContextCreate( + state->goo_cxt, "GOOScratchContext", ALLOCSET_SMALL_SIZES); + + /* + * Copy the initial_rels list into goo_cxt. This becomes our working + * clumps list that we'll modify throughout the search. + */ + MemoryContextSwitchTo(state->goo_cxt); + state->clumps = list_copy(initial_rels); + + MemoryContextSwitchTo(oldcxt); + + return state; +} + +/* + * goo_destroy_state + * Free all memory allocated for the GOO search. + * + * Deletes the goo_cxt memory context (which recursively deletes cand_cxt + * and scratch_cxt as children) and then frees the state structure itself. + * This is called after the search completes successfully or fails. + */ +static void +goo_destroy_state(GooState * state) +{ + MemoryContextDelete(state->goo_cxt); + pfree(state); +} + +/* + * goo_search_internal + * Main greedy search loop. + * + * Implements a two-pass algorithm at each iteration: + * + * Pass 1: Scan all clump pairs to detect whether any clause-connected pairs + * exist. This sets the clause_pair_present flag. + * + * Pass 2: Evaluate all viable candidate pairs, keeping track of the best one + * according to our comparison criteria. If clause_pair_present is true, + * we skip Cartesian products entirely to avoid expensive cross joins. + * + * After selecting the best candidate, we permanently create its joinrel in + * planner_cxt and replace the two input clumps with this new joinrel. This + * continues until only one clump remains. + * + * The function runs primarily in goo_cxt, temporarily switching to planner_cxt + * when creating permanent joinrels and to scratch_cxt when evaluating + * speculative candidates. + * + * Returns the final joinrel spanning all base relations, or NULL on failure. + */ +static RelOptInfo * +goo_search_internal(GooState * state) +{ + PlannerInfo *root = state->root; + RelOptInfo *final_rel = NULL; + MemoryContext oldcxt; + + /* + * Switch to goo_cxt for the entire search process. This ensures that all + * operations on state->clumps and related structures happen in the + * correct memory context. + */ + oldcxt = MemoryContextSwitchTo(state->goo_cxt); + + while (list_length(state->clumps) > 1) + { + ListCell *lc1; + int i; + GooCandidate *best_candidate = NULL; + + /* Allow query cancellation during long join searches */ + CHECK_FOR_INTERRUPTS(); + + /* Reset candidate context for this iteration */ + MemoryContextReset(state->cand_cxt); + state->clause_pair_present = false; + + /* + * Pass 1: Scan all pairs to detect clause-connected joins. + * + * We need to know whether ANY clause-connected pairs exist before we + * can decide whether to skip Cartesian products. This quick scan + * allows us to prefer well-connected joins without completely + * forbidding Cartesian products (which may be necessary for + * disconnected query graphs). + */ + for (i = 0, lc1 = list_head(state->clumps); lc1 != NULL; + lc1 = lnext(state->clumps, lc1), i++) + { + RelOptInfo *left = lfirst_node(RelOptInfo, lc1); + ListCell *lc2 = lnext(state->clumps, lc1); + + for (; lc2 != NULL; lc2 = lnext(state->clumps, lc2)) + { + RelOptInfo *right = lfirst_node(RelOptInfo, lc2); + + /* Check if this pair has a join clause or order restriction */ + if (have_relevant_joinclause(root, left, right) || + have_join_order_restriction(root, left, right)) + { + /* Found at least one clause-connected pair */ + state->clause_pair_present = true; + break; + } + } + + if (state->clause_pair_present) + break; + } + + /* + * Pass 2: Evaluate all viable candidate pairs and select the best. + * + * For each pair that passes the pruning check, we do a full + * speculative evaluation using make_join_rel() to get accurate costs. + * The candidate with the best cost (according to + * goo_candidate_better) is remembered and will be committed after + * this pass. + * + * TODO: It might be worth caching cost estimates if the same join + * pair appears in multiple iterations. + */ + for (lc1 = list_head(state->clumps); lc1 != NULL; + lc1 = lnext(state->clumps, lc1)) + { + RelOptInfo *left = lfirst_node(RelOptInfo, lc1); + ListCell *lc2 = lnext(state->clumps, lc1); + + for (; lc2 != NULL; lc2 = lnext(state->clumps, lc2)) + { + RelOptInfo *right = lfirst_node(RelOptInfo, lc2); + GooCandidate *cand; + + cand = goo_build_candidate(state, left, right); + if (cand == NULL) + continue; + + /* Track the best candidate seen so far */ + if (best_candidate == NULL || + goo_candidate_better(cand, best_candidate)) + best_candidate = cand; + } + } + + /* We must have at least one valid join candidate */ + if (best_candidate == NULL) + elog(ERROR, "GOO join search failed to find any valid join candidates"); + + /* + * Commit the best candidate: create the joinrel permanently and + * update the clumps list. + */ + final_rel = goo_commit_join(state, best_candidate); + + if (final_rel == NULL) + elog(ERROR, "GOO join search failed to commit join"); + } + + /* Switch back to the original context before returning */ + MemoryContextSwitchTo(oldcxt); + + return final_rel; +} + +/* + * goo_candidate_prunable + * Determine whether a candidate pair should be skipped. + * + * We use a two-level pruning strategy: + * + * 1. Pairs with join clauses or join-order restrictions are never prunable. + * These represent natural joins or required join orders (e.g., from outer + * joins or LATERAL references). + * + * 2. If clause_pair_present is true (meaning at least one clause-connected + * pair exists in this iteration), we prune Cartesian products to avoid + * evaluating expensive cross joins when better options are available. + * + * However, if NO clause-connected pairs exist in an iteration, we allow + * Cartesian products to be considered. This ensures we can always make + * progress even with disconnected query graphs. + * + * Returns true if the pair should be pruned (skipped), false otherwise. + */ +static bool +goo_candidate_prunable(GooState * state, RelOptInfo *left, + RelOptInfo *right) +{ + PlannerInfo *root = state->root; + bool has_clause = have_relevant_joinclause(root, left, right); + bool has_restriction = have_join_order_restriction(root, left, right); + + if (has_clause || has_restriction) + return false; /* never prune clause-connected joins */ + + return state->clause_pair_present; +} + +/* + * goo_build_candidate + * Evaluate a potential join between two clumps and return a + *candidate. + * + * This function performs a speculative join evaluation to extract cost metrics + * without permanently creating the joinrel. The process is: + * + * 1. Check basic viability (pruning, overlapping relids). + * 2. Switch to scratch_cxt and create the joinrel using make_join_rel(). + * 3. Generate paths (including partitionwise and parallel variants). + * 4. Extract cost metrics from the cheapest path. + * 5. Discard the joinrel by calling goo_reset_probe_state(). + * 6. Create a lightweight GooCandidate in cand_cxt with the extracted metrics. + * + * This evaluate-and-discard pattern prevents memory bloat when evaluating + * many candidates. The winning candidate will be rebuilt permanently later + * by goo_commit_join(). + * + * Returns a GooCandidate structure, or NULL if the join is illegal or + * overlapping. Assumes the caller is in goo_cxt. + */ +static GooCandidate * goo_build_candidate(GooState * state, RelOptInfo *left, + RelOptInfo *right) +{ + PlannerInfo *root = state->root; + MemoryContext oldcxt; + int saved_rel_len; + struct HTAB *saved_hash; + RelOptInfo *joinrel; + Path *best_path; + Cost total_cost; + GooCandidate *cand; + + /* Skip if this pair should be pruned */ + if (goo_candidate_prunable(state, left, right)) + return NULL; + + /* Sanity check: ensure the clumps don't overlap */ + if (bms_overlap(left->relids, right->relids)) + return NULL; + + /* + * Save state before speculative join evaluation. We'll create the joinrel + * in scratch_cxt and then discard it. + */ + saved_rel_len = list_length(root->join_rel_list); + saved_hash = root->join_rel_hash; + + /* Switch to scratch_cxt for speculative joinrel creation */ + oldcxt = MemoryContextSwitchTo(state->scratch_cxt); + + /* + * Temporarily disable join_rel_hash so make_join_rel() doesn't find or + * cache this speculative joinrel. + */ + root->join_rel_hash = NULL; + + /* + * Create the joinrel and generate all its paths. + * + * TODO: This is the most expensive part of GOO. Each candidate evaluation + * performs full path generation via make_join_rel(). We might improve + * performance by deferring expensive path variants (partitionwise, + * parallel) until the commit phase. + */ + joinrel = make_join_rel(root, left, right); + + if (joinrel) + { + /* Generate additional path variants */ + generate_partitionwise_join_paths(root, joinrel); + if (!bms_equal(joinrel->relids, root->all_query_rels)) + generate_useful_gather_paths(root, joinrel, false); + set_cheapest(joinrel); + } + + best_path = joinrel ? joinrel->cheapest_total_path : NULL; + if (best_path == NULL) + { + /* Invalid or uninteresting join, clean up and return NULL */ + MemoryContextSwitchTo(oldcxt); + goo_reset_probe_state(state, saved_rel_len, saved_hash); + return NULL; + } + + /* + * Extract the metrics we need from the speculative joinrel. After this, + * we'll discard the entire joinrel and all its paths. + */ + total_cost = best_path->total_cost; + + /* + * Switch back to goo_cxt and discard the speculative joinrel. + * goo_reset_probe_state() will clean up join_rel_list, join_rel_hash, and + * reset scratch_cxt to free all the joinrel's memory. + */ + MemoryContextSwitchTo(oldcxt); + goo_reset_probe_state(state, saved_rel_len, saved_hash); + + /* + * Now create the candidate structure in cand_cxt. This will survive until + * the end of this iteration (when cand_cxt is reset). + */ + oldcxt = MemoryContextSwitchTo(state->cand_cxt); + cand = palloc(sizeof(GooCandidate)); + cand->left = left; + cand->right = right; + cand->total_cost = total_cost; + MemoryContextSwitchTo(oldcxt); + + return cand; +} + +/* + * goo_reset_probe_state + * Clean up after a speculative joinrel evaluation. + * + * Reverts the planner's join_rel_list and join_rel_hash to their saved state, + * removing any joinrels that were created during speculative evaluation. + * Also resets scratch_cxt to free all memory used by the discarded joinrel + * and its paths. + * + * This function is called after extracting cost metrics from a speculative + * joinrel that we don't want to keep. + */ +static void +goo_reset_probe_state(GooState * state, int saved_rel_len, + struct HTAB *saved_hash) +{ + PlannerInfo *root = state->root; + + /* Remove speculative joinrels from the planner's lists */ + root->join_rel_list = list_truncate(root->join_rel_list, saved_rel_len); + root->join_rel_hash = saved_hash; + + /* Free all memory used during speculative evaluation */ + MemoryContextReset(state->scratch_cxt); +} + +/* + * goo_commit_join + * Permanently create the chosen join and update the clumps list. + * + * After selecting the best candidate in an iteration, we need to permanently + * create its joinrel (with all paths) and integrate it into the planner state. + * This function: + * + * 1. Switches to planner_cxt and creates the joinrel using make_join_rel(). + * Unlike the speculative evaluation, this joinrel is kept permanently. + * 2. Generates partitionwise and parallel path variants. + * 3. Determines the cheapest paths. + * 4. Updates state->clumps by removing the two input clumps and adding the + * new joinrel as a single clump. + * + * The next iteration will treat this joinrel as an atomic unit that can be + * joined with other remaining clumps. + * + * Returns the newly created joinrel. Assumes the caller is in goo_cxt. + */ +static RelOptInfo * +goo_commit_join(GooState * state, GooCandidate * cand) +{ + MemoryContext oldcxt; + PlannerInfo *root = state->root; + RelOptInfo *joinrel; + + /* + * Create the joinrel permanently in planner_cxt. Unlike the speculative + * evaluation in goo_build_candidate(), this joinrel will be kept and + * added to root->join_rel_list for use by the rest of the planner. + */ + oldcxt = MemoryContextSwitchTo(root->planner_cxt); + + joinrel = make_join_rel(root, cand->left, cand->right); + if (joinrel == NULL) + { + MemoryContextSwitchTo(oldcxt); + elog(ERROR, "GOO join search failed to create join relation"); + } + + /* Generate additional path variants, just like standard_join_search() */ + generate_partitionwise_join_paths(root, joinrel); + if (!bms_equal(joinrel->relids, root->all_query_rels)) + generate_useful_gather_paths(root, joinrel, false); + set_cheapest(joinrel); + + /* + * Switch back to goo_cxt and update the clumps list. Remove the two input + * clumps and add the new joinrel as a single clump. + */ + MemoryContextSwitchTo(oldcxt); + + state->clumps = list_delete_ptr(state->clumps, cand->left); + state->clumps = list_delete_ptr(state->clumps, cand->right); + state->clumps = lappend(state->clumps, joinrel); + + return joinrel; +} + +/* + * goo_candidate_better + * Compare two join candidates and determine which is better. + * + * Returns true if candidate 'a' should be preferred over candidate 'b'. + */ +static bool +goo_candidate_better(GooCandidate * a, GooCandidate * b) +{ + return (a->total_cost < b->total_cost); +} diff --git a/src/backend/optimizer/path/meson.build b/src/backend/optimizer/path/meson.build index 12f36d85cb6..e5046365a37 100644 --- a/src/backend/optimizer/path/meson.build +++ b/src/backend/optimizer/path/meson.build @@ -5,6 +5,7 @@ backend_sources += files( 'clausesel.c', 'costsize.c', 'equivclass.c', + 'goo.c', 'indxpath.c', 'joinpath.c', 'joinrels.c', diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index 3b9d8349078..a8ce31ab8a7 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -840,6 +840,15 @@ boot_val => 'true', }, +/* WIP: for now keep this in QUERY_TUNING_GEQO group for testing convenience */ +{ name => 'enable_goo_join_search', type => 'bool', context => 'PGC_USERSET', group => 'QUERY_TUNING_GEQO', + short_desc => 'Enables the planner\'s use of GOO join search for large join problems.', + long_desc => 'Greedy Operator Ordering (GOO) is a deterministic join search algorithm for queries with many relations.', + flags => 'GUC_EXPLAIN', + variable => 'enable_goo_join_search', + boot_val => 'false', +}, + { name => 'enable_group_by_reordering', type => 'bool', context => 'PGC_USERSET', group => 'QUERY_TUNING_METHOD', short_desc => 'Enables reordering of GROUP BY keys.', flags => 'GUC_EXPLAIN', diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index dc9e2255f8a..8284e8b1da7 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -456,6 +456,7 @@ # - Genetic Query Optimizer - #geqo = on +#enable_goo_join_search = off #geqo_threshold = 12 #geqo_effort = 5 # range 1-10 #geqo_pool_size = 0 # selects default based on effort diff --git a/src/include/optimizer/goo.h b/src/include/optimizer/goo.h new file mode 100644 index 00000000000..0080dfa2ac8 --- /dev/null +++ b/src/include/optimizer/goo.h @@ -0,0 +1,23 @@ +/*------------------------------------------------------------------------- + * + * goo.h + * prototype for the greedy operator ordering join search + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/optimizer/goo.h + * + *------------------------------------------------------------------------- + */ +#ifndef GOO_H +#define GOO_H + +#include "nodes/pathnodes.h" +#include "nodes/pg_list.h" + +extern RelOptInfo *goo_join_search(PlannerInfo *root, int levels_needed, + List *initial_rels); + +#endif /* GOO_H */ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index f6a62df0b43..5b3ebe5f1d2 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -21,6 +21,7 @@ * allpaths.c */ extern PGDLLIMPORT bool enable_geqo; +extern PGDLLIMPORT bool enable_goo_join_search; extern PGDLLIMPORT bool enable_eager_aggregate; extern PGDLLIMPORT int geqo_threshold; extern PGDLLIMPORT double min_eager_agg_group_size; diff --git a/src/test/regress/expected/goo.out b/src/test/regress/expected/goo.out new file mode 100644 index 00000000000..88996f57508 --- /dev/null +++ b/src/test/regress/expected/goo.out @@ -0,0 +1,634 @@ +-- +-- GOO (Greedy Operator Ordering) Join Search Tests +-- +-- This test suite validates the GOO join ordering algorithm and verifies +-- correct behavior for various query patterns. +-- +-- Create test tables with various sizes and join patterns +CREATE TEMP TABLE t1 (a int, b int); +CREATE TEMP TABLE t2 (a int, c int); +CREATE TEMP TABLE t3 (b int, d int); +CREATE TEMP TABLE t4 (c int, e int); +CREATE TEMP TABLE t5 (d int, f int); +CREATE TEMP TABLE t6 (e int, g int); +CREATE TEMP TABLE t7 (f int, h int); +CREATE TEMP TABLE t8 (g int, i int); +CREATE TEMP TABLE t9 (h int, j int); +CREATE TEMP TABLE t10 (i int, k int); +CREATE TEMP TABLE t11 (j int, l int); +CREATE TEMP TABLE t12 (k int, m int); +CREATE TEMP TABLE t13 (l int, n int); +CREATE TEMP TABLE t14 (m int, o int); +CREATE TEMP TABLE t15 (n int, p int); +CREATE TEMP TABLE t16 (o int, q int); +CREATE TEMP TABLE t17 (p int, r int); +CREATE TEMP TABLE t18 (q int, s int); +-- Populate with small amount of data +INSERT INTO t1 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t2 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t3 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t4 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t5 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t6 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t7 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t8 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t9 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t10 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t11 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t12 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t13 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t14 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t15 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t16 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t17 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t18 SELECT i, i FROM generate_series(1,10) i; +ANALYZE; +-- +-- Basic 3-way join (sanity check) +-- +SET enable_goo_join_search = on; +SET geqo_threshold = 2; +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM (VALUES (1),(2)) AS a(x) +JOIN (VALUES (1),(2)) AS b(x) USING (x) +JOIN (VALUES (1),(3)) AS c(x) USING (x); + QUERY PLAN +---------------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: ("*VALUES*".column1 = "*VALUES*_2".column1) + -> Hash Join + Hash Cond: ("*VALUES*".column1 = "*VALUES*_1".column1) + -> Values Scan on "*VALUES*" + -> Hash + -> Values Scan on "*VALUES*_1" + -> Hash + -> Values Scan on "*VALUES*_2" +(10 rows) + +SELECT count(*) +FROM (VALUES (1),(2)) AS a(x) +JOIN (VALUES (1),(2)) AS b(x) USING (x) +JOIN (VALUES (1),(3)) AS c(x) USING (x); + count +------- + 1 +(1 row) + +-- +-- Disconnected graph (Cartesian products required) +-- +-- This tests GOO's ability to handle queries where some relations +-- have no join clauses connecting them. GOO should allow Cartesian +-- products when no clause-connected joins are available. +-- +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1, t2, t5 +WHERE t1.a = 1 AND t2.c = 2 AND t5.f = 3; + QUERY PLAN +------------------------------------- + Aggregate + -> Nested Loop + -> Seq Scan on t5 + Filter: (f = 3) + -> Nested Loop + -> Seq Scan on t1 + Filter: (a = 1) + -> Seq Scan on t2 + Filter: (c = 2) +(9 rows) + +SELECT count(*) +FROM t1, t2, t5 +WHERE t1.a = 1 AND t2.c = 2 AND t5.f = 3; + count +------- + 1 +(1 row) + +-- +-- Star schema (fact table with multiple dimension tables) +-- +-- Test GOO with a typical star schema join pattern. +-- +CREATE TEMP TABLE fact (id int, dim1_id int, dim2_id int, dim3_id int, dim4_id int, value int); +CREATE TEMP TABLE dim1 (id int, name text); +CREATE TEMP TABLE dim2 (id int, name text); +CREATE TEMP TABLE dim3 (id int, name text); +CREATE TEMP TABLE dim4 (id int, name text); +INSERT INTO fact SELECT i, i, i, i, i, i FROM generate_series(1,100) i; +INSERT INTO dim1 SELECT i, 'dim1_'||i FROM generate_series(1,10) i; +INSERT INTO dim2 SELECT i, 'dim2_'||i FROM generate_series(1,10) i; +INSERT INTO dim3 SELECT i, 'dim3_'||i FROM generate_series(1,10) i; +INSERT INTO dim4 SELECT i, 'dim4_'||i FROM generate_series(1,10) i; +ANALYZE; +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM fact +JOIN dim1 ON fact.dim1_id = dim1.id +JOIN dim2 ON fact.dim2_id = dim2.id +JOIN dim3 ON fact.dim3_id = dim3.id +JOIN dim4 ON fact.dim4_id = dim4.id +WHERE dim1.id < 5; + QUERY PLAN +--------------------------------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (fact.dim4_id = dim4.id) + -> Hash Join + Hash Cond: (dim3.id = fact.dim3_id) + -> Seq Scan on dim3 + -> Hash + -> Hash Join + Hash Cond: (dim2.id = fact.dim2_id) + -> Seq Scan on dim2 + -> Hash + -> Hash Join + Hash Cond: (fact.dim1_id = dim1.id) + -> Seq Scan on fact + -> Hash + -> Seq Scan on dim1 + Filter: (id < 5) + -> Seq Scan on dim4 +(18 rows) + +-- +-- Long join chain +-- +-- Tests GOO with a large join involving 15 relations. +-- +SET geqo_threshold = 8; +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1 +JOIN t2 ON t1.a = t2.a +JOIN t3 ON t1.b = t3.b +JOIN t4 ON t2.c = t4.c +JOIN t5 ON t3.d = t5.d +JOIN t6 ON t4.e = t6.e +JOIN t7 ON t5.f = t7.f +JOIN t8 ON t6.g = t8.g +JOIN t9 ON t7.h = t9.h +JOIN t10 ON t8.i = t10.i +JOIN t11 ON t9.j = t11.j +JOIN t12 ON t10.k = t12.k +JOIN t13 ON t11.l = t13.l +JOIN t14 ON t12.m = t14.m +JOIN t15 ON t13.n = t15.n; + QUERY PLAN +---------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (t7.h = t9.h) + -> Hash Join + Hash Cond: (t8.i = t10.i) + -> Hash Join + Hash Cond: (t2.c = t4.c) + -> Hash Join + Hash Cond: (t3.b = t1.b) + -> Hash Join + Hash Cond: (t5.f = t7.f) + -> Hash Join + Hash Cond: (t3.d = t5.d) + -> Seq Scan on t3 + -> Hash + -> Seq Scan on t5 + -> Hash + -> Seq Scan on t7 + -> Hash + -> Hash Join + Hash Cond: (t1.a = t2.a) + -> Seq Scan on t1 + -> Hash + -> Seq Scan on t2 + -> Hash + -> Hash Join + Hash Cond: (t6.g = t8.g) + -> Hash Join + Hash Cond: (t4.e = t6.e) + -> Seq Scan on t4 + -> Hash + -> Seq Scan on t6 + -> Hash + -> Seq Scan on t8 + -> Hash + -> Hash Join + Hash Cond: (t12.m = t14.m) + -> Hash Join + Hash Cond: (t10.k = t12.k) + -> Seq Scan on t10 + -> Hash + -> Seq Scan on t12 + -> Hash + -> Seq Scan on t14 + -> Hash + -> Hash Join + Hash Cond: (t11.l = t13.l) + -> Hash Join + Hash Cond: (t9.j = t11.j) + -> Seq Scan on t9 + -> Hash + -> Seq Scan on t11 + -> Hash + -> Hash Join + Hash Cond: (t13.n = t15.n) + -> Seq Scan on t13 + -> Hash + -> Seq Scan on t15 +(58 rows) + +-- Execute to verify correctness +SELECT count(*) +FROM t1 +JOIN t2 ON t1.a = t2.a +JOIN t3 ON t1.b = t3.b +JOIN t4 ON t2.c = t4.c +JOIN t5 ON t3.d = t5.d +JOIN t6 ON t4.e = t6.e +JOIN t7 ON t5.f = t7.f +JOIN t8 ON t6.g = t8.g +JOIN t9 ON t7.h = t9.h +JOIN t10 ON t8.i = t10.i +JOIN t11 ON t9.j = t11.j +JOIN t12 ON t10.k = t12.k +JOIN t13 ON t11.l = t13.l +JOIN t14 ON t12.m = t14.m +JOIN t15 ON t13.n = t15.n; + count +------- + 10 +(1 row) + +-- +-- Bushy tree support +-- +-- Verify that GOO can produce bushy join trees, not just left-deep or right-deep. +-- With appropriate cost model, GOO should join (t1,t2) and (t3,t4) first, +-- then join those results (bushy tree). +-- +SET geqo_threshold = 4; +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1, t2, t3, t4 +WHERE t1.a = t2.a + AND t3.b = t4.c + AND t1.a = t3.b; + QUERY PLAN +---------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (t1.a = t3.b) + -> Hash Join + Hash Cond: (t1.a = t2.a) + -> Seq Scan on t1 + -> Hash + -> Seq Scan on t2 + -> Hash + -> Hash Join + Hash Cond: (t3.b = t4.c) + -> Seq Scan on t3 + -> Hash + -> Seq Scan on t4 +(14 rows) + +-- +-- Compare GOO vs standard join search +-- +-- Run the same query with GOO and standard join search to verify both +-- produce valid plans. Results should be identical even if plans differ. +-- +SET enable_goo_join_search = on; +PREPARE goo_plan AS +SELECT count(*) +FROM t1 JOIN t2 ON t1.a = t2.a +JOIN t3 ON t1.b = t3.b +JOIN t4 ON t2.c = t4.c +JOIN t5 ON t3.d = t5.d +JOIN t6 ON t4.e = t6.e; +EXECUTE goo_plan; + count +------- + 10 +(1 row) + +SET enable_goo_join_search = off; +SET geqo_threshold = default; +PREPARE standard_plan AS +SELECT count(*) +FROM t1 JOIN t2 ON t1.a = t2.a +JOIN t3 ON t1.b = t3.b +JOIN t4 ON t2.c = t4.c +JOIN t5 ON t3.d = t5.d +JOIN t6 ON t4.e = t6.e; +EXECUTE standard_plan; + count +------- + 10 +(1 row) + +-- Results should match +EXECUTE goo_plan; + count +------- + 10 +(1 row) + +EXECUTE standard_plan; + count +------- + 10 +(1 row) + +-- +-- Large join (18 relations) +-- +-- Test GOO with a large number of relations. +-- +SET enable_goo_join_search = on; +SET geqo_threshold = 10; +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1 +JOIN t2 ON t1.a = t2.a +JOIN t3 ON t1.b = t3.b +JOIN t4 ON t2.c = t4.c +JOIN t5 ON t3.d = t5.d +JOIN t6 ON t4.e = t6.e +JOIN t7 ON t5.f = t7.f +JOIN t8 ON t6.g = t8.g +JOIN t9 ON t7.h = t9.h +JOIN t10 ON t8.i = t10.i +JOIN t11 ON t9.j = t11.j +JOIN t12 ON t10.k = t12.k +JOIN t13 ON t11.l = t13.l +JOIN t14 ON t12.m = t14.m +JOIN t15 ON t13.n = t15.n +JOIN t16 ON t14.o = t16.o +JOIN t17 ON t15.p = t17.p +JOIN t18 ON t16.q = t18.q; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (t16.q = t18.q) + -> Hash Join + Hash Cond: (t15.p = t17.p) + -> Hash Join + Hash Cond: (t14.o = t16.o) + -> Hash Join + Hash Cond: (t13.n = t15.n) + -> Hash Join + Hash Cond: (t12.m = t14.m) + -> Hash Join + Hash Cond: (t11.l = t13.l) + -> Hash Join + Hash Cond: (t10.k = t12.k) + -> Hash Join + Hash Cond: (t9.j = t11.j) + -> Hash Join + Hash Cond: (t8.i = t10.i) + -> Hash Join + Hash Cond: (t7.h = t9.h) + -> Hash Join + Hash Cond: (t6.g = t8.g) + -> Hash Join + Hash Cond: (t5.f = t7.f) + -> Hash Join + Hash Cond: (t4.e = t6.e) + -> Hash Join + Hash Cond: (t3.d = t5.d) + -> Hash Join + Hash Cond: (t2.c = t4.c) + -> Hash Join + Hash Cond: (t1.b = t3.b) + -> Hash Join + Hash Cond: (t1.a = t2.a) + -> Seq Scan on t1 + -> Hash + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t3 + -> Hash + -> Seq Scan on t4 + -> Hash + -> Seq Scan on t5 + -> Hash + -> Seq Scan on t6 + -> Hash + -> Seq Scan on t7 + -> Hash + -> Seq Scan on t8 + -> Hash + -> Seq Scan on t9 + -> Hash + -> Seq Scan on t10 + -> Hash + -> Seq Scan on t11 + -> Hash + -> Seq Scan on t12 + -> Hash + -> Seq Scan on t13 + -> Hash + -> Seq Scan on t14 + -> Hash + -> Seq Scan on t15 + -> Hash + -> Seq Scan on t16 + -> Hash + -> Seq Scan on t17 + -> Hash + -> Seq Scan on t18 +(70 rows) + +-- +-- Mixed connected and disconnected components +-- +-- Query with two connected components that need a Cartesian product between them. +-- +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1 JOIN t2 ON t1.a = t2.a, + t5 JOIN t6 ON t5.f = t6.e +WHERE t1.a < 5 AND t5.d < 3; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (t2.a = t1.a) + -> Seq Scan on t2 + -> Hash + -> Nested Loop + -> Hash Join + Hash Cond: (t6.e = t5.f) + -> Seq Scan on t6 + -> Hash + -> Seq Scan on t5 + Filter: (d < 3) + -> Seq Scan on t1 + Filter: (a < 5) +(14 rows) + +-- +-- Outer joins +-- +-- Verify GOO handles outer joins correctly (respects join order restrictions) +-- +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1 +LEFT JOIN t2 ON t1.a = t2.a +LEFT JOIN t3 ON t2.a = t3.b +LEFT JOIN t4 ON t3.d = t4.c; + QUERY PLAN +---------------------------------------------- + Aggregate + -> Hash Left Join + Hash Cond: (t3.d = t4.c) + -> Hash Left Join + Hash Cond: (t2.a = t3.b) + -> Hash Left Join + Hash Cond: (t1.a = t2.a) + -> Seq Scan on t1 + -> Hash + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t3 + -> Hash + -> Seq Scan on t4 +(14 rows) + +-- +-- Complete Cartesian products (disconnected graphs) +-- +-- Test GOO's ability to handle queries with no join clauses at all. +-- +SET enable_goo_join_search = on; +SET geqo_threshold = 2; +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1, t2; + QUERY PLAN +---------------------------------- + Aggregate + -> Nested Loop + -> Seq Scan on t1 + -> Materialize + -> Seq Scan on t2 +(5 rows) + +SELECT count(*) +FROM t1, t2; + count +------- + 100 +(1 row) + +-- +-- Join order restrictions with FULL OUTER JOIN +-- +-- FULL OUTER JOIN creates strong ordering constraints that GOO must respect +-- +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1 +FULL OUTER JOIN t2 ON t1.a = t2.a +FULL OUTER JOIN t3 ON t2.a = t3.b; + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: (t2.a = t3.b) + -> Hash Full Join + Hash Cond: (t1.a = t2.a) + -> Seq Scan on t1 + -> Hash + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t3 +(10 rows) + +-- +-- Self-join handling +-- +-- Test GOO with the same table appearing multiple times. GOO must correctly +-- handle self-joins that were not removed by Self-Join Elimination. +-- +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1 a +JOIN t1 b ON a.a = b.a +JOIN t2 c ON b.b = c.c; + QUERY PLAN +------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: (b.b = c.c) + -> Hash Join + Hash Cond: (a.a = b.a) + -> Seq Scan on t1 a + -> Hash + -> Seq Scan on t1 b + -> Hash + -> Seq Scan on t2 c +(10 rows) + +-- +-- Complex bushy tree pattern +-- +-- Create a query that naturally leads to bushy tree: multiple independent +-- join chains that need to be combined +-- +CREATE TEMP TABLE chain1a (id int, val int); +CREATE TEMP TABLE chain1b (id int, val int); +CREATE TEMP TABLE chain1c (id int, val int); +CREATE TEMP TABLE chain2a (id int, val int); +CREATE TEMP TABLE chain2b (id int, val int); +CREATE TEMP TABLE chain2c (id int, val int); +INSERT INTO chain1a SELECT i, i FROM generate_series(1,100) i; +INSERT INTO chain1b SELECT i, i FROM generate_series(1,100) i; +INSERT INTO chain1c SELECT i, i FROM generate_series(1,100) i; +INSERT INTO chain2a SELECT i, i FROM generate_series(1,100) i; +INSERT INTO chain2b SELECT i, i FROM generate_series(1,100) i; +INSERT INTO chain2c SELECT i, i FROM generate_series(1,100) i; +ANALYZE; +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM chain1a +JOIN chain1b ON chain1a.id = chain1b.id +JOIN chain1c ON chain1b.val = chain1c.id +JOIN chain2a ON chain1a.val = chain2a.id -- Cross-chain join +JOIN chain2b ON chain2a.val = chain2b.id +JOIN chain2c ON chain2b.val = chain2c.id; + QUERY PLAN +----------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (chain1a.val = chain2a.id) + -> Hash Join + Hash Cond: (chain1b.val = chain1c.id) + -> Hash Join + Hash Cond: (chain1a.id = chain1b.id) + -> Seq Scan on chain1a + -> Hash + -> Seq Scan on chain1b + -> Hash + -> Seq Scan on chain1c + -> Hash + -> Hash Join + Hash Cond: (chain2b.val = chain2c.id) + -> Hash Join + Hash Cond: (chain2a.val = chain2b.id) + -> Seq Scan on chain2a + -> Hash + -> Seq Scan on chain2b + -> Hash + -> Seq Scan on chain2c +(22 rows) + +-- Cleanup +DEALLOCATE goo_plan; +DEALLOCATE standard_plan; +RESET geqo_threshold; +RESET enable_goo_join_search; diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 3b37fafa65b..cb0c84cebff 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -153,6 +153,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_distinct_reordering | on enable_eager_aggregate | on enable_gathermerge | on + enable_goo_join_search | off enable_group_by_reordering | on enable_hashagg | on enable_hashjoin | on @@ -173,7 +174,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(25 rows) +(26 rows) -- There are always wait event descriptions for various types. InjectionPoint -- may be present or absent, depending on history since last postmaster start. diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index cc6d799bcea..14e3a475906 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -68,6 +68,12 @@ test: select_into select_distinct select_distinct_on select_implicit select_havi # ---------- test: brin gin gist spgist privileges init_privs security_label collate matview lock replica_identity rowsecurity object_address tablesample groupingsets drop_operator password identity generated_stored join_hash +# ---------- +# Additional JOIN ORDER tests +# WIP: need to find an appropriate group for this test +# ---------- +test: goo + # ---------- # Additional BRIN tests # ---------- @@ -98,9 +104,6 @@ test: maintain_every # no relation related tests can be put in this group test: publication subscription -# ---------- -# Another group of parallel tests -# select_views depends on create_view # ---------- test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock indirect_toast equivclass stats_rewrite diff --git a/src/test/regress/sql/goo.sql b/src/test/regress/sql/goo.sql new file mode 100644 index 00000000000..c4d9cae48a4 --- /dev/null +++ b/src/test/regress/sql/goo.sql @@ -0,0 +1,329 @@ +-- +-- GOO (Greedy Operator Ordering) Join Search Tests +-- +-- This test suite validates the GOO join ordering algorithm and verifies +-- correct behavior for various query patterns. +-- + +-- Create test tables with various sizes and join patterns +CREATE TEMP TABLE t1 (a int, b int); +CREATE TEMP TABLE t2 (a int, c int); +CREATE TEMP TABLE t3 (b int, d int); +CREATE TEMP TABLE t4 (c int, e int); +CREATE TEMP TABLE t5 (d int, f int); +CREATE TEMP TABLE t6 (e int, g int); +CREATE TEMP TABLE t7 (f int, h int); +CREATE TEMP TABLE t8 (g int, i int); +CREATE TEMP TABLE t9 (h int, j int); +CREATE TEMP TABLE t10 (i int, k int); +CREATE TEMP TABLE t11 (j int, l int); +CREATE TEMP TABLE t12 (k int, m int); +CREATE TEMP TABLE t13 (l int, n int); +CREATE TEMP TABLE t14 (m int, o int); +CREATE TEMP TABLE t15 (n int, p int); +CREATE TEMP TABLE t16 (o int, q int); +CREATE TEMP TABLE t17 (p int, r int); +CREATE TEMP TABLE t18 (q int, s int); + +-- Populate with small amount of data +INSERT INTO t1 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t2 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t3 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t4 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t5 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t6 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t7 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t8 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t9 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t10 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t11 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t12 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t13 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t14 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t15 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t16 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t17 SELECT i, i FROM generate_series(1,10) i; +INSERT INTO t18 SELECT i, i FROM generate_series(1,10) i; + +ANALYZE; + +-- +-- Basic 3-way join (sanity check) +-- +SET enable_goo_join_search = on; +SET geqo_threshold = 2; + +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM (VALUES (1),(2)) AS a(x) +JOIN (VALUES (1),(2)) AS b(x) USING (x) +JOIN (VALUES (1),(3)) AS c(x) USING (x); + +SELECT count(*) +FROM (VALUES (1),(2)) AS a(x) +JOIN (VALUES (1),(2)) AS b(x) USING (x) +JOIN (VALUES (1),(3)) AS c(x) USING (x); + +-- +-- Disconnected graph (Cartesian products required) +-- +-- This tests GOO's ability to handle queries where some relations +-- have no join clauses connecting them. GOO should allow Cartesian +-- products when no clause-connected joins are available. +-- +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1, t2, t5 +WHERE t1.a = 1 AND t2.c = 2 AND t5.f = 3; + +SELECT count(*) +FROM t1, t2, t5 +WHERE t1.a = 1 AND t2.c = 2 AND t5.f = 3; + +-- +-- Star schema (fact table with multiple dimension tables) +-- +-- Test GOO with a typical star schema join pattern. +-- +CREATE TEMP TABLE fact (id int, dim1_id int, dim2_id int, dim3_id int, dim4_id int, value int); +CREATE TEMP TABLE dim1 (id int, name text); +CREATE TEMP TABLE dim2 (id int, name text); +CREATE TEMP TABLE dim3 (id int, name text); +CREATE TEMP TABLE dim4 (id int, name text); + +INSERT INTO fact SELECT i, i, i, i, i, i FROM generate_series(1,100) i; +INSERT INTO dim1 SELECT i, 'dim1_'||i FROM generate_series(1,10) i; +INSERT INTO dim2 SELECT i, 'dim2_'||i FROM generate_series(1,10) i; +INSERT INTO dim3 SELECT i, 'dim3_'||i FROM generate_series(1,10) i; +INSERT INTO dim4 SELECT i, 'dim4_'||i FROM generate_series(1,10) i; + +ANALYZE; + +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM fact +JOIN dim1 ON fact.dim1_id = dim1.id +JOIN dim2 ON fact.dim2_id = dim2.id +JOIN dim3 ON fact.dim3_id = dim3.id +JOIN dim4 ON fact.dim4_id = dim4.id +WHERE dim1.id < 5; + +-- +-- Long join chain +-- +-- Tests GOO with a large join involving 15 relations. +-- +SET geqo_threshold = 8; + +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1 +JOIN t2 ON t1.a = t2.a +JOIN t3 ON t1.b = t3.b +JOIN t4 ON t2.c = t4.c +JOIN t5 ON t3.d = t5.d +JOIN t6 ON t4.e = t6.e +JOIN t7 ON t5.f = t7.f +JOIN t8 ON t6.g = t8.g +JOIN t9 ON t7.h = t9.h +JOIN t10 ON t8.i = t10.i +JOIN t11 ON t9.j = t11.j +JOIN t12 ON t10.k = t12.k +JOIN t13 ON t11.l = t13.l +JOIN t14 ON t12.m = t14.m +JOIN t15 ON t13.n = t15.n; + +-- Execute to verify correctness +SELECT count(*) +FROM t1 +JOIN t2 ON t1.a = t2.a +JOIN t3 ON t1.b = t3.b +JOIN t4 ON t2.c = t4.c +JOIN t5 ON t3.d = t5.d +JOIN t6 ON t4.e = t6.e +JOIN t7 ON t5.f = t7.f +JOIN t8 ON t6.g = t8.g +JOIN t9 ON t7.h = t9.h +JOIN t10 ON t8.i = t10.i +JOIN t11 ON t9.j = t11.j +JOIN t12 ON t10.k = t12.k +JOIN t13 ON t11.l = t13.l +JOIN t14 ON t12.m = t14.m +JOIN t15 ON t13.n = t15.n; + +-- +-- Bushy tree support +-- +-- Verify that GOO can produce bushy join trees, not just left-deep or right-deep. +-- With appropriate cost model, GOO should join (t1,t2) and (t3,t4) first, +-- then join those results (bushy tree). +-- +SET geqo_threshold = 4; + +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1, t2, t3, t4 +WHERE t1.a = t2.a + AND t3.b = t4.c + AND t1.a = t3.b; + +-- +-- Compare GOO vs standard join search +-- +-- Run the same query with GOO and standard join search to verify both +-- produce valid plans. Results should be identical even if plans differ. +-- +SET enable_goo_join_search = on; +PREPARE goo_plan AS +SELECT count(*) +FROM t1 JOIN t2 ON t1.a = t2.a +JOIN t3 ON t1.b = t3.b +JOIN t4 ON t2.c = t4.c +JOIN t5 ON t3.d = t5.d +JOIN t6 ON t4.e = t6.e; + +EXECUTE goo_plan; + +SET enable_goo_join_search = off; +SET geqo_threshold = default; +PREPARE standard_plan AS +SELECT count(*) +FROM t1 JOIN t2 ON t1.a = t2.a +JOIN t3 ON t1.b = t3.b +JOIN t4 ON t2.c = t4.c +JOIN t5 ON t3.d = t5.d +JOIN t6 ON t4.e = t6.e; + +EXECUTE standard_plan; + +-- Results should match +EXECUTE goo_plan; +EXECUTE standard_plan; + +-- +-- Large join (18 relations) +-- +-- Test GOO with a large number of relations. +-- +SET enable_goo_join_search = on; +SET geqo_threshold = 10; + +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1 +JOIN t2 ON t1.a = t2.a +JOIN t3 ON t1.b = t3.b +JOIN t4 ON t2.c = t4.c +JOIN t5 ON t3.d = t5.d +JOIN t6 ON t4.e = t6.e +JOIN t7 ON t5.f = t7.f +JOIN t8 ON t6.g = t8.g +JOIN t9 ON t7.h = t9.h +JOIN t10 ON t8.i = t10.i +JOIN t11 ON t9.j = t11.j +JOIN t12 ON t10.k = t12.k +JOIN t13 ON t11.l = t13.l +JOIN t14 ON t12.m = t14.m +JOIN t15 ON t13.n = t15.n +JOIN t16 ON t14.o = t16.o +JOIN t17 ON t15.p = t17.p +JOIN t18 ON t16.q = t18.q; + +-- +-- Mixed connected and disconnected components +-- +-- Query with two connected components that need a Cartesian product between them. +-- +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1 JOIN t2 ON t1.a = t2.a, + t5 JOIN t6 ON t5.f = t6.e +WHERE t1.a < 5 AND t5.d < 3; + +-- +-- Outer joins +-- +-- Verify GOO handles outer joins correctly (respects join order restrictions) +-- +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1 +LEFT JOIN t2 ON t1.a = t2.a +LEFT JOIN t3 ON t2.a = t3.b +LEFT JOIN t4 ON t3.d = t4.c; + +-- +-- Complete Cartesian products (disconnected graphs) +-- +-- Test GOO's ability to handle queries with no join clauses at all. +-- +SET enable_goo_join_search = on; +SET geqo_threshold = 2; + +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1, t2; + +SELECT count(*) +FROM t1, t2; + +-- +-- Join order restrictions with FULL OUTER JOIN +-- +-- FULL OUTER JOIN creates strong ordering constraints that GOO must respect +-- +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1 +FULL OUTER JOIN t2 ON t1.a = t2.a +FULL OUTER JOIN t3 ON t2.a = t3.b; + +-- +-- Self-join handling +-- +-- Test GOO with the same table appearing multiple times. GOO must correctly +-- handle self-joins that were not removed by Self-Join Elimination. +-- +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM t1 a +JOIN t1 b ON a.a = b.a +JOIN t2 c ON b.b = c.c; + +-- +-- Complex bushy tree pattern +-- +-- Create a query that naturally leads to bushy tree: multiple independent +-- join chains that need to be combined +-- +CREATE TEMP TABLE chain1a (id int, val int); +CREATE TEMP TABLE chain1b (id int, val int); +CREATE TEMP TABLE chain1c (id int, val int); +CREATE TEMP TABLE chain2a (id int, val int); +CREATE TEMP TABLE chain2b (id int, val int); +CREATE TEMP TABLE chain2c (id int, val int); + +INSERT INTO chain1a SELECT i, i FROM generate_series(1,100) i; +INSERT INTO chain1b SELECT i, i FROM generate_series(1,100) i; +INSERT INTO chain1c SELECT i, i FROM generate_series(1,100) i; +INSERT INTO chain2a SELECT i, i FROM generate_series(1,100) i; +INSERT INTO chain2b SELECT i, i FROM generate_series(1,100) i; +INSERT INTO chain2c SELECT i, i FROM generate_series(1,100) i; + +ANALYZE; + +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM chain1a +JOIN chain1b ON chain1a.id = chain1b.id +JOIN chain1c ON chain1b.val = chain1c.id +JOIN chain2a ON chain1a.val = chain2a.id -- Cross-chain join +JOIN chain2b ON chain2a.val = chain2b.id +JOIN chain2c ON chain2b.val = chain2c.id; + +-- Cleanup +DEALLOCATE goo_plan; +DEALLOCATE standard_plan; + +RESET geqo_threshold; +RESET enable_goo_join_search; -- 2.39.3 (Apple Git-146)