From a17dd9910717681cda9c6313452055ca26d802c7 Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Fri, 13 Mar 2026 00:12:46 -0700 Subject: [PATCH 2/2] pg_plan_advice: Allow targeting scans by partition parent When pg_plan_advice generates advice for a given plan, it will currently always spell out each individual partition. This limitation is complex to address, but we can at least permit users to specify advice for the parent table if they want. This adjusts SEQ_SCAN and other per-scan advice to affect all child partitions of the partitioned table instead of the append rel itself. For INDEX_SCAN, etc. this allows specifying the name of indexes declared on the partitioned parent table, but doesn't permit indexes that only exist on the child tables. --- .../expected/partition_scan.out | 89 ++++++++++ contrib/pg_plan_advice/meson.build | 1 + contrib/pg_plan_advice/pgpa_planner.c | 105 +++++++++-- contrib/pg_plan_advice/pgpa_walker.c | 164 +++++++++++++++++- contrib/pg_plan_advice/sql/partition_scan.sql | 40 +++++ 5 files changed, 379 insertions(+), 20 deletions(-) create mode 100644 contrib/pg_plan_advice/expected/partition_scan.out create mode 100644 contrib/pg_plan_advice/sql/partition_scan.sql diff --git a/contrib/pg_plan_advice/expected/partition_scan.out b/contrib/pg_plan_advice/expected/partition_scan.out new file mode 100644 index 00000000000..c47e02c60b7 --- /dev/null +++ b/contrib/pg_plan_advice/expected/partition_scan.out @@ -0,0 +1,89 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +SET seq_page_cost = 0.1; +SET random_page_cost = 0.1; +SET cpu_tuple_cost = 0; +SET cpu_index_tuple_cost = 0; +CREATE TABLE pt (a int primary key, b text) + PARTITION BY RANGE (a); +CREATE TABLE pt_1 PARTITION OF pt FOR VALUES FROM (1) TO (50001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt_2 PARTITION OF pt FOR VALUES FROM (50001) TO (100001) + WITH (autovacuum_enabled = false); +INSERT INTO pt SELECT g, 'some text ' || g FROM generate_series(1, 100000) g; 
+VACUUM ANALYZE pt; +-- By default this does a sequential scan on each partition. +EXPLAIN (COSTS OFF) SELECT * FROM pt; + QUERY PLAN +------------------------ + Append + -> Seq Scan on pt_1 + -> Seq Scan on pt_2 +(3 rows) + +-- By default a lookup by primary key uses an index scan. +EXPLAIN (COSTS OFF) SELECT * FROM pt WHERE a = 1; + QUERY PLAN +--------------------------------------- + Index Scan using pt_1_pkey on pt_1 pt + Index Cond: (a = 1) +(2 rows) + +-- SEQ_SCAN on the parent should force seq scan on all children, even when +-- an index scan would normally be used. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(pt)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM pt WHERE a = 1; + QUERY PLAN +------------------------------ + Seq Scan on pt_1 pt + Filter: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(pt) /* matched */ + Generated Plan Advice: + SEQ_SCAN(pt/public.pt_1) + PARTITIONWISE(pt) + NO_GATHER(pt/public.pt_1) +(8 rows) + +COMMIT; +-- INDEX_SCAN on the parent should force index scan on all children. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(pt pt_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM pt WHERE a > 0; + QUERY PLAN +------------------------------------------------------------------------------- + Append + -> Index Scan using pt_1_pkey on pt_1 + Index Cond: (a > 0) + -> Index Scan using pt_2_pkey on pt_2 + Index Cond: (a > 0) + Supplied Plan Advice: + INDEX_SCAN(pt pt_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(pt/public.pt_1 public.pt_1_pkey pt/public.pt_2 public.pt_2_pkey) + PARTITIONWISE(pt) + NO_GATHER(pt/public.pt_1 pt/public.pt_2) +(11 rows) + +COMMIT; +-- TID_SCAN on the parent should force TID scan on all children. 
+BEGIN; +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(pt)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM pt WHERE ctid = '(0,1)'; + QUERY PLAN +-------------------------------------------- + Append + -> Tid Scan on pt_1 + TID Cond: (ctid = '(0,1)'::tid) + -> Tid Scan on pt_2 + TID Cond: (ctid = '(0,1)'::tid) + Supplied Plan Advice: + TID_SCAN(pt) /* matched */ + Generated Plan Advice: + PARTITIONWISE(pt) + TID_SCAN(pt/public.pt_1 pt/public.pt_2) + NO_GATHER(pt/public.pt_1 pt/public.pt_2) +(11 rows) + +COMMIT; diff --git a/contrib/pg_plan_advice/meson.build b/contrib/pg_plan_advice/meson.build index cf948ffaa13..7b753c0acdb 100644 --- a/contrib/pg_plan_advice/meson.build +++ b/contrib/pg_plan_advice/meson.build @@ -56,6 +56,7 @@ tests += { 'gather', 'join_order', 'join_strategy', + 'partition_scan', 'partitionwise', 'prepared', 'scan', diff --git a/contrib/pg_plan_advice/pgpa_planner.c b/contrib/pg_plan_advice/pgpa_planner.c index 5508b8af707..f64126d6f36 100644 --- a/contrib/pg_plan_advice/pgpa_planner.c +++ b/contrib/pg_plan_advice/pgpa_planner.c @@ -32,6 +32,8 @@ #include "optimizer/plancat.h" #include "optimizer/planner.h" #include "parser/parsetree.h" +#include "catalog/partition.h" +#include "catalog/pg_class.h" #include "utils/lsyscache.h" #ifdef USE_ASSERT_CHECKING @@ -471,16 +473,48 @@ pgpa_build_simple_rel(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) if (pps != NULL && pps->trove != NULL) { pgpa_identifier rid; - pgpa_trove_result tresult_scan; + pgpa_trove_result tresult_scan = {0}; pgpa_trove_result tresult_rel; /* Search for scan advice and general rel advice. */ pgpa_compute_identifier_by_rti(root, rel->relid, &rid); - pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_SCAN, 1, &rid, - &tresult_scan); + + /* + * Skip scan advice lookup for partitioned table parents. Scan + * methods are only meaningful for the leaf partitions, not the parent + * rel whose plan is an Append over child scans. 
Applying scan advice + * to the parent would incorrectly disable PGS_APPEND. Parent-level + * scan advice cascades to children below instead. + */ + if (rte->relkind != RELKIND_PARTITIONED_TABLE) + pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_SCAN, 1, &rid, + &tresult_scan); pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_REL, 1, &rid, &tresult_rel); + /* + * If this is a child partition, also look up scan advice targeting + * the parent table. Scan advice like SEQ_SCAN(parent) should cascade + * to all child partitions, not just the parent rel itself. We only do + * this for scan advice, not rel advice, because rel advice like + * PARTITIONWISE applies to the parent conceptually and should not be + * re-applied to individual children. + */ + if (rid.partrel != NULL) + { + pgpa_identifier parent_rid; + pgpa_trove_result tresult_parent_scan; + + parent_rid = rid; + parent_rid.partnsp = NULL; + parent_rid.partrel = NULL; + pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_SCAN, 1, + &parent_rid, &tresult_parent_scan); + tresult_scan.indexes = bms_union(tresult_scan.indexes, + tresult_parent_scan.indexes); + tresult_scan.entries = tresult_parent_scan.entries; + } + /* If relevant entries were found, apply them. */ if (tresult_scan.indexes != NULL || tresult_rel.indexes != NULL) { @@ -1670,6 +1704,54 @@ pgpa_semijoin_permits_join(int outer_count, int inner_count, /* * Apply scan advice to a RelOptInfo. */ +/* + * Find an index in the relation's index list matching the given target. + * + * First tries a direct name match. If that fails and the index is a child + * partition index, also checks whether the target names a parent index from + * which this child index inherits. This allows advice like + * INDEX_SCAN(parent parent_idx) to cascade to child partitions whose indexes + * have different names. 
+ */ +static IndexOptInfo * +pgpa_find_matching_index(RelOptInfo *rel, pgpa_index_target *itarget) +{ + foreach_node(IndexOptInfo, index, rel->indexlist) + { + char *relname = get_rel_name(index->indexoid); + Oid nspoid = get_rel_namespace(index->indexoid); + char *relnamespace = get_namespace_name_or_temp(nspoid); + + /* Direct name match. */ + if (strcmp(itarget->indname, relname) == 0 && + (itarget->indnamespace == NULL || + strcmp(itarget->indnamespace, relnamespace) == 0)) + return index; + + /* Check whether the target names a parent index. */ + if (get_rel_relispartition(index->indexoid)) + { + Oid parent_idx; + + parent_idx = get_partition_parent(index->indexoid, true); + if (OidIsValid(parent_idx)) + { + char *parent_name = get_rel_name(parent_idx); + Oid parent_nspoid = get_rel_namespace(parent_idx); + char *parent_nsp = + get_namespace_name_or_temp(parent_nspoid); + + if (strcmp(itarget->indname, parent_name) == 0 && + (itarget->indnamespace == NULL || + strcmp(itarget->indnamespace, parent_nsp) == 0)) + return index; + } + } + } + + return NULL; +} + static void pgpa_planner_apply_scan_advice(RelOptInfo *rel, pgpa_trove_entry *scan_entries, @@ -1826,22 +1908,7 @@ pgpa_planner_apply_scan_advice(RelOptInfo *rel, scan_entry->tag == PGPA_TAG_INDEX_ONLY_SCAN)) { pgpa_index_target *itarget = scan_entry->target->itarget; - IndexOptInfo *matched_index = NULL; - - foreach_node(IndexOptInfo, index, rel->indexlist) - { - char *relname = get_rel_name(index->indexoid); - Oid nspoid = get_rel_namespace(index->indexoid); - char *relnamespace = get_namespace_name_or_temp(nspoid); - - if (strcmp(itarget->indname, relname) == 0 && - (itarget->indnamespace == NULL || - strcmp(itarget->indnamespace, relnamespace) == 0)) - { - matched_index = index; - break; - } - } + IndexOptInfo *matched_index = pgpa_find_matching_index(rel, itarget); if (matched_index == NULL) { diff --git a/contrib/pg_plan_advice/pgpa_walker.c b/contrib/pg_plan_advice/pgpa_walker.c index 
c1203f123a5..4e53c20cf03 100644 --- a/contrib/pg_plan_advice/pgpa_walker.c +++ b/contrib/pg_plan_advice/pgpa_walker.c @@ -15,6 +15,8 @@ #include "pgpa_scan.h" #include "pgpa_walker.h" +#include "catalog/partition.h" +#include "nodes/pathnodes.h" #include "nodes/plannodes.h" #include "parser/parsetree.h" #include "utils/lsyscache.h" @@ -45,6 +47,14 @@ static bool pgpa_walker_join_order_matches_member(pgpa_join_member *member, Index rtable_length, pgpa_identifier *rt_identifiers, pgpa_advice_target *target); +static pgpa_scan_strategy pgpa_scan_tag_to_strategy(pgpa_advice_tag_type tag); +static Bitmapset *pgpa_walker_find_child_rtis(Index parent_rti, + Index rtable_length, + pgpa_identifier *rt_identifiers); +static bool pgpa_walker_check_children_scan(pgpa_plan_walker_context *walker, + pgpa_scan_strategy strategy, + Bitmapset *child_rtis, + pgpa_index_target *itarget); static pgpa_scan *pgpa_walker_find_scan(pgpa_plan_walker_context *walker, pgpa_scan_strategy strategy, Bitmapset *relids); @@ -698,6 +708,30 @@ pgpa_walker_would_advise(pgpa_plan_walker_context *walker, if (rti == 0) return false; relids = bms_make_singleton(rti); + + /* + * For scan advice targeting a parent table (no partition specified), + * check whether child partitions exist. If so, validate the advice + * against the children, since scans are recorded against child + * partitions rather than the parent. + */ + if (target->rid.partrel == NULL) + { + Bitmapset *child_rtis; + + child_rtis = pgpa_walker_find_child_rtis(rti, rtable_length, + rt_identifiers); + if (child_rtis != NULL) + { + pgpa_scan_strategy strategy; + + strategy = pgpa_scan_tag_to_strategy(tag); + if (strategy != PGPA_SCAN_ORDINARY) + return pgpa_walker_check_children_scan(walker, strategy, + child_rtis, + target->itarget); + } + } } else { @@ -843,7 +877,35 @@ pgpa_walker_index_target_matches_plan(pgpa_index_target *itarget, Plan *plan) } /* Check whether relation name matches. 
*/ - return (strcmp(itarget->indname, get_rel_name(indexoid)) == 0); + if (strcmp(itarget->indname, get_rel_name(indexoid)) == 0) + return true; + + /* + * For child partition indexes, also check whether the target names the + * parent index from which this child index inherits. + */ + if (get_rel_relispartition(indexoid)) + { + Oid parent_idx = get_partition_parent(indexoid, true); + + if (OidIsValid(parent_idx)) + { + if (itarget->indnamespace != NULL) + { + Oid parent_nspoid = get_rel_namespace(parent_idx); + char *parent_nsp = + get_namespace_name_or_temp(parent_nspoid); + + if (strcmp(itarget->indnamespace, parent_nsp) != 0) + return false; + } + + if (strcmp(itarget->indname, get_rel_name(parent_idx)) == 0) + return true; + } + } + + return false; } /* @@ -978,6 +1040,106 @@ pgpa_walker_find_scan(pgpa_plan_walker_context *walker, return NULL; } +/* + * Map a scan-related advice tag to a scan strategy. + * + * Returns PGPA_SCAN_ORDINARY if the tag does not name a specific scan method + * (e.g., DISABLE_INDEX, PARTITIONWISE, or non-scan tags), in which case the + * caller should fall through to the normal logic. + */ +static pgpa_scan_strategy +pgpa_scan_tag_to_strategy(pgpa_advice_tag_type tag) +{ + switch (tag) + { + case PGPA_TAG_BITMAP_HEAP_SCAN: + return PGPA_SCAN_BITMAP_HEAP; + case PGPA_TAG_INDEX_SCAN: + return PGPA_SCAN_INDEX; + case PGPA_TAG_INDEX_ONLY_SCAN: + return PGPA_SCAN_INDEX_ONLY; + case PGPA_TAG_SEQ_SCAN: + return PGPA_SCAN_SEQ; + case PGPA_TAG_TID_SCAN: + return PGPA_SCAN_TID; + default: + return PGPA_SCAN_ORDINARY; + } +} + +/* + * Find child partition RTIs for a given parent RTI. + * + * If the given RTI corresponds to a parent table identifier (partrel == NULL), + * returns a Bitmapset of all child RTIs that share the same alias_name, + * occurrence, and plan_name. Returns NULL if no children exist. 
+ */ +static Bitmapset * +pgpa_walker_find_child_rtis(Index parent_rti, Index rtable_length, + pgpa_identifier *rt_identifiers) +{ + pgpa_identifier *parent_rid = &rt_identifiers[parent_rti - 1]; + Bitmapset *child_rtis = NULL; + + /* Only applicable when the parent has no partition name. */ + if (parent_rid->alias_name == NULL || parent_rid->partrel != NULL) + return NULL; + + for (Index rti = 1; rti <= rtable_length; ++rti) + { + pgpa_identifier *rid = &rt_identifiers[rti - 1]; + + if (rid->alias_name == NULL || rid->partrel == NULL) + continue; + if (strcmp(rid->alias_name, parent_rid->alias_name) != 0) + continue; + if (rid->occurrence != parent_rid->occurrence) + continue; + if (!strings_equal_or_both_null(rid->plan_name, parent_rid->plan_name)) + continue; + + child_rtis = bms_add_member(child_rtis, rti); + } + + return child_rtis; +} + +/* + * Check scan advice against child partitions of a parent table. + * + * Returns true if at least one child partition has a scan recorded with the + * specified strategy (and, if itarget is given, a matching index). + * + * Children with no such scan (e.g., pruned away) do not cause a failure. + */ +static bool +pgpa_walker_check_children_scan(pgpa_plan_walker_context *walker, + pgpa_scan_strategy strategy, + Bitmapset *child_rtis, + pgpa_index_target *itarget) +{ + bool found_any = false; + int rti = -1; + + while ((rti = bms_next_member(child_rtis, rti)) >= 0) + { + Bitmapset *child_relids = bms_make_singleton(rti); + pgpa_scan *scan = pgpa_walker_find_scan(walker, strategy, + child_relids); + + if (scan != NULL) + { + /* For INDEX_SCAN/INDEX_ONLY_SCAN, also verify the index. */ + if (itarget != NULL && + !pgpa_walker_index_target_matches_plan(itarget, scan->plan)) + continue; + found_any = true; + } + } + + return found_any; +} + /* * Does this walker say that the given query feature applies to the given * relid set? 
diff --git a/contrib/pg_plan_advice/sql/partition_scan.sql b/contrib/pg_plan_advice/sql/partition_scan.sql new file mode 100644 index 00000000000..ad462b61695 --- /dev/null +++ b/contrib/pg_plan_advice/sql/partition_scan.sql @@ -0,0 +1,40 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +SET seq_page_cost = 0.1; +SET random_page_cost = 0.1; +SET cpu_tuple_cost = 0; +SET cpu_index_tuple_cost = 0; + +CREATE TABLE pt (a int primary key, b text) + PARTITION BY RANGE (a); +CREATE TABLE pt_1 PARTITION OF pt FOR VALUES FROM (1) TO (50001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt_2 PARTITION OF pt FOR VALUES FROM (50001) TO (100001) + WITH (autovacuum_enabled = false); +INSERT INTO pt SELECT g, 'some text ' || g FROM generate_series(1, 100000) g; +VACUUM ANALYZE pt; + +-- By default this does a sequential scan on each partition. +EXPLAIN (COSTS OFF) SELECT * FROM pt; + +-- By default a lookup by primary key uses an index scan. +EXPLAIN (COSTS OFF) SELECT * FROM pt WHERE a = 1; + +-- SEQ_SCAN on the parent should force seq scan on all children, even when +-- an index scan would normally be used. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(pt)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM pt WHERE a = 1; +COMMIT; + +-- INDEX_SCAN on the parent should force index scan on all children. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(pt pt_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM pt WHERE a > 0; +COMMIT; + +-- TID_SCAN on the parent should force TID scan on all children. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(pt)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM pt WHERE ctid = '(0,1)'; +COMMIT; -- 2.47.1