diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 1247433..ad13fc9 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1103,6 +1103,20 @@ ExplainNode(PlanState *planstate, List *ancestors, { IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; + if (indexonlyscan->distinctPrefix > 0) + { + /* + * TODO:TM figure out how to show "for distinct (a, b)" + * instead of just the column count... + */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " for distinct values of leading %d column(s)", + indexonlyscan->distinctPrefix); + else + ExplainPropertyInteger("Distinct Prefix", + indexonlyscan->distinctPrefix, + es); + } ExplainIndexScanDetails(indexonlyscan->indexid, indexonlyscan->indexorderdir, es); diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 4f6f91c..fddd382 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -74,6 +74,20 @@ IndexOnlyNext(IndexOnlyScanState *node) slot = node->ss.ss_ScanTupleSlot; /* + * Check if we need to skip to the next key prefix, because we've been + * asked to implement DISTINCT. + */ + if (node->ioss_NumDistinctKeys > 0 && node->ioss_FirstTupleEmitted) + { + if (!index_skip(scandesc, direction, node->ioss_NumDistinctKeys)) + { + /* Reached end of index. */ + /* TODO:TM is this appropriate cleanup? */ + return ExecClearTuple(slot); + } + } + + /* * OK, now that we have what we need, fetch the next tuple. */ while ((tid = index_getnext_tid(scandesc, direction)) != NULL) @@ -189,6 +203,17 @@ IndexOnlyNext(IndexOnlyScanState *node) PredicateLockPage(scandesc->heapRelation, ItemPointerGetBlockNumber(tid), estate->es_snapshot); + /* + * TODO:TM Do distinct scans break SSI? We don't visit every page + * anymore! 
But maybe that's OK, because we only "read" the fact that + * there is *at least one* row that has this value, so a conflicting + * write would be one that removes ALL rows having the same value, and + * therefore would touch this page that happens to hold the arbitrary + * row we looked at, in the case of a distinct skip scan. Does this + * make any sense? + */ + + node->ioss_FirstTupleEmitted = true; return slot; } @@ -404,6 +429,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) indexstate->ss.ps.plan = (Plan *) node; indexstate->ss.ps.state = estate; indexstate->ioss_HeapFetches = 0; + indexstate->ioss_NumDistinctKeys = node->distinctPrefix; + indexstate->ioss_FirstTupleEmitted = false; /* * Miscellaneous initialization diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 71714bc..cc7ce64 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -457,6 +457,7 @@ _copyIndexOnlyScan(const IndexOnlyScan *from) COPY_NODE_FIELD(indexorderby); COPY_NODE_FIELD(indextlist); COPY_SCALAR_FIELD(indexorderdir); + COPY_SCALAR_FIELD(distinctPrefix); return newnode; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 47158f6..614452d 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -170,7 +170,8 @@ static IndexOnlyScan *make_indexonlyscan(List *qptlist, List *qpqual, Index scanrelid, Oid indexid, List *indexqual, List *indexorderby, List *indextlist, - ScanDirection indexscandir); + ScanDirection indexscandir, + int skipprefix); static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid, List *indexqual, List *indexqualorig); @@ -2501,7 +2502,8 @@ create_indexscan_plan(PlannerInfo *root, fixed_indexquals, fixed_indexorderbys, best_path->indexinfo->indextlist, - best_path->indexscandir); + best_path->indexscandir, + best_path->indexskipprefix); else scan_plan = (Scan *)
make_indexscan(tlist, qpqual, @@ -4721,7 +4723,8 @@ make_indexonlyscan(List *qptlist, List *indexqual, List *indexorderby, List *indextlist, - ScanDirection indexscandir) + ScanDirection indexscandir, + int skipprefix) { IndexOnlyScan *node = makeNode(IndexOnlyScan); Plan *plan = &node->scan.plan; @@ -4736,6 +4739,7 @@ make_indexonlyscan(List *qptlist, node->indexorderby = indexorderby; node->indextlist = indextlist; node->indexorderdir = indexscandir; + node->distinctPrefix = skipprefix; return node; } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index f657ffc..dfb222b 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4157,6 +4157,16 @@ create_distinct_paths(PlannerInfo *root, path, list_length(root->distinct_pathkeys), numDistinctRows)); + + /* Also consider a skip scan, if possible. */ + if (IsA(path, IndexPath) && + path->pathtype == T_IndexOnlyScan && + ((IndexPath *) path)->indexinfo->amcanskip) + add_path(distinct_rel, (Path *) + create_skipscan_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); } } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index abb7507..db674c8 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2470,6 +2470,49 @@ create_upper_unique_path(PlannerInfo *root, } /* + * create_skipscan_unique_path + * Creates a pathnode the same as an existing IndexPath except based on + * skipping duplicate values. This may or may not be cheaper than using + * create_upper_unique_path. + * + * The input path must be an IndexPath for an index that supports amskip. 
+ */ +IndexPath * +create_skipscan_unique_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + int numCols, + double numGroups) +{ + IndexPath *pathnode = makeNode(IndexPath); + + Assert(IsA(subpath, IndexPath)); + + /* + * TODO: copyObject doesn't work on paths. But we don't want to modify + * the source path. memcpy may not be very sane here; need to look into + * how exactly to clone an IndexPath without breaking any rules. + */ + memcpy(pathnode, subpath, sizeof(IndexPath)); + + /* The size of the prefix we'll use for skipping. */ + Assert(pathnode->indexinfo->amcanskip); + Assert(numCols > 0); + pathnode->indexskipprefix = numCols; + + /* + * The cost to skip to each distinct value should be roughly the same as + * the cost of finding the first key times the number of distinct values + * we expect to find. + */ + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->startup_cost * numGroups; + pathnode->path.rows = numGroups; + + return pathnode; +} + +/* * create_agg_path * Creates a pathnode that represents performing aggregation/grouping * diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 4fa3661..bb07e9a 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1369,6 +1369,7 @@ typedef struct IndexScanState * ScanDesc index scan descriptor * VMBuffer buffer in use for visibility map testing, if any * HeapFetches number of tuples we were forced to fetch from heap + * NumDistinctKeys number of keys for skip-based DISTINCT * ---------------- */ typedef struct IndexOnlyScanState @@ -1387,6 +1388,8 @@ typedef struct IndexOnlyScanState IndexScanDesc ioss_ScanDesc; Buffer ioss_VMBuffer; long ioss_HeapFetches; + int ioss_NumDistinctKeys; + bool ioss_FirstTupleEmitted; } IndexOnlyScanState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index e2fbc7d..31e465d 100644 --- a/src/include/nodes/plannodes.h +++ 
b/src/include/nodes/plannodes.h @@ -377,6 +377,7 @@ typedef struct IndexOnlyScan List *indexorderby; /* list of index ORDER BY exprs */ List *indextlist; /* TargetEntry list describing index's cols */ ScanDirection indexorderdir; /* forward or backward or don't care */ + int distinctPrefix; /* the size of the prefix for distinct scans */ } IndexOnlyScan; /* ---------------- diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 0e1c8d0..774914f 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -955,6 +955,9 @@ typedef struct Path * we need not recompute them when considering using the same index in a * bitmap index/heap scan (see BitmapHeapPath). The costs of the IndexPath * itself represent the costs of an IndexScan or IndexOnlyScan plan type. + * + * 'indexskipprefix' represents the number of columns to consider for skip + * scans. *---------- */ typedef struct IndexPath @@ -969,6 +972,7 @@ typedef struct IndexPath ScanDirection indexscandir; Cost indextotalcost; Selectivity indexselectivity; + int indexskipprefix; } IndexPath; /* diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 71d9154..42b1002 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -161,6 +161,11 @@ extern UpperUniquePath *create_upper_unique_path(PlannerInfo *root, Path *subpath, int numCols, double numGroups); +extern IndexPath *create_skipscan_unique_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + int numCols, + double numGroups); extern AggPath *create_agg_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,