From 794429c6636bc663b15673f25b57d2db862cf2d2 Mon Sep 17 00:00:00 2001 From: "Anton A. Melnikov" Date: Mon, 1 Jan 2024 05:02:10 +0300 Subject: [PATCH 5/5] Add knn support to btree indexes This commit implements support for knn scans in btree indexes. When knn search is requested, btree index is traversed ascending and descending simultaneously. At each step the closest tuple is returned. Filtering operators can reduce knn to regular ordered scan. Ordering operators are added to opfamilies of scalar datatypes. No extra supporting functions are required: knn-btree algorithm works using comparison function and ordering operator itself. Distance operators are not leakproof, because they throw error on overflow. Therefore we relax opr_sanity check for btree ordering operators. It's OK for them to be leaky while comparison function is leakproof. Catversion is bumped. Discussion: https://postgr.es/m/ce35e97b-cf34-3f5d-6b99-2c25bae49999%40postgrespro.ru Author: Nikita Glukhov Reviewed-by: Robert Haas, Tom Lane, Anastasia Lubennikova, Alexander Korotkov --- doc/src/sgml/btree.sgml | 47 + doc/src/sgml/indices.sgml | 11 + doc/src/sgml/xindex.sgml | 7 +- src/backend/access/brin/brin_minmax.c | 6 +- src/backend/access/nbtree/README | 22 + src/backend/access/nbtree/nbtree.c | 214 ++++- src/backend/access/nbtree/nbtsearch.c | 361 +++++++- src/backend/access/nbtree/nbtutils.c | 475 +++++++++- src/backend/access/nbtree/nbtvalidate.c | 45 +- src/backend/partitioning/partprune.c | 4 +- src/include/access/nbtree.h | 32 +- src/include/access/stratnum.h | 7 +- src/include/catalog/pg_amop.dat | 104 +++ src/test/regress/expected/alter_generic.out | 13 +- src/test/regress/expected/amutils.out | 6 +- src/test/regress/expected/btree_index.out | 954 ++++++++++++++++++++ src/test/regress/expected/opr_sanity.out | 10 +- src/test/regress/expected/psql.out | 52 +- src/test/regress/sql/alter_generic.sql | 8 +- src/test/regress/sql/btree_index.sql | 312 +++++++ src/test/regress/sql/opr_sanity.sql | 
7 +- 21 files changed, 2526 insertions(+), 171 deletions(-) diff --git a/doc/src/sgml/btree.sgml b/doc/src/sgml/btree.sgml index 2b3997988cf..642e26d764b 100644 --- a/doc/src/sgml/btree.sgml +++ b/doc/src/sgml/btree.sgml @@ -200,6 +200,53 @@ planner relies on them for optimization purposes. + + In order to implement the distance ordered (nearest-neighbor) search, + one needs to define a distance operator (usually it's called + <->) with a corresponding operator family for + distance comparison in the operator class. These operators must + satisfy the following assumptions for all non-null values + A, B, + C of the data type: + + + + + A <-> + B = + B <-> + A + (symmetric law) + + + + + if A = + B, then A + <-> C + = B + <-> C + (distance equivalence) + + + + + if (A <= + B and B + <= C) or + (A >= + B and B + >= C), + then A <-> + B <= + A <-> + C + (monotonicity) + + + + + diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index 6d731e0701f..b841e1f0a54 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -1193,6 +1193,17 @@ SELECT x FROM tab WHERE x = 'key' AND z < 42; make this type of scan very useful in practice. + + B-tree indexes are also capable of optimizing nearest-neighbor + searches, such as +<![CDATA[ +SELECT * FROM events ORDER BY event_date <-> date '2017-05-05' LIMIT 10; +]]> + + which finds the ten events closest to a given target date. The ability + to do this is again dependent on the particular operator class being used. + + INCLUDE diff --git a/doc/src/sgml/xindex.sgml b/doc/src/sgml/xindex.sgml index 22d8ad1aac4..4636dce2a9a 100644 --- a/doc/src/sgml/xindex.sgml +++ b/doc/src/sgml/xindex.sgml @@ -131,6 +131,10 @@ greater than 5 + + distance + 6 + @@ -1320,7 +1324,8 @@ SELECT sum(x) OVER (ORDER BY x RANGE BETWEEN 5 PRECEDING AND 10 FOLLOWING) Ordering Operators - Some index access methods (currently, only GiST and SP-GiST) support the concept of + Some index access methods (currently, B-tree, GiST and SP-GiST) + support the concept of ordering operators.
What we have been discussing so far are search operators. A search operator is one for which the index can be searched to find all rows satisfying diff --git a/src/backend/access/brin/brin_minmax.c b/src/backend/access/brin/brin_minmax.c index caf6991eb1b..2617545b8c8 100644 --- a/src/backend/access/brin/brin_minmax.c +++ b/src/backend/access/brin/brin_minmax.c @@ -23,7 +23,7 @@ typedef struct MinmaxOpaque { Oid cached_subtype; - FmgrInfo strategy_procinfos[BTMaxStrategyNumber]; + FmgrInfo strategy_procinfos[BTMaxSearchStrategyNumber]; } MinmaxOpaque; static FmgrInfo *minmax_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno, @@ -264,7 +264,7 @@ minmax_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno, Oid subtype, MinmaxOpaque *opaque; Assert(strategynum >= 1 && - strategynum <= BTMaxStrategyNumber); + strategynum <= BTMaxSearchStrategyNumber); opaque = (MinmaxOpaque *) bdesc->bd_info[attno - 1]->oi_opaque; @@ -277,7 +277,7 @@ minmax_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno, Oid subtype, { uint16 i; - for (i = 1; i <= BTMaxStrategyNumber; i++) + for (i = 1; i <= BTMaxSearchStrategyNumber; i++) opaque->strategy_procinfos[i - 1].fn_oid = InvalidOid; opaque->cached_subtype = subtype; } diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index 52e646c7f75..0db79b64d61 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -1081,3 +1081,25 @@ item is irrelevant, and need not be stored at all. This arrangement corresponds to the fact that an L&Y non-leaf page has one more pointer than key. Suffix truncation's negative infinity attributes behave in the same way. + +Nearest-neighbor search +----------------------- + +B-tree supports a special scan strategy for nearest-neighbor (kNN) search, +which is used for queries with "ORDER BY indexed_column operator constant" +clause. See the following example. 
+ + SELECT * FROM tab WHERE col > const1 ORDER BY col <-> const2 LIMIT k + +Unlike GiST and SP-GiST, B-tree supports kNN only with a single ordering operator +applied to the first indexed column. + +At the beginning of a kNN scan, we determine the scan strategy to use: normal +unidirectional or special bidirectional. If the second distance operand falls +into the scan range, then we use a bidirectional scan; otherwise we use a normal +unidirectional scan. + +The bidirectional scan algorithm is quite simple. We start both the forward and +backward scans from the tree location corresponding to the second +distance operand. Each time we need the next tuple, we return the nearest +tuple from the two directions and advance the scan in the corresponding direction. diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index fc3954cc157..7bf6b151631 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -32,6 +32,8 @@ #include "storage/ipc.h" #include "storage/lmgr.h" #include "storage/smgr.h" +#include "utils/builtins.h" +#include "utils/datum.h" #include "utils/fmgrprotos.h" #include "utils/index_selfuncs.h" #include "utils/memutils.h" @@ -66,7 +68,8 @@ typedef enum */ typedef struct BTParallelScanDescData { - BlockNumber btps_scanPage; /* latest or next page to be scanned */ + BlockNumber btps_forwardScanPage; /* latest or next page to be scanned */ + BlockNumber btps_backwardScanPage; /* secondary kNN page to be scanned */ BTPS_State btps_pageStatus; /* indicates whether next page is * available for scan. see above for * possible states of parallel scan.
*/ @@ -106,8 +109,8 @@ bthandler(PG_FUNCTION_ARGS) amroutine->amsupport = BTNProcs; amroutine->amoptsprocnum = BTOPTIONS_PROC; amroutine->amcanorder = true; - amroutine->amcanorderbyop = false; - amroutine->amorderbyopfirstcol = false; + amroutine->amcanorderbyop = true; + amroutine->amorderbyopfirstcol = true; amroutine->amcanbackward = true; amroutine->amcanunique = true; amroutine->amcanmulticol = true; @@ -208,10 +211,18 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) { BTScanOpaque so = (BTScanOpaque) scan->opaque; BTScanState state = &so->state; + ScanDirection arraydir = dir; bool res; + if (scan->numberOfOrderBys > 0 && !ScanDirectionIsForward(dir)) + elog(ERROR, "btree does not support backward order-by-distance scanning"); + /* btree indexes are never lossy */ scan->xs_recheck = false; + scan->xs_recheckorderby = false; + + if (so->scanDirection != NoMovementScanDirection) + dir = so->scanDirection; /* Each loop iteration performs another primitive index scan */ do @@ -221,7 +232,8 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) * the appropriate direction. If we haven't done so yet, we call * _bt_first() to get the first item in the scan. */ - if (!BTScanPosIsValid(state->currPos)) + if (!BTScanPosIsValid(state->currPos) && + (!so->backwardState || !BTScanPosIsValid(so->backwardState->currPos))) res = _bt_first(scan, dir); else { @@ -256,7 +268,7 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) if (res) break; /* ... 
otherwise see if we need another primitive index scan */ - } while (so->numArrayKeys && _bt_start_prim_scan(scan, dir)); + } while (so->numArrayKeys && _bt_start_prim_scan(scan, arraydir)); return res; } @@ -317,9 +329,6 @@ btbeginscan(Relation rel, int nkeys, int norderbys) IndexScanDesc scan; BTScanOpaque so; - /* no order by operators allowed */ - Assert(norderbys == 0); - /* get the scan */ scan = RelationGetIndexScan(rel, nkeys, norderbys); @@ -347,6 +356,9 @@ btbeginscan(Relation rel, int nkeys, int norderbys) * scan->xs_itupdesc whether we'll need it or not, since that's so cheap. */ so->state.currTuples = so->state.markTuples = NULL; + so->backwardState = NULL; + so->distanceTypeByVal = true; + so->scanDirection = NoMovementScanDirection; scan->xs_itupdesc = RelationGetDescr(rel); @@ -376,6 +388,8 @@ _bt_release_current_position(BTScanState state, Relation indexRelation, static void _bt_release_scan_state(IndexScanDesc scan, BTScanState state, bool free) { + BTScanOpaque so = (BTScanOpaque) scan->opaque; + /* No need to invalidate positions, if the RAM is about to be freed. 
*/ _bt_release_current_position(state, scan->indexRelation, !free); @@ -392,6 +406,18 @@ _bt_release_scan_state(IndexScanDesc scan, BTScanState state, bool free) } else BTScanPosInvalidate(state->markPos); + + if (!so->distanceTypeByVal) + { + if (DatumGetPointer(state->currDistance)) + pfree(DatumGetPointer(state->currDistance)); + + if (DatumGetPointer(state->markDistance)) + pfree(DatumGetPointer(state->markDistance)); + } + + state->currDistance = (Datum) 0; + state->markDistance = (Datum) 0; } /* @@ -406,6 +432,13 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, _bt_release_scan_state(scan, state, false); + if (so->backwardState) + { + _bt_release_scan_state(scan, so->backwardState, true); + pfree(so->backwardState); + so->backwardState = NULL; + } + so->needPrimScan = false; so->scanBehind = false; @@ -437,6 +470,14 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, scan->numberOfKeys * sizeof(ScanKeyData)); so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */ so->numArrayKeys = 0; /* ditto */ + + if (orderbys && scan->numberOfOrderBys > 0) + memmove(scan->orderByData, + orderbys, + scan->numberOfOrderBys * sizeof(ScanKeyData)); + + so->scanDirection = NoMovementScanDirection; + so->distanceTypeByVal = true; } /* @@ -449,6 +490,12 @@ btendscan(IndexScanDesc scan) _bt_release_scan_state(scan, &so->state, true); + if (so->backwardState) + { + _bt_release_scan_state(scan, so->backwardState, true); + pfree(so->backwardState); + } + /* Release storage */ if (so->keyData != NULL) pfree(so->keyData); @@ -460,7 +507,7 @@ btendscan(IndexScanDesc scan) } static void -_bt_mark_current_position(BTScanState state) +_bt_mark_current_position(BTScanOpaque so, BTScanState state) { /* There may be an old mark with a pin (but no lock). 
*/ BTScanPosUnpinIfPinned(state->markPos); @@ -478,6 +525,25 @@ _bt_mark_current_position(BTScanState state) BTScanPosInvalidate(state->markPos); state->markItemIndex = -1; } + + if (so->backwardState) + { + if (!so->distanceTypeByVal && DatumGetPointer(state->markDistance)) + pfree(DatumGetPointer(state->markDistance)); + + if (!BTScanPosIsValid(state->currPos) || state->currIsNull) + { + state->markIsNull = true; + state->markDistance = (Datum) 0; + } + else + { + state->markIsNull = false; + state->markDistance = datumCopy(state->currDistance, + so->distanceTypeByVal, + so->distanceTypeLen); + } + } } /* @@ -488,7 +554,13 @@ btmarkpos(IndexScanDesc scan) { BTScanOpaque so = (BTScanOpaque) scan->opaque; - _bt_mark_current_position(&so->state); + _bt_mark_current_position(so, &so->state); + + if (so->backwardState) + { + _bt_mark_current_position(so, so->backwardState); + so->markRightIsNearest = so->currRightIsNearest; + } } static void @@ -537,6 +609,22 @@ _bt_restore_marked_position(IndexScanDesc scan, BTScanState state) } } } + + /* + * For bidirectional nearest neighbor scan we also need to restore the + * distance to the current item. + */ + if (so->useBidirectionalKnnScan) + { + if (!so->distanceTypeByVal && DatumGetPointer(state->currDistance)) + pfree(DatumGetPointer(state->currDistance)); + + state->currIsNull = state->markIsNull; + state->currDistance = state->markIsNull ? (Datum) 0 : + datumCopy(state->markDistance, + so->distanceTypeByVal, + so->distanceTypeLen); + } } /* @@ -558,7 +646,8 @@ btinitparallelscan(void *target) BTParallelScanDesc bt_target = (BTParallelScanDesc) target; SpinLockInit(&bt_target->btps_mutex); - bt_target->btps_scanPage = InvalidBlockNumber; + bt_target->btps_forwardScanPage = InvalidBlockNumber; + bt_target->btps_backwardScanPage = InvalidBlockNumber; bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED; ConditionVariableInit(&bt_target->btps_cv); } @@ -583,7 +672,8 @@ btparallelrescan(IndexScanDesc scan) * consistency. 
*/ SpinLockAcquire(&btscan->btps_mutex); - btscan->btps_scanPage = InvalidBlockNumber; + btscan->btps_forwardScanPage = InvalidBlockNumber; + btscan->btps_backwardScanPage = InvalidBlockNumber; btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED; SpinLockRelease(&btscan->btps_mutex); } @@ -611,13 +701,14 @@ btparallelrescan(IndexScanDesc scan) * for first=false callers that require another primitive index scan. */ bool -_bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, bool first) +_bt_parallel_seize(IndexScanDesc scan, BTScanState state, BlockNumber *pageno, bool first) { BTScanOpaque so = (BTScanOpaque) scan->opaque; bool exit_loop = false; bool status = true; ParallelIndexScanDesc parallel_scan = scan->parallel_scan; BTParallelScanDesc btscan; + BlockNumber *scanPage; *pageno = P_NONE; @@ -649,6 +740,10 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, bool first) btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan, parallel_scan->ps_offset); + scanPage = state == so->backwardState ? + &btscan->btps_backwardScanPage : + &btscan->btps_forwardScanPage; + while (1) { SpinLockAcquire(&btscan->btps_mutex); @@ -690,7 +785,7 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, bool first) * of advancing it to a new page! */ btscan->btps_pageStatus = BTPARALLEL_ADVANCING; - *pageno = btscan->btps_scanPage; + *pageno = *scanPage; exit_loop = true; } SpinLockRelease(&btscan->btps_mutex); @@ -715,19 +810,44 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, bool first) * scan lands on scan_page). 
*/ void -_bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page) +_bt_parallel_release(IndexScanDesc scan, BTScanState state, + BlockNumber scan_page) { + BTScanOpaque so = (BTScanOpaque) scan->opaque; ParallelIndexScanDesc parallel_scan = scan->parallel_scan; BTParallelScanDesc btscan; + BlockNumber *scanPage; + BlockNumber *otherScanPage; + bool status_changed = false; + bool knnScan = so->useBidirectionalKnnScan; btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan, parallel_scan->ps_offset); + Assert(state); + if (state != so->backwardState) + { + scanPage = &btscan->btps_forwardScanPage; + otherScanPage = &btscan->btps_backwardScanPage; + } + else + { + scanPage = &btscan->btps_backwardScanPage; + otherScanPage = &btscan->btps_forwardScanPage; + } + SpinLockAcquire(&btscan->btps_mutex); - btscan->btps_scanPage = scan_page; - btscan->btps_pageStatus = BTPARALLEL_IDLE; + *scanPage = scan_page; + /* switch to idle state only if both KNN pages are initialized */ + if (!knnScan || *otherScanPage != InvalidBlockNumber) + { + btscan->btps_pageStatus = BTPARALLEL_IDLE; + status_changed = true; + } SpinLockRelease(&btscan->btps_mutex); - ConditionVariableSignal(&btscan->btps_cv); + + if (status_changed) + ConditionVariableSignal(&btscan->btps_cv); } /* @@ -738,11 +858,15 @@ _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page) * advance to the next page. 
*/ void -_bt_parallel_done(IndexScanDesc scan) +_bt_parallel_done(IndexScanDesc scan, BTScanState state) { + BTScanOpaque so = (BTScanOpaque) scan->opaque; ParallelIndexScanDesc parallel_scan = scan->parallel_scan; BTParallelScanDesc btscan; + BlockNumber *scanPage; + BlockNumber *otherScanPage; bool status_changed = false; + bool knnScan = so->useBidirectionalKnnScan; /* Do nothing, for non-parallel scans */ if (parallel_scan == NULL) @@ -751,16 +875,43 @@ _bt_parallel_done(IndexScanDesc scan) btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan, parallel_scan->ps_offset); + Assert(state); + if (state != so->backwardState) + { + scanPage = &btscan->btps_forwardScanPage; + otherScanPage = &btscan->btps_backwardScanPage; + } + else + { + scanPage = &btscan->btps_backwardScanPage; + otherScanPage = &btscan->btps_forwardScanPage; + } + /* * Mark the parallel scan as done, unless some other process did so * already */ SpinLockAcquire(&btscan->btps_mutex); - if (btscan->btps_pageStatus != BTPARALLEL_DONE) + + Assert(!knnScan || btscan->btps_pageStatus == BTPARALLEL_ADVANCING); + + *scanPage = P_NONE; + status_changed = true; + + /* switch to "done" state only if both KNN scans are done */ + if (!knnScan || *otherScanPage == P_NONE) { + if (btscan->btps_pageStatus == BTPARALLEL_DONE) + status_changed = false; + btscan->btps_pageStatus = BTPARALLEL_DONE; - status_changed = true; } + /* else switch to "idle" state only if both KNN scans are initialized */ + else if (*otherScanPage != InvalidBlockNumber) + btscan->btps_pageStatus = BTPARALLEL_IDLE; + else + status_changed = false; + SpinLockRelease(&btscan->btps_mutex); /* wake up all the workers associated with this parallel scan */ @@ -780,19 +931,30 @@ void _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber prev_scan_page) { BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTScanState state = &so->state; ParallelIndexScanDesc parallel_scan = scan->parallel_scan; BTParallelScanDesc btscan; + 
BlockNumber *scan_page; Assert(so->numArrayKeys); btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan, parallel_scan->ps_offset); + if (state != so->backwardState) + { + scan_page = &btscan->btps_forwardScanPage; + } + else + { + scan_page = &btscan->btps_backwardScanPage; + } + SpinLockAcquire(&btscan->btps_mutex); - if (btscan->btps_scanPage == prev_scan_page && + if (*scan_page == prev_scan_page && btscan->btps_pageStatus == BTPARALLEL_IDLE) { - btscan->btps_scanPage = InvalidBlockNumber; + *scan_page = InvalidBlockNumber; btscan->btps_pageStatus = BTPARALLEL_NEED_PRIMSCAN; /* Serialize scan's current array keys */ @@ -815,6 +977,12 @@ btrestrpos(IndexScanDesc scan) BTScanOpaque so = (BTScanOpaque) scan->opaque; _bt_restore_marked_position(scan, &so->state); + + if (so->backwardState) + { + _bt_restore_marked_position(scan, so->backwardState); + so->currRightIsNearest = so->markRightIsNearest; + } } /* diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 368857d3aa5..b19802789bc 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -44,11 +44,13 @@ static bool _bt_steppage(IndexScanDesc scan, BTScanState state, ScanDirection dir); static bool _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, ScanDirection dir); -static bool _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, - ScanDirection dir); +static bool _bt_parallel_readpage(IndexScanDesc scan, BTScanState state, + BlockNumber blkno, ScanDirection dir); static Buffer _bt_walk_left(Relation rel, Buffer buf); static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir); static inline void _bt_initialize_more_data(IndexScanDesc scan, BTScanState state, ScanDirection dir); +static BTScanState _bt_alloc_knn_scan(IndexScanDesc scan); +static bool _bt_start_knn_scan(IndexScanDesc scan, bool left, bool right); /* @@ -890,9 +892,11 @@ _bt_return_current_item(IndexScanDesc 
scan, BTScanState state) */ static bool _bt_load_first_page(IndexScanDesc scan, BTScanState state, ScanDirection dir, - OffsetNumber offnum) + OffsetNumber offnum, bool *readPageStatus) { - if (!_bt_readpage(scan, state, dir, offnum, true)) + if (!(readPageStatus ? + *readPageStatus : + _bt_readpage(scan, state, dir, offnum, true))) { /* * There's no actually-matching data on this page. Try to advance to @@ -907,6 +911,173 @@ _bt_load_first_page(IndexScanDesc scan, BTScanState state, ScanDirection dir, return true; } +/* + * _bt_calc_current_dist() -- Calculate distance from the current item + * of the scan state to the target order-by ScanKey argument. + */ +static void +_bt_calc_current_dist(IndexScanDesc scan, BTScanState state) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTScanPosItem *currItem = &state->currPos.items[state->currPos.itemIndex]; + IndexTuple itup = (IndexTuple) (state->currTuples + currItem->tupleOffset); + ScanKey scankey = &scan->orderByData[0]; + Datum value; + + value = index_getattr(itup, 1, scan->xs_itupdesc, &state->currIsNull); + + if (state->currIsNull) + return; /* NULL distance */ + + value = FunctionCall2Coll(&scankey->sk_func, + scankey->sk_collation, + value, + scankey->sk_argument); + + /* free previous distance value for by-ref types */ + if (!so->distanceTypeByVal && DatumGetPointer(state->currDistance)) + pfree(DatumGetPointer(state->currDistance)); + + state->currDistance = value; +} + +/* + * _bt_compare_current_dist() -- Compare current distances of the left and + *right scan states. + * + * NULL distances are considered to be greater than any non-NULL distances. + * + * Returns true if right distance is lesser than left, otherwise false. 
+ */ +static bool +_bt_compare_current_dist(BTScanOpaque so, BTScanState rstate, BTScanState lstate) +{ + if (lstate->currIsNull) + return true; /* non-NULL < NULL */ + + if (rstate->currIsNull) + return false; /* NULL > non-NULL */ + + return DatumGetBool(FunctionCall2Coll(&so->distanceCmpProc, + InvalidOid, /* XXX collation for + * distance comparison */ + rstate->currDistance, + lstate->currDistance)); +} + +/* + * _bt_alloc_knn_scan() -- Allocate additional backward scan state for KNN. + */ +static BTScanState +_bt_alloc_knn_scan(IndexScanDesc scan) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTScanState lstate = (BTScanState) palloc(sizeof(BTScanStateData)); + + _bt_allocate_tuple_workspaces(lstate); + + if (!scan->xs_want_itup) + { + /* We need to request index tuples for distance comparison. */ + scan->xs_want_itup = true; + _bt_allocate_tuple_workspaces(&so->state); + } + + BTScanPosInvalidate(lstate->currPos); + lstate->currPos.moreLeft = false; + lstate->currPos.moreRight = false; + BTScanPosInvalidate(lstate->markPos); + lstate->markItemIndex = -1; + lstate->killedItems = NULL; + lstate->numKilled = 0; + lstate->currDistance = (Datum) 0; + lstate->markDistance = (Datum) 0; + + return so->backwardState = lstate; +} + +static bool +_bt_start_knn_scan(IndexScanDesc scan, bool left, bool right) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTScanState rstate; /* right (forward) main scan state */ + BTScanState lstate; /* additional left (backward) KNN scan state */ + + if (!left && !right) + return false; /* empty result */ + + rstate = &so->state; + lstate = so->backwardState; + + if (left && right) + { + /* + * We have found items in both scan directions, determine nearest item + * to return.
+ */ + _bt_calc_current_dist(scan, rstate); + _bt_calc_current_dist(scan, lstate); + so->currRightIsNearest = _bt_compare_current_dist(so, rstate, lstate); + + /* + * 'right' flag determines the selected scan direction; right + * direction is selected if the right item is nearest. + */ + right = so->currRightIsNearest; + } + + /* Return current item of the selected scan direction. */ + return _bt_return_current_item(scan, right ? rstate : lstate); +} + +/* + * _bt_init_knn_scan() -- Init additional scan state for KNN search. + * + * Caller must pin and read-lock scan->state.currPos.buf buffer. + * + * If empty result was found returned false. + * Otherwise prepared current item, and returned true. + */ +static bool +_bt_init_knn_scan(IndexScanDesc scan, OffsetNumber offnum) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTScanState rstate = &so->state; /* right (forward) main scan state */ + BTScanState lstate; /* additional left (backward) KNN scan state */ + Buffer buf = rstate->currPos.buf; + bool left, + right; + ScanDirection rdir = ForwardScanDirection; + ScanDirection ldir = BackwardScanDirection; + OffsetNumber roffnum = offnum; + OffsetNumber loffnum = OffsetNumberPrev(offnum); + + lstate = _bt_alloc_knn_scan(scan); + + /* Bump pin and lock count before BTScanPosData copying. */ + IncrBufferRefCount(buf); + LockBuffer(buf, BT_READ); + + memcpy(&lstate->currPos, &rstate->currPos, sizeof(BTScanPosData)); + lstate->currPos.moreLeft = true; + lstate->currPos.moreRight = false; + + /* + * Load first pages from the both scans. + * + * _bt_load_first_page(right) can step to next page, and then + * _bt_parallel_seize() will deadlock if the left page number is not yet + * initialized in BTParallelScanDesc. So we must first read the left page + * using _bt_readpage(), and _bt_parallel_release() which is called inside + * will save the next page number in BTParallelScanDesc. 
+ */ + left = _bt_readpage(scan, lstate, ldir, loffnum, true); + right = _bt_load_first_page(scan, rstate, rdir, roffnum, NULL); + left = _bt_load_first_page(scan, lstate, ldir, loffnum, &left); + + return _bt_start_knn_scan(scan, left, right); +} + /* * _bt_first() -- Find the first item in a scan. * @@ -962,10 +1133,19 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) */ if (!so->qual_ok) { - _bt_parallel_done(scan); + _bt_parallel_done(scan, &so->state); return false; } + if (scan->numberOfOrderBys > 0) + { + if (so->useBidirectionalKnnScan) + _bt_init_distance_comparison(scan); + else if (so->scanDirection != NoMovementScanDirection) + /* use selected KNN scan direction */ + dir = so->scanDirection; + } + /* * For parallel scans, get the starting page from shared state. If the * scan has not started, proceed to find out first leaf page in the usual @@ -978,7 +1158,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) */ if (scan->parallel_scan != NULL) { - status = _bt_parallel_seize(scan, &blkno, true); + status = _bt_parallel_seize(scan, &so->state, &blkno, true); /* * Initialize arrays (when _bt_parallel_seize didn't already set up @@ -989,16 +1169,47 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (!status) return false; - else if (blkno == P_NONE) - { - _bt_parallel_done(scan); - return false; - } else if (blkno != InvalidBlockNumber) { - if (!_bt_parallel_readpage(scan, blkno, dir)) - return false; - goto readcomplete; + bool knn = so->useBidirectionalKnnScan; + bool right; + bool left; + + if (knn) + _bt_alloc_knn_scan(scan); + + if (blkno == P_NONE) + { + _bt_parallel_done(scan, &so->state); + right = false; + } + else + right = _bt_parallel_readpage(scan, &so->state, blkno, + knn ? 
ForwardScanDirection : dir); + + if (!knn) + return right && _bt_return_current_item(scan, &so->state); + + /* seize additional backward KNN scan */ + left = _bt_parallel_seize(scan, so->backwardState, &blkno, true); + + if (left) + { + if (blkno == P_NONE) + { + _bt_parallel_done(scan, so->backwardState); + left = false; + } + else + { + /* backward scan should be already initialized */ + Assert(blkno != InvalidBlockNumber); + left = _bt_parallel_readpage(scan, so->backwardState, blkno, + BackwardScanDirection); + } + } + + return _bt_start_knn_scan(scan, left, right); } } else if (so->numArrayKeys && !so->needPrimScan) @@ -1070,14 +1281,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * need to be kept in sync. *---------- */ - strat_total = BTEqualStrategyNumber; - if (so->numberOfKeys > 0) + if (so->useBidirectionalKnnScan) + { + keysz = _bt_init_knn_start_keys(scan, startKeys, notnullkeys); + strat_total = BTNearestStrategyNumber; + } + else if (so->numberOfKeys > 0) { AttrNumber curattr; ScanKey chosen; ScanKey impliesNN; ScanKey cur; + strat_total = BTEqualStrategyNumber; + /* * chosen is the so-far-chosen key for the current attribute, if any. 
* We don't cast the decision in stone until we reach keys for the @@ -1211,7 +1428,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (!match) { /* No match, so mark (parallel) scan finished */ - _bt_parallel_done(scan); + _bt_parallel_done(scan, &so->state); } return match; @@ -1247,7 +1464,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) Assert(subkey->sk_flags & SK_ROW_MEMBER); if (subkey->sk_flags & SK_ISNULL) { - _bt_parallel_done(scan); + _bt_parallel_done(scan, &so->state); return false; } memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData)); @@ -1412,6 +1629,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) break; case BTGreaterEqualStrategyNumber: + case BTMaxStrategyNumber: /* * Find first item >= scankey @@ -1469,7 +1687,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * Mark parallel scan as done, so that all the workers can finish * their scan. */ - _bt_parallel_done(scan); + _bt_parallel_done(scan, &so->state); BTScanPosInvalidate(*currPos); return false; } @@ -1503,17 +1721,22 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * for the page. For example, when inskey is both < the leaf page's high * key and > all of its non-pivot tuples, offnum will be "maxoff + 1". */ - if (!_bt_load_first_page(scan, &so->state, dir, offnum)) - return false; + if (strat_total == BTNearestStrategyNumber) + return _bt_init_knn_scan(scan, offnum); + + if (!_bt_load_first_page(scan, &so->state, dir, offnum, NULL)) + return false; /* empty result */ -readcomplete: /* OK, currPos->itemIndex says what to return */ return _bt_return_current_item(scan, &so->state); } /* - * Advance to next tuple on current page; or if there's no more, - * try to step to the next page with data. + * _bt_next_item() -- Advance to next tuple on current page; + * or if there's no more, try to step to the next page with data. + * + * If there are any matching records in the given direction true is + * returned, otherwise false. 
*/ static bool _bt_next_item(IndexScanDesc scan, BTScanState state, ScanDirection dir) @@ -1532,6 +1755,51 @@ _bt_next_item(IndexScanDesc scan, BTScanState state, ScanDirection dir) return _bt_steppage(scan, state, dir); } +/* + * _bt_next_nearest() -- Return next nearest item from bidirectional KNN scan. + */ +static bool +_bt_next_nearest(IndexScanDesc scan) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTScanState rstate = &so->state; + BTScanState lstate = so->backwardState; + bool right = BTScanPosIsValid(rstate->currPos); + bool left = BTScanPosIsValid(lstate->currPos); + bool advanceRight; + + if (right && left) + advanceRight = so->currRightIsNearest; + else if (right) + advanceRight = true; + else if (left) + advanceRight = false; + else + return false; /* end of the scan */ + + if (advanceRight) + right = _bt_next_item(scan, rstate, ForwardScanDirection); + else + left = _bt_next_item(scan, lstate, BackwardScanDirection); + + if (!left && !right) + return false; /* end of the scan */ + + if (left && right) + { + /* + * If there are items in both scans we must recalculate distance in + * the advanced scan. + */ + _bt_calc_current_dist(scan, advanceRight ? rstate : lstate); + so->currRightIsNearest = _bt_compare_current_dist(so, rstate, lstate); + right = so->currRightIsNearest; + } + + /* return nearest item */ + return _bt_return_current_item(scan, right ? rstate : lstate); +} + /* * _bt_next() -- Get the next item in a scan. 
 * @@ -1551,6 +1819,10 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) { BTScanOpaque so = (BTScanOpaque) scan->opaque; + if (so->backwardState) + /* return next nearest item from KNN scan */ + return _bt_next_nearest(scan); + if (!_bt_next_item(scan, &so->state, dir)) return false; @@ -1618,7 +1890,7 @@ _bt_readpage(IndexScanDesc scan, BTScanState state, ScanDirection dir, OffsetNum else pstate.prev_scan_page = BufferGetBlockNumber(pos->buf); - _bt_parallel_release(scan, pstate.prev_scan_page); + _bt_parallel_release(scan, state, pstate.prev_scan_page); } indnatts = IndexRelationGetNumberOfAttributes(scan->indexRelation); @@ -1709,7 +1981,7 @@ _bt_readpage(IndexScanDesc scan, BTScanState state, ScanDirection dir, OffsetNum * required < or <= strategy scan keys) during the precheck, we can safely * assume that this must also be true of all earlier tuples from the page. */ - if (!firstPage && !so->scanBehind && minoff < maxoff) + if (!so->useBidirectionalKnnScan && !firstPage && !so->scanBehind && minoff < maxoff) { ItemId iid; IndexTuple itup; @@ -2136,7 +2408,7 @@ _bt_steppage(IndexScanDesc scan, BTScanState state, ScanDirection dir) * Seize the scan to get the next block number; if the scan has * ended already, bail out. */ - status = _bt_parallel_seize(scan, &blkno, false); + status = _bt_parallel_seize(scan, state, &blkno, false); if (!status) { /* release the previous buffer, if pinned */ @@ -2168,13 +2440,19 @@ _bt_steppage(IndexScanDesc scan, BTScanState state, ScanDirection dir) * Seize the scan to get the current block number; if the scan has * ended already, bail out. 
*/ - status = _bt_parallel_seize(scan, &blkno, false); + status = _bt_parallel_seize(scan, state, &blkno, false); BTScanPosUnpinIfPinned(*currPos); if (!status) { BTScanPosInvalidate(*currPos); return false; } + if (blkno == P_NONE) + { + _bt_parallel_done(scan, state); + BTScanPosInvalidate(*currPos); + return false; + } } else { @@ -2224,7 +2502,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, */ if (blkno == P_NONE || !currPos->moreRight) { - _bt_parallel_done(scan); + _bt_parallel_done(scan, state); BTScanPosInvalidate(*currPos); return false; } @@ -2246,14 +2524,14 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, else if (scan->parallel_scan != NULL) { /* allow next page be processed by parallel worker */ - _bt_parallel_release(scan, opaque->btpo_next); + _bt_parallel_release(scan, state, opaque->btpo_next); } /* nope, keep going */ if (scan->parallel_scan != NULL) { _bt_relbuf(rel, currPos->buf); - status = _bt_parallel_seize(scan, &blkno, false); + status = _bt_parallel_seize(scan, state, &blkno, false); if (!status) { BTScanPosInvalidate(*currPos); @@ -2303,7 +2581,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, if (!currPos->moreLeft) { _bt_relbuf(rel, currPos->buf); - _bt_parallel_done(scan); + _bt_parallel_done(scan, state); BTScanPosInvalidate(*currPos); return false; } @@ -2314,7 +2592,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, /* if we're physically at end of index, return failure */ if (currPos->buf == InvalidBuffer) { - _bt_parallel_done(scan); + _bt_parallel_done(scan, state); BTScanPosInvalidate(*currPos); return false; } @@ -2337,7 +2615,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, else if (scan->parallel_scan != NULL) { /* allow next page be processed by parallel worker */ - _bt_parallel_release(scan, BufferGetBlockNumber(currPos->buf)); + _bt_parallel_release(scan, state, 
BufferGetBlockNumber(currPos->buf)); } /* @@ -2349,7 +2627,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, if (scan->parallel_scan != NULL) { _bt_relbuf(rel, currPos->buf); - status = _bt_parallel_seize(scan, &blkno, false); + status = _bt_parallel_seize(scan, state, &blkno, false); if (!status) { BTScanPosInvalidate(*currPos); @@ -2370,19 +2648,20 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, * indicate success. */ static bool -_bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir) +_bt_parallel_readpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, + ScanDirection dir) { BTScanOpaque so = (BTScanOpaque) scan->opaque; Assert(!so->needPrimScan); - _bt_initialize_more_data(scan, &so->state, dir); + _bt_initialize_more_data(scan, state, dir); - if (!_bt_readnextpage(scan, &so->state, blkno, dir)) + if (!_bt_readnextpage(scan, state, blkno, dir)) return false; - /* We have at least one item to return as scan's next item */ - _bt_drop_lock_and_maybe_pin(scan, &so->state.currPos); + /* Drop the lock, and maybe the pin, on the current page */ + _bt_drop_lock_and_maybe_pin(scan, &state->currPos); return true; } @@ -2653,7 +2932,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) _bt_initialize_more_data(scan, &so->state, dir); - if (!_bt_load_first_page(scan, &so->state, dir, start)) + if (!_bt_load_first_page(scan, &so->state, dir, start, NULL)) return false; /* OK, currPos->itemIndex says what to return */ diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index a82b2638d82..4f825325174 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -20,6 +20,7 @@ #include "access/nbtree.h" #include "access/reloptions.h" #include "access/relscan.h" +#include "catalog/pg_amop.h" #include "commands/progress.h" #include "lib/qunique.h" #include "miscadmin.h" @@ -28,6 +29,7 @@ #include 
"utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" +#include "utils/syscache.h" #define LOOK_AHEAD_REQUIRED_RECHECKS 3 #define LOOK_AHEAD_DEFAULT_DISTANCE 5 @@ -35,6 +37,9 @@ typedef struct BTSortArrayContext { FmgrInfo *sortproc; + FmgrInfo distflinfo; + FmgrInfo distcmpflinfo; + ScanKey distkey; Oid collation; bool reverse; } BTSortArrayContext; @@ -51,7 +56,7 @@ static void _bt_setup_array_cmp(IndexScanDesc scan, ScanKey skey, Oid elemtype, static Datum _bt_find_extreme_element(IndexScanDesc scan, ScanKey skey, Oid elemtype, StrategyNumber strat, Datum *elems, int nelems); -static int _bt_sort_array_elements(ScanKey skey, FmgrInfo *sortproc, +static int _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey, FmgrInfo *sortproc, bool reverse, Datum *elems, int nelems); static bool _bt_merge_arrays(IndexScanDesc scan, ScanKey skey, FmgrInfo *sortproc, bool reverse, @@ -102,6 +107,11 @@ static void _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate int tupnatts, TupleDesc tupdesc); static int _bt_keep_natts(Relation rel, IndexTuple lastleft, IndexTuple firstright, BTScanInsert itup_key); +static inline StrategyNumber _bt_select_knn_strategy_for_key(IndexScanDesc scan, + ScanKey cond); +static void _bt_get_distance_cmp_proc(ScanKey distkey, Oid opfamily, + Oid leftargtype, FmgrInfo *finfo, + int16 *typlen, bool *typbyval); /* @@ -441,7 +451,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan) * the index's key space. */ reverse = (indoption[cur->sk_attno - 1] & INDOPTION_DESC) != 0; - num_elems = _bt_sort_array_elements(cur, sortprocp, reverse, + num_elems = _bt_sort_array_elements(scan, cur, sortprocp, reverse, elem_values, num_nonnulls); if (origarrayatt == cur->sk_attno) @@ -846,18 +856,77 @@ _bt_find_extreme_element(IndexScanDesc scan, ScanKey skey, Oid elemtype, * we sort in descending order. 
*/ static int -_bt_sort_array_elements(ScanKey skey, FmgrInfo *sortproc, bool reverse, +_bt_sort_array_elements(IndexScanDesc scan, ScanKey skey, FmgrInfo *sortproc, bool reverse, Datum *elems, int nelems) { + Relation rel = scan->indexRelation; + Oid elemtype; + Oid opfamily; BTSortArrayContext cxt; if (nelems <= 1) return nelems; /* no work to do */ + /* + * Determine the nominal datatype of the array elements. We have to + * support the convention that sk_subtype == InvalidOid means the opclass + * input type; this is a hack to simplify life for ScanKeyInit(). + */ + elemtype = skey->sk_subtype; + if (elemtype == InvalidOid) + elemtype = rel->rd_opcintype[skey->sk_attno - 1]; + + opfamily = rel->rd_opfamily[skey->sk_attno - 1]; + + if (scan->numberOfOrderBys <= 0 || + scan->orderByData[0].sk_attno != skey->sk_attno) + { + cxt.distkey = NULL; + cxt.reverse = reverse; + } + else + { + /* Init procedures for distance calculation and comparison. */ + ScanKey distkey = &scan->orderByData[0]; + ScanKeyData distkey2; + Oid disttype = distkey->sk_subtype; + Oid distopr; + RegProcedure distproc; + + if (!OidIsValid(disttype)) + disttype = rel->rd_opcintype[skey->sk_attno - 1]; + + /* Lookup distance operator in index column's operator family. 
*/ + distopr = get_opfamily_member(opfamily, + elemtype, + disttype, + distkey->sk_strategy); + + if (!OidIsValid(distopr)) + elog(ERROR, "missing operator (%u,%u) for strategy %d in opfamily %u", + elemtype, disttype, BTMaxStrategyNumber, opfamily); + + distproc = get_opcode(distopr); + + if (!RegProcedureIsValid(distproc)) + elog(ERROR, "missing code for operator %u", distopr); + + fmgr_info(distproc, &cxt.distflinfo); + + distkey2 = *distkey; + fmgr_info_copy(&distkey2.sk_func, &cxt.distflinfo, CurrentMemoryContext); + distkey2.sk_subtype = disttype; + + _bt_get_distance_cmp_proc(&distkey2, opfamily, elemtype, + &cxt.distcmpflinfo, NULL, NULL); + + cxt.distkey = distkey; + cxt.reverse = false; /* supported only ascending ordering */ + } + /* Sort the array elements */ cxt.sortproc = sortproc; cxt.collation = skey->sk_collation; - cxt.reverse = reverse; qsort_arg(elems, nelems, sizeof(Datum), _bt_compare_array_elements, &cxt); @@ -930,6 +999,7 @@ _bt_merge_arrays(IndexScanDesc scan, ScanKey skey, FmgrInfo *sortproc, cxt.sortproc = mergeproc; cxt.collation = skey->sk_collation; cxt.reverse = reverse; + cxt.distkey = NULL; for (int i = 0, j = 0; i < nelems_orig_start && j < nelems_next;) { @@ -1103,6 +1173,24 @@ _bt_compare_array_elements(const void *a, const void *b, void *arg) BTSortArrayContext *cxt = (BTSortArrayContext *) arg; int32 compare; + if (cxt->distkey) + { + Datum dista = FunctionCall2Coll(&cxt->distflinfo, + cxt->collation, + da, + cxt->distkey->sk_argument); + Datum distb = FunctionCall2Coll(&cxt->distflinfo, + cxt->collation, + db, + cxt->distkey->sk_argument); + bool cmp = DatumGetBool(FunctionCall2Coll(&cxt->distcmpflinfo, + cxt->collation, + dista, + distb)); + + return cmp ? 
-1 : 1; + } + compare = DatumGetInt32(FunctionCall2Coll(cxt->sortproc, cxt->collation, da, db)); @@ -1721,7 +1809,7 @@ _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir) /* The top-level index scan ran out of tuples in this scan direction */ if (scan->parallel_scan != NULL) - _bt_parallel_done(scan); + _bt_parallel_done(scan, &so->state); return false; } @@ -2456,6 +2544,69 @@ end_toplevel_scan: /* Caller's tuple doesn't match any qual */ return false; } +/* + * _bt_emit_scan_key() -- Emit one prepared scan key + * + * Push the scan key into the so->keyData[] array, and then mark it if it is + * required. Also update selected kNN strategy. + */ +static void +_bt_emit_scan_key(IndexScanDesc scan, ScanKey skey, int numberOfEqualCols) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + ScanKey outkey = &so->keyData[so->numberOfKeys++]; + + memcpy(outkey, skey, sizeof(ScanKeyData)); + + /* + * We can mark the qual as required (possibly only in one direction) if + * all attrs before this one had "=". + */ + if (outkey->sk_attno - 1 == numberOfEqualCols) + _bt_mark_scankey_required(outkey); + + /* Update kNN strategy if it is not already selected. */ + if (so->useBidirectionalKnnScan) + { + switch (_bt_select_knn_strategy_for_key(scan, outkey)) + { + case BTLessStrategyNumber: + case BTLessEqualStrategyNumber: + + /* + * Ordering key argument is greater than all values in scan + * range, select backward scan direction. + */ + so->scanDirection = BackwardScanDirection; + so->useBidirectionalKnnScan = false; + break; + + case BTEqualStrategyNumber: + /* Use default unidirectional scan direction. */ + so->useBidirectionalKnnScan = false; + break; + + case BTGreaterEqualStrategyNumber: + case BTGreaterStrategyNumber: + + /* + * Ordering key argument is lesser than all values in scan + * range, select forward scan direction. 
+ */ + so->scanDirection = ForwardScanDirection; + so->useBidirectionalKnnScan = false; + break; + + case BTMaxStrategyNumber: + + /* + * Ordering key argument falls into scan range, keep using + * bidirectional scan. + */ + break; + } + } +} /* * _bt_preprocess_keys() -- Preprocess scan keys @@ -2548,12 +2699,10 @@ _bt_preprocess_keys(IndexScanDesc scan) BTScanOpaque so = (BTScanOpaque) scan->opaque; int numberOfKeys = scan->numberOfKeys; int16 *indoption = scan->indexRelation->rd_indoption; - int new_numberOfKeys; int numberOfEqualCols; ScanKey inkeys; - ScanKey outkeys; ScanKey cur; - BTScanKeyPreproc xform[BTMaxStrategyNumber]; + BTScanKeyPreproc xform[BTMaxSearchStrategyNumber]; bool test_result; int i, j; @@ -2576,6 +2725,25 @@ _bt_preprocess_keys(IndexScanDesc scan) return; } + if (scan->numberOfOrderBys > 0) + { + ScanKey ord = scan->orderByData; + + if (scan->numberOfOrderBys > 1 || ord->sk_attno != 1) + /* it should not happen, see btmatchorderby() */ + elog(ERROR, "only one btree ordering operator " + "for the first index column is supported"); + + Assert(ord->sk_strategy == BTMaxStrategyNumber); + + /* use bidirectional kNN scan by default */ + so->useBidirectionalKnnScan = true; + } + else + { + so->useBidirectionalKnnScan = false; + } + /* initialize result variables */ so->qual_ok = true; so->numberOfKeys = 0; @@ -2607,7 +2775,6 @@ _bt_preprocess_keys(IndexScanDesc scan) else inkeys = scan->keyData; - outkeys = so->keyData; cur = &inkeys[0]; /* we check that input keys are correctly ordered */ if (cur->sk_attno < 1) @@ -2619,11 +2786,9 @@ _bt_preprocess_keys(IndexScanDesc scan) /* Apply indoption to scankey (might change sk_strategy!) 
*/ if (!_bt_fix_scankey_strategy(cur, indoption)) so->qual_ok = false; - memcpy(outkeys, cur, sizeof(ScanKeyData)); - so->numberOfKeys = 1; - /* We can mark the qual as required if it's for first index col */ - if (cur->sk_attno == 1) - _bt_mark_scankey_required(outkeys); + + _bt_emit_scan_key(scan, cur, 0); + if (arrayKeyData) { /* @@ -2636,14 +2801,12 @@ _bt_preprocess_keys(IndexScanDesc scan) (so->arrayKeys[0].scan_key == 0 && OidIsValid(so->orderProcs[0].fn_oid))); } - return; } /* * Otherwise, do the full set of pushups. */ - new_numberOfKeys = 0; numberOfEqualCols = 0; /* @@ -2716,7 +2879,7 @@ _bt_preprocess_keys(IndexScanDesc scan) Assert(OidIsValid(orderproc->fn_oid)); } - for (j = BTMaxStrategyNumber; --j >= 0;) + for (j = BTMaxSearchStrategyNumber; --j >= 0;) { ScanKey chk = xform[j].skey; @@ -2786,21 +2949,17 @@ _bt_preprocess_keys(IndexScanDesc scan) } /* - * Emit the cleaned-up keys into the outkeys[] array, and then + * Emit the cleaned-up keys into the so->keyData[] array, and then * mark them if they are required. They are required (possibly * only in one direction) if all attrs before this one had "=". 
*/ - for (j = BTMaxStrategyNumber; --j >= 0;) + for (j = BTMaxSearchStrategyNumber; --j >= 0;) { if (xform[j].skey) { - ScanKey outkey = &outkeys[new_numberOfKeys++]; - - memcpy(outkey, xform[j].skey, sizeof(ScanKeyData)); + _bt_emit_scan_key(scan, xform[j].skey, priorNumberOfEqualCols); if (arrayKeyData) - keyDataMap[new_numberOfKeys - 1] = xform[j].ikey; - if (priorNumberOfEqualCols == attno - 1) - _bt_mark_scankey_required(outkey); + keyDataMap[so->numberOfKeys - 1] = xform[j].ikey; } } @@ -2821,19 +2980,16 @@ _bt_preprocess_keys(IndexScanDesc scan) /* if row comparison, push it directly to the output array */ if (cur->sk_flags & SK_ROW_HEADER) { - ScanKey outkey = &outkeys[new_numberOfKeys++]; - - memcpy(outkey, cur, sizeof(ScanKeyData)); + _bt_emit_scan_key(scan, cur, numberOfEqualCols); if (arrayKeyData) - keyDataMap[new_numberOfKeys - 1] = i; - if (numberOfEqualCols == attno - 1) - _bt_mark_scankey_required(outkey); + keyDataMap[so->numberOfKeys - 1] = i; /* * We don't support RowCompare using equality; such a qual would * mess up the numberOfEqualCols tracking. */ Assert(j != (BTEqualStrategyNumber - 1)); + continue; } @@ -2959,22 +3115,15 @@ _bt_preprocess_keys(IndexScanDesc scan) * even with incomplete opfamilies. _bt_advance_array_keys * depends on this. 
*/ - ScanKey outkey = &outkeys[new_numberOfKeys++]; - - memcpy(outkey, xform[j].skey, sizeof(ScanKeyData)); + _bt_emit_scan_key(scan, xform[j].skey, numberOfEqualCols); if (arrayKeyData) - keyDataMap[new_numberOfKeys - 1] = xform[j].ikey; - if (numberOfEqualCols == attno - 1) - _bt_mark_scankey_required(outkey); + keyDataMap[so->numberOfKeys - 1] = xform[j].ikey; xform[j].skey = cur; xform[j].ikey = i; xform[j].arrayidx = arrayidx; } } } - - so->numberOfKeys = new_numberOfKeys; - /* * Now that we've built a temporary mapping from so->keyData[] (output * scan keys) to scan->keyData[] (input scan keys), fix array->scan_key @@ -4583,6 +4732,39 @@ btproperty(Oid index_oid, int attno, *res = true; return true; + case AMPROP_DISTANCE_ORDERABLE: + { + Oid opclass, + opfamily, + opcindtype; + + /* answer only for columns, not AM or whole index */ + if (attno == 0) + return false; + + opclass = get_index_column_opclass(index_oid, attno); + + if (!OidIsValid(opclass)) + { + *res = false; /* non-key attribute */ + return true; + } + + if (!get_opclass_opfamily_and_input_type(opclass, + &opfamily, &opcindtype)) + { + *isnull = true; + return true; + } + + *res = SearchSysCacheExists(AMOPSTRATEGY, + ObjectIdGetDatum(opfamily), + ObjectIdGetDatum(opcindtype), + ObjectIdGetDatum(opcindtype), + Int16GetDatum(BTMaxStrategyNumber)); + return true; + } + default: return false; /* punt to generic code */ } @@ -5179,3 +5361,216 @@ _bt_allocate_tuple_workspaces(BTScanState state) state->currTuples = (char *) palloc(BLCKSZ * 2); state->markTuples = state->currTuples + BLCKSZ; } + +static bool +_bt_compare_row_key_with_ordering_key(ScanKey row, ScanKey ord, bool *result) +{ + ScanKey subkey = (ScanKey) DatumGetPointer(row->sk_argument); + int32 cmpresult; + + Assert(subkey->sk_attno == 1); + Assert(subkey->sk_flags & SK_ROW_MEMBER); + + if (subkey->sk_flags & SK_ISNULL) + return false; + + /* Perform the test --- three-way comparison not bool operator */ + cmpresult = 
DatumGetInt32(FunctionCall2Coll(&subkey->sk_func, + subkey->sk_collation, + ord->sk_argument, + subkey->sk_argument)); + + if (subkey->sk_flags & SK_BT_DESC) + cmpresult = -cmpresult; + + /* + * At this point cmpresult indicates the overall result of the row + * comparison, and subkey points to the deciding column (or the last + * column if the result is "="). + */ + switch (subkey->sk_strategy) + { + /* EQ and NE cases aren't allowed here */ + case BTLessStrategyNumber: + *result = cmpresult < 0; + break; + case BTLessEqualStrategyNumber: + *result = cmpresult <= 0; + break; + case BTGreaterEqualStrategyNumber: + *result = cmpresult >= 0; + break; + case BTGreaterStrategyNumber: + *result = cmpresult > 0; + break; + default: + elog(ERROR, "unrecognized RowCompareType: %d", + (int) subkey->sk_strategy); + *result = false; /* keep compiler quiet */ + } + + return true; +} + +/* + * _bt_select_knn_strategy_for_key() -- Determine which kNN scan strategy to use: + * bidirectional or unidirectional. We are checking here if the + * ordering scankey argument falls into the scan range: if it falls + * we must use bidirectional scan, otherwise we use unidirectional. + * + * Returns BTMaxStrategyNumber for bidirectional scan or + * strategy number of non-matched scankey for unidirectional. + */ +static inline StrategyNumber +_bt_select_knn_strategy_for_key(IndexScanDesc scan, ScanKey cond) +{ + ScanKey ord = scan->orderByData; + bool result; + + /* only interested in the first index attribute */ + if (cond->sk_attno != 1) + return BTMaxStrategyNumber; + + if (cond->sk_strategy == BTEqualStrategyNumber) + /* always use simple unidirectional scan for equals operators */ + return BTEqualStrategyNumber; + + if (cond->sk_flags & SK_ROW_HEADER) + { + if (!_bt_compare_row_key_with_ordering_key(cond, ord, &result)) + return BTEqualStrategyNumber; /* ROW(first_index_attr, ...) 
IS + * NULL */ + } + else + { + if (!_bt_compare_scankey_args(scan, cond, ord, cond, NULL, NULL, &result)) + elog(ERROR, "could not compare ordering key"); + } + + if (!result) + + /* + * Ordering scankey argument is out of scan range, use unidirectional + * scan. + */ + return cond->sk_strategy; + + return BTMaxStrategyNumber; +} + +int +_bt_init_knn_start_keys(IndexScanDesc scan, ScanKey *startKeys, ScanKey bufKeys) +{ + ScanKey ord = scan->orderByData; + int indopt = scan->indexRelation->rd_indoption[ord->sk_attno - 1]; + int flags = (indopt << SK_BT_INDOPTION_SHIFT) | + SK_ORDER_BY | + SK_SEARCHNULL; /* only for invalid procedure oid, see assert + * in ScanKeyEntryInitialize() */ + int keysCount = 0; + + /* Init btree search key with ordering key argument. */ + ScanKeyEntryInitialize(&bufKeys[0], + flags, + ord->sk_attno, + BTMaxStrategyNumber, + ord->sk_subtype, + ord->sk_collation, + InvalidOid, + ord->sk_argument); + + startKeys[keysCount++] = &bufKeys[0]; + + return keysCount; +} + +static Oid +_bt_get_sortfamily_for_opfamily_op(Oid opfamily, Oid lefttype, Oid righttype, + StrategyNumber strategy) +{ + HeapTuple tp; + Form_pg_amop amop_tup; + Oid sortfamily; + + tp = SearchSysCache4(AMOPSTRATEGY, + ObjectIdGetDatum(opfamily), + ObjectIdGetDatum(lefttype), + ObjectIdGetDatum(righttype), + Int16GetDatum(strategy)); + if (!HeapTupleIsValid(tp)) + return InvalidOid; + amop_tup = (Form_pg_amop) GETSTRUCT(tp); + sortfamily = amop_tup->amopsortfamily; + ReleaseSysCache(tp); + + return sortfamily; +} + +/* + * _bt_get_distance_cmp_proc() -- Init procedure for comparison of distances + * between "leftargtype" and "distkey". 
+ */ +static void +_bt_get_distance_cmp_proc(ScanKey distkey, Oid opfamily, Oid leftargtype, + FmgrInfo *finfo, int16 *typlen, bool *typbyval) +{ + RegProcedure opcode; + Oid sortfamily; + Oid opno; + Oid distanceType; + + distanceType = get_func_rettype(distkey->sk_func.fn_oid); + + sortfamily = _bt_get_sortfamily_for_opfamily_op(opfamily, leftargtype, + distkey->sk_subtype, + distkey->sk_strategy); + + if (!OidIsValid(sortfamily)) + elog(ERROR, "could not find sort family for btree ordering operator"); + + opno = get_opfamily_member(sortfamily, + distanceType, + distanceType, + BTLessEqualStrategyNumber); + + if (!OidIsValid(opno)) + elog(ERROR, "could not find operator for btree distance comparison"); + + opcode = get_opcode(opno); + + if (!RegProcedureIsValid(opcode)) + elog(ERROR, + "could not find procedure for btree distance comparison operator"); + + fmgr_info(opcode, finfo); + + if (typlen) + get_typlenbyval(distanceType, typlen, typbyval); +} + +/* + * _bt_init_distance_comparison() -- Init distance typlen/typbyval and its + * comparison procedure. + */ +void +_bt_init_distance_comparison(IndexScanDesc scan) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + Relation rel = scan->indexRelation; + ScanKey ord = scan->orderByData; + + _bt_get_distance_cmp_proc(ord, + rel->rd_opfamily[ord->sk_attno - 1], + rel->rd_opcintype[ord->sk_attno - 1], + &so->distanceCmpProc, + &so->distanceTypeLen, + &so->distanceTypeByVal); + + /* + * In fact, distance values need to be initialized only for by-ref types, + * because previous distance values are pfreed before writing new ones + * (see _bt_calc_current_dist()). 
+ */ + so->state.currDistance = (Datum) 0; + so->state.markDistance = (Datum) 0; +} diff --git a/src/backend/access/nbtree/nbtvalidate.c b/src/backend/access/nbtree/nbtvalidate.c index e9d4cd60de3..3c91d74512f 100644 --- a/src/backend/access/nbtree/nbtvalidate.c +++ b/src/backend/access/nbtree/nbtvalidate.c @@ -28,6 +28,13 @@ #include "utils/regproc.h" #include "utils/syscache.h" +#define BTRequiredOperatorSet \ + ((1 << BTLessStrategyNumber) | \ + (1 << BTLessEqualStrategyNumber) | \ + (1 << BTEqualStrategyNumber) | \ + (1 << BTGreaterEqualStrategyNumber) | \ + (1 << BTGreaterStrategyNumber)) + /* * Validator for a btree opclass. @@ -142,6 +149,7 @@ btvalidate(Oid opclassoid) { HeapTuple oprtup = &oprlist->members[i]->tuple; Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup); + Oid op_rettype; /* Check that only allowed strategy numbers exist */ if (oprform->amopstrategy < 1 || @@ -156,20 +164,29 @@ btvalidate(Oid opclassoid) result = false; } - /* btree doesn't support ORDER BY operators */ - if (oprform->amoppurpose != AMOP_SEARCH || - OidIsValid(oprform->amopsortfamily)) + /* btree supports ORDER BY operators */ + if (oprform->amoppurpose != AMOP_SEARCH) { - ereport(INFO, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s", - opfamilyname, "btree", - format_operator(oprform->amopopr)))); - result = false; + /* ... 
and operator result must match the claimed btree opfamily */ + op_rettype = get_op_rettype(oprform->amopopr); + if (!opfamily_can_sort_type(oprform->amopsortfamily, op_rettype)) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s", + opfamilyname, "btree", + format_operator(oprform->amopopr)))); + result = false; + } + } + else + { + /* Search operators must always return bool */ + op_rettype = BOOLOID; } /* Check operator signature --- same for all btree strategies */ - if (!check_amop_signature(oprform->amopopr, BOOLOID, + if (!check_amop_signature(oprform->amopopr, op_rettype, oprform->amoplefttype, oprform->amoprighttype)) { @@ -224,12 +241,8 @@ btvalidate(Oid opclassoid) * or support functions for this datatype pair. The sortsupport, * in_range, and equalimage functions are considered optional. */ - if (thisgroup->operatorset != - ((1 << BTLessStrategyNumber) | - (1 << BTLessEqualStrategyNumber) | - (1 << BTEqualStrategyNumber) | - (1 << BTGreaterEqualStrategyNumber) | - (1 << BTGreaterStrategyNumber))) + if ((thisgroup->operatorset & BTRequiredOperatorSet) != + BTRequiredOperatorSet) { ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 9a1a7faac7a..1b6b2ff299d 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -1385,7 +1385,7 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context, { PartitionScheme part_scheme = context->rel->part_scheme; List *opsteps = NIL; - List *btree_clauses[BTMaxStrategyNumber + 1], + List *btree_clauses[BTMaxSearchStrategyNumber + 1], *hash_clauses[HTMaxStrategyNumber + 1]; int i; ListCell *lc; @@ -1497,7 +1497,7 @@ gen_prune_steps_from_opexps(GeneratePruningStepsContext *context, * combinations of expressions of different keys, which * 
get_steps_using_prefix takes care of for us. */ - for (strat = 1; strat <= BTMaxStrategyNumber; strat++) + for (strat = 1; strat <= BTMaxSearchStrategyNumber; strat++) { foreach(lc, btree_clauses[strat]) { diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index c60cecf722a..d2da1b964f2 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -682,7 +682,7 @@ BTreeTupleGetMaxHeapTID(IndexTuple itup) * The strategy numbers are chosen so that we can commute them by * subtraction, thus: */ -#define BTCommuteStrategyNumber(strat) (BTMaxStrategyNumber + 1 - (strat)) +#define BTCommuteStrategyNumber(strat) (BTMaxSearchStrategyNumber + 1 - (strat)) /* * When a new operator class is declared, we require that the user @@ -1064,6 +1064,12 @@ typedef struct BTScanStateData /* keep these last in struct for efficiency */ BTScanPosData currPos; /* current position data */ BTScanPosData markPos; /* marked position, if any */ + + /* KNN-search fields: */ + Datum currDistance; /* distance to the current item */ + Datum markDistance; /* distance to the marked item */ + bool currIsNull; /* current item is NULL */ + bool markIsNull; /* marked item is NULL */ } BTScanStateData; typedef BTScanStateData *BTScanState; @@ -1083,8 +1089,20 @@ typedef struct BTScanOpaqueData FmgrInfo *orderProcs; /* ORDER procs for required equality keys */ MemoryContext arrayContext; /* scan-lifespan context for array data */ - /* the state of tree scan */ + /* the state of main tree scan */ BTScanStateData state; + + /* kNN-search fields: */ + bool useBidirectionalKnnScan; /* use bidirectional kNN scan? 
*/ + BTScanState forwardState; + BTScanState backwardState; /* optional scan state for kNN search */ + ScanDirection scanDirection; /* selected scan direction for + * unidirectional kNN scan */ + FmgrInfo distanceCmpProc; /* distance comparison procedure */ + int16 distanceTypeLen; /* distance typlen */ + bool distanceTypeByVal; /* distance typebyval */ + bool currRightIsNearest; /* current right item is nearest */ + bool markRightIsNearest; /* marked right item is nearest */ } BTScanOpaqueData; typedef BTScanOpaqueData *BTScanOpaque; @@ -1199,13 +1217,14 @@ extern bool btcanreturn(Relation index, int attno); /* * prototypes for internal functions in nbtree.c */ -extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, +extern bool _bt_parallel_seize(IndexScanDesc scan, BTScanState state, BlockNumber *pageno, bool first); -extern void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page); -extern void _bt_parallel_done(IndexScanDesc scan); +extern void _bt_parallel_release(IndexScanDesc scan, BTScanState state, BlockNumber scan_page); +extern void _bt_parallel_done(IndexScanDesc scan, BTScanState state); extern void _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber prev_scan_page); + /* * prototypes for functions in nbtdedup.c */ @@ -1322,6 +1341,9 @@ extern void _bt_check_third_page(Relation rel, Relation heap, bool needheaptidspace, Page page, IndexTuple newtup); extern bool _bt_allequalimage(Relation rel, bool debugmessage); extern void _bt_allocate_tuple_workspaces(BTScanState state); +extern void _bt_init_distance_comparison(IndexScanDesc scan); +extern int _bt_init_knn_start_keys(IndexScanDesc scan, ScanKey *startKeys, + ScanKey bufKeys); /* * prototypes for functions in nbtvalidate.c diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h index 8a47d3c9ec8..ccf2e0b9269 100644 --- a/src/include/access/stratnum.h +++ b/src/include/access/stratnum.h @@ -32,7 +32,12 @@ typedef uint16 StrategyNumber; 
#define BTGreaterEqualStrategyNumber 4 #define BTGreaterStrategyNumber 5 -#define BTMaxStrategyNumber 5 +#define BTMaxSearchStrategyNumber 5 /* number of B-tree search + * strategies */ + +#define BTNearestStrategyNumber 6 /* for ordering by <-> operator */ +#define BTMaxStrategyNumber 6 /* total number of B-tree + * strategies */ /* * Strategy numbers for hash indexes. There's only one valid strategy for diff --git a/src/include/catalog/pg_amop.dat b/src/include/catalog/pg_amop.dat index d8a05214b11..805ae021e4c 100644 --- a/src/include/catalog/pg_amop.dat +++ b/src/include/catalog/pg_amop.dat @@ -30,6 +30,10 @@ { amopfamily => 'btree/integer_ops', amoplefttype => 'int2', amoprighttype => 'int2', amopstrategy => '5', amopopr => '>(int2,int2)', amopmethod => 'btree' }, +{ amopfamily => 'btree/integer_ops', amoplefttype => 'int2', + amoprighttype => 'int2', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(int2,int2)', amopmethod => 'btree', + amopsortfamily => 'btree/integer_ops' }, # crosstype operators int24 { amopfamily => 'btree/integer_ops', amoplefttype => 'int2', @@ -47,6 +51,10 @@ { amopfamily => 'btree/integer_ops', amoplefttype => 'int2', amoprighttype => 'int4', amopstrategy => '5', amopopr => '>(int2,int4)', amopmethod => 'btree' }, +{ amopfamily => 'btree/integer_ops', amoplefttype => 'int2', + amoprighttype => 'int4', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(int2,int4)', amopmethod => 'btree', + amopsortfamily => 'btree/integer_ops' }, # crosstype operators int28 { amopfamily => 'btree/integer_ops', amoplefttype => 'int2', @@ -64,6 +72,10 @@ { amopfamily => 'btree/integer_ops', amoplefttype => 'int2', amoprighttype => 'int8', amopstrategy => '5', amopopr => '>(int2,int8)', amopmethod => 'btree' }, +{ amopfamily => 'btree/integer_ops', amoplefttype => 'int2', + amoprighttype => 'int8', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(int2,int8)', amopmethod => 'btree', + amopsortfamily => 'btree/integer_ops' }, #
default operators int4 { amopfamily => 'btree/integer_ops', amoplefttype => 'int4', @@ -81,6 +93,10 @@ { amopfamily => 'btree/integer_ops', amoplefttype => 'int4', amoprighttype => 'int4', amopstrategy => '5', amopopr => '>(int4,int4)', amopmethod => 'btree' }, +{ amopfamily => 'btree/integer_ops', amoplefttype => 'int4', + amoprighttype => 'int4', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(int4,int4)', amopmethod => 'btree', + amopsortfamily => 'btree/integer_ops' }, # crosstype operators int42 { amopfamily => 'btree/integer_ops', amoplefttype => 'int4', @@ -98,6 +114,10 @@ { amopfamily => 'btree/integer_ops', amoplefttype => 'int4', amoprighttype => 'int2', amopstrategy => '5', amopopr => '>(int4,int2)', amopmethod => 'btree' }, +{ amopfamily => 'btree/integer_ops', amoplefttype => 'int4', + amoprighttype => 'int2', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(int4,int2)', amopmethod => 'btree', + amopsortfamily => 'btree/integer_ops' }, # crosstype operators int48 { amopfamily => 'btree/integer_ops', amoplefttype => 'int4', @@ -115,6 +135,10 @@ { amopfamily => 'btree/integer_ops', amoplefttype => 'int4', amoprighttype => 'int8', amopstrategy => '5', amopopr => '>(int4,int8)', amopmethod => 'btree' }, +{ amopfamily => 'btree/integer_ops', amoplefttype => 'int4', + amoprighttype => 'int8', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(int4,int8)', amopmethod => 'btree', + amopsortfamily => 'btree/integer_ops' }, # default operators int8 { amopfamily => 'btree/integer_ops', amoplefttype => 'int8', @@ -132,6 +156,10 @@ { amopfamily => 'btree/integer_ops', amoplefttype => 'int8', amoprighttype => 'int8', amopstrategy => '5', amopopr => '>(int8,int8)', amopmethod => 'btree' }, +{ amopfamily => 'btree/integer_ops', amoplefttype => 'int8', + amoprighttype => 'int8', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(int8,int8)', amopmethod => 'btree', + amopsortfamily => 'btree/integer_ops' }, # crosstype operators 
int82 { amopfamily => 'btree/integer_ops', amoplefttype => 'int8', @@ -149,6 +177,10 @@ { amopfamily => 'btree/integer_ops', amoplefttype => 'int8', amoprighttype => 'int2', amopstrategy => '5', amopopr => '>(int8,int2)', amopmethod => 'btree' }, +{ amopfamily => 'btree/integer_ops', amoplefttype => 'int8', + amoprighttype => 'int2', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(int8,int2)', amopmethod => 'btree', + amopsortfamily => 'btree/integer_ops' }, # crosstype operators int84 { amopfamily => 'btree/integer_ops', amoplefttype => 'int8', @@ -166,6 +198,10 @@ { amopfamily => 'btree/integer_ops', amoplefttype => 'int8', amoprighttype => 'int4', amopstrategy => '5', amopopr => '>(int8,int4)', amopmethod => 'btree' }, +{ amopfamily => 'btree/integer_ops', amoplefttype => 'int8', + amoprighttype => 'int4', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(int8,int4)', amopmethod => 'btree', + amopsortfamily => 'btree/integer_ops' }, # btree oid_ops @@ -179,6 +215,10 @@ amopstrategy => '4', amopopr => '>=(oid,oid)', amopmethod => 'btree' }, { amopfamily => 'btree/oid_ops', amoplefttype => 'oid', amoprighttype => 'oid', amopstrategy => '5', amopopr => '>(oid,oid)', amopmethod => 'btree' }, +{ amopfamily => 'btree/oid_ops', amoplefttype => 'oid', + amoprighttype => 'oid', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(oid,oid)', amopmethod => 'btree', + amopsortfamily => 'btree/oid_ops' }, # btree xid8_ops @@ -247,6 +287,10 @@ { amopfamily => 'btree/float_ops', amoplefttype => 'float4', amoprighttype => 'float4', amopstrategy => '5', amopopr => '>(float4,float4)', amopmethod => 'btree' }, +{ amopfamily => 'btree/float_ops', amoplefttype => 'float4', + amoprighttype => 'float4', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(float4,float4)', amopmethod => 'btree', + amopsortfamily => 'btree/float_ops' }, # crosstype operators float48 { amopfamily => 'btree/float_ops', amoplefttype => 'float4', @@ -264,6 +308,10 @@ { 
amopfamily => 'btree/float_ops', amoplefttype => 'float4', amoprighttype => 'float8', amopstrategy => '5', amopopr => '>(float4,float8)', amopmethod => 'btree' }, +{ amopfamily => 'btree/float_ops', amoplefttype => 'float4', + amoprighttype => 'float8', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(float4,float8)', amopmethod => 'btree', + amopsortfamily => 'btree/float_ops' }, # default operators float8 { amopfamily => 'btree/float_ops', amoplefttype => 'float8', @@ -281,6 +329,10 @@ { amopfamily => 'btree/float_ops', amoplefttype => 'float8', amoprighttype => 'float8', amopstrategy => '5', amopopr => '>(float8,float8)', amopmethod => 'btree' }, +{ amopfamily => 'btree/float_ops', amoplefttype => 'float8', + amoprighttype => 'float8', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(float8,float8)', amopmethod => 'btree', + amopsortfamily => 'btree/float_ops' }, # crosstype operators float84 { amopfamily => 'btree/float_ops', amoplefttype => 'float8', @@ -298,6 +350,10 @@ { amopfamily => 'btree/float_ops', amoplefttype => 'float8', amoprighttype => 'float4', amopstrategy => '5', amopopr => '>(float8,float4)', amopmethod => 'btree' }, +{ amopfamily => 'btree/float_ops', amoplefttype => 'float8', + amoprighttype => 'float4', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(float8,float4)', amopmethod => 'btree', + amopsortfamily => 'btree/float_ops' }, # btree char_ops @@ -434,6 +490,10 @@ { amopfamily => 'btree/datetime_ops', amoplefttype => 'date', amoprighttype => 'date', amopstrategy => '5', amopopr => '>(date,date)', amopmethod => 'btree' }, +{ amopfamily => 'btree/datetime_ops', amoplefttype => 'date', + amoprighttype => 'date', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(date,date)', amopmethod => 'btree', + amopsortfamily => 'btree/integer_ops' }, # crosstype operators vs timestamp { amopfamily => 'btree/datetime_ops', amoplefttype => 'date', @@ -451,6 +511,10 @@ { amopfamily => 'btree/datetime_ops', 
amoplefttype => 'date', amoprighttype => 'timestamp', amopstrategy => '5', amopopr => '>(date,timestamp)', amopmethod => 'btree' }, +{ amopfamily => 'btree/datetime_ops', amoplefttype => 'date', + amoprighttype => 'timestamp', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(date,timestamp)', amopmethod => 'btree', + amopsortfamily => 'btree/interval_ops' }, # crosstype operators vs timestamptz { amopfamily => 'btree/datetime_ops', amoplefttype => 'date', @@ -468,6 +532,10 @@ { amopfamily => 'btree/datetime_ops', amoplefttype => 'date', amoprighttype => 'timestamptz', amopstrategy => '5', amopopr => '>(date,timestamptz)', amopmethod => 'btree' }, +{ amopfamily => 'btree/datetime_ops', amoplefttype => 'date', + amoprighttype => 'timestamptz', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(date,timestamptz)', amopmethod => 'btree', + amopsortfamily => 'btree/interval_ops' }, # default operators timestamp { amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamp', @@ -485,6 +553,10 @@ { amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamp', amoprighttype => 'timestamp', amopstrategy => '5', amopopr => '>(timestamp,timestamp)', amopmethod => 'btree' }, +{ amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamp', + amoprighttype => 'timestamp', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(timestamp,timestamp)', amopmethod => 'btree', + amopsortfamily => 'btree/interval_ops' }, # crosstype operators vs date { amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamp', @@ -502,6 +574,10 @@ { amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamp', amoprighttype => 'date', amopstrategy => '5', amopopr => '>(timestamp,date)', amopmethod => 'btree' }, +{ amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamp', + amoprighttype => 'date', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(timestamp,date)', amopmethod => 'btree', + amopsortfamily => 'btree/interval_ops' }, # crosstype 
operators vs timestamptz { amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamp', @@ -519,6 +595,10 @@ { amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamp', amoprighttype => 'timestamptz', amopstrategy => '5', amopopr => '>(timestamp,timestamptz)', amopmethod => 'btree' }, +{ amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamp', + amoprighttype => 'timestamptz', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(timestamp,timestamptz)', amopmethod => 'btree', + amopsortfamily => 'btree/interval_ops' }, # default operators timestamptz { amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamptz', @@ -536,6 +616,10 @@ { amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamptz', amoprighttype => 'timestamptz', amopstrategy => '5', amopopr => '>(timestamptz,timestamptz)', amopmethod => 'btree' }, +{ amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamptz', + amoprighttype => 'timestamptz', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(timestamptz,timestamptz)', amopmethod => 'btree', + amopsortfamily => 'btree/interval_ops' }, # crosstype operators vs date { amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamptz', @@ -553,6 +637,10 @@ { amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamptz', amoprighttype => 'date', amopstrategy => '5', amopopr => '>(timestamptz,date)', amopmethod => 'btree' }, +{ amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamptz', + amoprighttype => 'date', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(timestamptz,date)', amopmethod => 'btree', + amopsortfamily => 'btree/interval_ops' }, # crosstype operators vs timestamp { amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamptz', @@ -570,6 +658,10 @@ { amopfamily => 'btree/datetime_ops', amoplefttype => 'timestamptz', amoprighttype => 'timestamp', amopstrategy => '5', amopopr => '>(timestamptz,timestamp)', amopmethod => 'btree' }, +{ amopfamily => 
'btree/datetime_ops', amoplefttype => 'timestamptz', + amoprighttype => 'timestamp', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(timestamptz,timestamp)', amopmethod => 'btree', + amopsortfamily => 'btree/interval_ops' }, # btree time_ops @@ -588,6 +680,10 @@ { amopfamily => 'btree/time_ops', amoplefttype => 'time', amoprighttype => 'time', amopstrategy => '5', amopopr => '>(time,time)', amopmethod => 'btree' }, +{ amopfamily => 'btree/time_ops', amoplefttype => 'time', + amoprighttype => 'time', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(time,time)', amopmethod => 'btree', + amopsortfamily => 'btree/interval_ops' }, # btree timetz_ops @@ -624,6 +720,10 @@ { amopfamily => 'btree/interval_ops', amoplefttype => 'interval', amoprighttype => 'interval', amopstrategy => '5', amopopr => '>(interval,interval)', amopmethod => 'btree' }, +{ amopfamily => 'btree/interval_ops', amoplefttype => 'interval', + amoprighttype => 'interval', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(interval,interval)', amopmethod => 'btree', + amopsortfamily => 'btree/interval_ops' }, # btree macaddr @@ -799,6 +899,10 @@ { amopfamily => 'btree/money_ops', amoplefttype => 'money', amoprighttype => 'money', amopstrategy => '5', amopopr => '>(money,money)', amopmethod => 'btree' }, +{ amopfamily => 'btree/money_ops', amoplefttype => 'money', + amoprighttype => 'money', amopstrategy => '6', amoppurpose => 'o', + amopopr => '<->(money,money)', amopmethod => 'btree', + amopsortfamily => 'btree/money_ops' }, # btree array_ops diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out index ae54cb254f9..0b08c29bebb 100644 --- a/src/test/regress/expected/alter_generic.out +++ b/src/test/regress/expected/alter_generic.out @@ -355,10 +355,10 @@ ROLLBACK; CREATE OPERATOR FAMILY alt_opf4 USING btree; ALTER OPERATOR FAMILY alt_opf4 USING invalid_index_method ADD OPERATOR 1 < (int4, int2); -- invalid indexing_method ERROR: 
access method "invalid_index_method" does not exist -ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 6 < (int4, int2); -- operator number should be between 1 and 5 -ERROR: invalid operator number 6, must be between 1 and 5 -ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 5 -ERROR: invalid operator number 0, must be between 1 and 5 +ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 7 < (int4, int2); -- operator number should be between 1 and 6 +ERROR: invalid operator number 7, must be between 1 and 6 +ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 6 +ERROR: invalid operator number 0, must be between 1 and 6 ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 1 < ; -- operator without argument types ERROR: operator argument types must be specified in ALTER OPERATOR FAMILY ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 0 btint42cmp(int4, int2); -- invalid options parsing function @@ -405,11 +405,12 @@ DROP OPERATOR FAMILY alt_opf8 USING btree; CREATE OPERATOR FAMILY alt_opf9 USING gist; ALTER OPERATOR FAMILY alt_opf9 USING gist ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops; DROP OPERATOR FAMILY alt_opf9 USING gist; --- Should fail. Ensure correct ordering methods in ALTER OPERATOR FAMILY ... ADD OPERATOR .. FOR ORDER BY +-- Should work. Ensure correct ordering methods in ALTER OPERATOR FAMILY ... ADD OPERATOR .. FOR ORDER BY +BEGIN TRANSACTION; CREATE OPERATOR FAMILY alt_opf10 USING btree; ALTER OPERATOR FAMILY alt_opf10 USING btree ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops; -ERROR: access method "btree" does not support ordering operators DROP OPERATOR FAMILY alt_opf10 USING btree; +ROLLBACK; -- Should work. Textbook case of ALTER OPERATOR FAMILY ... 
ADD OPERATOR with FOR ORDER BY CREATE OPERATOR FAMILY alt_opf11 USING gist; ALTER OPERATOR FAMILY alt_opf11 USING gist ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops; diff --git a/src/test/regress/expected/amutils.out b/src/test/regress/expected/amutils.out index 7ab6113c619..1b39abccbf8 100644 --- a/src/test/regress/expected/amutils.out +++ b/src/test/regress/expected/amutils.out @@ -24,7 +24,7 @@ select prop, nulls_first | | | f nulls_last | | | t orderable | | | t - distance_orderable | | | f + distance_orderable | | | t returnable | | | t search_array | | | t search_nulls | | | t @@ -100,7 +100,7 @@ select prop, nulls_first | f | f | f | f | f | f | f nulls_last | t | f | f | f | f | f | f orderable | t | f | f | f | f | f | f - distance_orderable | f | f | t | f | t | f | f + distance_orderable | t | f | t | f | t | f | f returnable | t | f | f | t | t | f | f search_array | t | f | f | f | f | f | f search_nulls | t | f | t | t | t | f | t @@ -231,7 +231,7 @@ select col, prop, pg_index_column_has_property(o, col, prop) 1 | desc | f 1 | nulls_first | f 1 | nulls_last | t - 1 | distance_orderable | f + 1 | distance_orderable | t 1 | returnable | t 1 | bogus | 2 | orderable | f diff --git a/src/test/regress/expected/btree_index.out b/src/test/regress/expected/btree_index.out index 510646cbce7..66f94af1211 100644 --- a/src/test/regress/expected/btree_index.out +++ b/src/test/regress/expected/btree_index.out @@ -486,3 +486,957 @@ ALTER INDEX btree_part_idx ALTER COLUMN id SET (n_distinct=100); ERROR: ALTER action ALTER COLUMN ... SET cannot be performed on relation "btree_part_idx" DETAIL: This operation is not supported for partitioned indexes. 
DROP TABLE btree_part; +--- +--- Test B-tree distance ordering +--- +SET enable_bitmapscan = OFF; +-- temporarily disable bt_i4_index index on bt_i4_heap(seqno) +UPDATE pg_index SET indisvalid = false WHERE indexrelid = 'bt_i4_index'::regclass; +CREATE INDEX bt_i4_heap_random_idx ON bt_i4_heap USING btree(random, seqno); +-- test unsupported orderings (by non-first index attribute or by more than one order keys) +EXPLAIN (COSTS OFF) SELECT * FROM bt_i4_heap ORDER BY seqno <-> 0; + QUERY PLAN +----------------------------------------------------------- + Index Only Scan using bt_i4_heap_random_idx on bt_i4_heap + Order By: (seqno <-> 0) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM bt_i4_heap ORDER BY random <-> 0, seqno <-> 0; + QUERY PLAN +----------------------------------------------------------- + Index Only Scan using bt_i4_heap_random_idx on bt_i4_heap + Order By: ((random <-> 0) AND (seqno <-> 0)) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM bt_i4_heap ORDER BY random <-> 0, random <-> 1; + QUERY PLAN +----------------------------------------------------------- + Index Only Scan using bt_i4_heap_random_idx on bt_i4_heap + Order By: ((random <-> 0) AND (random <-> 1)) +(2 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 4000000; + QUERY PLAN +------------------------------------------------------------------------------- + Index Only Scan using bt_i4_heap_random_idx on bt_i4_heap + Index Cond: ((random > 1000000) AND (ROW(random, seqno) < ROW(6000000, 0))) + Order By: (random <-> 4000000) +(3 rows) + +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 4000000; + seqno | random +-------+--------- + 6448 | 4157193 + 9004 | 3783884 + 4408 | 4488889 + 8391 | 4825069 + 8984 | 3148979 + 1829 | 3053937 + 6262 | 3013326 + 5380 | 3000193 + 9142 | 2847247 + 8411 | 2809541 + 2859 | 5224694 + 6320 | 5257716 + 2126 | 2648497 + 8729 
| 5450460 + 6862 | 5556001 + 1836 | 5593978 + 2681 | 2321799 + 2893 | 1919087 + 210 | 1809552 +(19 rows) + +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 10000000; + seqno | random +-------+--------- + 1836 | 5593978 + 6862 | 5556001 + 8729 | 5450460 + 6320 | 5257716 + 2859 | 5224694 + 8391 | 4825069 + 4408 | 4488889 + 6448 | 4157193 + 9004 | 3783884 + 8984 | 3148979 + 1829 | 3053937 + 6262 | 3013326 + 5380 | 3000193 + 9142 | 2847247 + 8411 | 2809541 + 2126 | 2648497 + 2681 | 2321799 + 2893 | 1919087 + 210 | 1809552 +(19 rows) + +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 0; + seqno | random +-------+--------- + 210 | 1809552 + 2893 | 1919087 + 2681 | 2321799 + 2126 | 2648497 + 8411 | 2809541 + 9142 | 2847247 + 5380 | 3000193 + 6262 | 3013326 + 1829 | 3053937 + 8984 | 3148979 + 9004 | 3783884 + 6448 | 4157193 + 4408 | 4488889 + 8391 | 4825069 + 2859 | 5224694 + 6320 | 5257716 + 8729 | 5450460 + 6862 | 5556001 + 1836 | 5593978 +(19 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM bt_i4_heap +WHERE + random > 1000000 AND (random, seqno) < (6000000, 0) AND + random IN (1809552, 1919087, 2321799, 2648497, 3000193, 3013326, 4157193, 4488889, 5257716, 5593978, NULL) +ORDER BY random <-> 3000000; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Index Only Scan using bt_i4_heap_random_idx on bt_i4_heap + Index Cond: ((random > 1000000) AND (ROW(random, seqno) < ROW(6000000, 0)) AND (random = ANY ('{1809552,1919087,2321799,2648497,3000193,3013326,4157193,4488889,5257716,5593978,NULL}'::integer[]))) + Order By: (random <-> 3000000) +(3 rows) + +SELECT * FROM bt_i4_heap +WHERE + random > 1000000 AND (random, seqno) < (6000000, 0) AND + random IN (1809552, 1919087, 2321799, 2648497, 3000193, 
3013326, 4157193, 4488889, 5257716, 5593978, NULL) +ORDER BY random <-> 3000000; + seqno | random +-------+--------- + 5380 | 3000193 + 6262 | 3013326 + 2126 | 2648497 + 2681 | 2321799 + 2893 | 1919087 + 6448 | 4157193 + 210 | 1809552 + 4408 | 4488889 + 6320 | 5257716 + 1836 | 5593978 +(10 rows) + +DROP INDEX bt_i4_heap_random_idx; +CREATE INDEX bt_i4_heap_random_idx ON bt_i4_heap USING btree(random DESC, seqno); +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 4000000; + seqno | random +-------+--------- + 6448 | 4157193 + 9004 | 3783884 + 4408 | 4488889 + 8391 | 4825069 + 8984 | 3148979 + 1829 | 3053937 + 6262 | 3013326 + 5380 | 3000193 + 9142 | 2847247 + 8411 | 2809541 + 2859 | 5224694 + 6320 | 5257716 + 2126 | 2648497 + 8729 | 5450460 + 6862 | 5556001 + 1836 | 5593978 + 2681 | 2321799 + 2893 | 1919087 + 210 | 1809552 +(19 rows) + +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 10000000; + seqno | random +-------+--------- + 1836 | 5593978 + 6862 | 5556001 + 8729 | 5450460 + 6320 | 5257716 + 2859 | 5224694 + 8391 | 4825069 + 4408 | 4488889 + 6448 | 4157193 + 9004 | 3783884 + 8984 | 3148979 + 1829 | 3053937 + 6262 | 3013326 + 5380 | 3000193 + 9142 | 2847247 + 8411 | 2809541 + 2126 | 2648497 + 2681 | 2321799 + 2893 | 1919087 + 210 | 1809552 +(19 rows) + +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 0; + seqno | random +-------+--------- + 210 | 1809552 + 2893 | 1919087 + 2681 | 2321799 + 2126 | 2648497 + 8411 | 2809541 + 9142 | 2847247 + 5380 | 3000193 + 6262 | 3013326 + 1829 | 3053937 + 8984 | 3148979 + 9004 | 3783884 + 6448 | 4157193 + 4408 | 4488889 + 8391 | 4825069 + 2859 | 5224694 + 6320 | 5257716 + 8729 | 5450460 + 6862 | 5556001 + 1836 | 5593978 +(19 rows) + +DROP INDEX bt_i4_heap_random_idx; +-- test parallel KNN scan +-- Serializable isolation would disable parallel query, so 
explicitly use an +-- arbitrary other level. +BEGIN ISOLATION LEVEL REPEATABLE READ; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +SET min_parallel_table_scan_size = 0; +SET max_parallel_workers = 4; +SET max_parallel_workers_per_gather = 4; +SET cpu_operator_cost = 0; +RESET enable_indexscan; +\set bt_knn_row_count 100000 +CREATE TABLE bt_knn_test AS SELECT i * 10 AS i FROM generate_series(1, :bt_knn_row_count) i; +CREATE INDEX bt_knn_test_idx ON bt_knn_test (i); +ALTER TABLE bt_knn_test SET (parallel_workers = 4); +ANALYZE bt_knn_test; +-- set the point inside the range +\set bt_knn_point (4 * :bt_knn_row_count + 3) +CREATE TABLE bt_knn_test2 AS + SELECT row_number() OVER (ORDER BY i * 10 <-> :bt_knn_point) AS n, i * 10 AS i + FROM generate_series(1, :bt_knn_row_count) i; +SET enable_sort = OFF; +EXPLAIN (COSTS OFF) +WITH bt_knn_test1 AS ( + SELECT row_number() OVER (ORDER BY i <-> :bt_knn_point) AS n, i FROM bt_knn_test +) +SELECT * FROM bt_knn_test1 t1 JOIN bt_knn_test2 t2 USING (n) WHERE t1.i <> t2.i; + QUERY PLAN +--------------------------------------------------------------------------------- + Hash Join + Hash Cond: ((row_number() OVER (?)) = t2.n) + Join Filter: (bt_knn_test.i <> t2.i) + -> WindowAgg + -> Gather Merge + Workers Planned: 4 + -> Parallel Index Only Scan using bt_knn_test_idx on bt_knn_test + Order By: (i <-> 400003) + -> Hash + -> Gather + Workers Planned: 4 + -> Parallel Seq Scan on bt_knn_test2 t2 +(12 rows) + +WITH bt_knn_test1 AS ( + SELECT row_number() OVER (ORDER BY i <-> :bt_knn_point) AS n, i FROM bt_knn_test +) +SELECT * FROM bt_knn_test1 t1 JOIN bt_knn_test2 t2 USING (n) WHERE t1.i <> t2.i; + n | i | i +---+---+--- +(0 rows) + +RESET enable_sort; +DROP TABLE bt_knn_test2; +-- set the point to the right of the range +\set bt_knn_point (11 * :bt_knn_row_count) +CREATE TABLE bt_knn_test2 AS + SELECT row_number() OVER (ORDER BY i * 10 <-> :bt_knn_point) AS n, i * 10 AS i + FROM generate_series(1, :bt_knn_row_count) i; 
+SET enable_sort = OFF; +EXPLAIN (COSTS OFF) +WITH bt_knn_test1 AS ( + SELECT row_number() OVER (ORDER BY i <-> :bt_knn_point) AS n, i FROM bt_knn_test +) +SELECT * FROM bt_knn_test1 t1 JOIN bt_knn_test2 t2 USING (n) WHERE t1.i <> t2.i; + QUERY PLAN +--------------------------------------------------------------------------------- + Hash Join + Hash Cond: ((row_number() OVER (?)) = t2.n) + Join Filter: (bt_knn_test.i <> t2.i) + -> WindowAgg + -> Gather Merge + Workers Planned: 4 + -> Parallel Index Only Scan using bt_knn_test_idx on bt_knn_test + Order By: (i <-> 1100000) + -> Hash + -> Gather + Workers Planned: 4 + -> Parallel Seq Scan on bt_knn_test2 t2 +(12 rows) + +WITH bt_knn_test1 AS ( + SELECT row_number() OVER (ORDER BY i <-> :bt_knn_point) AS n, i FROM bt_knn_test +) +SELECT * FROM bt_knn_test1 t1 JOIN bt_knn_test2 t2 USING (n) WHERE t1.i <> t2.i; + n | i | i +---+---+--- +(0 rows) + +RESET enable_sort; +DROP TABLE bt_knn_test2; +-- set the point to the left of the range +\set bt_knn_point (-:bt_knn_row_count) +CREATE TABLE bt_knn_test2 AS + SELECT row_number() OVER (ORDER BY i * 10 <-> :bt_knn_point) AS n, i * 10 AS i + FROM generate_series(1, :bt_knn_row_count) i; +SET enable_sort = OFF; +EXPLAIN (COSTS OFF) +WITH bt_knn_test1 AS ( + SELECT row_number() OVER (ORDER BY i <-> :bt_knn_point) AS n, i FROM bt_knn_test +) +SELECT * FROM bt_knn_test1 t1 JOIN bt_knn_test2 t2 USING (n) WHERE t1.i <> t2.i; + QUERY PLAN +--------------------------------------------------------------------------------- + Hash Join + Hash Cond: ((row_number() OVER (?)) = t2.n) + Join Filter: (bt_knn_test.i <> t2.i) + -> WindowAgg + -> Gather Merge + Workers Planned: 4 + -> Parallel Index Only Scan using bt_knn_test_idx on bt_knn_test + Order By: (i <-> '-100000'::integer) + -> Hash + -> Gather + Workers Planned: 4 + -> Parallel Seq Scan on bt_knn_test2 t2 +(12 rows) + +WITH bt_knn_test1 AS ( + SELECT row_number() OVER (ORDER BY i <-> :bt_knn_point) AS n, i FROM bt_knn_test +) +SELECT 
* FROM bt_knn_test1 t1 JOIN bt_knn_test2 t2 USING (n) WHERE t1.i <> t2.i; + n | i | i +---+---+--- +(0 rows) + +RESET enable_sort; +DROP TABLE bt_knn_test; +\set knn_row_count 30000 +CREATE TABLE bt_knn_test AS SELECT i FROM generate_series(1, 10) i, generate_series(1, :knn_row_count) j; +CREATE INDEX bt_knn_test_idx ON bt_knn_test (i); +ALTER TABLE bt_knn_test SET (parallel_workers = 4); +ANALYZE bt_knn_test; +SET enable_sort = OFF; +EXPLAIN (COSTS OFF) +WITH +t1 AS ( + SELECT row_number() OVER () AS n, i + FROM bt_knn_test + WHERE i IN (3, 4, 7, 8, 2) + ORDER BY i <-> 4 +), +t2 AS ( + SELECT i * :knn_row_count + j AS n, (ARRAY[4, 3, 2, 7, 8])[i + 1] AS i + FROM generate_series(0, 4) i, generate_series(1, :knn_row_count) j +) +SELECT * FROM t1 JOIN t2 USING (n) WHERE t1.i <> t2.i; + QUERY PLAN +--------------------------------------------------------------------------------- + Hash Join + Hash Cond: ((row_number() OVER (?)) = ((i.i * 30000) + j.j)) + Join Filter: (bt_knn_test.i <> ('{4,3,2,7,8}'::integer[])[(i.i + 1)]) + -> WindowAgg + -> Gather Merge + Workers Planned: 4 + -> Parallel Index Only Scan using bt_knn_test_idx on bt_knn_test + Index Cond: (i = ANY ('{3,4,7,8,2}'::integer[])) + Order By: (i <-> 4) + -> Hash + -> Nested Loop + -> Function Scan on generate_series i + -> Function Scan on generate_series j +(13 rows) + +WITH +t1 AS ( + SELECT row_number() OVER () AS n, i + FROM bt_knn_test + WHERE i IN (3, 4, 7, 8, 2) + ORDER BY i <-> 4 +), +t2 AS ( + SELECT i * :knn_row_count + j AS n, (ARRAY[4, 3, 2, 7, 8])[i + 1] AS i + FROM generate_series(0, 4) i, generate_series(1, :knn_row_count) j +) +SELECT * FROM t1 JOIN t2 USING (n) WHERE t1.i <> t2.i; + n | i | i +---+---+--- +(0 rows) + +RESET enable_sort; +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +RESET min_parallel_table_scan_size; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +RESET cpu_operator_cost; +ROLLBACK; +-- enable bt_i4_index index on bt_i4_heap(seqno) +UPDATE 
pg_index SET indisvalid = true WHERE indexrelid = 'bt_i4_index'::regclass; +CREATE TABLE tenk3 AS SELECT thousand, tenthous FROM tenk1; +INSERT INTO tenk3 VALUES (NULL, 1), (NULL, 2), (NULL, 3); +-- Test distance ordering by ASC index +CREATE INDEX tenk3_idx ON tenk3 USING btree(thousand, tenthous); +EXPLAIN (COSTS OFF) +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) +ORDER BY thousand <-> 998; + QUERY PLAN +----------------------------------------------------------- + Index Only Scan using tenk3_idx on tenk3 + Index Cond: (ROW(thousand, tenthous) >= ROW(997, 5000)) + Order By: (thousand <-> 998) +(3 rows) + +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) +ORDER BY thousand <-> 998; + thousand | tenthous +----------+---------- + 998 | 998 + 998 | 1998 + 998 | 2998 + 998 | 3998 + 998 | 4998 + 998 | 5998 + 998 | 6998 + 998 | 7998 + 998 | 8998 + 998 | 9998 + 999 | 999 + 999 | 1999 + 999 | 2999 + 999 | 3999 + 999 | 4999 + 999 | 5999 + 999 | 6999 + 999 | 7999 + 999 | 8999 + 999 | 9999 + 997 | 9997 + 997 | 8997 + 997 | 7997 + 997 | 6997 + 997 | 5997 +(25 rows) + +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) +ORDER BY thousand <-> 0; + thousand | tenthous +----------+---------- + 997 | 5997 + 997 | 6997 + 997 | 7997 + 997 | 8997 + 997 | 9997 + 998 | 998 + 998 | 1998 + 998 | 2998 + 998 | 3998 + 998 | 4998 + 998 | 5998 + 998 | 6998 + 998 | 7998 + 998 | 8998 + 998 | 9998 + 999 | 999 + 999 | 1999 + 999 | 2999 + 999 | 3999 + 999 | 4999 + 999 | 5999 + 999 | 6999 + 999 | 7999 + 999 | 8999 + 999 | 9999 +(25 rows) + +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) AND thousand < 1000 +ORDER BY thousand <-> 10000; + thousand | tenthous +----------+---------- + 999 | 9999 + 999 | 8999 + 999 | 7999 + 999 | 6999 + 999 | 5999 + 999 | 4999 + 999 | 3999 + 999 | 2999 + 999 | 1999 + 999 | 999 + 998 | 9998 + 998 | 8998 + 998 | 7998 + 998 | 6998 + 998 | 5998 + 
998 | 4998 + 998 | 3998 + 998 | 2998 + 998 | 1998 + 998 | 998 + 997 | 9997 + 997 | 8997 + 997 | 7997 + 997 | 6997 + 997 | 5997 +(25 rows) + +SELECT thousand, tenthous FROM tenk3 +ORDER BY thousand <-> 500 +OFFSET 9970; + thousand | tenthous +----------+---------- + 999 | 999 + 999 | 1999 + 999 | 2999 + 999 | 3999 + 999 | 4999 + 999 | 5999 + 999 | 6999 + 999 | 7999 + 999 | 8999 + 999 | 9999 + 1 | 9001 + 1 | 8001 + 1 | 7001 + 1 | 6001 + 1 | 5001 + 1 | 4001 + 1 | 3001 + 1 | 2001 + 1 | 1001 + 1 | 1 + 0 | 9000 + 0 | 8000 + 0 | 7000 + 0 | 6000 + 0 | 5000 + 0 | 4000 + 0 | 3000 + 0 | 2000 + 0 | 1000 + 0 | 0 + | 1 + | 2 + | 3 +(33 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk3 +WHERE thousand > 100 AND thousand < 800 AND + thousand = ANY(ARRAY[0, 123, 234, 345, 456, 678, 901, NULL]::int2[]) +ORDER BY thousand <-> 300::int8; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Index Only Scan using tenk3_idx on tenk3 + Index Cond: ((thousand > 100) AND (thousand < 800) AND (thousand = ANY ('{0,123,234,345,456,678,901,NULL}'::smallint[]))) + Order By: (thousand <-> '300'::bigint) +(3 rows) + +SELECT * FROM tenk3 +WHERE thousand > 100 AND thousand < 800 AND + thousand = ANY(ARRAY[0, 123, 234, 345, 456, 678, 901, NULL]::int2[]) +ORDER BY thousand <-> 300::int8; + thousand | tenthous +----------+---------- + 345 | 345 + 345 | 1345 + 345 | 2345 + 345 | 3345 + 345 | 4345 + 345 | 5345 + 345 | 6345 + 345 | 7345 + 345 | 8345 + 345 | 9345 + 234 | 234 + 234 | 1234 + 234 | 2234 + 234 | 3234 + 234 | 4234 + 234 | 5234 + 234 | 6234 + 234 | 7234 + 234 | 8234 + 234 | 9234 + 456 | 456 + 456 | 1456 + 456 | 2456 + 456 | 3456 + 456 | 4456 + 456 | 5456 + 456 | 6456 + 456 | 7456 + 456 | 8456 + 456 | 9456 + 123 | 123 + 123 | 1123 + 123 | 2123 + 123 | 3123 + 123 | 4123 + 123 | 5123 + 123 | 6123 + 123 | 7123 + 123 | 8123 + 123 | 9123 + 678 | 678 + 678 | 1678 + 678 | 2678 + 678 | 3678 + 678 | 4678 + 678 | 5678 
+ 678 | 6678 + 678 | 7678 + 678 | 8678 + 678 | 9678 +(50 rows) + +DROP INDEX tenk3_idx; +-- Test distance ordering by DESC index +CREATE INDEX tenk3_idx ON tenk3 USING btree(thousand DESC, tenthous); +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) +ORDER BY thousand <-> 998; + thousand | tenthous +----------+---------- + 998 | 998 + 998 | 1998 + 998 | 2998 + 998 | 3998 + 998 | 4998 + 998 | 5998 + 998 | 6998 + 998 | 7998 + 998 | 8998 + 998 | 9998 + 997 | 5997 + 997 | 6997 + 997 | 7997 + 997 | 8997 + 997 | 9997 + 999 | 9999 + 999 | 8999 + 999 | 7999 + 999 | 6999 + 999 | 5999 + 999 | 4999 + 999 | 3999 + 999 | 2999 + 999 | 1999 + 999 | 999 +(25 rows) + +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) +ORDER BY thousand <-> 0; + thousand | tenthous +----------+---------- + 997 | 9997 + 997 | 8997 + 997 | 7997 + 997 | 6997 + 997 | 5997 + 998 | 9998 + 998 | 8998 + 998 | 7998 + 998 | 6998 + 998 | 5998 + 998 | 4998 + 998 | 3998 + 998 | 2998 + 998 | 1998 + 998 | 998 + 999 | 9999 + 999 | 8999 + 999 | 7999 + 999 | 6999 + 999 | 5999 + 999 | 4999 + 999 | 3999 + 999 | 2999 + 999 | 1999 + 999 | 999 +(25 rows) + +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) AND thousand < 1000 +ORDER BY thousand <-> 10000; + thousand | tenthous +----------+---------- + 999 | 999 + 999 | 1999 + 999 | 2999 + 999 | 3999 + 999 | 4999 + 999 | 5999 + 999 | 6999 + 999 | 7999 + 999 | 8999 + 999 | 9999 + 998 | 998 + 998 | 1998 + 998 | 2998 + 998 | 3998 + 998 | 4998 + 998 | 5998 + 998 | 6998 + 998 | 7998 + 998 | 8998 + 998 | 9998 + 997 | 5997 + 997 | 6997 + 997 | 7997 + 997 | 8997 + 997 | 9997 +(25 rows) + +SELECT thousand, tenthous FROM tenk3 +ORDER BY thousand <-> 500 +OFFSET 9970; + thousand | tenthous +----------+---------- + 1 | 1 + 1 | 1001 + 1 | 2001 + 1 | 3001 + 1 | 4001 + 1 | 5001 + 1 | 6001 + 1 | 7001 + 1 | 8001 + 1 | 9001 + 999 | 9999 + 999 | 8999 + 999 | 7999 + 999 | 6999 + 999 | 5999 + 999 | 4999 
+ 999 | 3999 + 999 | 2999 + 999 | 1999 + 999 | 999 + 0 | 0 + 0 | 1000 + 0 | 2000 + 0 | 3000 + 0 | 4000 + 0 | 5000 + 0 | 6000 + 0 | 7000 + 0 | 8000 + 0 | 9000 + | 3 + | 2 + | 1 +(33 rows) + +DROP INDEX tenk3_idx; +DROP TABLE tenk3; +-- Test distance ordering on by-ref types +CREATE TABLE knn_btree_ts (ts timestamp); +INSERT INTO knn_btree_ts +SELECT timestamp '2017-05-03 00:00:00' + tenthous * interval '1 hour' +FROM tenk1; +CREATE INDEX knn_btree_ts_idx ON knn_btree_ts USING btree(ts); +SELECT ts, ts <-> timestamp '2017-05-01 00:00:00' FROM knn_btree_ts ORDER BY 2 LIMIT 20; + ts | ?column? +--------------------------+------------------- + Wed May 03 00:00:00 2017 | @ 2 days + Wed May 03 01:00:00 2017 | @ 2 days 1 hour + Wed May 03 02:00:00 2017 | @ 2 days 2 hours + Wed May 03 03:00:00 2017 | @ 2 days 3 hours + Wed May 03 04:00:00 2017 | @ 2 days 4 hours + Wed May 03 05:00:00 2017 | @ 2 days 5 hours + Wed May 03 06:00:00 2017 | @ 2 days 6 hours + Wed May 03 07:00:00 2017 | @ 2 days 7 hours + Wed May 03 08:00:00 2017 | @ 2 days 8 hours + Wed May 03 09:00:00 2017 | @ 2 days 9 hours + Wed May 03 10:00:00 2017 | @ 2 days 10 hours + Wed May 03 11:00:00 2017 | @ 2 days 11 hours + Wed May 03 12:00:00 2017 | @ 2 days 12 hours + Wed May 03 13:00:00 2017 | @ 2 days 13 hours + Wed May 03 14:00:00 2017 | @ 2 days 14 hours + Wed May 03 15:00:00 2017 | @ 2 days 15 hours + Wed May 03 16:00:00 2017 | @ 2 days 16 hours + Wed May 03 17:00:00 2017 | @ 2 days 17 hours + Wed May 03 18:00:00 2017 | @ 2 days 18 hours + Wed May 03 19:00:00 2017 | @ 2 days 19 hours +(20 rows) + +SELECT ts, ts <-> timestamp '2018-01-01 00:00:00' FROM knn_btree_ts ORDER BY 2 LIMIT 20; + ts | ?column? 
+--------------------------+------------ + Mon Jan 01 00:00:00 2018 | @ 0 + Mon Jan 01 01:00:00 2018 | @ 1 hour + Sun Dec 31 23:00:00 2017 | @ 1 hour + Mon Jan 01 02:00:00 2018 | @ 2 hours + Sun Dec 31 22:00:00 2017 | @ 2 hours + Mon Jan 01 03:00:00 2018 | @ 3 hours + Sun Dec 31 21:00:00 2017 | @ 3 hours + Mon Jan 01 04:00:00 2018 | @ 4 hours + Sun Dec 31 20:00:00 2017 | @ 4 hours + Mon Jan 01 05:00:00 2018 | @ 5 hours + Sun Dec 31 19:00:00 2017 | @ 5 hours + Mon Jan 01 06:00:00 2018 | @ 6 hours + Sun Dec 31 18:00:00 2017 | @ 6 hours + Mon Jan 01 07:00:00 2018 | @ 7 hours + Sun Dec 31 17:00:00 2017 | @ 7 hours + Mon Jan 01 08:00:00 2018 | @ 8 hours + Sun Dec 31 16:00:00 2017 | @ 8 hours + Mon Jan 01 09:00:00 2018 | @ 9 hours + Sun Dec 31 15:00:00 2017 | @ 9 hours + Mon Jan 01 10:00:00 2018 | @ 10 hours +(20 rows) + +DROP TABLE knn_btree_ts; +RESET enable_bitmapscan; +-- Test backward kNN scan +SET enable_sort = OFF; +EXPLAIN (COSTS OFF) SELECT thousand, tenthous FROM tenk1 ORDER BY thousand <-> 510; + QUERY PLAN +----------------------------------------------------- + Index Only Scan using tenk1_thous_tenthous on tenk1 + Order By: (thousand <-> 510) +(2 rows) + +BEGIN work; +DECLARE knn SCROLL CURSOR FOR +SELECT thousand, tenthous FROM tenk1 ORDER BY thousand <-> 510; +FETCH LAST FROM knn; + thousand | tenthous +----------+---------- + 0 | 0 +(1 row) + +FETCH BACKWARD 15 FROM knn; + thousand | tenthous +----------+---------- + 0 | 1000 + 0 | 2000 + 0 | 3000 + 0 | 4000 + 0 | 5000 + 0 | 6000 + 0 | 7000 + 0 | 8000 + 0 | 9000 + 1 | 1 + 1 | 1001 + 1 | 2001 + 1 | 3001 + 1 | 4001 + 1 | 5001 +(15 rows) + +FETCH RELATIVE -200 FROM knn; + thousand | tenthous +----------+---------- + 21 | 5021 +(1 row) + +FETCH BACKWARD 20 FROM knn; + thousand | tenthous +----------+---------- + 21 | 6021 + 21 | 7021 + 21 | 8021 + 21 | 9021 + 999 | 9999 + 999 | 8999 + 999 | 7999 + 999 | 6999 + 999 | 5999 + 999 | 4999 + 999 | 3999 + 999 | 2999 + 999 | 1999 + 999 | 999 + 22 | 22 + 22 | 1022 + 
22 | 2022 + 22 | 3022 + 22 | 4022 + 22 | 5022 +(20 rows) + +FETCH FIRST FROM knn; + thousand | tenthous +----------+---------- + 510 | 510 +(1 row) + +FETCH LAST FROM knn; + thousand | tenthous +----------+---------- + 0 | 0 +(1 row) + +FETCH RELATIVE -215 FROM knn; + thousand | tenthous +----------+---------- + 21 | 5021 +(1 row) + +FETCH BACKWARD 20 FROM knn; + thousand | tenthous +----------+---------- + 21 | 6021 + 21 | 7021 + 21 | 8021 + 21 | 9021 + 999 | 9999 + 999 | 8999 + 999 | 7999 + 999 | 6999 + 999 | 5999 + 999 | 4999 + 999 | 3999 + 999 | 2999 + 999 | 1999 + 999 | 999 + 22 | 22 + 22 | 1022 + 22 | 2022 + 22 | 3022 + 22 | 4022 + 22 | 5022 +(20 rows) + +ROLLBACK work; +RESET enable_sort; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 9d047b21b88..4a512417927 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -1432,6 +1432,8 @@ WHERE o1.oprnegate = o2.oid AND p1.oid = o1.oprcode AND p2.oid = o2.oprcode AND -- Btree comparison operators' functions should have the same volatility -- and leakproofness markings as the associated comparison support function. +-- Btree ordering operators' functions may be not leakproof, while the +-- associated comparison support function is leakproof. 
SELECT pp.oid::regprocedure as proc, pp.provolatile as vp, pp.proleakproof as lp, po.oid::regprocedure as opr, po.provolatile as vo, po.proleakproof as lo FROM pg_proc pp, pg_proc po, pg_operator o, pg_amproc ap, pg_amop ao @@ -1442,7 +1444,10 @@ WHERE pp.oid = ap.amproc AND po.oid = o.oprcode AND o.oid = ao.amopopr AND ao.amoprighttype = ap.amprocrighttype AND ap.amprocnum = 1 AND (pp.provolatile != po.provolatile OR - pp.proleakproof != po.proleakproof) + (pp.proleakproof != po.proleakproof AND + ao.amoppurpose = 's') OR + (pp.proleakproof < po.proleakproof AND + ao.amoppurpose = 'o')) ORDER BY 1; proc | vp | lp | opr | vo | lo ------+----+----+-----+----+---- @@ -1980,6 +1985,7 @@ ORDER BY 1, 2, 3; 403 | 5 | *> 403 | 5 | > 403 | 5 | ~>~ + 403 | 6 | <-> 405 | 1 | = 783 | 1 | << 783 | 1 | @@ @@ -2090,7 +2096,7 @@ ORDER BY 1, 2, 3; 4000 | 28 | ^@ 4000 | 29 | <^ 4000 | 30 | >^ -(124 rows) +(125 rows) -- Check that all opclass search operators have selectivity estimators. -- This is not absolutely required, but it seems a reasonable thing diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index 3bbe4c5f974..c7f7f6bd3a3 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -5083,30 +5083,34 @@ List of access methods (1 row) \dAo+ btree float_ops - List of operators of operator families - AM | Operator family | Operator | Strategy | Purpose | Sort opfamily --------+-----------------+---------------------------------------+----------+---------+--------------- - btree | float_ops | <(double precision,double precision) | 1 | search | - btree | float_ops | <=(double precision,double precision) | 2 | search | - btree | float_ops | =(double precision,double precision) | 3 | search | - btree | float_ops | >=(double precision,double precision) | 4 | search | - btree | float_ops | >(double precision,double precision) | 5 | search | - btree | float_ops | <(real,real) | 1 | search | - btree | float_ops | 
<=(real,real) | 2 | search | - btree | float_ops | =(real,real) | 3 | search | - btree | float_ops | >=(real,real) | 4 | search | - btree | float_ops | >(real,real) | 5 | search | - btree | float_ops | <(double precision,real) | 1 | search | - btree | float_ops | <=(double precision,real) | 2 | search | - btree | float_ops | =(double precision,real) | 3 | search | - btree | float_ops | >=(double precision,real) | 4 | search | - btree | float_ops | >(double precision,real) | 5 | search | - btree | float_ops | <(real,double precision) | 1 | search | - btree | float_ops | <=(real,double precision) | 2 | search | - btree | float_ops | =(real,double precision) | 3 | search | - btree | float_ops | >=(real,double precision) | 4 | search | - btree | float_ops | >(real,double precision) | 5 | search | -(20 rows) + List of operators of operator families + AM | Operator family | Operator | Strategy | Purpose | Sort opfamily +-------+-----------------+----------------------------------------+----------+----------+--------------- + btree | float_ops | <(double precision,double precision) | 1 | search | + btree | float_ops | <=(double precision,double precision) | 2 | search | + btree | float_ops | =(double precision,double precision) | 3 | search | + btree | float_ops | >=(double precision,double precision) | 4 | search | + btree | float_ops | >(double precision,double precision) | 5 | search | + btree | float_ops | <->(double precision,double precision) | 6 | ordering | float_ops + btree | float_ops | <(real,real) | 1 | search | + btree | float_ops | <=(real,real) | 2 | search | + btree | float_ops | =(real,real) | 3 | search | + btree | float_ops | >=(real,real) | 4 | search | + btree | float_ops | >(real,real) | 5 | search | + btree | float_ops | <->(real,real) | 6 | ordering | float_ops + btree | float_ops | <(double precision,real) | 1 | search | + btree | float_ops | <=(double precision,real) | 2 | search | + btree | float_ops | =(double precision,real) | 3 | search | + 
btree | float_ops | >=(double precision,real) | 4 | search | + btree | float_ops | >(double precision,real) | 5 | search | + btree | float_ops | <->(double precision,real) | 6 | ordering | float_ops + btree | float_ops | <(real,double precision) | 1 | search | + btree | float_ops | <=(real,double precision) | 2 | search | + btree | float_ops | =(real,double precision) | 3 | search | + btree | float_ops | >=(real,double precision) | 4 | search | + btree | float_ops | >(real,double precision) | 5 | search | + btree | float_ops | <->(real,double precision) | 6 | ordering | float_ops +(24 rows) \dAo * pg_catalog.jsonb_path_ops List of operators of operator families diff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql index de58d268d31..bd3710d631b 100644 --- a/src/test/regress/sql/alter_generic.sql +++ b/src/test/regress/sql/alter_generic.sql @@ -306,8 +306,8 @@ ROLLBACK; -- Should fail. Invalid values for ALTER OPERATOR FAMILY .. ADD / DROP CREATE OPERATOR FAMILY alt_opf4 USING btree; ALTER OPERATOR FAMILY alt_opf4 USING invalid_index_method ADD OPERATOR 1 < (int4, int2); -- invalid indexing_method -ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 6 < (int4, int2); -- operator number should be between 1 and 5 -ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 5 +ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 7 < (int4, int2); -- operator number should be between 1 and 6 +ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 6 ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 1 < ; -- operator without argument types ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 0 btint42cmp(int4, int2); -- invalid options parsing function ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 6 btint42cmp(int4, int2); -- function number should be between 1 and 5 @@ -351,10 +351,12 @@ 
CREATE OPERATOR FAMILY alt_opf9 USING gist; ALTER OPERATOR FAMILY alt_opf9 USING gist ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops; DROP OPERATOR FAMILY alt_opf9 USING gist; --- Should fail. Ensure correct ordering methods in ALTER OPERATOR FAMILY ... ADD OPERATOR .. FOR ORDER BY +-- Should work. Ensure correct ordering methods in ALTER OPERATOR FAMILY ... ADD OPERATOR .. FOR ORDER BY +BEGIN TRANSACTION; CREATE OPERATOR FAMILY alt_opf10 USING btree; ALTER OPERATOR FAMILY alt_opf10 USING btree ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops; DROP OPERATOR FAMILY alt_opf10 USING btree; +ROLLBACK; -- Should work. Textbook case of ALTER OPERATOR FAMILY ... ADD OPERATOR with FOR ORDER BY CREATE OPERATOR FAMILY alt_opf11 USING gist; diff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql index 0d2a33f3705..98899492235 100644 --- a/src/test/regress/sql/btree_index.sql +++ b/src/test/regress/sql/btree_index.sql @@ -282,3 +282,315 @@ CREATE TABLE btree_part (id int4) PARTITION BY RANGE (id); CREATE INDEX btree_part_idx ON btree_part(id); ALTER INDEX btree_part_idx ALTER COLUMN id SET (n_distinct=100); DROP TABLE btree_part; + +--- +--- Test B-tree distance ordering +--- + +SET enable_bitmapscan = OFF; + +-- temporarily disable bt_i4_index index on bt_i4_heap(seqno) +UPDATE pg_index SET indisvalid = false WHERE indexrelid = 'bt_i4_index'::regclass; + +CREATE INDEX bt_i4_heap_random_idx ON bt_i4_heap USING btree(random, seqno); + +-- test unsupported orderings (by non-first index attribute or by more than one order keys) +EXPLAIN (COSTS OFF) SELECT * FROM bt_i4_heap ORDER BY seqno <-> 0; +EXPLAIN (COSTS OFF) SELECT * FROM bt_i4_heap ORDER BY random <-> 0, seqno <-> 0; +EXPLAIN (COSTS OFF) SELECT * FROM bt_i4_heap ORDER BY random <-> 0, random <-> 1; + +EXPLAIN (COSTS OFF) +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 4000000; + +SELECT * FROM bt_i4_heap +WHERE random > 
1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 4000000; + +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 10000000; + +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 0; + +EXPLAIN (COSTS OFF) +SELECT * FROM bt_i4_heap +WHERE + random > 1000000 AND (random, seqno) < (6000000, 0) AND + random IN (1809552, 1919087, 2321799, 2648497, 3000193, 3013326, 4157193, 4488889, 5257716, 5593978, NULL) +ORDER BY random <-> 3000000; + +SELECT * FROM bt_i4_heap +WHERE + random > 1000000 AND (random, seqno) < (6000000, 0) AND + random IN (1809552, 1919087, 2321799, 2648497, 3000193, 3013326, 4157193, 4488889, 5257716, 5593978, NULL) +ORDER BY random <-> 3000000; + +DROP INDEX bt_i4_heap_random_idx; + +CREATE INDEX bt_i4_heap_random_idx ON bt_i4_heap USING btree(random DESC, seqno); + +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 4000000; + +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 10000000; + +SELECT * FROM bt_i4_heap +WHERE random > 1000000 AND (random, seqno) < (6000000, 0) +ORDER BY random <-> 0; + +DROP INDEX bt_i4_heap_random_idx; + +-- test parallel KNN scan + +-- Serializable isolation would disable parallel query, so explicitly use an +-- arbitrary other level. 
+BEGIN ISOLATION LEVEL REPEATABLE READ; + +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +SET min_parallel_table_scan_size = 0; +SET max_parallel_workers = 4; +SET max_parallel_workers_per_gather = 4; +SET cpu_operator_cost = 0; + +RESET enable_indexscan; + +\set bt_knn_row_count 100000 + +CREATE TABLE bt_knn_test AS SELECT i * 10 AS i FROM generate_series(1, :bt_knn_row_count) i; +CREATE INDEX bt_knn_test_idx ON bt_knn_test (i); +ALTER TABLE bt_knn_test SET (parallel_workers = 4); +ANALYZE bt_knn_test; + +-- set the point inside the range +\set bt_knn_point (4 * :bt_knn_row_count + 3) + +CREATE TABLE bt_knn_test2 AS + SELECT row_number() OVER (ORDER BY i * 10 <-> :bt_knn_point) AS n, i * 10 AS i + FROM generate_series(1, :bt_knn_row_count) i; + +SET enable_sort = OFF; + +EXPLAIN (COSTS OFF) +WITH bt_knn_test1 AS ( + SELECT row_number() OVER (ORDER BY i <-> :bt_knn_point) AS n, i FROM bt_knn_test +) +SELECT * FROM bt_knn_test1 t1 JOIN bt_knn_test2 t2 USING (n) WHERE t1.i <> t2.i; + +WITH bt_knn_test1 AS ( + SELECT row_number() OVER (ORDER BY i <-> :bt_knn_point) AS n, i FROM bt_knn_test +) +SELECT * FROM bt_knn_test1 t1 JOIN bt_knn_test2 t2 USING (n) WHERE t1.i <> t2.i; + +RESET enable_sort; + +DROP TABLE bt_knn_test2; + +-- set the point to the right of the range +\set bt_knn_point (11 * :bt_knn_row_count) + +CREATE TABLE bt_knn_test2 AS + SELECT row_number() OVER (ORDER BY i * 10 <-> :bt_knn_point) AS n, i * 10 AS i + FROM generate_series(1, :bt_knn_row_count) i; + +SET enable_sort = OFF; + +EXPLAIN (COSTS OFF) +WITH bt_knn_test1 AS ( + SELECT row_number() OVER (ORDER BY i <-> :bt_knn_point) AS n, i FROM bt_knn_test +) +SELECT * FROM bt_knn_test1 t1 JOIN bt_knn_test2 t2 USING (n) WHERE t1.i <> t2.i; + +WITH bt_knn_test1 AS ( + SELECT row_number() OVER (ORDER BY i <-> :bt_knn_point) AS n, i FROM bt_knn_test +) +SELECT * FROM bt_knn_test1 t1 JOIN bt_knn_test2 t2 USING (n) WHERE t1.i <> t2.i; + +RESET enable_sort; + +DROP TABLE bt_knn_test2; + +-- set 
the point to the left of the range +\set bt_knn_point (-:bt_knn_row_count) + +CREATE TABLE bt_knn_test2 AS + SELECT row_number() OVER (ORDER BY i * 10 <-> :bt_knn_point) AS n, i * 10 AS i + FROM generate_series(1, :bt_knn_row_count) i; + +SET enable_sort = OFF; + +EXPLAIN (COSTS OFF) +WITH bt_knn_test1 AS ( + SELECT row_number() OVER (ORDER BY i <-> :bt_knn_point) AS n, i FROM bt_knn_test +) +SELECT * FROM bt_knn_test1 t1 JOIN bt_knn_test2 t2 USING (n) WHERE t1.i <> t2.i; + +WITH bt_knn_test1 AS ( + SELECT row_number() OVER (ORDER BY i <-> :bt_knn_point) AS n, i FROM bt_knn_test +) +SELECT * FROM bt_knn_test1 t1 JOIN bt_knn_test2 t2 USING (n) WHERE t1.i <> t2.i; + +RESET enable_sort; + +DROP TABLE bt_knn_test; + +\set knn_row_count 30000 +CREATE TABLE bt_knn_test AS SELECT i FROM generate_series(1, 10) i, generate_series(1, :knn_row_count) j; +CREATE INDEX bt_knn_test_idx ON bt_knn_test (i); +ALTER TABLE bt_knn_test SET (parallel_workers = 4); +ANALYZE bt_knn_test; + +SET enable_sort = OFF; + +EXPLAIN (COSTS OFF) +WITH +t1 AS ( + SELECT row_number() OVER () AS n, i + FROM bt_knn_test + WHERE i IN (3, 4, 7, 8, 2) + ORDER BY i <-> 4 +), +t2 AS ( + SELECT i * :knn_row_count + j AS n, (ARRAY[4, 3, 2, 7, 8])[i + 1] AS i + FROM generate_series(0, 4) i, generate_series(1, :knn_row_count) j +) +SELECT * FROM t1 JOIN t2 USING (n) WHERE t1.i <> t2.i; + +WITH +t1 AS ( + SELECT row_number() OVER () AS n, i + FROM bt_knn_test + WHERE i IN (3, 4, 7, 8, 2) + ORDER BY i <-> 4 +), +t2 AS ( + SELECT i * :knn_row_count + j AS n, (ARRAY[4, 3, 2, 7, 8])[i + 1] AS i + FROM generate_series(0, 4) i, generate_series(1, :knn_row_count) j +) +SELECT * FROM t1 JOIN t2 USING (n) WHERE t1.i <> t2.i; + +RESET enable_sort; + +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +RESET min_parallel_table_scan_size; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +RESET cpu_operator_cost; + +ROLLBACK; + +-- enable bt_i4_index index on bt_i4_heap(seqno) +UPDATE pg_index SET 
indisvalid = true WHERE indexrelid = 'bt_i4_index'::regclass; + + +CREATE TABLE tenk3 AS SELECT thousand, tenthous FROM tenk1; + +INSERT INTO tenk3 VALUES (NULL, 1), (NULL, 2), (NULL, 3); + +-- Test distance ordering by ASC index +CREATE INDEX tenk3_idx ON tenk3 USING btree(thousand, tenthous); + +EXPLAIN (COSTS OFF) +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) +ORDER BY thousand <-> 998; + +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) +ORDER BY thousand <-> 998; + +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) +ORDER BY thousand <-> 0; + +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) AND thousand < 1000 +ORDER BY thousand <-> 10000; + +SELECT thousand, tenthous FROM tenk3 +ORDER BY thousand <-> 500 +OFFSET 9970; + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk3 +WHERE thousand > 100 AND thousand < 800 AND + thousand = ANY(ARRAY[0, 123, 234, 345, 456, 678, 901, NULL]::int2[]) +ORDER BY thousand <-> 300::int8; + +SELECT * FROM tenk3 +WHERE thousand > 100 AND thousand < 800 AND + thousand = ANY(ARRAY[0, 123, 234, 345, 456, 678, 901, NULL]::int2[]) +ORDER BY thousand <-> 300::int8; + +DROP INDEX tenk3_idx; + +-- Test distance ordering by DESC index +CREATE INDEX tenk3_idx ON tenk3 USING btree(thousand DESC, tenthous); + +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) +ORDER BY thousand <-> 998; + +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) +ORDER BY thousand <-> 0; + +SELECT thousand, tenthous FROM tenk3 +WHERE (thousand, tenthous) >= (997, 5000) AND thousand < 1000 +ORDER BY thousand <-> 10000; + +SELECT thousand, tenthous FROM tenk3 +ORDER BY thousand <-> 500 +OFFSET 9970; + +DROP INDEX tenk3_idx; + +DROP TABLE tenk3; + +-- Test distance ordering on by-ref types +CREATE TABLE knn_btree_ts (ts timestamp); + +INSERT INTO knn_btree_ts +SELECT timestamp '2017-05-03 
00:00:00' + tenthous * interval '1 hour' +FROM tenk1; + +CREATE INDEX knn_btree_ts_idx ON knn_btree_ts USING btree(ts); + +SELECT ts, ts <-> timestamp '2017-05-01 00:00:00' FROM knn_btree_ts ORDER BY 2 LIMIT 20; +SELECT ts, ts <-> timestamp '2018-01-01 00:00:00' FROM knn_btree_ts ORDER BY 2 LIMIT 20; + +DROP TABLE knn_btree_ts; + +RESET enable_bitmapscan; + +-- Test backward kNN scan + +SET enable_sort = OFF; + +EXPLAIN (COSTS OFF) SELECT thousand, tenthous FROM tenk1 ORDER BY thousand <-> 510; + +BEGIN work; + +DECLARE knn SCROLL CURSOR FOR +SELECT thousand, tenthous FROM tenk1 ORDER BY thousand <-> 510; + +FETCH LAST FROM knn; +FETCH BACKWARD 15 FROM knn; +FETCH RELATIVE -200 FROM knn; +FETCH BACKWARD 20 FROM knn; +FETCH FIRST FROM knn; +FETCH LAST FROM knn; +FETCH RELATIVE -215 FROM knn; +FETCH BACKWARD 20 FROM knn; + +ROLLBACK work; + +RESET enable_sort; \ No newline at end of file diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index 2fe7b6dcc49..d871c80866d 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -814,6 +814,8 @@ WHERE o1.oprnegate = o2.oid AND p1.oid = o1.oprcode AND p2.oid = o2.oprcode AND -- Btree comparison operators' functions should have the same volatility -- and leakproofness markings as the associated comparison support function. +-- Btree ordering operators' functions may be not leakproof, while the +-- associated comparison support function is leakproof. 
SELECT pp.oid::regprocedure as proc, pp.provolatile as vp, pp.proleakproof as lp, po.oid::regprocedure as opr, po.provolatile as vo, po.proleakproof as lo FROM pg_proc pp, pg_proc po, pg_operator o, pg_amproc ap, pg_amop ao @@ -824,7 +826,10 @@ WHERE pp.oid = ap.amproc AND po.oid = o.oprcode AND o.oid = ao.amopopr AND ao.amoprighttype = ap.amprocrighttype AND ap.amprocnum = 1 AND (pp.provolatile != po.provolatile OR - pp.proleakproof != po.proleakproof) + (pp.proleakproof != po.proleakproof AND + ao.amoppurpose = 's') OR + (pp.proleakproof < po.proleakproof AND + ao.amoppurpose = 'o')) ORDER BY 1; -- 2.45.2