Re: Use unique index for longer pathkeys. - Mailing list pgsql-hackers
From | Kyotaro HORIGUCHI |
---|---|
Subject | Re: Use unique index for longer pathkeys. |
Date | |
Msg-id | 20140715.174748.227122637.horiguchi.kyotaro@lab.ntt.co.jp Whole thread Raw |
In response to | Re: Use unique index for longer pathkeys. (Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>) |
Responses |
Re: Use unique index for longer pathkeys.
|
List | pgsql-hackers |
Hi, the attached is the revised version. - defined allnotnull instead of full_ordered in RelOptInfo and changed to use it. - compare_pathkeys_ignoring_strategy() is now integrated into compare_pathkeys(). - index pathkeys caching in RelOptInfo added. (build_index_pathkeys() does) - Refactoring collect_common_primary_pathkeys mainly from a reason of eliminating code duplicates and readability. - trim_query_pathkeys now tries to trim window_pathkeys. The old comment came from my misunderstanding. Window pathkeysare not always RowExpr and RowExpr pathkeys are feedable from not only PARTITION BY clause but also ORDER BY, DISTINCTor GROUP BY clauses. The comment for the function is modified. - Some cosmetic fixes including spacing after commas. - The shortcut judgement is *not* included. regards, -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 9573a9b..546e112 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1789,9 +1789,11 @@ _outIndexOptInfo(StringInfo str, const IndexOptInfo *node) /* indexprs is redundant since we printindextlist */ WRITE_NODE_FIELD(indpred); WRITE_NODE_FIELD(indextlist); + /* cached pathkeys are omitted as indexkeys */ WRITE_BOOL_FIELD(predOK); WRITE_BOOL_FIELD(unique); WRITE_BOOL_FIELD(immediate); + WRITE_BOOL_FIELD(allnotnull); WRITE_BOOL_FIELD(hypothetical); /* we don't bother with fields copied from the pg_amentry */} diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index c81efe9..751eb6a 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -58,6 +58,7 @@ int geqo_threshold;join_search_hook_type join_search_hook = NULL; +static List *collect_common_primary_pathkeys(PlannerInfo *root);static void set_base_rel_sizes(PlannerInfo *root);staticvoid set_base_rel_pathlists(PlannerInfo *root);static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, @@ -66,6 +67,7 @@ static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry*rte);static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); +static void trim_query_pathkeys(PlannerInfo * root);static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte);static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel, @@ -112,6 +114,203 @@ static void recurse_push_qual(Node *setOp, Query *topquery, RangeTblEntry *rte, Indexrti, Node *qual);static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); +/* + * collect_common_primary_pathkeys: Collects common unique and non-null index + * pathkeys from all base relations in current root. + */ +static List * +collect_common_primary_pathkeys(PlannerInfo *root) +{ + List *result = NIL; + Index rti; + int nindex = 0; + List **pathkeys_array0 = NULL; + List **pathkeys_array = NULL; + List **pathkeys_array2 = NULL; + bool do_conjunction = (root->simple_rel_array_size > 2); + + for (rti = 1 ; rti < root->simple_rel_array_size ; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + int prev_nindex = 0; + ListCell *lc; + + if (rel == NULL) + continue; + + Assert (rel->relid == rti); + + /* Scan all base relations except parent relations. */ + if (root->simple_rte_array[rti]->inh || + (rel->reloptkind != RELOPT_BASEREL && + rel->reloptkind != RELOPT_OTHER_MEMBER_REL)) + continue; + + if (do_conjunction) + { + if (pathkeys_array2) + memset(&pathkeys_array2, 0, sizeof(List*) * nindex); + else + { + /* Rig for AND (conjunction) operation. */ + nindex = 0; + + foreach (lc, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc); + + if (index->allnotnull && + index->unique && index->immediate && + index->indpred == NIL) + nindex++; + } + + if (nindex == 0) + return NULL; /* No common primary pathkeys exists.*/ + + pathkeys_array0 = palloc0(sizeof(List*) * nindex * 2); + pathkeys_array = pathkeys_array0; + pathkeys_array2 = pathkeys_array0 + nindex; + } + } + + prev_nindex = nindex; + nindex = 0; + + foreach (lc, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc); + List *idx_pathkeys; + + /* Only indexes that can unique sort are needed. */ + if (!index->allnotnull || + !(index->unique && index->immediate) || + index->indpred != NIL) + continue; + + idx_pathkeys = build_index_pathkeys(root, index, + ForwardScanDirection); + if (!idx_pathkeys) + continue; + + if (!do_conjunction) + { + result = lappend(result, idx_pathkeys); + nindex++; + } + else + { + /* + * AND operation: drop pathkeys which don't see identical + * one in this relation. + */ + int i; + + for (i = 0 ; i < prev_nindex ; i++) + { + if (compare_pathkeys(pathkeys_array[i], + idx_pathkeys, + false) == PATHKEYS_EQUAL) + break; + } + if (prev_nindex < 1 || i < prev_nindex) + pathkeys_array2[nindex++] = idx_pathkeys; + } + } + + if (nindex == 0) + { + /* No common primary pathkeys remained, exit. */ + if (pathkeys_array0) + pfree(pathkeys_array0); + return NULL; + } + + if (do_conjunction) + { + List **pktmp; + + /* swap two pathkeys arrays for the next iteration */ + pktmp = pathkeys_array2; + pathkeys_array2 = pathkeys_array; + pathkeys_array = pktmp; + } + } + + if (do_conjunction) + { + int i; + + for (i = 0 ; i < nindex ; i++) + { + if (pathkeys_array[i]) + result = lappend(result, pathkeys_array[i]); + } + pfree(pathkeys_array0); + } + + return result; +} + +/* + * trim_query_pathkeys: trim pathkeys following common primary pathkeys. + * + * Primary pathkeys are pathkeys which perfectly sorts tuples in its owner + * table so the query pathkeys can be replaced with the primary pathkeys when + * the latter prefixes the former. Common primary pathkeys are the primary + * pathkeys shared among all base relations accessed in current root. + * + * This function doesn't trim RowExpr pathkeys. + */ +static void +trim_query_pathkeys(PlannerInfo * root) +{ + ListCell *lc; + + foreach(lc, collect_common_primary_pathkeys(root)) + { + List *pk_guide = (List*) lfirst(lc); + + if (root->sort_pathkeys) + { + /* sort_pathkeys is not allowd to flip sorting directions + * differently from other pathkeys. So try to trim sort_pathkeys + * first preserving per-key directions, then try to trim other + * pathkeys using the trimmed sort_pathkeys as the guide. + */ + + root->sort_pathkeys = + shorten_pathkeys_following(root, root->sort_pathkeys, pk_guide, + false); + + /* + * Replace guide pathkeys with new sort_pathkeys if they differs + * from each other only by sorting directions. + */ + if (compare_pathkeys(root->sort_pathkeys, + pk_guide, false) != PATHKEYS_EQUAL && + compare_pathkeys(root->sort_pathkeys, + pk_guide, true) == PATHKEYS_EQUAL) + pk_guide = root->sort_pathkeys; + } + + root->group_pathkeys = + shorten_pathkeys_following(root, root->group_pathkeys, pk_guide, + true); + + root->window_pathkeys = + shorten_pathkeys_following(root, root->window_pathkeys, pk_guide, + true); + + root->distinct_pathkeys = + shorten_pathkeys_following(root, root->distinct_pathkeys, pk_guide, + true); + + root->query_pathkeys = + shorten_pathkeys_following(root, root->query_pathkeys, pk_guide, + true); + } +}/* * make_one_rel @@ -149,6 +348,7 @@ make_one_rel(PlannerInfo *root, List *joinlist) * Generate access paths for the base rels. */ set_base_rel_sizes(root); + trim_query_pathkeys(root); set_base_rel_pathlists(root); /* @@ -755,7 +955,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, List *existing_pathkeys= (List *) lfirst(lpk); if (compare_pathkeys(existing_pathkeys, - childkeys) == PATHKEYS_EQUAL) + childkeys, false) == PATHKEYS_EQUAL) { found = true; break; diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 5d953df..6cc9754 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -275,10 +275,13 @@ make_pathkey_from_sortop(PlannerInfo *root, * one is "better" than the other. * * We assumethe pathkeys are canonical, and so they can be checked for - * equality by simple pointer comparison. + * equality by simple pointer comparison when ignore_strategy is false. + * + * If ignore_strategy is true, the two pathkeys with no difference other + * than strategy are regarded as the same. */PathKeysComparison -compare_pathkeys(List *keys1, List *keys2) +compare_pathkeys(List *keys1, List *keys2, bool ignore_strategy){ ListCell *key1, *key2; @@ -296,7 +299,23 @@ compare_pathkeys(List *keys1, List *keys2) PathKey *pathkey1 = (PathKey *) lfirst(key1); PathKey *pathkey2 = (PathKey *) lfirst(key2); - if (pathkey1 != pathkey2) + if (pathkey1 == pathkey2) + continue; + + if (!ignore_strategy) + return PATHKEYS_DIFFERENT; /* no need to keep looking */ + + Assert(pathkey1->pk_strategy == BTGreaterStrategyNumber || + pathkey1->pk_strategy == BTLessStrategyNumber); + Assert(pathkey2->pk_strategy == BTGreaterStrategyNumber || + pathkey2->pk_strategy == BTLessStrategyNumber); + + if (pathkey1->pk_eclass != pathkey2->pk_eclass || + pathkey1->pk_opfamily != pathkey2->pk_opfamily || + !((pathkey1->pk_strategy == pathkey2->pk_strategy && + pathkey1->pk_nulls_first == pathkey2->pk_nulls_first) || + (pathkey1->pk_strategy != pathkey2->pk_strategy && + pathkey1->pk_nulls_first != pathkey2->pk_nulls_first))) return PATHKEYS_DIFFERENT; /* no needto keep looking */ } @@ -319,7 +338,7 @@ compare_pathkeys(List *keys1, List *keys2)boolpathkeys_contained_in(List *keys1, List *keys2){ - switch (compare_pathkeys(keys1, keys2)) + switch (compare_pathkeys(keys1, keys2, false)) { case PATHKEYS_EQUAL: case PATHKEYS_BETTER2: @@ -440,10 +459,15 @@ build_index_pathkeys(PlannerInfo *root, List *retval = NIL; ListCell *lc; int i; + List *cached_pathkeys = (ScanDirectionIsBackward(scandir) ? + index->indbwpathkeys : index->indfwpathkeys); if (index->sortopfamily == NULL) return NIL; /* non-orderable index */ + if (cached_pathkeys) + return cached_pathkeys; /* Return cached pathkeys if any */ + i = 0; foreach(lc, index->indextlist) { @@ -498,6 +522,12 @@ build_index_pathkeys(PlannerInfo *root, i++; } + /* Store created pathkeys for future use */ + if (ScanDirectionIsBackward(scandir)) + index->indbwpathkeys = retval; + else + index->indfwpathkeys = retval; + return retval;} @@ -1525,3 +1555,87 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) return true; /* might beable to use them for ordering */ return false; /* definitely useless */} + +/* + * shorten_pathkeys_following: returns pk_guide if it prefixes pk_target + * regardless of the ordering directions of the pathkeys. + * allow_partial_flip allows ordering directions to be partially different + * between pk_target and pk_guide. + */ +List * +shorten_pathkeys_following(PlannerInfo *root, List *pk_target, List *pk_guide, + bool allow_partial_flip) +{ + ListCell *tlc, *glc; + int same = 0, reverse = 0; + List *result; + + if (pk_target == NIL) + return NIL; + + if (list_length(pk_target) < list_length(pk_guide)) + return pk_target; + + forboth(tlc, pk_target, glc, pk_guide) + { + PathKey *tpk = (PathKey *) lfirst(tlc); + PathKey *gpk = (PathKey *) lfirst(glc); + + if (tpk == gpk) + { + same++; + continue; + } + + Assert(tpk->pk_strategy == BTGreaterStrategyNumber || + tpk->pk_strategy == BTLessStrategyNumber); + Assert(gpk->pk_strategy == BTGreaterStrategyNumber || + gpk->pk_strategy == BTLessStrategyNumber); + + if (tpk->pk_eclass == gpk->pk_eclass && + tpk->pk_opfamily == gpk->pk_opfamily && + tpk->pk_strategy != gpk->pk_strategy && + tpk->pk_nulls_first != gpk->pk_nulls_first) + { + reverse++; + continue; + } + + return pk_target; + } + + /* trailing members in pk_target don't matter */ + + /* reverse == 0 means that pk_guide exactly prefixes pk_target */ + if (allow_partial_flip || reverse == 0) + return pk_guide; + + /* + * Don't replace pathkeys if partial flip is not allowed but partially + * flipped. + */ + Assert(reverse > 0); + if (same > 0) + return pk_target; + + /* make then reutrn the reverse pathkeys of pk_guide */ + result = NIL; + foreach (glc, pk_guide) + { + PathKey *gpk = (PathKey *) lfirst(glc); + PathKey *pk; + EquivalenceClass *ec = gpk->pk_eclass; + int revstr; + + if (gpk->pk_strategy == BTGreaterStrategyNumber) + revstr = BTLessStrategyNumber; + else + revstr = BTGreaterStrategyNumber; + + pk = make_canonical_pathkey(root, ec, gpk->pk_opfamily, revstr, + !gpk->pk_nulls_first); + + result = lappend(result, pk); + } + return result; +} diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index d129f8d..9433a40 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -312,14 +312,14 @@ set_cheapest(RelOptInfo *parent_rel) if (cmp > 0 || (cmp == 0 && compare_pathkeys(cheapest_startup_path->pathkeys, - path->pathkeys) == PATHKEYS_BETTER2)) + path->pathkeys, false) == PATHKEYS_BETTER2)) cheapest_startup_path = path; cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST); if (cmp > 0 || (cmp == 0 && compare_pathkeys(cheapest_total_path->pathkeys, - path->pathkeys) == PATHKEYS_BETTER2)) + path->pathkeys, false) == PATHKEYS_BETTER2)) cheapest_total_path = path; } } @@ -455,7 +455,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path) old_path_pathkeys = old_path->param_info? NIL : old_path->pathkeys; keyscmp = compare_pathkeys(new_path_pathkeys, - old_path_pathkeys); + old_path_pathkeys, false); if (keyscmp != PATHKEYS_DIFFERENT) { switch (costcmp) @@ -653,7 +653,7 @@ add_path_precheck(RelOptInfo *parent_rel, old_path_pathkeys = old_path->param_info ? NIL: old_path->pathkeys; keyscmp = compare_pathkeys(new_path_pathkeys, - old_path_pathkeys); + old_path_pathkeys, false); if (keyscmp == PATHKEYS_EQUAL || keyscmp == PATHKEYS_BETTER2) { diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index b2becfa..1cebd2b 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -241,12 +241,28 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, info->reverse_sort= (bool *) palloc(sizeof(bool) * ncolumns); info->nulls_first = (bool *) palloc(sizeof(bool)* ncolumns); + info->allnotnull = true; for (i = 0; i < ncolumns; i++) { int16 opt = indexRelation->rd_indoption[i]; + int16 attno = info->indexkeys[i]; info->reverse_sort[i] = (opt & INDOPTION_DESC)!= 0; info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0; + + if (!info->allnotnull) continue; + + /* Check if there's no nullable key column */ + if (attno > 0) + { + if (!relation->rd_att->attrs[attno - 1]->attnotnull) + info->allnotnull = false; + } + else if (attno != ObjectIdAttributeNumber) + { + /* OID is apparently not-null */ + info->allnotnull = false; + } } } else if (indexRelation->rd_am->amcanorder) diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index dacbe9c..4855667 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -524,10 +524,13 @@ typedef struct IndexOptInfo List *indpred; /* predicate if a partial index, else NIL*/ List *indextlist; /* targetlist representing index columns */ + List *indfwpathkeys; /* index pathkeys for forward scan */ + List *indbwpathkeys; /* index pathkeys for backwad scan */ bool predOK; /* true if predicatematches query */ bool unique; /* true if a unique index */ bool immediate; /* is uniqueness enforced immediately? */ + bool allnotnull; /* all columns are "NOT NULL" ? */ bool hypothetical; /* true if index doesn'treally exist */ bool canreturn; /* can index return IndexTuples? */ bool amcanorderbyop;/* does AM support order by operator result? */ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9b22fda..f637fc3 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -146,7 +146,10 @@ typedef enum PATHKEYS_DIFFERENT /* neither pathkey includes the other */} PathKeysComparison; -extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2); +extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2, + bool ignore_strategy); +extern PathKeysComparison compare_pathkeys_ignoring_strategy(List *keys1, + List *keys2);extern bool pathkeys_contained_in(List *keys1,List *keys2);extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, @@ -187,5 +190,8 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *pathkeys);extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel); +extern List *shorten_pathkeys_following(PlannerInfo *root, + List *pk_target, List *pk_guide, + bool allow_partial_flip);#endif /* PATHS_H */ diff --git a/src/test/isolation/expected/drop-index-concurrently-1.out b/src/test/isolation/expected/drop-index-concurrently-1.out index 75dff56..ab96fa0 100644 --- a/src/test/isolation/expected/drop-index-concurrently-1.out +++ b/src/test/isolation/expected/drop-index-concurrently-1.out @@ -19,10 +19,8 @@ Sortstep explains: EXPLAIN (COSTS OFF) EXECUTE getrow_seq;QUERY PLAN -Sort - Sort Key: id, data - -> Seq Scan on test_dc - Filter: ((data)::text = '34'::text) +Index Scan using test_dc_pkey on test_dc + Filter: ((data)::text = '34'::text)step select2: SELECT * FROM test_dc WHERE data=34 ORDER BY id,data;id data
pgsql-hackers by date: