Re: Use unique index for longer pathkeys. - Mailing list pgsql-hackers

From Kyotaro HORIGUCHI
Subject Re: Use unique index for longer pathkeys.
Date
Msg-id 20140715.174748.227122637.horiguchi.kyotaro@lab.ntt.co.jp
Whole thread Raw
In response to Re: Use unique index for longer pathkeys.  (Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>)
Responses Re: Use unique index for longer pathkeys.  (Amit Kapila <amit.kapila16@gmail.com>)
List pgsql-hackers
Hi, the attached is the revised version.
- defined allnotnull instead of full_ordered in RelOptInfo and  changed to use it.
- compare_pathkeys_ignoring_strategy() is now integrated into  compare_pathkeys().
- index pathkeys caching in RelOptInfo  added. (build_index_pathkeys() does)
- Refactoring collect_common_primary_pathkeys mainly from a  reason of eliminating code duplicates and readability.
- trim_query_pathkeys now tries to trim window_pathkeys. The old  comment came from my misunderstanding. Window
pathkeysare not  always RowExpr and RowExpr pathkeys are feedable from not only  PARTITION BY clause but also ORDER BY,
DISTINCTor GROUP BY  clauses. The comment for the function is modified.
 
- Some cosmetic fixes including spacing after commas.
- The shortcut judgement is *not* included.


regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 9573a9b..546e112 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1789,9 +1789,11 @@ _outIndexOptInfo(StringInfo str, const IndexOptInfo *node)    /* indexprs is redundant since we
printindextlist */    WRITE_NODE_FIELD(indpred);    WRITE_NODE_FIELD(indextlist);
 
+    /* cached pathkeys are omitted as indexkeys */    WRITE_BOOL_FIELD(predOK);    WRITE_BOOL_FIELD(unique);
WRITE_BOOL_FIELD(immediate);
+    WRITE_BOOL_FIELD(allnotnull);    WRITE_BOOL_FIELD(hypothetical);    /* we don't bother with fields copied from the
pg_amentry */}
 
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index c81efe9..751eb6a 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -58,6 +58,7 @@ int            geqo_threshold;join_search_hook_type join_search_hook = NULL;
+static List *collect_common_primary_pathkeys(PlannerInfo *root);static void set_base_rel_sizes(PlannerInfo
*root);staticvoid set_base_rel_pathlists(PlannerInfo *root);static void set_rel_size(PlannerInfo *root, RelOptInfo
*rel,
@@ -66,6 +67,7 @@ static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,                 Index rti,
RangeTblEntry*rte);static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,                   RangeTblEntry
*rte);
+static void trim_query_pathkeys(PlannerInfo * root);static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo
*rel,                      RangeTblEntry *rte);static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
 
@@ -112,6 +114,203 @@ static void recurse_push_qual(Node *setOp, Query *topquery,                  RangeTblEntry *rte,
Indexrti, Node *qual);static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
 
+/*
+ * collect_common_primary_pathkeys: Collects common unique and non-null index
+ * pathkeys from all base relations in current root.
+ */
+static List *
+collect_common_primary_pathkeys(PlannerInfo *root)
+{
+    List *result = NIL;
+    Index rti;
+    int   nindex = 0;
+    List **pathkeys_array0 = NULL;
+    List **pathkeys_array = NULL;
+    List **pathkeys_array2 = NULL;
+    bool do_conjunction = (root->simple_rel_array_size > 2);
+
+    for (rti = 1 ; rti < root->simple_rel_array_size ; rti++)
+    {
+        RelOptInfo *rel = root->simple_rel_array[rti];
+        int prev_nindex = 0;
+        ListCell *lc;
+
+        if (rel == NULL)
+            continue;
+
+        Assert (rel->relid == rti);
+
+        /* Scan all base relations except parent relations. */
+        if (root->simple_rte_array[rti]->inh ||
+            (rel->reloptkind != RELOPT_BASEREL &&
+             rel->reloptkind != RELOPT_OTHER_MEMBER_REL))
+            continue;
+
+        if (do_conjunction)
+        {
+            if (pathkeys_array2)
+                memset(&pathkeys_array2, 0, sizeof(List*) * nindex);
+            else
+            {
+                /* Rig for AND (conjunction) operation. */
+                nindex = 0;
+
+                foreach (lc, rel->indexlist)
+                {
+                    IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+                    
+                    if (index->allnotnull &&
+                        index->unique && index->immediate &&
+                        index->indpred == NIL)
+                        nindex++;
+                }
+
+                if (nindex == 0)
+                    return NULL; /* No common primary pathkeys exists.*/
+
+                pathkeys_array0 = palloc0(sizeof(List*) * nindex * 2);
+                pathkeys_array  = pathkeys_array0;
+                pathkeys_array2 = pathkeys_array0 + nindex;
+            }
+        }
+
+        prev_nindex = nindex;
+        nindex = 0;
+
+        foreach (lc, rel->indexlist)
+        {
+            IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+            List *idx_pathkeys;
+                
+            /* Only indexes that can unique sort are needed. */
+            if (!index->allnotnull ||
+                !(index->unique && index->immediate) ||
+                index->indpred != NIL)
+                continue;
+
+            idx_pathkeys = build_index_pathkeys(root, index,
+                                                ForwardScanDirection);
+            if (!idx_pathkeys)
+                continue;
+
+            if (!do_conjunction)
+            {
+                result = lappend(result, idx_pathkeys);
+                nindex++;
+            }
+            else
+            {
+                /*
+                 * AND operation: drop pathkeys which don't see identical
+                 * one in this relation.
+                 */
+                int i;
+
+                for (i = 0 ; i < prev_nindex ; i++)
+                {
+                    if (compare_pathkeys(pathkeys_array[i],
+                                         idx_pathkeys,
+                                         false) == PATHKEYS_EQUAL)
+                        break;
+                }
+                if (prev_nindex < 1 || i < prev_nindex)
+                    pathkeys_array2[nindex++] = idx_pathkeys;
+            }
+        }
+
+        if (nindex == 0)
+        {
+            /* No common primary pathkeys remained, exit. */
+            if (pathkeys_array0)
+                pfree(pathkeys_array0);
+            return NULL;
+        }
+
+        if (do_conjunction)
+        {
+            List **pktmp;
+
+            /* swap two pathkeys arrays for the next iteration */
+            pktmp = pathkeys_array2;
+            pathkeys_array2 = pathkeys_array;
+            pathkeys_array = pktmp;
+        }
+    }
+
+    if (do_conjunction)
+    {
+        int i;
+
+        for (i = 0 ; i < nindex ; i++)
+        {
+            if (pathkeys_array[i])
+                result = lappend(result, pathkeys_array[i]);
+        }
+        pfree(pathkeys_array0);
+    }
+
+    return result;
+}
+
+/*
+ * trim_query_pathkeys: trim pathkeys following common primary pathkeys.
+ *
+ * Primary pathkeys are pathkeys which perfectly sorts tuples in its owner
+ * table so the query pathkeys can be replaced with the primary pathkeys when
+ * the latter prefixes the former. Common primary pathkeys are the primary
+ * pathkeys shared among all base relations accessed in current root.
+ *
+ * This function doesn't trim RowExpr pathkeys.
+ */
+static void
+trim_query_pathkeys(PlannerInfo * root)
+{
+    ListCell   *lc;
+
+    foreach(lc, collect_common_primary_pathkeys(root))
+    {
+        List *pk_guide = (List*) lfirst(lc);
+
+        if (root->sort_pathkeys)
+        {
+            /* sort_pathkeys is not allowd to flip sorting directions
+             * differently from other pathkeys. So try to trim sort_pathkeys
+             * first preserving per-key directions, then try to trim other
+             * pathkeys using the trimmed sort_pathkeys as the guide.
+             */
+
+            root->sort_pathkeys = 
+                shorten_pathkeys_following(root, root->sort_pathkeys, pk_guide,
+                                           false);
+
+            /*
+             * Replace guide pathkeys with new sort_pathkeys if they differs
+             * from each other only by sorting directions.
+             */
+            if (compare_pathkeys(root->sort_pathkeys,
+                                 pk_guide, false) !=  PATHKEYS_EQUAL &&
+                compare_pathkeys(root->sort_pathkeys,
+                                 pk_guide, true) == PATHKEYS_EQUAL)
+                pk_guide = root->sort_pathkeys;
+        }            
+
+        root->group_pathkeys =
+            shorten_pathkeys_following(root, root->group_pathkeys, pk_guide,
+                                       true);
+
+        root->window_pathkeys =
+            shorten_pathkeys_following(root, root->window_pathkeys, pk_guide,
+                                       true);
+
+        root->distinct_pathkeys =
+            shorten_pathkeys_following(root, root->distinct_pathkeys, pk_guide,
+                                       true);
+
+        root->query_pathkeys =
+            shorten_pathkeys_following(root, root->query_pathkeys, pk_guide,
+                                       true);
+    }    
+}/* * make_one_rel
@@ -149,6 +348,7 @@ make_one_rel(PlannerInfo *root, List *joinlist)     * Generate access paths for the base rels.
*/   set_base_rel_sizes(root);
 
+    trim_query_pathkeys(root);    set_base_rel_pathlists(root);    /*
@@ -755,7 +955,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,                    List
*existing_pathkeys= (List *) lfirst(lpk);                    if (compare_pathkeys(existing_pathkeys,
 
-                                         childkeys) == PATHKEYS_EQUAL)
+                                         childkeys, false) == PATHKEYS_EQUAL)                    {
  found = true;                        break;
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 5d953df..6cc9754 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -275,10 +275,13 @@ make_pathkey_from_sortop(PlannerInfo *root, *      one is "better" than the other. * *      We
assumethe pathkeys are canonical, and so they can be checked for
 
- *      equality by simple pointer comparison.
+ *      equality by simple pointer comparison when ignore_strategy is false.
+ *
+ *      If ignore_strategy is true, the two pathkeys with no difference other
+ *      than strategy are regarded as the same. */PathKeysComparison
-compare_pathkeys(List *keys1, List *keys2)
+compare_pathkeys(List *keys1, List *keys2, bool ignore_strategy){    ListCell   *key1,               *key2;
@@ -296,7 +299,23 @@ compare_pathkeys(List *keys1, List *keys2)        PathKey    *pathkey1 = (PathKey *) lfirst(key1);
      PathKey    *pathkey2 = (PathKey *) lfirst(key2);
 
-        if (pathkey1 != pathkey2)
+        if (pathkey1 == pathkey2)
+            continue;
+
+        if (!ignore_strategy)
+            return PATHKEYS_DIFFERENT;    /* no need to keep looking */
+
+        Assert(pathkey1->pk_strategy == BTGreaterStrategyNumber ||
+               pathkey1->pk_strategy == BTLessStrategyNumber);
+        Assert(pathkey2->pk_strategy == BTGreaterStrategyNumber ||
+               pathkey2->pk_strategy == BTLessStrategyNumber);
+
+        if (pathkey1->pk_eclass   != pathkey2->pk_eclass ||
+            pathkey1->pk_opfamily != pathkey2->pk_opfamily ||
+            !((pathkey1->pk_strategy    == pathkey2->pk_strategy &&
+               pathkey1->pk_nulls_first == pathkey2->pk_nulls_first) ||
+              (pathkey1->pk_strategy    != pathkey2->pk_strategy &&
+               pathkey1->pk_nulls_first != pathkey2->pk_nulls_first)))            return PATHKEYS_DIFFERENT;    /* no
needto keep looking */    }
 
@@ -319,7 +338,7 @@ compare_pathkeys(List *keys1, List *keys2)boolpathkeys_contained_in(List *keys1, List *keys2){
-    switch (compare_pathkeys(keys1, keys2))
+    switch (compare_pathkeys(keys1, keys2, false))    {        case PATHKEYS_EQUAL:        case PATHKEYS_BETTER2:
@@ -440,10 +459,15 @@ build_index_pathkeys(PlannerInfo *root,    List       *retval = NIL;    ListCell   *lc;    int
       i;
 
+    List       *cached_pathkeys = (ScanDirectionIsBackward(scandir) ?
+                                   index->indbwpathkeys : index->indfwpathkeys);    if (index->sortopfamily == NULL)
    return NIL;                /* non-orderable index */
 
+    if (cached_pathkeys)
+        return cached_pathkeys; /* Return cached pathkeys if any */
+    i = 0;    foreach(lc, index->indextlist)    {
@@ -498,6 +522,12 @@ build_index_pathkeys(PlannerInfo *root,        i++;    }
+    /* Store created pathkeys for future use */
+    if (ScanDirectionIsBackward(scandir))
+        index->indbwpathkeys = retval;
+    else
+        index->indfwpathkeys = retval;
+    return retval;}
@@ -1525,3 +1555,87 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)        return true;            /* might
beable to use them for ordering */    return false;                /* definitely useless */}
 
+
+/*
+ * shorten_pathkeys_following: returns pk_guide if it prefixes pk_target
+ * regardless of the ordering directions of the pathkeys.
+ * allow_partial_flip allows ordering directions to be partially different
+ * between pk_target and pk_guide.
+ */
+List *
+shorten_pathkeys_following(PlannerInfo *root, List *pk_target, List *pk_guide,
+    bool allow_partial_flip)
+{
+    ListCell *tlc, *glc;
+    int same = 0, reverse = 0;
+    List *result;
+
+    if (pk_target == NIL)
+        return NIL;
+
+    if (list_length(pk_target) < list_length(pk_guide))
+        return pk_target;
+
+    forboth(tlc, pk_target, glc, pk_guide)
+    {
+        PathKey *tpk = (PathKey *) lfirst(tlc);
+        PathKey *gpk = (PathKey *) lfirst(glc);
+
+        if (tpk == gpk)
+        {
+            same++;
+            continue;
+        }
+
+        Assert(tpk->pk_strategy == BTGreaterStrategyNumber ||
+               tpk->pk_strategy == BTLessStrategyNumber);
+        Assert(gpk->pk_strategy == BTGreaterStrategyNumber ||
+               gpk->pk_strategy == BTLessStrategyNumber);
+
+        if (tpk->pk_eclass == gpk->pk_eclass &&
+            tpk->pk_opfamily == gpk->pk_opfamily &&
+            tpk->pk_strategy != gpk->pk_strategy &&
+            tpk->pk_nulls_first != gpk->pk_nulls_first)
+        {
+            reverse++;
+            continue;
+        }
+
+        return pk_target;
+    }
+
+    /* trailing members in pk_target don't matter */
+
+    /* reverse == 0 means that pk_guide exactly prefixes pk_target */
+    if (allow_partial_flip || reverse == 0)
+        return pk_guide;
+
+    /*
+     * Don't replace pathkeys if partial flip is not allowed but partially
+     * flipped.
+     */
+    Assert(reverse > 0);
+    if (same > 0)
+        return pk_target;
+
+    /* make then reutrn the reverse pathkeys of pk_guide */
+    result = NIL;
+    foreach (glc, pk_guide)
+    {
+        PathKey *gpk = (PathKey *) lfirst(glc);
+        PathKey *pk;
+        EquivalenceClass *ec = gpk->pk_eclass;
+        int revstr;
+
+        if (gpk->pk_strategy == BTGreaterStrategyNumber)
+            revstr = BTLessStrategyNumber;
+        else
+            revstr = BTGreaterStrategyNumber;
+
+        pk = make_canonical_pathkey(root, ec, gpk->pk_opfamily, revstr,
+                                    !gpk->pk_nulls_first);
+
+        result = lappend(result, pk);
+    }
+    return result;
+}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index d129f8d..9433a40 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -312,14 +312,14 @@ set_cheapest(RelOptInfo *parent_rel)            if (cmp > 0 ||                (cmp == 0 &&
        compare_pathkeys(cheapest_startup_path->pathkeys,
 
-                                  path->pathkeys) == PATHKEYS_BETTER2))
+                                  path->pathkeys, false) == PATHKEYS_BETTER2))                cheapest_startup_path =
path;           cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);            if (cmp > 0 ||
 (cmp == 0 &&                 compare_pathkeys(cheapest_total_path->pathkeys,
 
-                                  path->pathkeys) == PATHKEYS_BETTER2))
+                                  path->pathkeys, false) == PATHKEYS_BETTER2))                cheapest_total_path =
path;       }    }
 
@@ -455,7 +455,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path)            old_path_pathkeys =
old_path->param_info? NIL : old_path->pathkeys;            keyscmp = compare_pathkeys(new_path_pathkeys,
 
-                                       old_path_pathkeys);
+                                       old_path_pathkeys, false);            if (keyscmp != PATHKEYS_DIFFERENT)
   {                switch (costcmp)
 
@@ -653,7 +653,7 @@ add_path_precheck(RelOptInfo *parent_rel,                old_path_pathkeys = old_path->param_info ?
NIL: old_path->pathkeys;                keyscmp = compare_pathkeys(new_path_pathkeys,
 
-                                           old_path_pathkeys);
+                                           old_path_pathkeys, false);                if (keyscmp == PATHKEYS_EQUAL ||
                 keyscmp == PATHKEYS_BETTER2)                {
 
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index b2becfa..1cebd2b 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -241,12 +241,28 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
info->reverse_sort= (bool *) palloc(sizeof(bool) * ncolumns);                info->nulls_first = (bool *)
palloc(sizeof(bool)* ncolumns);
 
+                info->allnotnull = true;                for (i = 0; i < ncolumns; i++)                {
   int16        opt = indexRelation->rd_indoption[i];
 
+                    int16        attno = info->indexkeys[i];                    info->reverse_sort[i] = (opt &
INDOPTION_DESC)!= 0;                    info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
 
+
+                    if (!info->allnotnull) continue;
+
+                    /* Check if there's no nullable key column */
+                    if (attno > 0)
+                    {
+                        if (!relation->rd_att->attrs[attno - 1]->attnotnull)
+                            info->allnotnull = false;
+                    }
+                    else if (attno != ObjectIdAttributeNumber)
+                    {
+                        /* OID is apparently not-null */
+                        info->allnotnull = false; 
+                    }                }            }            else if (indexRelation->rd_am->amcanorder)
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index dacbe9c..4855667 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -524,10 +524,13 @@ typedef struct IndexOptInfo    List       *indpred;        /* predicate if a partial index, else
NIL*/    List       *indextlist;        /* targetlist representing index columns */
 
+    List       *indfwpathkeys;    /* index pathkeys for forward scan */
+    List       *indbwpathkeys;    /* index pathkeys for backwad scan */    bool        predOK;            /* true if
predicatematches query */    bool        unique;            /* true if a unique index */    bool        immediate;
 /* is uniqueness enforced immediately? */
 
+    bool        allnotnull;        /* all columns are "NOT NULL" ? */    bool        hypothetical;    /* true if index
doesn'treally exist */    bool        canreturn;        /* can index return IndexTuples? */    bool
amcanorderbyop;/* does AM support order by operator result? */ 
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 9b22fda..f637fc3 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -146,7 +146,10 @@ typedef enum    PATHKEYS_DIFFERENT            /* neither pathkey includes the other */}
PathKeysComparison;
-extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
+extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2,
+                                           bool ignore_strategy);
+extern PathKeysComparison compare_pathkeys_ignoring_strategy(List *keys1,
+                                                             List *keys2);extern bool pathkeys_contained_in(List
*keys1,List *keys2);extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
  Relids required_outer,
 
@@ -187,5 +190,8 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root,                          RelOptInfo *rel,
                        List *pathkeys);extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
 
+extern List *shorten_pathkeys_following(PlannerInfo *root, 
+                                        List *pk_target, List *pk_guide,
+                                        bool allow_partial_flip);#endif   /* PATHS_H */
diff --git a/src/test/isolation/expected/drop-index-concurrently-1.out
b/src/test/isolation/expected/drop-index-concurrently-1.out
index 75dff56..ab96fa0 100644
--- a/src/test/isolation/expected/drop-index-concurrently-1.out
+++ b/src/test/isolation/expected/drop-index-concurrently-1.out
@@ -19,10 +19,8 @@ Sortstep explains: EXPLAIN (COSTS OFF) EXECUTE getrow_seq;QUERY PLAN     
-Sort           
-  Sort Key: id, data
-  ->  Seq Scan on test_dc
-        Filter: ((data)::text = '34'::text)
+Index Scan using test_dc_pkey on test_dc
+  Filter: ((data)::text = '34'::text)step select2: SELECT * FROM test_dc WHERE data=34 ORDER BY id,data;id
data           

pgsql-hackers by date:

Previous
From: Dilip kumar
Date:
Subject: Re: Selectivity estimation for inet operators
Next
From: Fujii Masao
Date:
Subject: Re: psql: show only failed queries