Thread: subquery results bypassed

subquery results bypassed

From
pgsql-bugs@postgresql.org
Date:
Anthony Wood (woody+postgresqlbugs@switchonline.com.au) reports a bug with a severity of 1
The lower the number the more severe it is.

Short Description
subquery results bypassed

Long Description
The second query in the example code should return:

 a | b | c
---+---+---
(0 rows)

but actually returns

 a | b | c
---+---+---
 1 | 3 | 2
(1 row)


which is wrong, because this tuple is not in the subquery

Sample Code
CREATE TABLE "bug" ("a" TEXT, "b" TEXT, "c" TEXT);
INSERT INTO "bug" VALUES ('1','2','1');
INSERT INTO "bug" VALUES ('1','3','2');
SELECT DISTINCT ON ("a") * FROM "bug" ORDER BY "a","c";
SELECT * FROM (SELECT DISTINCT ON ("a") * FROM "bug" ORDER BY "a","c") bug WHERE "b"='3';
DROP TABLE "bug";


No file was uploaded with this report

Re: subquery results bypassed

From
Tom Lane
Date:
pgsql-bugs@postgresql.org writes:
> [ SELECT DISTINCT ON in a subquery-in-FROM misbehaves ]

Oooh, good catch!  I had thought about pushing down quals into a SELECT
DISTINCT, and concluded it was OK because the qual would eliminate all
or none of a set of not-DISTINCT rows.  But I forgot about DISTINCT ON
:-(.

Will have a source patch for this tomorrow, if that helps.

            regards, tom lane

Re: subquery results bypassed

From
Tom Lane
Date:
Anthony Wood (woody+postgresqlbugs@switchonline.com.au) writes:
> [ SELECT DISTINCT ON in a subquery-in-FROM misbehaves ]

Here's the patch against 7.1.2 to fix this problem.  This also fixes a
related problem noted a few days ago, that outer WHERE clauses shouldn't
be pushed down into a sub-select that has a LIMIT clause.

            regards, tom lane

*** src/backend/optimizer/path/allpaths.c.orig    Wed Mar 21 22:59:34 2001
--- src/backend/optimizer/path/allpaths.c    Tue Jul 31 14:05:05 2001
***************
*** 125,135 ****
               * Non-pushed-down clauses will get evaluated as qpquals of
               * the SubqueryScan node.
               *
               * XXX Are there any cases where we want to make a policy
               * decision not to push down, because it'd result in a worse
               * plan?
               */
!             if (rte->subquery->setOperations == NULL)
              {
                  /* OK to consider pushing down individual quals */
                  List       *upperrestrictlist = NIL;
--- 125,141 ----
               * Non-pushed-down clauses will get evaluated as qpquals of
               * the SubqueryScan node.
               *
+              * We can't push down into subqueries with LIMIT or DISTINCT ON
+              * clauses, either.
+              *
               * XXX Are there any cases where we want to make a policy
               * decision not to push down, because it'd result in a worse
               * plan?
               */
!             if (rte->subquery->setOperations == NULL &&
!                 rte->subquery->limitOffset == NULL &&
!                 rte->subquery->limitCount == NULL &&
!                 !has_distinct_on_clause(rte->subquery))
              {
                  /* OK to consider pushing down individual quals */
                  List       *upperrestrictlist = NIL;
*** src/backend/optimizer/util/clauses.c.orig    Tue Mar 27 12:12:34 2001
--- src/backend/optimizer/util/clauses.c    Tue Jul 31 14:05:01 2001
***************
*** 730,742 ****


  /*****************************************************************************
   *                                                                             *
   *        General clause-manipulating routines                                 *
   *                                                                             *
   *****************************************************************************/

  /*
!  * clause_relids_vars
   *      Retrieves distinct relids and vars appearing within a clause.
   *
   * '*relids' is set to an integer list of all distinct "varno"s appearing
--- 730,794 ----


  /*****************************************************************************
+  *        Tests on clauses of queries
+  *
+  * Possibly this code should go someplace else, since this isn't quite the
+  * same meaning of "clause" as is used elsewhere in this module.  But I can't
+  * think of a better place for it...
+  *****************************************************************************/
+
+ /*
+  * Test whether a query uses DISTINCT ON, ie, has a distinct-list that is
+  * just a subset of the output columns.
+  */
+ bool
+ has_distinct_on_clause(Query *query)
+ {
+     List   *targetList;
+
+     /* Is there a DISTINCT clause at all? */
+     if (query->distinctClause == NIL)
+         return false;
+     /*
+      * If the DISTINCT list contains all the nonjunk targetlist items,
+      * then it's a simple DISTINCT, else it's DISTINCT ON.  We do not
+      * require the lists to be in the same order (since the parser may
+      * have adjusted the DISTINCT clause ordering to agree with ORDER BY).
+      */
+     foreach(targetList, query->targetList)
+     {
+         TargetEntry *tle = (TargetEntry *) lfirst(targetList);
+         Index        ressortgroupref;
+         List       *distinctClause;
+
+         if (tle->resdom->resjunk)
+             continue;
+         ressortgroupref = tle->resdom->ressortgroupref;
+         if (ressortgroupref == 0)
+             return true;        /* definitely not in DISTINCT list */
+         foreach(distinctClause, query->distinctClause)
+         {
+             SortClause *scl = (SortClause *) lfirst(distinctClause);
+
+             if (scl->tleSortGroupRef == ressortgroupref)
+                 break;            /* found TLE in DISTINCT */
+         }
+         if (distinctClause == NIL)
+             return true;        /* this TLE is not in DISTINCT list */
+     }
+     /* It's a simple DISTINCT */
+     return false;
+ }
+
+
+ /*****************************************************************************
   *                                                                             *
   *        General clause-manipulating routines                                 *
   *                                                                             *
   *****************************************************************************/

  /*
!  * clause_get_relids_vars
   *      Retrieves distinct relids and vars appearing within a clause.
   *
   * '*relids' is set to an integer list of all distinct "varno"s appearing
*** src/backend/utils/adt/ruleutils.c.orig    Wed Apr 18 13:04:24 2001
--- src/backend/utils/adt/ruleutils.c    Tue Jul 31 14:04:51 2001
***************
*** 119,125 ****
  static void get_basic_select_query(Query *query, deparse_context *context);
  static void get_setop_query(Node *setOp, Query *query,
                  deparse_context *context, bool toplevel);
- static bool simple_distinct(List *distinctClause, List *targetList);
  static void get_rule_sortgroupclause(SortClause *srt, List *tlist,
                                       bool force_colno,
                                       deparse_context *context);
--- 119,124 ----
***************
*** 1006,1014 ****
      /* Add the DISTINCT clause if given */
      if (query->distinctClause != NIL)
      {
!         if (simple_distinct(query->distinctClause, query->targetList))
!             appendStringInfo(buf, " DISTINCT");
!         else
          {
              appendStringInfo(buf, " DISTINCT ON (");
              sep = "";
--- 1005,1011 ----
      /* Add the DISTINCT clause if given */
      if (query->distinctClause != NIL)
      {
!         if (has_distinct_on_clause(query))
          {
              appendStringInfo(buf, " DISTINCT ON (");
              sep = "";
***************
*** 1023,1028 ****
--- 1020,1027 ----
              }
              appendStringInfo(buf, ")");
          }
+         else
+             appendStringInfo(buf, " DISTINCT");
      }

      /* Then we tell what to select (the targetlist) */
***************
*** 1147,1180 ****
          elog(ERROR, "get_setop_query: unexpected node %d",
               (int) nodeTag(setOp));
      }
- }
-
- /*
-  * Detect whether a DISTINCT list can be represented as just DISTINCT
-  * or needs DISTINCT ON.  It's simple if it contains exactly the nonjunk
-  * targetlist items.
-  */
- static bool
- simple_distinct(List *distinctClause, List *targetList)
- {
-     while (targetList)
-     {
-         TargetEntry *tle = (TargetEntry *) lfirst(targetList);
-
-         if (!tle->resdom->resjunk)
-         {
-             if (distinctClause == NIL)
-                 return false;
-             if (((SortClause *) lfirst(distinctClause))->tleSortGroupRef !=
-                 tle->resdom->ressortgroupref)
-                 return false;
-             distinctClause = lnext(distinctClause);
-         }
-         targetList = lnext(targetList);
-     }
-     if (distinctClause != NIL)
-         return false;
-     return true;
  }

  /*
--- 1146,1151 ----
*** src/include/optimizer/clauses.h.orig    Wed Mar 21 23:00:53 2001
--- src/include/optimizer/clauses.h    Tue Jul 31 14:04:35 2001
***************
*** 59,64 ****
--- 59,66 ----
  extern bool is_pseudo_constant_clause(Node *clause);
  extern List *pull_constant_clauses(List *quals, List **constantQual);

+ extern bool has_distinct_on_clause(Query *query);
+
  extern void clause_get_relids_vars(Node *clause, Relids *relids, List **vars);
  extern int    NumRelids(Node *clause);
  extern void get_relattval(Node *clause, int targetrelid,