Re: [BUGS] BUG #3245: PANIC: failed to re-find shared lock object - Mailing list pgsql-patches

From Tom Lane
Subject Re: [BUGS] BUG #3245: PANIC: failed to re-find shared lock object
Date
Msg-id 2042.1177616578@sss.pgh.pa.us
Whole thread Raw
Responses Re: [BUGS] BUG #3245: PANIC: failed to re-find shared lock object
List pgsql-patches
Attached is the complete patch against HEAD to prevent hashtable bucket
splits during hash_seq_search.  Any comments before I start
back-porting?  I suppose we had better patch this all the way back,
even though AtPrepare_Locks() is the only known trouble spot.

            regards, tom lane

*** src/backend/access/transam/xact.c.orig    Tue Apr  3 12:34:35 2007
--- src/backend/access/transam/xact.c    Wed Apr 25 20:32:00 2007
***************
*** 1631,1636 ****
--- 1631,1637 ----
      /* smgrcommit already done */
      AtEOXact_Files();
      AtEOXact_ComboCid();
+     AtEOXact_HashTables(true);
      pgstat_clear_snapshot();
      pgstat_count_xact_commit();
      pgstat_report_txn_timestamp(0);
***************
*** 1849,1854 ****
--- 1850,1856 ----
      /* smgrcommit already done */
      AtEOXact_Files();
      AtEOXact_ComboCid();
+     AtEOXact_HashTables(true);
      pgstat_clear_snapshot();

      CurrentResourceOwner = NULL;
***************
*** 2003,2008 ****
--- 2005,2011 ----
      smgrabort();
      AtEOXact_Files();
      AtEOXact_ComboCid();
+     AtEOXact_HashTables(false);
      pgstat_clear_snapshot();
      pgstat_count_xact_rollback();
      pgstat_report_txn_timestamp(0);
***************
*** 3716,3721 ****
--- 3719,3725 ----
                            s->parent->subTransactionId);
      AtEOSubXact_Files(true, s->subTransactionId,
                        s->parent->subTransactionId);
+     AtEOSubXact_HashTables(true, s->nestingLevel);

      /*
       * We need to restore the upper transaction's read-only state, in case the
***************
*** 3827,3832 ****
--- 3831,3837 ----
                                s->parent->subTransactionId);
          AtEOSubXact_Files(false, s->subTransactionId,
                            s->parent->subTransactionId);
+         AtEOSubXact_HashTables(false, s->nestingLevel);
      }

      /*
*** src/backend/commands/prepare.c.orig    Mon Apr 16 14:21:07 2007
--- src/backend/commands/prepare.c    Thu Apr 26 15:00:40 2007
***************
*** 21,27 ****
  #include "catalog/pg_type.h"
  #include "commands/explain.h"
  #include "commands/prepare.h"
! #include "funcapi.h"
  #include "parser/analyze.h"
  #include "parser/parse_coerce.h"
  #include "parser/parse_expr.h"
--- 21,27 ----
  #include "catalog/pg_type.h"
  #include "commands/explain.h"
  #include "commands/prepare.h"
! #include "miscadmin.h"
  #include "parser/analyze.h"
  #include "parser/parse_coerce.h"
  #include "parser/parse_expr.h"
***************
*** 743,834 ****
  Datum
  pg_prepared_statement(PG_FUNCTION_ARGS)
  {
!     FuncCallContext *funcctx;
!     HASH_SEQ_STATUS *hash_seq;
!     PreparedStatement *prep_stmt;

!     /* stuff done only on the first call of the function */
!     if (SRF_IS_FIRSTCALL())
!     {
!         TupleDesc    tupdesc;
!         MemoryContext oldcontext;

!         /* create a function context for cross-call persistence */
!         funcctx = SRF_FIRSTCALL_INIT();

!         /*
!          * switch to memory context appropriate for multiple function calls
!          */
!         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

!         /* allocate memory for user context */
!         if (prepared_queries)
          {
!             hash_seq = (HASH_SEQ_STATUS *) palloc(sizeof(HASH_SEQ_STATUS));
!             hash_seq_init(hash_seq, prepared_queries);
!             funcctx->user_fctx = (void *) hash_seq;
!         }
!         else
!             funcctx->user_fctx = NULL;

!         /*
!          * build tupdesc for result tuples. This must match the definition of
!          * the pg_prepared_statements view in system_views.sql
!          */
!         tupdesc = CreateTemplateTupleDesc(5, false);
!         TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
!                            TEXTOID, -1, 0);
!         TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
!                            TEXTOID, -1, 0);
!         TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepare_time",
!                            TIMESTAMPTZOID, -1, 0);
!         TupleDescInitEntry(tupdesc, (AttrNumber) 4, "parameter_types",
!                            REGTYPEARRAYOID, -1, 0);
!         TupleDescInitEntry(tupdesc, (AttrNumber) 5, "from_sql",
!                            BOOLOID, -1, 0);
!
!         funcctx->tuple_desc = BlessTupleDesc(tupdesc);
!         MemoryContextSwitchTo(oldcontext);
!     }
!
!     /* stuff done on every call of the function */
!     funcctx = SRF_PERCALL_SETUP();
!     hash_seq = (HASH_SEQ_STATUS *) funcctx->user_fctx;
!
!     /* if the hash table is uninitialized, we're done */
!     if (hash_seq == NULL)
!         SRF_RETURN_DONE(funcctx);
!
!     prep_stmt = hash_seq_search(hash_seq);
!     if (prep_stmt)
!     {
!         Datum        result;
!         HeapTuple    tuple;
!         Datum        values[5];
!         bool        nulls[5];

!         MemSet(nulls, 0, sizeof(nulls));

!         values[0] = DirectFunctionCall1(textin,
                                        CStringGetDatum(prep_stmt->stmt_name));

!         if (prep_stmt->plansource->query_string == NULL)
!             nulls[1] = true;
!         else
!             values[1] = DirectFunctionCall1(textin,
                          CStringGetDatum(prep_stmt->plansource->query_string));

!         values[2] = TimestampTzGetDatum(prep_stmt->prepare_time);
!         values[3] = build_regtype_array(prep_stmt->plansource->param_types,
!                                         prep_stmt->plansource->num_params);
!         values[4] = BoolGetDatum(prep_stmt->from_sql);
!
!         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
!         result = HeapTupleGetDatum(tuple);
!         SRF_RETURN_NEXT(funcctx, result);
      }

!     SRF_RETURN_DONE(funcctx);
  }

  /*
--- 743,841 ----
  Datum
  pg_prepared_statement(PG_FUNCTION_ARGS)
  {
!     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
!     TupleDesc    tupdesc;
!     Tuplestorestate *tupstore;
!     MemoryContext per_query_ctx;
!     MemoryContext oldcontext;

!     /* check to see if caller supports us returning a tuplestore */
!     if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
!         ereport(ERROR,
!                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
!                  errmsg("set-valued function called in context that cannot accept a set")));
!     if (!(rsinfo->allowedModes & SFRM_Materialize))
!         ereport(ERROR,
!                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
!                  errmsg("materialize mode required, but it is not " \
!                         "allowed in this context")));
!
!     /* need to build tuplestore in query context */
!     per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
!     oldcontext = MemoryContextSwitchTo(per_query_ctx);

!     /*
!      * build tupdesc for result tuples. This must match the definition of
!      * the pg_prepared_statements view in system_views.sql
!      */
!     tupdesc = CreateTemplateTupleDesc(5, false);
!     TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
!                        TEXTOID, -1, 0);
!     TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
!                        TEXTOID, -1, 0);
!     TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepare_time",
!                        TIMESTAMPTZOID, -1, 0);
!     TupleDescInitEntry(tupdesc, (AttrNumber) 4, "parameter_types",
!                        REGTYPEARRAYOID, -1, 0);
!     TupleDescInitEntry(tupdesc, (AttrNumber) 5, "from_sql",
!                        BOOLOID, -1, 0);

!     /*
!      * We put all the tuples into a tuplestore in one scan of the hashtable.
!      * This avoids any issue of the hashtable possibly changing between calls.
!      */
!     tupstore = tuplestore_begin_heap(true, false, work_mem);
!
!     /* hash table might be uninitialized */
!     if (prepared_queries)
!     {
!         HASH_SEQ_STATUS hash_seq;
!         PreparedStatement *prep_stmt;

!         hash_seq_init(&hash_seq, prepared_queries);
!         while ((prep_stmt = hash_seq_search(&hash_seq)) != NULL)
          {
!             HeapTuple    tuple;
!             Datum        values[5];
!             bool        nulls[5];

!             /* generate junk in short-term context */
!             MemoryContextSwitchTo(oldcontext);

!             MemSet(nulls, 0, sizeof(nulls));

!             values[0] = DirectFunctionCall1(textin,
                                        CStringGetDatum(prep_stmt->stmt_name));

!             if (prep_stmt->plansource->query_string == NULL)
!                 nulls[1] = true;
!             else
!                 values[1] = DirectFunctionCall1(textin,
                          CStringGetDatum(prep_stmt->plansource->query_string));

!             values[2] = TimestampTzGetDatum(prep_stmt->prepare_time);
!             values[3] = build_regtype_array(prep_stmt->plansource->param_types,
!                                             prep_stmt->plansource->num_params);
!             values[4] = BoolGetDatum(prep_stmt->from_sql);
!
!             tuple = heap_form_tuple(tupdesc, values, nulls);
!
!             /* switch to appropriate context while storing the tuple */
!             MemoryContextSwitchTo(per_query_ctx);
!             tuplestore_puttuple(tupstore, tuple);
!         }
      }

!     /* clean up and return the tuplestore */
!     tuplestore_donestoring(tupstore);
!
!     MemoryContextSwitchTo(oldcontext);
!
!     rsinfo->returnMode = SFRM_Materialize;
!     rsinfo->setResult = tupstore;
!     rsinfo->setDesc = tupdesc;
!
!     return (Datum) 0;
  }

  /*
*** src/backend/executor/nodeSubplan.c.orig    Mon Feb 26 20:11:25 2007
--- src/backend/executor/nodeSubplan.c    Wed Apr 25 19:22:50 2007
***************
*** 569,575 ****
      TupleHashIterator hashiter;
      TupleHashEntry entry;

!     ResetTupleHashIterator(hashtable, &hashiter);
      while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
      {
          ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
--- 569,575 ----
      TupleHashIterator hashiter;
      TupleHashEntry entry;

!     InitTupleHashIterator(hashtable, &hashiter);
      while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
      {
          ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
***************
*** 577,584 ****
--- 577,588 ----
                                 numCols, keyColIdx,
                                 hashtable->cur_eq_funcs,
                                 hashtable->tempcxt))
+         {
+             TermTupleHashIterator(&hashiter);
              return true;
+         }
      }
+     /* No TermTupleHashIterator call needed here */
      return false;
  }

*** src/backend/nodes/tidbitmap.c.orig    Fri Jan  5 18:01:58 2007
--- src/backend/nodes/tidbitmap.c    Wed Apr 25 19:38:05 2007
***************
*** 907,913 ****
          tbm_mark_page_lossy(tbm, page->blockno);

          if (tbm->nentries <= tbm->maxentries)
!             return;                /* we have done enough */

          /*
           * Note: tbm_mark_page_lossy may have inserted a lossy chunk into the
--- 907,917 ----
          tbm_mark_page_lossy(tbm, page->blockno);

          if (tbm->nentries <= tbm->maxentries)
!         {
!             /* we have done enough */
!             hash_seq_term(&status);
!             break;
!         }

          /*
           * Note: tbm_mark_page_lossy may have inserted a lossy chunk into the
*** src/backend/utils/hash/dynahash.c.orig    Fri Jan  5 18:02:22 2007
--- src/backend/utils/hash/dynahash.c    Thu Apr 26 14:29:55 2007
***************
*** 63,68 ****
--- 63,69 ----

  #include "postgres.h"

+ #include "access/xact.h"
  #include "storage/shmem.h"
  #include "storage/spin.h"
  #include "utils/dynahash.h"
***************
*** 160,165 ****
--- 161,169 ----
      char       *tabname;        /* table name (for error messages) */
      bool        isshared;        /* true if table is in shared memory */

+     /* freezing a shared table isn't allowed, so we can keep state here */
+     bool        frozen;            /* true = no more inserts allowed */
+
      /* We keep local copies of these fixed values to reduce contention */
      Size        keysize;        /* hash key length in bytes */
      long        ssize;            /* segment size --- must be power of 2 */
***************
*** 195,200 ****
--- 199,207 ----
  static int    choose_nelem_alloc(Size entrysize);
  static bool init_htab(HTAB *hashp, long nelem);
  static void hash_corrupted(HTAB *hashp);
+ static void register_seq_scan(HTAB *hashp);
+ static void deregister_seq_scan(HTAB *hashp);
+ static bool has_seq_scans(HTAB *hashp);


  /*
***************
*** 356,361 ****
--- 363,370 ----
                       errmsg("out of memory")));
      }

+     hashp->frozen = false;
+
      hdefault(hashp);

      hctl = hashp->hctl;
***************
*** 898,903 ****
--- 907,916 ----
              if (currBucket != NULL)
                  return (void *) ELEMENTKEY(currBucket);

+             /* disallow inserts if frozen */
+             if (hashp->frozen)
+                 elog(ERROR, "cannot insert into a frozen hashtable");
+
              currBucket = get_hash_entry(hashp);
              if (currBucket == NULL)
              {
***************
*** 925,934 ****

              /* caller is expected to fill the data field on return */

!             /* Check if it is time to split a bucket */
!             /* Can't split if running in partitioned mode */
              if (!IS_PARTITIONED(hctl) &&
!              hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor)
              {
                  /*
                   * NOTE: failure to expand table is not a fatal error, it just
--- 938,952 ----

              /* caller is expected to fill the data field on return */

!             /*
!              * Check if it is time to split a bucket.  Can't split if running
!              * in partitioned mode, nor if table is the subject of any active
!              * hash_seq_search scans.  Strange order of these tests is to try
!              * to check cheaper conditions first.
!              */
              if (!IS_PARTITIONED(hctl) &&
!                 hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
!                 !has_seq_scans(hashp))
              {
                  /*
                   * NOTE: failure to expand table is not a fatal error, it just
***************
*** 1001,1018 ****
  }

  /*
!  * hash_seq_init/_search
   *            Sequentially search through hash table and return
   *            all the elements one by one, return NULL when no more.
   *
   * NOTE: caller may delete the returned element before continuing the scan.
   * However, deleting any other element while the scan is in progress is
   * UNDEFINED (it might be the one that curIndex is pointing at!).  Also,
   * if elements are added to the table while the scan is in progress, it is
   * unspecified whether they will be visited by the scan or not.
   *
   * NOTE: to use this with a partitioned hashtable, caller had better hold
   * at least shared lock on all partitions of the table throughout the scan!
   */
  void
  hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
--- 1019,1048 ----
  }

  /*
!  * hash_seq_init/_search/_term
   *            Sequentially search through hash table and return
   *            all the elements one by one, return NULL when no more.
   *
+  * hash_seq_term should be called if and only if the scan is abandoned before
+  * completion; if hash_seq_search returns NULL then it has already done the
+  * end-of-scan cleanup.
+  *
   * NOTE: caller may delete the returned element before continuing the scan.
   * However, deleting any other element while the scan is in progress is
   * UNDEFINED (it might be the one that curIndex is pointing at!).  Also,
   * if elements are added to the table while the scan is in progress, it is
   * unspecified whether they will be visited by the scan or not.
   *
+  * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
+  * worry about hash_seq_term cleanup, if the hashtable is first locked against
+  * further insertions by calling hash_freeze.  This is used by nodeAgg.c,
+  * wherein it is inconvenient to track whether a scan is still open, and
+  * there's no possibility of further insertions after readout has begun.
+  *
   * NOTE: to use this with a partitioned hashtable, caller had better hold
   * at least shared lock on all partitions of the table throughout the scan!
+  * We can cope with insertions or deletions by our own backend, but *not*
+  * with concurrent insertions or deletions by another.
   */
  void
  hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
***************
*** 1020,1025 ****
--- 1050,1057 ----
      status->hashp = hashp;
      status->curBucket = 0;
      status->curEntry = NULL;
+     if (!hashp->frozen)
+         register_seq_scan(hashp);
  }

  void *
***************
*** 1054,1060 ****
--- 1086,1095 ----
      max_bucket = hctl->max_bucket;

      if (curBucket > max_bucket)
+     {
+         hash_seq_term(status);
          return NULL;            /* search is done */
+     }

      /*
       * first find the right segment in the table directory.
***************
*** 1076,1081 ****
--- 1111,1117 ----
          if (++curBucket > max_bucket)
          {
              status->curBucket = curBucket;
+             hash_seq_term(status);
              return NULL;        /* search is done */
          }
          if (++segment_ndx >= ssize)
***************
*** 1094,1099 ****
--- 1130,1165 ----
      return (void *) ELEMENTKEY(curElem);
  }

+ void
+ hash_seq_term(HASH_SEQ_STATUS *status)
+ {
+     if (!status->hashp->frozen)
+         deregister_seq_scan(status->hashp);
+ }
+
+ /*
+  * hash_freeze
+  *            Freeze a hashtable against future insertions (deletions are
+  *            still allowed)
+  *
+  * The reason for doing this is that by preventing any more bucket splits,
+  * we no longer need to worry about registering hash_seq_search scans,
+  * and thus caller need not be careful about ensuring hash_seq_term gets
+  * called at the right times.
+  *
+  * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
+  * with active scans (since hash_seq_term would then do the wrong thing).
+  */
+ void
+ hash_freeze(HTAB *hashp)
+ {
+     if (hashp->isshared)
+         elog(ERROR, "cannot freeze shared hashtable");
+     if (!hashp->frozen && has_seq_scans(hashp))
+         elog(ERROR, "cannot freeze hashtable with active scans");
+     hashp->frozen = true;
+ }
+

  /********************************* UTILITIES ************************/

***************
*** 1323,1326 ****
--- 1389,1525 ----
      for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
          ;
      return i;
+ }
+
+
+ /************************* SEQ SCAN TRACKING ************************/
+
+ /*
+  * We track active hash_seq_search scans here.  The need for this mechanism
+  * comes from the fact that a scan will get confused if a bucket split occurs
+  * while it's in progress: it might visit entries twice, or even miss some
+  * entirely (if it's partway through the same bucket that splits).  Hence
+  * we want to inhibit bucket splits if there are any active scans on the
+  * table being inserted into.  This is a fairly rare case in current usage,
+  * so just postponing the split until the next insertion seems sufficient.
+  *
+  * Given present usages of the function, only a few scans are likely to be
+  * open concurrently; so a finite-size stack of open scans seems sufficient,
+  * and we don't worry that linear search is too slow.  Note that we do
+  * allow multiple scans of the same hashtable to be open concurrently.
+  *
+  * This mechanism can support concurrent scan and insertion in a shared
+  * hashtable if it's the same backend doing both.  It would fail otherwise,
+  * but locking reasons seem to preclude any such scenario anyway, so we don't
+  * worry.
+  *
+  * This arrangement is reasonably robust if a transient hashtable is deleted
+  * without notifying us.  The absolute worst case is we might inhibit splits
+  * in another table created later at exactly the same address.  We will give
+  * a warning at transaction end for reference leaks, so any bugs leading to
+  * lack of notification should be easy to catch.
+  */
+
+ #define MAX_SEQ_SCANS 100
+
+ static HTAB *seq_scan_tables[MAX_SEQ_SCANS];    /* tables being scanned */
+ static int    seq_scan_level[MAX_SEQ_SCANS];        /* subtransaction nest level */
+ static int    num_seq_scans = 0;
+
+
+ /* Register a table as having an active hash_seq_search scan */
+ static void
+ register_seq_scan(HTAB *hashp)
+ {
+     if (num_seq_scans >= MAX_SEQ_SCANS)
+         elog(ERROR, "too many active hash_seq_search scans");
+     seq_scan_tables[num_seq_scans] = hashp;
+     seq_scan_level[num_seq_scans] = GetCurrentTransactionNestLevel();
+     num_seq_scans++;
+ }
+
+ /* Deregister an active scan */
+ static void
+ deregister_seq_scan(HTAB *hashp)
+ {
+     int        i;
+
+     /* Search backward since it's most likely at the stack top */
+     for (i = num_seq_scans - 1; i >= 0; i--)
+     {
+         if (seq_scan_tables[i] == hashp)
+         {
+             seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
+             seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
+             num_seq_scans--;
+             return;
+         }
+     }
+     elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
+          hashp->tabname);
+ }
+
+ /* Check if a table has any active scan */
+ static bool
+ has_seq_scans(HTAB *hashp)
+ {
+     int        i;
+
+     for (i = 0; i < num_seq_scans; i++)
+     {
+         if (seq_scan_tables[i] == hashp)
+             return true;
+     }
+     return false;
+ }
+
+ /* Clean up any open scans at end of transaction */
+ void
+ AtEOXact_HashTables(bool isCommit)
+ {
+     /*
+      * During abort cleanup, open scans are expected; just silently clean 'em
+      * out.  An open scan at commit means someone forgot a hash_seq_term()
+      * call, so complain.
+      *
+      * Note: it's tempting to try to print the tabname here, but refrain for
+      * fear of touching deallocated memory.  This isn't a user-facing message
+      * anyway, so it needn't be pretty.
+      */
+     if (isCommit)
+     {
+         int        i;
+
+         for (i = 0; i < num_seq_scans; i++)
+         {
+             elog(WARNING, "leaked hash_seq_search scan for hash table %p",
+                  seq_scan_tables[i]);
+         }
+     }
+     num_seq_scans = 0;
+ }
+
+ /* Clean up any open scans at end of subtransaction */
+ void
+ AtEOSubXact_HashTables(bool isCommit, int nestDepth)
+ {
+     int        i;
+
+     /*
+      * Search backward to make cleanup easy.  Note we must check all entries,
+      * not only those at the end of the array, because deletion technique
+      * doesn't keep them in order.
+      */
+     for (i = num_seq_scans - 1; i >= 0; i--)
+     {
+         if (seq_scan_level[i] >= nestDepth)
+         {
+             if (isCommit)
+                 elog(WARNING, "leaked hash_seq_search scan for hash table %p",
+                      seq_scan_tables[i]);
+             seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
+             seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
+             num_seq_scans--;
+         }
+     }
  }
*** src/backend/utils/mmgr/portalmem.c.orig    Thu Apr 12 14:21:37 2007
--- src/backend/utils/mmgr/portalmem.c    Thu Apr 26 15:27:57 2007
***************
*** 22,28 ****
  #include "access/xact.h"
  #include "catalog/pg_type.h"
  #include "commands/portalcmds.h"
- #include "funcapi.h"
  #include "miscadmin.h"
  #include "utils/builtins.h"
  #include "utils/memutils.h"
--- 22,27 ----
***************
*** 621,627 ****
          /* Zap all non-holdable portals */
          PortalDrop(portal, true);

!         /* Restart the iteration */
          hash_seq_init(&status, PortalHashTable);
      }
  }
--- 620,628 ----
          /* Zap all non-holdable portals */
          PortalDrop(portal, true);

!         /* Restart the iteration in case that led to other drops */
!         /* XXX is this really necessary? */
!         hash_seq_term(&status);
          hash_seq_init(&status, PortalHashTable);
      }
  }
***************
*** 858,936 ****
  Datum
  pg_cursor(PG_FUNCTION_ARGS)
  {
!     FuncCallContext *funcctx;
!     HASH_SEQ_STATUS *hash_seq;
      PortalHashEnt *hentry;

!     /* stuff done only on the first call of the function */
!     if (SRF_IS_FIRSTCALL())
!     {
!         MemoryContext oldcontext;
!         TupleDesc    tupdesc;
!
!         /* create a function context for cross-call persistence */
!         funcctx = SRF_FIRSTCALL_INIT();
!
!         /*
!          * switch to memory context appropriate for multiple function calls
!          */
!         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
!
!         if (PortalHashTable)
!         {
!             hash_seq = (HASH_SEQ_STATUS *) palloc(sizeof(HASH_SEQ_STATUS));
!             hash_seq_init(hash_seq, PortalHashTable);
!             funcctx->user_fctx = (void *) hash_seq;
!         }
!         else
!             funcctx->user_fctx = NULL;
!
!         /*
!          * build tupdesc for result tuples. This must match the definition of
!          * the pg_cursors view in system_views.sql
!          */
!         tupdesc = CreateTemplateTupleDesc(6, false);
!         TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
!                            TEXTOID, -1, 0);
!         TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
!                            TEXTOID, -1, 0);
!         TupleDescInitEntry(tupdesc, (AttrNumber) 3, "is_holdable",
!                            BOOLOID, -1, 0);
!         TupleDescInitEntry(tupdesc, (AttrNumber) 4, "is_binary",
!                            BOOLOID, -1, 0);
!         TupleDescInitEntry(tupdesc, (AttrNumber) 5, "is_scrollable",
!                            BOOLOID, -1, 0);
!         TupleDescInitEntry(tupdesc, (AttrNumber) 6, "creation_time",
!                            TIMESTAMPTZOID, -1, 0);
!
!         funcctx->tuple_desc = BlessTupleDesc(tupdesc);
!         MemoryContextSwitchTo(oldcontext);
!     }

!     /* stuff done on every call of the function */
!     funcctx = SRF_PERCALL_SETUP();
!     hash_seq = (HASH_SEQ_STATUS *) funcctx->user_fctx;
!
!     /* if the hash table is uninitialized, we're done */
!     if (hash_seq == NULL)
!         SRF_RETURN_DONE(funcctx);

!     /* loop until we find a visible portal or hit the end of the list */
!     while ((hentry = hash_seq_search(hash_seq)) != NULL)
!     {
!         if (hentry->portal->visible)
!             break;
!     }

!     if (hentry)
      {
!         Portal        portal;
!         Datum        result;
          HeapTuple    tuple;
          Datum        values[6];
          bool        nulls[6];

!         portal = hentry->portal;
          MemSet(nulls, 0, sizeof(nulls));

          values[0] = DirectFunctionCall1(textin, CStringGetDatum(portal->name));
--- 859,926 ----
  Datum
  pg_cursor(PG_FUNCTION_ARGS)
  {
!     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
!     TupleDesc    tupdesc;
!     Tuplestorestate *tupstore;
!     MemoryContext per_query_ctx;
!     MemoryContext oldcontext;
!     HASH_SEQ_STATUS hash_seq;
      PortalHashEnt *hentry;

!     /* check to see if caller supports us returning a tuplestore */
!     if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
!         ereport(ERROR,
!                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
!                  errmsg("set-valued function called in context that cannot accept a set")));
!     if (!(rsinfo->allowedModes & SFRM_Materialize))
!         ereport(ERROR,
!                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
!                  errmsg("materialize mode required, but it is not " \
!                         "allowed in this context")));
!
!     /* need to build tuplestore in query context */
!     per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
!     oldcontext = MemoryContextSwitchTo(per_query_ctx);

!     /*
!      * build tupdesc for result tuples. This must match the definition of
!      * the pg_cursors view in system_views.sql
!      */
!     tupdesc = CreateTemplateTupleDesc(6, false);
!     TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
!                        TEXTOID, -1, 0);
!     TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
!                        TEXTOID, -1, 0);
!     TupleDescInitEntry(tupdesc, (AttrNumber) 3, "is_holdable",
!                        BOOLOID, -1, 0);
!     TupleDescInitEntry(tupdesc, (AttrNumber) 4, "is_binary",
!                        BOOLOID, -1, 0);
!     TupleDescInitEntry(tupdesc, (AttrNumber) 5, "is_scrollable",
!                        BOOLOID, -1, 0);
!     TupleDescInitEntry(tupdesc, (AttrNumber) 6, "creation_time",
!                        TIMESTAMPTZOID, -1, 0);

!     /*
!      * We put all the tuples into a tuplestore in one scan of the hashtable.
!      * This avoids any issue of the hashtable possibly changing between calls.
!      */
!     tupstore = tuplestore_begin_heap(true, false, work_mem);

!     hash_seq_init(&hash_seq, PortalHashTable);
!     while ((hentry = hash_seq_search(&hash_seq)) != NULL)
      {
!         Portal        portal = hentry->portal;
          HeapTuple    tuple;
          Datum        values[6];
          bool        nulls[6];

!         /* report only "visible" entries */
!         if (!portal->visible)
!             continue;
!
!         /* generate junk in short-term context */
!         MemoryContextSwitchTo(oldcontext);
!
          MemSet(nulls, 0, sizeof(nulls));

          values[0] = DirectFunctionCall1(textin, CStringGetDatum(portal->name));
***************
*** 944,953 ****
          values[4] = BoolGetDatum(portal->cursorOptions & CURSOR_OPT_SCROLL);
          values[5] = TimestampTzGetDatum(portal->creation_time);

!         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
!         result = HeapTupleGetDatum(tuple);
!         SRF_RETURN_NEXT(funcctx, result);
      }

!     SRF_RETURN_DONE(funcctx);
  }
--- 934,954 ----
          values[4] = BoolGetDatum(portal->cursorOptions & CURSOR_OPT_SCROLL);
          values[5] = TimestampTzGetDatum(portal->creation_time);

!         tuple = heap_form_tuple(tupdesc, values, nulls);
!
!         /* switch to appropriate context while storing the tuple */
!         MemoryContextSwitchTo(per_query_ctx);
!         tuplestore_puttuple(tupstore, tuple);
      }

!     /* clean up and return the tuplestore */
!     tuplestore_donestoring(tupstore);
!
!     MemoryContextSwitchTo(oldcontext);
!
!     rsinfo->returnMode = SFRM_Materialize;
!     rsinfo->setResult = tupstore;
!     rsinfo->setDesc = tupdesc;
!
!     return (Datum) 0;
  }
*** src/include/nodes/execnodes.h.orig    Tue Mar 27 19:21:12 2007
--- src/include/nodes/execnodes.h    Wed Apr 25 19:21:40 2007
***************
*** 408,415 ****

  typedef HASH_SEQ_STATUS TupleHashIterator;

! #define ResetTupleHashIterator(htable, iter) \
      hash_seq_init(iter, (htable)->hashtab)
  #define ScanTupleHashTable(iter) \
      ((TupleHashEntry) hash_seq_search(iter))

--- 408,427 ----

  typedef HASH_SEQ_STATUS TupleHashIterator;

! /*
!  * Use InitTupleHashIterator/TermTupleHashIterator for a read/write scan.
!  * Use ResetTupleHashIterator if the table can be frozen (in this case no
!  * explicit scan termination is needed).
!  */
! #define InitTupleHashIterator(htable, iter) \
      hash_seq_init(iter, (htable)->hashtab)
+ #define TermTupleHashIterator(iter) \
+     hash_seq_term(iter)
+ #define ResetTupleHashIterator(htable, iter) \
+     do { \
+         hash_freeze((htable)->hashtab); \
+         hash_seq_init(iter, (htable)->hashtab); \
+     } while (0)
  #define ScanTupleHashTable(iter) \
      ((TupleHashEntry) hash_seq_search(iter))

*** src/include/utils/hsearch.h.orig    Fri Jan  5 18:02:59 2007
--- src/include/utils/hsearch.h    Wed Apr 25 20:29:18 2007
***************
*** 130,138 ****
--- 130,142 ----
  extern long hash_get_num_entries(HTAB *hashp);
  extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
  extern void *hash_seq_search(HASH_SEQ_STATUS *status);
+ extern void hash_seq_term(HASH_SEQ_STATUS *status);
+ extern void hash_freeze(HTAB *hashp);
  extern Size hash_estimate_size(long num_entries, Size entrysize);
  extern long hash_select_dirsize(long num_entries);
  extern Size hash_get_shared_size(HASHCTL *info, int flags);
+ extern void AtEOXact_HashTables(bool isCommit);
+ extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth);

  /*
   * prototypes for functions in hashfn.c

pgsql-patches by date:

Previous
From: Bruce Momjian
Date:
Subject: Re: New version of GENERATED/IDENTITY, was Re: parser dilemma
Next
From: Bruce Momjian
Date:
Subject: Re: Auto Partitioning Patch - WIP version 1