Re: transaction block: server closed the connection unexpectedly - Mailing list pgsql-bugs

From Tom Lane
Subject Re: transaction block: server closed the connection unexpectedly
Date
Msg-id 17356.1094493624@sss.pgh.pa.us
Whole thread Raw
In response to Re: transaction block: server closed the connection unexpectedly  (Alvaro Herrera <alvherre@dcc.uchile.cl>)
List pgsql-bugs
Alvaro Herrera <alvherre@dcc.uchile.cl> writes:
> The problem is that there's a pin on an index page when the smgr tries
> to drop its buffers.  This patch corrects this problem, by having
> resowner cleanup before smgr.

Good diagnosis, not very good fix.  You didn't do anything about
adjusting the comments to match the code, and you missed the identical
bug occurring during subtransaction abort.

One of the most powerful programming techniques I know is, once you've
identified a bug, to ask yourself "where else might I (or others) have
made this same error?".  That would have led you to the subtransaction
case, even if we hadn't already agreed that the order of operations
during xact/subxact commit/abort should be kept the same as much as
possible.

Patch as-applied is attached.

            regards, tom lane


*** src/backend/access/transam/xact.c.orig    Mon Aug 30 15:00:03 2004
--- src/backend/access/transam/xact.c    Mon Sep  6 13:52:42 2004
***************
*** 1333,1341 ****
       * backend-wide state.
       */

-     smgrDoPendingDeletes(true);
-     /* smgrcommit already done */
-
      CallXactCallbacks(XACT_EVENT_COMMIT, InvalidTransactionId);

      ResourceOwnerRelease(TopTransactionResourceOwner,
--- 1333,1338 ----
***************
*** 1352,1357 ****
--- 1349,1362 ----
       */
      AtEOXact_Inval(true);

+     /*
+      * Likewise, dropping of files deleted during the transaction is best done
+      * after releasing relcache and buffer pins.  (This is not strictly
+      * necessary during commit, since such pins should have been released
+      * already, but this ordering is definitely critical during abort.)
+      */
+     smgrDoPendingDeletes(true);
+
      ResourceOwnerRelease(TopTransactionResourceOwner,
                           RESOURCE_RELEASE_LOCKS,
                           true, true);
***************
*** 1363,1368 ****
--- 1368,1374 ----
      AtEOXact_SPI(true);
      AtEOXact_on_commit_actions(true, s->transactionIdData);
      AtEOXact_Namespace(true);
+     /* smgrcommit already done */
      AtEOXact_Files();
      pgstat_count_xact_commit();

***************
*** 1481,1495 ****
       * ordering.
       */

-     smgrDoPendingDeletes(false);
-     smgrabort();
-
      CallXactCallbacks(XACT_EVENT_ABORT, InvalidTransactionId);

      ResourceOwnerRelease(TopTransactionResourceOwner,
                           RESOURCE_RELEASE_BEFORE_LOCKS,
                           false, true);
      AtEOXact_Inval(false);
      ResourceOwnerRelease(TopTransactionResourceOwner,
                           RESOURCE_RELEASE_LOCKS,
                           false, true);
--- 1487,1499 ----
       * ordering.
       */

      CallXactCallbacks(XACT_EVENT_ABORT, InvalidTransactionId);

      ResourceOwnerRelease(TopTransactionResourceOwner,
                           RESOURCE_RELEASE_BEFORE_LOCKS,
                           false, true);
      AtEOXact_Inval(false);
+     smgrDoPendingDeletes(false);
      ResourceOwnerRelease(TopTransactionResourceOwner,
                           RESOURCE_RELEASE_LOCKS,
                           false, true);
***************
*** 1501,1506 ****
--- 1505,1511 ----
      AtEOXact_SPI(false);
      AtEOXact_on_commit_actions(false, s->transactionIdData);
      AtEOXact_Namespace(false);
+     smgrabort();
      AtEOXact_Files();
      pgstat_count_xact_rollback();

***************
*** 3014,3020 ****
      AtSubCommit_Notify();
      AtEOSubXact_UpdatePasswordFile(true, s->transactionIdData,
                                     s->parent->transactionIdData);
-     AtSubCommit_smgr();

      CallXactCallbacks(XACT_EVENT_COMMIT_SUB, s->parent->transactionIdData);

--- 3019,3024 ----
***************
*** 3024,3029 ****
--- 3028,3034 ----
      AtEOSubXact_RelationCache(true, s->transactionIdData,
                                s->parent->transactionIdData);
      AtEOSubXact_Inval(true);
+     AtSubCommit_smgr();
      ResourceOwnerRelease(s->curTransactionOwner,
                           RESOURCE_RELEASE_LOCKS,
                           true, false);
***************
*** 3109,3116 ****
      RecordSubTransactionAbort();

      /* Post-abort cleanup */
-     AtSubAbort_smgr();
-
      CallXactCallbacks(XACT_EVENT_ABORT_SUB, s->parent->transactionIdData);

      ResourceOwnerRelease(s->curTransactionOwner,
--- 3114,3119 ----
***************
*** 3119,3124 ****
--- 3122,3128 ----
      AtEOSubXact_RelationCache(false, s->transactionIdData,
                                s->parent->transactionIdData);
      AtEOSubXact_Inval(false);
+     AtSubAbort_smgr();
      ResourceOwnerRelease(s->curTransactionOwner,
                           RESOURCE_RELEASE_LOCKS,
                           false, false);
*** src/backend/storage/smgr/smgr.c.orig    Sun Aug 29 22:57:38 2004
--- src/backend/storage/smgr/smgr.c    Mon Sep  6 13:39:42 2004
***************
*** 784,790 ****
  }

  /*
!  *    smgrabort() -- Abort changes made during the current transaction.
   */
  void
  smgrabort(void)
--- 784,790 ----
  }

  /*
!  *    smgrabort() -- Clean up after transaction abort.
   */
  void
  smgrabort(void)
*** src/test/regress/expected/transactions.out.orig    Thu Aug 12 14:57:49 2004
--- src/test/regress/expected/transactions.out    Mon Sep  6 13:46:34 2004
***************
*** 374,379 ****
--- 374,394 ----
      FETCH 10 FROM c;
  ERROR:  portal "c" cannot be run
  COMMIT;
+ -- test case for problems with dropping an open relation during abort
+ BEGIN;
+     savepoint x;
+         CREATE TABLE koju (a INT UNIQUE);
+ NOTICE:  CREATE TABLE / UNIQUE will create implicit index "koju_a_key" for table "koju"
+         INSERT INTO koju VALUES (1);
+         INSERT INTO koju VALUES (1);
+ ERROR:  duplicate key violates unique constraint "koju_a_key"
+     rollback to x;
+     CREATE TABLE koju (a INT UNIQUE);
+ NOTICE:  CREATE TABLE / UNIQUE will create implicit index "koju_a_key" for table "koju"
+     INSERT INTO koju VALUES (1);
+     INSERT INTO koju VALUES (1);
+ ERROR:  duplicate key violates unique constraint "koju_a_key"
+ ROLLBACK;
  DROP TABLE foo;
  DROP TABLE baz;
  DROP TABLE barbaz;
*** src/test/regress/sql/transactions.sql.orig    Thu Aug 12 14:25:44 2004
--- src/test/regress/sql/transactions.sql    Mon Sep  6 13:44:08 2004
***************
*** 231,236 ****
--- 231,249 ----
      FETCH 10 FROM c;
  COMMIT;

+ -- test case for problems with dropping an open relation during abort
+ BEGIN;
+     savepoint x;
+         CREATE TABLE koju (a INT UNIQUE);
+         INSERT INTO koju VALUES (1);
+         INSERT INTO koju VALUES (1);
+     rollback to x;
+
+     CREATE TABLE koju (a INT UNIQUE);
+     INSERT INTO koju VALUES (1);
+     INSERT INTO koju VALUES (1);
+ ROLLBACK;
+
  DROP TABLE foo;
  DROP TABLE baz;
  DROP TABLE barbaz;

pgsql-bugs by date:

Previous
From: Tom Lane
Date:
Subject: Re: BUG #1239: Stale postmaster.pid prevents server start
Next
From: Tom Lane
Date:
Subject: Re: error in simple sql function breaks connection