Re: PATCH to allow concurrent VACUUMs to not lock each - Mailing list pgsql-patches

From Tom Lane
Subject Re: PATCH to allow concurrent VACUUMs to not lock each
Date
Msg-id 5805.1124307653@sss.pgh.pa.us
Whole thread Raw
In response to Re: PATCH to allow concurrent VACUUMs to not lock each  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: PATCH to allow concurrent VACUUMs to not lock each  (Hannu Krosing <hannu@skype.net>)
List pgsql-patches
Just for the archives, attached is as far as I'd gotten with cleaning up
Hannu's patch before I realized that it wasn't doing what it needed to
do.  This fixes an end-of-transaction race condition (can't unset
inVacuum before xact end, unless you want OldestXmin going backwards
from the point of view of other people) and improves the documentation
of what's going on.  But unless someone can convince me that it's safe
to mess with GetSnapshotData, it's unlikely this'll ever get applied.

            regards, tom lane

Index: src/backend/access/transam/twophase.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/access/transam/twophase.c,v
retrieving revision 1.9
diff -c -r1.9 twophase.c
*** src/backend/access/transam/twophase.c    31 Jul 2005 17:19:17 -0000    1.9
--- src/backend/access/transam/twophase.c    17 Aug 2005 19:35:48 -0000
***************
*** 273,278 ****
--- 273,279 ----
      gxact->proc.pid = 0;
      gxact->proc.databaseId = databaseid;
      gxact->proc.roleId = owner;
+     gxact->proc.inVacuum = false;
      gxact->proc.lwWaiting = false;
      gxact->proc.lwExclusive = false;
      gxact->proc.lwWaitLink = NULL;
Index: src/backend/access/transam/xact.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/access/transam/xact.c,v
retrieving revision 1.212
diff -c -r1.212 xact.c
*** src/backend/access/transam/xact.c    8 Aug 2005 19:17:22 -0000    1.212
--- src/backend/access/transam/xact.c    17 Aug 2005 19:35:49 -0000
***************
*** 1516,1521 ****
--- 1516,1522 ----
          LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
          MyProc->xid = InvalidTransactionId;
          MyProc->xmin = InvalidTransactionId;
+         MyProc->inVacuum = false;    /* must be cleared with xid/xmin */

          /* Clear the subtransaction-XID cache too while holding the lock */
          MyProc->subxids.nxids = 0;
***************
*** 1752,1757 ****
--- 1753,1759 ----
      LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
      MyProc->xid = InvalidTransactionId;
      MyProc->xmin = InvalidTransactionId;
+     MyProc->inVacuum = false;    /* must be cleared with xid/xmin */

      /* Clear the subtransaction-XID cache too while holding the lock */
      MyProc->subxids.nxids = 0;
***************
*** 1915,1920 ****
--- 1917,1923 ----
          LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
          MyProc->xid = InvalidTransactionId;
          MyProc->xmin = InvalidTransactionId;
+         MyProc->inVacuum = false;    /* must be cleared with xid/xmin */

          /* Clear the subtransaction-XID cache too while holding the lock */
          MyProc->subxids.nxids = 0;
Index: src/backend/access/transam/xlog.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v
retrieving revision 1.215
diff -c -r1.215 xlog.c
*** src/backend/access/transam/xlog.c    11 Aug 2005 21:11:43 -0000    1.215
--- src/backend/access/transam/xlog.c    17 Aug 2005 19:35:51 -0000
***************
*** 5226,5232 ****
       * mustn't do this because StartupSUBTRANS hasn't been called yet.
       */
      if (!InRecovery)
!         TruncateSUBTRANS(GetOldestXmin(true));

      if (!shutdown)
          ereport(DEBUG2,
--- 5226,5232 ----
       * mustn't do this because StartupSUBTRANS hasn't been called yet.
       */
      if (!InRecovery)
!         TruncateSUBTRANS(GetOldestXmin(true, false));

      if (!shutdown)
          ereport(DEBUG2,
Index: src/backend/catalog/index.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/catalog/index.c,v
retrieving revision 1.259
diff -c -r1.259 index.c
*** src/backend/catalog/index.c    12 Aug 2005 01:35:56 -0000    1.259
--- src/backend/catalog/index.c    17 Aug 2005 19:35:52 -0000
***************
*** 1433,1439 ****
      else
      {
          snapshot = SnapshotAny;
!         OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared);
      }

      scan = heap_beginscan(heapRelation, /* relation */
--- 1433,1440 ----
      else
      {
          snapshot = SnapshotAny;
!         /* okay to ignore lazy VACUUMs here */
!         OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
      }

      scan = heap_beginscan(heapRelation, /* relation */
Index: src/backend/commands/vacuum.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/commands/vacuum.c,v
retrieving revision 1.312
diff -c -r1.312 vacuum.c
*** src/backend/commands/vacuum.c    29 Jul 2005 19:30:03 -0000    1.312
--- src/backend/commands/vacuum.c    17 Aug 2005 19:35:52 -0000
***************
*** 36,41 ****
--- 36,42 ----
  #include "executor/executor.h"
  #include "miscadmin.h"
  #include "storage/freespace.h"
+ #include "storage/proc.h"
  #include "storage/procarray.h"
  #include "storage/smgr.h"
  #include "tcop/pquery.h"
***************
*** 342,349 ****
           * determining the cutoff with which we vacuum shared relations,
           * it is not possible for that database to have a cutoff newer
           * than OLDXMIN recorded in pg_database.
           */
!         vacuum_set_xid_limits(vacstmt, false,
                                &initialOldestXmin,
                                &initialFreezeLimit);
      }
--- 343,353 ----
           * determining the cutoff with which we vacuum shared relations,
           * it is not possible for that database to have a cutoff newer
           * than OLDXMIN recorded in pg_database.
+          *
+          * We can't ignore concurrent lazy VACUUMs, because these values will
+          * be used to truncate clog below.
           */
!         vacuum_set_xid_limits(vacstmt, false, false,
                                &initialOldestXmin,
                                &initialFreezeLimit);
      }
***************
*** 583,595 ****
   * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
   */
  void
! vacuum_set_xid_limits(VacuumStmt *vacstmt, bool sharedRel,
                        TransactionId *oldestXmin,
                        TransactionId *freezeLimit)
  {
      TransactionId limit;

!     *oldestXmin = GetOldestXmin(sharedRel);

      Assert(TransactionIdIsNormal(*oldestXmin));

--- 587,600 ----
   * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
   */
  void
! vacuum_set_xid_limits(VacuumStmt *vacstmt,
!                       bool sharedRel, bool ignoreVacuum,
                        TransactionId *oldestXmin,
                        TransactionId *freezeLimit)
  {
      TransactionId limit;

!     *oldestXmin = GetOldestXmin(sharedRel, ignoreVacuum);

      Assert(TransactionIdIsNormal(*oldestXmin));

***************
*** 644,649 ****
--- 649,659 ----
   *        pg_class would've been obsoleted.  Of course, this only works for
   *        fixed-size never-null columns, but these are.
   *
+  *        Another reason for doing it this way is that when we are in a lazy
+  *        VACUUM and have inVacuum set, we mustn't do any updates --- somebody
+  *        vacuuming pg_class might think they could delete a tuple marked with
+  *        xmin = our xid.
+  *
   *        This routine is shared by full VACUUM, lazy VACUUM, and stand-alone
   *        ANALYZE.
   */
***************
*** 924,931 ****

      /* Begin a transaction for vacuuming this relation */
      StartTransactionCommand();
!     /* functions in indexes may want a snapshot set */
!     ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());

      /*
       * Tell the cache replacement strategy that vacuum is causing all
--- 934,968 ----

      /* Begin a transaction for vacuuming this relation */
      StartTransactionCommand();
!
!     if (vacstmt->full)
!     {
!         /* functions in indexes may want a snapshot set */
!         ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());
!     }
!     else
!     {
!         /*
!          * During a lazy VACUUM we do not run any user-supplied functions,
!          * and so it should be safe to not create a transaction snapshot.
!          *
!          * We can furthermore set the inVacuum flag, which lets other
!          * concurrent VACUUMs know that they can ignore this one while
!          * determining their OldestXmin.  (The reason we don't set inVacuum
!          * during a full VACUUM is exactly that we may have to run user-
!          * defined functions for functional indexes, and we want to make
!          * sure that if they use the snapshot set above, any tuples it
!          * requires can't get removed from other tables.  An index function
!          * that depends on the contents of other tables is arguably broken,
!          * but we won't break it here by violating transaction semantics.)
!          *
!          * Note: the inVacuum flag remains set until CommitTransaction or
!          * AbortTransaction.  We don't want to clear it until we reset
!          * MyProc->xid/xmin, else OldestXmin might appear to go backwards,
!          * which is probably Not Good.
!          */
!         MyProc->inVacuum = true;
!     }

      /*
       * Tell the cache replacement strategy that vacuum is causing all
***************
*** 1105,1111 ****
                  i;
      VRelStats  *vacrelstats;

!     vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
                            &OldestXmin, &FreezeLimit);

      /*
--- 1142,1148 ----
                  i;
      VRelStats  *vacrelstats;

!     vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared, true,
                            &OldestXmin, &FreezeLimit);

      /*
Index: src/backend/commands/vacuumlazy.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v
retrieving revision 1.56
diff -c -r1.56 vacuumlazy.c
*** src/backend/commands/vacuumlazy.c    29 Jul 2005 19:30:03 -0000    1.56
--- src/backend/commands/vacuumlazy.c    17 Aug 2005 19:35:52 -0000
***************
*** 142,148 ****
      else
          elevel = DEBUG2;

!     vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
                            &OldestXmin, &FreezeLimit);

      vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
--- 142,148 ----
      else
          elevel = DEBUG2;

!     vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared, true,
                            &OldestXmin, &FreezeLimit);

      vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
Index: src/backend/storage/ipc/procarray.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/storage/ipc/procarray.c,v
retrieving revision 1.4
diff -c -r1.4 procarray.c
*** src/backend/storage/ipc/procarray.c    31 Jul 2005 17:19:18 -0000    1.4
--- src/backend/storage/ipc/procarray.c    17 Aug 2005 19:35:52 -0000
***************
*** 386,405 ****
   * If allDbs is TRUE then all backends are considered; if allDbs is FALSE
   * then only backends running in my own database are considered.
   *
   * This is used by VACUUM to decide which deleted tuples must be preserved
   * in a table.    allDbs = TRUE is needed for shared relations, but allDbs =
   * FALSE is sufficient for non-shared relations, since only backends in my
!  * own database could ever see the tuples in them.
   *
   * This is also used to determine where to truncate pg_subtrans.  allDbs
!  * must be TRUE for that case.
   *
   * Note: we include the currently running xids in the set of considered xids.
   * This ensures that if a just-started xact has not yet set its snapshot,
   * when it does set the snapshot it cannot set xmin less than what we compute.
   */
  TransactionId
! GetOldestXmin(bool allDbs)
  {
      ProcArrayStruct *arrayP = procArray;
      TransactionId result;
--- 386,409 ----
   * If allDbs is TRUE then all backends are considered; if allDbs is FALSE
   * then only backends running in my own database are considered.
   *
+  * If ignoreVacuum is TRUE then backends with inVacuum set are ignored.
+  *
   * This is used by VACUUM to decide which deleted tuples must be preserved
   * in a table.    allDbs = TRUE is needed for shared relations, but allDbs =
   * FALSE is sufficient for non-shared relations, since only backends in my
!  * own database could ever see the tuples in them.  Also, we can ignore
!  * concurrently running lazy VACUUMs because (a) they must be working on other
!  * tables, and (b) they don't need to do snapshot-based lookups.
   *
   * This is also used to determine where to truncate pg_subtrans.  allDbs
!  * must be TRUE for that case, and ignoreVacuum FALSE.
   *
   * Note: we include the currently running xids in the set of considered xids.
   * This ensures that if a just-started xact has not yet set its snapshot,
   * when it does set the snapshot it cannot set xmin less than what we compute.
   */
  TransactionId
! GetOldestXmin(bool allDbs, bool ignoreVacuum)
  {
      ProcArrayStruct *arrayP = procArray;
      TransactionId result;
***************
*** 423,428 ****
--- 427,435 ----
      {
          PGPROC       *proc = arrayP->procs[index];

+         if (ignoreVacuum && proc->inVacuum)
+             continue;
+
          if (allDbs || proc->databaseId == MyDatabaseId)
          {
              /* Fetch xid just once - see GetNewTransactionId */
***************
*** 470,476 ****
   *            older than this are known not running any more.
   *        RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
   *            running transactions).  This is the same computation done by
!  *            GetOldestXmin(TRUE).
   *----------
   */
  Snapshot
--- 477,483 ----
   *            older than this are known not running any more.
   *        RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
   *            running transactions).  This is the same computation done by
!  *            GetOldestXmin(TRUE, FALSE).
   *----------
   */
  Snapshot
Index: src/backend/storage/lmgr/proc.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v
retrieving revision 1.162
diff -c -r1.162 proc.c
*** src/backend/storage/lmgr/proc.c    8 Aug 2005 03:11:55 -0000    1.162
--- src/backend/storage/lmgr/proc.c    17 Aug 2005 19:35:52 -0000
***************
*** 256,261 ****
--- 256,262 ----
      MyProc->databaseId = MyDatabaseId;
      /* Will be set properly after the session role id is determined */
      MyProc->roleId = InvalidOid;
+     MyProc->inVacuum = false;
      MyProc->lwWaiting = false;
      MyProc->lwExclusive = false;
      MyProc->lwWaitLink = NULL;
***************
*** 335,340 ****
--- 336,342 ----
      MyProc->xmin = InvalidTransactionId;
      MyProc->databaseId = MyDatabaseId;
      MyProc->roleId = InvalidOid;
+     MyProc->inVacuum = false;
      MyProc->lwWaiting = false;
      MyProc->lwExclusive = false;
      MyProc->lwWaitLink = NULL;
Index: src/include/commands/vacuum.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/commands/vacuum.h,v
retrieving revision 1.60
diff -c -r1.60 vacuum.h
*** src/include/commands/vacuum.h    14 Jul 2005 05:13:43 -0000    1.60
--- src/include/commands/vacuum.h    17 Aug 2005 19:35:52 -0000
***************
*** 133,139 ****
                      BlockNumber num_pages,
                      double num_tuples,
                      bool hasindex);
! extern void vacuum_set_xid_limits(VacuumStmt *vacstmt, bool sharedRel,
                        TransactionId *oldestXmin,
                        TransactionId *freezeLimit);
  extern bool vac_is_partial_index(Relation indrel);
--- 133,140 ----
                      BlockNumber num_pages,
                      double num_tuples,
                      bool hasindex);
! extern void vacuum_set_xid_limits(VacuumStmt *vacstmt,
!                       bool sharedRel, bool ignoreVacuum,
                        TransactionId *oldestXmin,
                        TransactionId *freezeLimit);
  extern bool vac_is_partial_index(Relation indrel);
Index: src/include/storage/proc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/storage/proc.h,v
retrieving revision 1.80
diff -c -r1.80 proc.h
*** src/include/storage/proc.h    31 Jul 2005 17:19:22 -0000    1.80
--- src/include/storage/proc.h    17 Aug 2005 19:35:52 -0000
***************
*** 73,78 ****
--- 73,80 ----
      Oid            databaseId;        /* OID of database this backend is using */
      Oid            roleId;            /* OID of role using this backend */

+     bool        inVacuum;        /* true if current xact is a VACUUM */
+
      /* Info about LWLock the process is currently waiting for, if any. */
      bool        lwWaiting;        /* true if waiting for an LW lock */
      bool        lwExclusive;    /* true if waiting for exclusive access */
Index: src/include/storage/procarray.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/storage/procarray.h,v
retrieving revision 1.3
diff -c -r1.3 procarray.h
*** src/include/storage/procarray.h    31 Jul 2005 17:19:22 -0000    1.3
--- src/include/storage/procarray.h    17 Aug 2005 19:35:52 -0000
***************
*** 24,30 ****

  extern bool TransactionIdIsInProgress(TransactionId xid);
  extern bool TransactionIdIsActive(TransactionId xid);
! extern TransactionId GetOldestXmin(bool allDbs);

  extern PGPROC *BackendPidGetProc(int pid);
  extern bool IsBackendPid(int pid);
--- 24,30 ----

  extern bool TransactionIdIsInProgress(TransactionId xid);
  extern bool TransactionIdIsActive(TransactionId xid);
! extern TransactionId GetOldestXmin(bool allDbs, bool ignoreVacuum);

  extern PGPROC *BackendPidGetProc(int pid);
  extern bool IsBackendPid(int pid);

pgsql-patches by date:

Previous
From: Tom Lane
Date:
Subject: Re: Trivial patch, silence CC warnings
Next
From: Bruce Momjian
Date:
Subject: Cleanup of to_char/from_char