WAL logging freezing - Mailing list pgsql-patches

From Heikki Linnakangas
Subject WAL logging freezing
Date
Msg-id 453DBEAD.3080201@enterprisedb.com
Whole thread Raw
Responses Re: WAL logging freezing
Re: WAL logging freezing
List pgsql-patches
Here's a patch for WAL logging tuple freezes in vacuum, per discussion
on pgsql-bugs.

This patch is against CVS head. Should this be backported to stable
branches? I think it should.

After writing the patch, I realized that it needs some thought if
backported, because WAL records of removing tuples and freezing tuples
share the same heapam opcode XLOG_HEAP_CLEAN, and are only
differentiated by setting a flag. If we applied the patch as it is, and
for some reason someone used an older version, without the patch, to
replay WAL generated by a newer version, with the patch, the older
version would interpret the freeze WAL records as dead tuple removals,
and remove live tuples. I would've liked to give freezing a new opcode,
but we've run out of them (see htup.h).

--
    Heikki Linnakangas
    EnterpriseDB   http://www.enterprisedb.com

Index: src/backend/access/heap/heapam.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/heap/heapam.c,v
retrieving revision 1.220
diff -c -r1.220 heapam.c
*** src/backend/access/heap/heapam.c    4 Oct 2006 00:29:48 -0000    1.220
--- src/backend/access/heap/heapam.c    23 Oct 2006 18:17:17 -0000
***************
*** 2877,2889 ****
  /*
   * Perform XLogInsert for a heap-clean operation.  Caller must already
   * have modified the buffer and marked it dirty.
   */
  XLogRecPtr
! log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt)
  {
      xl_heap_clean xlrec;
      XLogRecPtr    recptr;
      XLogRecData rdata[2];

      /* Caller should not call me on a temp relation */
      Assert(!reln->rd_istemp);
--- 2877,2895 ----
  /*
   * Perform XLogInsert for a heap-clean operation.  Caller must already
   * have modified the buffer and marked it dirty.
+  *
+  * If freeze is true, the tuples specified in offsets array were frozen,
+  * otherwise they were dead and removed.
   */
  XLogRecPtr
! log_heap_clean(Relation reln, Buffer buffer,
!                OffsetNumber *offsets, int noffsets, bool freeze)
  {
      xl_heap_clean xlrec;
      XLogRecPtr    recptr;
      XLogRecData rdata[2];
+     uint8        info = freeze ?
+         (XLOG_HEAP_CLEAN | XLOG_HEAP_FREEZE) : XLOG_HEAP_CLEAN;

      /* Caller should not call me on a temp relation */
      Assert(!reln->rd_istemp);
***************
*** 2901,2910 ****
       * that it is.    When XLogInsert stores the whole buffer, the offsets array
       * need not be stored too.
       */
!     if (uncnt > 0)
      {
!         rdata[1].data = (char *) unused;
!         rdata[1].len = uncnt * sizeof(OffsetNumber);
      }
      else
      {
--- 2907,2916 ----
       * that it is.    When XLogInsert stores the whole buffer, the offsets array
       * need not be stored too.
       */
!     if (noffsets > 0)
      {
!         rdata[1].data = (char *) offsets;
!         rdata[1].len = noffsets * sizeof(OffsetNumber);
      }
      else
      {
***************
*** 2915,2921 ****
      rdata[1].buffer_std = true;
      rdata[1].next = NULL;

!     recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CLEAN, rdata);

      return recptr;
  }
--- 2921,2927 ----
      rdata[1].buffer_std = true;
      rdata[1].next = NULL;

!     recptr = XLogInsert(RM_HEAP_ID, info, rdata);

      return recptr;
  }
***************
*** 3030,3039 ****
--- 3036,3048 ----
      Relation    reln;
      Buffer        buffer;
      Page        page;
+     bool        freeze;

      if (record->xl_info & XLR_BKP_BLOCK_1)
          return;

+     freeze = record->xl_info & XLOG_HEAP_FREEZE;
+
      reln = XLogOpenRelation(xlrec->node);
      buffer = XLogReadBuffer(reln, xlrec->block, false);
      if (!BufferIsValid(buffer))
***************
*** 3048,3069 ****

      if (record->xl_len > SizeOfHeapClean)
      {
!         OffsetNumber *unused;
!         OffsetNumber *unend;
          ItemId        lp;

!         unused = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
!         unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);

!         while (unused < unend)
          {
!             lp = PageGetItemId(page, *unused + 1);
!             lp->lp_flags &= ~LP_USED;
!             unused++;
          }
      }

!     PageRepairFragmentation(page, NULL);

      PageSetLSN(page, lsn);
      PageSetTLI(page, ThisTimeLineID);
--- 3057,3089 ----

      if (record->xl_len > SizeOfHeapClean)
      {
!         OffsetNumber *offsets;
!         OffsetNumber *offend;
          ItemId        lp;

!         offsets = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
!         offend = (OffsetNumber *) ((char *) xlrec + record->xl_len);

!         while (offsets < offend)
          {
!             lp = PageGetItemId(page, *offsets + 1);
!
!             if(freeze)
!             {
!                 HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, lp);
!
!                 Assert(!(htup->t_infomask & HEAP_XMIN_INVALID));
!
!                 htup->t_infomask |= HEAP_XMIN_COMMITTED;
!                 HeapTupleHeaderSetXmin(htup, FrozenTransactionId);
!             } else
!                 lp->lp_flags &= ~LP_USED;
!             offsets++;
          }
      }

!     if(!freeze)
!         PageRepairFragmentation(page, NULL);

      PageSetLSN(page, lsn);
      PageSetTLI(page, ThisTimeLineID);
Index: src/backend/commands/vacuum.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/commands/vacuum.c,v
retrieving revision 1.341
diff -c -r1.341 vacuum.c
*** src/backend/commands/vacuum.c    4 Oct 2006 00:29:51 -0000    1.341
--- src/backend/commands/vacuum.c    23 Oct 2006 18:36:07 -0000
***************
*** 1357,1364 ****
          Buffer        buf;
          OffsetNumber offnum,
                      maxoff;
!         bool        pgchanged,
!                     notup;

          vacuum_delay_point();

--- 1357,1365 ----
          Buffer        buf;
          OffsetNumber offnum,
                      maxoff;
!         bool        notup;
!         OffsetNumber frozen[MaxOffsetNumber];
!         int            nfrozen;

          vacuum_delay_point();

***************
*** 1414,1420 ****
              continue;
          }

!         pgchanged = false;
          notup = true;
          maxoff = PageGetMaxOffsetNumber(page);
          for (offnum = FirstOffsetNumber;
--- 1415,1421 ----
              continue;
          }

!         nfrozen = 0;
          notup = true;
          maxoff = PageGetMaxOffsetNumber(page);
          for (offnum = FirstOffsetNumber;
***************
*** 1458,1464 ****
                          HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
                          /* infomask should be okay already */
                          Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED);
!                         pgchanged = true;
                      }

                      /*
--- 1459,1465 ----
                          HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
                          /* infomask should be okay already */
                          Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED);
!                         frozen[nfrozen++] = offnum;
                      }

                      /*
***************
*** 1627,1634 ****
          else
              empty_end_pages = 0;

!         if (pgchanged)
              MarkBufferDirty(buf);
          UnlockReleaseBuffer(buf);
      }

--- 1628,1650 ----
          else
              empty_end_pages = 0;

!         /*
!          * If we froze any tuples, write a WAL record. We used to treat
!          * freezing the same as hint bit updates, because it was thought that
!          * losing a tuple freeze doesn't matter since the tuple is marked as
!          * committed anyway. But that's not safe: if we later truncate the
!          * clog and crash, we might end up with xids on the disk that belonged
!          * to a truncated clog segment.
!          */
!         if (nfrozen > 0)
!         {
!             XLogRecPtr recptr;
!
              MarkBufferDirty(buf);
+             recptr = log_heap_clean(onerel, buf, frozen, nfrozen, true);
+             PageSetLSN(page, recptr);
+             PageSetTLI(page, ThisTimeLineID);
+         }
          UnlockReleaseBuffer(buf);
      }

***************
*** 2603,2609 ****
              {
                  XLogRecPtr    recptr;

!                 recptr = log_heap_clean(onerel, buf, unused, uncnt);
                  PageSetLSN(page, recptr);
                  PageSetTLI(page, ThisTimeLineID);
              }
--- 2619,2625 ----
              {
                  XLogRecPtr    recptr;

!                 recptr = log_heap_clean(onerel, buf, unused, uncnt, false);
                  PageSetLSN(page, recptr);
                  PageSetTLI(page, ThisTimeLineID);
              }
***************
*** 3074,3080 ****
      {
          XLogRecPtr    recptr;

!         recptr = log_heap_clean(onerel, buffer, unused, uncnt);
          PageSetLSN(page, recptr);
          PageSetTLI(page, ThisTimeLineID);
      }
--- 3090,3096 ----
      {
          XLogRecPtr    recptr;

!         recptr = log_heap_clean(onerel, buffer, unused, uncnt, false);
          PageSetLSN(page, recptr);
          PageSetTLI(page, ThisTimeLineID);
      }
Index: src/backend/commands/vacuumlazy.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/commands/vacuumlazy.c,v
retrieving revision 1.80
diff -c -r1.80 vacuumlazy.c
*** src/backend/commands/vacuumlazy.c    4 Oct 2006 00:29:52 -0000    1.80
--- src/backend/commands/vacuumlazy.c    23 Oct 2006 18:35:52 -0000
***************
*** 266,275 ****
          Page        page;
          OffsetNumber offnum,
                      maxoff;
!         bool        pgchanged,
!                     tupgone,
                      hastup;
          int            prev_dead_count;

          vacuum_delay_point();

--- 266,276 ----
          Page        page;
          OffsetNumber offnum,
                      maxoff;
!         bool        tupgone,
                      hastup;
          int            prev_dead_count;
+         OffsetNumber frozen[MaxOffsetNumber];
+         int            nfrozen;

          vacuum_delay_point();

***************
*** 349,355 ****
              continue;
          }

!         pgchanged = false;
          hastup = false;
          prev_dead_count = vacrelstats->num_dead_tuples;
          maxoff = PageGetMaxOffsetNumber(page);
--- 350,356 ----
              continue;
          }

!         nfrozen = 0;
          hastup = false;
          prev_dead_count = vacrelstats->num_dead_tuples;
          maxoff = PageGetMaxOffsetNumber(page);
***************
*** 398,404 ****
                          HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
                          /* infomask should be okay already */
                          Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED);
!                         pgchanged = true;
                      }

                      /*
--- 399,405 ----
                          HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
                          /* infomask should be okay already */
                          Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED);
!                         frozen[nfrozen++] = offnum;
                      }

                      /*
***************
*** 485,492 ****
          if (hastup)
              vacrelstats->nonempty_pages = blkno + 1;

!         if (pgchanged)
              MarkBufferDirty(buf);
          UnlockReleaseBuffer(buf);
      }

--- 486,508 ----
          if (hastup)
              vacrelstats->nonempty_pages = blkno + 1;

!         /*
!          * If we froze any tuples, write a WAL record. We used to treat
!          * freezing the same as hint bit updates, because it was thought that
!          * losing a tuple freeze doesn't matter since the tuple is marked as
!          * committed anyway. But that's not safe: if we later truncate the
!          * clog and crash, we might end up with xids on the disk that belonged
!          * to a truncated clog segment.
!          */
!         if (nfrozen > 0)
!         {
!             XLogRecPtr recptr;
!
              MarkBufferDirty(buf);
+             recptr = log_heap_clean(onerel, buf, frozen, nfrozen, true);
+             PageSetLSN(page, recptr);
+             PageSetTLI(page, ThisTimeLineID);
+         }
          UnlockReleaseBuffer(buf);
      }

***************
*** 635,641 ****
      {
          XLogRecPtr    recptr;

!         recptr = log_heap_clean(onerel, buffer, unused, uncnt);
          PageSetLSN(page, recptr);
          PageSetTLI(page, ThisTimeLineID);
      }
--- 651,657 ----
      {
          XLogRecPtr    recptr;

!         recptr = log_heap_clean(onerel, buffer, unused, uncnt, false);
          PageSetLSN(page, recptr);
          PageSetTLI(page, ThisTimeLineID);
      }
Index: src/include/access/heapam.h
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/access/heapam.h,v
retrieving revision 1.116
diff -c -r1.116 heapam.h
*** src/include/access/heapam.h    4 Oct 2006 00:30:07 -0000    1.116
--- src/include/access/heapam.h    23 Oct 2006 17:52:27 -0000
***************
*** 182,188 ****
  extern void heap_redo(XLogRecPtr lsn, XLogRecord *rptr);
  extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec);
  extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
!                OffsetNumber *unused, int uncnt);
  extern XLogRecPtr log_heap_move(Relation reln, Buffer oldbuf,
                ItemPointerData from,
                Buffer newbuf, HeapTuple newtup);
--- 182,188 ----
  extern void heap_redo(XLogRecPtr lsn, XLogRecord *rptr);
  extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec);
  extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
!                OffsetNumber *offsets, int noffsets, bool freeze);
  extern XLogRecPtr log_heap_move(Relation reln, Buffer oldbuf,
                ItemPointerData from,
                Buffer newbuf, HeapTuple newtup);
Index: src/include/access/htup.h
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/access/htup.h,v
retrieving revision 1.86
diff -c -r1.86 htup.h
*** src/include/access/htup.h    4 Oct 2006 00:30:07 -0000    1.86
--- src/include/access/htup.h    23 Oct 2006 17:14:44 -0000
***************
*** 510,515 ****
--- 510,521 ----
   * we can (and we do) restore entire page in redo
   */
  #define XLOG_HEAP_INIT_PAGE 0x80
+ /*
+  * XLOG_HEAP_CLEAN | XLOG_HEAP_FREEZE means that tuples on this page
+  * should be frozen. We can share the bit with XLOG_HEAP_INIT_PAGE,
+  * because it's not used when cleaning.
+  */
+ #define XLOG_HEAP_FREEZE    0x80

  /*
   * All what we need to find changed tuple


pgsql-patches by date:

Previous
From: Tom Lane
Date:
Subject: Re: pg_buffercache tidyup
Next
From: Alvaro Herrera
Date:
Subject: Re: WAL logging freezing