WAL logging freezing - Mailing list pgsql-patches
From | Heikki Linnakangas |
---|---|
Subject | WAL logging freezing |
Date | |
Msg-id | 453DBEAD.3080201@enterprisedb.com Whole thread Raw |
Responses |
Re: WAL logging freezing
Re: WAL logging freezing |
List | pgsql-patches |
Here's a patch for WAL logging tuple freezes in vacuum, per discussion on pgsql-bugs. This patch is against CVS head. Should this be backported to stable branches? I think it should. After writing the patch, I realized that it needs some thought if backported, because WAL records for removing tuples and for freezing tuples share the same heapam opcode XLOG_HEAP_CLEAN, and are only differentiated by setting a flag. If we applied the patch as it is, and for some reason someone replayed WAL generated by a newer version (with the patch) on an older version (without the patch), the older version would interpret the freeze WAL records as dead tuple removals, and remove live records. I would've liked to give freezing a new opcode, but we've run out of them (see htup.h). -- Heikki Linnakangas EnterpriseDB http://www.enterprisedb.com Index: src/backend/access/heap/heapam.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/heap/heapam.c,v retrieving revision 1.220 diff -c -r1.220 heapam.c *** src/backend/access/heap/heapam.c 4 Oct 2006 00:29:48 -0000 1.220 --- src/backend/access/heap/heapam.c 23 Oct 2006 18:17:17 -0000 *************** *** 2877,2889 **** /* * Perform XLogInsert for a heap-clean operation. Caller must already * have modified the buffer and marked it dirty. */ XLogRecPtr ! log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt) { xl_heap_clean xlrec; XLogRecPtr recptr; XLogRecData rdata[2]; /* Caller should not call me on a temp relation */ Assert(!reln->rd_istemp); --- 2877,2895 ---- /* * Perform XLogInsert for a heap-clean operation. Caller must already * have modified the buffer and marked it dirty. + * + * If freeze is true, the tuples specified in offsets array were frozen, + * otherwise they were dead and removed. */ XLogRecPtr ! log_heap_clean(Relation reln, Buffer buffer, ! 
OffsetNumber *offsets, int noffsets, bool freeze) { xl_heap_clean xlrec; XLogRecPtr recptr; XLogRecData rdata[2]; + uint8 info = freeze ? + (XLOG_HEAP_CLEAN | XLOG_HEAP_FREEZE) : XLOG_HEAP_CLEAN; /* Caller should not call me on a temp relation */ Assert(!reln->rd_istemp); *************** *** 2901,2910 **** * that it is. When XLogInsert stores the whole buffer, the offsets array * need not be stored too. */ ! if (uncnt > 0) { ! rdata[1].data = (char *) unused; ! rdata[1].len = uncnt * sizeof(OffsetNumber); } else { --- 2907,2916 ---- * that it is. When XLogInsert stores the whole buffer, the offsets array * need not be stored too. */ ! if (noffsets > 0) { ! rdata[1].data = (char *) offsets; ! rdata[1].len = noffsets * sizeof(OffsetNumber); } else { *************** *** 2915,2921 **** rdata[1].buffer_std = true; rdata[1].next = NULL; ! recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CLEAN, rdata); return recptr; } --- 2921,2927 ---- rdata[1].buffer_std = true; rdata[1].next = NULL; ! recptr = XLogInsert(RM_HEAP_ID, info, rdata); return recptr; } *************** *** 3030,3039 **** --- 3036,3048 ---- Relation reln; Buffer buffer; Page page; + bool freeze; if (record->xl_info & XLR_BKP_BLOCK_1) return; + freeze = record->xl_info & XLOG_HEAP_FREEZE; + reln = XLogOpenRelation(xlrec->node); buffer = XLogReadBuffer(reln, xlrec->block, false); if (!BufferIsValid(buffer)) *************** *** 3048,3069 **** if (record->xl_len > SizeOfHeapClean) { ! OffsetNumber *unused; ! OffsetNumber *unend; ItemId lp; ! unused = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean); ! unend = (OffsetNumber *) ((char *) xlrec + record->xl_len); ! while (unused < unend) { ! lp = PageGetItemId(page, *unused + 1); ! lp->lp_flags &= ~LP_USED; ! unused++; } } ! PageRepairFragmentation(page, NULL); PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); --- 3057,3089 ---- if (record->xl_len > SizeOfHeapClean) { ! OffsetNumber *offsets; ! OffsetNumber *offend; ItemId lp; ! 
offsets = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean); ! offend = (OffsetNumber *) ((char *) xlrec + record->xl_len); ! while (offsets < offend) { ! lp = PageGetItemId(page, *offsets + 1); ! ! if(freeze) ! { ! HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, lp); ! ! Assert(!(htup->t_infomask & HEAP_XMIN_INVALID)); ! ! htup->t_infomask |= HEAP_XMIN_COMMITTED; ! HeapTupleHeaderSetXmin(htup, FrozenTransactionId); ! } else ! lp->lp_flags &= ~LP_USED; ! offsets++; } } ! if(!freeze) ! PageRepairFragmentation(page, NULL); PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); Index: src/backend/commands/vacuum.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/commands/vacuum.c,v retrieving revision 1.341 diff -c -r1.341 vacuum.c *** src/backend/commands/vacuum.c 4 Oct 2006 00:29:51 -0000 1.341 --- src/backend/commands/vacuum.c 23 Oct 2006 18:36:07 -0000 *************** *** 1357,1364 **** Buffer buf; OffsetNumber offnum, maxoff; ! bool pgchanged, ! notup; vacuum_delay_point(); --- 1357,1365 ---- Buffer buf; OffsetNumber offnum, maxoff; ! bool notup; ! OffsetNumber frozen[MaxOffsetNumber]; ! int nfrozen; vacuum_delay_point(); *************** *** 1414,1420 **** continue; } ! pgchanged = false; notup = true; maxoff = PageGetMaxOffsetNumber(page); for (offnum = FirstOffsetNumber; --- 1415,1421 ---- continue; } ! nfrozen = 0; notup = true; maxoff = PageGetMaxOffsetNumber(page); for (offnum = FirstOffsetNumber; *************** *** 1458,1464 **** HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId); /* infomask should be okay already */ Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED); ! pgchanged = true; } /* --- 1459,1465 ---- HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId); /* infomask should be okay already */ Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED); ! 
frozen[nfrozen++] = offnum; } /* *************** *** 1627,1634 **** else empty_end_pages = 0; ! if (pgchanged) MarkBufferDirty(buf); UnlockReleaseBuffer(buf); } --- 1628,1650 ---- else empty_end_pages = 0; ! /* ! * If we froze any tuples, write a WAL record. We used to treat ! * freezing the same as hint bit updates, because it was thought that ! * losing a tuple freeze doesn't matter since the tuple is marked as ! * committed anyway. But that's not safe: if we later truncate the ! * clog and crash, we might end up with xids on the disk that belonged ! * to a truncated clog segment. ! */ ! if (nfrozen > 0) ! { ! XLogRecPtr recptr; ! MarkBufferDirty(buf); + recptr = log_heap_clean(onerel, buf, frozen, nfrozen, true); + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } UnlockReleaseBuffer(buf); } *************** *** 2603,2609 **** { XLogRecPtr recptr; ! recptr = log_heap_clean(onerel, buf, unused, uncnt); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } --- 2619,2625 ---- { XLogRecPtr recptr; ! recptr = log_heap_clean(onerel, buf, unused, uncnt, false); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } *************** *** 3074,3080 **** { XLogRecPtr recptr; ! recptr = log_heap_clean(onerel, buffer, unused, uncnt); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } --- 3090,3096 ---- { XLogRecPtr recptr; ! recptr = log_heap_clean(onerel, buffer, unused, uncnt, false); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } Index: src/backend/commands/vacuumlazy.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/commands/vacuumlazy.c,v retrieving revision 1.80 diff -c -r1.80 vacuumlazy.c *** src/backend/commands/vacuumlazy.c 4 Oct 2006 00:29:52 -0000 1.80 --- src/backend/commands/vacuumlazy.c 23 Oct 2006 18:35:52 -0000 *************** *** 266,275 **** Page page; OffsetNumber offnum, maxoff; ! bool pgchanged, ! 
tupgone, hastup; int prev_dead_count; vacuum_delay_point(); --- 266,276 ---- Page page; OffsetNumber offnum, maxoff; ! bool tupgone, hastup; int prev_dead_count; + OffsetNumber frozen[MaxOffsetNumber]; + int nfrozen; vacuum_delay_point(); *************** *** 349,355 **** continue; } ! pgchanged = false; hastup = false; prev_dead_count = vacrelstats->num_dead_tuples; maxoff = PageGetMaxOffsetNumber(page); --- 350,356 ---- continue; } ! nfrozen = 0; hastup = false; prev_dead_count = vacrelstats->num_dead_tuples; maxoff = PageGetMaxOffsetNumber(page); *************** *** 398,404 **** HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId); /* infomask should be okay already */ Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED); ! pgchanged = true; } /* --- 399,405 ---- HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId); /* infomask should be okay already */ Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED); ! frozen[nfrozen++] = offnum; } /* *************** *** 485,492 **** if (hastup) vacrelstats->nonempty_pages = blkno + 1; ! if (pgchanged) MarkBufferDirty(buf); UnlockReleaseBuffer(buf); } --- 486,508 ---- if (hastup) vacrelstats->nonempty_pages = blkno + 1; ! /* ! * If we froze any tuples, write a WAL record. We used to treat ! * freezing the same as hint bit updates, because it was thought that ! * losing a tuple freeze doesn't matter since the tuple is marked as ! * committed anyway. But that's not safe: if we later truncate the ! * clog and crash, we might end up with xids on the disk that belonged ! * to a truncated clog segment. ! */ ! if (nfrozen > 0) ! { ! XLogRecPtr recptr; ! MarkBufferDirty(buf); + recptr = log_heap_clean(onerel, buf, frozen, nfrozen, true); + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } UnlockReleaseBuffer(buf); } *************** *** 635,641 **** { XLogRecPtr recptr; ! 
recptr = log_heap_clean(onerel, buffer, unused, uncnt); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } --- 651,657 ---- { XLogRecPtr recptr; ! recptr = log_heap_clean(onerel, buffer, unused, uncnt, false); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } Index: src/include/access/heapam.h =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/access/heapam.h,v retrieving revision 1.116 diff -c -r1.116 heapam.h *** src/include/access/heapam.h 4 Oct 2006 00:30:07 -0000 1.116 --- src/include/access/heapam.h 23 Oct 2006 17:52:27 -0000 *************** *** 182,188 **** extern void heap_redo(XLogRecPtr lsn, XLogRecord *rptr); extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec); extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer, ! OffsetNumber *unused, int uncnt); extern XLogRecPtr log_heap_move(Relation reln, Buffer oldbuf, ItemPointerData from, Buffer newbuf, HeapTuple newtup); --- 182,188 ---- extern void heap_redo(XLogRecPtr lsn, XLogRecord *rptr); extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec); extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer, ! OffsetNumber *offsets, int noffsets, bool freeze); extern XLogRecPtr log_heap_move(Relation reln, Buffer oldbuf, ItemPointerData from, Buffer newbuf, HeapTuple newtup); Index: src/include/access/htup.h =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/access/htup.h,v retrieving revision 1.86 diff -c -r1.86 htup.h *** src/include/access/htup.h 4 Oct 2006 00:30:07 -0000 1.86 --- src/include/access/htup.h 23 Oct 2006 17:14:44 -0000 *************** *** 510,515 **** --- 510,521 ---- * we can (and we do) restore entire page in redo */ #define XLOG_HEAP_INIT_PAGE 0x80 + /* + * XLOG_HEAP_CLEAN | XLOG_HEAP_FREEZE means that tuples on this page + * should be frozen. 
We can share the bit with XLOG_HEAP_INIT_PAGE, + * because it's not used when cleaning. + */ + #define XLOG_HEAP_FREEZE 0x80 /* * All what we need to find changed tuple
pgsql-patches by date: