From 772103d67b8b518541209b0460f80e3342a406e6 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Mon, 22 Jun 2020 10:05:21 +0530 Subject: [PATCH] Introduce vacuum errcontext to display additional information. The additional information displayed is the block number for an error that occurs while processing the heap, and the index name for an error that occurs while processing an index. This will help us in diagnosing the problems that occur during a vacuum. For example, if corruption (caused either by bad hardware or by some bug) leads to an error while vacuuming, it can help us identify the affected heap block and/or index. It sets up an error context callback to display additional information with the error. During different phases of vacuum (heap scan, heap vacuum, index vacuum, index clean up, heap truncate), we update the error context callback to display appropriate information. We can extend it to a bit more granular level like adding the phases for FSM operations or for prefetching the blocks while truncating. However, I felt that it requires adding many more error callback function calls and can make the code a bit complex, so I left those for now. --- src/backend/access/heap/vacuumlazy.c | 185 +++++++++++++++++++++++++++++++++-- src/tools/pgindent/typedefs.list | 1 + 2 files changed, 178 insertions(+), 8 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 3d4719d..a3f8f0b 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -110,8 +110,21 @@ */ #define PREFETCH_SIZE ((BlockNumber) 32) +/* Phases of vacuum during which we report error context. 
*/ +typedef enum +{ + VACUUM_ERRCB_PHASE_UNKNOWN, + VACUUM_ERRCB_PHASE_SCAN_HEAP, + VACUUM_ERRCB_PHASE_VACUUM_INDEX, + VACUUM_ERRCB_PHASE_VACUUM_HEAP, + VACUUM_ERRCB_PHASE_INDEX_CLEANUP, + VACUUM_ERRCB_PHASE_TRUNCATE +} VacErrPhase; + typedef struct LVRelStats { + char *relnamespace; + char *relname; /* useindex = true means two-pass strategy; false means one-pass */ bool useindex; /* Overall statistics about rel */ @@ -136,6 +149,11 @@ typedef struct LVRelStats int num_index_scans; TransactionId latestRemovedXid; bool lock_waiter_detected; + + /* Used for error callback */ + char *indname; + BlockNumber blkno; /* used only for heap operations */ + VacErrPhase phase; } LVRelStats; @@ -175,6 +193,9 @@ static bool lazy_tid_reaped(ItemPointer itemptr, void *state); static int vac_cmp_itemptr(const void *left, const void *right); static bool heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid, bool *all_frozen); +static void vacuum_error_callback(void *arg); +static void update_vacuum_error_info(LVRelStats *errinfo, int phase, + BlockNumber blkno, char *indname); /* @@ -208,6 +229,7 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, double new_live_tuples; TransactionId new_frozen_xid; MultiXactId new_min_multi; + ErrorContextCallback errcallback; Assert(params != NULL); Assert(params->index_cleanup != VACOPT_TERNARY_DEFAULT); @@ -257,6 +279,10 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats)); + vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel)); + vacrelstats->relname = pstrdup(RelationGetRelationName(onerel)); + vacrelstats->indname = NULL; + vacrelstats->phase = VACUUM_ERRCB_PHASE_UNKNOWN; vacrelstats->old_rel_pages = onerel->rd_rel->relpages; vacrelstats->old_live_tuples = onerel->rd_rel->reltuples; vacrelstats->num_index_scans = 0; @@ -268,6 +294,22 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, vacrelstats->useindex = 
(nindexes > 0 && params->index_cleanup == VACOPT_TERNARY_ENABLED); + /* + * Setup error traceback support for ereport(). The idea is to set up an + * error context callback to display additional information on any error + * during a vacuum. During different phases of vacuum (heap scan, heap + * vacuum, index vacuum, index clean up, heap truncate), we update the + * error context callback to display appropriate information. + * + * Note that the index vacuum and heap vacuum phases may be called + * multiple times in the middle of the heap scan phase. So the old phase + * information is restored at the end of those phases. + */ + errcallback.callback = vacuum_error_callback; + errcallback.arg = vacrelstats; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + /* Do the vacuuming */ lazy_scan_heap(onerel, params, vacrelstats, Irel, nindexes, aggressive); @@ -294,7 +336,19 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, * Optionally truncate the relation. */ if (should_attempt_truncation(params, vacrelstats)) + { + /* + * Update error traceback information. This is the last phase during + * which we add context information to errors, so we don't need to + * revert to the previous phase. 
+ */ + update_vacuum_error_info(vacrelstats, VACUUM_ERRCB_PHASE_TRUNCATE, + vacrelstats->nonempty_pages, NULL); lazy_truncate_heap(onerel, vacrelstats); + } + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; /* Report that we are now doing final cleanup */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, @@ -483,7 +537,6 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, BlockNumber nblocks, blkno; HeapTupleData tuple; - char *relname; TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid; TransactionId relminmxid = onerel->rd_rel->relminmxid; BlockNumber empty_pages, @@ -511,17 +564,16 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, pg_rusage_init(&ru0); - relname = RelationGetRelationName(onerel); if (aggressive) ereport(elevel, (errmsg("aggressively vacuuming \"%s.%s\"", - get_namespace_name(RelationGetNamespace(onerel)), - relname))); + vacrelstats->relnamespace, + vacrelstats->relname))); else ereport(elevel, (errmsg("vacuuming \"%s.%s\"", - get_namespace_name(RelationGetNamespace(onerel)), - relname))); + vacrelstats->relnamespace, + vacrelstats->relname))); empty_pages = vacuumed_pages = 0; next_fsm_block_to_vacuum = (BlockNumber) 0; @@ -642,6 +694,9 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); + update_vacuum_error_info(vacrelstats, VACUUM_ERRCB_PHASE_SCAN_HEAP, + blkno, NULL); + if (blkno == next_unskippable_block) { /* Time to advance next_unskippable_block */ @@ -1309,7 +1364,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer)) { elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", - relname, blkno); + vacrelstats->relname, blkno); visibilitymap_clear(onerel, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); } @@ -1330,7 
+1385,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, else if (PageIsAllVisible(page) && has_dead_tuples) { elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u", - relname, blkno); + vacrelstats->relname, blkno); PageClearAllVisible(page); MarkBufferDirty(buf); visibilitymap_clear(onerel, blkno, vmbuffer, @@ -1513,6 +1568,12 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) int npages; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; + LVRelStats olderrinfo; + + /* Update error traceback information */ + olderrinfo = *vacrelstats; + update_vacuum_error_info(vacrelstats, VACUUM_ERRCB_PHASE_VACUUM_HEAP, + InvalidBlockNumber, NULL); pg_rusage_init(&ru0); npages = 0; @@ -1528,6 +1589,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) vacuum_delay_point(); tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]); + vacrelstats->blkno = tblk; buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL, vac_strategy); if (!ConditionalLockBufferForCleanup(buf)) @@ -1559,6 +1621,12 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) RelationGetRelationName(onerel), tupindex, npages), errdetail_internal("%s", pg_rusage_show(&ru0)))); + + /* Revert to the previous phase information for error traceback */ + update_vacuum_error_info(vacrelstats, + olderrinfo.phase, + olderrinfo.blkno, + olderrinfo.indname); } /* @@ -1580,9 +1648,15 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, int uncnt = 0; TransactionId visibility_cutoff_xid; bool all_frozen; + LVRelStats olderrinfo; pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno); + /* Update error traceback information */ + olderrinfo = *vacrelstats; + update_vacuum_error_info(vacrelstats, VACUUM_ERRCB_PHASE_VACUUM_HEAP, + blkno, NULL); + START_CRIT_SECTION(); for (; tupindex < vacrelstats->num_dead_tuples; tupindex++) @@ -1659,6 +1733,11 @@ 
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, *vmbuffer, visibility_cutoff_xid, flags); } + /* Revert to the previous phase information for error traceback */ + update_vacuum_error_info(vacrelstats, + olderrinfo.phase, + olderrinfo.blkno, + olderrinfo.indname); return tupindex; } @@ -1729,6 +1808,7 @@ lazy_vacuum_index(Relation indrel, { IndexVacuumInfo ivinfo; PGRUsage ru0; + LVRelStats olderrinfo; pg_rusage_init(&ru0); @@ -1741,6 +1821,13 @@ lazy_vacuum_index(Relation indrel, ivinfo.num_heap_tuples = vacrelstats->old_live_tuples; ivinfo.strategy = vac_strategy; + /* Update error traceback information */ + olderrinfo = *vacrelstats; + update_vacuum_error_info(vacrelstats, + VACUUM_ERRCB_PHASE_VACUUM_INDEX, + InvalidBlockNumber, + RelationGetRelationName(indrel)); + /* Do bulk deletion */ *stats = index_bulk_delete(&ivinfo, *stats, lazy_tid_reaped, (void *) vacrelstats); @@ -1750,6 +1837,12 @@ lazy_vacuum_index(Relation indrel, RelationGetRelationName(indrel), vacrelstats->num_dead_tuples), errdetail_internal("%s", pg_rusage_show(&ru0)))); + + /* Revert to the previous phase information for error traceback */ + update_vacuum_error_info(vacrelstats, + olderrinfo.phase, + olderrinfo.blkno, + olderrinfo.indname); } /* @@ -1762,6 +1855,7 @@ lazy_cleanup_index(Relation indrel, { IndexVacuumInfo ivinfo; PGRUsage ru0; + LVRelStats olderrcbarg; pg_rusage_init(&ru0); @@ -1779,8 +1873,21 @@ lazy_cleanup_index(Relation indrel, ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples; ivinfo.strategy = vac_strategy; + /* Update error traceback information */ + olderrcbarg = *vacrelstats; + update_vacuum_error_info(vacrelstats, + VACUUM_ERRCB_PHASE_INDEX_CLEANUP, + InvalidBlockNumber, + RelationGetRelationName(indrel)); + stats = index_vacuum_cleanup(&ivinfo, stats); + /* Revert back to the old phase information for error traceback */ + update_vacuum_error_info(vacrelstats, + olderrcbarg.phase, + olderrcbarg.blkno, + olderrcbarg.indname); + if (!stats) return; 
@@ -1936,6 +2043,7 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
 		 * were vacuuming.
 		 */
 		new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
+		vacrelstats->blkno = new_rel_pages;
 
 		if (new_rel_pages >= old_rel_pages)
 		{
@@ -2339,3 +2447,88 @@ heap_page_is_all_visible(Relation rel, Buffer buf,
 
 	return all_visible;
 }
+
+/*
+ * Error context callback for errors occurring during vacuum.
+ *
+ * arg is the LVRelStats that heap_vacuum_rel() registered with the callback.
+ * Depending on the recorded phase, one context line naming the heap block or
+ * the index being processed is added.  Nothing is added for an unknown phase
+ * or, in the heap and truncate phases, while the block number is invalid.
+ */
+static void
+vacuum_error_callback(void *arg)
+{
+	LVRelStats *errinfo = arg;
+
+	switch (errinfo->phase)
+	{
+		case VACUUM_ERRCB_PHASE_SCAN_HEAP:
+			if (BlockNumberIsValid(errinfo->blkno))
+				errcontext("while scanning block %u of relation \"%s.%s\"",
+						   errinfo->blkno, errinfo->relnamespace, errinfo->relname);
+			break;
+
+		case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
+			if (BlockNumberIsValid(errinfo->blkno))
+				errcontext("while vacuuming block %u of relation \"%s.%s\"",
+						   errinfo->blkno, errinfo->relnamespace, errinfo->relname);
+			break;
+
+		case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
+			errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
+					   errinfo->indname, errinfo->relnamespace, errinfo->relname);
+			break;
+
+		case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
+			errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
+					   errinfo->indname, errinfo->relnamespace, errinfo->relname);
+			break;
+
+		case VACUUM_ERRCB_PHASE_TRUNCATE:
+			if (BlockNumberIsValid(errinfo->blkno))
+				errcontext("while truncating relation \"%s.%s\" to %u blocks",
+						   errinfo->relnamespace, errinfo->relname, errinfo->blkno);
+			break;
+
+		case VACUUM_ERRCB_PHASE_UNKNOWN:
+		default:
+			return;				/* do nothing; the errinfo may not be
+								 * initialized */
+	}
+}
+
+/*
+ * Update vacuum error callback for the current phase, block, and index.
+ *
+ * errinfo->indname is replaced by a private copy of indname (NULL when
+ * indname is NULL) and the previously stored copy, if any, is freed.
+ *
+ * The copy is made before anything in errinfo is changed.  pstrdup() can
+ * raise an out-of-memory error, and vacuum_error_callback() would then run
+ * against errinfo; it must not find indname pointing at freed memory.  This
+ * ordering also keeps the call safe when indname is errinfo->indname itself.
+ *
+ * Note that a name stored here is freed by the next call, so a caller must
+ * not hand back a pointer it read from errinfo before an intervening update
+ * unless that pointer was NULL.
+ */
+static void
+update_vacuum_error_info(LVRelStats *errinfo, int phase, BlockNumber blkno,
+						 char *indname)
+{
+	char	   *newindname;
+
+	/* Allocate first, so that a failure here leaves errinfo untouched */
+	newindname = indname ? pstrdup(indname) : NULL;
+
+	errinfo->blkno = blkno;
+	errinfo->phase = phase;
+
+	/* Free index name from any previous phase */
+	if (errinfo->indname)
+		pfree(errinfo->indname);
+
+	/* For index phases, save the name of the current index for the callback */
+	errinfo->indname = newindname;
+}
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 2f23dc3..022a3d4 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2551,6 +2551,7 @@ UserMapping
 UserOpts
 VacAttrStats
 VacAttrStatsP
+VacErrPhase
 VacOptTernaryValue
 VacuumParams
 VacuumRelation
-- 
1.8.3.1