From 272952b5631f092d9bcfdc7865dbf7ae9b8a2b23 Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot Date: Tue, 4 Nov 2025 13:52:46 +0000 Subject: [PATCH v10 2/2] handle relation statistics correctly during rewrites Now that PGSTAT_KIND_RELATION is keyed by refilenode, we need to handle rewrites. To do so, this patch: - Adds PgStat_PendingRewrite, a new struct to track rewrite operations within a transaction, storing the old locator, new locator, and original locator (for rewrite chains). This allows stats to be copied from the original location to the final location at commit time. - Adds a new function, pgstat_mark_rewrite(), called when a table rewrite begins. It records the rewrite operation in a local list and detects rewrite chains by checking if the old_locator matches any existing new_locator, preserving the chain's original_locator. - Modifies pgstat_copy_relation_stats(), to accept RelFileLocators instead of Relations, with a new increment parameter to accumulate stats (needed for rewrite chains with DML between rewrites). - Ensures that AtEOXact_PgStat_Relations(), AtPrepare_PgStat_Relations(), pgstat_twophase_postcommit()/postabort() pgstat_drop_relation() handle the PgStat_PendingRewrite list correctly. Note that due to the new flush call in pgstat_twophase_postcommit() we can not call GetCurrentTransactionStopTimestamp() in pgstat_relation_flush_cb(). So, adding a check to handle this special case and call GetCurrentTimestamp() instead. Note that we'd call GetCurrentTimestamp() only if there is a rewrite, so that the GetCurrentTimestamp() extra cost should be negligible. Another solution could be to trigger the flush from FinishPreparedTransaction() but that's not worth the extra complexity. The new pending_rewrites list is traversed in multiple places. The overhead should be negligible in comparison to a rewrite and the list should not contain a lot of rewrites in practice. The pending_rewrites list is traversed in multiple places. In typical usage, the list will contain only a few entries so the traversal cost is negligible ( furthermore in comparison to a rewrite). --- src/backend/catalog/index.c | 2 +- src/backend/commands/cluster.c | 5 + src/backend/commands/tablecmds.c | 6 + src/backend/utils/activity/pgstat_relation.c | 391 ++++++++++++++++++- src/backend/utils/activity/pgstat_xact.c | 25 +- src/backend/utils/cache/relcache.c | 6 + src/include/pgstat.h | 5 +- src/tools/pgindent/typedefs.list | 1 + 8 files changed, 424 insertions(+), 17 deletions(-) 92.8% src/backend/utils/activity/ 4.9% src/backend/ diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 8dea58ad96b..b71925a22c3 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1795,7 +1795,7 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName) changeDependenciesOn(RelationRelationId, oldIndexId, newIndexId); /* copy over statistics from old to new index */ - pgstat_copy_relation_stats(newClassRel, oldClassRel); + pgstat_copy_relation_stats(newClassRel->rd_locator, oldClassRel->rd_locator, false); /* Copy data of pg_statistic from the old index to the new one */ CopyStatistics(oldIndexId, newIndexId); diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 2120c85ccb4..6155b12afab 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -1196,6 +1196,11 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, rel1 = relation_open(r1, NoLock); rel2 = relation_open(r2, NoLock); + + /* Mark that a rewrite happened */ + if (RELKIND_HAS_STORAGE(rel1->rd_rel->relkind)) + pgstat_mark_rewrite(rel1->rd_locator, rel2->rd_locator); + rel2->rd_createSubid = rel1->rd_createSubid; rel2->rd_newRelfilelocatorSubid = rel1->rd_newRelfilelocatorSubid; rel2->rd_firstRelfilelocatorSubid = rel1->rd_firstRelfilelocatorSubid; diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 6b1a00ed477..9de70f321ed 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -16884,6 +16884,7 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) Oid reltoastrelid; RelFileNumber newrelfilenumber; RelFileLocator newrlocator; + RelFileLocator oldrlocator; List *reltoastidxids = NIL; ListCell *lc; @@ -16922,6 +16923,7 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) newrlocator = rel->rd_locator; newrlocator.relNumber = newrelfilenumber; newrlocator.spcOid = newTableSpace; + oldrlocator = rel->rd_locator; /* hand off to AM to actually create new rel storage and copy the data */ if (rel->rd_rel->relkind == RELKIND_INDEX) @@ -16934,6 +16936,10 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) table_relation_copy_data(rel, &newrlocator); } + /* mark that a rewrite happened */ + if (RELKIND_HAS_STORAGE(rel->rd_rel->relkind)) + pgstat_mark_rewrite(oldrlocator, newrlocator); + /* * Update the pg_class row. * diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index a267e93f8be..123fb50d98f 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -30,6 +30,19 @@ #include "utils/syscache.h" #include "utils/timestamp.h" +/* Pending rewrite operations for stats copying */ +typedef struct PgStat_PendingRewrite +{ + RelFileLocator old_locator; + RelFileLocator new_locator; + RelFileLocator original_locator; + int nest_level; /* Transaction nesting level where rewrite + * occurred */ + struct PgStat_PendingRewrite *next; +} PgStat_PendingRewrite; + +/* The pending rewrites list for current transaction */ +static PgStat_PendingRewrite *pending_rewrites = NULL; /* Record that's written to 2PC state file when pgstat state is persisted */ typedef struct TwoPhasePgStatRecord @@ -43,6 +56,8 @@ typedef struct TwoPhasePgStatRecord PgStat_Counter deleted_pre_truncdrop; RelFileLocator locator; /* table's rd_locator */ bool truncdropped; /* was the relation truncated/dropped? */ + RelFileLocator rewrite_old_locator; + int rewrite_nest_level; } TwoPhasePgStatRecord; @@ -54,27 +69,70 @@ static void restore_truncdrop_counters(PgStat_TableXactStatus *trans); /* - * Copy stats between relations. This is used for things like REINDEX + * Copy stats between RelFileLocator. This is used for things like REINDEX * CONCURRENTLY. */ void -pgstat_copy_relation_stats(Relation dst, Relation src) +pgstat_copy_relation_stats(RelFileLocator dst, RelFileLocator src, bool increment) { PgStat_StatTabEntry *srcstats; PgStatShared_Relation *dstshstats; PgStat_EntryRef *dst_ref; - srcstats = pgstat_fetch_stat_tabentry_ext(RelationGetRelid(src)); + srcstats = (PgStat_StatTabEntry *) pgstat_fetch_entry(PGSTAT_KIND_RELATION, + src.dbOid, + RelFileLocatorToPgStatObjid(src)); if (!srcstats) return; dst_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION, - dst->rd_rel->relisshared ? InvalidOid : MyDatabaseId, - RelationGetRelid(dst), + dst.dbOid, + RelFileLocatorToPgStatObjid(dst), false); dstshstats = (PgStatShared_Relation *) dst_ref->shared_stats; - dstshstats->stats = *srcstats; + + if (!increment) + dstshstats->stats = *srcstats; + else + { + /* Increment those statistics */ +#define RELFSTAT_ACC(fld, stats_to_add) \ + (dstshstats->stats.fld += stats_to_add->fld) + RELFSTAT_ACC(numscans, srcstats); + RELFSTAT_ACC(tuples_returned, srcstats); + RELFSTAT_ACC(tuples_fetched, srcstats); + RELFSTAT_ACC(tuples_inserted, srcstats); + RELFSTAT_ACC(tuples_updated, srcstats); + RELFSTAT_ACC(tuples_deleted, srcstats); + RELFSTAT_ACC(tuples_hot_updated, srcstats); + RELFSTAT_ACC(tuples_newpage_updated, srcstats); + RELFSTAT_ACC(live_tuples, srcstats); + RELFSTAT_ACC(dead_tuples, srcstats); + RELFSTAT_ACC(mod_since_analyze, srcstats); + RELFSTAT_ACC(ins_since_vacuum, srcstats); + RELFSTAT_ACC(blocks_fetched, srcstats); + RELFSTAT_ACC(blocks_hit, srcstats); + RELFSTAT_ACC(vacuum_count, srcstats); + RELFSTAT_ACC(autovacuum_count, srcstats); + RELFSTAT_ACC(analyze_count, srcstats); + RELFSTAT_ACC(autoanalyze_count, srcstats); + RELFSTAT_ACC(total_vacuum_time, srcstats); + RELFSTAT_ACC(total_autovacuum_time, srcstats); + RELFSTAT_ACC(total_analyze_time, srcstats); + RELFSTAT_ACC(total_autoanalyze_time, srcstats); +#undef RELFSTAT_ACC + + /* Replace those statistics */ +#define RELFSTAT_REP(fld, stats_to_rep) \ + (dstshstats->stats.fld = stats_to_rep->fld) + RELFSTAT_REP(lastscan, srcstats); + RELFSTAT_REP(last_vacuum_time, srcstats); + RELFSTAT_REP(last_autovacuum_time, srcstats); + RELFSTAT_REP(last_analyze_time, srcstats); + RELFSTAT_REP(last_autoanalyze_time, srcstats); +#undef RELFSTAT_REP + } pgstat_unlock_entry(dst_ref); } @@ -136,6 +194,7 @@ void pgstat_assoc_relation(Relation rel) { RelFileLocator locator; + PgStat_TableStatus *pgstat_info; Assert(rel->pgstat_enabled); Assert(rel->pgstat_info == NULL); @@ -164,14 +223,54 @@ pgstat_assoc_relation(Relation rel) locator.relNumber = rel->rd_id; } + /* + * If this relation was rewritten during the current transaction we may be + * reopening it with its new RelFileLocator. In that case, continue using + * the stats entry associated with the old locator rather than creating a + * new one. This ensures all stats from before and after the rewrite are + * tracked in a single entry which will be properly copied to the new + * locator at transaction commit. + */ + if (pending_rewrites != NULL) + { + PgStat_PendingRewrite *rewrite; + + for (rewrite = pending_rewrites; rewrite != NULL; rewrite = rewrite->next) + { + if (locator.dbOid == rewrite->new_locator.dbOid && + locator.spcOid == rewrite->new_locator.spcOid && + locator.relNumber == rewrite->new_locator.relNumber) + { + pgstat_info = pgstat_prep_relation_pending(rewrite->old_locator); + goto found_entry; + } + } + } + /* Else find or make the PgStat_TableStatus entry, and update link */ - rel->pgstat_info = pgstat_prep_relation_pending(locator); + pgstat_info = pgstat_prep_relation_pending(locator); + +found_entry: + rel->pgstat_info = pgstat_info; + + /* + * For relations stats, we key by physical file location, not by relation + * OID. This means during operations like ALTER TYPE it's possible that + * the relation OID changes but the relfilenode stays the same (no actual + * rewrite needed). Unlink the old relation first. + */ + if (pgstat_info->relation != NULL && + pgstat_info->relation != rel) + { + pgstat_info->relation->pgstat_info = NULL; + pgstat_info->relation = NULL; + } /* don't allow link a stats to multiple relcache entries */ - Assert(rel->pgstat_info->relation == NULL); + Assert(pgstat_info->relation == NULL); /* mark this relation as the owner */ - rel->pgstat_info->relation = rel; + pgstat_info->relation = rel; } /* @@ -214,14 +313,37 @@ pgstat_drop_relation(Relation rel) { int nest_level = GetCurrentTransactionNestLevel(); PgStat_TableStatus *pgstat_info; + bool skip_transactional_drop = false; /* don't track stats for relations without storage */ if (!RELKIND_HAS_STORAGE(rel->rd_rel->relkind)) return; - pgstat_drop_transactional(PGSTAT_KIND_RELATION, - rel->rd_locator.dbOid, - RelFileLocatorToPgStatObjid(rel->rd_locator)); + /* Check if this drop is part of a pending rewrite */ + if (pending_rewrites != NULL) + { + PgStat_PendingRewrite *rewrite; + + for (rewrite = pending_rewrites; rewrite != NULL; rewrite = rewrite->next) + { + if (rel->rd_locator.dbOid == rewrite->old_locator.dbOid && + rel->rd_locator.spcOid == rewrite->old_locator.spcOid && + rel->rd_locator.relNumber == rewrite->old_locator.relNumber) + { + skip_transactional_drop = true; + break; + } + } + } + + /* + * If it is part of a rewrite, drop its stats later, for example in + * AtEOXact_PgStat_Relations(), so skip it here. + */ + if (!skip_transactional_drop) + pgstat_drop_transactional(PGSTAT_KIND_RELATION, + rel->rd_locator.dbOid, + RelFileLocatorToPgStatObjid(rel->rd_locator)); if (!pgstat_should_count_relation(rel)) return; @@ -666,6 +788,48 @@ AtEOXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit) } tabstat->trans = NULL; } + + /* preserve the stats in case of rewrite */ + if (isCommit && pending_rewrites != NULL) + { + PgStat_PendingRewrite *rewrite; + PgStat_PendingRewrite *prev = NULL; + PgStat_PendingRewrite *current = pending_rewrites; + PgStat_PendingRewrite *next; + + /* reverse the rewrites list to process in chronological order */ + while (current != NULL) + { + next = current->next; + current->next = prev; + prev = current; + current = next; + } + + /* now process rewrites in chronological order */ + for (rewrite = prev; rewrite != NULL; rewrite = rewrite->next) + { + PgStat_EntryRef *old_entry_ref; + + old_entry_ref = pgstat_fetch_pending_entry(PGSTAT_KIND_RELATION, + rewrite->old_locator.dbOid, + RelFileLocatorToPgStatObjid(rewrite->old_locator)); + + if (old_entry_ref && old_entry_ref->pending) + pgstat_relation_flush_cb(old_entry_ref, false); + + pgstat_copy_relation_stats(rewrite->new_locator, + rewrite->old_locator, true); + + /* drop old locator's stats */ + if (!pgstat_drop_entry(PGSTAT_KIND_RELATION, + rewrite->old_locator.dbOid, + RelFileLocatorToPgStatObjid(rewrite->old_locator))) + pgstat_request_entry_refs_gc(); + } + } + + pending_rewrites = NULL; } /* @@ -681,6 +845,30 @@ AtEOSubXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit, in PgStat_TableXactStatus *trans; PgStat_TableXactStatus *next_trans; + /* + * If we don't commit then remove the associated rewrites if any, to keep + * the rewrite chain in sync with what will be eventually committed. + */ + if (!isCommit) + { + PgStat_PendingRewrite **rewrite_ptr = &pending_rewrites; + + while (*rewrite_ptr != NULL) + { + if ((*rewrite_ptr)->nest_level >= nestDepth) + { + PgStat_PendingRewrite *to_remove = *rewrite_ptr; + + *rewrite_ptr = (*rewrite_ptr)->next; + pfree(to_remove); + } + else + { + rewrite_ptr = &((*rewrite_ptr)->next); + } + } + } + for (trans = xact_state->first; trans != NULL; trans = next_trans) { PgStat_TableStatus *tabstat; @@ -760,11 +948,19 @@ void AtPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state) { PgStat_TableXactStatus *trans; + PgStat_PendingRewrite *rewrite; + /* + * For each tabstat, find its matching rewrite and remove it from the + * pending rewrites list. This way, after processing all tabstats, pending + * rewrites will only contain rewrite only transactions. + */ for (trans = xact_state->first; trans != NULL; trans = trans->next) { PgStat_TableStatus *tabstat PG_USED_FOR_ASSERTS_ONLY; TwoPhasePgStatRecord record; + PgStat_PendingRewrite **rewrite_ptr; + bool found_rewrite = false; Assert(trans->nest_level == 1); Assert(trans->upper == NULL); @@ -784,10 +980,83 @@ AtPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state) record.locator = tabstat->locator; record.truncdropped = trans->truncdropped; + record.rewrite_nest_level = 0; + + /* + * Look for a matching rewrite and remove it from pending rewrites. We + * check three possible matches: + * + * The new_locator when stats have been added after the rewrite. The + * old_locator when stats have been added before the rewrite but not + * after. The original_locator when this tabstat is part of a rewrite + * chain. + */ + rewrite_ptr = &pending_rewrites; + while (*rewrite_ptr != NULL) + { + rewrite = *rewrite_ptr; + + if ((record.locator.dbOid == rewrite->new_locator.dbOid && + record.locator.spcOid == rewrite->new_locator.spcOid && + record.locator.relNumber == rewrite->new_locator.relNumber) || + (tabstat->locator.dbOid == rewrite->old_locator.dbOid && + tabstat->locator.spcOid == rewrite->old_locator.spcOid && + tabstat->locator.relNumber == rewrite->old_locator.relNumber) || + (tabstat->locator.dbOid == rewrite->original_locator.dbOid && + tabstat->locator.spcOid == rewrite->original_locator.spcOid && + tabstat->locator.relNumber == rewrite->original_locator.relNumber)) + { + /* + * Found matching rewrite. Record the rewrite information and + * remove this rewrite from the list since it's now handled. + */ + record.rewrite_old_locator = rewrite->original_locator; + record.rewrite_nest_level = rewrite->nest_level; + record.locator = rewrite->new_locator; + found_rewrite = true; + + /* Remove from pending_rewrites list */ + *rewrite_ptr = rewrite->next; + pfree(rewrite); + break; + } + else + { + /* Move to next rewrite in the list */ + rewrite_ptr = &(rewrite->next); + } + } + + /* If no rewrite found, clear the rewrite fields */ + if (!found_rewrite) + { + memset(&record.rewrite_old_locator, 0, sizeof(RelFileLocator)); + } + + RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0, + &record, sizeof(TwoPhasePgStatRecord)); + } + + /* + * Now process any rewrites still pending. These are rewrite only + * transactions. We need to preserve their stats even though there's no + * tabstat entry for them. + */ + for (rewrite = pending_rewrites; rewrite != NULL; rewrite = rewrite->next) + { + TwoPhasePgStatRecord record; + + memset(&record, 0, sizeof(TwoPhasePgStatRecord)); + record.locator = rewrite->new_locator; + record.rewrite_old_locator = rewrite->original_locator; + record.rewrite_nest_level = rewrite->nest_level; + record.truncdropped = false; RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0, &record, sizeof(TwoPhasePgStatRecord)); } + + pending_rewrites = NULL; } /* @@ -810,6 +1079,8 @@ PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state) tabstat = trans->parent; tabstat->trans = NULL; } + + pending_rewrites = NULL; } /* @@ -845,6 +1116,29 @@ pgstat_twophase_postcommit(FullTransactionId fxid, uint16 info, pgstat_info->counts.changed_tuples += rec->tuples_inserted + rec->tuples_updated + rec->tuples_deleted; + + if (rec->rewrite_nest_level > 0) + { + PgStat_EntryRef *old_entry_ref; + + /* Flush any pending stats for old locator first */ + old_entry_ref = pgstat_fetch_pending_entry(PGSTAT_KIND_RELATION, + rec->rewrite_old_locator.dbOid, + RelFileLocatorToPgStatObjid(rec->rewrite_old_locator)); + + if (old_entry_ref && old_entry_ref->pending) + pgstat_relation_flush_cb(old_entry_ref, false); + + /* Copy stats from old to new locator */ + pgstat_copy_relation_stats(rec->locator, rec->rewrite_old_locator, + true); + + /* Drop old locator's stats */ + if (!pgstat_drop_entry(PGSTAT_KIND_RELATION, + rec->rewrite_old_locator.dbOid, + RelFileLocatorToPgStatObjid(rec->rewrite_old_locator))) + pgstat_request_entry_refs_gc(); + } } /* @@ -859,9 +1153,26 @@ pgstat_twophase_postabort(FullTransactionId fxid, uint16 info, { TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; PgStat_TableStatus *pgstat_info; + RelFileLocator target_locator; + + /* + * For aborted transactions with rewrites (like TRUNCATE), we need to + * restore stats to the old locator, not the new one. The new locator + * should be dropped since the rewrite is being rolled back. + */ + if (rec->rewrite_nest_level > 0) + { + /* Use the old locator */ + target_locator = rec->rewrite_old_locator; + } + else + { + /* No rewrite, use the original locator */ + target_locator = rec->locator; + } /* Find or create a tabstat entry for the target locator */ - pgstat_info = pgstat_prep_relation_pending(rec->locator); + pgstat_info = pgstat_prep_relation_pending(target_locator); /* Same math as in AtEOXact_PgStat, abort case */ if (rec->truncdropped) @@ -916,7 +1227,17 @@ pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) tabentry->numscans += lstats->counts.numscans; if (lstats->counts.numscans) { - TimestampTz t = GetCurrentTransactionStopTimestamp(); + TimestampTz t; + + /* + * Checking the transaction state due to the flush call in + * pgstat_twophase_postcommit() that would break the assertion on the + * state in GetCurrentTransactionStopTimestamp(). + */ + if (!IsTransactionState()) + t = GetCurrentTransactionStopTimestamp(); + else + t = GetCurrentTimestamp(); if (t > tabentry->lastscan) tabentry->lastscan = t; @@ -1167,3 +1488,45 @@ pgstat_reloid_to_relfilelocator(Oid reloid, RelFileLocator *locator) ReleaseSysCache(tuple); return result; } + +/* + * Mark that a relation rewrite has occurred, preserving the original locator + * so stats can be copied at transaction commit. + */ +void +pgstat_mark_rewrite(RelFileLocator old_locator, RelFileLocator new_locator) +{ + PgStat_PendingRewrite *rewrite; + PgStat_PendingRewrite *existing; + RelFileLocator original_locator = old_locator; + + for (existing = pending_rewrites; existing != NULL; existing = existing->next) + { + if (old_locator.dbOid == existing->new_locator.dbOid && + old_locator.spcOid == existing->new_locator.spcOid && + old_locator.relNumber == existing->new_locator.relNumber) + { + original_locator = existing->original_locator; + break; + } + } + + /* Allocate in TopTransactionContext memory context */ + rewrite = MemoryContextAlloc(TopTransactionContext, + sizeof(PgStat_PendingRewrite)); + + rewrite->old_locator = old_locator; + rewrite->new_locator = new_locator; + rewrite->original_locator = original_locator; + rewrite->nest_level = GetCurrentTransactionNestLevel(); + + /* Add to the list */ + rewrite->next = pending_rewrites; + pending_rewrites = rewrite; +} + +void +pgstat_clear_rewrite(void) +{ + pending_rewrites = NULL; +} diff --git a/src/backend/utils/activity/pgstat_xact.c b/src/backend/utils/activity/pgstat_xact.c index bc9864bd8d9..f8cf3755ce2 100644 --- a/src/backend/utils/activity/pgstat_xact.c +++ b/src/backend/utils/activity/pgstat_xact.c @@ -55,6 +55,8 @@ AtEOXact_PgStat(bool isCommit, bool parallel) } pgStatXactStack = NULL; + pgstat_clear_rewrite(); + /* Make sure any stats snapshot is thrown away */ pgstat_clear_snapshot(); } @@ -360,8 +362,29 @@ create_drop_transactional_internal(PgStat_Kind kind, Oid dboid, uint64 objid, bo void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, uint64 objid) { - if (pgstat_get_entry_ref(kind, dboid, objid, false, NULL)) + PgStat_EntryRef *entry_ref; + + entry_ref = pgstat_get_entry_ref(kind, dboid, objid, false, NULL); + + if (entry_ref) { + /* + * For relations stats, we key by physical file location, not by + * relation OID. This means during operations like ALTER TYPE where + * the relation OID changes but the relfilenode stays the same (no + * actual rewrite needed), we'll find an existing entry. + * + * This is expected behavior, we want to preserve stats across the + * catalog change. Simply reset and recreate the entry for the new + * relation OID without warning. + */ + if (kind == PGSTAT_KIND_RELATION) + { + pgstat_reset(kind, dboid, objid); + create_drop_transactional_internal(kind, dboid, objid, true); + return; + } + ereport(WARNING, errmsg("resetting existing statistics for kind %s, db=%u, oid=%" PRIu64, (pgstat_get_kind_info(kind))->name, dboid, diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 2d0cb7bcfd4..c98e5c51d63 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -85,6 +85,7 @@ #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/pgstat_internal.h" #include "utils/relmapper.h" #include "utils/resowner.h" #include "utils/snapmgr.h" @@ -3780,6 +3781,7 @@ RelationSetNewRelfilenumber(Relation relation, char persistence) MultiXactId minmulti = InvalidMultiXactId; TransactionId freezeXid = InvalidTransactionId; RelFileLocator newrlocator; + RelFileLocator oldrlocator = relation->rd_locator; if (!IsBinaryUpgrade) { @@ -3951,6 +3953,10 @@ RelationSetNewRelfilenumber(Relation relation, char persistence) table_close(pg_class, RowExclusiveLock); + /* Mark that a rewrite happened */ + if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind)) + pgstat_mark_rewrite(oldrlocator, newrlocator); + /* * Make the pg_class row change or relation map change visible. This will * cause the relcache entry to get updated, too. diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 3102f86aa24..8750c025bbe 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -670,7 +670,7 @@ extern PgStat_FunctionCounts *find_funcstat_entry(Oid func_id); extern void pgstat_create_relation(Relation rel); extern void pgstat_drop_relation(Relation rel); -extern void pgstat_copy_relation_stats(Relation dst, Relation src); +extern void pgstat_copy_relation_stats(RelFileLocator dst, RelFileLocator src, bool increment); extern void pgstat_init_relation(Relation rel); extern void pgstat_assoc_relation(Relation rel); @@ -682,6 +682,9 @@ extern void pgstat_report_vacuum(Relation rel, PgStat_Counter livetuples, extern void pgstat_report_analyze(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, bool resetcounter, TimestampTz starttime); +extern void pgstat_mark_rewrite(RelFileLocator old_locator, + RelFileLocator new_locator); +extern void pgstat_clear_rewrite(void); /* * If stats are enabled, but pending data hasn't been prepared yet, call diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 04845d5e680..3184e9c464b 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2269,6 +2269,7 @@ PgStat_KindInfo PgStat_LocalState PgStat_PendingDroppedStatsItem PgStat_PendingIO +PgStat_PendingRewrite PgStat_SLRUStats PgStat_ShmemControl PgStat_Snapshot -- 2.34.1