From 8f1598ac7795d70a9baecc3492b2930dcd828f35 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 11 Jan 2024 15:48:44 +0100 Subject: [PATCH v20240111 7/7] decode all sequence relfilenodes --- src/backend/replication/logical/decode.c | 31 ++++--------------- .../replication/logical/reorderbuffer.c | 26 ++++++++++++++-- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 95f224df4a7..a141f444772 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -1432,27 +1432,12 @@ seq_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) * change as transactional and queue it. Otherwise it needs to be treated * as non-transactional, in which case we just send it to the plugin right * away. - * - * XXX This may incorrectly identify some changes as non-transactional, - * if the relfilenode was created before the snapshot switched to FULL - * (so that smgr_decode skipped it, and it's not in the hash table). - * But we still handle this correctly, because the snapshot can't be - * in CONSISTENT state yet, so the following check will skip the change - * too, and we won't try to apply it (which would fail due to unknown - * relfilenode). */ transactional = ReorderBufferSequenceIsTransactional(ctx->reorder, target_locator, NULL); - /* - * Skip the change if already processed (per the snapshot). - * - * XXX It's important to skip all non-transactional changes with snapshot - * that is not consistent yet. The sequence change may be incorrectly - * identified as non-transactional (see above), and we must not attempt - * to process it. - */ + /* Skip the change if already processed (per the snapshot). */ if (transactional && !SnapBuildProcessChange(builder, xid, buf->origptr)) return; @@ -1516,11 +1501,15 @@ seq_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) * We only care about XLOG_SMGR_CREATE records for "our" database (logical * decoding is restricted to a single database), and we do the filtering * and skipping, as appropriate. + * + * Note that the relfilenode is decoded and added to the reorder buffer even + * before the snapshot switches to SNAPBUILD_FULL_SNAPSHOT. This is necessary + * so that seq_decode can correctly identify transactional changes when the + * state changes to FULL after the relfilenode is created. */ void smgr_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) { - SnapBuild *builder = ctx->snapshot_builder; XLogReaderState *r = buf->record; TransactionId xid = XLogRecGetXid(r); uint8 info = XLogRecGetInfo(buf->record) & ~XLR_INFO_MASK; @@ -1542,14 +1531,6 @@ smgr_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(r), buf->origptr); - /* - * If we don't have snapshot or we are just fast-forwarding, there is no - * point in decoding relfilenode information. - */ - if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT || - ctx->fast_forward) - return; - /* only interested in our database */ xlrec = (xl_smgr_create *) XLogRecGetData(r); if (xlrec->rlocator.dbOid != ctx->slot->data.database) diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index acaa6dce8e4..017fd9c09d7 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -1280,10 +1280,30 @@ ReorderBufferAddRelFileLocator(ReorderBuffer *rb, TransactionId xid, &found); /* - * We've just decoded creation of the relfilenode, so if we found it - * in the hash table, something is wrong. + * We've just decoded creation of the relfilenode, but it's possible a + * the transaction aborted without triggering a reorderbuffer cleanup, + * and the relfilenode was allocated again. While quite unlikely (the + * hash table entry should be cleaned when processing RUNNING_XACTS, + * and we should not reuse the relfilenode this fast). + * + * So if we find an entry for the relfilenode, we simply point it to + * the new transaction, but we need to remove it from the hash table + * in the original transaction (otherwise it'd confuse cleanup). */ - Assert(!found); + if (found) + { + /* + * The entry must be pointing to a transaction, and there must be + * must be a hash table of relfilenodes. + */ + Assert(entry->txn); + Assert(entry->txn->sequences_hash); + + if (hash_search(entry->txn->sequences_hash, + (void *) &entry->rlocator, + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "hash table of sequence relfilenode corrupted"); + } entry->txn = txn; -- 2.43.0