From 14e2ee95a153726f88649f545c3606a85e848be3 Mon Sep 17 00:00:00 2001 From: alterego655 <824662526@qq.com> Date: Thu, 30 Oct 2025 17:57:45 +0800 Subject: [PATCH v1] Optimize SnapBuild by maintaining committed.xip in sorted order We now maintain committed.xip in xidComparator order using a per-commit batch insertion approach: Collect all relevant XIDs for each commit Sort the batch once: O(m log m) Reverse-merge it into the global array: O(N + m) With this change, snapshot builds can skip the qsort() step and simply memcpy the sorted array. While per-commit work increases from O(m) (plain append) to O(m log m + N) (sort-and-merge), eliminating repeated O(N log N) sorts at each snapshot build significantly reduces overall steady-state cost once CONSISTENT is reached and snapshots are built frequently. --- src/backend/replication/logical/snapbuild.c | 97 +++++++++++++++++--- src/include/replication/snapbuild_internal.h | 12 +-- 2 files changed, 85 insertions(+), 24 deletions(-) diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 98ddee20929..500805f43dc 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -407,8 +407,10 @@ SnapBuildBuildSnapshot(SnapBuild *builder) builder->committed.xip, builder->committed.xcnt * sizeof(TransactionId)); - /* sort so we can bsearch() */ - qsort(snapshot->xip, snapshot->xcnt, sizeof(TransactionId), xidComparator); +#ifdef USE_ASSERT_CHECKING + for (int i = 1; i < snapshot->xcnt; i++) + Assert(snapshot->xip[i - 1] <= snapshot->xip[i]); +#endif /* * Initially, subxip is empty, i.e. it's a snapshot to be used by @@ -826,14 +828,29 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact * Keep track of a new catalog changing transaction that has committed. */ static void -SnapBuildAddCommittedTxn(SnapBuild *builder, TransactionId xid) +SnapBuildAddCommittedTxns(SnapBuild *builder, + const TransactionId *batch_xids, + int batch_cnt) { - Assert(TransactionIdIsValid(xid)); + TransactionId *committed_xids; + int old_xcnt; + int old_idx; /* read position in old array */ + int batch_idx; /* read position in batch */ + int write_idx; /* write position (from end) */ - if (builder->committed.xcnt == builder->committed.xcnt_space) + old_xcnt = builder->committed.xcnt; + + /* Ensure we have space for all elements */ + if (old_xcnt + batch_cnt > builder->committed.xcnt_space) { builder->committed.xcnt_space = builder->committed.xcnt_space * 2 + 1; + /* + * Repeat if we need more than 2x current space. + */ + while (old_xcnt + batch_cnt > builder->committed.xcnt_space) + builder->committed.xcnt_space = builder->committed.xcnt_space * 2 + 1; + elog(DEBUG1, "increasing space for committed transactions to %u", (uint32) builder->committed.xcnt_space); @@ -841,12 +858,35 @@ SnapBuildAddCommittedTxn(SnapBuild *builder, TransactionId xid) builder->committed.xcnt_space * sizeof(TransactionId)); } + committed_xids = builder->committed.xip; + /* - * TODO: It might make sense to keep the array sorted here instead of - * doing it every time we build a new snapshot. On the other hand this - * gets called repeatedly when a transaction with subtransactions commits. + * Merge from the end to avoid overwriting unread data. We write the + * merged result starting from position (old_xcnt + batch_cnt - 1) and + * work backwards. */ - builder->committed.xip[builder->committed.xcnt++] = xid; + old_idx = old_xcnt - 1; + batch_idx = batch_cnt - 1; + write_idx = old_xcnt + batch_cnt - 1; + + while (old_idx >= 0 && batch_idx >= 0) + { + if (committed_xids[old_idx] > batch_xids[batch_idx]) + committed_xids[write_idx--] = committed_xids[old_idx--]; + else + { + Assert(TransactionIdIsValid(batch_xids[batch_idx])); + committed_xids[write_idx--] = batch_xids[batch_idx--]; + } + } + + /* Copy any remaining batch elements */ + while (batch_idx >= 0) + committed_xids[write_idx--] = batch_xids[batch_idx--]; + + /* Old elements that weren't moved are already in the correct position */ + + builder->committed.xcnt = old_xcnt + batch_cnt; } /* @@ -948,6 +988,13 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, TransactionId xmax = xid; + /* + * Collect XIDs that need tracking into a batch. We'll sort and merge + * them into committed.xip in one pass at the end. + */ + TransactionId *batch_xids = NULL; + int batch_cnt = 0; + /* * Transactions preceding BUILDING_SNAPSHOT will neither be decoded, nor * will they be part of a snapshot. So we don't need to record anything. @@ -978,6 +1025,12 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, } } + if (nsubxacts > 0 || builder->building_full_snapshot || + SnapBuildXidHasCatalogChanges(builder, xid, xinfo)) + { + batch_xids = palloc((nsubxacts + 1) * sizeof(TransactionId)); + } + for (nxact = 0; nxact < nsubxacts; nxact++) { TransactionId subxid = subxacts[nxact]; @@ -994,7 +1047,7 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, elog(DEBUG1, "found subtransaction %u:%u with catalog changes", xid, subxid); - SnapBuildAddCommittedTxn(builder, subxid); + batch_xids[batch_cnt++] = subxid; if (NormalTransactionIdFollows(subxid, xmax)) xmax = subxid; @@ -1008,7 +1061,7 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, */ else if (needs_timetravel) { - SnapBuildAddCommittedTxn(builder, subxid); + batch_xids[batch_cnt++] = subxid; if (NormalTransactionIdFollows(subxid, xmax)) xmax = subxid; } @@ -1021,7 +1074,7 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, xid); needs_snapshot = true; needs_timetravel = true; - SnapBuildAddCommittedTxn(builder, xid); + batch_xids[batch_cnt++] = xid; } else if (sub_needs_timetravel) { @@ -1029,13 +1082,29 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, elog(DEBUG2, "forced transaction %u to do timetravel due to one of its subtransactions", xid); needs_timetravel = true; - SnapBuildAddCommittedTxn(builder, xid); + batch_xids[batch_cnt++] = xid; } else if (needs_timetravel) { elog(DEBUG2, "forced transaction %u to do timetravel", xid); - SnapBuildAddCommittedTxn(builder, xid); + batch_xids[batch_cnt++] = xid; + } + + /* + * Sort and merge the batch into committed.xip. This maintains the + * invariant that committed.xip is globally sorted by raw uint32 order + * (xidComparator). + */ + if (batch_cnt > 0) + { + /* Sort by raw uint32 order */ + qsort(batch_xids, batch_cnt, sizeof(TransactionId), xidComparator); + + /* Merge into global array */ + SnapBuildAddCommittedTxns(builder, batch_xids, batch_cnt); + + pfree(batch_xids); } if (!needs_timetravel) diff --git a/src/include/replication/snapbuild_internal.h b/src/include/replication/snapbuild_internal.h index 3b915dc8793..164160e5e5e 100644 --- a/src/include/replication/snapbuild_internal.h +++ b/src/include/replication/snapbuild_internal.h @@ -115,16 +115,8 @@ struct SnapBuild /* * Array of committed transactions that have modified the catalog. * - * As this array is frequently modified we do *not* keep it in - * xidComparator order. Instead we sort the array when building & - * distributing a snapshot. - * - * TODO: It's unclear whether that reasoning has much merit. Every - * time we add something here after becoming consistent will also - * require distributing a snapshot. Storing them sorted would - * potentially also make it easier to purge (but more complicated wrt - * wraparound?). Should be improved if sorting while building the - * snapshot shows up in profiles. + * Maintained in sorted order (by raw uint32 value) to allow efficient + * snapshot building without repeated sorting overhead. */ TransactionId *xip; } committed; -- 2.51.0