From a7c8d1da5d0496b2a7d05d52e37511a37075e13d Mon Sep 17 00:00:00 2001 From: alterego655 <824662526@qq.com> Date: Tue, 16 Dec 2025 15:15:14 +0800 Subject: [PATCH v3 1/2] Optimize SnapBuild by maintaining committed.xip in sorted order Maintain committed.xip in xidComparator order using a two-phase approach that balances performance during slot initialization with efficiency during steady-state decoding: Pre-CONSISTENT phase: - Use fast O(1) append for unsorted XIDs - Sort once at transition to CONSISTENT state - Avoids merge overhead during initial snapshot building Post-CONSISTENT phase: - Use per-commit batch insertion with sorted merge - Collect all relevant XIDs for each commit - Sort the batch once: O(m log m) - Reverse-merge into the global array: O(N + m) With this change, snapshot builds can skip the qsort() step and simply memcpy the sorted array. While per-commit work increases from O(m) (plain append) to O(m log m + N) (sort-and-merge) after CONSISTENT, eliminating repeated O(N log N) sorts at each snapshot build significantly reduces overall steady-state cost once CONSISTENT is reached and snapshots are built frequently. --- src/backend/replication/logical/snapbuild.c | 164 +++++++++++++++++-- src/include/replication/snapbuild_internal.h | 12 +- 2 files changed, 151 insertions(+), 25 deletions(-) diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index d6ab1e017eb..e0d7d3e0ae8 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -407,8 +407,25 @@ SnapBuildBuildSnapshot(SnapBuild *builder) builder->committed.xip, builder->committed.xcnt * sizeof(TransactionId)); - /* sort so we can bsearch() */ - qsort(snapshot->xip, snapshot->xcnt, sizeof(TransactionId), xidComparator); + if (builder->state < SNAPBUILD_CONSISTENT) + { + /* + * Pre-CONSISTENT: committed.xip is unsorted, sort the snapshot copy. + */ + if (snapshot->xcnt > 1) + qsort(snapshot->xip, snapshot->xcnt, + sizeof(TransactionId), xidComparator); + } +#ifdef USE_ASSERT_CHECKING + else + { + /* + * Post-CONSISTENT: committed.xip should already be sorted, verify. + */ + for (int i = 1; i < snapshot->xcnt; i++) + Assert(snapshot->xip[i - 1] < snapshot->xip[i]); + } +#endif /* * Initially, subxip is empty, i.e. it's a snapshot to be used by @@ -822,17 +839,33 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact } /* - * Keep track of a new catalog changing transaction that has committed. + * Keep track of new catalog changing transactions that have committed. + * + * Before reaching CONSISTENT state, we use fast O(1) append since the array + * doesn't need to be sorted yet. After CONSISTENT, we maintain sorted order + * using a merge approach to avoid repeated full-array sorts at snapshot build. */ static void -SnapBuildAddCommittedTxn(SnapBuild *builder, TransactionId xid) +SnapBuildAddCommittedTxns(SnapBuild *builder, + const TransactionId *batch_xids, + int batch_cnt) { - Assert(TransactionIdIsValid(xid)); + TransactionId *committed_xids; + size_t old_xcnt; + + old_xcnt = builder->committed.xcnt; - if (builder->committed.xcnt == builder->committed.xcnt_space) + /* Ensure we have space for all elements */ + if (old_xcnt + batch_cnt > builder->committed.xcnt_space) { builder->committed.xcnt_space = builder->committed.xcnt_space * 2 + 1; + /* + * Repeat if we need more than 2x current space. + */ + while (old_xcnt + batch_cnt > builder->committed.xcnt_space) + builder->committed.xcnt_space = builder->committed.xcnt_space * 2 + 1; + elog(DEBUG1, "increasing space for committed transactions to %u", (uint32) builder->committed.xcnt_space); @@ -840,12 +873,57 @@ SnapBuildAddCommittedTxn(SnapBuild *builder, TransactionId xid) builder->committed.xcnt_space * sizeof(TransactionId)); } + committed_xids = builder->committed.xip; + /* - * TODO: It might make sense to keep the array sorted here instead of - * doing it every time we build a new snapshot. On the other hand this - * gets called repeatedly when a transaction with subtransactions commits. + * Before CONSISTENT state, just append unsorted for O(1) performance. The + * array will be sorted once when transitioning to CONSISTENT. */ - builder->committed.xip[builder->committed.xcnt++] = xid; + if (builder->state < SNAPBUILD_CONSISTENT) + { + for (int i = 0; i < batch_cnt; i++) + { + Assert(TransactionIdIsValid(batch_xids[i])); + committed_xids[old_xcnt++] = batch_xids[i]; + } + builder->committed.xcnt = old_xcnt; + } + else + { + /* + * After CONSISTENT, maintain sorted order via merge. Merge from the + * end to avoid overwriting unread data. + */ + int old_idx = old_xcnt - 1; + int batch_idx = batch_cnt - 1; + int write_idx = old_xcnt + batch_cnt - 1; + + while (old_idx >= 0 && batch_idx >= 0) + { + Assert(TransactionIdIsValid(batch_xids[batch_idx])); + + if (committed_xids[old_idx] > batch_xids[batch_idx]) + { + committed_xids[write_idx--] = committed_xids[old_idx--]; + } + else + { + /* Duplicates should never occur */ + Assert(committed_xids[old_idx] != batch_xids[batch_idx]); + committed_xids[write_idx--] = batch_xids[batch_idx--]; + } + } + + /* Copy any remaining batch elements */ + while (batch_idx >= 0) + { + Assert(TransactionIdIsValid(batch_xids[batch_idx])); + committed_xids[write_idx--] = batch_xids[batch_idx--]; + } + + /* Old elements that weren't moved are already in correct position */ + builder->committed.xcnt = old_xcnt + batch_cnt; + } } /* @@ -947,6 +1025,13 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, TransactionId xmax = xid; + /* + * Collect XIDs that need tracking into a batch. We'll sort and merge + * them into committed.xip in one pass at the end. + */ + TransactionId *batch_xids = NULL; + int batch_cnt = 0; + /* * Transactions preceding BUILDING_SNAPSHOT will neither be decoded, nor * will they be part of a snapshot. So we don't need to record anything. @@ -977,6 +1062,12 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, } } + if (nsubxacts > 0 || builder->building_full_snapshot || + SnapBuildXidHasCatalogChanges(builder, xid, xinfo)) + { + batch_xids = palloc((nsubxacts + 1) * sizeof(TransactionId)); + } + for (nxact = 0; nxact < nsubxacts; nxact++) { TransactionId subxid = subxacts[nxact]; @@ -993,7 +1084,7 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, elog(DEBUG1, "found subtransaction %u:%u with catalog changes", xid, subxid); - SnapBuildAddCommittedTxn(builder, subxid); + batch_xids[batch_cnt++] = subxid; if (NormalTransactionIdFollows(subxid, xmax)) xmax = subxid; @@ -1007,7 +1098,7 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, */ else if (needs_timetravel) { - SnapBuildAddCommittedTxn(builder, subxid); + batch_xids[batch_cnt++] = subxid; if (NormalTransactionIdFollows(subxid, xmax)) xmax = subxid; } @@ -1020,7 +1111,7 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, xid); needs_snapshot = true; needs_timetravel = true; - SnapBuildAddCommittedTxn(builder, xid); + batch_xids[batch_cnt++] = xid; } else if (sub_needs_timetravel) { @@ -1028,15 +1119,33 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, elog(DEBUG2, "forced transaction %u to do timetravel due to one of its subtransactions", xid); needs_timetravel = true; - SnapBuildAddCommittedTxn(builder, xid); + batch_xids[batch_cnt++] = xid; } else if (needs_timetravel) { elog(DEBUG2, "forced transaction %u to do timetravel", xid); - SnapBuildAddCommittedTxn(builder, xid); + batch_xids[batch_cnt++] = xid; + } + + /* + * Sort and merge the batch into committed.xip. This maintains the + * invariant that committed.xip is globally sorted by raw uint32 order + * (xidComparator). + */ + if (batch_cnt > 0) + { + /* Sort by raw uint32 order */ + qsort(batch_xids, batch_cnt, sizeof(TransactionId), xidComparator); + + /* Merge into global array */ + SnapBuildAddCommittedTxns(builder, batch_xids, batch_cnt); } + /* Free batch array if allocated (even if nothing was added to it) */ + if (batch_xids != NULL) + pfree(batch_xids); + if (!needs_timetravel) { /* record that we cannot export a general snapshot anymore */ @@ -1305,6 +1414,15 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn Assert(TransactionIdIsNormal(builder->xmin)); Assert(TransactionIdIsNormal(builder->xmax)); + /* + * Sort committed.xip before transitioning to CONSISTENT. During the + * pre-CONSISTENT phase, XIDs were appended unsorted for performance. + * Now we need sorted order for efficient snapshot building. + */ + if (builder->committed.xcnt > 1) + qsort(builder->committed.xip, builder->committed.xcnt, + sizeof(TransactionId), xidComparator); + builder->state = SNAPBUILD_CONSISTENT; builder->next_phase_at = InvalidTransactionId; @@ -1402,6 +1520,15 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn TransactionIdPrecedesOrEquals(builder->next_phase_at, running->oldestRunningXid)) { + /* + * Sort committed.xip before transitioning to CONSISTENT. During the + * pre-CONSISTENT phase, XIDs were appended unsorted for performance. + * Now we need sorted order for efficient snapshot building. + */ + if (builder->committed.xcnt > 1) + qsort(builder->committed.xip, builder->committed.xcnt, + sizeof(TransactionId), xidComparator); + builder->state = SNAPBUILD_CONSISTENT; builder->next_phase_at = InvalidTransactionId; @@ -1889,6 +2016,13 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn) pfree(builder->committed.xip); builder->committed.xcnt_space = ondisk.builder.committed.xcnt; builder->committed.xip = ondisk.builder.committed.xip; + + /* + * Sort the restored array to ensure it's in xidComparator order. Old + * snapshot files may have been written with unsorted arrays. + */ + qsort(builder->committed.xip, builder->committed.xcnt, + sizeof(TransactionId), xidComparator); } ondisk.builder.committed.xip = NULL; diff --git a/src/include/replication/snapbuild_internal.h b/src/include/replication/snapbuild_internal.h index 3b915dc8793..164160e5e5e 100644 --- a/src/include/replication/snapbuild_internal.h +++ b/src/include/replication/snapbuild_internal.h @@ -115,16 +115,8 @@ struct SnapBuild /* * Array of committed transactions that have modified the catalog. * - * As this array is frequently modified we do *not* keep it in - * xidComparator order. Instead we sort the array when building & - * distributing a snapshot. - * - * TODO: It's unclear whether that reasoning has much merit. Every - * time we add something here after becoming consistent will also - * require distributing a snapshot. Storing them sorted would - * potentially also make it easier to purge (but more complicated wrt - * wraparound?). Should be improved if sorting while building the - * snapshot shows up in profiles. + * Maintained in sorted order (by raw uint32 value) to allow efficient + * snapshot building without repeated sorting overhead. */ TransactionId *xip; } committed; -- 2.51.0