From cae8ef31feb4c8dc8beb3c7422548955455a35c9 Mon Sep 17 00:00:00 2001 From: "suyu.cmj" Date: Fri, 7 Jul 2023 08:44:49 +0000 Subject: [PATCH] Fix the bug of a 2PC transaction maybe recovered twice During recovery, a two-phase transaction should be restored either from the disk file or from the WAL. However, a two-phase transaction maybe restored from both way if we crashed during doing a checkpoint. Considering that the data on disk file may not be reliable, so in this case, we only store the transaction data recorded in the WAL. --- src/backend/access/transam/twophase.c | 61 +++++++++++++++++++++------ 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index d145836a79..629b79401c 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -2491,6 +2491,8 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, char *bufptr; const char *gid; GlobalTransaction gxact; + bool addnewgxact = true; + int i; Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE)); Assert(RecoveryInProgress()); @@ -2509,15 +2511,54 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, * that it got added in the redo phase */ + /* + * During recovery, the two-phase data can be added in two ways: + * 1) restored from disk file when its xid < checkPoint.nextxid, + * 2) restored from the WAL when its prepare_start_lsn > checkPoint.redo, + * A two-phase transaction may satisfy above two conditions if we + * crashed during doing a checkpoint. Considering that the data + * on disk file may not be reliable, so in this case, we only store + * the transaction data recorded in the WAL. + */ + for (i = 0; i < TwoPhaseState->numPrepXacts; i++) + { + GlobalTransaction curxact = TwoPhaseState->prepXacts[i]; + + if (curxact->xid == hdr->xid) + { + if (curxact->ondisk && !XLogRecPtrIsInvalid(start_lsn)) + { + gxact = curxact; + addnewgxact = false; + ereport(WARNING, + (errmsg("found duplicate two-phase transaction:%u, store data from the WAL", + hdr->xid))); + break; + } + else + ereport(ERROR, + (errmsg("found unexpected duplicate two-phase transaction:%u, check for data correctness.", + hdr->xid))); + } + } + /* Get a free gxact from the freelist */ - if (TwoPhaseState->freeGXacts == NULL) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("maximum number of prepared transactions reached"), - errhint("Increase max_prepared_transactions (currently %d).", - max_prepared_xacts))); - gxact = TwoPhaseState->freeGXacts; - TwoPhaseState->freeGXacts = gxact->next; + if (addnewgxact) + { + if (TwoPhaseState->freeGXacts == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("maximum number of prepared transactions reached"), + errhint("Increase max_prepared_transactions (currently %d).", + max_prepared_xacts))); + + gxact = TwoPhaseState->freeGXacts; + TwoPhaseState->freeGXacts = gxact->next; + + /* And insert it into the active array */ + Assert(TwoPhaseState->numPrepXacts < max_prepared_xacts); + TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts++] = gxact; + } gxact->prepared_at = hdr->prepared_at; gxact->prepare_start_lsn = start_lsn; @@ -2530,10 +2571,6 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, gxact->inredo = true; /* yes, added in redo */ strcpy(gxact->gid, gid); - /* And insert it into the active array */ - Assert(TwoPhaseState->numPrepXacts < max_prepared_xacts); - TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts++] = gxact; - if (origin_id != InvalidRepOriginId) { /* recover apply progress */ -- 2.19.1.6.gb485710b