diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml index 020bbcd..e7d6c36 100644 *** a/doc/src/sgml/mvcc.sgml --- b/doc/src/sgml/mvcc.sgml *************** COMMIT; *** 386,407 **** behave the same as SELECT in terms of searching for target rows: they will only find target rows that were committed as of the transaction start time. However, such a ! target ! row might have already been updated (or deleted or locked) by ! another concurrent transaction by the time it is found. In this case, the ! serializable transaction will wait for the first updating transaction to commit or ! roll back (if it is still in progress). If the first updater rolls back, ! then its effects are negated and the serializable transaction can proceed ! with updating the originally found row. But if the first updater commits ! (and actually updated or deleted the row, not just locked it) ! then the serializable transaction will be rolled back with the message ERROR: could not serialize access due to concurrent update ! because a serializable transaction cannot modify or lock rows changed by ! other transactions after the serializable transaction began. --- 386,407 ---- behave the same as SELECT in terms of searching for target rows: they will only find target rows that were committed as of the transaction start time. However, such a ! target row might have already been subject to a concurrent ! UPDATE, DELETE, SELECT ! FOR UPDATE, or SELECT FOR SHARE. In this case, ! the serializable transaction will wait for the other transaction to commit ! or roll back (if it is still in progress). If it rolls back, then its effects ! are negated and the serializable transaction can proceed with modifying ! or locking the originally found row. If it commits, and the two commands ! conflict according to <xref linkend="mvcc-serialization-conflicts">, ! the serializable transaction is rolled back with the message ERROR: could not serialize access due to concurrent update ! since a serializable transaction cannot simply proceed with the newer row ! version as read committed transactions do. *************** ERROR: could not serialize access due t *** 418,423 **** --- 418,463 ---- transactions will never have serialization conflicts.
+
+  <table id="mvcc-serialization-conflicts">
+   <title>Serialization Conflicts</title>
+   <tgroup cols="4">
+    <colspec colnum="2" colname="lockfirst">
+    <colspec colnum="4" colname="locklast">
+    <thead>
+     <row>
+      <entry morerows="1">Serializable Transaction</entry>
+      <entry namest="lockfirst" nameend="locklast" align="center">Concurrent Transaction</entry>
+     </row>
+     <row>
+      <entry>UPDATE, DELETE</entry>
+      <entry>SELECT FOR UPDATE</entry>
+      <entry>SELECT FOR SHARE</entry>
+     </row>
+    </thead>
+    <tbody>
+     <row>
+      <entry>UPDATE, DELETE</entry>
+      <entry align="center">X</entry>
+      <entry align="center">X</entry>
+      <entry align="center">X</entry>
+     </row>
+     <row>
+      <entry>SELECT FOR UPDATE</entry>
+      <entry align="center">X</entry>
+      <entry align="center">X</entry>
+      <entry align="center">X</entry>
+     </row>
+     <row>
+      <entry>SELECT FOR SHARE</entry>
+      <entry align="center">X</entry>
+      <entry></entry>
+      <entry></entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
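To make the table concrete, here is an illustrative session sketch of one of the marked conflicts (a serializable UPDATE meeting a concurrent SELECT FOR SHARE). It is not part of the patch and assumes a hypothetical table mytab(id int, val int) containing a row with id = 1:

    -- Session 1 (any isolation level): lock a row without modifying it
    BEGIN;
    SELECT * FROM mytab WHERE id = 1 FOR SHARE;

    -- Session 2 (serializable; its snapshot must be taken while
    -- session 1 is still in progress)
    BEGIN ISOLATION LEVEL SERIALIZABLE;
    SELECT count(*) FROM mytab;                    -- first query takes the snapshot
    UPDATE mytab SET val = val + 1 WHERE id = 1;   -- blocks on session 1

    -- Session 1
    COMMIT;

    -- Session 2's UPDATE now fails, even though the row was only ever
    -- locked, never modified:
    -- ERROR:  could not serialize access due to concurrent update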
+ The Serializable mode provides a rigorous guarantee that each transaction sees a wholly consistent view of the database. However, *************** SELECT SUM(value) FROM mytab WHERE class *** 921,926 **** --- 961,974 ---- + Serializable transactions are affected by concurrent + SELECT FOR SHARE and SELECT FOR UPDATE + for longer than those locks are actually held, and may be aborted + when trying to obtain a conflicting lock. For details, + see <xref linkend="mvcc-serialization-conflicts">. + + + PostgreSQL doesn't remember any information about modified rows in memory, so there is no limit on the number of rows locked at one time. However, locking a row diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 4020906..a397bad 100644 *** a/src/backend/access/heap/heapam.c --- b/src/backend/access/heap/heapam.c *************** simple_heap_insert(Relation relation, He *** 2033,2055 **** * update_xmax - output parameter, used only for failure case (see below) * cid - delete command ID (used for visibility test, and stored into * cmax if successful) - * crosscheck - if not InvalidSnapshot, also check tuple against this * wait - true if should wait for any conflicting update to commit/abort * * Normal, successful return value is HeapTupleMayBeUpdated, which * actually means we did delete it. Failure return codes are * HeapTupleSelfUpdated, HeapTupleUpdated, or HeapTupleBeingUpdated * (the last only possible if wait == false). * ! * In the failure cases, the routine returns the tuple's t_ctid and t_xmax. ! * If t_ctid is the same as tid, the tuple was deleted; if different, the ! * tuple was updated, and t_ctid is the location of the replacement tuple. ! * (t_xmax is needed to verify that the replacement tuple matches.) */ HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid, TransactionId *update_xmax, ! CommandId cid, Snapshot crosscheck, bool wait) { HTSU_Result result; TransactionId xid = GetCurrentTransactionId(); --- 2033,2064 ---- * update_xmax - output parameter, used only for failure case (see below) * cid - delete command ID (used for visibility test, and stored into * cmax if successful) * wait - true if should wait for any conflicting update to commit/abort + * lockcheck_snapshot - if not InvalidSnapshot, report the tuple as updated if it + * was once locked by a transaction not visible under this snapshot * * Normal, successful return value is HeapTupleMayBeUpdated, which * actually means we did delete it. Failure return codes are * HeapTupleSelfUpdated, HeapTupleUpdated, or HeapTupleBeingUpdated * (the last only possible if wait == false). * ! * In the failure cases, the routine returns the tuple's t_ctid and t_xmax ! * in ctid and update_xmax. ! * If ctid is the same as t_self and update_xmax is a valid transaction id, ! * the tuple was deleted. ! * If ctid differs from t_self, the tuple was updated, ctid is the location ! * of the replacement tuple and update_xmax is the updating transaction's xid. ! * update_xmax must in this case be used to verify that the replacement tuple ! * matches. ! * Otherwise, if ctid is the same as t_self and update_xmax is ! * InvalidTransactionId, the tuple was neither replaced nor deleted, but locked ! * by a transaction invisible to lockcheck_snapshot. This case can thus only ! * arise if lockcheck_snapshot is a valid snapshot. */ HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid, TransactionId *update_xmax, ! 
CommandId cid, bool wait, Snapshot lockcheck_snapshot) { HTSU_Result result; TransactionId xid = GetCurrentTransactionId(); *************** l1: *** 2171,2181 **** result = HeapTupleUpdated; } ! if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated) { ! /* Perform additional check for transaction-snapshot mode RI updates */ ! if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer)) ! result = HeapTupleUpdated; } if (result != HeapTupleMayBeUpdated) --- 2180,2201 ---- result = HeapTupleUpdated; } ! /* If the tuple was updated, we report the updating transaction's ! * xid in update_xmax. Otherwise, we must check that it wasn't ! * locked by a transaction invisible to lockcheck_snapshot before ! * continuing. ! */ ! if (result != HeapTupleMayBeUpdated) { ! Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID)); ! *update_xmax = HeapTupleHeaderGetXmax(tp.t_data); ! } ! else if ((lockcheck_snapshot != InvalidSnapshot) && ! !HeapSatisfiesLockersVisible(tp.t_data, lockcheck_snapshot)) ! { ! Assert((tp.t_data->t_infomask & HEAP_IS_LOCKED)); ! *update_xmax = InvalidTransactionId; ! result = HeapTupleUpdated; } if (result != HeapTupleMayBeUpdated) *************** l1: *** 2183,2191 **** Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated || result == HeapTupleBeingUpdated); - Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID)); *ctid = tp.t_data->t_ctid; - *update_xmax = HeapTupleHeaderGetXmax(tp.t_data); UnlockReleaseBuffer(buffer); if (have_tuple_lock) UnlockTuple(relation, &(tp.t_self), ExclusiveLock); --- 2203,2209 ---- *************** simple_heap_delete(Relation relation, It *** 2313,2320 **** result = heap_delete(relation, tid, &update_ctid, &update_xmax, ! GetCurrentCommandId(true), InvalidSnapshot, ! true /* wait for commit */ ); switch (result) { case HeapTupleSelfUpdated: --- 2331,2339 ---- result = heap_delete(relation, tid, &update_ctid, &update_xmax, ! GetCurrentCommandId(true), ! true /* wait for commit */ , ! InvalidSnapshot); switch (result) { case HeapTupleSelfUpdated: *************** simple_heap_delete(Relation relation, It *** 2349,2355 **** * update_xmax - output parameter, used only for failure case (see below) * cid - update command ID (used for visibility test, and stored into * cmax/cmin if successful) ! * crosscheck - if not InvalidSnapshot, also check old tuple against this * wait - true if should wait for any conflicting update to commit/abort * * Normal, successful return value is HeapTupleMayBeUpdated, which --- 2368,2375 ---- * update_xmax - output parameter, used only for failure case (see below) * cid - update command ID (used for visibility test, and stored into * cmax/cmin if successful) ! * lockcheck_snapshot - if not InvalidSnapshot, report the tuple as updated ! * if it was once locked by a transaction not visible under this snapshot * wait - true if should wait for any conflicting update to commit/abort * * Normal, successful return value is HeapTupleMayBeUpdated, which *************** simple_heap_delete(Relation relation, It *** 2363,2377 **** * update was done. However, any TOAST changes in the new tuple's * data are not reflected into *newtup. * ! * In the failure cases, the routine returns the tuple's t_ctid and t_xmax. ! * If t_ctid is the same as otid, the tuple was deleted; if different, the ! * tuple was updated, and t_ctid is the location of the replacement tuple. ! * (t_xmax is needed to verify that the replacement tuple matches.) 
*/ HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, ItemPointer ctid, TransactionId *update_xmax, ! CommandId cid, Snapshot crosscheck, bool wait) { HTSU_Result result; TransactionId xid = GetCurrentTransactionId(); --- 2383,2405 ---- * update was done. However, any TOAST changes in the new tuple's * data are not reflected into *newtup. * ! * In the failure cases, the routine returns the tuple's t_ctid and t_xmax ! * in ctid and update_xmax. ! * If ctid is the same as t_self and update_xmax is a valid transaction id, ! * the tuple was deleted. ! * If ctid differs from t_self, the tuple was updated, ctid is the location ! * of the replacement tuple and update_xmax is the updating transaction's xid. ! * update_xmax must in this case be used to verify that the replacement tuple ! * matches. ! * Otherwise, if ctid is the same as t_self and update_xmax is ! * InvalidTransactionId, the tuple was neither replaced nor deleted, but locked ! * by a transaction invisible to lockcheck_snapshot. This case can thus only ! * arise if lockcheck_snapshot is a valid snapshot. */ HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, ItemPointer ctid, TransactionId *update_xmax, ! CommandId cid, bool wait, Snapshot lockcheck_snapshot) { HTSU_Result result; TransactionId xid = GetCurrentTransactionId(); *************** l2: *** 2523,2533 **** result = HeapTupleUpdated; } ! if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated) { ! /* Perform additional check for transaction-snapshot mode RI updates */ ! if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer)) ! result = HeapTupleUpdated; } if (result != HeapTupleMayBeUpdated) --- 2551,2572 ---- result = HeapTupleUpdated; } ! /* If the tuple was updated, we report the updating transaction's ! * xid in update_xmax. Otherwise, we must check that it wasn't ! * locked by a transaction invisible to lockcheck_snapshot before ! * continuing. ! */ ! if (result != HeapTupleMayBeUpdated) { ! Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)); ! *update_xmax = HeapTupleHeaderGetXmax(oldtup.t_data); ! } ! else if ((lockcheck_snapshot != InvalidSnapshot) && ! !HeapSatisfiesLockersVisible(oldtup.t_data, lockcheck_snapshot)) ! { ! Assert((oldtup.t_data->t_infomask & HEAP_IS_LOCKED)); ! *update_xmax = InvalidTransactionId; ! result = HeapTupleUpdated; } if (result != HeapTupleMayBeUpdated) *************** l2: *** 2535,2543 **** Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated || result == HeapTupleBeingUpdated); - Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)); *ctid = oldtup.t_data->t_ctid; - *update_xmax = HeapTupleHeaderGetXmax(oldtup.t_data); UnlockReleaseBuffer(buffer); if (have_tuple_lock) UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock); --- 2574,2580 ---- *************** simple_heap_update(Relation relation, It *** 2978,2985 **** result = heap_update(relation, otid, tup, &update_ctid, &update_xmax, ! GetCurrentCommandId(true), InvalidSnapshot, ! true /* wait for commit */ ); switch (result) { case HeapTupleSelfUpdated: --- 3015,3022 ---- result = heap_update(relation, otid, tup, &update_ctid, &update_xmax, ! GetCurrentCommandId(true), ! 
true /* wait for commit */, InvalidSnapshot); switch (result) { case HeapTupleSelfUpdated: *************** simple_heap_update(Relation relation, It *** 3013,3018 **** --- 3050,3059 ---- * tuple's cmax if lock is successful) * mode: indicates if shared or exclusive tuple lock is desired * nowait: if true, ereport rather than blocking if lock not available + * lockcheck_snapshot: if not InvalidSnapshot, report the tuple as updated if it + * was once locked by a transaction not visible under this snapshot. + * Note that HeapTupleUpdated is reported even for non-conflicting locks - + * the caller is expected to deal with that. * * Output parameters: * *tuple: all fields filled in *************** simple_heap_update(Relation relation, It *** 3025,3035 **** * HeapTupleSelfUpdated: lock failed because tuple updated by self * HeapTupleUpdated: lock failed because tuple updated by other xact * ! * In the failure cases, the routine returns the tuple's t_ctid and t_xmax. ! * If t_ctid is the same as t_self, the tuple was deleted; if different, the ! * tuple was updated, and t_ctid is the location of the replacement tuple. ! * (t_xmax is needed to verify that the replacement tuple matches.) ! * * * NOTES: because the shared-memory lock table is of finite size, but users * could reasonably want to lock large numbers of tuples, we do not rely on --- 3066,3083 ---- * HeapTupleSelfUpdated: lock failed because tuple updated by self * HeapTupleUpdated: lock failed because tuple updated by other xact * ! * In the failure cases, the routine returns the tuple's t_ctid and t_xmax ! * in ctid and update_xmax. ! * If ctid is the same as t_self and update_xmax is a valid transaction id, ! * the tuple was deleted. ! * If ctid differs from t_self, the tuple was updated, ctid is the location ! * of the replacement tuple and update_xmax is the updating transaction's xid. ! * update_xmax must in this case be used to verify that the replacement tuple ! * matches. ! * Otherwise, if ctid is the same as t_self and update_xmax is ! * InvalidTransactionId, the tuple was neither replaced nor deleted, but locked ! * by a transaction invisible to lockcheck_snapshot. This case can thus only ! * arise if lockcheck_snapshot is a valid snapshot. * * NOTES: because the shared-memory lock table is of finite size, but users * could reasonably want to lock large numbers of tuples, we do not rely on *************** simple_heap_update(Relation relation, It *** 3066,3072 **** HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer, ItemPointer ctid, TransactionId *update_xmax, ! CommandId cid, LockTupleMode mode, bool nowait) { HTSU_Result result; ItemPointer tid = &(tuple->t_self); --- 3114,3121 ---- HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer, ItemPointer ctid, TransactionId *update_xmax, ! CommandId cid, LockTupleMode mode, bool nowait, ! Snapshot lockcheck_snapshot) { HTSU_Result result; ItemPointer tid = &(tuple->t_self); *************** l3: *** 3247,3258 **** result = HeapTupleUpdated; } if (result != HeapTupleMayBeUpdated) { Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated); - Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID)); *ctid = tuple->t_data->t_ctid; - *update_xmax = HeapTupleHeaderGetXmax(tuple->t_data); LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); if (have_tuple_lock) UnlockTuple(relation, tid, tuple_lock_type); --- 3296,3323 ---- result = HeapTupleUpdated; } + /* If the tuple was updated, we report the updating transaction's + * xid in update_xmax. 
Otherwise, we must check that it wasn't + * locked by a transaction invisible to lockcheck_snapshot before + * continuing. + */ + if (result != HeapTupleMayBeUpdated) + { + Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID)); + *update_xmax = HeapTupleHeaderGetXmax(tuple->t_data); + } + else if ((lockcheck_snapshot != InvalidSnapshot) && + !HeapSatisfiesLockersVisible(tuple->t_data, lockcheck_snapshot)) + { + Assert((tuple->t_data->t_infomask & HEAP_IS_LOCKED)); + *update_xmax = InvalidTransactionId; + result = HeapTupleUpdated; + } + if (result != HeapTupleMayBeUpdated) { Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated); *ctid = tuple->t_data->t_ctid; LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); if (have_tuple_lock) UnlockTuple(relation, tid, tuple_lock_type); diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 9422ef3..3395ea6 100644 *** a/src/backend/access/transam/multixact.c --- b/src/backend/access/transam/multixact.c *************** static MemoryContext MXactContext = NULL *** 211,217 **** #endif /* internal MultiXactId management */ - static void MultiXactIdSetOldestVisible(void); static MultiXactId CreateMultiXactId(int nxids, TransactionId *xids); static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int nxids, TransactionId *xids); --- 211,216 ---- *************** MultiXactIdSetOldestMember(void) *** 531,537 **** * there is no live transaction, now or later, that can be a member of any * MultiXactId older than the OldestVisibleMXactId we compute here. */ ! static void MultiXactIdSetOldestVisible(void) { if (!MultiXactIdIsValid(OldestVisibleMXactId[MyBackendId])) --- 530,536 ---- * there is no live transaction, now or later, that can be a member of any * MultiXactId older than the OldestVisibleMXactId we compute here. */ ! void MultiXactIdSetOldestVisible(void) { if (!MultiXactIdIsValid(OldestVisibleMXactId[MyBackendId])) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 7b8bee8..be28f3e 100644 *** a/src/backend/commands/copy.c --- b/src/backend/commands/copy.c *************** DoCopy(const CopyStmt *stmt, const char *** 1091,1097 **** /* Create a QueryDesc requesting no output */ cstate->queryDesc = CreateQueryDesc(plan, queryString, GetActiveSnapshot(), - InvalidSnapshot, dest, NULL, 0); /* --- 1091,1096 ---- diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 99f5c29..9ccd9fc 100644 *** a/src/backend/commands/explain.c --- b/src/backend/commands/explain.c *************** ExplainOnePlan(PlannedStmt *plannedstmt, *** 377,383 **** /* Create a QueryDesc requesting no output */ queryDesc = CreateQueryDesc(plannedstmt, queryString, ! GetActiveSnapshot(), InvalidSnapshot, None_Receiver, params, instrument_option); INSTR_TIME_SET_CURRENT(starttime); --- 377,383 ---- /* Create a QueryDesc requesting no output */ queryDesc = CreateQueryDesc(plannedstmt, queryString, ! GetActiveSnapshot(), None_Receiver, params, instrument_option); INSTR_TIME_SET_CURRENT(starttime); diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 8195392..eaf8e8b 100644 *** a/src/backend/commands/trigger.c --- b/src/backend/commands/trigger.c *************** GetTupleForTrigger(EState *estate, *** 2474,2486 **** /* * lock tuple for update */ ltrmark:; tuple.t_self = *tid; test = heap_lock_tuple(relation, &tuple, &buffer, &update_ctid, &update_xmax, estate->es_output_cid, ! 
LockTupleExclusive, false); switch (test) { case HeapTupleSelfUpdated: --- 2474,2492 ---- /* * lock tuple for update + * + * Serializable transactions pass their snapshot as the + * lockcheck_snapshot. This lets heap_lock_tuple report concurrently + * FOR SHARE or FOR UPDATE locked tuples as HeapTupleUpdated. */ ltrmark:; tuple.t_self = *tid; test = heap_lock_tuple(relation, &tuple, &buffer, &update_ctid, &update_xmax, estate->es_output_cid, ! LockTupleExclusive, false, ! IsolationUsesXactSnapshot() ? ! estate->es_snapshot : InvalidSnapshot); switch (test) { case HeapTupleSelfUpdated: diff --git a/src/backend/executor/README b/src/backend/executor/README index fec191d..ab70f40 100644 *** a/src/backend/executor/README --- b/src/backend/executor/README *************** is no explicit prohibition on SRFs in UP *** 195,197 **** --- 195,263 ---- that only the first result row of an SRF counts, because all subsequent rows will result in attempts to re-update an already updated target row. This is historical behavior and seems not worth changing.) + + Row Locks and Serializable Transactions + --------------------------------------- + + In READ COMMITTED mode, a transaction that encounters a locked row during + an UPDATE, DELETE, SELECT FOR UPDATE or SELECT FOR SHARE simply blocks + until the locking transaction commits or rolls back, and in the former case + re-executes the statement using the new row version, as described above. + + In the case of a per-transaction snapshot (as opposed to a per-query snapshot, + i.e. isolation level READ COMMITTED), this is not satisfactory. The RI triggers, + for example, take a FOR SHARE lock on a parent row before allowing a child row + to be inserted, and verify that deleting a parent row leaves no orphaned children + behind before allowing the delete to occur. From within READ COMMITTED + transactions, blocking upon a delete of a parent row until all lockers have + finished is sufficient to guarantee that this check finds any potential + orphan, since the check will be executed with an up-to-date snapshot to which + the locking transaction's changes are visible. For isolation level REPEATABLE + READ and higher, however, this is not true, since such transactions will + continue to use their old snapshot and hence miss newly inserted rows. + + Such transactions therefore treat a FOR SHARE or FOR UPDATE lock on a tuple + the same as an actual update during UPDATE, DELETE and SELECT FOR UPDATE. They + are thus aborted when trying to UPDATE, DELETE or FOR UPDATE lock a row that + was FOR SHARE or FOR UPDATE locked by a concurrent transaction. + + Note that the behavior is not totally symmetric - locking a row FOR UPDATE + that was concurrently locked FOR SHARE aborts with a serialization error, while + FOR SHARE locking a row that was concurrently locked FOR UPDATE will succeed + (after the locking transaction has committed or aborted, of course, and assuming + that the FOR UPDATE wasn't followed by an actual UPDATE). This is hard to + avoid, since we cannot reliably distinguish between the different lock + strengths once the locking transaction has ended - another READ COMMITTED + transaction might immediately lock the row with a different strength. + + To restore symmetry, we'd thus need to either abort with a serialization error + upon *any* request to lock a row that was locked concurrently, independent + of the locks' strengths, or never abort in such a situation. 
Both behaviors + are undesirable, the former because of the increased number of serialization + errors it'd cause and the latter because obtaining a FOR UPDATE lock is + supposed to prevent a future update from failing. + + The guarantees provided by the different lock strengths are thus: + + 1) Acquiring a FOR SHARE lock guarantees, for all modifications of the tuple + from within any transaction, that all statements run *after* the modifying + statement will see the locking transaction's changes. This includes any + statement run from within AFTER triggers fired by the modifying statement. For + isolation level REPEATABLE READ and above, even the modifying statement itself + will see the locking transaction's changes. + + 2) Acquiring a FOR UPDATE lock guarantees that any statement run after the FOR + UPDATE lock was granted will see the changes done by any concurrent + transaction that at one point obtained a FOR SHARE lock on the tuple. For + isolation level REPEATABLE READ and above, even the statement itself will see + these changes. + + With isolation level REPEATABLE READ and above, a serialization error is raised if one of these guarantees would otherwise be violated. + + Implementation-wise, heap_update(), heap_delete() and heap_lock_tuple() take a + lockcheck_snapshot parameter, and report a tuple as updated if it was locked + by a transaction not visible to lockcheck_snapshot. The implementation + depends on the fact that if one transaction invisible to lockcheck_snapshot + locks the tuple, every future locker must still be running by the time the + first locker commits or aborts, and will thus surely be invisible to the + lockcheck_snapshot if the original locker was. diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index c4719f3..5bcbf54 100644 *** a/src/backend/executor/execMain.c --- b/src/backend/executor/execMain.c *************** standard_ExecutorStart(QueryDesc *queryD *** 183,189 **** * Copy other important information into the EState */ estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot); - estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot); estate->es_instrument = queryDesc->instrument_options; /* --- 183,188 ---- *************** standard_ExecutorEnd(QueryDes *** 348,354 **** /* do away with our snapshots */ UnregisterSnapshot(estate->es_snapshot); - UnregisterSnapshot(estate->es_crosscheck_snapshot); /* * Must switch out of context before destroying it --- 347,352 ---- *************** EvalPlanQualFetch(EState *estate, Relati *** 1563,1573 **** /* * This is a live tuple, so now try to lock it. */ test = heap_lock_tuple(relation, &tuple, &buffer, &update_ctid, &update_xmax, estate->es_output_cid, ! lockmode, false); /* We now have two pins on the buffer, get rid of one */ ReleaseBuffer(buffer); --- 1561,1578 ---- /* * This is a live tuple, so now try to lock it. + * + * Serializable transactions pass their snapshot as the lockcheck_snapshot. + * This lets heap_lock_tuple report concurrently FOR SHARE or FOR UPDATE + * locked tuples as HeapTupleUpdated. */ + Assert(!IsolationUsesXactSnapshot() || (estate->es_snapshot != InvalidSnapshot)); test = heap_lock_tuple(relation, &tuple, &buffer, &update_ctid, &update_xmax, estate->es_output_cid, ! lockmode, false, ! IsolationUsesXactSnapshot() ? estate->es_snapshot : ! 
InvalidSnapshot); /* We now have two pins on the buffer, get rid of one */ ReleaseBuffer(buffer); *************** EvalPlanQualStart(EPQState *epqstate, ES *** 1936,1942 **** */ estate->es_direction = ForwardScanDirection; estate->es_snapshot = parentestate->es_snapshot; - estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot; estate->es_range_table = parentestate->es_range_table; estate->es_plannedstmt = parentestate->es_plannedstmt; estate->es_junkFilter = parentestate->es_junkFilter; --- 1941,1946 ---- diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 6ad0f1e..cef3e86 100644 *** a/src/backend/executor/execUtils.c --- b/src/backend/executor/execUtils.c *************** CreateExecutorState(void) *** 109,115 **** */ estate->es_direction = ForwardScanDirection; estate->es_snapshot = SnapshotNow; - estate->es_crosscheck_snapshot = InvalidSnapshot; /* no crosscheck */ estate->es_range_table = NIL; estate->es_plannedstmt = NULL; --- 109,114 ---- diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index 11f92ad..6a6f23d 100644 *** a/src/backend/executor/functions.c --- b/src/backend/executor/functions.c *************** postquel_start(execution_state *es, SQLF *** 415,421 **** if (IsA(es->stmt, PlannedStmt)) es->qd = CreateQueryDesc((PlannedStmt *) es->stmt, fcache->src, ! snapshot, InvalidSnapshot, dest, fcache->paramLI, 0); else --- 415,421 ---- if (IsA(es->stmt, PlannedStmt)) es->qd = CreateQueryDesc((PlannedStmt *) es->stmt, fcache->src, ! snapshot, dest, fcache->paramLI, 0); else diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index 440a601..330e5f0 100644 *** a/src/backend/executor/nodeLockRows.c --- b/src/backend/executor/nodeLockRows.c *************** *** 22,27 **** --- 22,28 ---- #include "postgres.h" #include "access/xact.h" + #include "access/transam.h" #include "executor/executor.h" #include "executor/nodeLockRows.h" #include "storage/bufmgr.h" *************** lnext: *** 71,76 **** --- 72,78 ---- ItemPointerData update_ctid; TransactionId update_xmax; LockTupleMode lockmode; + Snapshot lockcheck_snapshot = InvalidSnapshot; HTSU_Result test; HeapTuple copyTuple; *************** lnext: *** 110,123 **** /* okay, try to lock the tuple */ if (erm->markType == ROW_MARK_EXCLUSIVE) lockmode = LockTupleExclusive; else lockmode = LockTupleShared; test = heap_lock_tuple(erm->relation, &tuple, &buffer, &update_ctid, &update_xmax, estate->es_output_cid, ! lockmode, erm->noWait); ReleaseBuffer(buffer); switch (test) { --- 112,149 ---- /* okay, try to lock the tuple */ if (erm->markType == ROW_MARK_EXCLUSIVE) + { lockmode = LockTupleExclusive; + + /* + * Transactions using a per-transaction snapshot pass + * that snapshot as lockcheck_snapshot. This causes tuples + * to be reported as HeapTupleUpdated even if they were + * merely locked (shared or exclusively) at some point by + * a concurrent transaction. See src/backend/executor/README + * for details. + */ + if (IsolationUsesXactSnapshot()) + lockcheck_snapshot = estate->es_snapshot; + } else + { lockmode = LockTupleShared; + + /* + * We never pass a lockcheck_snapshot to heap_lock_tuple() + * in this case. Thus, a transaction using a per-transaction + * snapshot may share-lock a row previously locked exclusively + * by a concurrent transaction. See src/backend/executor/README + * for why this is hard to avoid and OK. 
+ */ + } test = heap_lock_tuple(erm->relation, &tuple, &buffer, &update_ctid, &update_xmax, estate->es_output_cid, ! lockmode, erm->noWait, ! lockcheck_snapshot); ReleaseBuffer(buffer); switch (test) { *************** lnext: *** 134,139 **** --- 160,171 ---- ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + /* In case of a per-query snapshot, we didn't pass + * a lockcheck_snapshot to heap_lock_tuple() and can + * thus expect the tuple to have a valid xmax here + */ + Assert(TransactionIdIsValid(update_xmax)); + if (ItemPointerEquals(&update_ctid, &tuple.t_self)) { diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 541adaf..4e32c18 100644 *** a/src/backend/executor/nodeModifyTable.c --- b/src/backend/executor/nodeModifyTable.c *************** *** 38,43 **** --- 38,44 ---- #include "postgres.h" #include "access/xact.h" + #include "access/transam.h" #include "commands/trigger.h" #include "executor/executor.h" #include "executor/nodeModifyTable.h" *************** ExecDelete(ItemPointer tupleid, *** 367,384 **** /* * delete the tuple * ! * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check ! * that the row to be deleted is visible to that snapshot, and throw a ! * can't-serialize error if not. This is a special-case behavior ! * needed for referential integrity updates in transaction-snapshot ! * mode transactions. */ ldelete:; result = heap_delete(resultRelationDesc, tupleid, &update_ctid, &update_xmax, estate->es_output_cid, ! estate->es_crosscheck_snapshot, ! true /* wait for commit */ ); switch (result) { case HeapTupleSelfUpdated: --- 368,387 ---- /* * delete the tuple * ! * Transactions using a per-transaction snapshot pass ! * that snapshot as lockcheck_snapshot. This causes tuples ! * to be reported as HeapTupleUpdated even if they were ! * merely locked (shared or exclusively) at some point by ! * a concurrent transaction. See src/backend/executor/README ! * for details. */ ldelete:; result = heap_delete(resultRelationDesc, tupleid, &update_ctid, &update_xmax, estate->es_output_cid, ! true, /* wait for commit */ ! IsolationUsesXactSnapshot() ? estate->es_snapshot : ! InvalidSnapshot); switch (result) { case HeapTupleSelfUpdated: *************** ldelete:; *** 393,398 **** --- 396,406 ---- ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + /* In case of a per-query snapshot, we didn't pass + * a lockcheck_snapshot to heap_delete() and can + * thus expect the tuple to have a valid xmax here + */ + Assert(TransactionIdIsValid(update_xmax)); if (!ItemPointerEquals(tupleid, &update_ctid)) { TupleTableSlot *epqslot; *************** lreplace:; *** 615,631 **** /* * replace the heap tuple * ! * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check ! * that the row to be updated is visible to that snapshot, and throw a ! * can't-serialize error if not. This is a special-case behavior ! * needed for referential integrity updates in transaction-snapshot ! * mode transactions. */ result = heap_update(resultRelationDesc, tupleid, tuple, &update_ctid, &update_xmax, estate->es_output_cid, ! estate->es_crosscheck_snapshot, ! true /* wait for commit */ ); switch (result) { case HeapTupleSelfUpdated: --- 623,641 ---- /* * replace the heap tuple * ! * Transactions using a per-transaction snapshot pass ! * that snapshot as lockcheck_snapshot. This causes tuples ! 
* to be reported as HeapTupleUpdated even if they were ! * merely locked (shared or exclusively) at some point by ! * a concurrent transaction. See src/backend/executor/README ! * for details. */ result = heap_update(resultRelationDesc, tupleid, tuple, &update_ctid, &update_xmax, estate->es_output_cid, ! true, /* wait for commit */ ! IsolationUsesXactSnapshot() ? estate->es_snapshot : ! InvalidSnapshot); switch (result) { case HeapTupleSelfUpdated: *************** lreplace:; *** 640,645 **** --- 650,660 ---- ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + /* In case of a per-query snapshot, we didn't pass + * a lockcheck_snapshot to heap_update() and can + * thus expect the tuple to have a valid xmax here + */ + Assert(TransactionIdIsValid(update_xmax)); if (!ItemPointerEquals(tupleid, &update_ctid)) { TupleTableSlot *epqslot; diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index c579017..aa631f7 100644 *** a/src/backend/executor/spi.c --- b/src/backend/executor/spi.c *************** static void _SPI_prepare_plan(const char *** 51,57 **** ParamListInfo boundParams); static int _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, ! Snapshot snapshot, Snapshot crosscheck_snapshot, bool read_only, bool fire_triggers, long tcount); static ParamListInfo _SPI_convert_params(int nargs, Oid *argtypes, --- 51,57 ---- ParamListInfo boundParams); static int _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, ! Snapshot snapshot, bool read_only, bool fire_triggers, long tcount); static ParamListInfo _SPI_convert_params(int nargs, Oid *argtypes, *************** SPI_execute(const char *src, bool read_o *** 357,363 **** _SPI_prepare_plan(src, &plan, NULL); res = _SPI_execute_plan(&plan, NULL, ! InvalidSnapshot, InvalidSnapshot, read_only, true, tcount); _SPI_end_call(true); --- 357,363 ---- _SPI_prepare_plan(src, &plan, NULL); res = _SPI_execute_plan(&plan, NULL, ! InvalidSnapshot, read_only, true, tcount); _SPI_end_call(true); *************** SPI_execute_plan(SPIPlanPtr plan, Datum *** 392,398 **** _SPI_convert_params(plan->nargs, plan->argtypes, Values, Nulls, 0), ! InvalidSnapshot, InvalidSnapshot, read_only, true, tcount); _SPI_end_call(true); --- 392,398 ---- _SPI_convert_params(plan->nargs, plan->argtypes, Values, Nulls, 0), ! InvalidSnapshot, read_only, true, tcount); _SPI_end_call(true); *************** SPI_execute_plan_with_paramlist(SPIPlanP *** 421,427 **** return res; res = _SPI_execute_plan(plan, params, ! InvalidSnapshot, InvalidSnapshot, read_only, true, tcount); _SPI_end_call(true); --- 421,427 ---- return res; res = _SPI_execute_plan(plan, params, ! InvalidSnapshot, read_only, true, tcount); _SPI_end_call(true); *************** SPI_execute_plan_with_paramlist(SPIPlanP *** 444,450 **** int SPI_execute_snapshot(SPIPlanPtr plan, Datum *Values, const char *Nulls, ! Snapshot snapshot, Snapshot crosscheck_snapshot, bool read_only, bool fire_triggers, long tcount) { int res; --- 444,450 ---- int SPI_execute_snapshot(SPIPlanPtr plan, Datum *Values, const char *Nulls, ! Snapshot snapshot, bool read_only, bool fire_triggers, long tcount) { int res; *************** SPI_execute_snapshot(SPIPlanPtr plan, *** 463,469 **** _SPI_convert_params(plan->nargs, plan->argtypes, Values, Nulls, 0), ! snapshot, crosscheck_snapshot, read_only, fire_triggers, tcount); _SPI_end_call(true); --- 463,469 ---- _SPI_convert_params(plan->nargs, plan->argtypes, Values, Nulls, 0), ! 
snapshot, read_only, fire_triggers, tcount); _SPI_end_call(true); *************** SPI_execute_with_args(const char *src, *** 516,522 **** /* We don't need to copy the plan since it will be thrown away anyway */ res = _SPI_execute_plan(&plan, paramLI, ! InvalidSnapshot, InvalidSnapshot, read_only, true, tcount); _SPI_end_call(true); --- 516,522 ---- /* We don't need to copy the plan since it will be thrown away anyway */ res = _SPI_execute_plan(&plan, paramLI, ! InvalidSnapshot, read_only, true, tcount); _SPI_end_call(true); *************** _SPI_prepare_plan(const char *src, SPIPl *** 1752,1758 **** * * snapshot: query snapshot to use, or InvalidSnapshot for the normal * behavior of taking a new snapshot for each query. - * crosscheck_snapshot: for RI use, all others pass InvalidSnapshot * read_only: TRUE for read-only execution (no CommandCounterIncrement) * fire_triggers: TRUE to fire AFTER triggers at end of query (normal case); * FALSE means any AFTER triggers are postponed to end of outer query --- 1752,1757 ---- *************** _SPI_prepare_plan(const char *src, SPIPl *** 1760,1766 **** */ static int _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, ! Snapshot snapshot, Snapshot crosscheck_snapshot, bool read_only, bool fire_triggers, long tcount) { int my_res = 0; --- 1759,1765 ---- */ static int _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, ! Snapshot snapshot, bool read_only, bool fire_triggers, long tcount) { int my_res = 0; *************** _SPI_execute_plan(SPIPlanPtr plan, Param *** 1903,1909 **** qdesc = CreateQueryDesc((PlannedStmt *) stmt, plansource->query_string, ! snap, crosscheck_snapshot, dest, paramLI, 0); res = _SPI_pquery(qdesc, fire_triggers, --- 1902,1908 ---- qdesc = CreateQueryDesc((PlannedStmt *) stmt, plansource->query_string, ! 
snap, dest, paramLI, 0); res = _SPI_pquery(qdesc, fire_triggers, diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 8eb02da..fdf206f 100644 *** a/src/backend/tcop/pquery.c --- b/src/backend/tcop/pquery.c *************** QueryDesc * *** 64,70 **** CreateQueryDesc(PlannedStmt *plannedstmt, const char *sourceText, Snapshot snapshot, - Snapshot crosscheck_snapshot, DestReceiver *dest, ParamListInfo params, int instrument_options) --- 64,69 ---- *************** CreateQueryDesc(PlannedStmt *plannedstmt *** 76,83 **** qd->utilitystmt = plannedstmt->utilityStmt; /* in case DECLARE CURSOR */ qd->sourceText = sourceText; /* query text */ qd->snapshot = RegisterSnapshot(snapshot); /* snapshot */ - /* RI check snapshot */ - qd->crosscheck_snapshot = RegisterSnapshot(crosscheck_snapshot); qd->dest = dest; /* output dest */ qd->params = params; /* parameter values passed into query */ qd->instrument_options = instrument_options; /* instrumentation --- 75,80 ---- *************** CreateUtilityQueryDesc(Node *utilitystmt *** 109,115 **** qd->utilitystmt = utilitystmt; /* utility command */ qd->sourceText = sourceText; /* query text */ qd->snapshot = RegisterSnapshot(snapshot); /* snapshot */ - qd->crosscheck_snapshot = InvalidSnapshot; /* RI check snapshot */ qd->dest = dest; /* output dest */ qd->params = params; /* parameter values passed into query */ qd->instrument_options = false; /* uninteresting for utilities */ --- 106,111 ---- *************** FreeQueryDesc(QueryDesc *qdesc) *** 134,140 **** /* forget our snapshots */ UnregisterSnapshot(qdesc->snapshot); - UnregisterSnapshot(qdesc->crosscheck_snapshot); /* Only the QueryDesc itself need be freed */ pfree(qdesc); --- 130,135 ---- *************** ProcessQuery(PlannedStmt *plan, *** 178,184 **** * Create the QueryDesc object */ queryDesc = CreateQueryDesc(plan, sourceText, ! GetActiveSnapshot(), InvalidSnapshot, dest, params, 0); /* --- 173,179 ---- * Create the QueryDesc object */ queryDesc = CreateQueryDesc(plan, sourceText, ! 
GetActiveSnapshot(), dest, params, 0); /* *************** PortalStart(Portal portal, ParamListInfo *** 514,520 **** queryDesc = CreateQueryDesc((PlannedStmt *) linitial(portal->stmts), portal->sourceText, GetActiveSnapshot(), - InvalidSnapshot, None_Receiver, params, 0); --- 509,514 ---- diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 33a8935..ff51281 100644 *** a/src/backend/utils/adt/ri_triggers.c --- b/src/backend/utils/adt/ri_triggers.c *************** static SPIPlanPtr ri_PlanCheck(const cha *** 230,236 **** static bool ri_PerformCheck(RI_QueryKey *qkey, SPIPlanPtr qplan, Relation fk_rel, Relation pk_rel, HeapTuple old_tuple, HeapTuple new_tuple, - bool detectNewRows, int expect_OK, const char *constrname); static void ri_ExtractValues(RI_QueryKey *qkey, int key_idx, Relation rel, HeapTuple tuple, --- 230,235 ---- *************** RI_FKey_check(PG_FUNCTION_ARGS) *** 357,363 **** ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, NULL, NULL, - false, SPI_OK_SELECT, NameStr(riinfo.conname)); --- 356,361 ---- *************** RI_FKey_check(PG_FUNCTION_ARGS) *** 500,506 **** ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, NULL, new_row, - false, SPI_OK_SELECT, NameStr(riinfo.conname)); --- 498,503 ---- *************** ri_Check_Pk_Match(Relation pk_rel, Relat *** 661,667 **** result = ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, old_row, NULL, - true, /* treat like update */ SPI_OK_SELECT, NULL); if (SPI_finish() != SPI_OK_FINISH) --- 658,663 ---- *************** RI_FKey_noaction_del(PG_FUNCTION_ARGS) *** 818,824 **** ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, old_row, NULL, - true, /* must detect new rows */ SPI_OK_SELECT, NameStr(riinfo.conname)); --- 814,819 ---- *************** RI_FKey_noaction_upd(PG_FUNCTION_ARGS) *** 1006,1012 **** ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, old_row, NULL, - true, /* must detect new rows */ SPI_OK_SELECT, NameStr(riinfo.conname)); --- 1001,1006 ---- *************** RI_FKey_cascade_del(PG_FUNCTION_ARGS) *** 1168,1174 **** ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, old_row, NULL, - true, /* must detect new rows */ SPI_OK_DELETE, NameStr(riinfo.conname)); --- 1162,1167 ---- *************** RI_FKey_cascade_upd(PG_FUNCTION_ARGS) *** 1356,1362 **** ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, old_row, new_row, - true, /* must detect new rows */ SPI_OK_UPDATE, NameStr(riinfo.conname)); --- 1349,1354 ---- *************** RI_FKey_restrict_del(PG_FUNCTION_ARGS) *** 1527,1533 **** ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, old_row, NULL, - true, /* must detect new rows */ SPI_OK_SELECT, NameStr(riinfo.conname)); --- 1519,1524 ---- *************** RI_FKey_restrict_upd(PG_FUNCTION_ARGS) *** 1710,1716 **** ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, old_row, NULL, - true, /* must detect new rows */ SPI_OK_SELECT, NameStr(riinfo.conname)); --- 1701,1706 ---- *************** RI_FKey_setnull_del(PG_FUNCTION_ARGS) *** 1881,1887 **** ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, old_row, NULL, - true, /* must detect new rows */ SPI_OK_UPDATE, NameStr(riinfo.conname)); --- 1871,1876 ---- *************** RI_FKey_setnull_upd(PG_FUNCTION_ARGS) *** 2097,2103 **** ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, old_row, NULL, - true, /* must detect new rows */ SPI_OK_UPDATE, NameStr(riinfo.conname)); --- 2086,2091 ---- *************** RI_FKey_setdefault_del(PG_FUNCTION_ARGS) *** 2269,2275 **** ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, old_row, NULL, - true, /* must detect new rows */ SPI_OK_UPDATE, 
NameStr(riinfo.conname)); --- 2257,2262 ---- *************** RI_FKey_setdefault_upd(PG_FUNCTION_ARGS) *** 2472,2478 **** ri_PerformCheck(&qkey, qplan, fk_rel, pk_rel, old_row, NULL, - true, /* must detect new rows */ SPI_OK_UPDATE, NameStr(riinfo.conname)); --- 2459,2464 ---- *************** RI_Initial_Check(Trigger *trigger, Relat *** 2792,2798 **** spi_result = SPI_execute_snapshot(qplan, NULL, NULL, GetLatestSnapshot(), - InvalidSnapshot, true, false, 1); /* Check result */ --- 2778,2783 ---- *************** static bool *** 3271,3284 **** ri_PerformCheck(RI_QueryKey *qkey, SPIPlanPtr qplan, Relation fk_rel, Relation pk_rel, HeapTuple old_tuple, HeapTuple new_tuple, - bool detectNewRows, int expect_OK, const char *constrname) { Relation query_rel, source_rel; int key_idx; - Snapshot test_snapshot; - Snapshot crosscheck_snapshot; int limit; int spi_result; Oid save_userid; --- 3256,3266 ---- *************** ri_PerformCheck(RI_QueryKey *qkey, SPIPl *** 3330,3359 **** } /* - * In READ COMMITTED mode, we just need to use an up-to-date regular - * snapshot, and we will see all rows that could be interesting. But in - * transaction-snapshot mode, we can't change the transaction snapshot. - * If the caller passes detectNewRows == false then it's okay to do the query - * with the transaction snapshot; otherwise we use a current snapshot, and - * tell the executor to error out if it finds any rows under the current - * snapshot that wouldn't be visible per the transaction snapshot. Note - * that SPI_execute_snapshot will register the snapshots, so we don't need - * to bother here. - */ - if (IsolationUsesXactSnapshot() && detectNewRows) - { - CommandCounterIncrement(); /* be sure all my own work is visible */ - test_snapshot = GetLatestSnapshot(); - crosscheck_snapshot = GetTransactionSnapshot(); - } - else - { - /* the default SPI behavior is okay */ - test_snapshot = InvalidSnapshot; - crosscheck_snapshot = InvalidSnapshot; - } - - /* * If this is a select query (e.g., for a 'no action' or 'restrict' * trigger), we only need to see if there is a single row in the table, * matching the key. Otherwise, limit = 0 - because we want the query to --- 3312,3317 ---- *************** ri_PerformCheck(RI_QueryKey *qkey, SPIPl *** 3369,3375 **** /* Finally we can run the query. */ spi_result = SPI_execute_snapshot(qplan, vals, nulls, ! test_snapshot, crosscheck_snapshot, false, false, limit); /* Restore UID and security context */ --- 3327,3333 ---- /* Finally we can run the query. */ spi_result = SPI_execute_snapshot(qplan, vals, nulls, ! InvalidSnapshot, false, false, limit); /* Restore UID and security context */ diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 273d8bd..cd277ab 100644 *** a/src/backend/utils/time/snapmgr.c --- b/src/backend/utils/time/snapmgr.c *************** *** 27,32 **** --- 27,33 ---- #include "access/transam.h" #include "access/xact.h" + #include "access/multixact.h" #include "storage/proc.h" #include "storage/procarray.h" #include "utils/memutils.h" *************** GetTransactionSnapshot(void) *** 126,131 **** --- 127,142 ---- { Assert(RegisteredSnapshots == 0); + /* + * We must store the oldest visible multi xact *before* taking the + * serializable snapshot. Otherwise HeapSatisfiesLockersVisible() + * (in tqual.c, called from heapam.c) will be in trouble, since it + * depends on being able to inspect all multi xact ids which might + * contain xids invisible to the serializable snapshot. 
+ */ + if (IsolationUsesXactSnapshot()) + MultiXactIdSetOldestVisible(); + CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); FirstSnapshotSet = true; diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index 719dc13..f49a112 100644 *** a/src/backend/utils/time/tqual.c --- b/src/backend/utils/time/tqual.c *************** HeapTupleSatisfiesVacuum(HeapTupleHeader *** 1243,1248 **** --- 1243,1336 ---- return HEAPTUPLE_DEAD; } + /* + * Returns false if one of the tuple's lockers committed but + * isn't visible according to lockcheck_snapshot, excluding subtransactions + * of the current transaction. + * Assumes that all locking transactions have either committed or aborted, + * i.e. none are still in progress. + */ + bool + HeapSatisfiesLockersVisible(HeapTupleHeader tuple, Snapshot lockcheck_snapshot) + { + if (lockcheck_snapshot == InvalidSnapshot) + return true; + + if (tuple->t_infomask & HEAP_IS_LOCKED) + { + /* + * If the tuple was locked, we now check whether the locking + * transaction(s) are visible under lockcheck_snapshot. If + * they aren't, we pretend that the tuple was updated. + */ + if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) + { + TransactionId *xids; + int xids_l = GetMultiXactIdMembers(HeapTupleHeaderGetXmax(tuple), &xids); + + if (xids_l < 1) + { + /* + * The multixact is either too old to be inspected or + * doesn't contain members. The second case is probably + * impossible, but even if not it doesn't pose any problem. + * In the first case, we have to trust that all xids that + * were contained in the multixact are in fact visible under + * lockcheck_snapshot. Currently this is always the case, + * since lockcheck_snapshot is always the transaction's + * serializable snapshot, and we call + * MultiXactIdSetOldestVisible() before acquiring that + * snapshot. + */ + return true; + } + else + { + int i; + for (i = 0; i < xids_l; ++i) + { + /* Ignore our own subtransactions */ + if (TransactionIdIsCurrentTransactionId(xids[i])) + continue; + + /* + * We expect to be called after the locking transactions' + * fates have been decided + */ + Assert(!TransactionIdIsInProgress(xids[i])); + + if (!TransactionIdDidAbort(xids[i]) && + XidInMVCCSnapshot(xids[i], lockcheck_snapshot)) + { + /* Non-aborted, invisible locker */ + return false; + } + } + return true; + } + } + else + { + TransactionId xid = HeapTupleHeaderGetXmax(tuple); + + /* Ignore our own subtransactions */ + if (TransactionIdIsCurrentTransactionId(xid)) + return true; + + /* We expect to be called after the locking transactions' fates have been decided */ + Assert(!TransactionIdIsInProgress(xid)); + + /* Locker must either be visible or have aborted */ + return TransactionIdDidAbort(xid) || + !XidInMVCCSnapshot(xid, lockcheck_snapshot); + } + } + else + { + /* Tuple wasn't locked */ + return true; + } + } /* * XidInMVCCSnapshot diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 45e100c..2075759 100644 *** a/src/include/access/heapam.h --- b/src/include/access/heapam.h *************** extern Oid heap_insert(Relation relation *** 98,112 **** int options, BulkInsertState bistate); extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid, TransactionId *update_xmax, ! 
CommandId cid, Snapshot crosscheck, bool wait); extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer, ItemPointer ctid, TransactionId *update_xmax, CommandId cid, ! LockTupleMode mode, bool nowait); extern void heap_inplace_update(Relation relation, HeapTuple tuple); extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, Buffer buf); --- 98,113 ---- int options, BulkInsertState bistate); extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid, TransactionId *update_xmax, ! CommandId cid, bool wait, Snapshot lockcheck_snapshot); extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, ItemPointer ctid, TransactionId *update_xmax, ! CommandId cid, bool wait, Snapshot lockcheck_snapshot); extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer, ItemPointer ctid, TransactionId *update_xmax, CommandId cid, ! LockTupleMode mode, bool nowait, ! Snapshot lockcheck_snapshot); extern void heap_inplace_update(Relation relation, HeapTuple tuple); extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, Buffer buf); diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 565848e..93ad4cb 100644 *** a/src/include/access/multixact.h --- b/src/include/access/multixact.h *************** extern bool MultiXactIdIsCurrent(MultiXa *** 49,54 **** --- 49,55 ---- extern void MultiXactIdWait(MultiXactId multi); extern bool ConditionalMultiXactIdWait(MultiXactId multi); extern void MultiXactIdSetOldestMember(void); + extern void MultiXactIdSetOldestVisible(void); extern int GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids); extern void AtEOXact_MultiXact(void); diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index 4fb57d2..6881eb2 100644 *** a/src/include/executor/execdesc.h --- b/src/include/executor/execdesc.h *************** typedef struct QueryDesc *** 39,45 **** Node *utilitystmt; /* utility statement, or null */ const char *sourceText; /* source text of the query */ Snapshot snapshot; /* snapshot to use for query */ - Snapshot crosscheck_snapshot; /* crosscheck for RI update/delete */ DestReceiver *dest; /* the destination for tuple output */ ParamListInfo params; /* param values being passed in */ int instrument_options; /* OR of InstrumentOption flags */ --- 39,44 ---- *************** typedef struct QueryDesc *** 57,63 **** extern QueryDesc *CreateQueryDesc(PlannedStmt *plannedstmt, const char *sourceText, Snapshot snapshot, - Snapshot crosscheck_snapshot, DestReceiver *dest, ParamListInfo params, int instrument_options); --- 56,61 ---- diff --git a/src/include/executor/spi.h b/src/include/executor/spi.h index 96e29b9..b8536bd 100644 *** a/src/include/executor/spi.h --- b/src/include/executor/spi.h *************** extern int SPI_execp(SPIPlanPtr plan, Da *** 82,88 **** extern int SPI_execute_snapshot(SPIPlanPtr plan, Datum *Values, const char *Nulls, Snapshot snapshot, - Snapshot crosscheck_snapshot, bool read_only, bool fire_triggers, long tcount); extern int SPI_execute_with_args(const char *src, int nargs, Oid *argtypes, --- 82,87 ---- diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index d669c24..83b1f02 100644 *** a/src/include/nodes/execnodes.h --- b/src/include/nodes/execnodes.h *************** typedef struct EState *** 337,343 **** /* Basic state for all query types: */ ScanDirection es_direction; /* current scan direction */ Snapshot 
es_snapshot; /* time qual to use */ - Snapshot es_crosscheck_snapshot; /* crosscheck time qual for RI */ List *es_range_table; /* List of RangeTblEntry */ PlannedStmt *es_plannedstmt; /* link to top of plan tree */ --- 337,342 ---- diff --git a/src/include/utils/tqual.h b/src/include/utils/tqual.h index df14f59..d18dbe6 100644 *** a/src/include/utils/tqual.h --- b/src/include/utils/tqual.h *************** extern HTSV_Result HeapTupleSatisfiesVac *** 86,90 **** --- 86,92 ---- extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid); + extern bool HeapSatisfiesLockersVisible(HeapTupleHeader tuple, + Snapshot snapshot); #endif /* TQUAL_H */
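To make the asymmetry described in the new src/backend/executor/README section concrete, here is a companion session sketch (not part of the patch, under the same assumed mytab table as the earlier example): a serializable FOR UPDATE aborts when it meets a concurrent FOR SHARE, while the reverse order merely waits.

    -- Session A (read committed)
    BEGIN;
    SELECT * FROM mytab WHERE id = 1 FOR SHARE;

    -- Session B (serializable; snapshot taken while A is in progress)
    BEGIN ISOLATION LEVEL SERIALIZABLE;
    SELECT count(*) FROM mytab;

    -- Session A
    COMMIT;

    -- Session B: the share lock held by A counts as a conflict here,
    -- because nodeLockRows.c passes the transaction snapshot as
    -- lockcheck_snapshot for exclusive row locks
    SELECT * FROM mytab WHERE id = 1 FOR UPDATE;
    -- ERROR:  could not serialize access due to concurrent update

    -- With the lock strengths swapped - A taking FOR UPDATE (without
    -- actually updating) and B then running SELECT ... FOR SHARE - the
    -- lock is granted once A ends, since no lockcheck_snapshot is
    -- passed for shared locks.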