diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml
index 020bbcd..e7d6c36 100644
*** a/doc/src/sgml/mvcc.sgml
--- b/doc/src/sgml/mvcc.sgml
*************** COMMIT;
*** 386,407 ****
behave the same as SELECT
in terms of searching for target rows: they will only find target rows
that were committed as of the transaction start time. However, such a
! target
! row might have already been updated (or deleted or locked) by
! another concurrent transaction by the time it is found. In this case, the
! serializable transaction will wait for the first updating transaction to commit or
! roll back (if it is still in progress). If the first updater rolls back,
! then its effects are negated and the serializable transaction can proceed
! with updating the originally found row. But if the first updater commits
! (and actually updated or deleted the row, not just locked it)
! then the serializable transaction will be rolled back with the message
ERROR: could not serialize access due to concurrent update
! because a serializable transaction cannot modify or lock rows changed by
! other transactions after the serializable transaction began.
--- 386,407 ----
behave the same as SELECT
in terms of searching for target rows: they will only find target rows
that were committed as of the transaction start time. However, such a
! target row might have already been subject to a concurrent
! <command>UPDATE</command>, <command>DELETE</command>, <command>SELECT
! FOR UPDATE</command>, or <command>SELECT FOR SHARE</command>. In this case,
! the serializable transaction will wait for the other transaction to commit
! or roll back (if it is still in progress). If it rolls back, then its effects
! are negated and the serializable transaction can proceed with modifying
! or locking the originally found row. If it commits, and the two commands
! conflict according to <xref linkend="mvcc-serialization-conflicts">,
! the serializable transaction is rolled back with the message
ERROR: could not serialize access due to concurrent update
! since a serializable transaction cannot simply proceed with the newer row
! version as a read committed transaction would.
*************** ERROR: could not serialize access due t
*** 418,423 ****
--- 418,463 ----
transactions will never have serialization conflicts.
+
+    <table id="mvcc-serialization-conflicts">
+     <title>Serialization Conflicts</title>
+     <tgroup cols="4">
+      <colspec colname="c1">
+      <colspec colname="c2">
+      <colspec colname="c3">
+      <colspec colname="c4">
+      <thead>
+       <row>
+        <entry morerows="1">Serializable Transaction</entry>
+        <entry namest="c2" nameend="c4" align="center">Concurrent Transaction</entry>
+       </row>
+       <row>
+        <entry><command>UPDATE</command>, <command>DELETE</command></entry>
+        <entry><command>SELECT FOR UPDATE</command></entry>
+        <entry><command>SELECT FOR SHARE</command></entry>
+       </row>
+      </thead>
+      <tbody>
+       <row>
+        <entry><command>UPDATE</command>, <command>DELETE</command></entry>
+        <entry align="center">X</entry>
+        <entry align="center">X</entry>
+        <entry align="center">X</entry>
+       </row>
+       <row>
+        <entry><command>SELECT FOR UPDATE</command></entry>
+        <entry align="center">X</entry>
+        <entry align="center">X</entry>
+        <entry align="center">X</entry>
+       </row>
+       <row>
+        <entry><command>SELECT FOR SHARE</command></entry>
+        <entry align="center">X</entry>
+        <entry></entry>
+        <entry></entry>
+       </row>
+      </tbody>
+     </tgroup>
+    </table>
+
The Serializable mode provides a rigorous guarantee that each
transaction sees a wholly consistent view of the database. However,
*************** SELECT SUM(value) FROM mytab WHERE class
*** 921,926 ****
--- 961,974 ----
+    <para>
+     Serializable transactions are affected by concurrent
+     <command>SELECT FOR SHARE</command> and <command>SELECT FOR UPDATE</command>
+     for longer than those locks are actually held, and may be aborted
+     when trying to obtain a conflicting lock. For details,
+     see <xref linkend="xact-serializable">.
+    </para>
+
PostgreSQL doesn't remember any
information about modified rows in memory, so there is no limit on
the number of rows locked at one time. However, locking a row
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 4020906..a397bad 100644
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
*************** simple_heap_insert(Relation relation, He
*** 2033,2055 ****
* update_xmax - output parameter, used only for failure case (see below)
* cid - delete command ID (used for visibility test, and stored into
* cmax if successful)
- * crosscheck - if not InvalidSnapshot, also check tuple against this
* wait - true if should wait for any conflicting update to commit/abort
*
* Normal, successful return value is HeapTupleMayBeUpdated, which
* actually means we did delete it. Failure return codes are
* HeapTupleSelfUpdated, HeapTupleUpdated, or HeapTupleBeingUpdated
* (the last only possible if wait == false).
*
! * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
! * If t_ctid is the same as tid, the tuple was deleted; if different, the
! * tuple was updated, and t_ctid is the location of the replacement tuple.
! * (t_xmax is needed to verify that the replacement tuple matches.)
*/
HTSU_Result
heap_delete(Relation relation, ItemPointer tid,
ItemPointer ctid, TransactionId *update_xmax,
! CommandId cid, Snapshot crosscheck, bool wait)
{
HTSU_Result result;
TransactionId xid = GetCurrentTransactionId();
--- 2033,2064 ----
* update_xmax - output parameter, used only for failure case (see below)
* cid - delete command ID (used for visibility test, and stored into
* cmax if successful)
* wait - true if should wait for any conflicting update to commit/abort
+ * lockcheck_snapshot - if not InvalidSnapshot, report the tuple as updated if it
+ * was once locked by a transaction not visible under this snapshot
*
* Normal, successful return value is HeapTupleMayBeUpdated, which
* actually means we did delete it. Failure return codes are
* HeapTupleSelfUpdated, HeapTupleUpdated, or HeapTupleBeingUpdated
* (the last only possible if wait == false).
*
! * In the failure cases, the routine returns the tuple's t_ctid and t_xmax
! * in ctid and update_xmax.
! * If ctid is the same as t_self and update_xmax is a valid transaction id,
! * the tuple was deleted.
! * If ctid differs from t_self, the tuple was updated; ctid is the location
! * of the replacement tuple and update_xmax is the updating transaction's
! * xid, which must be used to verify that the replacement tuple matches.
! * Otherwise, if ctid is the same as t_self and update_xmax is
! * InvalidTransactionId, the tuple was neither replaced nor deleted, but
! * locked by a transaction invisible to lockcheck_snapshot. This case can
! * thus only arise if lockcheck_snapshot is a valid snapshot.
*/
HTSU_Result
heap_delete(Relation relation, ItemPointer tid,
ItemPointer ctid, TransactionId *update_xmax,
! CommandId cid, bool wait, Snapshot lockcheck_snapshot)
{
HTSU_Result result;
TransactionId xid = GetCurrentTransactionId();
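
(Illustrative sketch, not part of the patch: a hypothetical caller-side
decoder for the three failure outcomes described in the comment above.
ItemPointerEquals() and TransactionIdIsValid() are existing primitives;
the helper itself is invented for illustration.)

    static void
    interpret_lock_failure(ItemPointer self, ItemPointer ctid,
                           TransactionId update_xmax)
    {
        if (!ItemPointerEquals(self, ctid))
        {
            /*
             * Updated: ctid points at the replacement tuple, and
             * update_xmax must be used to verify that the replacement
             * tuple still matches.
             */
        }
        else if (TransactionIdIsValid(update_xmax))
        {
            /* Deleted: ctid equals t_self and xmax is the deleter's xid. */
        }
        else
        {
            /*
             * Merely locked by a transaction invisible to
             * lockcheck_snapshot: ctid equals t_self and update_xmax is
             * InvalidTransactionId. Only possible when a valid
             * lockcheck_snapshot was passed.
             */
        }
    }
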
*************** l1:
*** 2171,2181 ****
result = HeapTupleUpdated;
}
! if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
{
! /* Perform additional check for transaction-snapshot mode RI updates */
! if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
! result = HeapTupleUpdated;
}
if (result != HeapTupleMayBeUpdated)
--- 2180,2201 ----
result = HeapTupleUpdated;
}
! /* If the tuple was updated, we report the updating transaction's
! * xid in update_xmax. Otherwise, we must check that it wasn't
! * locked by a transaction invisible to lockcheck_snapshot before
! * continuing.
! */
! if (result != HeapTupleMayBeUpdated)
{
! Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
! *update_xmax = HeapTupleHeaderGetXmax(tp.t_data);
! }
! else if ((lockcheck_snapshot != InvalidSnapshot) &&
! !HeapSatisfiesLockersVisible(tp.t_data, lockcheck_snapshot))
! {
! Assert((tp.t_data->t_infomask & HEAP_IS_LOCKED));
! *update_xmax = InvalidTransactionId;
! result = HeapTupleUpdated;
}
if (result != HeapTupleMayBeUpdated)
*************** l1:
*** 2183,2191 ****
Assert(result == HeapTupleSelfUpdated ||
result == HeapTupleUpdated ||
result == HeapTupleBeingUpdated);
- Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
*ctid = tp.t_data->t_ctid;
- *update_xmax = HeapTupleHeaderGetXmax(tp.t_data);
UnlockReleaseBuffer(buffer);
if (have_tuple_lock)
UnlockTuple(relation, &(tp.t_self), ExclusiveLock);
--- 2203,2209 ----
*************** simple_heap_delete(Relation relation, It
*** 2313,2320 ****
result = heap_delete(relation, tid,
&update_ctid, &update_xmax,
! GetCurrentCommandId(true), InvalidSnapshot,
! true /* wait for commit */ );
switch (result)
{
case HeapTupleSelfUpdated:
--- 2331,2339 ----
result = heap_delete(relation, tid,
&update_ctid, &update_xmax,
! GetCurrentCommandId(true),
! true /* wait for commit */ ,
! InvalidSnapshot);
switch (result)
{
case HeapTupleSelfUpdated:
*************** simple_heap_delete(Relation relation, It
*** 2349,2355 ****
* update_xmax - output parameter, used only for failure case (see below)
* cid - update command ID (used for visibility test, and stored into
* cmax/cmin if successful)
! * crosscheck - if not InvalidSnapshot, also check old tuple against this
* wait - true if should wait for any conflicting update to commit/abort
*
* Normal, successful return value is HeapTupleMayBeUpdated, which
--- 2368,2375 ----
* update_xmax - output parameter, used only for failure case (see below)
* cid - update command ID (used for visibility test, and stored into
* cmax/cmin if successful)
! * lockcheck_snapshot - if not InvalidSnapshot, report the tuple as updated
! * if it was once locked by a transaction not visible under this snapshot
* wait - true if should wait for any conflicting update to commit/abort
*
* Normal, successful return value is HeapTupleMayBeUpdated, which
*************** simple_heap_delete(Relation relation, It
*** 2363,2377 ****
* update was done. However, any TOAST changes in the new tuple's
* data are not reflected into *newtup.
*
! * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
! * If t_ctid is the same as otid, the tuple was deleted; if different, the
! * tuple was updated, and t_ctid is the location of the replacement tuple.
! * (t_xmax is needed to verify that the replacement tuple matches.)
*/
HTSU_Result
heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
ItemPointer ctid, TransactionId *update_xmax,
! CommandId cid, Snapshot crosscheck, bool wait)
{
HTSU_Result result;
TransactionId xid = GetCurrentTransactionId();
--- 2383,2405 ----
* update was done. However, any TOAST changes in the new tuple's
* data are not reflected into *newtup.
*
! * In the failure cases, the routine returns the tuple's t_ctid and t_xmax
! * in ctid and update_xmax.
! * If ctid is the same as t_self and update_xmax is a valid transaction id,
! * the tuple was deleted.
! * If ctid differs from t_self, the tuple was updated; ctid is the location
! * of the replacement tuple and update_xmax is the updating transaction's
! * xid, which must be used to verify that the replacement tuple matches.
! * Otherwise, if ctid is the same as t_self and update_xmax is
! * InvalidTransactionId, the tuple was neither replaced nor deleted, but
! * locked by a transaction invisible to lockcheck_snapshot. This case can
! * thus only arise if lockcheck_snapshot is a valid snapshot.
*/
HTSU_Result
heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
ItemPointer ctid, TransactionId *update_xmax,
! CommandId cid, bool wait, Snapshot lockcheck_snapshot)
{
HTSU_Result result;
TransactionId xid = GetCurrentTransactionId();
*************** l2:
*** 2523,2533 ****
result = HeapTupleUpdated;
}
! if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
{
! /* Perform additional check for transaction-snapshot mode RI updates */
! if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
! result = HeapTupleUpdated;
}
if (result != HeapTupleMayBeUpdated)
--- 2551,2572 ----
result = HeapTupleUpdated;
}
! /* If the tuple was updated, we report the updating transaction's
! * xid in update_xmax. Otherwise, we must check that it wasn't
! * locked by a transaction invisible to lockcheck_snapshot before
! * continuing.
! */
! if (result != HeapTupleMayBeUpdated)
{
! Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
! *update_xmax = HeapTupleHeaderGetXmax(oldtup.t_data);
! }
! else if ((lockcheck_snapshot != InvalidSnapshot) &&
! !HeapSatisfiesLockersVisible(oldtup.t_data, lockcheck_snapshot))
! {
! Assert((oldtup.t_data->t_infomask & HEAP_IS_LOCKED));
! *update_xmax = InvalidTransactionId;
! result = HeapTupleUpdated;
}
if (result != HeapTupleMayBeUpdated)
*************** l2:
*** 2535,2543 ****
Assert(result == HeapTupleSelfUpdated ||
result == HeapTupleUpdated ||
result == HeapTupleBeingUpdated);
- Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
*ctid = oldtup.t_data->t_ctid;
- *update_xmax = HeapTupleHeaderGetXmax(oldtup.t_data);
UnlockReleaseBuffer(buffer);
if (have_tuple_lock)
UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock);
--- 2574,2580 ----
*************** simple_heap_update(Relation relation, It
*** 2978,2985 ****
result = heap_update(relation, otid, tup,
&update_ctid, &update_xmax,
! GetCurrentCommandId(true), InvalidSnapshot,
! true /* wait for commit */ );
switch (result)
{
case HeapTupleSelfUpdated:
--- 3015,3022 ----
result = heap_update(relation, otid, tup,
&update_ctid, &update_xmax,
! GetCurrentCommandId(true),
! true /* wait for commit */, InvalidSnapshot);
switch (result)
{
case HeapTupleSelfUpdated:
*************** simple_heap_update(Relation relation, It
*** 3013,3018 ****
--- 3050,3059 ----
* tuple's cmax if lock is successful)
* mode: indicates if shared or exclusive tuple lock is desired
* nowait: if true, ereport rather than blocking if lock not available
+ * lockcheck_snapshot: if not InvalidSnapshot, report the tuple as updated if it
+ * was once locked by a transaction not visible under this snapshot.
+ * Note that HeapTupleUpdated is reported even for non-conflicting locks -
+ * the caller is expected to deal with that.
*
* Output parameters:
* *tuple: all fields filled in
*************** simple_heap_update(Relation relation, It
*** 3025,3035 ****
* HeapTupleSelfUpdated: lock failed because tuple updated by self
* HeapTupleUpdated: lock failed because tuple updated by other xact
*
! * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
! * If t_ctid is the same as t_self, the tuple was deleted; if different, the
! * tuple was updated, and t_ctid is the location of the replacement tuple.
! * (t_xmax is needed to verify that the replacement tuple matches.)
! *
*
* NOTES: because the shared-memory lock table is of finite size, but users
* could reasonably want to lock large numbers of tuples, we do not rely on
--- 3066,3083 ----
* HeapTupleSelfUpdated: lock failed because tuple updated by self
* HeapTupleUpdated: lock failed because tuple updated by other xact
*
! * In the failure cases, the routine returns the tuple's t_ctid and t_xmax
! * in ctid and update_xmax.
! * If ctid is the same as t_self and update_xmax is a valid transaction id,
! * the tuple was deleted.
! * If ctid differs from t_self, the tuple was updated; ctid is the location
! * of the replacement tuple and update_xmax is the updating transaction's
! * xid, which must be used to verify that the replacement tuple matches.
! * Otherwise, if ctid is the same as t_self and update_xmax is
! * InvalidTransactionId, the tuple was neither replaced nor deleted, but
! * locked by a transaction invisible to lockcheck_snapshot. This case can
! * thus only arise if lockcheck_snapshot is a valid snapshot.
*
* NOTES: because the shared-memory lock table is of finite size, but users
* could reasonably want to lock large numbers of tuples, we do not rely on
*************** simple_heap_update(Relation relation, It
*** 3066,3072 ****
HTSU_Result
heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer,
ItemPointer ctid, TransactionId *update_xmax,
! CommandId cid, LockTupleMode mode, bool nowait)
{
HTSU_Result result;
ItemPointer tid = &(tuple->t_self);
--- 3114,3121 ----
HTSU_Result
heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer,
ItemPointer ctid, TransactionId *update_xmax,
! CommandId cid, LockTupleMode mode, bool nowait,
! Snapshot lockcheck_snapshot)
{
HTSU_Result result;
ItemPointer tid = &(tuple->t_self);
*************** l3:
*** 3247,3258 ****
result = HeapTupleUpdated;
}
if (result != HeapTupleMayBeUpdated)
{
Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated);
- Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
*ctid = tuple->t_data->t_ctid;
- *update_xmax = HeapTupleHeaderGetXmax(tuple->t_data);
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
if (have_tuple_lock)
UnlockTuple(relation, tid, tuple_lock_type);
--- 3296,3323 ----
result = HeapTupleUpdated;
}
+ /* If the tuple was updated, we report the updating transaction's
+ * xid in update_xmax. Otherwise, we must check that it wasn't
+ * locked by a transaction invisible to lockcheck_snapshot before
+ * continuing.
+ */
+ if (result != HeapTupleMayBeUpdated)
+ {
+ Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
+ *update_xmax = HeapTupleHeaderGetXmax(tuple->t_data);
+ }
+ else if ((lockcheck_snapshot != InvalidSnapshot) &&
+ !HeapSatisfiesLockersVisible(tuple->t_data, lockcheck_snapshot))
+ {
+ Assert((tuple->t_data->t_infomask & HEAP_IS_LOCKED));
+ *update_xmax = InvalidTransactionId;
+ result = HeapTupleUpdated;
+ }
+
if (result != HeapTupleMayBeUpdated)
{
Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated);
*ctid = tuple->t_data->t_ctid;
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
if (have_tuple_lock)
UnlockTuple(relation, tid, tuple_lock_type);
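
(For reference: every executor-side caller updated below chooses the new
lockcheck_snapshot argument the same way. A minimal sketch of the
convention, assuming an EState *estate and a LockTupleMode lockmode are
in scope:)

    /* pass the transaction snapshot only under REPEATABLE READ or above */
    test = heap_lock_tuple(relation, &tuple, &buffer,
                           &update_ctid, &update_xmax,
                           estate->es_output_cid,
                           lockmode, false /* nowait */ ,
                           IsolationUsesXactSnapshot() ? estate->es_snapshot
                           : InvalidSnapshot);
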
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 9422ef3..3395ea6 100644
*** a/src/backend/access/transam/multixact.c
--- b/src/backend/access/transam/multixact.c
*************** static MemoryContext MXactContext = NULL
*** 211,217 ****
#endif
/* internal MultiXactId management */
- static void MultiXactIdSetOldestVisible(void);
static MultiXactId CreateMultiXactId(int nxids, TransactionId *xids);
static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
int nxids, TransactionId *xids);
--- 211,216 ----
*************** MultiXactIdSetOldestMember(void)
*** 531,537 ****
* there is no live transaction, now or later, that can be a member of any
* MultiXactId older than the OldestVisibleMXactId we compute here.
*/
! static void
MultiXactIdSetOldestVisible(void)
{
if (!MultiXactIdIsValid(OldestVisibleMXactId[MyBackendId]))
--- 530,536 ----
* there is no live transaction, now or later, that can be a member of any
* MultiXactId older than the OldestVisibleMXactId we compute here.
*/
! void
MultiXactIdSetOldestVisible(void)
{
if (!MultiXactIdIsValid(OldestVisibleMXactId[MyBackendId]))
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 7b8bee8..be28f3e 100644
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
*************** DoCopy(const CopyStmt *stmt, const char
*** 1091,1097 ****
/* Create a QueryDesc requesting no output */
cstate->queryDesc = CreateQueryDesc(plan, queryString,
GetActiveSnapshot(),
- InvalidSnapshot,
dest, NULL, 0);
/*
--- 1091,1096 ----
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 99f5c29..9ccd9fc 100644
*** a/src/backend/commands/explain.c
--- b/src/backend/commands/explain.c
*************** ExplainOnePlan(PlannedStmt *plannedstmt,
*** 377,383 ****
/* Create a QueryDesc requesting no output */
queryDesc = CreateQueryDesc(plannedstmt, queryString,
! GetActiveSnapshot(), InvalidSnapshot,
None_Receiver, params, instrument_option);
INSTR_TIME_SET_CURRENT(starttime);
--- 377,383 ----
/* Create a QueryDesc requesting no output */
queryDesc = CreateQueryDesc(plannedstmt, queryString,
! GetActiveSnapshot(),
None_Receiver, params, instrument_option);
INSTR_TIME_SET_CURRENT(starttime);
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 8195392..eaf8e8b 100644
*** a/src/backend/commands/trigger.c
--- b/src/backend/commands/trigger.c
*************** GetTupleForTrigger(EState *estate,
*** 2474,2486 ****
/*
* lock tuple for update
*/
ltrmark:;
tuple.t_self = *tid;
test = heap_lock_tuple(relation, &tuple, &buffer,
&update_ctid, &update_xmax,
estate->es_output_cid,
! LockTupleExclusive, false);
switch (test)
{
case HeapTupleSelfUpdated:
--- 2474,2492 ----
/*
* lock tuple for update
+ *
+ * Serializable transactions pass their snapshot as the
+ * lockcheck_snapshot. This lets heap_lock_tuple report concurrently
+ * FOR SHARE or FOR UPDATE locked tuples as HeapTupleUpdated.
*/
ltrmark:;
tuple.t_self = *tid;
test = heap_lock_tuple(relation, &tuple, &buffer,
&update_ctid, &update_xmax,
estate->es_output_cid,
! LockTupleExclusive, false,
! IsolationUsesXactSnapshot() ?
! estate->es_snapshot : InvalidSnapshot);
switch (test)
{
case HeapTupleSelfUpdated:
diff --git a/src/backend/executor/README b/src/backend/executor/README
index fec191d..ab70f40 100644
*** a/src/backend/executor/README
--- b/src/backend/executor/README
*************** is no explicit prohibition on SRFs in UP
*** 195,197 ****
--- 195,263 ----
that only the first result row of an SRF counts, because all subsequent
rows will result in attempts to re-update an already updated target row.
This is historical behavior and seems not worth changing.)
+
+ Row Locks and Serializable Transactions
+ ---------------------------------------
+
+ In READ COMMITTED mode, a transaction that encounters a locked row during
+ an UPDATE, DELETE, SELECT FOR UPDATE or SELECT FOR SHARE simply blocks
+ until the locking transaction commits or rolls back, and in the former case
+ then re-executes the statement using the new row version, as described above.
+
+ In case of a per-transaction snapshot (as opposed to per-query, i.e. isolation
+ level READ COMMITTED), this is not satisfactory. The RI triggers, for example,
+ take a FOR SHARE lock on a parent row before allowing a child row to be
+ inserted, and verify that deleting a parent row leaves no orphaned children
+ behind before allowing the delete to occur. From within READ COMMITTED
+ transactions, blocking upon a delete of a parent row until all lockers have
+ finished is sufficient to guarantee that this check finds any potential
+ orphan, since the check will be executed with an up-to-date snapshot to which
+ the locking transaction's changes are visible. For isolation level REPEATABLE
+ READ and higher, however, this is not true, since such transactions continue
+ to use their old snapshot and hence miss newly inserted rows.
+
+ Such transactions therefore treat a FOR SHARE or FOR UPDATE lock on a tuple
+ the same as an actual update during UPDATE and SELECT FOR UPDATE. They are
+ thus aborted when trying to UPDATE or FOR UPDATE lock a row that was FOR SHARE
+ or FOR UPDATE locked by a concurrent transaction.
+
+ Note that the behavior is not totally symmetric - locking a row FOR UPDATE
+ that was concurrently locked FOR SHARE aborts with a serialization error,
+ while FOR SHARE locking a row that was concurrently locked FOR UPDATE will
+ succeed (after the locking transaction has committed or aborted, of course,
+ and assuming that the FOR UPDATE wasn't followed by an actual UPDATE). This
+ is hard to avoid, since we cannot reliably distinguish between the different
+ lock strengths once the locking transaction has ended - another READ
+ COMMITTED transaction might immediately lock the row with a different
+ strength.
+
+ To restore symmetry, we'd thus need to either abort with a serialization error
+ upon *any* request to lock a row that was locked concurrently, independently
+ of the locks' strengths, or never abort in such a situation. Both behaviors
+ are undesirable, the former because of the increased number of serialization
+ errors it'd cause and the latter because obtaining a FOR UPDATE lock is
+ supposed to prevent a future update from failing.
+
+ The guarantees provided by the different lock strengths are thus:
+
+ 1) Acquiring a FOR SHARE lock guarantees, for all modifications of the tuple
+ from within any transaction, that all statements run *after* the modifying
+ statement will see the locking transaction's changes. This includes any
+ statement run from within AFTER triggers fired by the modifying statement. For
+ isolation level REPEATABLE READ and above, even the modifying statement itself
+ will see the locking transaction's changes.
+
+ 2) Acquiring a FOR UPDATE lock guarantees that any statement run after the FOR
+ UPDATE lock was granted will see the changes done by any concurrent
+ transaction that at one point obtained a FOR SHARE lock on the tuple. For
+ isolation level REPEATABLE READ and above, even the statement itself will see
+ these changes.
+
+ With isolation level REPEATABLE READ and above, a serialization error is
+ raised if one of these guarantees is violated.
+
+ Implementation-wise, heap_update(), heap_delete() and heap_lock_tuple() take
+ a lockcheck_snapshot parameter, and report a tuple as updated if it was locked
+ by a transaction not visible to that snapshot. The implementation depends on
+ the fact that if one transaction invisible to lockcheck_snapshot locks the
+ tuple, every future locker must still be running at the time the first locker
+ commits or aborts, and will thus also be invisible to lockcheck_snapshot if
+ the original locker was.
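+
+ To make that concrete, the following is a distilled sketch of the
+ single-locker case of that check (the real logic is
+ HeapSatisfiesLockersVisible() in tqual.c, which additionally handles
+ MultiXactIds and ignores the current transaction's own subtransactions;
+ XidInMVCCSnapshot() is static to that file):
+
+     static bool
+     locker_visible(TransactionId xid, Snapshot lockcheck_snapshot)
+     {
+         /* a locker must either have aborted or be visible */
+         return TransactionIdDidAbort(xid) ||
+                !XidInMVCCSnapshot(xid, lockcheck_snapshot);
+     }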
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index c4719f3..5bcbf54 100644
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
*************** standard_ExecutorStart(QueryDesc *queryD
*** 183,189 ****
* Copy other important information into the EState
*/
estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot);
- estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot);
estate->es_instrument = queryDesc->instrument_options;
/*
--- 183,188 ----
*************** standard_ExecutorEnd(QueryDesc *queryDes
*** 348,354 ****
/* do away with our snapshots */
UnregisterSnapshot(estate->es_snapshot);
- UnregisterSnapshot(estate->es_crosscheck_snapshot);
/*
* Must switch out of context before destroying it
--- 347,352 ----
*************** EvalPlanQualFetch(EState *estate, Relati
*** 1563,1573 ****
/*
* This is a live tuple, so now try to lock it.
*/
test = heap_lock_tuple(relation, &tuple, &buffer,
&update_ctid, &update_xmax,
estate->es_output_cid,
! lockmode, false);
/* We now have two pins on the buffer, get rid of one */
ReleaseBuffer(buffer);
--- 1561,1578 ----
/*
* This is a live tuple, so now try to lock it.
+ *
+ * Serializable transactions pass their snapshot as the lockcheck_snapshot.
+ * This lets heap_lock_tuple report concurrently FOR SHARE or FOR UPDATE
+ * locked tuples as HeapTupleUpdated.
*/
+ Assert(!IsolationUsesXactSnapshot() || (estate->es_snapshot != InvalidSnapshot));
test = heap_lock_tuple(relation, &tuple, &buffer,
&update_ctid, &update_xmax,
estate->es_output_cid,
! lockmode, false,
! IsolationUsesXactSnapshot() ? estate->es_snapshot :
! InvalidSnapshot);
/* We now have two pins on the buffer, get rid of one */
ReleaseBuffer(buffer);
*************** EvalPlanQualStart(EPQState *epqstate, ES
*** 1936,1942 ****
*/
estate->es_direction = ForwardScanDirection;
estate->es_snapshot = parentestate->es_snapshot;
- estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot;
estate->es_range_table = parentestate->es_range_table;
estate->es_plannedstmt = parentestate->es_plannedstmt;
estate->es_junkFilter = parentestate->es_junkFilter;
--- 1941,1946 ----
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 6ad0f1e..cef3e86 100644
*** a/src/backend/executor/execUtils.c
--- b/src/backend/executor/execUtils.c
*************** CreateExecutorState(void)
*** 109,115 ****
*/
estate->es_direction = ForwardScanDirection;
estate->es_snapshot = SnapshotNow;
- estate->es_crosscheck_snapshot = InvalidSnapshot; /* no crosscheck */
estate->es_range_table = NIL;
estate->es_plannedstmt = NULL;
--- 109,114 ----
diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index 11f92ad..6a6f23d 100644
*** a/src/backend/executor/functions.c
--- b/src/backend/executor/functions.c
*************** postquel_start(execution_state *es, SQLF
*** 415,421 ****
if (IsA(es->stmt, PlannedStmt))
es->qd = CreateQueryDesc((PlannedStmt *) es->stmt,
fcache->src,
! snapshot, InvalidSnapshot,
dest,
fcache->paramLI, 0);
else
--- 415,421 ----
if (IsA(es->stmt, PlannedStmt))
es->qd = CreateQueryDesc((PlannedStmt *) es->stmt,
fcache->src,
! snapshot,
dest,
fcache->paramLI, 0);
else
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index 440a601..330e5f0 100644
*** a/src/backend/executor/nodeLockRows.c
--- b/src/backend/executor/nodeLockRows.c
***************
*** 22,27 ****
--- 22,28 ----
#include "postgres.h"
#include "access/xact.h"
+ #include "access/transam.h"
#include "executor/executor.h"
#include "executor/nodeLockRows.h"
#include "storage/bufmgr.h"
*************** lnext:
*** 71,76 ****
--- 72,78 ----
ItemPointerData update_ctid;
TransactionId update_xmax;
LockTupleMode lockmode;
+ Snapshot lockcheck_snapshot = InvalidSnapshot;
HTSU_Result test;
HeapTuple copyTuple;
*************** lnext:
*** 110,123 ****
/* okay, try to lock the tuple */
if (erm->markType == ROW_MARK_EXCLUSIVE)
lockmode = LockTupleExclusive;
else
lockmode = LockTupleShared;
test = heap_lock_tuple(erm->relation, &tuple, &buffer,
&update_ctid, &update_xmax,
estate->es_output_cid,
! lockmode, erm->noWait);
ReleaseBuffer(buffer);
switch (test)
{
--- 112,149 ----
/* okay, try to lock the tuple */
if (erm->markType == ROW_MARK_EXCLUSIVE)
+ {
lockmode = LockTupleExclusive;
+
+ /*
+ * Transactions using a per-transaction snapshot pass
+ * that snapshot as lockcheck_snapshot. This causes tuples
+ * to be reported as HeapTupleUpdated even if they were
+ * merely locked (shared or exclusively) at some point by
+ * a concurrent transaction. See src/backend/executor/README
+ * for details.
+ */
+ if (IsolationUsesXactSnapshot())
+ lockcheck_snapshot = estate->es_snapshot;
+ }
else
+ {
lockmode = LockTupleShared;
+
+ /*
+ * We never pass a lockcheck_snapshot to heap_lock_tuple()
+ * in this case. Thus, a transaction using a per-transaction
+ * snapshot may share-lock a row previously locked exclusively
+ * by a concurrent transaction. See src/backend/executor/README
+ * for why this is hard to avoid and OK.
+ */
+ }
test = heap_lock_tuple(erm->relation, &tuple, &buffer,
&update_ctid, &update_xmax,
estate->es_output_cid,
! lockmode, erm->noWait,
! lockcheck_snapshot);
ReleaseBuffer(buffer);
switch (test)
{
*************** lnext:
*** 134,139 ****
--- 160,171 ----
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ /* In case of a per-query snapshot, we didn't pass
+ * a lockcheck_snapshot to heap_lock_tuple() and can
+ * thus expect the tuple to have a valid xmax here.
+ */
+ Assert(TransactionIdIsValid(update_xmax));
+
if (ItemPointerEquals(&update_ctid,
&tuple.t_self))
{
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 541adaf..4e32c18 100644
*** a/src/backend/executor/nodeModifyTable.c
--- b/src/backend/executor/nodeModifyTable.c
***************
*** 38,43 ****
--- 38,44 ----
#include "postgres.h"
#include "access/xact.h"
+ #include "access/transam.h"
#include "commands/trigger.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
*************** ExecDelete(ItemPointer tupleid,
*** 367,384 ****
/*
* delete the tuple
*
! * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
! * that the row to be deleted is visible to that snapshot, and throw a
! * can't-serialize error if not. This is a special-case behavior
! * needed for referential integrity updates in transaction-snapshot
! * mode transactions.
*/
ldelete:;
result = heap_delete(resultRelationDesc, tupleid,
&update_ctid, &update_xmax,
estate->es_output_cid,
! estate->es_crosscheck_snapshot,
! true /* wait for commit */ );
switch (result)
{
case HeapTupleSelfUpdated:
--- 368,387 ----
/*
* delete the tuple
*
! * Transactions using a per-transaction snapshot pass
! * that snapshot as lockcheck_snapshot. This causes tuples
! * to be reported as HeapTupleUpdated even if they were
! * merely locked (shared or exclusively) at some point by
! * a concurrent transaction. See src/backend/executor/README
! * for details.
*/
ldelete:;
result = heap_delete(resultRelationDesc, tupleid,
&update_ctid, &update_xmax,
estate->es_output_cid,
! true, /* wait for commit */
! IsolationUsesXactSnapshot() ? estate->es_snapshot :
! InvalidSnapshot);
switch (result)
{
case HeapTupleSelfUpdated:
*************** ldelete:;
*** 393,398 ****
--- 396,406 ----
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ /* In case of a per-query snapshot, we didn't pass
+ * a lockcheck_snapshot to heap_delete() and can
+ * thus expect the tuple to have a valid xmax here.
+ */
+ Assert(TransactionIdIsValid(update_xmax));
if (!ItemPointerEquals(tupleid, &update_ctid))
{
TupleTableSlot *epqslot;
*************** lreplace:;
*** 615,631 ****
/*
* replace the heap tuple
*
! * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
! * that the row to be updated is visible to that snapshot, and throw a
! * can't-serialize error if not. This is a special-case behavior
! * needed for referential integrity updates in transaction-snapshot
! * mode transactions.
*/
result = heap_update(resultRelationDesc, tupleid, tuple,
&update_ctid, &update_xmax,
estate->es_output_cid,
! estate->es_crosscheck_snapshot,
! true /* wait for commit */ );
switch (result)
{
case HeapTupleSelfUpdated:
--- 623,641 ----
/*
* replace the heap tuple
*
! * Transactions using a per-transaction snapshot pass
! * that snapshot as lockcheck_snapshot. This causes tuples
! * to be reported as HeapTupleUpdated even if they were
! * merely locked (shared or exclusively) at some point by
! * a concurrent transaction. See src/backend/executor/README
! * for details.
*/
result = heap_update(resultRelationDesc, tupleid, tuple,
&update_ctid, &update_xmax,
estate->es_output_cid,
! true, /* wait for commit */
! IsolationUsesXactSnapshot() ? estate->es_snapshot :
! InvalidSnapshot);
switch (result)
{
case HeapTupleSelfUpdated:
*************** lreplace:;
*** 640,645 ****
--- 650,660 ----
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+ /* In case of a per-query snapshot, we didn't pass
+ * a lockcheck_snapshot to heap_update() and can
+ * thus expect the tuple to have a valid xmax here.
+ */
+ Assert(TransactionIdIsValid(update_xmax));
if (!ItemPointerEquals(tupleid, &update_ctid))
{
TupleTableSlot *epqslot;
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index c579017..aa631f7 100644
*** a/src/backend/executor/spi.c
--- b/src/backend/executor/spi.c
*************** static void _SPI_prepare_plan(const char
*** 51,57 ****
ParamListInfo boundParams);
static int _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
! Snapshot snapshot, Snapshot crosscheck_snapshot,
bool read_only, bool fire_triggers, long tcount);
static ParamListInfo _SPI_convert_params(int nargs, Oid *argtypes,
--- 51,57 ----
ParamListInfo boundParams);
static int _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
! Snapshot snapshot,
bool read_only, bool fire_triggers, long tcount);
static ParamListInfo _SPI_convert_params(int nargs, Oid *argtypes,
*************** SPI_execute(const char *src, bool read_o
*** 357,363 ****
_SPI_prepare_plan(src, &plan, NULL);
res = _SPI_execute_plan(&plan, NULL,
! InvalidSnapshot, InvalidSnapshot,
read_only, true, tcount);
_SPI_end_call(true);
--- 357,363 ----
_SPI_prepare_plan(src, &plan, NULL);
res = _SPI_execute_plan(&plan, NULL,
! InvalidSnapshot,
read_only, true, tcount);
_SPI_end_call(true);
*************** SPI_execute_plan(SPIPlanPtr plan, Datum
*** 392,398 ****
_SPI_convert_params(plan->nargs, plan->argtypes,
Values, Nulls,
0),
! InvalidSnapshot, InvalidSnapshot,
read_only, true, tcount);
_SPI_end_call(true);
--- 392,398 ----
_SPI_convert_params(plan->nargs, plan->argtypes,
Values, Nulls,
0),
! InvalidSnapshot,
read_only, true, tcount);
_SPI_end_call(true);
*************** SPI_execute_plan_with_paramlist(SPIPlanP
*** 421,427 ****
return res;
res = _SPI_execute_plan(plan, params,
! InvalidSnapshot, InvalidSnapshot,
read_only, true, tcount);
_SPI_end_call(true);
--- 421,427 ----
return res;
res = _SPI_execute_plan(plan, params,
! InvalidSnapshot,
read_only, true, tcount);
_SPI_end_call(true);
*************** SPI_execute_plan_with_paramlist(SPIPlanP
*** 444,450 ****
int
SPI_execute_snapshot(SPIPlanPtr plan,
Datum *Values, const char *Nulls,
! Snapshot snapshot, Snapshot crosscheck_snapshot,
bool read_only, bool fire_triggers, long tcount)
{
int res;
--- 444,450 ----
int
SPI_execute_snapshot(SPIPlanPtr plan,
Datum *Values, const char *Nulls,
! Snapshot snapshot,
bool read_only, bool fire_triggers, long tcount)
{
int res;
*************** SPI_execute_snapshot(SPIPlanPtr plan,
*** 463,469 ****
_SPI_convert_params(plan->nargs, plan->argtypes,
Values, Nulls,
0),
! snapshot, crosscheck_snapshot,
read_only, fire_triggers, tcount);
_SPI_end_call(true);
--- 463,469 ----
_SPI_convert_params(plan->nargs, plan->argtypes,
Values, Nulls,
0),
! snapshot,
read_only, fire_triggers, tcount);
_SPI_end_call(true);
*************** SPI_execute_with_args(const char *src,
*** 516,522 ****
/* We don't need to copy the plan since it will be thrown away anyway */
res = _SPI_execute_plan(&plan, paramLI,
! InvalidSnapshot, InvalidSnapshot,
read_only, true, tcount);
_SPI_end_call(true);
--- 516,522 ----
/* We don't need to copy the plan since it will be thrown away anyway */
res = _SPI_execute_plan(&plan, paramLI,
! InvalidSnapshot,
read_only, true, tcount);
_SPI_end_call(true);
*************** _SPI_prepare_plan(const char *src, SPIPl
*** 1752,1758 ****
*
* snapshot: query snapshot to use, or InvalidSnapshot for the normal
* behavior of taking a new snapshot for each query.
- * crosscheck_snapshot: for RI use, all others pass InvalidSnapshot
* read_only: TRUE for read-only execution (no CommandCounterIncrement)
* fire_triggers: TRUE to fire AFTER triggers at end of query (normal case);
* FALSE means any AFTER triggers are postponed to end of outer query
--- 1752,1757 ----
*************** _SPI_prepare_plan(const char *src, SPIPl
*** 1760,1766 ****
*/
static int
_SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
! Snapshot snapshot, Snapshot crosscheck_snapshot,
bool read_only, bool fire_triggers, long tcount)
{
int my_res = 0;
--- 1759,1765 ----
*/
static int
_SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
! Snapshot snapshot,
bool read_only, bool fire_triggers, long tcount)
{
int my_res = 0;
*************** _SPI_execute_plan(SPIPlanPtr plan, Param
*** 1903,1909 ****
qdesc = CreateQueryDesc((PlannedStmt *) stmt,
plansource->query_string,
! snap, crosscheck_snapshot,
dest,
paramLI, 0);
res = _SPI_pquery(qdesc, fire_triggers,
--- 1902,1908 ----
qdesc = CreateQueryDesc((PlannedStmt *) stmt,
plansource->query_string,
! snap,
dest,
paramLI, 0);
res = _SPI_pquery(qdesc, fire_triggers,
diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c
index 8eb02da..fdf206f 100644
*** a/src/backend/tcop/pquery.c
--- b/src/backend/tcop/pquery.c
*************** QueryDesc *
*** 64,70 ****
CreateQueryDesc(PlannedStmt *plannedstmt,
const char *sourceText,
Snapshot snapshot,
- Snapshot crosscheck_snapshot,
DestReceiver *dest,
ParamListInfo params,
int instrument_options)
--- 64,69 ----
*************** CreateQueryDesc(PlannedStmt *plannedstmt
*** 76,83 ****
qd->utilitystmt = plannedstmt->utilityStmt; /* in case DECLARE CURSOR */
qd->sourceText = sourceText; /* query text */
qd->snapshot = RegisterSnapshot(snapshot); /* snapshot */
- /* RI check snapshot */
- qd->crosscheck_snapshot = RegisterSnapshot(crosscheck_snapshot);
qd->dest = dest; /* output dest */
qd->params = params; /* parameter values passed into query */
qd->instrument_options = instrument_options; /* instrumentation
--- 75,80 ----
*************** CreateUtilityQueryDesc(Node *utilitystmt
*** 109,115 ****
qd->utilitystmt = utilitystmt; /* utility command */
qd->sourceText = sourceText; /* query text */
qd->snapshot = RegisterSnapshot(snapshot); /* snapshot */
- qd->crosscheck_snapshot = InvalidSnapshot; /* RI check snapshot */
qd->dest = dest; /* output dest */
qd->params = params; /* parameter values passed into query */
qd->instrument_options = false; /* uninteresting for utilities */
--- 106,111 ----
*************** FreeQueryDesc(QueryDesc *qdesc)
*** 134,140 ****
/* forget our snapshots */
UnregisterSnapshot(qdesc->snapshot);
- UnregisterSnapshot(qdesc->crosscheck_snapshot);
/* Only the QueryDesc itself need be freed */
pfree(qdesc);
--- 130,135 ----
*************** ProcessQuery(PlannedStmt *plan,
*** 178,184 ****
* Create the QueryDesc object
*/
queryDesc = CreateQueryDesc(plan, sourceText,
! GetActiveSnapshot(), InvalidSnapshot,
dest, params, 0);
/*
--- 173,179 ----
* Create the QueryDesc object
*/
queryDesc = CreateQueryDesc(plan, sourceText,
! GetActiveSnapshot(),
dest, params, 0);
/*
*************** PortalStart(Portal portal, ParamListInfo
*** 514,520 ****
queryDesc = CreateQueryDesc((PlannedStmt *) linitial(portal->stmts),
portal->sourceText,
GetActiveSnapshot(),
- InvalidSnapshot,
None_Receiver,
params,
0);
--- 509,514 ----
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 33a8935..ff51281 100644
*** a/src/backend/utils/adt/ri_triggers.c
--- b/src/backend/utils/adt/ri_triggers.c
*************** static SPIPlanPtr ri_PlanCheck(const cha
*** 230,236 ****
static bool ri_PerformCheck(RI_QueryKey *qkey, SPIPlanPtr qplan,
Relation fk_rel, Relation pk_rel,
HeapTuple old_tuple, HeapTuple new_tuple,
- bool detectNewRows,
int expect_OK, const char *constrname);
static void ri_ExtractValues(RI_QueryKey *qkey, int key_idx,
Relation rel, HeapTuple tuple,
--- 230,235 ----
*************** RI_FKey_check(PG_FUNCTION_ARGS)
*** 357,363 ****
ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
NULL, NULL,
- false,
SPI_OK_SELECT,
NameStr(riinfo.conname));
--- 356,361 ----
*************** RI_FKey_check(PG_FUNCTION_ARGS)
*** 500,506 ****
ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
NULL, new_row,
- false,
SPI_OK_SELECT,
NameStr(riinfo.conname));
--- 498,503 ----
*************** ri_Check_Pk_Match(Relation pk_rel, Relat
*** 661,667 ****
result = ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
old_row, NULL,
- true, /* treat like update */
SPI_OK_SELECT, NULL);
if (SPI_finish() != SPI_OK_FINISH)
--- 658,663 ----
*************** RI_FKey_noaction_del(PG_FUNCTION_ARGS)
*** 818,824 ****
ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
old_row, NULL,
- true, /* must detect new rows */
SPI_OK_SELECT,
NameStr(riinfo.conname));
--- 814,819 ----
*************** RI_FKey_noaction_upd(PG_FUNCTION_ARGS)
*** 1006,1012 ****
ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
old_row, NULL,
- true, /* must detect new rows */
SPI_OK_SELECT,
NameStr(riinfo.conname));
--- 1001,1006 ----
*************** RI_FKey_cascade_del(PG_FUNCTION_ARGS)
*** 1168,1174 ****
ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
old_row, NULL,
- true, /* must detect new rows */
SPI_OK_DELETE,
NameStr(riinfo.conname));
--- 1162,1167 ----
*************** RI_FKey_cascade_upd(PG_FUNCTION_ARGS)
*** 1356,1362 ****
ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
old_row, new_row,
- true, /* must detect new rows */
SPI_OK_UPDATE,
NameStr(riinfo.conname));
--- 1349,1354 ----
*************** RI_FKey_restrict_del(PG_FUNCTION_ARGS)
*** 1527,1533 ****
ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
old_row, NULL,
- true, /* must detect new rows */
SPI_OK_SELECT,
NameStr(riinfo.conname));
--- 1519,1524 ----
*************** RI_FKey_restrict_upd(PG_FUNCTION_ARGS)
*** 1710,1716 ****
ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
old_row, NULL,
- true, /* must detect new rows */
SPI_OK_SELECT,
NameStr(riinfo.conname));
--- 1701,1706 ----
*************** RI_FKey_setnull_del(PG_FUNCTION_ARGS)
*** 1881,1887 ****
ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
old_row, NULL,
- true, /* must detect new rows */
SPI_OK_UPDATE,
NameStr(riinfo.conname));
--- 1871,1876 ----
*************** RI_FKey_setnull_upd(PG_FUNCTION_ARGS)
*** 2097,2103 ****
ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
old_row, NULL,
- true, /* must detect new rows */
SPI_OK_UPDATE,
NameStr(riinfo.conname));
--- 2086,2091 ----
*************** RI_FKey_setdefault_del(PG_FUNCTION_ARGS)
*** 2269,2275 ****
ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
old_row, NULL,
- true, /* must detect new rows */
SPI_OK_UPDATE,
NameStr(riinfo.conname));
--- 2257,2262 ----
*************** RI_FKey_setdefault_upd(PG_FUNCTION_ARGS)
*** 2472,2478 ****
ri_PerformCheck(&qkey, qplan,
fk_rel, pk_rel,
old_row, NULL,
- true, /* must detect new rows */
SPI_OK_UPDATE,
NameStr(riinfo.conname));
--- 2459,2464 ----
*************** RI_Initial_Check(Trigger *trigger, Relat
*** 2792,2798 ****
spi_result = SPI_execute_snapshot(qplan,
NULL, NULL,
GetLatestSnapshot(),
- InvalidSnapshot,
true, false, 1);
/* Check result */
--- 2778,2783 ----
*************** static bool
*** 3271,3284 ****
ri_PerformCheck(RI_QueryKey *qkey, SPIPlanPtr qplan,
Relation fk_rel, Relation pk_rel,
HeapTuple old_tuple, HeapTuple new_tuple,
- bool detectNewRows,
int expect_OK, const char *constrname)
{
Relation query_rel,
source_rel;
int key_idx;
- Snapshot test_snapshot;
- Snapshot crosscheck_snapshot;
int limit;
int spi_result;
Oid save_userid;
--- 3256,3266 ----
*************** ri_PerformCheck(RI_QueryKey *qkey, SPIPl
*** 3330,3359 ****
}
/*
- * In READ COMMITTED mode, we just need to use an up-to-date regular
- * snapshot, and we will see all rows that could be interesting. But in
- * transaction-snapshot mode, we can't change the transaction snapshot.
- * If the caller passes detectNewRows == false then it's okay to do the query
- * with the transaction snapshot; otherwise we use a current snapshot, and
- * tell the executor to error out if it finds any rows under the current
- * snapshot that wouldn't be visible per the transaction snapshot. Note
- * that SPI_execute_snapshot will register the snapshots, so we don't need
- * to bother here.
- */
- if (IsolationUsesXactSnapshot() && detectNewRows)
- {
- CommandCounterIncrement(); /* be sure all my own work is visible */
- test_snapshot = GetLatestSnapshot();
- crosscheck_snapshot = GetTransactionSnapshot();
- }
- else
- {
- /* the default SPI behavior is okay */
- test_snapshot = InvalidSnapshot;
- crosscheck_snapshot = InvalidSnapshot;
- }
-
- /*
* If this is a select query (e.g., for a 'no action' or 'restrict'
* trigger), we only need to see if there is a single row in the table,
* matching the key. Otherwise, limit = 0 - because we want the query to
--- 3312,3317 ----
*************** ri_PerformCheck(RI_QueryKey *qkey, SPIPl
*** 3369,3375 ****
/* Finally we can run the query. */
spi_result = SPI_execute_snapshot(qplan,
vals, nulls,
! test_snapshot, crosscheck_snapshot,
false, false, limit);
/* Restore UID and security context */
--- 3327,3333 ----
/* Finally we can run the query. */
spi_result = SPI_execute_snapshot(qplan,
vals, nulls,
! InvalidSnapshot,
false, false, limit);
/* Restore UID and security context */
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 273d8bd..cd277ab 100644
*** a/src/backend/utils/time/snapmgr.c
--- b/src/backend/utils/time/snapmgr.c
***************
*** 27,32 ****
--- 27,33 ----
#include "access/transam.h"
#include "access/xact.h"
+ #include "access/multixact.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/memutils.h"
*************** GetTransactionSnapshot(void)
*** 126,131 ****
--- 127,142 ----
{
Assert(RegisteredSnapshots == 0);
+ /*
+ * We must store the oldest visible multi xact *before* taking the
+ * serializable snapshot. Otherwise HeapSatisfiesLockersVisible() in
+ * tqual.c will be in trouble, since it depends on being able to
+ * inspect all multixact ids which might contain xids invisible to
+ * the serializable snapshot.
+ */
+ if (IsolationUsesXactSnapshot())
+ MultiXactIdSetOldestVisible();
+
CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
FirstSnapshotSet = true;
diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
index 719dc13..f49a112 100644
*** a/src/backend/utils/time/tqual.c
--- b/src/backend/utils/time/tqual.c
*************** HeapTupleSatisfiesVacuum(HeapTupleHeader
*** 1243,1248 ****
--- 1243,1336 ----
return HEAPTUPLE_DEAD;
}
+ /*
+ * Returns false if one of the tuple's lockers committed but
+ * isn't visible according to lockcheck_snapshot, excluding subtransactions
+ * of the current transaction.
+ * Assumes that all locking transactions have either committed or
+ * aborted, i.e. that none is still in progress.
+ */
+ bool
+ HeapSatisfiesLockersVisible(HeapTupleHeader tuple, Snapshot lockcheck_snapshot)
+ {
+ if (lockcheck_snapshot == InvalidSnapshot)
+ return true;
+
+ if (tuple->t_infomask & HEAP_IS_LOCKED)
+ {
+ /*
+ * If the tuple was locked, we now check whether the locking
+ * transaction(s) are visible under lockcheck_snapshot. If
+ * they aren't, we pretend that the tuple was updated.
+ */
+ if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ TransactionId* xids;
+ int xids_l = GetMultiXactIdMembers(HeapTupleHeaderGetXmax(tuple), &xids);
+
+ if (xids_l < 1)
+ {
+ /*
+ * The multixact is either too old to be inspected or
+ * doesn't contain members. The second case is probably
+ * impossible, but even if not it doesn't pose any problem.
+ * In the first case, we have to trust that all xids that
+ * were contained in the multixact are in fact visible under
+ * lockcheck_snapshot. Currently this is always the case,
+ * since lockcheck_snapshot is always the transaction's
+ * serializable snapshot, and we call
+ * MultiXactIdSetOldestVisible() before acquiring that
+ * snapshot.
+ */
+ return true;
+ }
+ else
+ {
+ int i;
+ for (i = 0; i < xids_l; ++i)
+ {
+ /* Ignore our own subtransactions */
+ if (TransactionIdIsCurrentTransactionId(xids[i]))
+ continue;
+
+ /*
+ * We expect to be called after the locking transactions'
+ * fates have been decided
+ */
+ Assert(!TransactionIdIsInProgress(xids[i]));
+
+ if (!TransactionIdDidAbort(xids[i]) &&
+ XidInMVCCSnapshot(xids[i], lockcheck_snapshot))
+ {
+ /* Non-aborted, invisible locker */
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+ else
+ {
+ TransactionId xid = HeapTupleHeaderGetXmax(tuple);
+
+ /* Ignore our own subtransactions */
+ if (TransactionIdIsCurrentTransactionId(xid))
+ return true;
+
+ /* We expect to be called after the locking transactions' fates have been decided */
+ Assert(!TransactionIdIsInProgress(xid));
+
+ /* Locker must either be visible or have aborted */
+ return TransactionIdDidAbort(xid) ||
+ !XidInMVCCSnapshot(xid, lockcheck_snapshot);
+ }
+ }
+ else
+ {
+ /* Tuple wasn't locked */
+ return true;
+ }
+ }
/*
* XidInMVCCSnapshot
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 45e100c..2075759 100644
*** a/src/include/access/heapam.h
--- b/src/include/access/heapam.h
*************** extern Oid heap_insert(Relation relation
*** 98,112 ****
int options, BulkInsertState bistate);
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
ItemPointer ctid, TransactionId *update_xmax,
! CommandId cid, Snapshot crosscheck, bool wait);
extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
HeapTuple newtup,
ItemPointer ctid, TransactionId *update_xmax,
! CommandId cid, Snapshot crosscheck, bool wait);
extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
Buffer *buffer, ItemPointer ctid,
TransactionId *update_xmax, CommandId cid,
! LockTupleMode mode, bool nowait);
extern void heap_inplace_update(Relation relation, HeapTuple tuple);
extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
Buffer buf);
--- 98,113 ----
int options, BulkInsertState bistate);
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
ItemPointer ctid, TransactionId *update_xmax,
! CommandId cid, bool wait, Snapshot lockcheck_snapshot);
extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
HeapTuple newtup,
ItemPointer ctid, TransactionId *update_xmax,
! CommandId cid, bool wait, Snapshot lockcheck_snapshot);
extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
Buffer *buffer, ItemPointer ctid,
TransactionId *update_xmax, CommandId cid,
! LockTupleMode mode, bool nowait,
! Snapshot lockcheck_snapshot);
extern void heap_inplace_update(Relation relation, HeapTuple tuple);
extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
Buffer buf);
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 565848e..93ad4cb 100644
*** a/src/include/access/multixact.h
--- b/src/include/access/multixact.h
*************** extern bool MultiXactIdIsCurrent(MultiXa
*** 49,54 ****
--- 49,55 ----
extern void MultiXactIdWait(MultiXactId multi);
extern bool ConditionalMultiXactIdWait(MultiXactId multi);
extern void MultiXactIdSetOldestMember(void);
+ extern void MultiXactIdSetOldestVisible(void);
extern int GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids);
extern void AtEOXact_MultiXact(void);
diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h
index 4fb57d2..6881eb2 100644
*** a/src/include/executor/execdesc.h
--- b/src/include/executor/execdesc.h
*************** typedef struct QueryDesc
*** 39,45 ****
Node *utilitystmt; /* utility statement, or null */
const char *sourceText; /* source text of the query */
Snapshot snapshot; /* snapshot to use for query */
- Snapshot crosscheck_snapshot; /* crosscheck for RI update/delete */
DestReceiver *dest; /* the destination for tuple output */
ParamListInfo params; /* param values being passed in */
int instrument_options; /* OR of InstrumentOption flags */
--- 39,44 ----
*************** typedef struct QueryDesc
*** 57,63 ****
extern QueryDesc *CreateQueryDesc(PlannedStmt *plannedstmt,
const char *sourceText,
Snapshot snapshot,
- Snapshot crosscheck_snapshot,
DestReceiver *dest,
ParamListInfo params,
int instrument_options);
--- 56,61 ----
diff --git a/src/include/executor/spi.h b/src/include/executor/spi.h
index 96e29b9..b8536bd 100644
*** a/src/include/executor/spi.h
--- b/src/include/executor/spi.h
*************** extern int SPI_execp(SPIPlanPtr plan, Da
*** 82,88 ****
extern int SPI_execute_snapshot(SPIPlanPtr plan,
Datum *Values, const char *Nulls,
Snapshot snapshot,
- Snapshot crosscheck_snapshot,
bool read_only, bool fire_triggers, long tcount);
extern int SPI_execute_with_args(const char *src,
int nargs, Oid *argtypes,
--- 82,87 ----
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index d669c24..83b1f02 100644
*** a/src/include/nodes/execnodes.h
--- b/src/include/nodes/execnodes.h
*************** typedef struct EState
*** 337,343 ****
/* Basic state for all query types: */
ScanDirection es_direction; /* current scan direction */
Snapshot es_snapshot; /* time qual to use */
- Snapshot es_crosscheck_snapshot; /* crosscheck time qual for RI */
List *es_range_table; /* List of RangeTblEntry */
PlannedStmt *es_plannedstmt; /* link to top of plan tree */
--- 337,342 ----
diff --git a/src/include/utils/tqual.h b/src/include/utils/tqual.h
index df14f59..d18dbe6 100644
*** a/src/include/utils/tqual.h
--- b/src/include/utils/tqual.h
*************** extern HTSV_Result HeapTupleSatisfiesVac
*** 86,90 ****
--- 86,92 ----
extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
uint16 infomask, TransactionId xid);
+ extern bool HeapSatisfiesLockersVisible(HeapTupleHeader tuple,
+ Snapshot snapshot);
#endif /* TQUAL_H */