From c0ac767e5befc6b7e6a8d606307843f8d68d5d67 Mon Sep 17 00:00:00 2001 From: Amul Sul Date: Fri, 12 Jan 2018 11:19:37 +0530 Subject: [PATCH 1/2] Invalidate ip_blkid v4 v4: Rebased on "UPDATE of partition key v35" patch[5]. v3: Update w.r.t Amit Kapila's[3] & Alvaro Herrera[4] comments - typo in all error message and comment : "to an another" -> "to another" - error message change : "tuple to be updated" -> "tuple to be locked" - In ExecOnConflictUpdate(), error report converted into assert & comments added. v2: Updated w.r.t Robert review comments[2] - Updated couple of comment of heap_delete argument and ItemPointerData - Added same concurrent update error logic in ExecOnConflictUpdate, RelationFindReplTupleByIndex and RelationFindReplTupleSeq v1: Initial version -- as per Amit Kapila's suggestions[1] - When tuple is being moved to another partition then ip_blkid in the tuple header mark to InvalidBlockNumber. ------------- References: ------------- 1] https://postgr.es/m/CAA4eK1KEZQ%2BCyXbBzfn1jFHoEfa_OemDLhLyy7xfD1QUZLo1DQ%40mail.gmail.com 2] https://postgr.es/m/CA%2BTgmoYY98AEjh7RDtuzaLC--_0smCozXRu6bFmZTaX5Ne%3DB5Q%40mail.gmail.com 3] https://postgr.es/m/CAA4eK1LQS6TmsGaEwR9HgF-9TZTHxrdAELuX6wOZBDbbjOfDjQ@mail.gmail.com 4] https://postgr.es/m/20171124160756.eyljpmpfzwd6jmnr@alvherre.pgsql 5] https://postgr.es/m/CAJ3gD9dixkkMzNnnP1CaZ1H17-U17ok_sVbjZZo+wnB=rJH6yg@mail.gmail.com --- src/backend/access/heap/heapam.c | 13 +++++++++++-- src/backend/commands/trigger.c | 5 +++++ src/backend/executor/execMain.c | 4 ++++ src/backend/executor/execReplication.c | 8 ++++++++ src/backend/executor/nodeLockRows.c | 5 +++++ src/backend/executor/nodeModifyTable.c | 28 ++++++++++++++++++++++++---- src/include/access/heapam.h | 2 +- src/include/storage/itemptr.h | 4 +++- 8 files changed, 61 insertions(+), 8 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index dbc8f2d6c7..afd5d79497 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -3014,6 +3014,8 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask) * crosscheck - if not InvalidSnapshot, also check tuple against this * wait - true if should wait for any conflicting update to commit/abort * hufd - output parameter, filled in failure cases (see below) + * row_moved - true iff the tuple is being moved to another partition + * table due to an update of partition key. Otherwise, false. * * Normal, successful return value is HeapTupleMayBeUpdated, which * actually means we did delete it. Failure return codes are @@ -3029,7 +3031,7 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask) HTSU_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, - HeapUpdateFailureData *hufd) + HeapUpdateFailureData *hufd, bool row_moved) { HTSU_Result result; TransactionId xid = GetCurrentTransactionId(); @@ -3297,6 +3299,13 @@ l1: /* Make sure there is no forward chain link in t_ctid */ tp.t_data->t_ctid = tp.t_self; + /* + * Sets a block identifier to the InvalidBlockNumber to indicate such an + * update being moved tuple to another partition. + */ + if (row_moved) + BlockIdSet(&((tp.t_data->t_ctid).ip_blkid), InvalidBlockNumber); + MarkBufferDirty(buffer); /* @@ -3422,7 +3431,7 @@ simple_heap_delete(Relation relation, ItemPointer tid) result = heap_delete(relation, tid, GetCurrentCommandId(true), InvalidSnapshot, true /* wait for commit */ , - &hufd); + &hufd, false); switch (result) { case HeapTupleSelfUpdated: diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index e8af18e254..f943666c40 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -3071,6 +3071,11 @@ ltrmark:; ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid)))) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("tuple to be locked was already moved to another partition due to concurrent update"))); + if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self)) { /* it was updated, so look at the updated version */ diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 16822e962a..4115604011 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -2709,6 +2709,10 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid)))) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("tuple to be locked was already moved to another partition due to concurrent update"))); /* Should not encounter speculative tuple on recheck */ Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data)); diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 32891abbdf..9016d8fb11 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -194,6 +194,10 @@ retry: ereport(LOG, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("concurrent update, retrying"))); + if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid)))) + ereport(LOG, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying"))); goto retry; case HeapTupleInvisible: elog(ERROR, "attempted to lock invisible tuple"); @@ -352,6 +356,10 @@ retry: ereport(LOG, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("concurrent update, retrying"))); + if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid)))) + ereport(LOG, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying"))); goto retry; case HeapTupleInvisible: elog(ERROR, "attempted to lock invisible tuple"); diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index 7961b4be6a..b07b7092de 100644 --- a/src/backend/executor/nodeLockRows.c +++ b/src/backend/executor/nodeLockRows.c @@ -218,6 +218,11 @@ lnext: ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid)))) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("tuple to be locked was already moved to another partition due to concurrent update"))); + if (ItemPointerEquals(&hufd.ctid, &tuple.t_self)) { /* Tuple was deleted, so don't return it */ diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index e9c0b23172..7f3cbaa00e 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -708,7 +708,8 @@ ExecDelete(ModifyTableState *mtstate, EState *estate, bool *tupleDeleted, bool processReturning, - bool canSetTag) + bool canSetTag, + bool row_moved) { ResultRelInfo *resultRelInfo; Relation resultRelationDesc; @@ -799,7 +800,8 @@ ldelete:; estate->es_output_cid, estate->es_crosscheck_snapshot, true /* wait for commit */ , - &hufd); + &hufd, + row_moved); switch (result) { case HeapTupleSelfUpdated: @@ -845,6 +847,11 @@ ldelete:; ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid)))) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("tuple to be locked was already moved to another partition due to concurrent update"))); + if (!ItemPointerEquals(tupleid, &hufd.ctid)) { TupleTableSlot *epqslot; @@ -1149,7 +1156,7 @@ lreplace:; * from INSERT. */ ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate, estate, - &tuple_deleted, false, false); + &tuple_deleted, false, false, true); /* * For some reason if DELETE didn't happen (e.g. trigger prevented @@ -1292,6 +1299,11 @@ lreplace:; ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid)))) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("tuple to be locked was already moved to another partition due to concurrent update"))); + if (!ItemPointerEquals(tupleid, &hufd.ctid)) { TupleTableSlot *epqslot; @@ -1462,6 +1474,14 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + /* + * As long as we don't support an UPDATE of INSERT ON CONFLICT for + * a partitioned table we shouldn't reach to a case where tuple to + * be lock is moved to another partition due to concurrent update + * of partition key. + */ + Assert(BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid)))); + /* * Tell caller to try again from the very start. * @@ -2055,7 +2075,7 @@ ExecModifyTable(PlanState *pstate) case CMD_DELETE: slot = ExecDelete(node, tupleid, oldtuple, planSlot, &node->mt_epqstate, estate, - NULL, true, node->canSetTag); + NULL, true, node->canSetTag, false); break; default: elog(ERROR, "unknown operation"); diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 4c0256b18a..44a211a740 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -156,7 +156,7 @@ extern void heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, CommandId cid, int options, BulkInsertState bistate); extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, - HeapUpdateFailureData *hufd); + HeapUpdateFailureData *hufd, bool row_moved); extern void heap_finish_speculative(Relation relation, HeapTuple tuple); extern void heap_abort_speculative(Relation relation, HeapTuple tuple); extern HTSU_Result heap_update(Relation relation, ItemPointer otid, diff --git a/src/include/storage/itemptr.h b/src/include/storage/itemptr.h index 6c9ed3696b..79dceb414f 100644 --- a/src/include/storage/itemptr.h +++ b/src/include/storage/itemptr.h @@ -23,7 +23,9 @@ * This is a pointer to an item within a disk page of a known file * (for example, a cross-link from an index to its parent table). * blkid tells us which block, posid tells us which entry in the linp - * (ItemIdData) array we want. + * (ItemIdData) array we want. blkid is marked InvalidBlockNumber when + * a tuple is moved to another partition relation due to an update of + * partition key. * * Note: because there is an item pointer in each tuple header and index * tuple header on disk, it's very important not to waste space with -- 2.14.1