From 86f1699bc8ea2fafac7b26cf7f9c369a1f016326 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 26 Jun 2023 04:26:30 +0300 Subject: [PATCH 12/12] Introduce RowID -- bytea tuple identifier Currently, there are two ways to reference the tuple: tuple identifier (tid) and whole row copy. The tuple identifier used for regular tables consists of 32-bit block number and 16-bit offset. This seems limited for some use-cases, in particular index-organized tables. The whole row copy used to identify tuples in FDW. That could be extended to regular tables, but that seems overkill. This commit introduces RowID -- new bytea tuple identifier. Table AM can choose the way tuple is identified by providing new get_row_ref_type() API function. New system attribute RowIdAttributeNumber holds RowID when appropriate. Table AM methods now accepts Datum arguments as tuple identifiers. Those Datum could be either tid or bytea depending on what table_get_row_ref_type() says. ModifyTable node and triggers are aware of RowID. IndexScan and BitmapScan nodes are not aware of RowIDs and expect tids. Table AMs which use RowIDs are supposed to redefine those nodes using hooks. --- contrib/amcheck/verify_nbtree.c | 3 +- src/backend/access/common/heaptuple.c | 4 + src/backend/access/heap/heapam_handler.c | 33 ++- src/backend/access/table/tableam.c | 4 +- src/backend/catalog/aclchk.c | 2 +- src/backend/commands/trigger.c | 251 ++++++++++++++++++----- src/backend/executor/execExprInterp.c | 4 +- src/backend/executor/execMain.c | 11 +- src/backend/executor/execReplication.c | 12 +- src/backend/executor/nodeLockRows.c | 17 +- src/backend/executor/nodeModifyTable.c | 141 +++++++++---- src/backend/executor/nodeTidscan.c | 2 +- src/backend/optimizer/prep/preptlist.c | 16 ++ src/backend/optimizer/util/appendinfo.c | 33 ++- src/backend/optimizer/util/inherit.c | 20 +- src/backend/parser/parse_relation.c | 7 +- src/backend/rewrite/rewriteHandler.c | 1 + src/backend/utils/sort/tuplestore.c | 30 +++ src/include/access/sysattr.h | 3 +- src/include/access/tableam.h | 56 +++-- src/include/commands/trigger.h | 4 +- src/include/nodes/primnodes.h | 1 + src/include/utils/tuplestore.h | 3 + 23 files changed, 508 insertions(+), 150 deletions(-) diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index bcff849aa90..caaee8424ea 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -982,7 +982,8 @@ heap_entry_is_visible(BtreeCheckState *state, ItemPointer tid) TupleTableSlot *slot = table_slot_create(state->heaprel, NULL); tid_visible = table_tuple_fetch_row_version(state->heaprel, - tid, state->snapshot, slot); + PointerGetDatum(tid), + state->snapshot, slot); if (slot != NULL) ExecDropSingleTupleTableSlot(slot); diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index c52d40dce0f..7bfde232759 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -756,6 +756,10 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull) case TableOidAttributeNumber: result = ObjectIdGetDatum(tup->t_tableOid); break; + case RowIdAttributeNumber: + *isnull = true; + result = 0; + break; default: elog(ERROR, "invalid attnum: %d", attnum); result = 0; /* keep compiler quiet */ diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index de38174f83d..504ef922f18 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -47,7 +47,7 @@ #include "utils/rel.h" #include "utils/sampling.h" -static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid, +static TM_Result heapam_tuple_lock(Relation relation, Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, @@ -199,7 +199,7 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan, static bool heapam_fetch_row_version(Relation relation, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot) { @@ -208,7 +208,7 @@ heapam_fetch_row_version(Relation relation, Assert(TTS_IS_BUFFERTUPLE(slot)); - bslot->base.tupdata.t_self = *tid; + bslot->base.tupdata.t_self = *DatumGetItemPointer(tupleid); if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false)) { /* store in slot, transferring existing pin */ @@ -373,7 +373,7 @@ ExecCheckTIDVisible(EState *estate, if (!IsolationUsesXactSnapshot()) return; - if (!table_tuple_fetch_row_version(rel, tid, + if (!table_tuple_fetch_row_version(rel, PointerGetDatum(tid), SnapshotAny, tempSlot)) elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT"); ExecCheckTupleVisible(estate, rel, tempSlot); @@ -420,7 +420,7 @@ heapam_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo, * here means our previous conclusion that the tuple is * conclusively committed is not true anymore. */ - test = table_tuple_lock(rel, &conflictTid, + test = table_tuple_lock(rel, PointerGetDatum(&conflictTid), estate->es_snapshot, lockedSlot, estate->es_output_cid, lockmode, LockWaitBlock, 0, @@ -600,12 +600,13 @@ heapam_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo, } static TM_Result -heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, +heapam_tuple_delete(Relation relation, Datum tupleid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, int options, TM_FailureData *tmfd, bool changingPart, TupleTableSlot *oldSlot) { TM_Result result; + ItemPointer tid = DatumGetItemPointer(tupleid); /* * Currently Deleting of index tuples are handled at vacuum, in case if @@ -628,7 +629,7 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, * heapam_tuple_lock() will take advantage of tuple loaded into * oldSlot by heap_delete(). */ - result = heapam_tuple_lock(relation, tid, snapshot, + result = heapam_tuple_lock(relation, tupleid, snapshot, oldSlot, cid, LockTupleExclusive, (options & TABLE_MODIFY_WAIT) ? LockWaitBlock : @@ -645,7 +646,7 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, static TM_Result -heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, +heapam_tuple_update(Relation relation, Datum tupleid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, int options, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes, @@ -653,6 +654,7 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, { bool shouldFree = true; HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); + ItemPointer otid = DatumGetItemPointer(tupleid); TM_Result result; /* Update the tuple with table oid */ @@ -700,7 +702,7 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, * heapam_tuple_lock() will take advantage of tuple loaded into * oldSlot by heap_update(). */ - result = heapam_tuple_lock(relation, otid, snapshot, + result = heapam_tuple_lock(relation, tupleid, snapshot, oldSlot, cid, *lockmode, (options & TABLE_MODIFY_WAIT) ? LockWaitBlock : @@ -716,7 +718,7 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, } static TM_Result -heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, +heapam_tuple_lock(Relation relation, Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd) @@ -724,6 +726,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; TM_Result result; HeapTuple tuple = &bslot->base.tupdata; + ItemPointer tid = DatumGetItemPointer(tupleid); bool follow_updates; follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0; @@ -2660,6 +2663,15 @@ heapam_scan_get_blocks_done(HeapScanDesc hscan) * ------------------------------------------------------------------------ */ +/* + * All heap tables use TID row identifier. + */ +static RowRefType +heapam_get_row_ref_type(Relation rel) +{ + return ROW_REF_TID; +} + /* * Check to see whether the table needs a TOAST table. It does only if * (1) there are any toastable attributes, and (2) the maximum length @@ -3240,6 +3252,7 @@ static const TableAmRoutine heapam_methods = { .define_index_validate = NULL, .define_index = NULL, + .get_row_ref_type = heapam_get_row_ref_type, .free_rd_amcache = NULL, .relation_size = table_block_relation_size, .relation_needs_toast_table = heapam_relation_needs_toast_table, diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index eb8af13eabe..2d831a1388d 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -310,7 +310,7 @@ simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot, if (oldSlot) options |= TABLE_MODIFY_FETCH_OLD_TUPLE; - result = table_tuple_delete(rel, tid, + result = table_tuple_delete(rel, PointerGetDatum(tid), GetCurrentCommandId(true), snapshot, InvalidSnapshot, options, @@ -366,7 +366,7 @@ simple_table_tuple_update(Relation rel, ItemPointer otid, if (oldSlot) options |= TABLE_MODIFY_FETCH_OLD_TUPLE; - result = table_tuple_update(rel, otid, slot, + result = table_tuple_update(rel, PointerGetDatum(otid), slot, GetCurrentCommandId(true), snapshot, InvalidSnapshot, options, diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index 3ce6c09b44d..1759f4c2cee 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -1643,7 +1643,7 @@ expand_all_col_privileges(Oid table_oid, Form_pg_class classForm, AttrNumber curr_att; Assert(classForm->relnatts - FirstLowInvalidHeapAttributeNumber < num_col_privileges); - for (curr_att = FirstLowInvalidHeapAttributeNumber + 1; + for (curr_att = FirstLowInvalidHeapAttributeNumber + 2; curr_att <= classForm->relnatts; curr_att++) { diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index c7095751221..7612c203139 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -83,7 +83,7 @@ static void SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger); static bool GetTupleForTrigger(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tid, + Datum tupleid, LockTupleMode lockmode, TupleTableSlot *oldslot, TupleTableSlot **epqslot, @@ -2688,7 +2688,7 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo, bool ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple fdw_trigtuple, TupleTableSlot **epqslot, TM_Result *tmresult, @@ -2702,7 +2702,7 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, bool should_free = false; int i; - Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + Assert(HeapTupleIsValid(fdw_trigtuple) ^ (DatumGetPointer(tupleid) != NULL)); if (fdw_trigtuple == NULL) { TupleTableSlot *epqslot_candidate = NULL; @@ -2930,7 +2930,7 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo, bool ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple fdw_trigtuple, TupleTableSlot *newslot, TM_Result *tmresult, @@ -2950,7 +2950,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, /* Determine lock mode to use */ lockmode = ExecUpdateLockMode(estate, relinfo); - Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + Assert(HeapTupleIsValid(fdw_trigtuple) ^ (DatumGetPointer(tupleid) != NULL)); if (fdw_trigtuple == NULL) { TupleTableSlot *epqslot_candidate = NULL; @@ -3249,7 +3249,7 @@ static bool GetTupleForTrigger(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tid, + Datum tupleid, LockTupleMode lockmode, TupleTableSlot *oldslot, TupleTableSlot **epqslot, @@ -3274,7 +3274,9 @@ GetTupleForTrigger(EState *estate, */ if (!IsolationUsesXactSnapshot()) lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION; - test = table_tuple_lock(relation, tid, estate->es_snapshot, oldslot, + + test = table_tuple_lock(relation, tupleid, + estate->es_snapshot, oldslot, estate->es_output_cid, lockmode, LockWaitBlock, lockflags, @@ -3370,8 +3372,8 @@ GetTupleForTrigger(EState *estate, * We expect the tuple to be present, thus very simple error handling * suffices. */ - if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny, - oldslot)) + if (!table_tuple_fetch_row_version(relation, tupleid, + SnapshotAny, oldslot)) elog(ERROR, "failed to fetch tuple for trigger"); } @@ -3577,18 +3579,24 @@ typedef SetConstraintStateData *SetConstraintState; * cycles. So we need only ensure that ats_firing_id is zero when attaching * a new event to an existing AfterTriggerSharedData record. */ -typedef uint32 TriggerFlags; - -#define AFTER_TRIGGER_OFFSET 0x07FFFFFF /* must be low-order bits */ -#define AFTER_TRIGGER_DONE 0x80000000 -#define AFTER_TRIGGER_IN_PROGRESS 0x40000000 +typedef uint64 TriggerFlags; + +#define AFTER_TRIGGER_SIZE UINT64CONST(0xFFFF000000000) /* must be low-order + * bits */ +#define AFTER_TRIGGER_SIZE_SHIFT (36) +#define AFTER_TRIGGER_OFFSET UINT64CONST(0x000000FFFFFFF) /* must be low-order + * bits */ +#define AFTER_TRIGGER_DONE UINT64CONST(0x0000800000000) +#define AFTER_TRIGGER_IN_PROGRESS UINT64CONST(0x0000400000000) /* bits describing the size and tuple sources of this event */ -#define AFTER_TRIGGER_FDW_REUSE 0x00000000 -#define AFTER_TRIGGER_FDW_FETCH 0x20000000 -#define AFTER_TRIGGER_1CTID 0x10000000 -#define AFTER_TRIGGER_2CTID 0x30000000 -#define AFTER_TRIGGER_CP_UPDATE 0x08000000 -#define AFTER_TRIGGER_TUP_BITS 0x38000000 +#define AFTER_TRIGGER_FDW_REUSE UINT64CONST(0x0000000000000) +#define AFTER_TRIGGER_FDW_FETCH UINT64CONST(0x0000200000000) +#define AFTER_TRIGGER_1CTID UINT64CONST(0x0000100000000) +#define AFTER_TRIGGER_ROWID1 UINT64CONST(0x0000010000000) +#define AFTER_TRIGGER_2CTID UINT64CONST(0x0000300000000) +#define AFTER_TRIGGER_ROWID2 UINT64CONST(0x0000020000000) +#define AFTER_TRIGGER_CP_UPDATE UINT64CONST(0x0000080000000) +#define AFTER_TRIGGER_TUP_BITS UINT64CONST(0x0000380000000) typedef struct AfterTriggerSharedData *AfterTriggerShared; typedef struct AfterTriggerSharedData @@ -3640,6 +3648,9 @@ typedef struct AfterTriggerEventDataZeroCtids } AfterTriggerEventDataZeroCtids; #define SizeofTriggerEvent(evt) \ + (((evt)->ate_flags & AFTER_TRIGGER_SIZE) >> AFTER_TRIGGER_SIZE_SHIFT) + +#define BasicSizeofTriggerEvent(evt) \ (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_CP_UPDATE ? \ sizeof(AfterTriggerEventData) : \ (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \ @@ -3992,14 +4003,34 @@ afterTriggerCopyBitmap(Bitmapset *src) */ static void afterTriggerAddEvent(AfterTriggerEventList *events, - AfterTriggerEvent event, AfterTriggerShared evtshared) + AfterTriggerEvent event, AfterTriggerShared evtshared, + bytea *rowid1, bytea *rowid2) { - Size eventsize = SizeofTriggerEvent(event); - Size needed = eventsize + sizeof(AfterTriggerSharedData); + Size basiceventsize = MAXALIGN(BasicSizeofTriggerEvent(event)); + Size eventsize; + Size needed; AfterTriggerEventChunk *chunk; AfterTriggerShared newshared; AfterTriggerEvent newevent; + if (SizeofTriggerEvent(event) == 0) + { + eventsize = basiceventsize; + if (event->ate_flags & AFTER_TRIGGER_ROWID1) + eventsize += MAXALIGN(VARSIZE(rowid1)); + + if (event->ate_flags & AFTER_TRIGGER_ROWID2) + eventsize += MAXALIGN(VARSIZE(rowid2)); + + event->ate_flags |= eventsize << AFTER_TRIGGER_SIZE_SHIFT; + } + else + { + eventsize = SizeofTriggerEvent(event); + } + + needed = eventsize + sizeof(AfterTriggerSharedData); + /* * If empty list or not enough room in the tail chunk, make a new chunk. * We assume here that a new shared record will always be needed. @@ -4032,7 +4063,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events, * sizes used should be MAXALIGN multiples, to ensure that the shared * records will be aligned safely. */ -#define MIN_CHUNK_SIZE 1024 +#define MIN_CHUNK_SIZE (1024*4) #define MAX_CHUNK_SIZE (1024*1024) #if MAX_CHUNK_SIZE > (AFTER_TRIGGER_OFFSET+1) @@ -4051,6 +4082,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events, chunksize *= 2; /* okay, double it */ else chunksize /= 2; /* too many shared records */ + chunksize = Max(chunksize, MIN_CHUNK_SIZE); chunksize = Min(chunksize, MAX_CHUNK_SIZE); } chunk = MemoryContextAlloc(afterTriggers.event_cxt, chunksize); @@ -4091,7 +4123,26 @@ afterTriggerAddEvent(AfterTriggerEventList *events, /* Insert the data */ newevent = (AfterTriggerEvent) chunk->freeptr; - memcpy(newevent, event, eventsize); + if (!rowid1 && !rowid2) + { + memcpy(newevent, event, eventsize); + } + else + { + Pointer ptr = chunk->freeptr; + + memcpy(newevent, event, basiceventsize); + ptr += basiceventsize; + + if (event->ate_flags & AFTER_TRIGGER_ROWID1) + { + memcpy(ptr, rowid1, MAXALIGN(VARSIZE(rowid1))); + ptr += MAXALIGN(VARSIZE(rowid1)); + } + + if (event->ate_flags & AFTER_TRIGGER_ROWID2) + memcpy(ptr, rowid2, MAXALIGN(VARSIZE(rowid2))); + } /* ... and link the new event to its shared record */ newevent->ate_flags &= ~AFTER_TRIGGER_OFFSET; newevent->ate_flags |= (char *) newshared - (char *) newevent; @@ -4251,6 +4302,7 @@ AfterTriggerExecute(EState *estate, int tgindx; bool should_free_trig = false; bool should_free_new = false; + Pointer ptr; /* * Locate trigger in trigdesc. @@ -4282,15 +4334,17 @@ AfterTriggerExecute(EState *estate, { Tuplestorestate *fdw_tuplestore = GetCurrentFDWTuplestore(); - if (!tuplestore_gettupleslot(fdw_tuplestore, true, false, - trig_tuple_slot1)) + if (!tuplestore_force_gettupleslot(fdw_tuplestore, true, false, + trig_tuple_slot1)) elog(ERROR, "failed to fetch tuple1 for AFTER trigger"); if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) == TRIGGER_EVENT_UPDATE && - !tuplestore_gettupleslot(fdw_tuplestore, true, false, - trig_tuple_slot2)) + !tuplestore_force_gettupleslot(fdw_tuplestore, true, false, + trig_tuple_slot2)) elog(ERROR, "failed to fetch tuple2 for AFTER trigger"); + trig_tuple_slot1->tts_tid = event->ate_ctid1; + trig_tuple_slot2->tts_tid = event->ate_ctid2; } /* fall through */ case AFTER_TRIGGER_FDW_REUSE: @@ -4322,13 +4376,26 @@ AfterTriggerExecute(EState *estate, break; default: - if (ItemPointerIsValid(&(event->ate_ctid1))) + ptr = (Pointer) event + MAXALIGN(BasicSizeofTriggerEvent(event)); + if (ItemPointerIsValid(&(event->ate_ctid1)) || + (event->ate_flags & AFTER_TRIGGER_ROWID1)) { + Datum tupleid; + TupleTableSlot *src_slot = ExecGetTriggerOldSlot(estate, src_relInfo); - if (!table_tuple_fetch_row_version(src_rel, - &(event->ate_ctid1), + if (event->ate_flags & AFTER_TRIGGER_ROWID1) + { + tupleid = PointerGetDatum(ptr); + ptr += MAXALIGN(VARSIZE(ptr)); + } + else + { + tupleid = PointerGetDatum(&(event->ate_ctid1)); + } + + if (!table_tuple_fetch_row_version(src_rel, tupleid, SnapshotAny, src_slot)) elog(ERROR, "failed to fetch tuple1 for AFTER trigger"); @@ -4364,13 +4431,23 @@ AfterTriggerExecute(EState *estate, /* don't touch ctid2 if not there */ if (((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID || (event->ate_flags & AFTER_TRIGGER_CP_UPDATE)) && - ItemPointerIsValid(&(event->ate_ctid2))) + (ItemPointerIsValid(&(event->ate_ctid2)) || + (event->ate_flags & AFTER_TRIGGER_ROWID2))) { + Datum tupleid; + TupleTableSlot *dst_slot = ExecGetTriggerNewSlot(estate, dst_relInfo); - if (!table_tuple_fetch_row_version(dst_rel, - &(event->ate_ctid2), + if (event->ate_flags & AFTER_TRIGGER_ROWID2) + { + tupleid = PointerGetDatum(ptr); + } + else + { + tupleid = PointerGetDatum(&(event->ate_ctid2)); + } + if (!table_tuple_fetch_row_version(dst_rel, tupleid, SnapshotAny, dst_slot)) elog(ERROR, "failed to fetch tuple2 for AFTER trigger"); @@ -4544,7 +4621,7 @@ afterTriggerMarkEvents(AfterTriggerEventList *events, { deferred_found = true; /* add it to move_list */ - afterTriggerAddEvent(move_list, event, evtshared); + afterTriggerAddEvent(move_list, event, evtshared, NULL, NULL); /* mark original copy "done" so we don't do it again */ event->ate_flags |= AFTER_TRIGGER_DONE; } @@ -4647,6 +4724,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, trigdesc = rInfo->ri_TrigDesc; finfo = rInfo->ri_TrigFunctions; instr = rInfo->ri_TrigInstrument; + if (slot1 != NULL) { ExecDropSingleTupleTableSlot(slot1); @@ -6039,6 +6117,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, int tgtype_level; int i; Tuplestorestate *fdw_tuplestore = NULL; + bytea *rowId1 = NULL; + bytea *rowId2 = NULL; /* * Check state. We use a normal test not Assert because it is possible to @@ -6132,6 +6212,12 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, * if so. This preserves the behavior that statement-level triggers fire * just once per statement and fire after row-level triggers. */ + + /* Determine flags */ + if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger)) + new_event.ate_flags = (row_trigger && event == TRIGGER_EVENT_UPDATE) ? + AFTER_TRIGGER_2CTID : AFTER_TRIGGER_1CTID; + switch (event) { case TRIGGER_EVENT_INSERT: @@ -6142,6 +6228,14 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, Assert(newslot != NULL); ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid1)); ItemPointerSetInvalid(&(new_event.ate_ctid2)); + if (table_get_row_ref_type(rel) == ROW_REF_ROWID) + { + bool isnull; + + rowId1 = DatumGetByteaP(slot_getsysattr(newslot, RowIdAttributeNumber, &isnull)); + new_event.ate_flags |= AFTER_TRIGGER_ROWID1; + Assert(!isnull); + } } else { @@ -6161,6 +6255,14 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, Assert(newslot == NULL); ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1)); ItemPointerSetInvalid(&(new_event.ate_ctid2)); + if (table_get_row_ref_type(rel) == ROW_REF_ROWID) + { + bool isnull; + + rowId1 = DatumGetByteaP(slot_getsysattr(oldslot, RowIdAttributeNumber, &isnull)); + new_event.ate_flags |= AFTER_TRIGGER_ROWID1; + Assert(!isnull); + } } else { @@ -6176,10 +6278,57 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, tgtype_event = TRIGGER_TYPE_UPDATE; if (row_trigger) { + bool src_rowid = false, + dst_rowid = false; + Assert(oldslot != NULL); Assert(newslot != NULL); ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1)); ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2)); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + Relation src_rel = src_partinfo->ri_RelationDesc; + Relation dst_rel = dst_partinfo->ri_RelationDesc; + + src_rowid = table_get_row_ref_type(src_rel) == + ROW_REF_ROWID; + dst_rowid = table_get_row_ref_type(dst_rel) == + ROW_REF_ROWID; + } + else + { + if (table_get_row_ref_type(rel) == ROW_REF_ROWID) + { + src_rowid = true; + dst_rowid = true; + } + } + + if (src_rowid) + { + Datum val; + bool isnull; + + val = slot_getsysattr(oldslot, + RowIdAttributeNumber, + &isnull); + rowId1 = DatumGetByteaP(val); + Assert(!isnull); + new_event.ate_flags |= AFTER_TRIGGER_ROWID1; + } + + if (dst_rowid) + { + Datum val; + bool isnull; + + val = slot_getsysattr(newslot, + RowIdAttributeNumber, + &isnull); + rowId2 = DatumGetByteaP(val); + Assert(!isnull); + new_event.ate_flags |= AFTER_TRIGGER_ROWID2; + } /* * Also remember the OIDs of partitions to fetch these tuples @@ -6217,20 +6366,6 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, break; } - /* Determine flags */ - if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger)) - { - if (row_trigger && event == TRIGGER_EVENT_UPDATE) - { - if (relkind == RELKIND_PARTITIONED_TABLE) - new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE; - else - new_event.ate_flags = AFTER_TRIGGER_2CTID; - } - else - new_event.ate_flags = AFTER_TRIGGER_1CTID; - } - /* else, we'll initialize ate_flags for each trigger */ tgtype_level = (row_trigger ? TRIGGER_TYPE_ROW : TRIGGER_TYPE_STATEMENT); @@ -6375,6 +6510,20 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, continue; /* Uniqueness definitely not violated */ } + /* Determine flags */ + if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger)) + { + if (row_trigger && event == TRIGGER_EVENT_UPDATE) + { + if (relkind == RELKIND_PARTITIONED_TABLE) + new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE; + else + new_event.ate_flags = AFTER_TRIGGER_2CTID; + } + else + new_event.ate_flags = AFTER_TRIGGER_1CTID; + } + /* * Fill in event structure and add it to the current query's queue. * Note we set ats_table to NULL whenever this trigger doesn't use @@ -6396,7 +6545,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, new_shared.ats_modifiedcols = afterTriggerCopyBitmap(modifiedCols); afterTriggerAddEvent(&afterTriggers.query_stack[afterTriggers.query_depth].events, - &new_event, &new_shared); + &new_event, &new_shared, rowId1, rowId2); } /* diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 24c2b60c62a..1803d5df5e7 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -4429,7 +4429,9 @@ ExecEvalSysVar(ExprState *state, ExprEvalStep *op, ExprContext *econtext, op->resnull); *op->resvalue = d; /* this ought to be unreachable, but it's cheap enough to check */ - if (unlikely(*op->resnull)) + if (op->d.var.attnum != RowIdAttributeNumber && + op->d.var.attnum != SelfItemPointerAttributeNumber && + unlikely(*op->resnull)) elog(ERROR, "failed to fetch attribute from slot"); } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 4c5a7bbf620..dacdeabba1d 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -869,13 +869,15 @@ InitPlan(QueryDesc *queryDesc, int eflags) Oid relid; Relation relation; ExecRowMark *erm; + RangeTblEntry *rangeEntry; /* ignore "parent" rowmarks; they are irrelevant at runtime */ if (rc->isParent) continue; /* get relation's OID (will produce InvalidOid if subquery) */ - relid = exec_rt_fetch(rc->rti, estate)->relid; + rangeEntry = exec_rt_fetch(rc->rti, estate); + relid = rangeEntry->relid; /* open relation, if we need to access it for this mark type */ switch (rc->markType) @@ -908,6 +910,10 @@ InitPlan(QueryDesc *queryDesc, int eflags) erm->prti = rc->prti; erm->rowmarkId = rc->rowmarkId; erm->markType = rc->markType; + if (erm->markType == ROW_MARK_COPY) + erm->refType = ROW_REF_COPY; + else + erm->refType = rangeEntry->reftype; erm->strength = rc->strength; erm->waitPolicy = rc->waitPolicy; erm->ermActive = false; @@ -1295,6 +1301,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, resultRelInfo->ri_ChildToRootMap = NULL; resultRelInfo->ri_ChildToRootMapValid = false; resultRelInfo->ri_CopyMultiInsertBuffer = NULL; + resultRelInfo->ri_RowRefType = table_get_row_ref_type(resultRelationDesc); } /* @@ -2727,7 +2734,7 @@ EvalPlanQualFetchRowMark(EPQState *epqstate, Index rti, TupleTableSlot *slot) { /* ordinary table, fetch the tuple */ if (!table_tuple_fetch_row_version(erm->relation, - (ItemPointer) DatumGetPointer(datum), + datum, SnapshotAny, slot)) elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck"); return true; diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index fda9877a55f..f24f5e28c2f 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -256,7 +256,8 @@ retry: PushActiveSnapshot(GetLatestSnapshot()); - res = table_tuple_lock(rel, &(outslot->tts_tid), GetLatestSnapshot(), + res = table_tuple_lock(rel, PointerGetDatum(&(outslot->tts_tid)), + GetLatestSnapshot(), outslot, GetCurrentCommandId(false), lockmode, @@ -440,7 +441,8 @@ retry: PushActiveSnapshot(GetLatestSnapshot()); - res = table_tuple_lock(rel, &(outslot->tts_tid), GetLatestSnapshot(), + res = table_tuple_lock(rel, PointerGetDatum(&(outslot->tts_tid)), + GetLatestSnapshot(), outslot, GetCurrentCommandId(false), lockmode, @@ -577,7 +579,8 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, resultRelInfo->ri_TrigDesc->trig_update_before_row) { if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo, - tid, NULL, slot, NULL, NULL)) + PointerGetDatum(tid), NULL, slot, + NULL, NULL)) skip_tuple = true; /* "do nothing" */ } @@ -644,7 +647,8 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo, resultRelInfo->ri_TrigDesc->trig_delete_before_row) { skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo, - tid, NULL, NULL, NULL, NULL); + PointerGetDatum(tid), NULL, NULL, + NULL, NULL); } if (!skip_tuple) diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index e459971d32e..53efde108cc 100644 --- a/src/backend/executor/nodeLockRows.c +++ b/src/backend/executor/nodeLockRows.c @@ -27,6 +27,7 @@ #include "executor/nodeLockRows.h" #include "foreign/fdwapi.h" #include "miscadmin.h" +#include "utils/datum.h" #include "utils/rel.h" @@ -157,7 +158,16 @@ lnext: } /* okay, try to lock (and fetch) the tuple */ - tid = *((ItemPointer) DatumGetPointer(datum)); + if (erm->refType == ROW_REF_TID) + { + tid = *((ItemPointer) DatumGetPointer(datum)); + datum = PointerGetDatum(&tid); + } + else + { + Assert(erm->refType == ROW_REF_ROWID); + datum = datumCopy(datum, false, -1); + } switch (erm->markType) { case ROW_MARK_EXCLUSIVE: @@ -182,12 +192,15 @@ lnext: if (!IsolationUsesXactSnapshot()) lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION; - test = table_tuple_lock(erm->relation, &tid, estate->es_snapshot, + test = table_tuple_lock(erm->relation, datum, estate->es_snapshot, markSlot, estate->es_output_cid, lockmode, erm->waitPolicy, lockflags, &tmfd); + if (erm->refType == ROW_REF_ROWID) + pfree(DatumGetPointer(datum)); + switch (test) { case TM_WouldBlock: diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 5faf10b254f..8a2e431a433 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -132,7 +132,7 @@ static void ExecPendingInserts(EState *estate); static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, ResultRelInfo *sourcePartInfo, ResultRelInfo *destPartInfo, - ItemPointer tupleid, + Datum tupleid, TupleTableSlot *oldSlot, TupleTableSlot *newslot); static bool ExecOnConflictUpdate(ModifyTableContext *context, @@ -149,12 +149,12 @@ static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate, static TupleTableSlot *ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, + Datum tupleid, bool canSetTag); static void ExecInitMerge(ModifyTableState *mtstate, EState *estate); static bool ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, + Datum tupleid, bool canSetTag); static void ExecMergeNotMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, @@ -1221,7 +1221,7 @@ ExecPendingInserts(EState *estate) */ static bool ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot **epqreturnslot, TM_Result *result) { if (result) @@ -1252,7 +1252,7 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ static TM_Result ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, bool changingPart, int options, + Datum tupleid, bool changingPart, int options, TupleTableSlot *oldSlot) { EState *estate = context->estate; @@ -1276,7 +1276,7 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ static void ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, + HeapTuple oldtuple, TupleTableSlot *slot, bool changingPart) { ModifyTableState *mtstate = context->mtstate; @@ -1356,7 +1356,7 @@ ExecInitDeleteTupleSlot(ModifyTableState *mtstate, static TupleTableSlot * ExecDelete(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *oldSlot, bool processReturning, @@ -1545,7 +1545,7 @@ ldelete: if (tupleDeleted) *tupleDeleted = true; - ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple, + ExecDeleteEpilogue(context, resultRelInfo, oldtuple, oldSlot, changingPart); /* Process RETURNING if present and if requested */ @@ -1562,7 +1562,7 @@ ldelete: /* FDW must have provided a slot containing the deleted row */ Assert(!TupIsNull(slot)); } - else + else if (!slot || TupIsNull(slot)) { /* Copy old tuple to the returning slot */ slot = ExecGetReturningSlot(estate, resultRelInfo); @@ -1611,7 +1611,7 @@ ldelete: static bool ExecCrossPartitionUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot, bool canSetTag, UpdateContext *updateCxt, @@ -1766,7 +1766,7 @@ ExecCrossPartitionUpdate(ModifyTableContext *context, */ static bool ExecUpdatePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot, TM_Result *result) { Relation resultRelationDesc = resultRelInfo->ri_RelationDesc; @@ -1843,7 +1843,7 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo, */ static TM_Result ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot, bool canSetTag, int options, TupleTableSlot *oldSlot, UpdateContext *updateCxt) { @@ -1999,7 +1999,7 @@ lreplace: */ static void ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt, - ResultRelInfo *resultRelInfo, ItemPointer tupleid, + ResultRelInfo *resultRelInfo, HeapTuple oldtuple, TupleTableSlot *slot, TupleTableSlot *oldSlot) { @@ -2049,7 +2049,7 @@ static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, ResultRelInfo *sourcePartInfo, ResultRelInfo *destPartInfo, - ItemPointer tupleid, + Datum tupleid, TupleTableSlot *oldslot, TupleTableSlot *newslot) { @@ -2139,7 +2139,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, */ static TupleTableSlot * ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot, TupleTableSlot *oldSlot, bool canSetTag, bool locked) { EState *estate = context->estate; @@ -2193,10 +2193,14 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, } else { - int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE; + int options = TABLE_MODIFY_WAIT; - if (!locked && !IsolationUsesXactSnapshot()) - options |= TABLE_MODIFY_LOCK_UPDATED; + if (!locked) + { + options |= TABLE_MODIFY_FETCH_OLD_TUPLE; + if (!IsolationUsesXactSnapshot()) + options |= TABLE_MODIFY_LOCK_UPDATED; + } /* * If we generate a new candidate tuple after EvalPlanQual testing, we @@ -2304,7 +2308,7 @@ redo_act: if (canSetTag) (estate->es_processed)++; - ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, tupleid, oldtuple, + ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, oldtuple, slot, oldSlot); /* Process RETURNING if present */ @@ -2336,7 +2340,19 @@ ExecOnConflictUpdate(ModifyTableContext *context, ExprContext *econtext = mtstate->ps.ps_ExprContext; ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause; TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing; - ItemPointer conflictTid = &existing->tts_tid; + Datum tupleid; + + if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) + { + bool isnull; + + tupleid = slot_getsysattr(existing, RowIdAttributeNumber, &isnull); + Assert(!isnull); + } + else + { + tupleid = PointerGetDatum(&existing->tts_tid); + } /* * Make tuple and any needed join variables available to ExecQual and @@ -2392,7 +2408,7 @@ ExecOnConflictUpdate(ModifyTableContext *context, /* Execute UPDATE with projection */ *returning = ExecUpdate(context, resultRelInfo, - conflictTid, NULL, + tupleid, NULL, resultRelInfo->ri_onConflict->oc_ProjSlot, existing, canSetTag, true); @@ -2411,7 +2427,7 @@ ExecOnConflictUpdate(ModifyTableContext *context, */ static TupleTableSlot * ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, bool canSetTag) + Datum tupleid, bool canSetTag) { bool matched; @@ -2458,7 +2474,7 @@ ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo, * from ExecMergeNotMatched to ExecMergeMatched, there is no risk of a * livelock. */ - matched = tupleid != NULL; + matched = DatumGetPointer(tupleid) != NULL; if (matched) matched = ExecMergeMatched(context, resultRelInfo, tupleid, canSetTag); @@ -2497,7 +2513,7 @@ ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ static bool ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, bool canSetTag) + Datum tupleid, bool canSetTag) { ModifyTableState *mtstate = context->mtstate; TupleTableSlot *newslot; @@ -2619,7 +2635,7 @@ lmerge_matched: if (result == TM_Ok && updateCxt.updated) { ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, - tupleid, NULL, newslot, + NULL, newslot, resultRelInfo->ri_oldTupleSlot); mtstate->mt_merge_updated += 1; } @@ -2638,7 +2654,7 @@ lmerge_matched: false, TABLE_MODIFY_WAIT, NULL); if (result == TM_Ok) { - ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL, + ExecDeleteEpilogue(context, resultRelInfo, NULL, resultRelInfo->ri_oldTupleSlot, false); mtstate->mt_merge_deleted += 1; } @@ -2736,9 +2752,13 @@ lmerge_matched: if (TupIsNull(epqslot)) return false; - (void) ExecGetJunkAttribute(epqslot, - resultRelInfo->ri_RowIdAttNo, - &isNull); + /* + * Update tupleid to that of the new tuple, for + * the refetch we do at the top. + */ + tupleid = ExecGetJunkAttribute(epqslot, + resultRelInfo->ri_RowIdAttNo, + &isNull); if (isNull) return false; @@ -2762,11 +2782,7 @@ lmerge_matched: * apply all the MATCHED rules again, to ensure * that the first qualifying WHEN MATCHED action * is executed. - * - * Update tupleid to that of the new tuple, for - * the refetch we do at the top. */ - ItemPointerCopy(&context->tmfd.ctid, tupleid); goto lmerge_matched; case TM_Deleted: @@ -3265,10 +3281,10 @@ ExecModifyTable(PlanState *pstate) PlanState *subplanstate; TupleTableSlot *slot; TupleTableSlot *oldSlot; + Datum tupleid; ItemPointerData tuple_ctid; HeapTupleData oldtupdata; HeapTuple oldtuple; - ItemPointer tupleid; CHECK_FOR_INTERRUPTS(); @@ -3317,6 +3333,8 @@ ExecModifyTable(PlanState *pstate) */ for (;;) { + RowRefType refType; + /* * Reset the per-output-tuple exprcontext. This is needed because * triggers expect to use that context as workspace. It's a bit ugly @@ -3366,7 +3384,8 @@ ExecModifyTable(PlanState *pstate) { EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot); - ExecMerge(&context, node->resultRelInfo, NULL, node->canSetTag); + ExecMerge(&context, node->resultRelInfo, + PointerGetDatum(NULL), node->canSetTag); continue; /* no RETURNING support yet */ } @@ -3402,7 +3421,8 @@ ExecModifyTable(PlanState *pstate) EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot); slot = context.planSlot; - tupleid = NULL; + refType = resultRelInfo->ri_RowRefType; + tupleid = PointerGetDatum(NULL); oldtuple = NULL; /* @@ -3444,16 +3464,33 @@ ExecModifyTable(PlanState *pstate) { EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot); - ExecMerge(&context, node->resultRelInfo, NULL, node->canSetTag); + ExecMerge(&context, node->resultRelInfo, + PointerGetDatum(NULL), node->canSetTag); continue; /* no RETURNING support yet */ } elog(ERROR, "ctid is NULL"); } - tupleid = (ItemPointer) DatumGetPointer(datum); - tuple_ctid = *tupleid; /* be sure we don't free ctid!! */ - tupleid = &tuple_ctid; + if (refType == ROW_REF_TID) + { + /* shouldn't ever get a null result... */ + if (isNull) + elog(ERROR, "ctid is NULL"); + + tuple_ctid = *((ItemPointer) DatumGetPointer(datum)); /* be sure we don't free + * ctid!! */ + tupleid = PointerGetDatum(&tuple_ctid); + } + else + { + Assert(refType == ROW_REF_ROWID); + /* shouldn't ever get a null result... */ + if (isNull) + elog(ERROR, "rowid is NULL"); + + tupleid = datumCopy(datum, false, -1); + } } /* @@ -3530,6 +3567,7 @@ ExecModifyTable(PlanState *pstate) /* Fetch the most recent version of old tuple. */ Relation relation = resultRelInfo->ri_RelationDesc; + Assert(DatumGetPointer(tupleid) != NULL); if (!table_tuple_fetch_row_version(relation, tupleid, SnapshotAny, oldSlot)) @@ -3564,6 +3602,9 @@ ExecModifyTable(PlanState *pstate) break; } + if (refType == ROW_REF_ROWID && DatumGetPointer(tupleid) != NULL) + pfree(DatumGetPointer(tupleid)); + /* * If we got a RETURNING result, return it to caller. We'll continue * the work on next call. @@ -3803,10 +3844,20 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) relkind == RELKIND_MATVIEW || relkind == RELKIND_PARTITIONED_TABLE) { - resultRelInfo->ri_RowIdAttNo = - ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid"); - if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo)) - elog(ERROR, "could not find junk ctid column"); + if (resultRelInfo->ri_RowRefType == ROW_REF_TID) + { + resultRelInfo->ri_RowIdAttNo = + ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid"); + if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo)) + elog(ERROR, "could not find junk ctid column"); + } + else + { + resultRelInfo->ri_RowIdAttNo = + ExecFindJunkAttributeInTlist(subplan->targetlist, "rowid"); + if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo)) + elog(ERROR, "could not find junk rowid column"); + } } else if (relkind == RELKIND_FOREIGN_TABLE) { @@ -4118,6 +4169,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) estate->es_auxmodifytables = lcons(mtstate, estate->es_auxmodifytables); + + return mtstate; } diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index 15055077d03..a4ad0aa0d2e 100644 --- a/src/backend/executor/nodeTidscan.c +++ b/src/backend/executor/nodeTidscan.c @@ -378,7 +378,7 @@ TidNext(TidScanState *node) if (node->tss_isCurrentOf) table_tuple_get_latest_tid(scan, &tid); - if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot)) + if (table_tuple_fetch_row_version(heapRelation, PointerGetDatum(&tid), snapshot, slot)) return slot; /* Bad TID or failed snapshot qual; try next */ diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c index ef2ce552970..0d849332904 100644 --- a/src/backend/optimizer/prep/preptlist.c +++ b/src/backend/optimizer/prep/preptlist.c @@ -226,6 +226,22 @@ preprocess_targetlist(PlannerInfo *root) true); tlist = lappend(tlist, tle); } + if (rc->allRefTypes & (1 << ROW_REF_ROWID)) + { + /* Need to fetch TID */ + var = makeVar(rc->rti, + RowIdAttributeNumber, + BYTEAOID, + -1, + InvalidOid, + 0); + snprintf(resname, sizeof(resname), "rowid%u", rc->rowmarkId); + tle = makeTargetEntry((Expr *) var, + list_length(tlist) + 1, + pstrdup(resname), + true); + tlist = lappend(tlist, tle); + } if (rc->allRefTypes & (1 << ROW_REF_COPY)) { /* Need the whole row as a junk var */ diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index f456b3b0a44..837d2e92c5a 100644 --- a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -16,6 +16,7 @@ #include "access/htup_details.h" #include "access/table.h" +#include "access/tableam.h" #include "foreign/fdwapi.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" @@ -896,17 +897,35 @@ add_row_identity_columns(PlannerInfo *root, Index rtindex, relkind == RELKIND_MATVIEW || relkind == RELKIND_PARTITIONED_TABLE) { + RowRefType refType = ROW_REF_TID; + + refType = table_get_row_ref_type(target_relation); + /* * Emit CTID so that executor can find the row to merge, update or * delete. */ - var = makeVar(rtindex, - SelfItemPointerAttributeNumber, - TIDOID, - -1, - InvalidOid, - 0); - add_row_identity_var(root, var, rtindex, "ctid"); + if (refType == ROW_REF_TID) + { + var = makeVar(rtindex, + SelfItemPointerAttributeNumber, + TIDOID, + -1, + InvalidOid, + 0); + add_row_identity_var(root, var, rtindex, "ctid"); + } + else + { + Assert(refType == ROW_REF_ROWID); + var = makeVar(rtindex, + RowIdAttributeNumber, + BYTEAOID, + -1, + InvalidOid, + 0); + add_row_identity_var(root, var, rtindex, "rowid"); + } } else if (relkind == RELKIND_FOREIGN_TABLE) { diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index b728d0c9d73..af6a82b3246 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -283,6 +283,24 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel, newvars = lappend(newvars, var); } + if ((new_allRefTypes & (1 << ROW_REF_ROWID)) && + !(old_allRefTypes & (1 << ROW_REF_ROWID))) + { + var = makeVar(oldrc->rti, + RowIdAttributeNumber, + BYTEAOID, + -1, + InvalidOid, + 0); + snprintf(resname, sizeof(resname), "rowid%u", oldrc->rowmarkId); + tle = makeTargetEntry((Expr *) var, + list_length(root->processed_tlist) + 1, + pstrdup(resname), + true); + root->processed_tlist = lappend(root->processed_tlist, tle); + newvars = lappend(newvars, var); + } + /* Add tableoid junk Var, unless we had it already */ if (!old_isParent) { @@ -486,7 +504,7 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, Assert(parentrte->rtekind == RTE_RELATION); /* else this is dubious */ childrte->relid = childOID; childrte->relkind = childrel->rd_rel->relkind; - childrte->reftype = ROW_REF_TID; + childrte->reftype = table_get_row_ref_type(childrel); /* A partitioned child will need to be expanded further. */ if (childrte->relkind == RELKIND_PARTITIONED_TABLE) { diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index 56feb553141..df99d6b49b3 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -1503,7 +1503,7 @@ addRangeTableEntry(ParseState *pstate, rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; rte->rellockmode = lockmode; - rte->reftype = ROW_REF_TID; + rte->reftype = table_get_row_ref_type(rel); /* * Build the list of effective column names using user-supplied aliases @@ -1589,7 +1589,7 @@ addRangeTableEntryForRelation(ParseState *pstate, rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; rte->rellockmode = lockmode; - rte->reftype = ROW_REF_TID; + rte->reftype = table_get_row_ref_type(rel); /* * Build the list of effective column names using user-supplied aliases @@ -3273,6 +3273,9 @@ get_rte_attribute_name(RangeTblEntry *rte, AttrNumber attnum) attnum > 0 && attnum <= list_length(rte->alias->colnames)) return strVal(list_nth(rte->alias->colnames, attnum - 1)); + if (attnum == RowIdAttributeNumber) + return "rowid"; + /* * If the RTE is a relation, go to the system catalogs not the * eref->colnames list. This is a little slower but it will give the diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 41a362310a8..0ec58ee7f73 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -1832,6 +1832,7 @@ ApplyRetrieveRule(Query *parsetree, rte = rt_fetch(rt_index, parsetree->rtable); rte->rtekind = RTE_SUBQUERY; + rte->reftype = ROW_REF_COPY; rte->subquery = rule_action; rte->security_barrier = RelationIsSecurityView(relation); diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c index 0c444378227..26e5934716a 100644 --- a/src/backend/utils/sort/tuplestore.c +++ b/src/backend/utils/sort/tuplestore.c @@ -1100,6 +1100,36 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward, } } +/* + * Same as tuplestore_gettupleslot(), but foces tuple storage to slot. Thus, + * it can work with slot types different than minimal tuple. + */ +bool +tuplestore_force_gettupleslot(Tuplestorestate *state, bool forward, + bool copy, TupleTableSlot *slot) +{ + MinimalTuple tuple; + bool should_free; + + tuple = (MinimalTuple) tuplestore_gettuple(state, forward, &should_free); + + if (tuple) + { + if (copy && !should_free) + { + tuple = heap_copy_minimal_tuple(tuple); + should_free = true; + } + ExecForceStoreMinimalTuple(tuple, slot, should_free); + return true; + } + else + { + ExecClearTuple(slot); + return false; + } +} + /* * tuplestore_advance - exported function to adjust position without fetching * diff --git a/src/include/access/sysattr.h b/src/include/access/sysattr.h index 8f08682750b..d717a7cafec 100644 --- a/src/include/access/sysattr.h +++ b/src/include/access/sysattr.h @@ -24,6 +24,7 @@ #define MaxTransactionIdAttributeNumber (-4) #define MaxCommandIdAttributeNumber (-5) #define TableOidAttributeNumber (-6) -#define FirstLowInvalidHeapAttributeNumber (-7) +#define RowIdAttributeNumber (-7) +#define FirstLowInvalidHeapAttributeNumber (-8) #endif /* SYSATTR_H */ diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index b2b397023c7..c5a24289440 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -477,7 +477,7 @@ typedef struct TableAmRoutine * test, returns true, false otherwise. */ bool (*tuple_fetch_row_version) (Relation rel, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot); @@ -536,7 +536,7 @@ typedef struct TableAmRoutine /* see table_tuple_delete() for reference about parameters */ TM_Result (*tuple_delete) (Relation rel, - ItemPointer tid, + Datum tupleid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, @@ -547,7 +547,7 @@ typedef struct TableAmRoutine /* see table_tuple_update() for reference about parameters */ TM_Result (*tuple_update) (Relation rel, - ItemPointer otid, + Datum tupleid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, @@ -560,7 +560,7 @@ typedef struct TableAmRoutine /* see table_tuple_lock() for reference about parameters */ TM_Result (*tuple_lock) (Relation rel, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, @@ -706,6 +706,11 @@ typedef struct TableAmRoutine * ------------------------------------------------------------------------ */ + /* + * Get the type of row identifier in the table. + */ + RowRefType (*get_row_ref_type) (Relation rel); + /* * This callback frees relation private cache data stored in rd_amcache. * If this callback is not provided, rd_amcache is assumed to point to @@ -1297,9 +1302,9 @@ extern bool table_index_fetch_tuple_check(Relation rel, /* - * Fetch tuple at `tid` into `slot`, after doing a visibility test according to - * `snapshot`. If a tuple was found and passed the visibility test, returns - * true, false otherwise. + * Fetch tuple identified by `tupleid` into `slot`, after doing a visibility + * test according to `snapshot`. If a tuple was found and passed the visibility + * test, returns true, false otherwise. * * See table_index_fetch_tuple's comment about what the difference between * these functions is. It is correct to use this function outside of index @@ -1307,7 +1312,7 @@ extern bool table_index_fetch_tuple_check(Relation rel, */ static inline bool table_tuple_fetch_row_version(Relation rel, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot) { @@ -1319,7 +1324,8 @@ table_tuple_fetch_row_version(Relation rel, if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding"); - return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot); + return rel->rd_tableam->tuple_fetch_row_version(rel, tupleid, + snapshot, slot); } /* @@ -1504,7 +1510,7 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots, * * Input parameters: * relation - table to be modified (caller must hold suitable lock) - * tid - TID of tuple to be deleted + * tupleid - identifier of tuple to be deleted * cid - delete command ID (used for visibility test, and stored into * cmax if successful) * crosscheck - if not InvalidSnapshot, also check tuple against this @@ -1533,12 +1539,12 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots, * struct TM_FailureData for additional info. */ static inline TM_Result -table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, +table_tuple_delete(Relation rel, Datum tupleid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, int options, TM_FailureData *tmfd, bool changingPart, TupleTableSlot *oldSlot) { - return rel->rd_tableam->tuple_delete(rel, tid, cid, + return rel->rd_tableam->tuple_delete(rel, tupleid, cid, snapshot, crosscheck, options, tmfd, changingPart, oldSlot); @@ -1552,7 +1558,7 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, * * Input parameters: * relation - table to be modified (caller must hold suitable lock) - * otid - TID of old tuple to be replaced + * tupleid - identifier of old tuple to be replaced * slot - newly constructed tuple data to store * cid - update command ID (used for visibility test, and stored into * cmax/cmin if successful) @@ -1589,13 +1595,13 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, * for additional info. */ static inline TM_Result -table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, +table_tuple_update(Relation rel, Datum tupleid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, int options, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot) { - return rel->rd_tableam->tuple_update(rel, otid, slot, + return rel->rd_tableam->tuple_update(rel, tupleid, slot, cid, snapshot, crosscheck, options, tmfd, lockmode, update_indexes, @@ -1607,7 +1613,7 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, * * Input parameters: * relation: relation containing tuple (caller must hold suitable lock) - * tid: TID of tuple to lock + * tupleid: identifier of tuple to lock * snapshot: snapshot to use for visibility determinations * cid: current command ID (used for visibility test, and stored into * tuple's cmax if lock is successful) @@ -1636,12 +1642,12 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, * comments for struct TM_FailureData for additional info. */ static inline TM_Result -table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot, +table_tuple_lock(Relation rel, Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd) { - return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot, + return rel->rd_tableam->tuple_lock(rel, tupleid, snapshot, slot, cid, mode, wait_policy, flags, tmfd); } @@ -1923,6 +1929,20 @@ table_define_index(Relation rel, Oid indoid, bool reindex, * ---------------------------------------------------------------------------- */ +/* + * Get the type of row identifier. Returns ROW_REF_TID when table AM routine + * is not accessible. This happens during catalog initialization. All catalog + * tables are known to use heap. + */ +static inline RowRefType +table_get_row_ref_type(Relation rel) +{ + if (rel->rd_tableam) + return rel->rd_tableam->get_row_ref_type(rel); + else + return ROW_REF_TID; +} + /* * Frees relation private cache data stored in rd_amcache. Uses * free_rd_amcache method if provided. Assumes rd_amcache to point to single diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index 4903b4b7bc2..15e1fbe7700 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -209,7 +209,7 @@ extern void ExecASDeleteTriggers(EState *estate, extern bool ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple fdw_trigtuple, TupleTableSlot **epqslot, TM_Result *tmresult, @@ -231,7 +231,7 @@ extern void ExecASUpdateTriggers(EState *estate, extern bool ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple fdw_trigtuple, TupleTableSlot *newslot, TM_Result *tmresult, diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 278a1227fc7..766e7a6ba4b 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -2072,6 +2072,7 @@ typedef struct OnConflictExpr typedef enum RowRefType { ROW_REF_TID, /* Item pointer (block, offset) */ + ROW_REF_ROWID, /* Bytea row id */ ROW_REF_COPY /* Full row copy */ } RowRefType; diff --git a/src/include/utils/tuplestore.h b/src/include/utils/tuplestore.h index 1077c5fdeaa..c9f1f57aac5 100644 --- a/src/include/utils/tuplestore.h +++ b/src/include/utils/tuplestore.h @@ -73,6 +73,9 @@ extern bool tuplestore_in_memory(Tuplestorestate *state); extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot); +extern bool tuplestore_force_gettupleslot(Tuplestorestate *state, bool forward, + bool copy, TupleTableSlot *slot); + extern bool tuplestore_advance(Tuplestorestate *state, bool forward); extern bool tuplestore_skiptuples(Tuplestorestate *state, -- 2.39.3 (Apple Git-145)