From 681a21758ca3bf8e9599d5900df832d2c075a6fe Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Mon, 11 Jan 2021 14:45:03 -0800 Subject: [PATCH 6/7] Experiment: One array again --- src/include/access/tableam.h | 19 ++++++------------ src/backend/access/heap/heapam.c | 18 ++++++----------- src/backend/access/index/genam.c | 11 ++++------- src/backend/access/nbtree/nbtdedup.c | 24 +++++++++-------------- src/backend/access/nbtree/nbtinsert.c | 22 ++++++++------------- src/backend/access/nbtree/nbtpage.c | 28 +++++++-------------------- 6 files changed, 40 insertions(+), 82 deletions(-) diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 33bffb6815..777084d5a6 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -133,10 +133,8 @@ typedef struct TM_FailureData * * Represents the status of table tuples, referenced by table TID and taken by * index AM from index tuples. State consists of high level parameters of the - * deletion operation, plus two mutable palloc()'d arrays for information - * about the status of individual table tuples. These are conceptually one - * single array. Using two arrays keeps the TM_IndexDelete struct small, - * which makes sorting the first array (the deltids array) fast. + * deletion operation, plus a mutable palloc()'d arrays for information about + * the status of individual table tuples. * * Some index AM callers perform simple index tuple deletion (by specifying * bottomup = false), and include only known-dead deltids. These known-dead @@ -167,8 +165,8 @@ typedef struct TM_FailureData * "extra" TIDs, but a block-based AM should always manage to do so in * practice. * - * The final contents of the deltids/status arrays are interesting to callers - * that ask tableam to perform speculative work (i.e. when _any_ items have + * The final contents of the deltids array is interesting to callers that ask + * tableam to perform speculative work (i.e. when _any_ items have * knowndeletable set to false up front). These index AM callers will * naturally need to consult final state to determine which index tuples are * in fact deletable. @@ -186,18 +184,14 @@ typedef struct TM_FailureData typedef struct TM_IndexDelete { ItemPointerData tid; /* table TID from index tuple */ - int16 id; /* Offset into TM_IndexStatus array */ -} TM_IndexDelete; -typedef struct TM_IndexStatus -{ OffsetNumber idxoffnum; /* Index am page offset number */ bool knowndeletable; /* Currently known to be deletable? */ /* Bottom-up index deletion specific fields follow */ bool promising; /* Promising (duplicate) index tuple? */ int16 freespace; /* Space freed in index if deleted */ -} TM_IndexStatus; +} TM_IndexDelete; /* * Index AM/tableam coordination is central to the design of bottom-up index @@ -223,9 +217,8 @@ typedef struct TM_IndexDeleteOp int bottomupfreespace; /* Bottom-up space target */ /* Mutable per-TID information follows (index AM initializes entries) */ - int ndeltids; /* Current # of deltids/status elements */ + int ndeltids; /* Current # of deltids elements */ TM_IndexDelete *deltids; - TM_IndexStatus *status; } TM_IndexDeleteOp; /* "options" flag bits for table_tuple_insert */ diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 5b9cfb26cf..7eb4962946 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -7088,7 +7088,6 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) for (int i = 0; i < delstate->ndeltids; i++) { TM_IndexDelete *ideltid = &delstate->deltids[i]; - TM_IndexStatus *istatus = delstate->status + ideltid->id; ItemPointer htid = &ideltid->tid; OffsetNumber offnum; @@ -7192,8 +7191,8 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) maxoff = PageGetMaxOffsetNumber(page); } - if (istatus->knowndeletable) - Assert(!delstate->bottomup && !istatus->promising); + if (ideltid->knowndeletable) + Assert(!delstate->bottomup && !ideltid->promising); else { ItemPointerData tmp = *htid; @@ -7205,13 +7204,13 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) continue; /* can't delete entry */ /* Caller will delete, since whole HOT chain is vacuumable */ - istatus->knowndeletable = true; + ideltid->knowndeletable = true; /* Maintain index free space info for bottom-up deletion case */ if (delstate->bottomup) { - Assert(istatus->freespace > 0); - actualfreespace += istatus->freespace; + Assert(ideltid->freespace > 0); + actualfreespace += ideltid->freespace; if (actualfreespace >= curtargetfreespace) bottomup_final_block = true; } @@ -7358,10 +7357,6 @@ index_delete_sort(TM_IndexDeleteOp *delstate) */ const int gaps[9] = {1968, 861, 336, 112, 48, 21, 7, 3, 1}; - /* Think carefully before changing anything here -- keep swaps cheap */ - StaticAssertStmt(sizeof(TM_IndexDelete) <= 8, - "element size exceeds 8 bytes"); - for (int g = 0; g < lengthof(gaps); g++) { for (int hi = gaps[g], i = low + hi; i < ndeltids; i++) @@ -7574,9 +7569,8 @@ bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate) for (int i = 0; i < delstate->ndeltids; i++) { TM_IndexDelete *ideltid = &delstate->deltids[i]; - TM_IndexStatus *istatus = delstate->status + ideltid->id; ItemPointer htid = &ideltid->tid; - bool promising = istatus->promising; + bool promising = ideltid->promising; if (curblock != ItemPointerGetBlockNumber(htid)) { diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index c911c705ba..e7e20de120 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -305,7 +305,6 @@ index_compute_xid_horizon_for_tuples(Relation irel, delstate.bottomupfreespace = 0; delstate.ndeltids = 0; delstate.deltids = palloc(nitems * sizeof(TM_IndexDelete)); - delstate.status = palloc(nitems * sizeof(TM_IndexStatus)); /* identify what the index tuples about to be deleted point to */ for (int i = 0; i < nitems; i++) @@ -318,11 +317,10 @@ index_compute_xid_horizon_for_tuples(Relation irel, Assert(ItemIdIsDead(iitemid)); ItemPointerCopy(&itup->t_tid, &delstate.deltids[i].tid); - delstate.deltids[i].id = delstate.ndeltids; - delstate.status[i].idxoffnum = InvalidOffsetNumber; /* unused */ - delstate.status[i].knowndeletable = true; /* LP_DEAD-marked */ - delstate.status[i].promising = false; /* unused */ - delstate.status[i].freespace = 0; /* unused */ + delstate.deltids[i].idxoffnum = InvalidOffsetNumber; /* unused */ + delstate.deltids[i].knowndeletable = true; /* LP_DEAD-marked */ + delstate.deltids[i].promising = false; /* unused */ + delstate.deltids[i].freespace = 0; /* unused */ delstate.ndeltids++; } @@ -334,7 +332,6 @@ index_compute_xid_horizon_for_tuples(Relation irel, Assert(delstate.ndeltids == nitems); pfree(delstate.deltids); - pfree(delstate.status); return latestRemovedXid; } diff --git a/src/backend/access/nbtree/nbtdedup.c b/src/backend/access/nbtree/nbtdedup.c index 854e3b2cf9..e0f77821a3 100644 --- a/src/backend/access/nbtree/nbtdedup.c +++ b/src/backend/access/nbtree/nbtdedup.c @@ -346,7 +346,6 @@ _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel, delstate.bottomupfreespace = Max(BLCKSZ / 16, newitemsz); delstate.ndeltids = 0; delstate.deltids = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexDelete)); - delstate.status = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexStatus)); minoff = P_FIRSTDATAKEY(opaque); maxoff = PageGetMaxOffsetNumber(page); @@ -400,7 +399,6 @@ _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel, _bt_delitems_delete_check(rel, buf, heapRel, &delstate); pfree(delstate.deltids); - pfree(delstate.status); /* Report "success" to caller unconditionally to avoid deduplication */ if (neverdedup) @@ -647,17 +645,15 @@ _bt_bottomupdel_finish_pending(Page page, BTDedupState state, ItemId itemid = PageGetItemId(page, offnum); IndexTuple itup = (IndexTuple) PageGetItem(page, itemid); TM_IndexDelete *ideltid = &delstate->deltids[delstate->ndeltids]; - TM_IndexStatus *istatus = &delstate->status[delstate->ndeltids]; if (!BTreeTupleIsPosting(itup)) { /* Simple case: A plain non-pivot tuple */ ideltid->tid = itup->t_tid; - ideltid->id = delstate->ndeltids; - istatus->idxoffnum = offnum; - istatus->knowndeletable = false; /* for now */ - istatus->promising = dupinterval; /* simple rule */ - istatus->freespace = ItemIdGetLength(itemid) + sizeof(ItemIdData); + ideltid->idxoffnum = offnum; + ideltid->knowndeletable = false; /* for now */ + ideltid->promising = dupinterval; /* simple rule */ + ideltid->freespace = ItemIdGetLength(itemid) + sizeof(ItemIdData); delstate->ndeltids++; } @@ -711,17 +707,15 @@ _bt_bottomupdel_finish_pending(Page page, BTDedupState state, ItemPointer htid = BTreeTupleGetPostingN(itup, p); ideltid->tid = *htid; - ideltid->id = delstate->ndeltids; - istatus->idxoffnum = offnum; - istatus->knowndeletable = false; /* for now */ - istatus->promising = false; + ideltid->idxoffnum = offnum; + ideltid->knowndeletable = false; /* for now */ + ideltid->promising = false; if ((firstpromising && p == 0) || (lastpromising && p == nitem - 1)) - istatus->promising = true; - istatus->freespace = sizeof(ItemPointerData); /* at worst */ + ideltid->promising = true; + ideltid->freespace = sizeof(ItemPointerData); /* at worst */ ideltid++; - istatus++; delstate->ndeltids++; } } diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index e333603912..f041c63ab9 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -2801,7 +2801,6 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, delstate.bottomupfreespace = 0; delstate.ndeltids = 0; delstate.deltids = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexDelete)); - delstate.status = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexStatus)); for (offnum = minoff; offnum <= maxoff; @@ -2810,7 +2809,6 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, ItemId itemid = PageGetItemId(page, offnum); IndexTuple itup = (IndexTuple) PageGetItem(page, itemid); TM_IndexDelete *odeltid = &delstate.deltids[delstate.ndeltids]; - TM_IndexStatus *ostatus = &delstate.status[delstate.ndeltids]; BlockNumber tidblock; void *match; @@ -2831,11 +2829,10 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, * LP_DEAD-bit set tuples on page -- add TID to deltids */ odeltid->tid = itup->t_tid; - odeltid->id = delstate.ndeltids; - ostatus->idxoffnum = offnum; - ostatus->knowndeletable = ItemIdIsDead(itemid); - ostatus->promising = false; /* unused */ - ostatus->freespace = 0; /* unused */ + odeltid->idxoffnum = offnum; + odeltid->knowndeletable = ItemIdIsDead(itemid); + odeltid->promising = false; /* unused */ + odeltid->freespace = 0; /* unused */ delstate.ndeltids++; } @@ -2862,14 +2859,12 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, * from LP_DEAD-bit set tuples on page -- add TID to deltids */ odeltid->tid = *tid; - odeltid->id = delstate.ndeltids; - ostatus->idxoffnum = offnum; - ostatus->knowndeletable = ItemIdIsDead(itemid); - ostatus->promising = false; /* unused */ - ostatus->freespace = 0; /* unused */ + odeltid->idxoffnum = offnum; + odeltid->knowndeletable = ItemIdIsDead(itemid); + odeltid->promising = false; /* unused */ + odeltid->freespace = 0; /* unused */ odeltid++; - ostatus++; delstate.ndeltids++; } } @@ -2883,7 +2878,6 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, _bt_delitems_delete_check(rel, buffer, heapRel, &delstate); pfree(delstate.deltids); - pfree(delstate.status); } /* diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index e230f912c2..6297b95640 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -1442,14 +1442,12 @@ _bt_delitems_cmp(const void *a, const void *b) TM_IndexDelete *indexdelete1 = (TM_IndexDelete *) a; TM_IndexDelete *indexdelete2 = (TM_IndexDelete *) b; - if (indexdelete1->id > indexdelete2->id) + if (indexdelete1->idxoffnum > indexdelete2->idxoffnum) return 1; - if (indexdelete1->id < indexdelete2->id) + if (indexdelete1->idxoffnum < indexdelete2->idxoffnum) return -1; - Assert(false); - - return 0; + return ItemPointerCompare(&indexdelete1->tid, &indexdelete2->tid); } /* @@ -1478,17 +1476,6 @@ _bt_delitems_cmp(const void *a, const void *b) * without expecting that tableam will check most of them. The tableam has * considerable discretion around which entries/blocks it checks. Our role in * costing the bottom-up deletion operation is strictly advisory. - * - * Note: Caller must have added deltids entries (i.e. entries that go in - * delstate's main array) in leaf-page-wise order: page offset number order, - * TID order among entries taken from the same posting list tuple (tiebreak on - * TID). This order is convenient to work with here. - * - * Note: We also rely on the id field of each deltids element "capturing" this - * original leaf-page-wise order. That is, we expect to be able to get back - * to the original leaf-page-wise order just by sorting deltids on the id - * field (tableam will sort deltids for its own reasons, so we'll need to put - * it back in leaf-page-wise order afterwards). */ void _bt_delitems_delete_check(Relation rel, Buffer buf, Relation heapRel, @@ -1531,7 +1518,7 @@ _bt_delitems_delete_check(Relation rel, Buffer buf, Relation heapRel, /* We definitely have to delete at least one index tuple (or one TID) */ for (int i = 0; i < delstate->ndeltids; i++) { - TM_IndexStatus *dstatus = delstate->status + delstate->deltids[i].id; + TM_IndexDelete *dstatus = &delstate->deltids[i]; OffsetNumber idxoffnum = dstatus->idxoffnum; ItemId itemid = PageGetItemId(page, idxoffnum); IndexTuple itup = (IndexTuple) PageGetItem(page, itemid); @@ -1590,15 +1577,14 @@ _bt_delitems_delete_check(Relation rel, Buffer buf, Relation heapRel, for (; nestedi < delstate->ndeltids; nestedi++) { TM_IndexDelete *tcdeltid = &delstate->deltids[nestedi]; - TM_IndexStatus *tdstatus = (delstate->status + tcdeltid->id); /* Stop once we get past all itup related deltids entries */ - Assert(tdstatus->idxoffnum >= idxoffnum); - if (tdstatus->idxoffnum != idxoffnum) + Assert(tcdeltid->idxoffnum >= idxoffnum); + if (tcdeltid->idxoffnum != idxoffnum) break; /* Skip past non-deletable itup related entries up front */ - if (!tdstatus->knowndeletable) + if (!tcdeltid->knowndeletable) continue; /* Entry is first partial ptid match (or an exact match)? */ -- 2.27.0