Re: heapgettup refactoring - Mailing list pgsql-hackers
From | Andres Freund |
---|---|
Subject | Re: heapgettup refactoring |
Date | |
Msg-id | 20221101010948.hsf33emgnwzvil4a@awork3.anarazel.de Whole thread Raw |
In response to | heapgettup refactoring (Melanie Plageman <melanieplageman@gmail.com>) |
Responses |
Re: heapgettup refactoring
|
List | pgsql-hackers |
Hi, On 2022-10-31 14:37:39 -0400, Melanie Plageman wrote: > and heapgetpage(). heapgettup() and heapgettup_pagemode() have a lot of > duplicated code, confusingly nested if statements, and unnecessary local > variables. While working on a feature for the AIO/DIO patchset, I > noticed that it was difficult to add new code to heapgettup() and > heapgettup_pagemode() because of how the functions are written. Thanks for working on this - the current state is quite painful. > From cde2d6720f4f5ab2531c22ad4a5f0d9e6ec1039d Mon Sep 17 00:00:00 2001 > From: Melanie Plageman <melanieplageman@gmail.com> > Date: Wed, 26 Oct 2022 20:00:34 -0400 > Subject: [PATCH v1 1/3] Remove breaks in HeapTupleSatisfiesVisibility > > breaks in HeapTupleSatisfiesVisibility were superfluous > --- > src/backend/access/heap/heapam_visibility.c | 7 ------- > 1 file changed, 7 deletions(-) > > diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c > index 6e33d1c881..dd5d5da190 100644 > --- a/src/backend/access/heap/heapam_visibility.c > +++ b/src/backend/access/heap/heapam_visibility.c > @@ -1769,25 +1769,18 @@ HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer) > { > case SNAPSHOT_MVCC: > return HeapTupleSatisfiesMVCC(htup, snapshot, buffer); > - break; > case SNAPSHOT_SELF: > return HeapTupleSatisfiesSelf(htup, snapshot, buffer); > - break; > case SNAPSHOT_ANY: > return HeapTupleSatisfiesAny(htup, snapshot, buffer); > - break; > case SNAPSHOT_TOAST: > return HeapTupleSatisfiesToast(htup, snapshot, buffer); > - break; > case SNAPSHOT_DIRTY: > return HeapTupleSatisfiesDirty(htup, snapshot, buffer); > - break; > case SNAPSHOT_HISTORIC_MVCC: > return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer); > - break; > case SNAPSHOT_NON_VACUUMABLE: > return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer); > - break; > } Not sure what the author of this code, a certain Mr Freund, was thinking when he added those returns... > From 9d8b01960463dc64ff5b111d523ff80fce3017af Mon Sep 17 00:00:00 2001 > From: Melanie Plageman <melanieplageman@gmail.com> > Date: Mon, 31 Oct 2022 13:40:06 -0400 > Subject: [PATCH v1 2/3] Turn HeapKeyTest macro into function > > This should always be inlined appropriately now. It is easier to read as > a function. Also, remove unused include in catcache.c. > --- > src/backend/access/heap/heapam.c | 10 ++-- > src/backend/utils/cache/catcache.c | 1 - > src/include/access/valid.h | 76 ++++++++++++------------------ > 3 files changed, 36 insertions(+), 51 deletions(-) > > diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c > index 12be87efed..1c995faa12 100644 > --- a/src/backend/access/heap/heapam.c > +++ b/src/backend/access/heap/heapam.c > @@ -716,8 +716,10 @@ heapgettup(HeapScanDesc scan, > snapshot); > > if (valid && key != NULL) > - HeapKeyTest(tuple, RelationGetDescr(scan->rs_base.rs_rd), > - nkeys, key, valid); > + { > + valid = HeapKeyTest(tuple, RelationGetDescr(scan->rs_base.rs_rd), > + nkeys, key); > + } > > if (valid) > { superfluous parens. > --- a/src/include/access/valid.h > +++ b/src/include/access/valid.h > @@ -19,51 +19,35 @@ > * > * Test a heap tuple to see if it satisfies a scan key. > */ > -#define HeapKeyTest(tuple, \ > - tupdesc, \ > - nkeys, \ > - keys, \ > - result) \ > -do \ > -{ \ > - /* Use underscores to protect the variables passed in as parameters */ \ > - int __cur_nkeys = (nkeys); \ > - ScanKey __cur_keys = (keys); \ > - \ > - (result) = true; /* may change */ \ > - for (; __cur_nkeys--; __cur_keys++) \ > - { \ > - Datum __atp; \ > - bool __isnull; \ > - Datum __test; \ > - \ > - if (__cur_keys->sk_flags & SK_ISNULL) \ > - { \ > - (result) = false; \ > - break; \ > - } \ > - \ > - __atp = heap_getattr((tuple), \ > - __cur_keys->sk_attno, \ > - (tupdesc), \ > - &__isnull); \ > - \ > - if (__isnull) \ > - { \ > - (result) = false; \ > - break; \ > - } \ > - \ > - __test = FunctionCall2Coll(&__cur_keys->sk_func, \ > - __cur_keys->sk_collation, \ > - __atp, __cur_keys->sk_argument); \ > - \ > - if (!DatumGetBool(__test)) \ > - { \ > - (result) = false; \ > - break; \ > - } \ > - } \ > -} while (0) > +static inline bool > +HeapKeyTest(HeapTuple tuple, TupleDesc tupdesc, int nkeys, ScanKey keys) > +{ > + int cur_nkeys = nkeys; > + ScanKey cur_key = keys; > + > + for (; cur_nkeys--; cur_key++) > + { > + Datum atp; > + bool isnull; > + Datum test; > + > + if (cur_key->sk_flags & SK_ISNULL) > + return false; > + > + atp = heap_getattr(tuple, cur_key->sk_attno, tupdesc, &isnull); > + > + if (isnull) > + return false; > + > + test = FunctionCall2Coll(&cur_key->sk_func, > + cur_key->sk_collation, > + atp, cur_key->sk_argument); > + > + if (!DatumGetBool(test)) > + return false; > + } > + > + return true; > +} Seems like a simple and nice win in readability. I recall looking at this in the past and thinking that there was some additional subtlety here, but I can't see what that'd be. > From a894ce38c488df6546392b9f3bd894b67edf951e Mon Sep 17 00:00:00 2001 > From: Melanie Plageman <melanieplageman@gmail.com> > Date: Mon, 31 Oct 2022 13:40:29 -0400 > Subject: [PATCH v1 3/3] Refactor heapgettup* and heapgetpage > > Simplify heapgettup(), heapgettup_pagemode(), and heapgetpage(). All > three contained several unnecessary local variables, duplicate code, and > nested if statements. Streamlining these improves readability and > extensibility. It'd be nice to break this into slightly smaller chunks. > + > +static inline void heapgettup_no_movement(HeapScanDesc scan) > +{ FWIW, for function definitions we keep the return type (and with that also the the "static inline") on a separate line. > + ItemId lpp; > + OffsetNumber lineoff; > + BlockNumber page; > + Page dp; > + HeapTuple tuple = &(scan->rs_ctup); > + /* > + * ``no movement'' scan direction: refetch prior tuple > + */ > + > + /* Since the tuple was previously fetched, needn't lock page here */ > + if (!scan->rs_inited) > + { > + Assert(!BufferIsValid(scan->rs_cbuf)); > + tuple->t_data = NULL; > + return; Is it possible to have a no-movement scan with an uninitialized scan? That doesn't really seem to make sense. At least that's how I understand the explanation for NoMovement nearby: * dir == NoMovementScanDirection means "re-fetch the tuple indicated * by scan->rs_ctup". We can't have a rs_ctup without an already started scan, no? Looks like this is pre-existing code that you just moved, but it still seems wrong. > + } > + page = ItemPointerGetBlockNumber(&(tuple->t_self)); > + if (page != scan->rs_cblock) > + heapgetpage((TableScanDesc) scan, page); We have a BlockNumber page; and Page dp; in this code which seems almost intentionally confusing. This again is a pre-existing issue but perhaps we could clean it up first? > +static inline Page heapgettup_continue_page(HeapScanDesc scan, BlockNumber page, ScanDirection dir, > + int *linesleft, OffsetNumber *lineoff) > +{ > + HeapTuple tuple = &(scan->rs_ctup); Hm. Finding the next offset via rs_ctup doesn't seem quite right. For one, it's not actually that cheap to extract the offset from an ItemPointer because of the the way we pack it into ItemPointerData. > + Page dp = BufferGetPage(scan->rs_cbuf); > + TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, dp); Newlines between definitions and code :) Perhaps worth asserting that the scan is initialized and that rs_cbuf is valid? > + if (ScanDirectionIsForward(dir)) > + { > + *lineoff = OffsetNumberNext(ItemPointerGetOffsetNumber(&(tuple->t_self))); > + *linesleft = PageGetMaxOffsetNumber(dp) - (*lineoff) + 1; We can't access PageGetMaxOffsetNumber etc without holding a lock on the page. It's not immediately obvious that that is held in all paths. > +static inline BlockNumber heapgettup_initial_page(HeapScanDesc scan, ScanDirection dir) > +{ > + Assert(!ScanDirectionIsNoMovement(dir)); > + Assert(!scan->rs_inited); Is there a reason we couldn't set rs_inited in here, rather than reapeating that in all callers? ISTM that this function should deal with the /* * return null immediately if relation is empty */ logic, I think you now are repeating that check on every call to heapgettup(). > @@ -511,182 +711,55 @@ heapgettup(HeapScanDesc scan, > ScanKey key) > { > HeapTuple tuple = &(scan->rs_ctup); > - Snapshot snapshot = scan->rs_base.rs_snapshot; > - bool backward = ScanDirectionIsBackward(dir); > BlockNumber page; > - bool finished; > Page dp; > - int lines; > OffsetNumber lineoff; > int linesleft; > - ItemId lpp; > + > + if (ScanDirectionIsNoMovement(dir)) > + return heapgettup_no_movement(scan); Maybe add an unlikely() - this path is barely ever used... > /* > - * calculate next starting lineoff, given scan direction > + * return null immediately if relation is empty > */ > - if (ScanDirectionIsForward(dir)) > + if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) > { As mentioned above, I don't think we should repeat the nblocks check on every call. > + page = scan->rs_cblock; > + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); > + dp = heapgettup_continue_page(scan, page, dir, &linesleft, &lineoff); > + goto continue_page; > } > > /* > * advance the scan until we find a qualifying tuple or run out of stuff > * to scan > */ > - lpp = PageGetItemId(dp, lineoff); > - for (;;) > + while (page != InvalidBlockNumber) > { > + heapgetpage((TableScanDesc) scan, page); > + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); > + dp = heapgettup_start_page(scan, page, dir, &linesleft, &lineoff); > + continue_page: I don't like the goto continue_page at all. Seems that the paths leading here should call LockBuffer(), heapgettup_start_page() etc? Possibly a do {} while () loop could do the trick as well. Greetings, Andres Freund
pgsql-hackers by date: