Re: 9.2.1 & index-only scans : abnormal heap fetches after VACUUM FULL - Mailing list pgsql-hackers

From Andres Freund
Subject Re: 9.2.1 & index-only scans : abnormal heap fetches after VACUUM FULL
Date
Msg-id 20140215151640.GD19470@alap3.anarazel.de
Whole thread Raw
In response to Re: 9.2.1 & index-only scans : abnormal heap fetches after VACUUM FULL  (Bruce Momjian <bruce@momjian.us>)
Responses Re: 9.2.1 & index-only scans : abnormal heap fetches after VACUUM FULL  (Bruce Momjian <bruce@momjian.us>)
List pgsql-hackers
Hi,

> *************** end_heap_rewrite(RewriteState state)
> *** 281,286 ****
> --- 284,290 ----
>                           true);
>           RelationOpenSmgr(state->rs_new_rel);
>   
> +         update_page_vm(state->rs_new_rel, state->rs_buffer, state->rs_blockno);
>           PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);
>   
>           smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM, state->rs_blockno,
> *************** raw_heap_insert(RewriteState state, Heap
> *** 633,638 ****
> --- 637,643 ----
>                */
>               RelationOpenSmgr(state->rs_new_rel);
>   
> +             update_page_vm(state->rs_new_rel, page, state->rs_blockno);
>               PageSetChecksumInplace(page, state->rs_blockno);
>   
>               smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM,

I think those two cases should be combined.

> + static void
> + update_page_vm(Relation relation, Page page, BlockNumber blkno)
> + {
> +     Buffer        vmbuffer = InvalidBuffer;
> +     TransactionId visibility_cutoff_xid;
> + 
> +     visibilitymap_pin(relation, blkno, &vmbuffer);
> +     Assert(BufferIsValid(vmbuffer));
> + 
> +     if (!visibilitymap_test(relation, blkno, &vmbuffer) &&
> +         heap_page_is_all_visible(relation, InvalidBuffer, page,
> +                                  &visibility_cutoff_xid))
> +     {
> +         PageSetAllVisible(page);
> +         visibilitymap_set(relation, blkno, InvalidBuffer,
> +                           InvalidXLogRecPtr, vmbuffer, visibility_cutoff_xid);
> +     }
> +     ReleaseBuffer(vmbuffer);
> + }

How could visibilitymap_test() be true here?

> diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
> new file mode 100644
> index 899ffac..3e7546b
> *** a/src/backend/access/heap/visibilitymap.c
> --- b/src/backend/access/heap/visibilitymap.c
> *************** visibilitymap_set(Relation rel, BlockNum
> *** 257,263 ****
>   #endif
>   
>       Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
> -     Assert(InRecovery || BufferIsValid(heapBuf));
>   
>       /* Check that we have the right heap page pinned, if present */
>       if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk)
> --- 257,262 ----
> *************** visibilitymap_set(Relation rel, BlockNum
> *** 278,284 ****
>           map[mapByte] |= (1 << mapBit);
>           MarkBufferDirty(vmBuf);
>   
> !         if (RelationNeedsWAL(rel))
>           {
>               if (XLogRecPtrIsInvalid(recptr))
>               {
> --- 277,283 ----
>           map[mapByte] |= (1 << mapBit);
>           MarkBufferDirty(vmBuf);
>   
> !         if (RelationNeedsWAL(rel) && BufferIsValid(heapBuf))
>           {
>               if (XLogRecPtrIsInvalid(recptr))
>               {

At the very least this needs to revise visibilitymap_set's comment about
the requirement of passing a buffer unless !InRecovery.

Also, how is this going to work with replication if you're explicitly
not WAL logging this?


> *************** vac_cmp_itemptr(const void *left, const
> *** 1730,1743 ****
>    * transactions. Also return the visibility_cutoff_xid which is the highest
>    * xmin amongst the visible tuples.
>    */
> ! static bool
> ! heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid)
>   {
> -     Page        page = BufferGetPage(buf);
>       OffsetNumber offnum,
>                   maxoff;
>       bool        all_visible = true;
>   
>       *visibility_cutoff_xid = InvalidTransactionId;
>   
>       /*
> --- 1728,1744 ----
>    * transactions. Also return the visibility_cutoff_xid which is the highest
>    * xmin amongst the visible tuples.
>    */
> ! bool
> ! heap_page_is_all_visible(Relation rel, Buffer buf, Page page,
> !                          TransactionId *visibility_cutoff_xid)
>   {
>       OffsetNumber offnum,
>                   maxoff;
>       bool        all_visible = true;
>   
> +     if (BufferIsValid(buf))
> +         page = BufferGetPage(buf);
> + 
>       *visibility_cutoff_xid = InvalidTransactionId;
>   
>       /*
> *************** heap_page_is_all_visible(Relation rel, B
> *** 1758,1764 ****
>           if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
>               continue;
>   
> !         ItemPointerSet(&(tuple.t_self), BufferGetBlockNumber(buf), offnum);
>   
>           /*
>            * Dead line pointers can have index pointers pointing to them. So
> --- 1759,1767 ----
>           if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
>               continue;
>   
> !         /* XXX use 0 or real offset? */
> !         ItemPointerSet(&(tuple.t_self), BufferIsValid(buf) ?
> !                        BufferGetBlockNumber(buf) : 0, offnum);

How about passing in the page and block number from the outside and not
dealing with a buffer in here at all?

>           /*
>            * Dead line pointers can have index pointers pointing to them. So
> diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
> new file mode 100644
> index f626755..b37ecc4
> *** a/src/backend/utils/time/tqual.c
> --- b/src/backend/utils/time/tqual.c
> *************** static inline void
> *** 108,113 ****
> --- 108,117 ----
>   SetHintBits(HeapTupleHeader tuple, Buffer buffer,
>               uint16 infomask, TransactionId xid)
>   {
> +     /* we might not have a buffer if we are doing raw_heap_insert() */
> +     if (!BufferIsValid(buffer))
> +         return;
> + 
>       if (TransactionIdIsValid(xid))
>       {
>           /* NB: xid must be known committed here! */

This looks seriously wrong to me.

I think the whole idea of doing this in private memory is bad. The
visibility routines aren't written for that, and the above is just one
instance of that, and I am far from convinced it's the only one. So you
either need to work out how to rely on the visibility checking done in
cluster.c, or you need to modify rewriteheap.c to write via
shared_buffers.

Greetings,

Andres Freund



pgsql-hackers by date:

Previous
From: Peter Eisentraut
Date:
Subject: Re: Create function prototype as part of PG_FUNCTION_INFO_V1
Next
From: Tom Lane
Date:
Subject: Re: narwhal and PGDLLIMPORT