Better tracking of free space during SP-GiST index build - Mailing list pgsql-hackers

From Tom Lane
Subject Better tracking of free space during SP-GiST index build
Date
Msg-id 19621.1472082350@sss.pgh.pa.us
Whole thread Raw
Responses Re: Better tracking of free space during SP-GiST index build  (Tomas Vondra <tomas.vondra@2ndquadrant.com>)
List pgsql-hackers
Over in the thread about the SP-GiST inet opclass, I threatened to post
a patch like this, and here it is.

The basic idea is to track more than just the very latest page we've used
in each of the page categories that SP-GiST works with.  I started with an
arrangement that gave an equal number of cache slots to each category, but
soon realized that that was dumb, because there are usually way more leaf
pages than anything else.  So this version has a little table of how many
slots to give to each category.  The constants could maybe use a bit more
fiddling, if we have some more test data sets to try this on.

On the IRRExplorer data set we discussed in the other thread, this reduces
the index size from 132MB to 120MB.  Poking into that more closely with
pg_filedump, the total free space within the index drops from 42MB to
28MB.  If you think those numbers don't add up, you're right --- this
seems to result in more non-leaf tuples than before.  I'm not sure why;
maybe more aggressive sucking up of free space results in more splits.
(Maybe adjustment of the default spgist fillfactor would be in order
to counteract that?)  But the index search time doesn't seem to be hurt,
so perhaps there's nothing to worry about.

As coded, this makes no attempt to preferentially select pages with the
most or least free space.  I don't know if it'd be worth any cycles to
do that.

I'll put this in the commitfest queue.  It could use review from someone
with the time and motivation to do performance testing/tuning.

            regards, tom lane

diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index d570ae5..95c45fa 100644
*** a/src/backend/access/spgist/spgutils.c
--- b/src/backend/access/spgist/spgutils.c
***************
*** 27,32 ****
--- 27,56 ----


  /*
+  * This array defines how many entries of the lastUsedPages[] cache are
+  * reserved for index pages of each classification known to SpGistGetBuffer().
+  */
+ struct LUPMapEntry
+ {
+     int            count;
+     int            start;            /* must equal sum of preceding counts */
+ };
+
+ static const struct LUPMapEntry lastUsedPagesMap[] = {
+     {8, 0},                        /* inner pages, parity 0 */
+     {8, 8},                        /* inner pages, parity 1 */
+     {8, 16},                    /* inner pages, parity 2 */
+     {60, 24},                    /* leaf pages */
+     {2, 84},                    /* inner pages for nulls, parity 0 */
+     {2, 86},                    /* inner pages for nulls, parity 1 */
+     {2, 88},                    /* inner pages for nulls, parity 2 */
+     {10, 90}                    /* leaf pages for nulls */
+
+ #define LASTUSEDPAGESMAP_END 100    /* must equal SPGIST_CACHED_PAGES */
+ };
+
+
+ /*
   * SP-GiST handler function: return IndexAmRoutine with access method parameters
   * and callbacks.
   */
*************** spghandler(PG_FUNCTION_ARGS)
*** 35,40 ****
--- 59,67 ----
  {
      IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

+     StaticAssertStmt(LASTUSEDPAGESMAP_END == SPGIST_CACHED_PAGES,
+                      "lastUsedPagesMap[] does not match SPGIST_CACHED_PAGES");
+
      amroutine->amstrategies = 0;
      amroutine->amsupport = SPGISTNProc;
      amroutine->amcanorder = false;
*************** spgGetCache(Relation index)
*** 129,140 ****

          metadata = SpGistPageGetMeta(BufferGetPage(metabuffer));

!         if (metadata->magicNumber != SPGIST_MAGIC_NUMBER)
              elog(ERROR, "index \"%s\" is not an SP-GiST index",
                   RelationGetRelationName(index));

-         cache->lastUsedPages = metadata->lastUsedPages;
-
          UnlockReleaseBuffer(metabuffer);

          index->rd_amcache = (void *) cache;
--- 156,179 ----

          metadata = SpGistPageGetMeta(BufferGetPage(metabuffer));

!         if (metadata->magicNumber == SPGIST_MAGIC_NUMBER)
!             cache->lastUsedPages = metadata->lastUsedPages;
!         else if (metadata->magicNumber == SPGIST_OLD_MAGIC_NUMBER)
!         {
!             /*
!              * We could make an effort to slurp up the pages that exist in the
!              * old-format cache, but it's probably not worth the trouble. Just
!              * init our cache to empty.
!              */
!             int            i;
!
!             for (i = 0; i < SPGIST_CACHED_PAGES; i++)
!                 cache->lastUsedPages.cachedPage[i].blkno = InvalidBlockNumber;
!         }
!         else
              elog(ERROR, "index \"%s\" is not an SP-GiST index",
                   RelationGetRelationName(index));

          UnlockReleaseBuffer(metabuffer);

          index->rd_amcache = (void *) cache;
*************** SpGistUpdateMetaPage(Relation index)
*** 258,263 ****
--- 297,305 ----
          if (ConditionalLockBuffer(metabuffer))
          {
              metadata = SpGistPageGetMeta(BufferGetPage(metabuffer));
+
+             /* Update both the magic # and the cached page list */
+             metadata->magicNumber = SPGIST_MAGIC_NUMBER;
              metadata->lastUsedPages = cache->lastUsedPages;

              MarkBufferDirty(metabuffer);
*************** SpGistUpdateMetaPage(Relation index)
*** 270,279 ****
      }
  }

- /* Macro to select proper element of lastUsedPages cache depending on flags */
- /* Masking flags with SPGIST_CACHED_PAGES is just for paranoia's sake */
- #define GET_LUP(c, f)  (&(c)->lastUsedPages.cachedPage[((unsigned int) (f)) % SPGIST_CACHED_PAGES])
-
  /*
   * Allocate and initialize a new buffer of the type and parity specified by
   * flags.  The returned buffer is already pinned and exclusive-locked.
--- 312,317 ----
*************** SpGistUpdateMetaPage(Relation index)
*** 297,303 ****
  static Buffer
  allocNewBuffer(Relation index, int flags)
  {
-     SpGistCache *cache = spgGetCache(index);
      uint16        pageflags = 0;

      if (GBUF_REQ_LEAF(flags))
--- 335,340 ----
*************** allocNewBuffer(Relation index, int flags
*** 330,340 ****
              else
              {
                  /* Page has wrong parity, record it in cache and try again */
!                 if (pageflags & SPGIST_NULLS)
!                     blkFlags |= GBUF_NULLS;
!                 cache->lastUsedPages.cachedPage[blkFlags].blkno = blkno;
!                 cache->lastUsedPages.cachedPage[blkFlags].freeSpace =
!                     PageGetExactFreeSpace(BufferGetPage(buffer));
                  UnlockReleaseBuffer(buffer);
              }
          }
--- 367,373 ----
              else
              {
                  /* Page has wrong parity, record it in cache and try again */
!                 SpGistSetLastUsedPage(index, buffer);
                  UnlockReleaseBuffer(buffer);
              }
          }
*************** Buffer
*** 354,360 ****
  SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew)
  {
      SpGistCache *cache = spgGetCache(index);
!     SpGistLastUsedPage *lup;

      /* Bail out if even an empty page wouldn't meet the demand */
      if (needSpace > SPGIST_PAGE_CAPACITY)
--- 387,395 ----
  SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew)
  {
      SpGistCache *cache = spgGetCache(index);
!     int            lupstart,
!                 lupstop,
!                 i;

      /* Bail out if even an empty page wouldn't meet the demand */
      if (needSpace > SPGIST_PAGE_CAPACITY)
*************** SpGistGetBuffer(Relation index, int flag
*** 371,405 ****
                                                  SPGIST_DEFAULT_FILLFACTOR);
      needSpace = Min(needSpace, SPGIST_PAGE_CAPACITY);

!     /* Get the cache entry for this flags setting */
!     lup = GET_LUP(cache, flags);
!
!     /* If we have nothing cached, just turn it over to allocNewBuffer */
!     if (lup->blkno == InvalidBlockNumber)
!     {
!         *isNew = true;
!         return allocNewBuffer(index, flags);
!     }
!
!     /* fixed pages should never be in cache */
!     Assert(!SpGistBlockIsFixed(lup->blkno));

!     /* If cached freeSpace isn't enough, don't bother looking at the page */
!     if (lup->freeSpace >= needSpace)
      {
          Buffer        buffer;
          Page        page;

!         buffer = ReadBuffer(index, lup->blkno);

          if (!ConditionalLockBuffer(buffer))
          {
!             /*
!              * buffer is locked by another process, so return a new buffer
!              */
              ReleaseBuffer(buffer);
!             *isNew = true;
!             return allocNewBuffer(index, flags);
          }

          page = BufferGetPage(buffer);
--- 406,443 ----
                                                  SPGIST_DEFAULT_FILLFACTOR);
      needSpace = Min(needSpace, SPGIST_PAGE_CAPACITY);

!     /* Locate the cache entries for this flags setting */
!     lupstart = lastUsedPagesMap[flags].start;
!     lupstop = lastUsedPagesMap[flags].count + lupstart;

!     /*
!      * Search for a usable entry.  If there's more than one, we just choose
!      * the first one; is there a better heuristic?
!      */
!     for (i = lupstart; i < lupstop; i++)
      {
+         SpGistLastUsedPage *lup = &cache->lastUsedPages.cachedPage[i];
+         BlockNumber blkno = lup->blkno;
          Buffer        buffer;
          Page        page;

!         if (blkno == InvalidBlockNumber)
!             continue;
!
!         /* fixed pages should never be in cache */
!         Assert(!SpGistBlockIsFixed(blkno));
!
!         /* If cached freeSpace isn't enough, don't bother looking at the page */
!         if (lup->freeSpace < needSpace)
!             continue;
!
!         buffer = ReadBuffer(index, blkno);

          if (!ConditionalLockBuffer(buffer))
          {
!             /* buffer is locked by another process, so keep searching */
              ReleaseBuffer(buffer);
!             continue;
          }

          page = BufferGetPage(buffer);
*************** SpGistGetBuffer(Relation index, int flag
*** 414,419 ****
--- 452,459 ----
              if (GBUF_REQ_NULLS(flags))
                  pageflags |= SPGIST_NULLS;
              SpGistInitBuffer(buffer, pageflags);
+
+             /* Update freespace info and return the buffer */
              lup->freeSpace = PageGetExactFreeSpace(page) - needSpace;
              *isNew = true;
              return buffer;
*************** SpGistGetBuffer(Relation index, int flag
*** 437,445 ****
              }
          }

!         /*
!          * fallback to allocation of new buffer
!          */
          UnlockReleaseBuffer(buffer);
      }

--- 477,483 ----
              }
          }

!         /* Give up on this page, but keep scanning cache entries */
          UnlockReleaseBuffer(buffer);
      }

*************** SpGistGetBuffer(Relation index, int flag
*** 451,474 ****
  /*
   * Update lastUsedPages cache when done modifying a page.
   *
!  * We update the appropriate cache entry if it already contained this page
!  * (its freeSpace is likely obsolete), or if this page has more space than
!  * whatever we had cached.
   */
  void
  SpGistSetLastUsedPage(Relation index, Buffer buffer)
  {
      SpGistCache *cache = spgGetCache(index);
-     SpGistLastUsedPage *lup;
-     int            freeSpace;
      Page        page = BufferGetPage(buffer);
      BlockNumber blkno = BufferGetBlockNumber(buffer);
      int            flags;

      /* Never enter fixed pages (root pages) in cache, though */
      if (SpGistBlockIsFixed(blkno))
          return;

      if (SpGistPageIsLeaf(page))
          flags = GBUF_LEAF;
      else
--- 489,520 ----
  /*
   * Update lastUsedPages cache when done modifying a page.
   *
!  * If there's already a cache entry for this page, update its freeSpace.
!  * Otherwise, remember the page in the cache if it has more space than
!  * the least useful page we had before.
   */
  void
  SpGistSetLastUsedPage(Relation index, Buffer buffer)
  {
      SpGistCache *cache = spgGetCache(index);
      Page        page = BufferGetPage(buffer);
      BlockNumber blkno = BufferGetBlockNumber(buffer);
      int            flags;
+     int            freeSpace;
+     int            emptySlot,
+                 minFreeSlot,
+                 minFreeSpace;
+     int            lupstart,
+                 lupstop,
+                 i;

      /* Never enter fixed pages (root pages) in cache, though */
      if (SpGistBlockIsFixed(blkno))
          return;

+     /* Determine page's current free space and its category */
+     freeSpace = PageGetExactFreeSpace(page);
+
      if (SpGistPageIsLeaf(page))
          flags = GBUF_LEAF;
      else
*************** SpGistSetLastUsedPage(Relation index, Bu
*** 476,490 ****
      if (SpGistPageStoresNulls(page))
          flags |= GBUF_NULLS;

!     lup = GET_LUP(cache, flags);

!     freeSpace = PageGetExactFreeSpace(page);
!     if (lup->blkno == InvalidBlockNumber || lup->blkno == blkno ||
!         lup->freeSpace < freeSpace)
      {
!         lup->blkno = blkno;
!         lup->freeSpace = freeSpace;
      }
  }

  /*
--- 522,567 ----
      if (SpGistPageStoresNulls(page))
          flags |= GBUF_NULLS;

!     /* Locate the cache entries for this flags setting */
!     lupstart = lastUsedPagesMap[flags].start;
!     lupstop = lastUsedPagesMap[flags].count + lupstart;

!     /*
!      * If page is in the cache, just update its free space; otherwise identify
!      * best candidate slot to replace.
!      */
!     emptySlot = minFreeSlot = -1;
!     minFreeSpace = BLCKSZ + 1;
!     for (i = lupstart; i < lupstop; i++)
      {
!         SpGistLastUsedPage *lup = &cache->lastUsedPages.cachedPage[i];
!
!         if (lup->blkno == blkno)
!         {
!             lup->freeSpace = freeSpace;
!             return;
!         }
!         if (lup->blkno == InvalidBlockNumber)
!             emptySlot = i;
!         else if (lup->freeSpace < minFreeSpace)
!         {
!             minFreeSlot = i;
!             minFreeSpace = lup->freeSpace;
!         }
      }
+
+     if (emptySlot >= 0)
+         i = emptySlot;
+     else if (minFreeSlot >= 0 && minFreeSpace < freeSpace)
+         i = minFreeSlot;
+     else
+     {
+         /* There's no suitable slot, so just forget the page */
+         return;
+     }
+
+     cache->lastUsedPages.cachedPage[i].blkno = blkno;
+     cache->lastUsedPages.cachedPage[i].freeSpace = freeSpace;
  }

  /*
diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h
index cb8fa9c..66a4fb4 100644
*** a/src/include/access/spgist_private.h
--- b/src/include/access/spgist_private.h
*************** typedef struct SpGistLastUsedPage
*** 80,87 ****
      int            freeSpace;        /* page's free space (could be obsolete!) */
  } SpGistLastUsedPage;

! /* Note: indexes in cachedPage[] match flag assignments for SpGistGetBuffer */
! #define SPGIST_CACHED_PAGES 8

  typedef struct SpGistLUPCache
  {
--- 80,91 ----
      int            freeSpace;        /* page's free space (could be obsolete!) */
  } SpGistLastUsedPage;

! /*
!  * See spgutils.c for the assignments of indexes in cachedPage[].  Note that
!  * making SPGIST_CACHED_PAGES much larger than this would fail with the
!  * minimum allowed value of BLCKSZ, ie 1kB.
!  */
! #define SPGIST_CACHED_PAGES 100

  typedef struct SpGistLUPCache
  {
*************** typedef struct SpGistMetaPageData
*** 97,103 ****
      SpGistLUPCache lastUsedPages;        /* shared storage of last-used info */
  } SpGistMetaPageData;

! #define SPGIST_MAGIC_NUMBER (0xBA0BABEE)

  #define SpGistPageGetMeta(p) \
      ((SpGistMetaPageData *) PageGetContents(p))
--- 101,110 ----
      SpGistLUPCache lastUsedPages;        /* shared storage of last-used info */
  } SpGistMetaPageData;

! /* For current format: */
! #define SPGIST_MAGIC_NUMBER (0xBA0BABE1)
! /* For pre-v10 format (which had a smaller lastUsedPages array): */
! #define SPGIST_OLD_MAGIC_NUMBER (0xBA0BABEE)

  #define SpGistPageGetMeta(p) \
      ((SpGistMetaPageData *) PageGetContents(p))
*************** typedef struct spgxlogVacuumRedirect
*** 599,605 ****
   * In addition, GBUF_NULLS can be OR'd in to get a page for storage of
   * null-valued tuples.
   *
!  * Note: these flag values are used as indexes into lastUsedPages.
   */
  #define GBUF_LEAF                0x03
  #define GBUF_INNER_PARITY(x)    ((x) % 3)
--- 606,613 ----
   * In addition, GBUF_NULLS can be OR'd in to get a page for storage of
   * null-valued tuples.
   *
!  * Note: these flag values are used as indexes into spgutils.c's
!  * lastUsedPagesMap[].
   */
  #define GBUF_LEAF                0x03
  #define GBUF_INNER_PARITY(x)    ((x) % 3)

pgsql-hackers by date:

Previous
From: Martín Marqués
Date:
Subject: Re: pg_dump with tables created in schemas created by extensions
Next
From: "Tsunakawa, Takayuki"
Date:
Subject: Re: pg_stat_lwlock wait time view