Re: reducing the footprint of ScanKeyword (was Re: Large writable variables) - Mailing list pgsql-hackers

From Tom Lane
Subject Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)
Date
Msg-id 8151.1546984446@sss.pgh.pa.us
Whole thread Raw
In response to Re: reducing the footprint of ScanKeyword (was Re: Large writablevariables)  (Andres Freund <andres@anarazel.de>)
Responses Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
Andres Freund <andres@anarazel.de> writes:
> On 2019-01-08 13:41:16 -0500, John Naylor wrote:
>> Do you mean the fmgr table?

> Not the entire fmgr table, but just the builtin oid index, generated by
> the following section:
> ...
> The generated fmgr_builtin_oid_index is pretty sparse, and a more dense
> hashtable might e.g. more efficient from a cache perspective.

I experimented with this, but TBH I think it's a dead loss.  We currently
have 2768 built-in functions, so the perfect hash table requires 5537
int16 entries, which is not *that* much less than the 10000 entries
that are in fmgr_builtin_oid_index presently.  When you consider the
extra cycles needed to do the hashing, and the fact that you have to
touch (usually) two cache lines not one in the lookup table, it's hard
to see how this could net out as a win performance-wise.

Also, I fail to understand why fmgr_builtin_oid_index has 10000 entries
anyway.  We could easily have fmgrtab.c expose the last actually assigned
builtin function OID (presently 6121) and make the index array only
that big, which just about eliminates the space advantage completely.

BTW, I found out while trying this that Joerg's fear of the hash
multipliers being too simplistic is valid: the perfect hash generator
failed until I changed them.  I picked a larger value that should be
just as easy to use for shift-and-add purposes.

            regards, tom lane

diff --git a/src/backend/utils/Gen_fmgrtab.pl b/src/backend/utils/Gen_fmgrtab.pl
index cafe408..9fceb60 100644
*** a/src/backend/utils/Gen_fmgrtab.pl
--- b/src/backend/utils/Gen_fmgrtab.pl
*************** use Catalog;
*** 18,23 ****
--- 18,24 ----

  use strict;
  use warnings;
+ use PerfectHash;

  # Collect arguments
  my @input_files;
*************** foreach my $s (sort { $a->{oid} <=> $b->
*** 219,237 ****
      print $pfh "extern Datum $s->{prosrc}(PG_FUNCTION_ARGS);\n";
  }

! # Create the fmgr_builtins table, collect data for fmgr_builtin_oid_index
  print $tfh "\nconst FmgrBuiltin fmgr_builtins[] = {\n";
  my %bmap;
  $bmap{'t'} = 'true';
  $bmap{'f'} = 'false';
! my @fmgr_builtin_oid_index;
  my $fmgr_count = 0;
  foreach my $s (sort { $a->{oid} <=> $b->{oid} } @fmgr)
  {
      print $tfh
        "  { $s->{oid}, $s->{nargs}, $bmap{$s->{strict}}, $bmap{$s->{retset}}, \"$s->{prosrc}\", $s->{prosrc} }";

!     $fmgr_builtin_oid_index[ $s->{oid} ] = $fmgr_count++;

      if ($fmgr_count <= $#fmgr)
      {
--- 220,244 ----
      print $pfh "extern Datum $s->{prosrc}(PG_FUNCTION_ARGS);\n";
  }

! # Create the fmgr_builtins table, collect data for hash function
  print $tfh "\nconst FmgrBuiltin fmgr_builtins[] = {\n";
  my %bmap;
  $bmap{'t'} = 'true';
  $bmap{'f'} = 'false';
! my @fmgr_builtin_oids;
! my $prev_oid = 0;
  my $fmgr_count = 0;
  foreach my $s (sort { $a->{oid} <=> $b->{oid} } @fmgr)
  {
      print $tfh
        "  { $s->{oid}, $s->{nargs}, $bmap{$s->{strict}}, $bmap{$s->{retset}}, \"$s->{prosrc}\", $s->{prosrc} }";

!     die "duplicate OIDs" if $s->{oid} <= $prev_oid;
!     $prev_oid = $s->{oid};
!
!     push @fmgr_builtin_oids, pack("n", $s->{oid});
!
!     $fmgr_count++;

      if ($fmgr_count <= $#fmgr)
      {
*************** print $tfh "};\n";
*** 246,283 ****

  print $tfh qq|
  const int fmgr_nbuiltins = (sizeof(fmgr_builtins) / sizeof(FmgrBuiltin));
- |;

-
- # Create fmgr_builtins_oid_index table.
- #
- # Note that the array has to be filled up to FirstGenbkiObjectId,
- # as we can't rely on zero initialization as 0 is a valid mapping.
- print $tfh qq|
- const uint16 fmgr_builtin_oid_index[FirstGenbkiObjectId] = {
  |;

- for (my $i = 0; $i < $FirstGenbkiObjectId; $i++)
- {
-     my $oid = $fmgr_builtin_oid_index[$i];

!     # fmgr_builtin_oid_index is sparse, map nonexistant functions to
!     # InvalidOidBuiltinMapping
!     if (not defined $oid)
!     {
!         $oid = 'InvalidOidBuiltinMapping';
!     }

!     if ($i + 1 == $FirstGenbkiObjectId)
!     {
!         print $tfh "  $oid\n";
!     }
!     else
!     {
!         print $tfh "  $oid,\n";
!     }
! }
! print $tfh "};\n";


  # And add the file footers.
--- 253,267 ----

  print $tfh qq|
  const int fmgr_nbuiltins = (sizeof(fmgr_builtins) / sizeof(FmgrBuiltin));

  |;


! # Create perfect hash function for searching fmgr_builtin by OID.

! print $tfh PerfectHash::generate_hash_function(\@fmgr_builtin_oids,
!                            "fmgr_builtin_oid_hash",
!                            0);


  # And add the file footers.
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index b41649f..ad93032 100644
*** a/src/backend/utils/fmgr/fmgr.c
--- b/src/backend/utils/fmgr/fmgr.c
*************** extern Datum fmgr_security_definer(PG_FU
*** 72,92 ****
  static const FmgrBuiltin *
  fmgr_isbuiltin(Oid id)
  {
!     uint16        index;

      /* fast lookup only possible if original oid still assigned */
      if (id >= FirstGenbkiObjectId)
          return NULL;

      /*
!      * Lookup function data. If there's a miss in that range it's likely a
!      * nonexistant function, returning NULL here will trigger an ERROR later.
       */
!     index = fmgr_builtin_oid_index[id];
!     if (index == InvalidOidBuiltinMapping)
          return NULL;

!     return &fmgr_builtins[index];
  }

  /*
--- 72,103 ----
  static const FmgrBuiltin *
  fmgr_isbuiltin(Oid id)
  {
!     const FmgrBuiltin *result;
!     uint16        hashkey;
!     int index;

      /* fast lookup only possible if original oid still assigned */
      if (id >= FirstGenbkiObjectId)
          return NULL;

      /*
!      * Lookup function data.  The hash key for this is the low-order 16 bits
!      * of the OID, in network byte order.
       */
!     hashkey = htons(id);
!     index = fmgr_builtin_oid_hash(&hashkey, sizeof(hashkey));
!
!     /* Out-of-range hash result means definitely no match */
!     if (index < 0 || index >= fmgr_nbuiltins)
          return NULL;

!     result = &fmgr_builtins[index];
!
!     /* We have to verify the match, though */
!     if (id != result->foid)
!         return NULL;
!
!     return result;
  }

  /*
diff --git a/src/include/utils/fmgrtab.h b/src/include/utils/fmgrtab.h
index a778f88..f27aff5 100644
*** a/src/include/utils/fmgrtab.h
--- b/src/include/utils/fmgrtab.h
*************** extern const FmgrBuiltin fmgr_builtins[]
*** 36,46 ****

  extern const int fmgr_nbuiltins;    /* number of entries in table */

! /*
!  * Mapping from a builtin function's oid to the index in the fmgr_builtins
!  * array.
!  */
! #define InvalidOidBuiltinMapping PG_UINT16_MAX
! extern const uint16 fmgr_builtin_oid_index[FirstGenbkiObjectId];

  #endif                            /* FMGRTAB_H */
--- 36,41 ----

  extern const int fmgr_nbuiltins;    /* number of entries in table */

! extern int fmgr_builtin_oid_hash(const void *key, size_t keylen);

  #endif                            /* FMGRTAB_H */
diff --git a/src/tools/PerfectHash.pm b/src/tools/PerfectHash.pm
index 34d55cf..862357b 100644
*** a/src/tools/PerfectHash.pm
--- b/src/tools/PerfectHash.pm
*************** sub generate_hash_function
*** 77,83 ****
      # but these two multipliers are chosen to be cheap to calculate via
      # shift-and-add, so don't change them except at great need.
      my $hash_mult1 = 31;
!     my $hash_mult2 = 37;

      # We just try successive hash seed values until we find one that works.
      # (Commonly, random seeds are tried, but we want reproducible results
--- 77,83 ----
      # but these two multipliers are chosen to be cheap to calculate via
      # shift-and-add, so don't change them except at great need.
      my $hash_mult1 = 31;
!     my $hash_mult2 = 1029;

      # We just try successive hash seed values until we find one that works.
      # (Commonly, random seeds are tried, but we want reproducible results

pgsql-hackers by date:

Previous
From: CNG L
Date:
Subject: Question about autovacuum function autovac_balance_cost()
Next
From: John Naylor
Date:
Subject: Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)