Re: extensible enum types - Mailing list pgsql-hackers

From Andrew Dunstan
Subject Re: extensible enum types
Date
Msg-id 4C202239.5070901@dunslane.net
Whole thread Raw
In response to Re: extensible enum types  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers

Tom Lane wrote:
> Andrew Dunstan <andrew@dunslane.net> writes:
>
>> Tom Lane wrote:
>>
>>> Well, having to do a cache lookup already makes it a couple orders of
>>> magnitude more expensive than an OID comparison.  However, it's hard to
>>> say how much that matters in terms of total application performance.
>>> We really could do with a bit of performance testing here ...
>>>
>
>
>> I have done some. The performance hit is fairly horrible. Adding cache
>> lookups for the enum rows to the comarison routines made a REINDEX on a
>> 1m row table where the index is on an enum column (the enum has 500
>> regards, tom lane
>>
>> randomly ordered labels) jump from around 10s to around 70s.
>>
>
> Hmmm... that's bad, but I bet it's still less than the cost of comparing
> NUMERICs.  Also, did you make any attempt to avoid repetitive cache
> lookups by storing a pointer in fn_extra (cf array comparisons)?
>
>
>

OK, making a bit of progress. Attached is a sort of proof of concept
patch that does that. It stores a bsearchable list of {enum, sort_order}
pairs in fn_extra, along with a flag that indicates if the oids are  in
fact ordered. This flag, which would be maintained in and populated from
pg_type, would allow avoidance of any significant performance penalty in
such cases by relying on straight Oid comparison. We'd probably need to
keep a count of labels in pg_type too so we could size the cache
appropriately. This approach just about buys the best of both worlds.
The execution time for the test mentioned above is down from around 70s
to around 20s. I think for a worst case that's not too bad, especially
when it is completely avoided unless we have perturbed the sort order.

If anyone wants to play along, my test set is available at
<http://developer.postgresql.org/~adunstan/enumtest.dmp> It's about 8.5Mb.

cheers

andrew
Index: enum.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/enum.c,v
retrieving revision 1.11
diff -c -r1.11 enum.c
*** enum.c    26 Feb 2010 02:01:08 -0000    1.11
--- enum.c    22 Jun 2010 02:16:48 -0000
***************
*** 14,19 ****
--- 14,20 ----
  #include "postgres.h"

  #include "catalog/pg_enum.h"
+ #include "catalog/pg_type.h"
  #include "fmgr.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
***************
*** 22,27 ****
--- 23,52 ----
  #include "libpq/pqformat.h"
  #include "miscadmin.h"

+ typedef struct
+ {
+     Oid      enum_oid;
+     uint32   sort_order;
+ } enum_sort;
+
+ typedef struct
+ {
+     bool      oids_are_sorted;
+     int       sort_list_length;
+     enum_sort sort_order_list[1024];
+ } enum_sort_cache;
+
+
+ static int
+ enum_sort_cmp(void * es1, void * es2)
+ {
+     enum_sort *p1, *p2;
+     p1 = (enum_sort *)es1;
+     p2 = (enum_sort *)es2;
+     return p1->enum_oid - p2->enum_oid;
+ }
+
+

  static ArrayType *enum_range_internal(Oid enumtypoid, Oid lower, Oid upper);
  static int    enum_elem_cmp(const void *left, const void *right);
***************
*** 155,167 ****

  /* Comparison functions and related */

  Datum
  enum_lt(PG_FUNCTION_ARGS)
  {
      Oid            a = PG_GETARG_OID(0);
      Oid            b = PG_GETARG_OID(1);

!     PG_RETURN_BOOL(a < b);
  }

  Datum
--- 180,283 ----

  /* Comparison functions and related */

+ static inline int
+ enum_ccmp(Oid arg1, Oid arg2, FunctionCallInfo fcinfo)
+ {
+
+     enum_sort_cache * mycache;
+     enum_sort *es1, *es2;
+     int sort1, sort2;
+     bool added = false;
+     HeapTuple    tup;
+     Form_pg_enum en;
+     Oid typeoid;
+     Form_pg_type typ;
+
+     if (arg1 == arg2)
+         return 0;
+
+     mycache = (enum_sort_cache *) fcinfo->flinfo->fn_extra;
+     if (mycache == NULL )
+     {
+         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+                                                       sizeof(enum_sort_cache));
+         mycache = (enum_sort_cache *) fcinfo->flinfo->fn_extra;
+         mycache->sort_list_length = 1;
+         tup = SearchSysCache1(ENUMOID, ObjectIdGetDatum(arg1));
+         en = (Form_pg_enum) GETSTRUCT(tup);
+         mycache->sort_order_list[0].enum_oid = arg1;
+         mycache->sort_order_list[0].sort_order = arg1;
+         typeoid = en->enumtypid;
+         ReleaseSysCache(tup);
+         tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
+         typ = (Form_pg_type)  GETSTRUCT(tup);
+         if (typ->typtype != 'e')
+             elog(ERROR,"wrong type for oid %u",typeoid);
+         /* XXX TODO fill in oids_are_sorted property from type tuple here */
+         mycache->oids_are_sorted = false;
+         ReleaseSysCache(tup);
+     }
+
+     if (mycache->oids_are_sorted)
+         return arg1 - arg2;
+
+     es1 = bsearch(&arg1,mycache->sort_order_list,mycache->sort_list_length,
+                   sizeof(enum_sort),enum_sort_cmp);
+     es2 = bsearch(&arg2,mycache->sort_order_list,mycache->sort_list_length,
+                   sizeof(enum_sort),enum_sort_cmp);
+
+     if (es1 == NULL)
+     {
+
+         tup = SearchSysCache1(ENUMOID, ObjectIdGetDatum(arg1));
+         en = (Form_pg_enum) GETSTRUCT(tup);
+         mycache->sort_order_list[mycache->sort_list_length].enum_oid = arg1;
+         sort1 = arg1; /* XXX should be en->enumsort */
+         mycache->sort_order_list[mycache->sort_list_length].sort_order =
+             sort1;
+         ReleaseSysCache(tup);
+         mycache->sort_list_length ++;
+         added = true;
+     }
+     else
+     {
+         /* already in cache */
+         sort1 = es1->sort_order;
+     }
+
+     if (es2 == NULL)
+     {
+
+         tup = SearchSysCache1(ENUMOID, ObjectIdGetDatum(arg2));
+         en = (Form_pg_enum) GETSTRUCT(tup);
+         sort2 = arg2; /* XXX should be en->enumsort */
+         mycache->sort_order_list[mycache->sort_list_length].enum_oid = arg2;
+         mycache->sort_order_list[mycache->sort_list_length].sort_order =
+             sort2;
+         ReleaseSysCache(tup);
+         mycache->sort_list_length ++;
+         added = true;
+     }
+     else
+     {
+         /* already in cache */
+         sort2 = es2->sort_order;
+     }
+
+     if (added)
+         qsort(mycache->sort_order_list,mycache->sort_list_length,
+               sizeof(enum_sort),enum_sort_cmp);
+
+     return sort1 - sort2;
+ }
+
  Datum
  enum_lt(PG_FUNCTION_ARGS)
  {
      Oid            a = PG_GETARG_OID(0);
      Oid            b = PG_GETARG_OID(1);

!     PG_RETURN_BOOL(enum_ccmp(a,b,fcinfo) < 0);
  }

  Datum
***************
*** 170,176 ****
      Oid            a = PG_GETARG_OID(0);
      Oid            b = PG_GETARG_OID(1);

!     PG_RETURN_BOOL(a <= b);
  }

  Datum
--- 286,292 ----
      Oid            a = PG_GETARG_OID(0);
      Oid            b = PG_GETARG_OID(1);

!     PG_RETURN_BOOL(enum_ccmp(a,b,fcinfo) <= 0);
  }

  Datum
***************
*** 197,203 ****
      Oid            a = PG_GETARG_OID(0);
      Oid            b = PG_GETARG_OID(1);

!     PG_RETURN_BOOL(a >= b);
  }

  Datum
--- 313,319 ----
      Oid            a = PG_GETARG_OID(0);
      Oid            b = PG_GETARG_OID(1);

!     PG_RETURN_BOOL(enum_ccmp(a,b,fcinfo) >= 0);
  }

  Datum
***************
*** 206,212 ****
      Oid            a = PG_GETARG_OID(0);
      Oid            b = PG_GETARG_OID(1);

!     PG_RETURN_BOOL(a > b);
  }

  Datum
--- 322,328 ----
      Oid            a = PG_GETARG_OID(0);
      Oid            b = PG_GETARG_OID(1);

!     PG_RETURN_BOOL(enum_ccmp(a,b,fcinfo) > 0);
  }

  Datum
***************
*** 233,242 ****
      Oid            a = PG_GETARG_OID(0);
      Oid            b = PG_GETARG_OID(1);

!     if (a > b)
!         PG_RETURN_INT32(1);
!     else if (a == b)
          PG_RETURN_INT32(0);
      else
          PG_RETURN_INT32(-1);
  }
--- 349,358 ----
      Oid            a = PG_GETARG_OID(0);
      Oid            b = PG_GETARG_OID(1);

!     if (a == b)
          PG_RETURN_INT32(0);
+     else if (enum_ccmp(a,b,fcinfo) > 0)
+         PG_RETURN_INT32(1);
      else
          PG_RETURN_INT32(-1);
  }

pgsql-hackers by date:

Previous
From: Robert Haas
Date:
Subject: missing "else" in postmaster.c?
Next
From: Tom Lane
Date:
Subject: Re: what exactly is a PlaceHolderVar?