Avoiding re-creation of uuid_t state with OSSP UUID - Mailing list pgsql-hackers

From Tom Lane
Subject Avoiding re-creation of uuid_t state with OSSP UUID
Date
Msg-id 26394.1401326589@sss.pgh.pa.us
Whole thread Raw
List pgsql-hackers
While mucking around with contrib/uuid-ossp, I realized that its usage
of the OSSP UUID library is really rather broken: it creates and destroys
a uuid_t object for each call of the UUID creation functions.  This is not
the way you're supposed to use that library.  The uuid_t object is meant
to hold persistent state such as the system MAC address, so really we
ought to make such an object once per session and re-use it, as per the
attached proposed patch.  Doing it like this has multiple benefits:

* saving the cycles needed to create and destroy a uuid_t object, notably
including kernel calls to find out the system's MAC address.  On my
machine, this patch reduces the runtime of uuid_generate_v1() from about
16 microseconds to about 3.

* reducing the amount of entropy we draw from /dev/urandom.  strace'ing
shows that at least with RHEL6's version of OSSP UUID, each uuid_create
call reads 2 bytes from /dev/urandom, thus depleting the available
entropy system-wide.

* enabling the code to actually guarantee that successive V1-style UUIDs
are distinct.  OSSP remembers the last timestamp it read from the kernel,
and if the new one is the same, it increments the saved "clock sequence"
value to guarantee a distinct result.  But that logic all depends on
re-using a cached uuid_t!  Without that, if successive gettimeofday calls
produce the same result, we're at risk of producing duplicate UUIDs if
the clock sequence values are the same.  uuid_create does initialize the
clock sequence value to a random number, but since only 14 bits are
available for it in the V1 UUID format, the chance of collision is 1 in
16K.  Now admittedly you'd need a machine noticeably faster than mine to
get duplicate timestamps with the existing code, but we're not that far
away from having it happen.

So I think the existing code is not only rather broken, but its effects
on the system entropy pool are not far short of being a security bug.
Accordingly, I'd like to not only apply this to HEAD but back-patch it.
Comments?

            regards, tom lane

diff --git a/contrib/uuid-ossp/uuid-ossp.c b/contrib/uuid-ossp/uuid-ossp.c
index 88da168..9e9905b 100644
*** a/contrib/uuid-ossp/uuid-ossp.c
--- b/contrib/uuid-ossp/uuid-ossp.c
*************** pguuid_complain(uuid_rc_t rc)
*** 141,146 ****
--- 141,182 ----
                   errmsg("OSSP uuid library failure: error code %d", rc)));
  }

+ /*
+  * We create a uuid_t object just once per session and re-use it for all
+  * operations in this module.  OSSP UUID caches the system MAC address and
+  * other state in this object.  Reusing the object has a number of benefits:
+  * saving the cycles needed to fetch the system MAC address over and over,
+  * reducing the amount of entropy we draw from /dev/urandom, and providing a
+  * positive guarantee that successive generated V1-style UUIDs don't collide.
+  * (On a machine fast enough to generate multiple UUIDs per microsecond,
+  * or whatever the system's wall-clock resolution is, we'd otherwise risk
+  * collisions whenever random initialization of the uuid_t's clock sequence
+  * value chanced to produce duplicates.)
+  *
+  * However: when we're doing V3 or V5 UUID creation, uuid_make needs two
+  * uuid_t objects, one holding the namespace UUID and one for the result.
+  * It's unspecified whether it's safe to use the same uuid_t for both cases,
+  * so let's cache a second uuid_t for use as the namespace holder object.
+  */
+ static uuid_t *
+ get_cached_uuid_t(int which)
+ {
+     static uuid_t *cached_uuid[2] = {NULL, NULL};
+
+     if (cached_uuid[which] == NULL)
+     {
+         uuid_rc_t    rc;
+
+         rc = uuid_create(&cached_uuid[which]);
+         if (rc != UUID_RC_OK)
+         {
+             cached_uuid[which] = NULL;
+             pguuid_complain(rc);
+         }
+     }
+     return cached_uuid[which];
+ }
+
  static char *
  uuid_to_string(const uuid_t *uuid)
  {
*************** string_to_uuid(const char *str, uuid_t *
*** 171,190 ****
  static Datum
  special_uuid_value(const char *name)
  {
!     uuid_t       *uuid;
      char       *str;
      uuid_rc_t    rc;

-     rc = uuid_create(&uuid);
-     if (rc != UUID_RC_OK)
-         pguuid_complain(rc);
      rc = uuid_load(uuid, name);
      if (rc != UUID_RC_OK)
          pguuid_complain(rc);
      str = uuid_to_string(uuid);
-     rc = uuid_destroy(uuid);
-     if (rc != UUID_RC_OK)
-         pguuid_complain(rc);

      return DirectFunctionCall1(uuid_in, CStringGetDatum(str));
  }
--- 207,220 ----
  static Datum
  special_uuid_value(const char *name)
  {
!     uuid_t       *uuid = get_cached_uuid_t(0);
      char       *str;
      uuid_rc_t    rc;

      rc = uuid_load(uuid, name);
      if (rc != UUID_RC_OK)
          pguuid_complain(rc);
      str = uuid_to_string(uuid);

      return DirectFunctionCall1(uuid_in, CStringGetDatum(str));
  }
*************** special_uuid_value(const char *name)
*** 193,212 ****
  static Datum
  uuid_generate_internal(int mode, const uuid_t *ns, const char *name, int len)
  {
!     uuid_t       *uuid;
      char       *str;
      uuid_rc_t    rc;

-     rc = uuid_create(&uuid);
-     if (rc != UUID_RC_OK)
-         pguuid_complain(rc);
      rc = uuid_make(uuid, mode, ns, name);
      if (rc != UUID_RC_OK)
          pguuid_complain(rc);
      str = uuid_to_string(uuid);
-     rc = uuid_destroy(uuid);
-     if (rc != UUID_RC_OK)
-         pguuid_complain(rc);

      return DirectFunctionCall1(uuid_in, CStringGetDatum(str));
  }
--- 223,236 ----
  static Datum
  uuid_generate_internal(int mode, const uuid_t *ns, const char *name, int len)
  {
!     uuid_t       *uuid = get_cached_uuid_t(0);
      char       *str;
      uuid_rc_t    rc;

      rc = uuid_make(uuid, mode, ns, name);
      if (rc != UUID_RC_OK)
          pguuid_complain(rc);
      str = uuid_to_string(uuid);

      return DirectFunctionCall1(uuid_in, CStringGetDatum(str));
  }
*************** uuid_generate_internal(int mode, const u
*** 215,240 ****
  static Datum
  uuid_generate_v35_internal(int mode, pg_uuid_t *ns, text *name)
  {
!     uuid_t       *ns_uuid;
!     Datum        result;
!     uuid_rc_t    rc;

!     rc = uuid_create(&ns_uuid);
!     if (rc != UUID_RC_OK)
!         pguuid_complain(rc);
!     string_to_uuid(DatumGetCString(DirectFunctionCall1(uuid_out, UUIDPGetDatum(ns))),
                     ns_uuid);

!     result = uuid_generate_internal(mode,
!                                     ns_uuid,
!                                     text_to_cstring(name),
!                                     0);
!
!     rc = uuid_destroy(ns_uuid);
!     if (rc != UUID_RC_OK)
!         pguuid_complain(rc);
!
!     return result;
  }

  #else                            /* !HAVE_UUID_OSSP */
--- 239,254 ----
  static Datum
  uuid_generate_v35_internal(int mode, pg_uuid_t *ns, text *name)
  {
!     uuid_t       *ns_uuid = get_cached_uuid_t(1);

!     string_to_uuid(DatumGetCString(DirectFunctionCall1(uuid_out,
!                                                        UUIDPGetDatum(ns))),
                     ns_uuid);

!     return uuid_generate_internal(mode,
!                                   ns_uuid,
!                                   text_to_cstring(name),
!                                   0);
  }

  #else                            /* !HAVE_UUID_OSSP */

pgsql-hackers by date:

Previous
From: Peter Geoghegan
Date:
Subject: Re: Extended Prefetching using Asynchronous IO - proposal and patch
Next
From: Vik Fearing
Date:
Subject: Re: SQL access to database attributes