Re: Performance improvements for src/port/snprintf.c - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Performance improvements for src/port/snprintf.c
Date
Msg-id 13178.1538794717@sss.pgh.pa.us
Whole thread Raw
In response to Re: Performance improvements for src/port/snprintf.c  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
I stepped back a bit from the raw performance question and thought about
what we actually want functionally in snprintf's float handling.  There
are a couple of points worth making:

* The fact that float[48]out explicitly handle NaN and Inf cases is a
leftover from when they had to cope with varying behavior of
platform-specific snprintf implementations.  Now that we've standardized
on snprintf.c, it makes a lot more sense to enforce standardized printing
of these values inside snprintf.c.  That not only avoids repeated tests
for these cases at different code levels, but ensures that the uniform
behavior exists for all our invocations of *printf, not just float[48]out.

* snprintf.c doesn't really work right for IEEE minus zero, as I recently
noted in another thread (<23662.1538067926@sss.pgh.pa.us>).  While this
is not of significance for float[48]out, it might be a problem for other
callers.  Now that we've enforced usage of snprintf.c across-the-board,
I think it's more important to worry about these corner cases.  It's not
that expensive to fix either; we can test for minus zero with something
like this:
        static const double dzero = 0.0;
        if (value == 0.0 &&
            memcmp(&value, &dzero, sizeof(double)) != 0)
(ie, "it's equal to zero but not bitwise equal to zero").  While that
looks like it might be expensive, I find that recent versions of both
gcc and clang can optimize the memcmp call down to something like
        cmpq    $0, 8(%rsp)
so I think it's well worth the cost to get this right.

The attached proposed patch addresses both of those points.

Also, in service of the performance angle, I went ahead and made a
roughly strfromd-like entry point in snprintf.c, but using an API
that doesn't force textual conversion of the precision spec.

As best I can tell, this patch puts the performance of float8out
on par with what it was in v11, measuring using a tight loop like

    while (count-- > 0)
    {
        char *str = float8out_internal(val);
        pfree(str);
        CHECK_FOR_INTERRUPTS();
    }

For me, this is within a percent or two either way on a couple of
different machines; that's within the noise level.

            regards, tom lane

diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index df35557..260377c 100644
*** a/src/backend/utils/adt/float.c
--- b/src/backend/utils/adt/float.c
*************** Datum
*** 243,272 ****
  float4out(PG_FUNCTION_ARGS)
  {
      float4        num = PG_GETARG_FLOAT4(0);
!     char       *ascii;
!
!     if (isnan(num))
!         PG_RETURN_CSTRING(pstrdup("NaN"));
!
!     switch (is_infinite(num))
!     {
!         case 1:
!             ascii = pstrdup("Infinity");
!             break;
!         case -1:
!             ascii = pstrdup("-Infinity");
!             break;
!         default:
!             {
!                 int            ndig = FLT_DIG + extra_float_digits;
!
!                 if (ndig < 1)
!                     ndig = 1;
!
!                 ascii = psprintf("%.*g", ndig, num);
!             }
!     }

      PG_RETURN_CSTRING(ascii);
  }

--- 243,252 ----
  float4out(PG_FUNCTION_ARGS)
  {
      float4        num = PG_GETARG_FLOAT4(0);
!     char       *ascii = (char *) palloc(32);
!     int            ndig = FLT_DIG + extra_float_digits;

+     (void) pg_strfromd(ascii, 32, ndig, num);
      PG_RETURN_CSTRING(ascii);
  }

*************** float8out(PG_FUNCTION_ARGS)
*** 479,508 ****
  char *
  float8out_internal(double num)
  {
!     char       *ascii;
!
!     if (isnan(num))
!         return pstrdup("NaN");
!
!     switch (is_infinite(num))
!     {
!         case 1:
!             ascii = pstrdup("Infinity");
!             break;
!         case -1:
!             ascii = pstrdup("-Infinity");
!             break;
!         default:
!             {
!                 int            ndig = DBL_DIG + extra_float_digits;
!
!                 if (ndig < 1)
!                     ndig = 1;
!
!                 ascii = psprintf("%.*g", ndig, num);
!             }
!     }

      return ascii;
  }

--- 459,468 ----
  char *
  float8out_internal(double num)
  {
!     char       *ascii = (char *) palloc(32);
!     int            ndig = DBL_DIG + extra_float_digits;

+     (void) pg_strfromd(ascii, 32, ndig, num);
      return ascii;
  }

diff --git a/src/include/port.h b/src/include/port.h
index e654d5c..0729c3f 100644
*** a/src/include/port.h
--- b/src/include/port.h
*************** extern int    pg_printf(const char *fmt,...
*** 187,192 ****
--- 187,195 ----
  #define fprintf            pg_fprintf
  #define printf(...)        pg_printf(__VA_ARGS__)

+ /* This is also provided by snprintf.c */
+ extern int    pg_strfromd(char *str, size_t count, int precision, double value);
+
  /* Replace strerror() with our own, somewhat more robust wrapper */
  extern char *pg_strerror(int errnum);
  #define strerror pg_strerror
diff --git a/src/port/snprintf.c b/src/port/snprintf.c
index ef496fa..897c683 100644
*** a/src/port/snprintf.c
--- b/src/port/snprintf.c
*************** fmtfloat(double value, char type, int fo
*** 1111,1120 ****
      int            zeropadlen = 0; /* amount to pad with zeroes */
      int            padlen;            /* amount to pad with spaces */

-     /* Handle sign (NaNs have no sign) */
-     if (!isnan(value) && adjust_sign((value < 0), forcesign, &signvalue))
-         value = -value;
-
      /*
       * We rely on the regular C library's sprintf to do the basic conversion,
       * then handle padding considerations here.
--- 1111,1116 ----
*************** fmtfloat(double value, char type, int fo
*** 1128,1161 ****
       * bytes and limit requested precision to 350 digits; this should prevent
       * buffer overrun even with non-IEEE math.  If the original precision
       * request was more than 350, separately pad with zeroes.
       */
      if (precision < 0)            /* cover possible overflow of "accum" */
          precision = 0;
      prec = Min(precision, 350);

!     if (pointflag)
      {
!         zeropadlen = precision - prec;
!         fmt[0] = '%';
!         fmt[1] = '.';
!         fmt[2] = '*';
!         fmt[3] = type;
!         fmt[4] = '\0';
!         vallen = sprintf(convert, fmt, prec, value);
      }
      else
      {
!         fmt[0] = '%';
!         fmt[1] = type;
!         fmt[2] = '\0';
!         vallen = sprintf(convert, fmt, value);
!     }
!     if (vallen < 0)
!         goto fail;

!     /* If it's infinity or NaN, forget about doing any zero-padding */
!     if (zeropadlen > 0 && !isdigit((unsigned char) convert[vallen - 1]))
!         zeropadlen = 0;

      padlen = compute_padlen(minlen, vallen + zeropadlen, leftjust);

--- 1124,1185 ----
       * bytes and limit requested precision to 350 digits; this should prevent
       * buffer overrun even with non-IEEE math.  If the original precision
       * request was more than 350, separately pad with zeroes.
+      *
+      * We handle infinities and NaNs specially to ensure platform-independent
+      * output.
       */
      if (precision < 0)            /* cover possible overflow of "accum" */
          precision = 0;
      prec = Min(precision, 350);

!     if (isnan(value))
      {
!         strcpy(convert, "NaN");
!         vallen = 3;
!         /* no zero padding, regardless of precision spec */
      }
      else
      {
!         /*
!          * Handle sign (NaNs have no sign, so we don't do this in the case
!          * above).  "value < 0.0" will not be true for IEEE minus zero, so we
!          * detect that by looking for the case where value equals 0.0
!          * according to == but not according to memcmp.
!          */
!         static const double dzero = 0.0;

!         if (adjust_sign((value < 0.0 ||
!                          (value == 0.0 &&
!                           memcmp(&value, &dzero, sizeof(double)) != 0)),
!                         forcesign, &signvalue))
!             value = -value;
!
!         if (isinf(value))
!         {
!             strcpy(convert, "Infinity");
!             vallen = 8;
!             /* no zero padding, regardless of precision spec */
!         }
!         else if (pointflag)
!         {
!             zeropadlen = precision - prec;
!             fmt[0] = '%';
!             fmt[1] = '.';
!             fmt[2] = '*';
!             fmt[3] = type;
!             fmt[4] = '\0';
!             vallen = sprintf(convert, fmt, prec, value);
!         }
!         else
!         {
!             fmt[0] = '%';
!             fmt[1] = type;
!             fmt[2] = '\0';
!             vallen = sprintf(convert, fmt, value);
!         }
!         if (vallen < 0)
!             goto fail;
!     }

      padlen = compute_padlen(minlen, vallen + zeropadlen, leftjust);

*************** fail:
*** 1197,1202 ****
--- 1221,1316 ----
      target->failed = true;
  }

+ /*
+  * Nonstandard entry point to print a double value efficiently.
+  *
+  * This is approximately equivalent to strfromd(), but has an API more
+  * adapted to what float8out() wants.  The behavior is like snprintf()
+  * with a format of "%.ng", where n is the specified precision.
+  * However, the target buffer must be nonempty (i.e. count > 0), and
+  * the precision is silently bounded to a sane range.
+  */
+ int
+ pg_strfromd(char *str, size_t count, int precision, double value)
+ {
+     PrintfTarget target;
+     int            signvalue = 0;
+     int            vallen;
+     char        fmt[8];
+     char        convert[64];
+
+     /* Set up the target like pg_snprintf, but require nonempty buffer */
+     Assert(count > 0);
+     target.bufstart = target.bufptr = str;
+     target.bufend = str + count - 1;
+     target.stream = NULL;
+     target.nchars = 0;
+     target.failed = false;
+
+     /*
+      * We bound precision to a reasonable range; the combination of this and
+      * the knowledge that we're using "g" format without padding allows the
+      * convert[] buffer to be reasonably small.
+      */
+     if (precision < 1)
+         precision = 1;
+     else if (precision > 32)
+         precision = 32;
+
+     /*
+      * The rest is just an inlined version of the fmtfloat() logic above,
+      * simplified using the knowledge that no padding is wanted.
+      */
+     if (isnan(value))
+     {
+         strcpy(convert, "NaN");
+         vallen = 3;
+     }
+     else
+     {
+         static const double dzero = 0.0;
+
+         if (value < 0.0 ||
+             (value == 0.0 &&
+              memcmp(&value, &dzero, sizeof(double)) != 0))
+         {
+             signvalue = '-';
+             value = -value;
+         }
+
+         if (isinf(value))
+         {
+             strcpy(convert, "Infinity");
+             vallen = 8;
+         }
+         else
+         {
+             fmt[0] = '%';
+             fmt[1] = '.';
+             fmt[2] = '*';
+             fmt[3] = 'g';
+             fmt[4] = '\0';
+             vallen = sprintf(convert, fmt, precision, value);
+             if (vallen < 0)
+             {
+                 target.failed = true;
+                 goto fail;
+             }
+         }
+     }
+
+     if (signvalue)
+         dopr_outch(signvalue, &target);
+
+     dostr(convert, vallen, &target);
+
+ fail:
+     *(target.bufptr) = '\0';
+     return target.failed ? -1 : (target.bufptr - target.bufstart
+                                  + target.nchars);
+ }
+
+
  static void
  dostr(const char *str, int slen, PrintfTarget *target)
  {

pgsql-hackers by date:

Previous
From: Andrew Gierth
Date:
Subject: Re: Early WIP/PoC for inlining CTEs
Next
From: Andrew Gierth
Date:
Subject: Re: Performance improvements for src/port/snprintf.c