Thread: Allow the "operand" input of width_bucket() to be NaN

Allow the "operand" input of width_bucket() to be NaN

From
Tom Lane
Date:
The attached patch does what was discussed in the pgsql-docs
thread at [1], namely change the four-argument variants of
width_bucket() to allow their first argument to be NaN,
treating that value as larger than any non-NaN.

While these functions are defined by the SQL standard, it doesn't
appear to have anything to say about NaN values.  So it's up to
us to decide what's the most consistent behavior.  The arguments
for changing this are:

1. It's consistent with the array variant of width_bucket(),
which treats NaN as a valid input larger than any non-NaN.

2. The first argument is probably coming from a table,
so it's more likely for it to be NaN than is the case for
the histogram endpoints.  It'd be better not to throw an
error in such cases.

Of course, #2 is a bit of a value judgment.  One could
alternatively argue that accepting NaN risks "garbage in,
garbage out" results, since the result will not be visibly
distinct from the results for ordinary values.

Thoughts?  (I'm envisioning this as a v19 change.)

            regards, tom lane

[1] https://www.postgresql.org/message-id/flat/2BD74F86-5B89-4AC1-8F13-23CED3546AC1%40gmail.com

diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index ba66a9c4ce6..7b97d2be6ca 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -4067,8 +4067,9 @@ float84ge(PG_FUNCTION_ARGS)
  * with the specified characteristics. An operand smaller than the
  * lower bound is assigned to bucket 0. An operand greater than or equal
  * to the upper bound is assigned to an additional bucket (with number
- * count+1). We don't allow "NaN" for any of the float8 inputs, and we
- * don't allow either of the histogram bounds to be +/- infinity.
+ * count+1). We don't allow the histogram bounds to be NaN or +/- infinity,
+ * but we do allow those values for the operand (taking NaN to be larger
+ * than any other value, as we do in comparisons).
  */
 Datum
 width_bucket_float8(PG_FUNCTION_ARGS)
@@ -4084,12 +4085,11 @@ width_bucket_float8(PG_FUNCTION_ARGS)
                 (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
                  errmsg("count must be greater than zero")));

-    if (isnan(operand) || isnan(bound1) || isnan(bound2))
+    if (isnan(bound1) || isnan(bound2))
         ereport(ERROR,
                 (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
-                 errmsg("operand, lower bound, and upper bound cannot be NaN")));
+                 errmsg("lower and upper bounds cannot be NaN")));

-    /* Note that we allow "operand" to be infinite */
     if (isinf(bound1) || isinf(bound2))
         ereport(ERROR,
                 (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
@@ -4097,15 +4097,15 @@ width_bucket_float8(PG_FUNCTION_ARGS)

     if (bound1 < bound2)
     {
-        if (operand < bound1)
-            result = 0;
-        else if (operand >= bound2)
+        if (isnan(operand) || operand >= bound2)
         {
             if (pg_add_s32_overflow(count, 1, &result))
                 ereport(ERROR,
                         (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                          errmsg("integer out of range")));
         }
+        else if (operand < bound1)
+            result = 0;
         else
         {
             if (!isinf(bound2 - bound1))
@@ -4135,7 +4135,7 @@ width_bucket_float8(PG_FUNCTION_ARGS)
     }
     else if (bound1 > bound2)
     {
-        if (operand > bound1)
+        if (isnan(operand) || operand > bound1)
             result = 0;
         else if (operand <= bound2)
         {
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 58ad1a65ef7..c9233565d57 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -1960,8 +1960,9 @@ generate_series_numeric_support(PG_FUNCTION_ARGS)
  * with the specified characteristics. An operand smaller than the
  * lower bound is assigned to bucket 0. An operand greater than or equal
  * to the upper bound is assigned to an additional bucket (with number
- * count+1). We don't allow "NaN" for any of the numeric inputs, and we
- * don't allow either of the histogram bounds to be +/- infinity.
+ * count+1). We don't allow the histogram bounds to be NaN or +/- infinity,
+ * but we do allow those values for the operand (taking NaN to be larger
+ * than any other value, as we do in comparisons).
  */
 Datum
 width_bucket_numeric(PG_FUNCTION_ARGS)
@@ -1979,17 +1980,13 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
                 (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
                  errmsg("count must be greater than zero")));

-    if (NUMERIC_IS_SPECIAL(operand) ||
-        NUMERIC_IS_SPECIAL(bound1) ||
-        NUMERIC_IS_SPECIAL(bound2))
+    if (NUMERIC_IS_SPECIAL(bound1) || NUMERIC_IS_SPECIAL(bound2))
     {
-        if (NUMERIC_IS_NAN(operand) ||
-            NUMERIC_IS_NAN(bound1) ||
-            NUMERIC_IS_NAN(bound2))
+        if (NUMERIC_IS_NAN(bound1) || NUMERIC_IS_NAN(bound2))
             ereport(ERROR,
                     (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
-                     errmsg("operand, lower bound, and upper bound cannot be NaN")));
-        /* We allow "operand" to be infinite; cmp_numerics will cope */
+                     errmsg("lower and upper bounds cannot be NaN")));
+
         if (NUMERIC_IS_INF(bound1) || NUMERIC_IS_INF(bound2))
             ereport(ERROR,
                     (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out
index 072d76ce131..93e93be5668 100644
--- a/src/test/regress/expected/numeric.out
+++ b/src/test/regress/expected/numeric.out
@@ -1464,9 +1464,21 @@ ERROR:  count must be greater than zero
 SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888);
 ERROR:  lower bound cannot equal upper bound
 SELECT width_bucket('NaN', 3.0, 4.0, 888);
-ERROR:  operand, lower bound, and upper bound cannot be NaN
+ width_bucket
+--------------
+          889
+(1 row)
+
+SELECT width_bucket('NaN'::float8, 3.0::float8, 4.0::float8, 888);
+ width_bucket
+--------------
+          889
+(1 row)
+
+SELECT width_bucket(0, 'NaN', 4.0, 888);
+ERROR:  lower and upper bounds cannot be NaN
 SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888);
-ERROR:  operand, lower bound, and upper bound cannot be NaN
+ERROR:  lower and upper bounds cannot be NaN
 SELECT width_bucket(2.0, 3.0, '-inf', 888);
 ERROR:  lower and upper bounds must be finite
 SELECT width_bucket(0::float8, '-inf', 4.0::float8, 888);
diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql
index b98ae27df56..640c6d92f4c 100644
--- a/src/test/regress/sql/numeric.sql
+++ b/src/test/regress/sql/numeric.sql
@@ -869,6 +869,8 @@ SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, 0);
 SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5);
 SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888);
 SELECT width_bucket('NaN', 3.0, 4.0, 888);
+SELECT width_bucket('NaN'::float8, 3.0::float8, 4.0::float8, 888);
+SELECT width_bucket(0, 'NaN', 4.0, 888);
 SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888);
 SELECT width_bucket(2.0, 3.0, '-inf', 888);
 SELECT width_bucket(0::float8, '-inf', 4.0::float8, 888);