Re: Avg performance for int8/numeric - Mailing list pgsql-patches

From Mark Kirkwood
Subject Re: Avg performance for int8/numeric
Date
Msg-id 45D648BF.4020607@paradise.net.nz
Whole thread Raw
In response to Re: Avg performance for int8/numeric  (Bruce Momjian <bruce@momjian.us>)
Responses Re: Avg performance for int8/numeric  (Bruce Momjian <bruce@momjian.us>)
Re: Avg performance for int8/numeric  (Bruce Momjian <bruce@momjian.us>)
List pgsql-patches
Bruce Momjian wrote:
> I have tested this patch but it generates regression failures.
>
> There was some code drift, so I am attaching an updated version of the
> patch, and the regression diffs.  The 'four' column is an 'int4' so my
> guess is that somehow the wrong aggregate is being called.
>

Good catch - I must have neglected to run the regression test after
amending the number of array arguments for the numeric avg :-(.

Hmmm - this changing the number of array args for avg means we can't mix
transition functions for variance with final functions for avg - which
is exactly what the regression suite does with the 'newavg' aggregate.

I've 'fixed' this by amending the definition of 'newavg' to use the
transition and final function that 'avg' does. However I found myself
asking if this lost us the point of that test - so I looked back at the
older postgres versions (e.g. 7.1.3) and saw that back *then* 'newavg'
and 'avg' were defined using the same functions...so I think making the
change as indicated is ok.

I've attached a new patch with this change.

Cheers

Mark
diff -Nacr pgsql.orig/src/backend/utils/adt/numeric.c pgsql/src/backend/utils/adt/numeric.c
*** pgsql.orig/src/backend/utils/adt/numeric.c    Sun Jan 21 11:36:20 2007
--- pgsql/src/backend/utils/adt/numeric.c    Fri Feb 16 18:09:24 2007
***************
*** 2165,2170 ****
--- 2165,2204 ----
      return result;
  }

+ /*
+  * Improve avg performance by not caclulating sum(X*X).
+  */
+ static ArrayType *
+ do_numeric_avg_accum(ArrayType *transarray, Numeric newval)
+ {
+     Datum       *transdatums;
+     int            ndatums;
+     Datum        N,
+                 sumX;
+     ArrayType  *result;
+
+     /* We assume the input is array of numeric */
+     deconstruct_array(transarray,
+                       NUMERICOID, -1, false, 'i',
+                       &transdatums, NULL, &ndatums);
+     if (ndatums != 2)
+         elog(ERROR, "expected 2-element numeric array");
+     N = transdatums[0];
+     sumX = transdatums[1];
+
+     N = DirectFunctionCall1(numeric_inc, N);
+     sumX = DirectFunctionCall2(numeric_add, sumX,
+                                NumericGetDatum(newval));
+
+     transdatums[0] = N;
+     transdatums[1] = sumX;
+
+     result = construct_array(transdatums, 2,
+                              NUMERICOID, -1, false, 'i');
+
+     return result;
+ }
+
  Datum
  numeric_accum(PG_FUNCTION_ARGS)
  {
***************
*** 2175,2180 ****
--- 2209,2226 ----
  }

  /*
+  * Optimized case for average of numeric.
+  */
+ Datum
+ numeric_avg_accum(PG_FUNCTION_ARGS)
+ {
+     ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+     Numeric        newval = PG_GETARG_NUMERIC(1);
+
+     PG_RETURN_ARRAYTYPE_P(do_numeric_avg_accum(transarray, newval));
+ }
+
+ /*
   * Integer data types all use Numeric accumulators to share code and
   * avoid risk of overflow.    For int2 and int4 inputs, Numeric accumulation
   * is overkill for the N and sum(X) values, but definitely not overkill
***************
*** 2219,2224 ****
--- 2265,2286 ----
      PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval));
  }

+ /*
+  * Optimized case for average of int8.
+  */
+ Datum
+ int8_avg_accum(PG_FUNCTION_ARGS)
+ {
+     ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+     Datum        newval8 = PG_GETARG_DATUM(1);
+     Numeric        newval;
+
+     newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric, newval8));
+
+     PG_RETURN_ARRAYTYPE_P(do_numeric_avg_accum(transarray, newval));
+ }
+
+
  Datum
  numeric_avg(PG_FUNCTION_ARGS)
  {
***************
*** 2232,2242 ****
      deconstruct_array(transarray,
                        NUMERICOID, -1, false, 'i',
                        &transdatums, NULL, &ndatums);
!     if (ndatums != 3)
!         elog(ERROR, "expected 3-element numeric array");
      N = DatumGetNumeric(transdatums[0]);
      sumX = DatumGetNumeric(transdatums[1]);
-     /* ignore sumX2 */

      /* SQL92 defines AVG of no values to be NULL */
      /* N is zero iff no digits (cf. numeric_uminus) */
--- 2294,2303 ----
      deconstruct_array(transarray,
                        NUMERICOID, -1, false, 'i',
                        &transdatums, NULL, &ndatums);
!     if (ndatums != 2)
!         elog(ERROR, "expected 2-element numeric array");
      N = DatumGetNumeric(transdatums[0]);
      sumX = DatumGetNumeric(transdatums[1]);

      /* SQL92 defines AVG of no values to be NULL */
      /* N is zero iff no digits (cf. numeric_uminus) */
diff -Nacr pgsql.orig/src/include/catalog/catversion.h pgsql/src/include/catalog/catversion.h
*** pgsql.orig/src/include/catalog/catversion.h    Fri Feb 16 18:06:34 2007
--- pgsql/src/include/catalog/catversion.h    Fri Feb 16 18:09:24 2007
***************
*** 53,58 ****
   */

  /*                            yyyymmddN */
! #define CATALOG_VERSION_NO    200702131

  #endif
--- 53,58 ----
   */

  /*                            yyyymmddN */
! #define CATALOG_VERSION_NO    200702151

  #endif
diff -Nacr pgsql.orig/src/include/catalog/pg_aggregate.h pgsql/src/include/catalog/pg_aggregate.h
*** pgsql.orig/src/include/catalog/pg_aggregate.h    Sun Jan 21 11:36:22 2007
--- pgsql/src/include/catalog/pg_aggregate.h    Fri Feb 16 18:09:24 2007
***************
*** 80,89 ****
   */

  /* avg */
! DATA(insert ( 2100    int8_accum        numeric_avg        0    1231    "{0,0,0}" ));
  DATA(insert ( 2101    int4_avg_accum    int8_avg        0    1016    "{0,0}" ));
  DATA(insert ( 2102    int2_avg_accum    int8_avg        0    1016    "{0,0}" ));
! DATA(insert ( 2103    numeric_accum    numeric_avg        0    1231    "{0,0,0}" ));
  DATA(insert ( 2104    float4_accum    float8_avg        0    1022    "{0,0,0}" ));
  DATA(insert ( 2105    float8_accum    float8_avg        0    1022    "{0,0,0}" ));
  DATA(insert ( 2106    interval_accum    interval_avg    0    1187    "{0 second,0 second}" ));
--- 80,89 ----
   */

  /* avg */
! DATA(insert ( 2100    int8_avg_accum    numeric_avg        0    1231    "{0,0}" ));
  DATA(insert ( 2101    int4_avg_accum    int8_avg        0    1016    "{0,0}" ));
  DATA(insert ( 2102    int2_avg_accum    int8_avg        0    1016    "{0,0}" ));
! DATA(insert ( 2103    numeric_avg_accum    numeric_avg        0    1231    "{0,0}" ));
  DATA(insert ( 2104    float4_accum    float8_avg        0    1022    "{0,0,0}" ));
  DATA(insert ( 2105    float8_accum    float8_avg        0    1022    "{0,0,0}" ));
  DATA(insert ( 2106    interval_accum    interval_avg    0    1187    "{0 second,0 second}" ));
diff -Nacr pgsql.orig/src/include/catalog/pg_proc.h pgsql/src/include/catalog/pg_proc.h
*** pgsql.orig/src/include/catalog/pg_proc.h    Fri Feb 16 18:06:37 2007
--- pgsql/src/include/catalog/pg_proc.h    Fri Feb 16 18:09:24 2007
***************
*** 2744,2754 ****
--- 2744,2758 ----
  DESCR("STDDEV_SAMP aggregate final function");
  DATA(insert OID = 1833 (  numeric_accum    PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 1700" _null_ _null_ _null_
numeric_accum- _null_ )); 
  DESCR("aggregate transition function");
+ DATA(insert OID = 2858 (  numeric_avg_accum    PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 1700" _null_ _null_ _null_
numeric_avg_accum- _null_ )); 
+ DESCR("aggregate transition function");
  DATA(insert OID = 1834 (  int2_accum       PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 21" _null_ _null_ _null_
int2_accum- _null_ )); 
  DESCR("aggregate transition function");
  DATA(insert OID = 1835 (  int4_accum       PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 23" _null_ _null_ _null_
int4_accum- _null_ )); 
  DESCR("aggregate transition function");
  DATA(insert OID = 1836 (  int8_accum       PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 20" _null_ _null_ _null_
int8_accum- _null_ )); 
+ DESCR("aggregate transition function");
+ DATA(insert OID = 2746 (  int8_avg_accum       PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 20" _null_ _null_ _null_
int8_avg_accum- _null_ )); 
  DESCR("aggregate transition function");
  DATA(insert OID = 1837 (  numeric_avg       PGNSP PGUID 12 1 0 f f t f i 1 1700 "1231" _null_ _null_ _null_
numeric_avg- _null_ )); 
  DESCR("AVG aggregate final function");
diff -Nacr pgsql.orig/src/include/utils/builtins.h pgsql/src/include/utils/builtins.h
*** pgsql.orig/src/include/utils/builtins.h    Fri Feb 16 18:06:37 2007
--- pgsql/src/include/utils/builtins.h    Fri Feb 16 18:09:24 2007
***************
*** 841,849 ****
--- 841,851 ----
  extern Datum text_numeric(PG_FUNCTION_ARGS);
  extern Datum numeric_text(PG_FUNCTION_ARGS);
  extern Datum numeric_accum(PG_FUNCTION_ARGS);
+ extern Datum numeric_avg_accum(PG_FUNCTION_ARGS);
  extern Datum int2_accum(PG_FUNCTION_ARGS);
  extern Datum int4_accum(PG_FUNCTION_ARGS);
  extern Datum int8_accum(PG_FUNCTION_ARGS);
+ extern Datum int8_avg_accum(PG_FUNCTION_ARGS);
  extern Datum numeric_avg(PG_FUNCTION_ARGS);
  extern Datum numeric_var_pop(PG_FUNCTION_ARGS);
  extern Datum numeric_var_samp(PG_FUNCTION_ARGS);
diff -Nacr pgsql.orig/src/test/regress/expected/create_aggregate.out
pgsql/src/test/regress/expected/create_aggregate.out
*** pgsql.orig/src/test/regress/expected/create_aggregate.out    Sat Jul 29 13:45:35 2006
--- pgsql/src/test/regress/expected/create_aggregate.out    Sat Feb 17 12:05:39 2007
***************
*** 3,11 ****
  --
  -- all functions CREATEd
  CREATE AGGREGATE newavg (
!    sfunc = int4_accum, basetype = int4, stype = _numeric,
!    finalfunc = numeric_avg,
!    initcond1 = '{0,0,0}'
  );
  -- test comments
  COMMENT ON AGGREGATE newavg_wrong (int4) IS 'an agg comment';
--- 3,11 ----
  --
  -- all functions CREATEd
  CREATE AGGREGATE newavg (
!    sfunc = int4_avg_accum, basetype = int4, stype = _int8,
!    finalfunc = int8_avg,
!    initcond1 = '{0,0}'
  );
  -- test comments
  COMMENT ON AGGREGATE newavg_wrong (int4) IS 'an agg comment';
diff -Nacr pgsql.orig/src/test/regress/sql/create_aggregate.sql pgsql/src/test/regress/sql/create_aggregate.sql
*** pgsql.orig/src/test/regress/sql/create_aggregate.sql    Sat Jul 29 13:45:35 2006
--- pgsql/src/test/regress/sql/create_aggregate.sql    Sat Feb 17 12:00:37 2007
***************
*** 4,12 ****

  -- all functions CREATEd
  CREATE AGGREGATE newavg (
!    sfunc = int4_accum, basetype = int4, stype = _numeric,
!    finalfunc = numeric_avg,
!    initcond1 = '{0,0,0}'
  );

  -- test comments
--- 4,12 ----

  -- all functions CREATEd
  CREATE AGGREGATE newavg (
!    sfunc = int4_avg_accum, basetype = int4, stype = _int8,
!    finalfunc = int8_avg,
!    initcond1 = '{0,0}'
  );

  -- test comments

pgsql-patches by date:

Previous
From: "Brendan Jurd"
Date:
Subject: Re: [GENERAL] ISO week dates
Next
From: Bruce Momjian
Date:
Subject: Re: Avg performance for int8/numeric