Re: Avg performance for int8/numeric - Mailing list pgsql-patches

From Bruce Momjian
Subject Re: Avg performance for int8/numeric
Date
Msg-id 200702160455.l1G4trs10100@momjian.us
Whole thread Raw
In response to Re: Avg performance for int8/numeric  (Mark Kirkwood <markir@paradise.net.nz>)
Responses Re: Avg performance for int8/numeric
List pgsql-patches
I have tested this patch but it generates regression failures.

There was some code drift, so I am attaching an updated version of the
patch, and the regression diffs.  The 'four' column is an 'int4' so my
guess is that somehow the wrong aggregate is being called.

---------------------------------------------------------------------------

Mark Kirkwood wrote:
> Neil Conway wrote:
> > On Fri, 2006-11-24 at 11:08 +1300, Mark Kirkwood wrote:
> >> - Modifies do_numeric_accum to have an extra bool parameter and does not
> >> calc sumX2 when it is false.
> >
> > I think it would be clearer to reorganize this function slightly, and
> > have only a single branch on "useSumX2". On first glance it isn't
> > obviously that transdatums[2] is defined (but unchanged) when useSumX2
> > is false.
> >
>
> Right - new patch attached that adds a new function do_numeric_avg_accum
> that only uses N and sum(X). This means I could amend the avg aggregates
> for numeric, int8 to have a initvalues of {0,0}.

--
  Bruce Momjian  <bruce@momjian.us>          http://momjian.us
  EnterpriseDB                               http://www.enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
*** ./expected/aggregates.out    Fri Jul 28 14:33:04 2006
--- ./results/aggregates.out    Thu Feb 15 23:54:45 2007
***************
*** 238,248 ****

  -- user-defined aggregates
  SELECT newavg(four) AS avg_1 FROM onek;
!        avg_1
! --------------------
!  1.5000000000000000
! (1 row)
!
  SELECT newsum(four) AS sum_1500 FROM onek;
   sum_1500
  ----------
--- 238,244 ----

  -- user-defined aggregates
  SELECT newavg(four) AS avg_1 FROM onek;
! ERROR:  expected 2-element numeric array
  SELECT newsum(four) AS sum_1500 FROM onek;
   sum_1500
  ----------

======================================================================

Index: src/backend/utils/adt/numeric.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/numeric.c,v
retrieving revision 1.99
diff -c -c -r1.99 numeric.c
*** src/backend/utils/adt/numeric.c    16 Jan 2007 21:41:13 -0000    1.99
--- src/backend/utils/adt/numeric.c    16 Feb 2007 04:30:27 -0000
***************
*** 2165,2170 ****
--- 2165,2204 ----
      return result;
  }

+ /*
+  * Improve avg performance by not caclulating sum(X*X).
+  */
+ static ArrayType *
+ do_numeric_avg_accum(ArrayType *transarray, Numeric newval)
+ {
+     Datum       *transdatums;
+     int            ndatums;
+     Datum        N,
+                 sumX;
+     ArrayType  *result;
+
+     /* We assume the input is array of numeric */
+     deconstruct_array(transarray,
+                       NUMERICOID, -1, false, 'i',
+                       &transdatums, NULL, &ndatums);
+     if (ndatums != 2)
+         elog(ERROR, "expected 2-element numeric array");
+     N = transdatums[0];
+     sumX = transdatums[1];
+
+     N = DirectFunctionCall1(numeric_inc, N);
+     sumX = DirectFunctionCall2(numeric_add, sumX,
+                                NumericGetDatum(newval));
+
+     transdatums[0] = N;
+     transdatums[1] = sumX;
+
+     result = construct_array(transdatums, 2,
+                              NUMERICOID, -1, false, 'i');
+
+     return result;
+ }
+
  Datum
  numeric_accum(PG_FUNCTION_ARGS)
  {
***************
*** 2175,2180 ****
--- 2209,2226 ----
  }

  /*
+  * Optimized case for average of numeric.
+  */
+ Datum
+ numeric_avg_accum(PG_FUNCTION_ARGS)
+ {
+     ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+     Numeric        newval = PG_GETARG_NUMERIC(1);
+
+     PG_RETURN_ARRAYTYPE_P(do_numeric_avg_accum(transarray, newval));
+ }
+
+ /*
   * Integer data types all use Numeric accumulators to share code and
   * avoid risk of overflow.    For int2 and int4 inputs, Numeric accumulation
   * is overkill for the N and sum(X) values, but definitely not overkill
***************
*** 2219,2224 ****
--- 2265,2286 ----
      PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval));
  }

+ /*
+  * Optimized case for average of int8.
+  */
+ Datum
+ int8_avg_accum(PG_FUNCTION_ARGS)
+ {
+     ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
+     Datum        newval8 = PG_GETARG_DATUM(1);
+     Numeric        newval;
+
+     newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric, newval8));
+
+     PG_RETURN_ARRAYTYPE_P(do_numeric_avg_accum(transarray, newval));
+ }
+
+
  Datum
  numeric_avg(PG_FUNCTION_ARGS)
  {
***************
*** 2232,2242 ****
      deconstruct_array(transarray,
                        NUMERICOID, -1, false, 'i',
                        &transdatums, NULL, &ndatums);
!     if (ndatums != 3)
!         elog(ERROR, "expected 3-element numeric array");
      N = DatumGetNumeric(transdatums[0]);
      sumX = DatumGetNumeric(transdatums[1]);
-     /* ignore sumX2 */

      /* SQL92 defines AVG of no values to be NULL */
      /* N is zero iff no digits (cf. numeric_uminus) */
--- 2294,2303 ----
      deconstruct_array(transarray,
                        NUMERICOID, -1, false, 'i',
                        &transdatums, NULL, &ndatums);
!     if (ndatums != 2)
!         elog(ERROR, "expected 2-element numeric array");
      N = DatumGetNumeric(transdatums[0]);
      sumX = DatumGetNumeric(transdatums[1]);

      /* SQL92 defines AVG of no values to be NULL */
      /* N is zero iff no digits (cf. numeric_uminus) */
Index: src/include/catalog/catversion.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/catversion.h,v
retrieving revision 1.384
diff -c -c -r1.384 catversion.h
*** src/include/catalog/catversion.h    14 Feb 2007 01:58:58 -0000    1.384
--- src/include/catalog/catversion.h    16 Feb 2007 04:30:28 -0000
***************
*** 53,58 ****
   */

  /*                            yyyymmddN */
! #define CATALOG_VERSION_NO    200702131

  #endif
--- 53,58 ----
   */

  /*                            yyyymmddN */
! #define CATALOG_VERSION_NO    200702151

  #endif
Index: src/include/catalog/pg_aggregate.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/pg_aggregate.h,v
retrieving revision 1.60
diff -c -c -r1.60 pg_aggregate.h
*** src/include/catalog/pg_aggregate.h    20 Jan 2007 09:27:19 -0000    1.60
--- src/include/catalog/pg_aggregate.h    16 Feb 2007 04:30:28 -0000
***************
*** 80,89 ****
   */

  /* avg */
! DATA(insert ( 2100    int8_accum        numeric_avg        0    1231    "{0,0,0}" ));
  DATA(insert ( 2101    int4_avg_accum    int8_avg        0    1016    "{0,0}" ));
  DATA(insert ( 2102    int2_avg_accum    int8_avg        0    1016    "{0,0}" ));
! DATA(insert ( 2103    numeric_accum    numeric_avg        0    1231    "{0,0,0}" ));
  DATA(insert ( 2104    float4_accum    float8_avg        0    1022    "{0,0,0}" ));
  DATA(insert ( 2105    float8_accum    float8_avg        0    1022    "{0,0,0}" ));
  DATA(insert ( 2106    interval_accum    interval_avg    0    1187    "{0 second,0 second}" ));
--- 80,89 ----
   */

  /* avg */
! DATA(insert ( 2100    int8_avg_accum    numeric_avg        0    1231    "{0,0}" ));
  DATA(insert ( 2101    int4_avg_accum    int8_avg        0    1016    "{0,0}" ));
  DATA(insert ( 2102    int2_avg_accum    int8_avg        0    1016    "{0,0}" ));
! DATA(insert ( 2103    numeric_avg_accum    numeric_avg        0    1231    "{0,0}" ));
  DATA(insert ( 2104    float4_accum    float8_avg        0    1022    "{0,0,0}" ));
  DATA(insert ( 2105    float8_accum    float8_avg        0    1022    "{0,0,0}" ));
  DATA(insert ( 2106    interval_accum    interval_avg    0    1187    "{0 second,0 second}" ));
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.443
diff -c -c -r1.443 pg_proc.h
*** src/include/catalog/pg_proc.h    7 Feb 2007 23:11:30 -0000    1.443
--- src/include/catalog/pg_proc.h    16 Feb 2007 04:30:31 -0000
***************
*** 2744,2755 ****
--- 2744,2759 ----
  DESCR("STDDEV_SAMP aggregate final function");
  DATA(insert OID = 1833 (  numeric_accum    PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 1700" _null_ _null_ _null_
numeric_accum- _null_ )); 
  DESCR("aggregate transition function");
+ DATA(insert OID = 2858 (  numeric_avg_accum    PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 1700" _null_ _null_ _null_
numeric_avg_accum- _null_ )); 
+ DESCR("aggregate transition function");
  DATA(insert OID = 1834 (  int2_accum       PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 21" _null_ _null_ _null_
int2_accum- _null_ )); 
  DESCR("aggregate transition function");
  DATA(insert OID = 1835 (  int4_accum       PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 23" _null_ _null_ _null_
int4_accum- _null_ )); 
  DESCR("aggregate transition function");
  DATA(insert OID = 1836 (  int8_accum       PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 20" _null_ _null_ _null_
int8_accum- _null_ )); 
  DESCR("aggregate transition function");
+ DATA(insert OID = 2746 (  int8_avg_accum       PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 20" _null_ _null_ _null_
int8_avg_accum- _null_ )); 
+ DESCR("aggregate transition function");
  DATA(insert OID = 1837 (  numeric_avg       PGNSP PGUID 12 1 0 f f t f i 1 1700 "1231" _null_ _null_ _null_
numeric_avg- _null_ )); 
  DESCR("AVG aggregate final function");
  DATA(insert OID = 2514 (  numeric_var_pop  PGNSP PGUID 12 1 0 f f t f i 1 1700 "1231" _null_ _null_ _null_
numeric_var_pop- _null_ )); 
Index: src/include/utils/builtins.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.287
diff -c -c -r1.287 builtins.h
*** src/include/utils/builtins.h    28 Jan 2007 16:16:54 -0000    1.287
--- src/include/utils/builtins.h    16 Feb 2007 04:30:32 -0000
***************
*** 841,849 ****
--- 841,851 ----
  extern Datum text_numeric(PG_FUNCTION_ARGS);
  extern Datum numeric_text(PG_FUNCTION_ARGS);
  extern Datum numeric_accum(PG_FUNCTION_ARGS);
+ extern Datum numeric_avg_accum(PG_FUNCTION_ARGS);
  extern Datum int2_accum(PG_FUNCTION_ARGS);
  extern Datum int4_accum(PG_FUNCTION_ARGS);
  extern Datum int8_accum(PG_FUNCTION_ARGS);
+ extern Datum int8_avg_accum(PG_FUNCTION_ARGS);
  extern Datum numeric_avg(PG_FUNCTION_ARGS);
  extern Datum numeric_var_pop(PG_FUNCTION_ARGS);
  extern Datum numeric_var_samp(PG_FUNCTION_ARGS);

pgsql-patches by date:

Previous
From: Tom Lane
Date:
Subject: Re: WIP patch - INSERT-able log statements
Next
From: Bruce Momjian
Date:
Subject: Re: [GENERAL] ISO week dates