Avg performance for int8/numeric - Mailing list pgsql-patches

From Mark Kirkwood
Subject Avg performance for int8/numeric
Date
Msg-id 45661BE7.4050205@paradise.net.nz
Whole thread Raw
Responses Re: Avg performance for int8/numeric
Re: Avg performance for int8/numeric
List pgsql-patches
Avg performance for these two datatypes can be improved by *not*
calculating the sum of squares in the shared accumulator
(do_numeric_accum). However there is a little subtlety as this function
is also the shared by variance and stddev!

This patch:

- Modifies do_numeric_accum to have an extra bool parameter and does not
calc sumX2 when it is false.
- Amends all the accumulators that call it to include the bool (set to
true).
- Adds new functions [int8|numeric]_avg_accum that call do_numeric_accum
with the bool set to false.
- Amends the the bootstrap entries for pg_aggregate to use the new
accumulators for avg(int8|numeric).
- Adds the new accumulators into the bootstrap entries for pg_proc.

Performance gain is approx 33% (it is still slower than doing sum/count
- possibly due to the construct/deconstruct overhead of the numeric
transition array).

Cheers

Mark
Index: src/backend/utils/adt/numeric.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/numeric.c,v
retrieving revision 1.96
diff -c -r1.96 numeric.c
*** src/backend/utils/adt/numeric.c    4 Oct 2006 00:29:59 -0000    1.96
--- src/backend/utils/adt/numeric.c    23 Nov 2006 08:51:44 -0000
***************
*** 2060,2066 ****
   */

  static ArrayType *
! do_numeric_accum(ArrayType *transarray, Numeric newval)
  {
      Datum       *transdatums;
      int            ndatums;
--- 2060,2066 ----
   */

  static ArrayType *
! do_numeric_accum(ArrayType *transarray, Numeric newval, bool useSumX2)
  {
      Datum       *transdatums;
      int            ndatums;
***************
*** 2082,2095 ****
      N = DirectFunctionCall1(numeric_inc, N);
      sumX = DirectFunctionCall2(numeric_add, sumX,
                                 NumericGetDatum(newval));
!     sumX2 = DirectFunctionCall2(numeric_add, sumX2,
                                  DirectFunctionCall2(numeric_mul,
                                                      NumericGetDatum(newval),
                                                      NumericGetDatum(newval)));

      transdatums[0] = N;
      transdatums[1] = sumX;
!     transdatums[2] = sumX2;

      result = construct_array(transdatums, 3,
                               NUMERICOID, -1, false, 'i');
--- 2082,2097 ----
      N = DirectFunctionCall1(numeric_inc, N);
      sumX = DirectFunctionCall2(numeric_add, sumX,
                                 NumericGetDatum(newval));
!     if (useSumX2)
!         sumX2 = DirectFunctionCall2(numeric_add, sumX2,
                                  DirectFunctionCall2(numeric_mul,
                                                      NumericGetDatum(newval),
                                                      NumericGetDatum(newval)));

      transdatums[0] = N;
      transdatums[1] = sumX;
!     if (useSumX2)
!         transdatums[2] = sumX2;

      result = construct_array(transdatums, 3,
                               NUMERICOID, -1, false, 'i');
***************
*** 2103,2109 ****
      ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
      Numeric        newval = PG_GETARG_NUMERIC(1);

!     PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval));
  }

  /*
--- 2105,2123 ----
      ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
      Numeric        newval = PG_GETARG_NUMERIC(1);

!     PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval, true));
! }
!
! /*
!  * Optimized case for average of numeric.
!  */
! Datum
! numeric_avg_accum(PG_FUNCTION_ARGS)
! {
!     ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
!     Numeric        newval = PG_GETARG_NUMERIC(1);
!
!     PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval, false));
  }

  /*
***************
*** 2124,2130 ****

      newval = DatumGetNumeric(DirectFunctionCall1(int2_numeric, newval2));

!     PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval));
  }

  Datum
--- 2138,2144 ----

      newval = DatumGetNumeric(DirectFunctionCall1(int2_numeric, newval2));

!     PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval, true));
  }

  Datum
***************
*** 2136,2142 ****

      newval = DatumGetNumeric(DirectFunctionCall1(int4_numeric, newval4));

!     PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval));
  }

  Datum
--- 2150,2156 ----

      newval = DatumGetNumeric(DirectFunctionCall1(int4_numeric, newval4));

!     PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval, true));
  }

  Datum
***************
*** 2148,2156 ****

      newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric, newval8));

!     PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval));
  }

  Datum
  numeric_avg(PG_FUNCTION_ARGS)
  {
--- 2162,2186 ----

      newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric, newval8));

!     PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval, true));
! }
!
! /*
!  * Optimized case for average of int8.
!  */
! Datum
! int8_avg_accum(PG_FUNCTION_ARGS)
! {
!     ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
!     Datum        newval8 = PG_GETARG_DATUM(1);
!     Numeric        newval;
!
!     newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric, newval8));
!
!     PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval, false));
  }

+
  Datum
  numeric_avg(PG_FUNCTION_ARGS)
  {
Index: src/include/catalog/pg_aggregate.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_aggregate.h,v
retrieving revision 1.58
diff -c -r1.58 pg_aggregate.h
*** src/include/catalog/pg_aggregate.h    4 Oct 2006 00:30:07 -0000    1.58
--- src/include/catalog/pg_aggregate.h    23 Nov 2006 08:51:49 -0000
***************
*** 80,89 ****
   */

  /* avg */
! DATA(insert ( 2100    int8_accum        numeric_avg        0    1231    "{0,0,0}" ));
  DATA(insert ( 2101    int4_avg_accum    int8_avg        0    1016    "{0,0}" ));
  DATA(insert ( 2102    int2_avg_accum    int8_avg        0    1016    "{0,0}" ));
! DATA(insert ( 2103    numeric_accum    numeric_avg        0    1231    "{0,0,0}" ));
  DATA(insert ( 2104    float4_accum    float8_avg        0    1022    "{0,0,0}" ));
  DATA(insert ( 2105    float8_accum    float8_avg        0    1022    "{0,0,0}" ));
  DATA(insert ( 2106    interval_accum    interval_avg    0    1187    "{0 second,0 second}" ));
--- 80,89 ----
   */

  /* avg */
! DATA(insert ( 2100    int8_avg_accum    numeric_avg        0    1231    "{0,0,0}" ));
  DATA(insert ( 2101    int4_avg_accum    int8_avg        0    1016    "{0,0}" ));
  DATA(insert ( 2102    int2_avg_accum    int8_avg        0    1016    "{0,0}" ));
! DATA(insert ( 2103    numeric_avg_accum    numeric_avg        0    1231    "{0,0,0}" ));
  DATA(insert ( 2104    float4_accum    float8_avg        0    1022    "{0,0,0}" ));
  DATA(insert ( 2105    float8_accum    float8_avg        0    1022    "{0,0,0}" ));
  DATA(insert ( 2106    interval_accum    interval_avg    0    1187    "{0 second,0 second}" ));
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.427
diff -c -r1.427 pg_proc.h
*** src/include/catalog/pg_proc.h    4 Oct 2006 00:30:07 -0000    1.427
--- src/include/catalog/pg_proc.h    23 Nov 2006 08:52:37 -0000
***************
*** 2697,2708 ****
--- 2697,2710 ----
  DATA(insert OID = 1832 (  float8_stddev_samp    PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_
float8_stddev_samp- _null_ )); 
  DESCR("STDDEV_SAMP aggregate final function");
  DATA(insert OID = 1833 (  numeric_accum    PGNSP PGUID 12 f f t f i 2 1231 "1231 1700" _null_ _null_ _null_
numeric_accum- _null_ )); 
+ DATA(insert OID = 2858 (  numeric_avg_accum    PGNSP PGUID 12 f f t f i 2 1231 "1231 1700" _null_ _null_ _null_
numeric_avg_accum- _null_ )); 
  DESCR("aggregate transition function");
  DATA(insert OID = 1834 (  int2_accum       PGNSP PGUID 12 f f t f i 2 1231 "1231 21" _null_ _null_ _null_ int2_accum
-_null_ )); 
  DESCR("aggregate transition function");
  DATA(insert OID = 1835 (  int4_accum       PGNSP PGUID 12 f f t f i 2 1231 "1231 23" _null_ _null_ _null_ int4_accum
-_null_ )); 
  DESCR("aggregate transition function");
  DATA(insert OID = 1836 (  int8_accum       PGNSP PGUID 12 f f t f i 2 1231 "1231 20" _null_ _null_ _null_ int8_accum
-_null_ )); 
+ DATA(insert OID = 2857 (  int8_avg_accum       PGNSP PGUID 12 f f t f i 2 1231 "1231 20" _null_ _null_ _null_
int8_avg_accum- _null_ )); 
  DESCR("aggregate transition function");
  DATA(insert OID = 1837 (  numeric_avg       PGNSP PGUID 12 f f t f i 1 1700 "1231" _null_ _null_ _null_
numeric_avg- _null_ )); 
  DESCR("AVG aggregate final function");
Index: src/include/utils/builtins.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.282
diff -c -r1.282 builtins.h
*** src/include/utils/builtins.h    18 Sep 2006 22:40:40 -0000    1.282
--- src/include/utils/builtins.h    23 Nov 2006 08:52:41 -0000
***************
*** 833,841 ****
--- 833,843 ----
  extern Datum text_numeric(PG_FUNCTION_ARGS);
  extern Datum numeric_text(PG_FUNCTION_ARGS);
  extern Datum numeric_accum(PG_FUNCTION_ARGS);
+ extern Datum numeric_avg_accum(PG_FUNCTION_ARGS);
  extern Datum int2_accum(PG_FUNCTION_ARGS);
  extern Datum int4_accum(PG_FUNCTION_ARGS);
  extern Datum int8_accum(PG_FUNCTION_ARGS);
+ extern Datum int8_avg_accum(PG_FUNCTION_ARGS);
  extern Datum numeric_avg(PG_FUNCTION_ARGS);
  extern Datum numeric_var_pop(PG_FUNCTION_ARGS);
  extern Datum numeric_var_samp(PG_FUNCTION_ARGS);

pgsql-patches by date:

Previous
From: Bruce Momjian
Date:
Subject: Re: [PERFORM] Direct I/O issues
Next
From: Neil Conway
Date:
Subject: Re: Avg performance for int8/numeric