Avg performance for int8/numeric - Mailing list pgsql-patches
From | Mark Kirkwood |
---|---|
Subject | Avg performance for int8/numeric |
Date | |
Msg-id | 45661BE7.4050205@paradise.net.nz Whole thread Raw |
Responses |
Re: Avg performance for int8/numeric
Re: Avg performance for int8/numeric |
List | pgsql-patches |
Avg performance for these two datatypes can be improved by *not* calculating the sum of squares in the shared accumulator (do_numeric_accum). However there is a little subtlety as this function is also the shared by variance and stddev! This patch: - Modifies do_numeric_accum to have an extra bool parameter and does not calc sumX2 when it is false. - Amends all the accumulators that call it to include the bool (set to true). - Adds new functions [int8|numeric]_avg_accum that call do_numeric_accum with the bool set to false. - Amends the the bootstrap entries for pg_aggregate to use the new accumulators for avg(int8|numeric). - Adds the new accumulators into the bootstrap entries for pg_proc. Performance gain is approx 33% (it is still slower than doing sum/count - possibly due to the construct/deconstruct overhead of the numeric transition array). Cheers Mark Index: src/backend/utils/adt/numeric.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/numeric.c,v retrieving revision 1.96 diff -c -r1.96 numeric.c *** src/backend/utils/adt/numeric.c 4 Oct 2006 00:29:59 -0000 1.96 --- src/backend/utils/adt/numeric.c 23 Nov 2006 08:51:44 -0000 *************** *** 2060,2066 **** */ static ArrayType * ! do_numeric_accum(ArrayType *transarray, Numeric newval) { Datum *transdatums; int ndatums; --- 2060,2066 ---- */ static ArrayType * ! do_numeric_accum(ArrayType *transarray, Numeric newval, bool useSumX2) { Datum *transdatums; int ndatums; *************** *** 2082,2095 **** N = DirectFunctionCall1(numeric_inc, N); sumX = DirectFunctionCall2(numeric_add, sumX, NumericGetDatum(newval)); ! sumX2 = DirectFunctionCall2(numeric_add, sumX2, DirectFunctionCall2(numeric_mul, NumericGetDatum(newval), NumericGetDatum(newval))); transdatums[0] = N; transdatums[1] = sumX; ! transdatums[2] = sumX2; result = construct_array(transdatums, 3, NUMERICOID, -1, false, 'i'); --- 2082,2097 ---- N = DirectFunctionCall1(numeric_inc, N); sumX = DirectFunctionCall2(numeric_add, sumX, NumericGetDatum(newval)); ! if (useSumX2) ! sumX2 = DirectFunctionCall2(numeric_add, sumX2, DirectFunctionCall2(numeric_mul, NumericGetDatum(newval), NumericGetDatum(newval))); transdatums[0] = N; transdatums[1] = sumX; ! if (useSumX2) ! transdatums[2] = sumX2; result = construct_array(transdatums, 3, NUMERICOID, -1, false, 'i'); *************** *** 2103,2109 **** ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); Numeric newval = PG_GETARG_NUMERIC(1); ! PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval)); } /* --- 2105,2123 ---- ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); Numeric newval = PG_GETARG_NUMERIC(1); ! PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval, true)); ! } ! ! /* ! * Optimized case for average of numeric. ! */ ! Datum ! numeric_avg_accum(PG_FUNCTION_ARGS) ! { ! ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ! Numeric newval = PG_GETARG_NUMERIC(1); ! ! PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval, false)); } /* *************** *** 2124,2130 **** newval = DatumGetNumeric(DirectFunctionCall1(int2_numeric, newval2)); ! PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval)); } Datum --- 2138,2144 ---- newval = DatumGetNumeric(DirectFunctionCall1(int2_numeric, newval2)); ! PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval, true)); } Datum *************** *** 2136,2142 **** newval = DatumGetNumeric(DirectFunctionCall1(int4_numeric, newval4)); ! PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval)); } Datum --- 2150,2156 ---- newval = DatumGetNumeric(DirectFunctionCall1(int4_numeric, newval4)); ! PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval, true)); } Datum *************** *** 2148,2156 **** newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric, newval8)); ! PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval)); } Datum numeric_avg(PG_FUNCTION_ARGS) { --- 2162,2186 ---- newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric, newval8)); ! PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval, true)); ! } ! ! /* ! * Optimized case for average of int8. ! */ ! Datum ! int8_avg_accum(PG_FUNCTION_ARGS) ! { ! ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); ! Datum newval8 = PG_GETARG_DATUM(1); ! Numeric newval; ! ! newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric, newval8)); ! ! PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval, false)); } + Datum numeric_avg(PG_FUNCTION_ARGS) { Index: src/include/catalog/pg_aggregate.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_aggregate.h,v retrieving revision 1.58 diff -c -r1.58 pg_aggregate.h *** src/include/catalog/pg_aggregate.h 4 Oct 2006 00:30:07 -0000 1.58 --- src/include/catalog/pg_aggregate.h 23 Nov 2006 08:51:49 -0000 *************** *** 80,89 **** */ /* avg */ ! DATA(insert ( 2100 int8_accum numeric_avg 0 1231 "{0,0,0}" )); DATA(insert ( 2101 int4_avg_accum int8_avg 0 1016 "{0,0}" )); DATA(insert ( 2102 int2_avg_accum int8_avg 0 1016 "{0,0}" )); ! DATA(insert ( 2103 numeric_accum numeric_avg 0 1231 "{0,0,0}" )); DATA(insert ( 2104 float4_accum float8_avg 0 1022 "{0,0,0}" )); DATA(insert ( 2105 float8_accum float8_avg 0 1022 "{0,0,0}" )); DATA(insert ( 2106 interval_accum interval_avg 0 1187 "{0 second,0 second}" )); --- 80,89 ---- */ /* avg */ ! DATA(insert ( 2100 int8_avg_accum numeric_avg 0 1231 "{0,0,0}" )); DATA(insert ( 2101 int4_avg_accum int8_avg 0 1016 "{0,0}" )); DATA(insert ( 2102 int2_avg_accum int8_avg 0 1016 "{0,0}" )); ! DATA(insert ( 2103 numeric_avg_accum numeric_avg 0 1231 "{0,0,0}" )); DATA(insert ( 2104 float4_accum float8_avg 0 1022 "{0,0,0}" )); DATA(insert ( 2105 float8_accum float8_avg 0 1022 "{0,0,0}" )); DATA(insert ( 2106 interval_accum interval_avg 0 1187 "{0 second,0 second}" )); Index: src/include/catalog/pg_proc.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_proc.h,v retrieving revision 1.427 diff -c -r1.427 pg_proc.h *** src/include/catalog/pg_proc.h 4 Oct 2006 00:30:07 -0000 1.427 --- src/include/catalog/pg_proc.h 23 Nov 2006 08:52:37 -0000 *************** *** 2697,2708 **** --- 2697,2710 ---- DATA(insert OID = 1832 ( float8_stddev_samp PGNSP PGUID 12 f f t f i 1 701 "1022" _null_ _null_ _null_ float8_stddev_samp- _null_ )); DESCR("STDDEV_SAMP aggregate final function"); DATA(insert OID = 1833 ( numeric_accum PGNSP PGUID 12 f f t f i 2 1231 "1231 1700" _null_ _null_ _null_ numeric_accum- _null_ )); + DATA(insert OID = 2858 ( numeric_avg_accum PGNSP PGUID 12 f f t f i 2 1231 "1231 1700" _null_ _null_ _null_ numeric_avg_accum- _null_ )); DESCR("aggregate transition function"); DATA(insert OID = 1834 ( int2_accum PGNSP PGUID 12 f f t f i 2 1231 "1231 21" _null_ _null_ _null_ int2_accum -_null_ )); DESCR("aggregate transition function"); DATA(insert OID = 1835 ( int4_accum PGNSP PGUID 12 f f t f i 2 1231 "1231 23" _null_ _null_ _null_ int4_accum -_null_ )); DESCR("aggregate transition function"); DATA(insert OID = 1836 ( int8_accum PGNSP PGUID 12 f f t f i 2 1231 "1231 20" _null_ _null_ _null_ int8_accum -_null_ )); + DATA(insert OID = 2857 ( int8_avg_accum PGNSP PGUID 12 f f t f i 2 1231 "1231 20" _null_ _null_ _null_ int8_avg_accum- _null_ )); DESCR("aggregate transition function"); DATA(insert OID = 1837 ( numeric_avg PGNSP PGUID 12 f f t f i 1 1700 "1231" _null_ _null_ _null_ numeric_avg- _null_ )); DESCR("AVG aggregate final function"); Index: src/include/utils/builtins.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/utils/builtins.h,v retrieving revision 1.282 diff -c -r1.282 builtins.h *** src/include/utils/builtins.h 18 Sep 2006 22:40:40 -0000 1.282 --- src/include/utils/builtins.h 23 Nov 2006 08:52:41 -0000 *************** *** 833,841 **** --- 833,843 ---- extern Datum text_numeric(PG_FUNCTION_ARGS); extern Datum numeric_text(PG_FUNCTION_ARGS); extern Datum numeric_accum(PG_FUNCTION_ARGS); + extern Datum numeric_avg_accum(PG_FUNCTION_ARGS); extern Datum int2_accum(PG_FUNCTION_ARGS); extern Datum int4_accum(PG_FUNCTION_ARGS); extern Datum int8_accum(PG_FUNCTION_ARGS); + extern Datum int8_avg_accum(PG_FUNCTION_ARGS); extern Datum numeric_avg(PG_FUNCTION_ARGS); extern Datum numeric_var_pop(PG_FUNCTION_ARGS); extern Datum numeric_var_samp(PG_FUNCTION_ARGS);
pgsql-patches by date: