Re: Avg performance for int8/numeric - Mailing list pgsql-patches
From | Bruce Momjian |
---|---|
Subject | Re: Avg performance for int8/numeric |
Date | |
Msg-id | 200702160455.l1G4trs10100@momjian.us Whole thread Raw |
In response to | Re: Avg performance for int8/numeric (Mark Kirkwood <markir@paradise.net.nz>) |
Responses |
Re: Avg performance for int8/numeric
|
List | pgsql-patches |
I have tested this patch but it generates regression failures. There was some code drift, so I am attaching an updated version of the patch, and the regression diffs. The 'four' column is an 'int4' so my guess is that somehow the wrong aggregate is being called. --------------------------------------------------------------------------- Mark Kirkwood wrote: > Neil Conway wrote: > > On Fri, 2006-11-24 at 11:08 +1300, Mark Kirkwood wrote: > >> - Modifies do_numeric_accum to have an extra bool parameter and does not > >> calc sumX2 when it is false. > > > > I think it would be clearer to reorganize this function slightly, and > > have only a single branch on "useSumX2". On first glance it isn't > > obviously that transdatums[2] is defined (but unchanged) when useSumX2 > > is false. > > > > Right - new patch attached that adds a new function do_numeric_avg_accum > that only uses N and sum(X). This means I could amend the avg aggregates > for numeric, int8 to have a initvalues of {0,0}. -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://www.enterprisedb.com + If your life is a hard drive, Christ can be your backup. + *** ./expected/aggregates.out Fri Jul 28 14:33:04 2006 --- ./results/aggregates.out Thu Feb 15 23:54:45 2007 *************** *** 238,248 **** -- user-defined aggregates SELECT newavg(four) AS avg_1 FROM onek; ! avg_1 ! -------------------- ! 1.5000000000000000 ! (1 row) ! SELECT newsum(four) AS sum_1500 FROM onek; sum_1500 ---------- --- 238,244 ---- -- user-defined aggregates SELECT newavg(four) AS avg_1 FROM onek; ! ERROR: expected 2-element numeric array SELECT newsum(four) AS sum_1500 FROM onek; sum_1500 ---------- ====================================================================== Index: src/backend/utils/adt/numeric.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/numeric.c,v retrieving revision 1.99 diff -c -c -r1.99 numeric.c *** src/backend/utils/adt/numeric.c 16 Jan 2007 21:41:13 -0000 1.99 --- src/backend/utils/adt/numeric.c 16 Feb 2007 04:30:27 -0000 *************** *** 2165,2170 **** --- 2165,2204 ---- return result; } + /* + * Improve avg performance by not caclulating sum(X*X). + */ + static ArrayType * + do_numeric_avg_accum(ArrayType *transarray, Numeric newval) + { + Datum *transdatums; + int ndatums; + Datum N, + sumX; + ArrayType *result; + + /* We assume the input is array of numeric */ + deconstruct_array(transarray, + NUMERICOID, -1, false, 'i', + &transdatums, NULL, &ndatums); + if (ndatums != 2) + elog(ERROR, "expected 2-element numeric array"); + N = transdatums[0]; + sumX = transdatums[1]; + + N = DirectFunctionCall1(numeric_inc, N); + sumX = DirectFunctionCall2(numeric_add, sumX, + NumericGetDatum(newval)); + + transdatums[0] = N; + transdatums[1] = sumX; + + result = construct_array(transdatums, 2, + NUMERICOID, -1, false, 'i'); + + return result; + } + Datum numeric_accum(PG_FUNCTION_ARGS) { *************** *** 2175,2180 **** --- 2209,2226 ---- } /* + * Optimized case for average of numeric. + */ + Datum + numeric_avg_accum(PG_FUNCTION_ARGS) + { + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + Numeric newval = PG_GETARG_NUMERIC(1); + + PG_RETURN_ARRAYTYPE_P(do_numeric_avg_accum(transarray, newval)); + } + + /* * Integer data types all use Numeric accumulators to share code and * avoid risk of overflow. For int2 and int4 inputs, Numeric accumulation * is overkill for the N and sum(X) values, but definitely not overkill *************** *** 2219,2224 **** --- 2265,2286 ---- PG_RETURN_ARRAYTYPE_P(do_numeric_accum(transarray, newval)); } + /* + * Optimized case for average of int8. + */ + Datum + int8_avg_accum(PG_FUNCTION_ARGS) + { + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + Datum newval8 = PG_GETARG_DATUM(1); + Numeric newval; + + newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric, newval8)); + + PG_RETURN_ARRAYTYPE_P(do_numeric_avg_accum(transarray, newval)); + } + + Datum numeric_avg(PG_FUNCTION_ARGS) { *************** *** 2232,2242 **** deconstruct_array(transarray, NUMERICOID, -1, false, 'i', &transdatums, NULL, &ndatums); ! if (ndatums != 3) ! elog(ERROR, "expected 3-element numeric array"); N = DatumGetNumeric(transdatums[0]); sumX = DatumGetNumeric(transdatums[1]); - /* ignore sumX2 */ /* SQL92 defines AVG of no values to be NULL */ /* N is zero iff no digits (cf. numeric_uminus) */ --- 2294,2303 ---- deconstruct_array(transarray, NUMERICOID, -1, false, 'i', &transdatums, NULL, &ndatums); ! if (ndatums != 2) ! elog(ERROR, "expected 2-element numeric array"); N = DatumGetNumeric(transdatums[0]); sumX = DatumGetNumeric(transdatums[1]); /* SQL92 defines AVG of no values to be NULL */ /* N is zero iff no digits (cf. numeric_uminus) */ Index: src/include/catalog/catversion.h =================================================================== RCS file: /cvsroot/pgsql/src/include/catalog/catversion.h,v retrieving revision 1.384 diff -c -c -r1.384 catversion.h *** src/include/catalog/catversion.h 14 Feb 2007 01:58:58 -0000 1.384 --- src/include/catalog/catversion.h 16 Feb 2007 04:30:28 -0000 *************** *** 53,58 **** */ /* yyyymmddN */ ! #define CATALOG_VERSION_NO 200702131 #endif --- 53,58 ---- */ /* yyyymmddN */ ! #define CATALOG_VERSION_NO 200702151 #endif Index: src/include/catalog/pg_aggregate.h =================================================================== RCS file: /cvsroot/pgsql/src/include/catalog/pg_aggregate.h,v retrieving revision 1.60 diff -c -c -r1.60 pg_aggregate.h *** src/include/catalog/pg_aggregate.h 20 Jan 2007 09:27:19 -0000 1.60 --- src/include/catalog/pg_aggregate.h 16 Feb 2007 04:30:28 -0000 *************** *** 80,89 **** */ /* avg */ ! DATA(insert ( 2100 int8_accum numeric_avg 0 1231 "{0,0,0}" )); DATA(insert ( 2101 int4_avg_accum int8_avg 0 1016 "{0,0}" )); DATA(insert ( 2102 int2_avg_accum int8_avg 0 1016 "{0,0}" )); ! DATA(insert ( 2103 numeric_accum numeric_avg 0 1231 "{0,0,0}" )); DATA(insert ( 2104 float4_accum float8_avg 0 1022 "{0,0,0}" )); DATA(insert ( 2105 float8_accum float8_avg 0 1022 "{0,0,0}" )); DATA(insert ( 2106 interval_accum interval_avg 0 1187 "{0 second,0 second}" )); --- 80,89 ---- */ /* avg */ ! DATA(insert ( 2100 int8_avg_accum numeric_avg 0 1231 "{0,0}" )); DATA(insert ( 2101 int4_avg_accum int8_avg 0 1016 "{0,0}" )); DATA(insert ( 2102 int2_avg_accum int8_avg 0 1016 "{0,0}" )); ! DATA(insert ( 2103 numeric_avg_accum numeric_avg 0 1231 "{0,0}" )); DATA(insert ( 2104 float4_accum float8_avg 0 1022 "{0,0,0}" )); DATA(insert ( 2105 float8_accum float8_avg 0 1022 "{0,0,0}" )); DATA(insert ( 2106 interval_accum interval_avg 0 1187 "{0 second,0 second}" )); Index: src/include/catalog/pg_proc.h =================================================================== RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v retrieving revision 1.443 diff -c -c -r1.443 pg_proc.h *** src/include/catalog/pg_proc.h 7 Feb 2007 23:11:30 -0000 1.443 --- src/include/catalog/pg_proc.h 16 Feb 2007 04:30:31 -0000 *************** *** 2744,2755 **** --- 2744,2759 ---- DESCR("STDDEV_SAMP aggregate final function"); DATA(insert OID = 1833 ( numeric_accum PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 1700" _null_ _null_ _null_ numeric_accum- _null_ )); DESCR("aggregate transition function"); + DATA(insert OID = 2858 ( numeric_avg_accum PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 1700" _null_ _null_ _null_ numeric_avg_accum- _null_ )); + DESCR("aggregate transition function"); DATA(insert OID = 1834 ( int2_accum PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 21" _null_ _null_ _null_ int2_accum- _null_ )); DESCR("aggregate transition function"); DATA(insert OID = 1835 ( int4_accum PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 23" _null_ _null_ _null_ int4_accum- _null_ )); DESCR("aggregate transition function"); DATA(insert OID = 1836 ( int8_accum PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 20" _null_ _null_ _null_ int8_accum- _null_ )); DESCR("aggregate transition function"); + DATA(insert OID = 2746 ( int8_avg_accum PGNSP PGUID 12 1 0 f f t f i 2 1231 "1231 20" _null_ _null_ _null_ int8_avg_accum- _null_ )); + DESCR("aggregate transition function"); DATA(insert OID = 1837 ( numeric_avg PGNSP PGUID 12 1 0 f f t f i 1 1700 "1231" _null_ _null_ _null_ numeric_avg- _null_ )); DESCR("AVG aggregate final function"); DATA(insert OID = 2514 ( numeric_var_pop PGNSP PGUID 12 1 0 f f t f i 1 1700 "1231" _null_ _null_ _null_ numeric_var_pop- _null_ )); Index: src/include/utils/builtins.h =================================================================== RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v retrieving revision 1.287 diff -c -c -r1.287 builtins.h *** src/include/utils/builtins.h 28 Jan 2007 16:16:54 -0000 1.287 --- src/include/utils/builtins.h 16 Feb 2007 04:30:32 -0000 *************** *** 841,849 **** --- 841,851 ---- extern Datum text_numeric(PG_FUNCTION_ARGS); extern Datum numeric_text(PG_FUNCTION_ARGS); extern Datum numeric_accum(PG_FUNCTION_ARGS); + extern Datum numeric_avg_accum(PG_FUNCTION_ARGS); extern Datum int2_accum(PG_FUNCTION_ARGS); extern Datum int4_accum(PG_FUNCTION_ARGS); extern Datum int8_accum(PG_FUNCTION_ARGS); + extern Datum int8_avg_accum(PG_FUNCTION_ARGS); extern Datum numeric_avg(PG_FUNCTION_ARGS); extern Datum numeric_var_pop(PG_FUNCTION_ARGS); extern Datum numeric_var_samp(PG_FUNCTION_ARGS);
pgsql-patches by date: