*** ./doc/src/sgml/func.sgml.orig 2010-10-03 08:57:09.000000000 +0100 --- ./doc/src/sgml/func.sgml 2010-10-03 08:58:29.000000000 +0100 *************** *** 10386,10391 **** --- 10386,10411 ---- + + Arithmetic median + median + + median(expression) + + + smallint, int, + bigint, real, double + precision, or numeric + + + double precision for floating-point arguments, + otherwise numeric + + arithmetic median + + + + regr_avgx(Y, X) *** ./src/backend/utils/adt/Makefile.orig 2010-10-03 09:05:29.000000000 +0100 --- ./src/backend/utils/adt/Makefile 2010-10-03 09:05:57.000000000 +0100 *************** *** 19,25 **** cash.o char.o date.o datetime.o datum.o domains.o \ enum.o float.o format_type.o \ geo_ops.o geo_selfuncs.o int.o int8.o like.o lockfuncs.o \ ! misc.o nabstime.o name.o numeric.o numutils.o \ oid.o oracle_compat.o pseudotypes.o rowtypes.o \ regexp.o regproc.o ruleutils.o selfuncs.o \ tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \ --- 19,25 ---- cash.o char.o date.o datetime.o datum.o domains.o \ enum.o float.o format_type.o \ geo_ops.o geo_selfuncs.o int.o int8.o like.o lockfuncs.o \ ! median.o misc.o nabstime.o name.o numeric.o numutils.o \ oid.o oracle_compat.o pseudotypes.o rowtypes.o \ regexp.o regproc.o ruleutils.o selfuncs.o \ tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \ *** ./src/backend/utils/adt/median.c.orig 2010-10-03 08:54:55.000000000 +0100 --- ./src/backend/utils/adt/median.c 2010-10-05 10:16:41.000000000 +0100 *************** *** 0 **** --- 1,387 ---- + #include "postgres.h" + + #include "fmgr.h" + #include "miscadmin.h" + + #include "catalog/pg_type.h" + #include "parser/parse_coerce.h" + #include "parser/parse_oper.h" + #include "utils/builtins.h" + #include "utils/lsyscache.h" + #include "utils/numeric.h" + #include "utils/tuplesort.h" + + /* + * used as type of state variable median's function. It uses a + * tuplesort as safe and inteligent storage. But we cannot to use + * a tuplesort under window aggregate context. + */ + typedef struct + { + int nelems; /* number of valid entries */ + Tuplesortstate *sortstate; + bool sorted; /* true if the entries have been sorted */ + bool free_tuplesort; /* true if the tuplesort should be freed */ + FmgrInfo cast_func_finfo; + bool datumTypByVal; + } MedianAggState; + + static void + freeDatum(Datum value, bool typByVal, bool isNull) + { + if (!typByVal && !isNull) + { + Pointer s = DatumGetPointer(value); + + pfree(s); + } + } + + static MedianAggState * + makeMedianAggState(FunctionCallInfo fcinfo, Oid valtype, Oid targettype) + { + MemoryContext oldctx; + MemoryContext aggcontext; + MedianAggState *aggstate; + Oid sortop, + castfunc; + CoercionPathType pathtype; + int16 typlen; + bool typbyval; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "median_transfn called in non-aggregate context"); + } + + oldctx = MemoryContextSwitchTo(aggcontext); + + aggstate = (MedianAggState *) palloc0(sizeof(MedianAggState)); + + valtype = get_fn_expr_argtype(fcinfo->flinfo, 1); + get_sort_group_operators(valtype, + true, false, false, + &sortop, NULL, NULL); + + /* lookup necessary attributies of the datum type, used for datumFree */ + get_typlenbyval(valtype, &typlen, &typbyval); + aggstate->datumTypByVal = typbyval; + + /* initialize a tuplesort */ + aggstate->sortstate = tuplesort_begin_datum(valtype, + sortop, + SORTBY_NULLS_DEFAULT, + work_mem, true); + aggstate->sorted = false; + aggstate->free_tuplesort = false; /* set in transfn, if necessary */ + + MemoryContextSwitchTo(oldctx); + + if (valtype != targettype) + { + /* find a cast function */ + pathtype = find_coercion_pathway(targettype, valtype, + COERCION_EXPLICIT, + &castfunc); + if (pathtype == COERCION_PATH_FUNC) + { + Assert(OidIsValid(castfunc)); + fmgr_info_cxt(castfunc, &aggstate->cast_func_finfo, + aggcontext); + } + else if (pathtype == COERCION_PATH_RELABELTYPE) + { + aggstate->cast_func_finfo.fn_oid = InvalidOid; + } + else + elog(ERROR, "no conversion function from %s %s", + format_type_be(valtype), + format_type_be(targettype)); + } + + return aggstate; + } + + /* + * append a non NULL value to tuplesort + */ + static Datum + common_median_transfn(FunctionCallInfo fcinfo, Oid typoid, Oid targetoid) + { + MedianAggState *aggstate; + + aggstate = PG_ARGISNULL(0) ? NULL : (MedianAggState *) PG_GETARG_POINTER(0); + + if (!PG_ARGISNULL(1)) + { + bool is_running_median = false; + + if (aggstate == NULL) + aggstate = makeMedianAggState(fcinfo, typoid, targetoid); + + if (aggstate->sorted) + { + /* + * We have already done a sort and computed a median value, so + * this must be a running median computation, over a WINDOW with + * an ORDER BY clause. + * + * Transfer all the data to a new tuplesort, so that we can sort + * it again in the final function. This is very inefficient, and + * should probably be replaced with a better algorithm one day. + * + * For now, this is the easiest solution. We might as well try + * to keep the new tuplesort in memory, since it will use up + * memory so slowly that it's unlikely to ever be a problem. + */ + MemoryContext aggcontext; + MemoryContext oldctx; + Oid valtype; + Oid sortop; + Tuplesortstate *sortstate; + Datum value; + bool isNull; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "median_transfn called in non-aggregate context"); + } + + oldctx = MemoryContextSwitchTo(aggcontext); + + valtype = get_fn_expr_argtype(fcinfo->flinfo, 1); + get_sort_group_operators(valtype, + true, false, false, + &sortop, NULL, NULL); + + sortstate = tuplesort_begin_datum(valtype, + sortop, + SORTBY_NULLS_DEFAULT, + 2000000, true); + + MemoryContextSwitchTo(oldctx); + + tuplesort_rescan(aggstate->sortstate); + while (tuplesort_getdatum(aggstate->sortstate, true, &value, &isNull)) + { + tuplesort_putdatum(sortstate, value, isNull); + freeDatum(value, aggstate->datumTypByVal, isNull); + } + + tuplesort_end(aggstate->sortstate); + + aggstate->sortstate = sortstate; + aggstate->sorted = false; + is_running_median = true; + } + + /* + * The final function should free the tuplesort unless this is a + * running median computation. If this is the first valid value, + * we don't actually know if it will be a running median, but it's + * safe to not free in that case anyway, since the tuplesort won't + * have used a temp file. + */ + aggstate->free_tuplesort = aggstate->nelems > 0 && !is_running_median; + + tuplesort_putdatum(aggstate->sortstate, PG_GETARG_DATUM(1), false); + aggstate->nelems++; + } + + PG_RETURN_POINTER(aggstate); + } + + /* + * just wrappers to be opr sanity checks happy + */ + Datum + median_numeric_transfn(PG_FUNCTION_ARGS) + { + return common_median_transfn(fcinfo, + NUMERICOID, NUMERICOID); + } + + Datum + median_int8_transfn(PG_FUNCTION_ARGS) + { + return common_median_transfn(fcinfo, + INT8OID, NUMERICOID); + } + + Datum + median_int4_transfn(PG_FUNCTION_ARGS) + { + return common_median_transfn(fcinfo, + INT4OID, NUMERICOID); + } + + Datum + median_int2_transfn(PG_FUNCTION_ARGS) + { + return common_median_transfn(fcinfo, + INT2OID, NUMERICOID); + } + + Datum + median_double_transfn(PG_FUNCTION_ARGS) + { + return common_median_transfn(fcinfo, + FLOAT8OID, FLOAT8OID); + } + + Datum + median_float_transfn(PG_FUNCTION_ARGS) + { + return common_median_transfn(fcinfo, + FLOAT4OID, FLOAT8OID); + } + + /* + * Used for reading values from tuplesort. The value has to be + * double or cast function is defined (and used). + */ + static double + to_double(Datum value, FmgrInfo *cast_func_finfo) + { + /* when valtype is same as target type, returns directly */ + if (cast_func_finfo->fn_oid == InvalidOid) + return DatumGetFloat8(value); + + return DatumGetFloat8(FunctionCall1(cast_func_finfo, value)); + } + + /* + * Used as final function for median when result is double. + */ + Datum + median_finalfn_double(PG_FUNCTION_ARGS) + { + MedianAggState *aggstate; + + Assert(AggCheckCallContext(fcinfo, NULL)); + + aggstate = PG_ARGISNULL(0) ? NULL : (MedianAggState *) PG_GETARG_POINTER(0); + + if (aggstate != NULL) + { + int lidx; + int hidx; + Datum value; + bool isNull; + int i = 1; + double result = 0; + + hidx = aggstate->nelems / 2 + 1; + lidx = (aggstate->nelems + 1) / 2; + + tuplesort_performsort(aggstate->sortstate); + aggstate->sorted = true; + + while (tuplesort_getdatum(aggstate->sortstate, true, &value, &isNull)) + { + if (i++ == lidx) + { + result = to_double(value, &aggstate->cast_func_finfo); + freeDatum(value, aggstate->datumTypByVal, isNull); + + if (lidx != hidx) + { + tuplesort_getdatum(aggstate->sortstate, true, &value, &isNull); + result = (result + to_double(value, &aggstate->cast_func_finfo)) / 2.0; + freeDatum(value, aggstate->datumTypByVal, isNull); + } + break; + } + } + + if (aggstate->free_tuplesort) + tuplesort_end(aggstate->sortstate); + + PG_RETURN_FLOAT8(result); + } + + PG_RETURN_NULL(); + } + + /* + * Used for reading values from tuplesort. The value has to be + * Numeric or cast function is defined (and used). + */ + static Numeric + to_numeric(Datum value, FmgrInfo *cast_func_finfo) + { + /* when valtype is same as target type, returns directly */ + if (cast_func_finfo->fn_oid == InvalidOid) + return DatumGetNumeric(value); + + return DatumGetNumeric(FunctionCall1(cast_func_finfo, value)); + } + + /* + * Used as final function for median when result is numeric. + */ + Datum + median_finalfn_numeric(PG_FUNCTION_ARGS) + { + MedianAggState *aggstate; + + Assert(AggCheckCallContext(fcinfo, NULL)); + + aggstate = PG_ARGISNULL(0) ? NULL : (MedianAggState *) PG_GETARG_POINTER(0); + + if (aggstate != NULL) + { + int lidx; + int hidx; + Datum a_value; + bool a_isNull; + int i = 1; + Numeric result = NULL; /* be compiler quiet */ + + hidx = aggstate->nelems / 2 + 1; + lidx = (aggstate->nelems + 1) / 2; + + tuplesort_performsort(aggstate->sortstate); + aggstate->sorted = true; + + while (tuplesort_getdatum(aggstate->sortstate, true, &a_value, &a_isNull)) + { + if (i++ == lidx) + { + result = to_numeric(a_value, &aggstate->cast_func_finfo); + + if (lidx != hidx) + { + Datum b_value; + bool b_isNull; + Numeric stack; + + tuplesort_getdatum(aggstate->sortstate, true, &b_value, &b_isNull); + + stack = to_numeric(b_value, &aggstate->cast_func_finfo); + + stack = DatumGetNumeric(DirectFunctionCall2(numeric_add, + NumericGetDatum(stack), + NumericGetDatum(result))); + result = DatumGetNumeric(DirectFunctionCall2(numeric_div, + NumericGetDatum(stack), + DirectFunctionCall1(float4_numeric, + Float4GetDatum(2.0)))); + freeDatum(b_value, aggstate->datumTypByVal, b_isNull); + } + break; + } + else + freeDatum(a_value, aggstate->datumTypByVal, a_isNull); + } + + if (aggstate->free_tuplesort) + tuplesort_end(aggstate->sortstate); + + PG_RETURN_NUMERIC(result); + } + + PG_RETURN_NULL(); + } *** ./src/include/catalog/pg_aggregate.h.orig 2010-10-03 08:57:09.000000000 +0100 --- ./src/include/catalog/pg_aggregate.h 2010-10-03 08:54:23.000000000 +0100 *************** *** 226,231 **** --- 226,239 ---- /* text */ DATA(insert ( 3538 string_agg_transfn string_agg_finalfn 0 2281 _null_ )); + /* median */ + DATA(insert ( 3123 median_double_transfn median_finalfn_double 0 2281 _null_ )); + DATA(insert ( 3124 median_float_transfn median_finalfn_double 0 2281 _null_ )); + DATA(insert ( 3125 median_numeric_transfn median_finalfn_numeric 0 2281 _null_ )); + DATA(insert ( 3126 median_int8_transfn median_finalfn_numeric 0 2281 _null_ )); + DATA(insert ( 3127 median_int4_transfn median_finalfn_numeric 0 2281 _null_ )); + DATA(insert ( 3128 median_int2_transfn median_finalfn_numeric 0 2281 _null_ )); + /* * prototypes for functions in pg_aggregate.c */ *** ./src/include/catalog/pg_proc.h.orig 2010-10-03 08:57:09.000000000 +0100 --- ./src/include/catalog/pg_proc.h 2010-10-03 08:54:23.000000000 +0100 *************** *** 4850,4855 **** --- 4850,4884 ---- DATA(insert OID = 3114 ( nth_value PGNSP PGUID 12 1 0 0 f t f t f i 2 0 2283 "2283 23" _null_ _null_ _null_ _null_ window_nth_value _null_ _null_ _null_ )); DESCR("fetch the Nth row value"); + /* median aggregate */ + DATA(insert OID = 3115 ( median_numeric_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 1700" _null_ _null_ _null_ _null_ median_numeric_transfn _null_ _null_ _null_ )); + DESCR("median transident function for numeric"); + DATA(insert OID = 3116 ( median_int8_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 20" _null_ _null_ _null_ _null_ median_int8_transfn _null_ _null_ _null_ )); + DESCR("median transident function for bigint"); + DATA(insert OID = 3117 ( median_int4_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 23" _null_ _null_ _null_ _null_ median_int4_transfn _null_ _null_ _null_ )); + DESCR("median transident function for int"); + DATA(insert OID = 3118 ( median_int2_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 21" _null_ _null_ _null_ _null_ median_int2_transfn _null_ _null_ _null_ )); + DESCR("median transident function for smallint"); + DATA(insert OID = 3119 ( median_double_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 701" _null_ _null_ _null_ _null_ median_double_transfn _null_ _null_ _null_ )); + DESCR("median transident function for double precision"); + DATA(insert OID = 3120 ( median_float_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 700" _null_ _null_ _null_ _null_ median_float_transfn _null_ _null_ _null_ )); + DESCR("median transident function for real"); + DATA(insert OID = 3121 ( median_finalfn_double PGNSP PGUID 12 1 0 0 f f f f f i 1 0 701 "2281" _null_ _null_ _null_ _null_ median_finalfn_double _null_ _null_ _null_ )); + DESCR("median final function with double precision result"); + DATA(insert OID = 3122 ( median_finalfn_numeric PGNSP PGUID 12 1 0 0 f f f f f i 1 0 1700 "2281" _null_ _null_ _null_ _null_ median_finalfn_numeric _null_ _null_ _null_ )); + DESCR("median final function with numeric result"); + DATA(insert OID = 3123 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 701 "701" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("median aggregate"); + DATA(insert OID = 3124 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 701 "700" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("median aggregate"); + DATA(insert OID = 3125 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "1700" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("median aggregate"); + DATA(insert OID = 3126 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "20" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("median aggregate"); + DATA(insert OID = 3127 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "23" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("median aggregate"); + DATA(insert OID = 3128 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "21" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("median aggregate"); /* * Symbolic values for provolatile column: these indicate whether the result *** ./src/include/utils/builtins.h.orig 2010-10-03 08:57:09.000000000 +0100 --- ./src/include/utils/builtins.h 2010-10-03 08:54:23.000000000 +0100 *************** *** 934,939 **** --- 934,947 ---- extern Datum int8_avg(PG_FUNCTION_ARGS); extern Datum width_bucket_numeric(PG_FUNCTION_ARGS); extern Datum hash_numeric(PG_FUNCTION_ARGS); + extern Datum median_numeric_transfn(PG_FUNCTION_ARGS); + extern Datum median_int8_transfn(PG_FUNCTION_ARGS); + extern Datum median_int4_transfn(PG_FUNCTION_ARGS); + extern Datum median_int2_transfn(PG_FUNCTION_ARGS); + extern Datum median_double_transfn(PG_FUNCTION_ARGS); + extern Datum median_float_transfn(PG_FUNCTION_ARGS); + extern Datum median_finalfn_double(PG_FUNCTION_ARGS); + extern Datum median_finalfn_numeric(PG_FUNCTION_ARGS); /* ri_triggers.c */ extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS); *** ./src/test/regress/expected/numeric.out.orig 2010-10-03 08:57:09.000000000 +0100 --- ./src/test/regress/expected/numeric.out 2010-10-03 09:01:36.000000000 +0100 *************** *** 1372,1374 **** --- 1372,1423 ---- 12345678901234567890 (1 row) + -- median test + create table median_test (a int, b bigint, c smallint, d numeric, e double precision, f real); + insert into median_test select i,i,i,i,i,i from generate_series(1,10) g(i); + select median(a), + median(b), + median(c), + median(d), + median(e), + median(f) from median_test; + median | median | median | median | median | median + --------------------+--------------------+--------------------+--------------------+--------+-------- + 5.5000000000000000 | 5.5000000000000000 | 5.5000000000000000 | 5.5000000000000000 | 5.5 | 5.5 + (1 row) + + truncate table median_test; + insert into median_test select i,i,i,i,i,i from generate_series(1,11) g(i); + select median(a), + median(b), + median(c), + median(d), + median(e), + median(f) from median_test; + median | median | median | median | median | median + --------+--------+--------+--------+--------+-------- + 6 | 6 | 6 | 6 | 6 | 6 + (1 row) + + select median(a) over (order by a), + median(b) over (order by a), + median(c) over (order by a), + median(d) over (order by a), + median(e) over (order by a), + median(f) over (order by a) from median_test; + median | median | median | median | median | median + --------------------+--------------------+--------------------+--------------------+--------+-------- + 1 | 1 | 1 | 1 | 1 | 1 + 1.5000000000000000 | 1.5000000000000000 | 1.5000000000000000 | 1.5000000000000000 | 1.5 | 1.5 + 2 | 2 | 2 | 2 | 2 | 2 + 2.5000000000000000 | 2.5000000000000000 | 2.5000000000000000 | 2.5000000000000000 | 2.5 | 2.5 + 3 | 3 | 3 | 3 | 3 | 3 + 3.5000000000000000 | 3.5000000000000000 | 3.5000000000000000 | 3.5000000000000000 | 3.5 | 3.5 + 4 | 4 | 4 | 4 | 4 | 4 + 4.5000000000000000 | 4.5000000000000000 | 4.5000000000000000 | 4.5000000000000000 | 4.5 | 4.5 + 5 | 5 | 5 | 5 | 5 | 5 + 5.5000000000000000 | 5.5000000000000000 | 5.5000000000000000 | 5.5000000000000000 | 5.5 | 5.5 + 6 | 6 | 6 | 6 | 6 | 6 + (11 rows) + + drop table median_test; *** ./src/test/regress/sql/numeric.sql.orig 2010-10-03 08:57:09.000000000 +0100 --- ./src/test/regress/sql/numeric.sql 2010-10-03 09:02:43.000000000 +0100 *************** *** 824,826 **** --- 824,852 ---- select 12345678901234567890 / 123; select div(12345678901234567890, 123); select div(12345678901234567890, 123) * 123 + 12345678901234567890 % 123; + + -- median test + create table median_test (a int, b bigint, c smallint, d numeric, e double precision, f real); + insert into median_test select i,i,i,i,i,i from generate_series(1,10) g(i); + select median(a), + median(b), + median(c), + median(d), + median(e), + median(f) from median_test; + truncate table median_test; + insert into median_test select i,i,i,i,i,i from generate_series(1,11) g(i); + select median(a), + median(b), + median(c), + median(d), + median(e), + median(f) from median_test; + select median(a) over (order by a), + median(b) over (order by a), + median(c) over (order by a), + median(d) over (order by a), + median(e) over (order by a), + median(f) over (order by a) from median_test; + + drop table median_test;