diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c new file mode 100644 index df35557..08e38e3 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -2401,8 +2401,21 @@ setseed(PG_FUNCTION_ARGS) * float8_stddev_samp - produce final result for float STDDEV_SAMP() * float8_stddev_pop - produce final result for float STDDEV_POP() * + * The naive schoolbook implementation of these aggregates works by + * accumulating sum(X) and sum(X^2). However, this approach suffers from + * large rounding errors in the final computation of quantities like the + * population variance (N*sum(X^2) - sum(X)^2) / N^2, since each of the + * intermediate terms is potentially very large, while the difference is often + * quite small. + * + * Instead we use Welford's algorithm which works by accumulating Mx=Sum(X)/N + * and Sxx=sum((X-Mx)^2), using a numerically stable algorithm to + * incrementally update those quantities. The final computations of each of + * the aggregate values is then trivial and gives more accurate results (for + * example, the population variance is just Sxx/N). + * * The transition datatype for all these aggregates is a 3-element array - * of float8, holding the values N, sum(X), sum(X*X) in that order. + * of float8, holding the values N, Mx, Sxx in that order. * * Note that we represent N as a float to avoid having to build a special * datatype. Given a reasonable floating-point implementation, there should @@ -2441,6 +2454,15 @@ float8_combine(PG_FUNCTION_ARGS) ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1); float8 *transvalues1; float8 *transvalues2; + float8 N1, + Mx1, + Sxx1, + N2, + Mx2, + Sxx2, + N, + Mx, + Sxx; if (!AggCheckCallContext(fcinfo, NULL)) elog(ERROR, "aggregate function called in non-aggregate context"); @@ -2448,9 +2470,51 @@ float8_combine(PG_FUNCTION_ARGS) transvalues1 = check_float8_array(transarray1, "float8_combine", 3); transvalues2 = check_float8_array(transarray2, "float8_combine", 3); - transvalues1[0] = transvalues1[0] + transvalues2[0]; - transvalues1[1] = float8_pl(transvalues1[1], transvalues2[1]); - transvalues1[2] = float8_pl(transvalues1[2], transvalues2[2]); + N1 = transvalues1[0]; + Mx1 = transvalues1[1]; + Sxx1 = transvalues1[2]; + + N2 = transvalues2[0]; + Mx2 = transvalues2[1]; + Sxx2 = transvalues2[2]; + + /*-------------------- + * The transition values combine using a generalization of Welford's + * algorithm as follows: + * + * N = N1 + N2 + * Mx = (N1 * Mx1 + N2 * Mx2) / N + * Sxx = Sxx1 + Sxx2 + N1 * N2 * (Mx1 - Mx2)^2 / N + * + * It's worth handling the special cases N1 = 0 and N2 = 0 separately + * since those cases are trivial, and we then don't need to worry about + * division-by-zero errors in the general case. + *-------------------- + */ + if (N1 == 0.0) + { + N = N2; + Mx = Mx2; + Sxx = Sxx2; + } + else if (N2 == 0.0) + { + N = N1; + Mx = Mx1; + Sxx = Sxx1; + } + else + { + N = N1 + N2; + Mx = (N1 * Mx1 + N2 * Mx2) / N; + check_float8_val(Mx, isinf(Mx1) || isinf(Mx2), true); + Sxx = Sxx1 + Sxx2 + (N1 * N2 * (Mx1 - Mx2) * (Mx1 - Mx2)) / N; + check_float8_val(Sxx, isinf(Sxx1) || isinf(Sxx2), true); + } + + transvalues1[0] = N; + transvalues1[1] = Mx; + transvalues1[2] = Sxx; PG_RETURN_ARRAYTYPE_P(transarray1); } @@ -2461,8 +2525,28 @@ float8_accum(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 newval = PG_GETARG_FLOAT8(1); float8 *transvalues; + float8 N, + Mx, + Sxx, + d1, + d2; transvalues = check_float8_array(transarray, "float8_accum", 3); + N = transvalues[0]; + Mx = transvalues[1]; + Sxx = transvalues[2]; + + /* + * Use Welford's algorithm to incorporate the new value into the + * transition values. + */ + N += 1.0; + d1 = newval - Mx; + Mx += d1 / N; + check_float8_val(Mx, isinf(transvalues[1]) || isinf(newval), true); + d2 = newval - Mx; + Sxx += d1 * d2; + check_float8_val(Sxx, isinf(transvalues[2]) || isinf(newval), true); /* * If we're invoked as an aggregate, we can cheat and modify our first @@ -2471,9 +2555,9 @@ float8_accum(PG_FUNCTION_ARGS) */ if (AggCheckCallContext(fcinfo, NULL)) { - transvalues[0] = transvalues[0] + 1.0; - transvalues[1] = float8_pl(transvalues[1], newval); - transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval)); + transvalues[0] = N; + transvalues[1] = Mx; + transvalues[2] = Sxx; PG_RETURN_ARRAYTYPE_P(transarray); } @@ -2482,9 +2566,9 @@ float8_accum(PG_FUNCTION_ARGS) Datum transdatums[3]; ArrayType *result; - transvalues[0] = transvalues[0] + 1.0; - transvalues[1] = float8_pl(transvalues[1], newval); - transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval)); + transdatums[0] = Float8GetDatumFast(N); + transdatums[1] = Float8GetDatumFast(Mx); + transdatums[2] = Float8GetDatumFast(Sxx); result = construct_array(transdatums, 3, FLOAT8OID, @@ -2502,8 +2586,28 @@ float4_accum(PG_FUNCTION_ARGS) /* do computations as float8 */ float8 newval = PG_GETARG_FLOAT4(1); float8 *transvalues; + float8 N, + Mx, + Sxx, + d1, + d2; transvalues = check_float8_array(transarray, "float4_accum", 3); + N = transvalues[0]; + Mx = transvalues[1]; + Sxx = transvalues[2]; + + /* + * Use Welford's algorithm to incorporate the new value into the + * transition values. + */ + N += 1.0; + d1 = newval - Mx; + Mx += d1 / N; + check_float8_val(Mx, isinf(transvalues[1]) || isinf(newval), true); + d2 = newval - Mx; + Sxx += d1 * d2; + check_float8_val(Sxx, isinf(transvalues[2]) || isinf(newval), true); /* * If we're invoked as an aggregate, we can cheat and modify our first @@ -2512,9 +2616,9 @@ float4_accum(PG_FUNCTION_ARGS) */ if (AggCheckCallContext(fcinfo, NULL)) { - transvalues[0] = transvalues[0] + 1.0; - transvalues[1] = float8_pl(transvalues[1], newval); - transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval)); + transvalues[0] = N; + transvalues[1] = Mx; + transvalues[2] = Sxx; PG_RETURN_ARRAYTYPE_P(transarray); } @@ -2523,9 +2627,9 @@ float4_accum(PG_FUNCTION_ARGS) Datum transdatums[3]; ArrayType *result; - transvalues[0] = transvalues[0] + 1.0; - transvalues[1] = float8_pl(transvalues[1], newval); - transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval)); + transdatums[0] = Float8GetDatumFast(N); + transdatums[1] = Float8GetDatumFast(Mx); + transdatums[2] = Float8GetDatumFast(Sxx); result = construct_array(transdatums, 3, FLOAT8OID, @@ -2541,18 +2645,18 @@ float8_avg(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX; + Mx; transvalues = check_float8_array(transarray, "float8_avg", 3); N = transvalues[0]; - sumX = transvalues[1]; - /* ignore sumX2 */ + Mx = transvalues[1]; + /* ignore Sxx */ /* SQL defines AVG of no values to be NULL */ if (N == 0.0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8(sumX / N); + PG_RETURN_FLOAT8(Mx); } Datum @@ -2561,27 +2665,22 @@ float8_var_pop(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - numerator; + Sxx; transvalues = check_float8_array(transarray, "float8_var_pop", 3); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; + /* ignore Mx */ + Sxx = transvalues[2]; /* Population variance is undefined when N is 0, so return NULL */ if (N == 0.0) PG_RETURN_NULL(); - numerator = N * sumX2 - sumX * sumX; - check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true); - - /* Watch out for roundoff error producing a negative numerator */ - if (numerator <= 0.0) + /* Watch out for roundoff error producing a negative variance */ + if (Sxx <= 0.0) PG_RETURN_FLOAT8(0.0); - PG_RETURN_FLOAT8(numerator / (N * N)); + PG_RETURN_FLOAT8(Sxx / N); } Datum @@ -2590,27 +2689,22 @@ float8_var_samp(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - numerator; + Sxx; transvalues = check_float8_array(transarray, "float8_var_samp", 3); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; + /* ignore Mx */ + Sxx = transvalues[2]; /* Sample variance is undefined when N is 0 or 1, so return NULL */ if (N <= 1.0) PG_RETURN_NULL(); - numerator = N * sumX2 - sumX * sumX; - check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true); - - /* Watch out for roundoff error producing a negative numerator */ - if (numerator <= 0.0) + /* Watch out for roundoff error producing a negative variance */ + if (Sxx <= 0.0) PG_RETURN_FLOAT8(0.0); - PG_RETURN_FLOAT8(numerator / (N * (N - 1.0))); + PG_RETURN_FLOAT8(Sxx / (N - 1.0)); } Datum @@ -2619,27 +2713,22 @@ float8_stddev_pop(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - numerator; + Sxx; transvalues = check_float8_array(transarray, "float8_stddev_pop", 3); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; + /* ignore Mx */ + Sxx = transvalues[2]; /* Population stddev is undefined when N is 0, so return NULL */ if (N == 0.0) PG_RETURN_NULL(); - numerator = N * sumX2 - sumX * sumX; - check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true); - - /* Watch out for roundoff error producing a negative numerator */ - if (numerator <= 0.0) + /* Watch out for roundoff error producing a negative variance */ + if (Sxx <= 0.0) PG_RETURN_FLOAT8(0.0); - PG_RETURN_FLOAT8(sqrt(numerator / (N * N))); + PG_RETURN_FLOAT8(sqrt(Sxx / N)); } Datum @@ -2648,27 +2737,22 @@ float8_stddev_samp(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - numerator; + Sxx; transvalues = check_float8_array(transarray, "float8_stddev_samp", 3); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; + /* ignore Mx */ + Sxx = transvalues[2]; /* Sample stddev is undefined when N is 0 or 1, so return NULL */ if (N <= 1.0) PG_RETURN_NULL(); - numerator = N * sumX2 - sumX * sumX; - check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true); - - /* Watch out for roundoff error producing a negative numerator */ - if (numerator <= 0.0) + /* Watch out for roundoff error producing a negative variance */ + if (Sxx <= 0.0) PG_RETURN_FLOAT8(0.0); - PG_RETURN_FLOAT8(sqrt(numerator / (N * (N - 1.0)))); + PG_RETURN_FLOAT8(sqrt(Sxx / (N - 1.0))); } /* @@ -2676,9 +2760,14 @@ float8_stddev_samp(PG_FUNCTION_ARGS) * SQL2003 BINARY AGGREGATES * ========================= * + * As with the preceding aggregates, we use Welford's algorithm to reduce + * rounding errors in the aggregate final functions. + * * The transition datatype for all these aggregates is a 6-element array of - * float8, holding the values N, sum(X), sum(X*X), sum(Y), sum(Y*Y), sum(X*Y) - * in that order. Note that Y is the first argument to the aggregates! + * float8, holding the values N, Mx=sum(X)/N, Sxx=sum((X-Mx)^2), My=sum(Y)/N, + * Syy=sum((Y-My)^2), Sxy=sum((X-Mx)*(Y-My)) in that order. + * + * Note that Y is the first argument to all these aggregates! * * It might seem attractive to optimize this by having multiple accumulator * functions that only calculate the sums actually needed. But on most @@ -2695,31 +2784,43 @@ float8_regr_accum(PG_FUNCTION_ARGS) float8 newvalX = PG_GETARG_FLOAT8(2); float8 *transvalues; float8 N, - sumX, - sumX2, - sumY, - sumY2, - sumXY; + Mx, + Sxx, + My, + Syy, + Sxy, + d1X, + d2X, + d1Y, + d2Y; transvalues = check_float8_array(transarray, "float8_regr_accum", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; - sumY = transvalues[3]; - sumY2 = transvalues[4]; - sumXY = transvalues[5]; + Mx = transvalues[1]; + Sxx = transvalues[2]; + My = transvalues[3]; + Syy = transvalues[4]; + Sxy = transvalues[5]; + /* + * Use Welford's algorithm to incorporate the new values into the + * transition values. + */ N += 1.0; - sumX += newvalX; - check_float8_val(sumX, isinf(transvalues[1]) || isinf(newvalX), true); - sumX2 += newvalX * newvalX; - check_float8_val(sumX2, isinf(transvalues[2]) || isinf(newvalX), true); - sumY += newvalY; - check_float8_val(sumY, isinf(transvalues[3]) || isinf(newvalY), true); - sumY2 += newvalY * newvalY; - check_float8_val(sumY2, isinf(transvalues[4]) || isinf(newvalY), true); - sumXY += newvalX * newvalY; - check_float8_val(sumXY, isinf(transvalues[5]) || isinf(newvalX) || + d1X = newvalX - Mx; + Mx += d1X / N; + check_float8_val(Mx, isinf(transvalues[1]) || isinf(newvalX), true); + d2X = newvalX - Mx; + Sxx += d1X * d2X; + check_float8_val(Sxx, isinf(transvalues[3]) || isinf(newvalX), true); + d1Y = newvalY - My; + My += d1Y / N; + check_float8_val(My, isinf(transvalues[2]) || isinf(newvalY), true); + d2Y = newvalY - My; + Syy += d1Y * d2Y; + check_float8_val(Syy, isinf(transvalues[4]) || isinf(newvalY), true); + Sxy += d1X * d2Y; + check_float8_val(Sxy, isinf(transvalues[5]) || isinf(newvalX) || isinf(newvalY), true); /* @@ -2730,11 +2831,11 @@ float8_regr_accum(PG_FUNCTION_ARGS) if (AggCheckCallContext(fcinfo, NULL)) { transvalues[0] = N; - transvalues[1] = sumX; - transvalues[2] = sumX2; - transvalues[3] = sumY; - transvalues[4] = sumY2; - transvalues[5] = sumXY; + transvalues[1] = Mx; + transvalues[2] = Sxx; + transvalues[3] = My; + transvalues[4] = Syy; + transvalues[5] = Sxy; PG_RETURN_ARRAYTYPE_P(transarray); } @@ -2744,11 +2845,11 @@ float8_regr_accum(PG_FUNCTION_ARGS) ArrayType *result; transdatums[0] = Float8GetDatumFast(N); - transdatums[1] = Float8GetDatumFast(sumX); - transdatums[2] = Float8GetDatumFast(sumX2); - transdatums[3] = Float8GetDatumFast(sumY); - transdatums[4] = Float8GetDatumFast(sumY2); - transdatums[5] = Float8GetDatumFast(sumXY); + transdatums[1] = Float8GetDatumFast(Mx); + transdatums[2] = Float8GetDatumFast(Sxx); + transdatums[3] = Float8GetDatumFast(My); + transdatums[4] = Float8GetDatumFast(Syy); + transdatums[5] = Float8GetDatumFast(Sxy); result = construct_array(transdatums, 6, FLOAT8OID, @@ -2773,6 +2874,24 @@ float8_regr_combine(PG_FUNCTION_ARGS) ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1); float8 *transvalues1; float8 *transvalues2; + float8 N1, + Mx1, + Sxx1, + My1, + Syy1, + Sxy1, + N2, + Mx2, + Sxx2, + My2, + Syy2, + Sxy2, + N, + Mx, + Sxx, + My, + Syy, + Sxy; if (!AggCheckCallContext(fcinfo, NULL)) elog(ERROR, "aggregate function called in non-aggregate context"); @@ -2780,12 +2899,75 @@ float8_regr_combine(PG_FUNCTION_ARGS) transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 6); transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 6); - transvalues1[0] = transvalues1[0] + transvalues2[0]; - transvalues1[1] = float8_pl(transvalues1[1], transvalues2[1]); - transvalues1[2] = float8_pl(transvalues1[2], transvalues2[2]); - transvalues1[3] = float8_pl(transvalues1[3], transvalues2[3]); - transvalues1[4] = float8_pl(transvalues1[4], transvalues2[4]); - transvalues1[5] = float8_pl(transvalues1[5], transvalues2[5]); + N1 = transvalues1[0]; + Mx1 = transvalues1[1]; + Sxx1 = transvalues1[2]; + My1 = transvalues1[3]; + Syy1 = transvalues1[4]; + Sxy1 = transvalues1[5]; + + N2 = transvalues2[0]; + Mx2 = transvalues2[1]; + Sxx2 = transvalues2[2]; + My2 = transvalues2[3]; + Syy2 = transvalues2[4]; + Sxy2 = transvalues2[5]; + + /*-------------------- + * The transition values combine using a generalization of Welford's + * algorithm as follows: + * + * N = N1 + N2 + * Mx = (N1 * Mx1 + N2 * Mx2) / N + * Sxx = Sxx1 + Sxx2 + N1 * N2 * (Mx1 - Mx2)^2 / N + * My = (N1 * My1 + N2 * My2) / N + * Syy = Syy1 + Syy2 + N1 * N2 * (My1 - My2)^2 / N + * Sxy = Sxy1 + Sxy2 + N1 * N2 * (Mx1 - Mx2) * (My1 - My2) / N + * + * It's worth handling the special cases N1 = 0 and N2 = 0 separately + * since those cases are trivial, and we then don't need to worry about + * division-by-zero errors in the general case. + *-------------------- + */ + if (N1 == 0.0) + { + N = N2; + Mx = Mx2; + Sxx = Sxx2; + My = My2; + Syy = Syy2; + Sxy = Sxy2; + } + else if (N2 == 0.0) + { + N = N1; + Mx = Mx1; + Sxx = Sxx1; + My = My1; + Syy = Syy1; + Sxy = Sxy1; + } + else + { + N = N1 + N2; + Mx = (N1 * Mx1 + N2 * Mx2) / N; + check_float8_val(Mx, isinf(Mx1) || isinf(Mx2), true); + Sxx = Sxx1 + Sxx2 + (N1 * N2 * (Mx1 - Mx2) * (Mx1 - Mx2)) / N; + check_float8_val(Sxx, isinf(Sxx1) || isinf(Sxx2), true); + My = (N1 * My1 + N2 * My2) / N; + check_float8_val(My, isinf(My1) || isinf(My2), true); + Syy = Syy1 + Syy2 + (N1 * N2 * (My1 - My2) * (My1 - My2)) / N; + check_float8_val(Syy, isinf(Syy1) || isinf(Syy2), true); + Sxy = Sxy1 + Sxy2 + (N1 * N2 * (Mx1 - Mx2) * (My1 - My2)) / N; + check_float8_val(Sxy, isinf(Sxy1) || isinf(Sxy2), true); + } + + transvalues1[0] = N; + transvalues1[1] = Mx; + transvalues1[2] = Sxx; + transvalues1[3] = My; + transvalues1[4] = Syy; + transvalues1[5] = Sxy; PG_RETURN_ARRAYTYPE_P(transarray1); } @@ -2797,27 +2979,21 @@ float8_regr_sxx(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - numerator; + Sxx; transvalues = check_float8_array(transarray, "float8_regr_sxx", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; + Sxx = transvalues[2]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numerator = N * sumX2 - sumX * sumX; - check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true); - - /* Watch out for roundoff error producing a negative numerator */ - if (numerator <= 0.0) + /* Watch out for roundoff error producing a negative result */ + if (Sxx <= 0.0) PG_RETURN_FLOAT8(0.0); - PG_RETURN_FLOAT8(numerator / N); + PG_RETURN_FLOAT8(Sxx); } Datum @@ -2826,27 +3002,21 @@ float8_regr_syy(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumY, - sumY2, - numerator; + Syy; transvalues = check_float8_array(transarray, "float8_regr_syy", 6); N = transvalues[0]; - sumY = transvalues[3]; - sumY2 = transvalues[4]; + Syy = transvalues[4]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numerator = N * sumY2 - sumY * sumY; - check_float8_val(numerator, isinf(sumY2) || isinf(sumY), true); - - /* Watch out for roundoff error producing a negative numerator */ - if (numerator <= 0.0) + /* Watch out for roundoff error producing a negative result */ + if (Syy <= 0.0) PG_RETURN_FLOAT8(0.0); - PG_RETURN_FLOAT8(numerator / N); + PG_RETURN_FLOAT8(Syy); } Datum @@ -2855,28 +3025,19 @@ float8_regr_sxy(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumY, - sumXY, - numerator; + Sxy; transvalues = check_float8_array(transarray, "float8_regr_sxy", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumY = transvalues[3]; - sumXY = transvalues[5]; + Sxy = transvalues[5]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numerator = N * sumXY - sumX * sumY; - check_float8_val(numerator, isinf(sumXY) || isinf(sumX) || - isinf(sumY), true); - /* A negative result is valid here */ - PG_RETURN_FLOAT8(numerator / N); + PG_RETURN_FLOAT8(Sxy); } Datum @@ -2885,17 +3046,17 @@ float8_regr_avgx(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX; + Mx; transvalues = check_float8_array(transarray, "float8_regr_avgx", 6); N = transvalues[0]; - sumX = transvalues[1]; + Mx = transvalues[1]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8(sumX / N); + PG_RETURN_FLOAT8(Mx); } Datum @@ -2904,17 +3065,17 @@ float8_regr_avgy(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumY; + My; transvalues = check_float8_array(transarray, "float8_regr_avgy", 6); N = transvalues[0]; - sumY = transvalues[3]; + My = transvalues[3]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8(sumY / N); + PG_RETURN_FLOAT8(My); } Datum @@ -2923,26 +3084,17 @@ float8_covar_pop(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumY, - sumXY, - numerator; + Sxy; transvalues = check_float8_array(transarray, "float8_covar_pop", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumY = transvalues[3]; - sumXY = transvalues[5]; + Sxy = transvalues[5]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numerator = N * sumXY - sumX * sumY; - check_float8_val(numerator, isinf(sumXY) || isinf(sumX) || - isinf(sumY), true); - - PG_RETURN_FLOAT8(numerator / (N * N)); + PG_RETURN_FLOAT8(Sxy / N); } Datum @@ -2951,26 +3103,17 @@ float8_covar_samp(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumY, - sumXY, - numerator; + Sxy; transvalues = check_float8_array(transarray, "float8_covar_samp", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumY = transvalues[3]; - sumXY = transvalues[5]; + Sxy = transvalues[5]; /* if N is <= 1 we should return NULL */ if (N < 2.0) PG_RETURN_NULL(); - numerator = N * sumXY - sumX * sumY; - check_float8_val(numerator, isinf(sumXY) || isinf(sumX) || - isinf(sumY), true); - - PG_RETURN_FLOAT8(numerator / (N * (N - 1.0))); + PG_RETURN_FLOAT8(Sxy / (N - 1.0)); } Datum @@ -2979,38 +3122,24 @@ float8_corr(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - sumY, - sumY2, - sumXY, - numeratorX, - numeratorY, - numeratorXY; + Sxx, + Syy, + Sxy; transvalues = check_float8_array(transarray, "float8_corr", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; - sumY = transvalues[3]; - sumY2 = transvalues[4]; - sumXY = transvalues[5]; + Sxx = transvalues[2]; + Syy = transvalues[4]; + Sxy = transvalues[5]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numeratorX = N * sumX2 - sumX * sumX; - check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true); - numeratorY = N * sumY2 - sumY * sumY; - check_float8_val(numeratorY, isinf(sumY2) || isinf(sumY), true); - numeratorXY = N * sumXY - sumX * sumY; - check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) || - isinf(sumY), true); - if (numeratorX <= 0 || numeratorY <= 0) + if (Sxx <= 0 || Syy <= 0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8(numeratorXY / sqrt(numeratorX * numeratorY)); + PG_RETURN_FLOAT8(Sxy / sqrt(Sxx * Syy)); } Datum @@ -3019,42 +3148,27 @@ float8_regr_r2(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - sumY, - sumY2, - sumXY, - numeratorX, - numeratorY, - numeratorXY; + Sxx, + Syy, + Sxy; transvalues = check_float8_array(transarray, "float8_regr_r2", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; - sumY = transvalues[3]; - sumY2 = transvalues[4]; - sumXY = transvalues[5]; + Sxx = transvalues[2]; + Syy = transvalues[4]; + Sxy = transvalues[5]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numeratorX = N * sumX2 - sumX * sumX; - check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true); - numeratorY = N * sumY2 - sumY * sumY; - check_float8_val(numeratorY, isinf(sumY2) || isinf(sumY), true); - numeratorXY = N * sumXY - sumX * sumY; - check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) || - isinf(sumY), true); - if (numeratorX <= 0) + if (Sxx <= 0) PG_RETURN_NULL(); /* per spec, horizontal line produces 1.0 */ - if (numeratorY <= 0) + if (Syy <= 0) PG_RETURN_FLOAT8(1.0); - PG_RETURN_FLOAT8((numeratorXY * numeratorXY) / - (numeratorX * numeratorY)); + PG_RETURN_FLOAT8((Sxy * Sxy) / (Sxx * Syy)); } Datum @@ -3063,33 +3177,22 @@ float8_regr_slope(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - sumY, - sumXY, - numeratorX, - numeratorXY; + Sxx, + Sxy; transvalues = check_float8_array(transarray, "float8_regr_slope", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; - sumY = transvalues[3]; - sumXY = transvalues[5]; + Sxx = transvalues[2]; + Sxy = transvalues[5]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numeratorX = N * sumX2 - sumX * sumX; - check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true); - numeratorXY = N * sumXY - sumX * sumY; - check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) || - isinf(sumY), true); - if (numeratorX <= 0) + if (Sxx <= 0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8(numeratorXY / numeratorX); + PG_RETURN_FLOAT8(Sxy / Sxx); } Datum @@ -3098,33 +3201,26 @@ float8_regr_intercept(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - sumY, - sumXY, - numeratorX, - numeratorXXY; + Mx, + Sxx, + My, + Sxy; transvalues = check_float8_array(transarray, "float8_regr_intercept", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; - sumY = transvalues[3]; - sumXY = transvalues[5]; + Mx = transvalues[1]; + Sxx = transvalues[2]; + My = transvalues[3]; + Sxy = transvalues[5]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numeratorX = N * sumX2 - sumX * sumX; - check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true); - numeratorXXY = sumY * sumX2 - sumX * sumXY; - check_float8_val(numeratorXXY, isinf(sumY) || isinf(sumX2) || - isinf(sumX) || isinf(sumXY), true); - if (numeratorX <= 0) + if (Sxx <= 0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8(numeratorXXY / numeratorX); + PG_RETURN_FLOAT8(My - Mx * Sxy / Sxx); }