array_accum aggregate - Mailing list pgsql-patches

From Stephen Frost
Subject array_accum aggregate
Date
Msg-id 20061011045116.GW24675@kenobi.snowman.net
Whole thread Raw
Responses Re: array_accum aggregate  (Neil Conway <neilc@samurai.com>)
List pgsql-patches
Greetings,

  Please find below a patch to add the array_accum aggregate as a
  built-in using two new C functions defined in array_userfuncs.c.
  These functions simply expose the pre-existing efficient array
  building routines used elsewhere in the backend (accumArrayResult
  and makeArrayResult, specifically).   An array_accum aggregate has
  existed in the documentation for quite some time using the
  inefficient (for larger arrays) array_append routine.  The
  documentation around the example has also been updated to reflect
  the addition of this built-in.

  Documentation and a regression test are also included.

      Thanks,

        Stephen

Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /projects/cvsroot/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.343
diff -c -r1.343 func.sgml
*** doc/src/sgml/func.sgml    1 Oct 2006 18:54:31 -0000    1.343
--- doc/src/sgml/func.sgml    11 Oct 2006 04:38:45 -0000
***************
*** 7851,7856 ****
--- 7851,7872 ----
       <row>
        <entry>
         <indexterm>
+         <primary>array_accum</primary>
+        </indexterm>
+        <function>array_accum(<replaceable class="parameter">anyelement</replaceable>)</function>
+       </entry>
+       <entry>
+        <type>anyelement</type>
+       </entry>
+       <entry>
+         array of elements of same type as argument type
+       </entry>
+       <entry>an array of all input elements (NULLs, non-nulls, and duplicates)</entry>
+      </row>
+
+      <row>
+       <entry>
+        <indexterm>
          <primary>average</primary>
         </indexterm>
         <function>avg(<replaceable class="parameter">expression</replaceable>)</function>
Index: doc/src/sgml/xaggr.sgml
===================================================================
RCS file: /projects/cvsroot/pgsql/doc/src/sgml/xaggr.sgml,v
retrieving revision 1.33
diff -c -r1.33 xaggr.sgml
*** doc/src/sgml/xaggr.sgml    16 Sep 2006 00:30:16 -0000    1.33
--- doc/src/sgml/xaggr.sgml    11 Oct 2006 04:38:45 -0000
***************
*** 132,138 ****
  </programlisting>

     Here, the actual state type for any aggregate call is the array type
!    having the actual input type as elements.
    </para>

    <para>
--- 132,141 ----
  </programlisting>

     Here, the actual state type for any aggregate call is the array type
!    having the actual input type as elements.  Note: array_accum() is now
!    a built-in aggregate which uses a much more efficient mechanism than
!    that which is provided by array_append, prior users of array_accum()
!    may be pleasantly suprised at the marked improvment for larger arrays.
    </para>

    <para>
Index: src/backend/utils/adt/array_userfuncs.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/array_userfuncs.c,v
retrieving revision 1.20
diff -c -r1.20 array_userfuncs.c
*** src/backend/utils/adt/array_userfuncs.c    14 Jul 2006 14:52:23 -0000    1.20
--- src/backend/utils/adt/array_userfuncs.c    11 Oct 2006 04:38:46 -0000
***************
*** 15,20 ****
--- 15,22 ----
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/lsyscache.h"
+ #include "utils/memutils.h"
+ #include "nodes/execnodes.h"


  /*-----------------------------------------------------------------------------
***************
*** 399,404 ****
--- 401,516 ----
      PG_RETURN_ARRAYTYPE_P(result);
  }

+ /* Structure, used by aaccum_sfunc and aaccum_ffunc to
+  * implement the array_accum() aggregate, for storing
+  * pointers to the ArrayBuildState for the array we are
+  * building and the MemoryContext in which it is being
+  * built.  Note that this structure is
+  * considered an 'anyarray' externally, which is a
+  * variable-length datatype, and therefore
+  * must open with an int32 defining the length. */
+ typedef struct {
+     int32                 vl_len;
+     ArrayBuildState        *astate;
+     MemoryContext         arrctx;
+ } aaccum_info;
+
+ /*-----------------------------------------------------------------------------
+  * aaccum_sfunc :
+  *      State transistion function for the array_accum() aggregate,
+  *      efficiently builds an in-memory array by working in blocks and
+  *      minimizing realloc()'s and copying of the data in general.
+  *      Creates a seperate memory context attached to the AggContext into
+  *      which the array is built.  That context is free'd when the final
+  *      function is called (aaccum_ffunc).  accumArrayResult() does all
+  *      the heavy lifting here, this is really just a glue function.
+  *----------------------------------------------------------------------------
+  */
+ Datum
+ aaccum_sfunc(PG_FUNCTION_ARGS)
+ {
+     aaccum_info        *ainfo;
+     AggState        *aggstate;
+
+     /* Make sure we are being called in an aggregate. */
+     if (!fcinfo->context || !IsA(fcinfo->context, AggState))
+         ereport(ERROR,
+                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                  errmsg("Can not call aaccum_sfunc as a non-aggregate"),
+                  errhint("Use the array_accum aggregate")));
+
+     aggstate = (AggState*) fcinfo->context;
+
+     /* Initial call passes NULL in for our state variable.
+      * Allocate memory to store the pointers in and create
+      * our context. */
+     if (PG_ARGISNULL(0)) {
+         /* Allocate memory to hold the pointers to the ArrayBuildState
+          * and the MemoryContext where we are building the array.  Note
+          * that we can do this in the CurrentMemoryContext because when
+          * we return the storage "bytea" will be copied into the AggState
+          * context by the caller and passed back to us on the next call. */
+         ainfo = (aaccum_info*) palloc(sizeof(aaccum_info));
+         ainfo->vl_len = sizeof(aaccum_info);
+         ainfo->astate = NULL;
+
+         /* New context created which will store our array accumulation.
+          * The parent is the AggContext for this query since it needs to
+          * persist for the same timeframe as the state value.
+          * The state value holds the pointers to the ArrayBuildState and this
+          * MemoryContext through the aaccum_info structure. */
+         ainfo->arrctx = AllocSetContextCreate(aggstate->aggcontext, "ArrayAccumCtx",
+                                               ALLOCSET_DEFAULT_MINSIZE,
+                                               ALLOCSET_DEFAULT_INITSIZE,
+                                               ALLOCSET_DEFAULT_MAXSIZE);
+     } else {
+         /* Our state variable is non-null, therefore it must be an existing
+          * ainfo structure. */
+         ainfo = (aaccum_info*) PG_GETARG_BYTEA_P(0);
+     }
+
+     /* Pull the element to be added and pass it along with the ArrayBuildState
+      * and ArrayAccumCtx MemoryContext to accumArrayResult, checking if it is
+      * NULL or not. */
+     ainfo->astate = accumArrayResult(ainfo->astate,
+                                      PG_ARGISNULL(1) ? (Datum) 0 : PG_GETARG_DATUM(1),
+                                      PG_ARGISNULL(1),
+                                      get_fn_expr_argtype(fcinfo->flinfo, 1),
+                                      ainfo->arrctx);
+
+     /* Caller will copy storage into the AggContext after the first call and then
+      * should not touch it as we will always return the same pointer passed in. */
+     PG_RETURN_BYTEA_P(ainfo);
+ }
+
+ /*-----------------------------------------------------------------------------
+  * aaccum_ffunc :
+  *      Final function for the array_accum() aggregate, creates the final
+  *      finished array and passes it back to the user.  Also deletes the
+  *      memory context created by the aaccum_sfunc().  makeArrayResult()
+  *      does all the heavy lifting here, this is really just a glue function.
+  *----------------------------------------------------------------------------
+  */
+ Datum
+ aaccum_ffunc(PG_FUNCTION_ARGS)
+ {
+     aaccum_info        *ainfo;
+
+     /* Check if we are passed in a NULL */
+     if (PG_ARGISNULL(0)) PG_RETURN_ARRAYTYPE_P(NULL);
+
+     /* Make sure we are being called in an aggregate. */
+     if (!fcinfo->context || !IsA(fcinfo->context, AggState))
+         ereport(ERROR,
+                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                  errmsg("Can not call aaccum_sfunc as a non-aggregate"),
+                  errhint("Use the array_accum aggregate")));
+
+     ainfo = (aaccum_info*) PG_GETARG_BYTEA_P(0);
+
+     /* makeArrayResult will delete ainfo->arrctx for us. */
+     PG_RETURN_ARRAYTYPE_P(makeArrayResult(ainfo->astate, ainfo->arrctx));
+ }

  /*
   * used by text_to_array() in varlena.c
Index: src/include/catalog/pg_aggregate.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_aggregate.h,v
retrieving revision 1.58
diff -c -r1.58 pg_aggregate.h
*** src/include/catalog/pg_aggregate.h    4 Oct 2006 00:30:07 -0000    1.58
--- src/include/catalog/pg_aggregate.h    11 Oct 2006 04:38:46 -0000
***************
*** 221,226 ****
--- 221,229 ----
  DATA(insert ( 2242 bitand          -                    0    1560    _null_ ));
  DATA(insert ( 2243 bitor          -                    0    1560    _null_ ));

+ /* array accumulation */
+ DATA(insert ( 322    aaccum_sfunc          aaccum_ffunc            0    2277    _null_ ));
+
  /*
   * prototypes for functions in pg_aggregate.c
   */
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.427
diff -c -r1.427 pg_proc.h
*** src/include/catalog/pg_proc.h    4 Oct 2006 00:30:07 -0000    1.427
--- src/include/catalog/pg_proc.h    11 Oct 2006 04:38:47 -0000
***************
*** 1017,1022 ****
--- 1017,1026 ----
  DESCR("larger of two");
  DATA(insert OID = 516 (  array_smaller       PGNSP PGUID 12 f f t f i 2 2277 "2277 2277" _null_ _null_ _null_
array_smaller- _null_ )); 
  DESCR("smaller of two");
+ DATA(insert OID = 320 (  aaccum_sfunc       PGNSP PGUID 12 f f f f i 2 2277 "2277 2283" _null_ _null_ _null_
aaccum_sfunc- _null_ )); 
+ DESCR("array_accum aggregate state function");
+ DATA(insert OID = 321 (  aaccum_ffunc       PGNSP PGUID 12 f f f f i 1 2277 "2277" _null_ _null_ _null_ aaccum_ffunc
-_null_ )); 
+ DESCR("array_accum aggregate final function");

  DATA(insert OID = 760 (  smgrin               PGNSP PGUID 12 f f t f s 1 210 "2275" _null_ _null_ _null_  smgrin -
_null_)); 
  DESCR("I/O");
***************
*** 3252,3257 ****
--- 3256,3263 ----
  DATA(insert OID = 2828 (  covar_samp        PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_
aggregate_dummy- _null_ )); 
  DATA(insert OID = 2829 (  corr                PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_
aggregate_dummy- _null_ )); 

+ DATA(insert OID = 322 (  array_accum        PGNSP PGUID 12 t f f f i 1 2277 "2283" _null_ _null_ _null_
aggregate_dummy- _null_ )); 
+
  DATA(insert OID = 2160 ( text_pattern_lt     PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_
text_pattern_lt- _null_ )); 
  DATA(insert OID = 2161 ( text_pattern_le     PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_
text_pattern_le- _null_ )); 
  DATA(insert OID = 2162 ( text_pattern_eq     PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_
text_pattern_eq- _null_ )); 
Index: src/include/utils/array.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/utils/array.h,v
retrieving revision 1.59
diff -c -r1.59 array.h
*** src/include/utils/array.h    10 Sep 2006 20:14:20 -0000    1.59
--- src/include/utils/array.h    11 Oct 2006 04:38:47 -0000
***************
*** 266,271 ****
--- 266,274 ----
   */
  extern Datum array_push(PG_FUNCTION_ARGS);
  extern Datum array_cat(PG_FUNCTION_ARGS);
+ extern Datum aaccum_sfunc(PG_FUNCTION_ARGS);
+ extern Datum aaccum_ffunc(PG_FUNCTION_ARGS);
+

  extern ArrayType *create_singleton_array(FunctionCallInfo fcinfo,
                         Oid element_type,
Index: src/test/regress/expected/aggregates.out
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/expected/aggregates.out,v
retrieving revision 1.15
diff -c -r1.15 aggregates.out
*** src/test/regress/expected/aggregates.out    28 Jul 2006 18:33:04 -0000    1.15
--- src/test/regress/expected/aggregates.out    11 Oct 2006 04:38:47 -0000
***************
*** 236,241 ****
--- 236,248 ----
     9 |   100 |   4
  (10 rows)

+ -- array accumulation aggregate
+ SELECT array_accum(generate_series) from generate_series(0,5);
+   array_accum
+ ---------------
+  {0,1,2,3,4,5}
+ (1 row)
+
  -- user-defined aggregates
  SELECT newavg(four) AS avg_1 FROM onek;
         avg_1
Index: src/test/regress/sql/aggregates.sql
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/sql/aggregates.sql,v
retrieving revision 1.13
diff -c -r1.13 aggregates.sql
*** src/test/regress/sql/aggregates.sql    28 Jul 2006 18:33:04 -0000    1.13
--- src/test/regress/sql/aggregates.sql    11 Oct 2006 04:38:48 -0000
***************
*** 59,64 ****
--- 59,67 ----
  select ten, count(four), sum(DISTINCT four) from onek
  group by ten order by ten;

+ -- array accumulation aggregate
+ SELECT array_accum(generate_series) from generate_series(0,5);
+
  -- user-defined aggregates
  SELECT newavg(four) AS avg_1 FROM onek;
  SELECT newsum(four) AS sum_1500 FROM onek;

pgsql-patches by date:

Previous
From: Michael Glaesemann
Date:
Subject: Constraint trigger doc patch
Next
From: "Hiroshi Saito"
Date:
Subject: warning: "min" redefined of qsort.