Re: WITHIN GROUP patch - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: WITHIN GROUP patch |
Date | |
Msg-id | 24114.1389149362@sss.pgh.pa.us Whole thread Raw |
In response to | Re: WITHIN GROUP patch (Tom Lane <tgl@sss.pgh.pa.us>) |
List | pgsql-hackers |
I wrote: > There's probably some overhead from traversing advance_transition_function > for each row, which your version wouldn't have done. 15% sounds pretty > high for that though, since advance_transition_function doesn't have much > to do when the transition function is non strict and the state value is > pass-by-value (which "internal" is). It's possible that we could do > something to micro-optimize that case. The most obvious micro-optimization that's possible there is to avoid redoing InitFunctionCallInfoData for each row. I tried this as in the attached patch. It seems to be good for about half a percent overall savings on your example case. That's not much, but then again this helps for *all* aggregates, and many of them are cheaper than ordered aggregates. I see about 2% overall savings on select count(*) from generate_series(1,1000000); which seems like a more interesting number. As against that, the roughly-kilobyte-sized FunctionCallInfoData is no longer just transient data on the stack, but persists for the lifespan of the query. We pay that already for plain FuncExpr and OpExpr nodes, though. On balance, I'm inclined to apply this; the performance benefit may be marginal but it seems like it makes advance_transition_function's API a tad cleaner by reducing the number of distinct structs it's hacking on. Comments? regards, tom lane diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 827b009..0dafb51 100644 *** a/src/backend/executor/nodeAgg.c --- b/src/backend/executor/nodeAgg.c *************** typedef struct AggStatePerAggData *** 235,240 **** --- 235,248 ---- */ Tuplesortstate *sortstate; /* sort object, if DISTINCT or ORDER BY */ + + /* + * This field is a pre-initialized FunctionCallInfo struct used for + * calling this aggregate's transfn. We save a few cycles per row by not + * re-initializing the unchanging fields; which isn't much, but it seems + * worth the extra space consumption. + */ + FunctionCallInfoData transfn_fcinfo; } AggStatePerAggData; /* *************** static void initialize_aggregates(AggSta *** 290,297 **** AggStatePerGroup pergroup); static void advance_transition_function(AggState *aggstate, AggStatePerAgg peraggstate, ! AggStatePerGroup pergroupstate, ! FunctionCallInfoData *fcinfo); static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup); static void process_ordered_aggregate_single(AggState *aggstate, AggStatePerAgg peraggstate, --- 298,304 ---- AggStatePerGroup pergroup); static void advance_transition_function(AggState *aggstate, AggStatePerAgg peraggstate, ! AggStatePerGroup pergroupstate); static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup); static void process_ordered_aggregate_single(AggState *aggstate, AggStatePerAgg peraggstate, *************** initialize_aggregates(AggState *aggstate *** 399,419 **** * Given new input value(s), advance the transition function of an aggregate. * * The new values (and null flags) have been preloaded into argument positions ! * 1 and up in fcinfo, so that we needn't copy them again to pass to the ! * transition function. No other fields of fcinfo are assumed valid. * * It doesn't matter which memory context this is called in. */ static void advance_transition_function(AggState *aggstate, AggStatePerAgg peraggstate, ! AggStatePerGroup pergroupstate, ! FunctionCallInfoData *fcinfo) { ! int numTransInputs = peraggstate->numTransInputs; MemoryContext oldContext; Datum newVal; - int i; if (peraggstate->transfn.fn_strict) { --- 406,425 ---- * Given new input value(s), advance the transition function of an aggregate. * * The new values (and null flags) have been preloaded into argument positions ! * 1 and up in peraggstate->transfn_fcinfo, so that we needn't copy them again ! * to pass to the transition function. We also expect that the static fields ! * of the fcinfo are already initialized; that was done by ExecInitAgg(). * * It doesn't matter which memory context this is called in. */ static void advance_transition_function(AggState *aggstate, AggStatePerAgg peraggstate, ! AggStatePerGroup pergroupstate) { ! FunctionCallInfoData *fcinfo = &peraggstate->transfn_fcinfo; MemoryContext oldContext; Datum newVal; if (peraggstate->transfn.fn_strict) { *************** advance_transition_function(AggState *ag *** 421,426 **** --- 427,435 ---- * For a strict transfn, nothing happens when there's a NULL input; we * just keep the prior transValue. */ + int numTransInputs = peraggstate->numTransInputs; + int i; + for (i = 1; i <= numTransInputs; i++) { if (fcinfo->argnull[i]) *************** advance_transition_function(AggState *ag *** 467,478 **** /* * OK to call the transition function */ - InitFunctionCallInfoData(*fcinfo, &(peraggstate->transfn), - numTransInputs + 1, - peraggstate->aggCollation, - (void *) aggstate, NULL); fcinfo->arg[0] = pergroupstate->transValue; fcinfo->argnull[0] = pergroupstate->transValueIsNull; newVal = FunctionCallInvoke(fcinfo); --- 476,484 ---- /* * OK to call the transition function */ fcinfo->arg[0] = pergroupstate->transValue; fcinfo->argnull[0] = pergroupstate->transValueIsNull; + fcinfo->isnull = false; /* just in case transfn doesn't set it */ newVal = FunctionCallInvoke(fcinfo); *************** advance_aggregates(AggState *aggstate, A *** 574,592 **** else { /* We can apply the transition function immediately */ ! FunctionCallInfoData fcinfo; /* Load values into fcinfo */ /* Start from 1, since the 0th arg will be the transition value */ Assert(slot->tts_nvalid >= numTransInputs); for (i = 0; i < numTransInputs; i++) { ! fcinfo.arg[i + 1] = slot->tts_values[i]; ! fcinfo.argnull[i + 1] = slot->tts_isnull[i]; } ! advance_transition_function(aggstate, peraggstate, pergroupstate, ! &fcinfo); } } } --- 580,597 ---- else { /* We can apply the transition function immediately */ ! FunctionCallInfoData *fcinfo = &peraggstate->transfn_fcinfo; /* Load values into fcinfo */ /* Start from 1, since the 0th arg will be the transition value */ Assert(slot->tts_nvalid >= numTransInputs); for (i = 0; i < numTransInputs; i++) { ! fcinfo->arg[i + 1] = slot->tts_values[i]; ! fcinfo->argnull[i + 1] = slot->tts_isnull[i]; } ! advance_transition_function(aggstate, peraggstate, pergroupstate); } } } *************** process_ordered_aggregate_single(AggStat *** 622,638 **** MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory; MemoryContext oldContext; bool isDistinct = (peraggstate->numDistinctCols > 0); Datum *newVal; bool *isNull; - FunctionCallInfoData fcinfo; Assert(peraggstate->numDistinctCols < 2); tuplesort_performsort(peraggstate->sortstate); /* Load the column into argument 1 (arg 0 will be transition value) */ ! newVal = fcinfo.arg + 1; ! isNull = fcinfo.argnull + 1; /* * Note: if input type is pass-by-ref, the datums returned by the sort are --- 627,643 ---- MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory; MemoryContext oldContext; bool isDistinct = (peraggstate->numDistinctCols > 0); + FunctionCallInfoData *fcinfo = &peraggstate->transfn_fcinfo; Datum *newVal; bool *isNull; Assert(peraggstate->numDistinctCols < 2); tuplesort_performsort(peraggstate->sortstate); /* Load the column into argument 1 (arg 0 will be transition value) */ ! newVal = fcinfo->arg + 1; ! isNull = fcinfo->argnull + 1; /* * Note: if input type is pass-by-ref, the datums returned by the sort are *************** process_ordered_aggregate_single(AggStat *** 668,675 **** } else { ! advance_transition_function(aggstate, peraggstate, pergroupstate, ! &fcinfo); /* forget the old value, if any */ if (!oldIsNull && !peraggstate->inputtypeByVal) pfree(DatumGetPointer(oldVal)); --- 673,679 ---- } else { ! advance_transition_function(aggstate, peraggstate, pergroupstate); /* forget the old value, if any */ if (!oldIsNull && !peraggstate->inputtypeByVal) pfree(DatumGetPointer(oldVal)); *************** process_ordered_aggregate_multi(AggState *** 704,710 **** AggStatePerGroup pergroupstate) { MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory; ! FunctionCallInfoData fcinfo; TupleTableSlot *slot1 = peraggstate->evalslot; TupleTableSlot *slot2 = peraggstate->uniqslot; int numTransInputs = peraggstate->numTransInputs; --- 708,714 ---- AggStatePerGroup pergroupstate) { MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory; ! FunctionCallInfoData *fcinfo = &peraggstate->transfn_fcinfo; TupleTableSlot *slot1 = peraggstate->evalslot; TupleTableSlot *slot2 = peraggstate->uniqslot; int numTransInputs = peraggstate->numTransInputs; *************** process_ordered_aggregate_multi(AggState *** 739,750 **** /* Start from 1, since the 0th arg will be the transition value */ for (i = 0; i < numTransInputs; i++) { ! fcinfo.arg[i + 1] = slot1->tts_values[i]; ! fcinfo.argnull[i + 1] = slot1->tts_isnull[i]; } ! advance_transition_function(aggstate, peraggstate, pergroupstate, ! &fcinfo); if (numDistinctCols > 0) { --- 743,753 ---- /* Start from 1, since the 0th arg will be the transition value */ for (i = 0; i < numTransInputs; i++) { ! fcinfo->arg[i + 1] = slot1->tts_values[i]; ! fcinfo->argnull[i + 1] = slot1->tts_isnull[i]; } ! advance_transition_function(aggstate, peraggstate, pergroupstate); if (numDistinctCols > 0) { *************** ExecInitAgg(Agg *node, EState *estate, i *** 1799,1804 **** --- 1802,1808 ---- &transfnexpr, &finalfnexpr); + /* set up infrastructure for calling the transfn and finalfn */ fmgr_info(transfn_oid, &peraggstate->transfn); fmgr_info_set_expr((Node *) transfnexpr, &peraggstate->transfn); *************** ExecInitAgg(Agg *node, EState *estate, i *** 1810,1815 **** --- 1814,1826 ---- peraggstate->aggCollation = aggref->inputcollid; + InitFunctionCallInfoData(peraggstate->transfn_fcinfo, + &peraggstate->transfn, + peraggstate->numTransInputs + 1, + peraggstate->aggCollation, + (void *) aggstate, NULL); + + /* get info about relevant datatypes */ get_typlenbyval(aggref->aggtype, &peraggstate->resulttypeLen, &peraggstate->resulttypeByVal);
pgsql-hackers by date: