From cd760f5d333a2f1c7b871f05d4a231c0381c660d Mon Sep 17 00:00:00 2001 From: James Coleman Date: Mon, 27 Apr 2020 08:51:28 -0400 Subject: [PATCH v3 2/3] Try using dynahash --- src/backend/executor/execExpr.c | 29 ++-- src/backend/executor/execExprInterp.c | 197 ++++++++++++---------- src/include/executor/execExpr.h | 7 +- src/test/regress/expected/expressions.out | 7 + src/test/regress/sql/expressions.sql | 2 + 5 files changed, 138 insertions(+), 104 deletions(-) diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index c202cc7e89..6249db5426 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -31,6 +31,7 @@ #include "postgres.h" #include "access/nbtree.h" +#include "access/hash.h" #include "catalog/objectaccess.h" #include "catalog/pg_type.h" #include "executor/execExpr.h" @@ -949,10 +950,13 @@ ExecInitExprRec(Expr *node, ExprState *state, { ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node; Oid func; + Oid hash_func; Expr *scalararg; Expr *arrayarg; FmgrInfo *finfo; FunctionCallInfo fcinfo; + FmgrInfo *hash_finfo; + FunctionCallInfo hash_fcinfo; AclResult aclresult; bool useBinarySearch = false; @@ -983,11 +987,6 @@ ExecInitExprRec(Expr *node, ExprState *state, { Datum arrdatum = ((Const *) arrayarg)->constvalue; ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum); - Oid orderingOp; - Oid orderingFunc; - Oid opfamily; - Oid opcintype; - int16 strategy; int nitems; /* @@ -998,21 +997,24 @@ ExecInitExprRec(Expr *node, ExprState *state, if (nitems >= MIN_ARRAY_SIZE_FOR_BINARY_SEARCH) { /* - * Find the ordering op that matches the originally + * Find the hash op that matches the originally * planned equality op. */ - orderingOp = get_ordering_op_for_equality_op(opexpr->opno, NULL); - get_ordering_op_properties(orderingOp, &opfamily, &opcintype, &strategy); - orderingFunc = get_opfamily_proc(opfamily, opcintype, opcintype, BTORDER_PROC); + useBinarySearch = get_op_hash_functions(opexpr->opno, NULL, &hash_func); /* * But we may not have one, so fall back to the * default implementation if necessary. */ - if (OidIsValid(orderingFunc)) + if (useBinarySearch) { - useBinarySearch = true; - func = orderingFunc; + hash_finfo = palloc0(sizeof(FmgrInfo)); + hash_fcinfo = palloc0(SizeForFunctionCallInfo(2)); + fmgr_info(hash_func, hash_finfo); + fmgr_info_set_expr((Node *) node, hash_finfo); + InitFunctionCallInfoData(*hash_fcinfo, hash_finfo, 2, + opexpr->inputcollid, NULL, NULL); + InvokeFunctionExecuteHook(hash_func); } } } @@ -1042,6 +1044,9 @@ ExecInitExprRec(Expr *node, ExprState *state, scratch.d.scalararraybinsearchop.finfo = finfo; scratch.d.scalararraybinsearchop.fcinfo_data = fcinfo; scratch.d.scalararraybinsearchop.fn_addr = finfo->fn_addr; + scratch.d.scalararraybinsearchop.hash_finfo = hash_finfo; + scratch.d.scalararraybinsearchop.hash_fcinfo_data = hash_fcinfo; + scratch.d.scalararraybinsearchop.hash_fn_addr = hash_finfo->fn_addr; } else { diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 5bebafbf0c..e54e807c6b 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -178,8 +178,6 @@ ExecAggPlainTransByRef(AggState *aggstate, AggStatePerTrans pertrans, AggStatePerGroup pergroup, ExprContext *aggcontext, int setno); -static int compare_array_elements(const void *a, const void *b, void *arg); - /* * Prepare ExprState for interpreted execution. */ @@ -3593,6 +3591,51 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op) *op->resnull = resultnull; } +static ExprEvalStep *current_saop_op; + +/* + * Hash function for elements. + * + * We use the element type's default hash opclass, and the column collation + * if the type is collation-sensitive. + */ +/* XXX: Name function to be specific to saop binsearch? */ +static uint32 +element_hash(const void *key, Size keysize) +{ + Datum hash; + FunctionCallInfo fcinfo = current_saop_op->d.scalararraybinsearchop.hash_fcinfo_data; + + fcinfo->args[0].value = *((const Datum *) key); + fcinfo->args[0].isnull = false; + + /* The keysize parameter is superfluous here */ + hash = current_saop_op->d.scalararraybinsearchop.hash_fn_addr(fcinfo); + + return DatumGetUInt32(hash); +} + +/* + * Matching function for elements, to be used in hashtable lookups. + */ +/* XXX: Name function to be specific to saop binsearch? */ +static int +element_match(const void *key1, const void *key2, Size keysize) +{ + Datum result; + FunctionCallInfo fcinfo = current_saop_op->d.scalararraybinsearchop.fcinfo_data; + + fcinfo->args[0].value = *((const Datum *) key1); + fcinfo->args[0].isnull = false; + fcinfo->args[1].value = *((const Datum *) key2); + fcinfo->args[1].isnull = false; + + /* The keysize parameter is superfluous here */ + result = current_saop_op->d.scalararraybinsearchop.fn_addr(fcinfo); + + return DatumGetBool(result) ? 0 : 1; +} + /* * Evaluate "scalar op ANY (const array)". * @@ -3613,16 +3656,19 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext * FunctionCallInfo fcinfo = op->d.scalararraybinsearchop.fcinfo_data; bool strictfunc = op->d.scalararraybinsearchop.finfo->fn_strict; ArrayType *arr; + Datum scalar = fcinfo->args[0].value; Datum result; bool resultnull; - bool *elem_nulls; - int l = 0, - r, - res; + bool hashfound; + int res; + + /* If we're only executing once, do we need a way to fall back to the regular loop? */ /* We don't setup a binary search op if the array const is null. */ Assert(!*op->resnull); + current_saop_op = op; + /* * If the scalar is NULL, and the function is strict, return NULL; no * point in executing the search. @@ -3634,104 +3680,88 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext * } /* Preprocess the array the first time we execute the op. */ - if (op->d.scalararraybinsearchop.elem_values == NULL) + if (op->d.scalararraybinsearchop.elements_tab == NULL) { - /* Cache the original lhs so we can scribble on it. */ - Datum scalar = fcinfo->args[0].value; - bool scalar_isnull = fcinfo->args[0].isnull; - int num_nonnulls = 0; - MemoryContext old_cxt; - MemoryContext array_cxt; int16 typlen; bool typbyval; char typalign; + HASHCTL elem_hash_ctl; + int nitems; + int num_nulls = 0; + char *s; + bits8 *bitmap; + int bitmask; arr = DatumGetArrayTypeP(*op->resvalue); + nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); get_typlenbyvalalign(ARR_ELEMTYPE(arr), &typlen, &typbyval, &typalign); - array_cxt = AllocSetContextCreate( - econtext->ecxt_per_query_memory, - "scalararraybinsearchop context", - ALLOCSET_SMALL_SIZES); - old_cxt = MemoryContextSwitchTo(array_cxt); + MemSet(&elem_hash_ctl, 0, sizeof(elem_hash_ctl)); + elem_hash_ctl.keysize = sizeof(Datum); + elem_hash_ctl.entrysize = sizeof(Datum); + elem_hash_ctl.hash = element_hash; + elem_hash_ctl.match = element_match; + elem_hash_ctl.hcxt = econtext->ecxt_per_query_memory; + op->d.scalararraybinsearchop.elements_tab = hash_create("Scalar array op expr elements", + nitems, + &elem_hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT); + + s = (char *) ARR_DATA_PTR(arr); + bitmap = ARR_NULLBITMAP(arr); + bitmask = 1; + for (int i = 0; i < nitems; i++) + { + Datum element; + + /* Get array element, checking for NULL. */ + if (bitmap && (*bitmap & bitmask) == 0) + { + num_nulls++; + } + else + { + element = fetch_att(s, typbyval, typlen); + s = att_addlength_pointer(s, typlen, s); + s = (char *) att_align_nominal(s, typalign); - deconstruct_array(arr, - ARR_ELEMTYPE(arr), - typlen, typbyval, typalign, - &op->d.scalararraybinsearchop.elem_values, &elem_nulls, &op->d.scalararraybinsearchop.num_elems); + hash_search(op->d.scalararraybinsearchop.elements_tab, (const void *) &element, HASH_ENTER, NULL); + } - /* Remove null entries from the array. */ - for (int j = 0; j < op->d.scalararraybinsearchop.num_elems; j++) - { - if (!elem_nulls[j]) - op->d.scalararraybinsearchop.elem_values[num_nonnulls++] = op->d.scalararraybinsearchop.elem_values[j]; + /* Advance bitmap pointer if any. */ + if (bitmap) + { + bitmask <<= 1; + if (bitmask == 0x100) + { + bitmap++; + bitmask = 1; + } + } } /* * Remember if we had any nulls so that we know if we need to execute * non-strict functions with a null lhs value if no match is found. */ - op->d.scalararraybinsearchop.has_nulls = num_nonnulls < op->d.scalararraybinsearchop.num_elems; - op->d.scalararraybinsearchop.num_elems = num_nonnulls; + op->d.scalararraybinsearchop.has_nulls = num_nulls > 0; /* - * Setup the fcinfo for sorting. We've removed nulls, so both lhs and - * rhs values are guaranteed to be non-null. + * We only setup a binary search op if we have > 8 elements, so we don't + * need to worry about adding an optimization for the empty array case. */ - fcinfo->args[0].isnull = false; - fcinfo->args[1].isnull = false; - - /* Sort the array and remove duplicate elements. */ - qsort_arg(op->d.scalararraybinsearchop.elem_values, op->d.scalararraybinsearchop.num_elems, sizeof(Datum), - compare_array_elements, op); - op->d.scalararraybinsearchop.num_elems = qunique_arg(op->d.scalararraybinsearchop.elem_values, op->d.scalararraybinsearchop.num_elems, sizeof(Datum), - compare_array_elements, op); - - /* Restore the lhs value after we scribbed on it for sorting. */ - fcinfo->args[0].value = scalar; - fcinfo->args[0].isnull = scalar_isnull; - - MemoryContextSwitchTo(old_cxt); + Assert(nitems > 0); } - /* - * We only setup a binary search op if we have > 8 elements, so we don't - * need to worry about adding an optimization for the empty array case. - */ - Assert(!(op->d.scalararraybinsearchop.num_elems <= 0 && !op->d.scalararraybinsearchop.has_nulls)); - - /* Assume no match will be found until proven otherwise. */ - result = BoolGetDatum(false); + /* Check the hash to see if we have a match. */ + hash_search(op->d.scalararraybinsearchop.elements_tab, (const void *) &scalar, HASH_FIND, &hashfound); + result = BoolGetDatum(hashfound); resultnull = false; - /* Binary search through the array. */ - r = op->d.scalararraybinsearchop.num_elems - 1; - while (l <= r) - { - int i = (l + r) / 2; - - fcinfo->args[1].value = op->d.scalararraybinsearchop.elem_values[i]; - - /* Call comparison function */ - fcinfo->isnull = false; - res = DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo)); - - if (res == 0) - { - result = BoolGetDatum(true); - resultnull = false; - break; - } - else if (res > 0) - l = i + 1; - else - r = i - 1; - } - /* * If we didn't find a match in the array, we still might need to handle * the possibility of null values (we've previously removed them from the @@ -3761,19 +3791,6 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext * *op->resnull = resultnull; } -/* XXX: Name function to be specific to saop binsearch? */ -static int -compare_array_elements(const void *a, const void *b, void *arg) -{ - ExprEvalStep *op = (ExprEvalStep *) arg; - FunctionCallInfo fcinfo = op->d.scalararraybinsearchop.fcinfo_data; - - fcinfo->args[0].value = *((const Datum *) a); - fcinfo->args[1].value = *((const Datum *) b); - - return DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo)); -} - /* * Evaluate a NOT NULL domain constraint. */ diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index ac4478d060..2e93b1f990 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -554,13 +554,16 @@ typedef struct ExprEvalStep /* for EEOP_SCALARARRAYOP_BINSEARCH */ struct { - int num_elems; bool has_nulls; - Datum *elem_values; + HTAB *elements_tab; FmgrInfo *finfo; /* function's lookup data */ FunctionCallInfo fcinfo_data; /* arguments etc */ /* faster to access without additional indirection: */ PGFunction fn_addr; /* actual call address */ + FmgrInfo *hash_finfo; /* function's lookup data */ + FunctionCallInfo hash_fcinfo_data; /* arguments etc */ + /* faster to access without additional indirection: */ + PGFunction hash_fn_addr; /* actual call address */ } scalararraybinsearchop; /* for EEOP_XMLEXPR */ diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out index 55b57b9c59..f37dfe1ce2 100644 --- a/src/test/regress/expected/expressions.out +++ b/src/test/regress/expected/expressions.out @@ -197,3 +197,10 @@ select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, null); (1 row) +select 'a' in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'); + ?column? +---------- + t +(1 row) + +-- TODO: test non-strict op? diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql index 3cb850d838..c30fe66c5e 100644 --- a/src/test/regress/sql/expressions.sql +++ b/src/test/regress/sql/expressions.sql @@ -76,3 +76,5 @@ select 1 in (null, null, null, null, null, null, null, null, null, null, null); select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null); select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, null); +select 'a' in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'); +-- TODO: test non-strict op? -- 2.17.1