From d1ee656886659f799638bdf1a11f2a05a76d9858 Mon Sep 17 00:00:00 2001 From: Alexey Grishchenko Date: Thu, 28 Jul 2016 13:34:49 +0100 Subject: [PATCH] Fix for PL/Python slow input arrays traversal issue Original implementation uses array_ref for each element of the array. Each call to array_ref causes subsequent call to array_seek. Function array_seek in turn has a shortcut for fixed-size arrays with no nulls. But if your array is not of fixed-size elements, or if it contains nulls, each call to array_seek would cause calculation of the Kth element offset starting from the first element. This results in O(N^2) processing time for arrays of non-fixed-size elements and arrays with nulls. For example, array of 100'000 ints takes 20ms to be processed, while if this array contains a single null the processing time raises to 7 seconds. The fix applies logic used at array_out function for efficient array traversal, keeping the pointer to the last fetched element's offset, which results in dramatical performance improvement for affected case. With this implementation, both arrays of fixed-size elements without nulls, fixed-size elements with nulls and variable-size elements are processed with the same speed. --- src/pl/plpython/plpy_typeio.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/src/pl/plpython/plpy_typeio.c b/src/pl/plpython/plpy_typeio.c index 7ad7a44..0e019ce 100644 --- a/src/pl/plpython/plpy_typeio.c +++ b/src/pl/plpython/plpy_typeio.c @@ -633,8 +633,10 @@ PLyList_FromArray(PLyDatumToOb *arg, Datum d) PLyDatumToOb *elm = arg->elm; PyObject *list; int length; - int lbound; int i; + char *dataptr; + bits8 *bitmap; + int bitmask; if (ARR_NDIM(array) == 0) return PyList_New(0); @@ -646,28 +648,42 @@ PLyList_FromArray(PLyDatumToOb *arg, Datum d) errdetail("PL/Python only supports one-dimensional arrays."))); length = ARR_DIMS(array)[0]; - lbound = ARR_LBOUND(array)[0]; list = PyList_New(length); if (list == NULL) PLy_elog(ERROR, "could not create new Python list"); + dataptr = ARR_DATA_PTR(array); + bitmap = ARR_NULLBITMAP(array); + bitmask = 1; + for (i = 0; i < length; i++) { - Datum elem; - bool isnull; - int offset; - - offset = lbound + i; - elem = array_ref(array, 1, &offset, arg->typlen, - elm->typlen, elm->typbyval, elm->typalign, - &isnull); - if (isnull) + /* Get source element, checking for NULL */ + if (bitmap && (*bitmap & bitmask) == 0) { Py_INCREF(Py_None); PyList_SET_ITEM(list, i, Py_None); } else - PyList_SET_ITEM(list, i, elm->func(elm, elem)); + { + Datum itemvalue; + + itemvalue = fetch_att(dataptr, elm->typbyval, elm->typlen); + PyList_SET_ITEM(list, i, elm->func(elm, itemvalue)); + dataptr = att_addlength_pointer(dataptr, elm->typlen, dataptr); + dataptr = (char *) att_align_nominal(dataptr, elm->typalign); + } + + /* advance bitmap pointer if any */ + if (bitmap) + { + bitmask <<= 1; + if (bitmask == 0x100 /* (1<<8) */) + { + bitmap++; + bitmask = 1; + } + } } return list; -- 2.7.4 (Apple Git-66)