Re: Make COPY format extendable: Extract COPY TO format implementations - Mailing list pgsql-hackers
| From | Sutou Kouhei |
|---|---|
| Subject | Re: Make COPY format extendable: Extract COPY TO format implementations |
| Date | |
| Msg-id | 20240215.153421.96888103784986803.kou@clear-code.com Whole thread Raw |
| In response to | Re: Make COPY format extendable: Extract COPY TO format implementations (Michael Paquier <michael@paquier.xyz>) |
| Responses |
Re: Make COPY format extendable: Extract COPY TO format implementations
Re: Make COPY format extendable: Extract COPY TO format implementations |
| List | pgsql-hackers |
Hi,
In <ZcxjNDtqNLvdz0f5@paquier.xyz>
"Re: Make COPY format extendable: Extract COPY TO format implementations" on Wed, 14 Feb 2024 15:52:36 +0900,
Michael Paquier <michael@paquier.xyz> wrote:
>> How about InputFunctionCallSafeWithInfo(),
>> InputFunctionCallSafeInfo() or
>> InputFunctionCallInfoCallSafe()?
>
> WithInfo() would not be a new thing. There are a couple of APIs named
> like this when manipulating catalogs, so that sounds kind of a good
> choice from here.
Thanks for the info. Let's use InputFunctionCallSafeWithInfo().
See that attached patch:
v2-0001-Reuse-fcinfo-used-in-COPY-FROM.patch
I also attach a patch for COPY TO:
v1-0001-Reuse-fcinfo-used-in-COPY-TO.patch
I measured the COPY TO patch on my environment with:
COPY (SELECT
1::int2,2::int2,3::int2,4::int2,5::int2,6::int2,7::int2,8::int2,9::int2,10::int2,11::int2,12::int2,13::int2,14::int2,15::int2,16::int2,17::int2,18::int2,19::int2,20::int2,
generate_series(1,1000000::int4)) TO '/dev/null' \watch c=5
master:
740.066ms
734.884ms
738.579ms
734.170ms
727.953ms
patched:
730.714ms
741.483ms
714.149ms
715.436ms
713.578ms
It seems that it improves performance a bit but my
environment isn't suitable for benchmark. So they may not
be valid numbers.
Thanks,
--
kou
From b677732f46f735a5601b8890000f79671e91be41 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 15 Feb 2024 15:01:08 +0900
Subject: [PATCH v2] Reuse fcinfo used in COPY FROM
Each NextCopyFrom() calls input functions or binary-input
functions. We can reuse fcinfo for them instead of creating a local
fcinfo for each call. This will improve performance.
---
src/backend/commands/copyfrom.c | 4 +
src/backend/commands/copyfromparse.c | 20 ++--
src/backend/utils/fmgr/fmgr.c | 126 +++++++++++++++++++++++
src/include/commands/copyfrom_internal.h | 1 +
src/include/fmgr.h | 12 +++
5 files changed, 154 insertions(+), 9 deletions(-)
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 1fe70b9133..ed375c012e 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1691,6 +1691,10 @@ BeginCopyFrom(ParseState *pstate,
/* We keep those variables in cstate. */
cstate->in_functions = in_functions;
cstate->typioparams = typioparams;
+ if (cstate->opts.binary)
+ cstate->fcinfo = PrepareReceiveFunctionCallInfo();
+ else
+ cstate->fcinfo = PrepareInputFunctionCallInfo();
cstate->defmap = defmap;
cstate->defexprs = defexprs;
cstate->volatile_defexprs = volatile_defexprs;
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 7cacd0b752..7907e16ea8 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -861,6 +861,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
num_defaults = cstate->num_defaults;
FmgrInfo *in_functions = cstate->in_functions;
Oid *typioparams = cstate->typioparams;
+ FunctionCallInfoBaseData *fcinfo = cstate->fcinfo;
int i;
int *defmap = cstate->defmap;
ExprState **defexprs = cstate->defexprs;
@@ -961,12 +962,13 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
* If ON_ERROR is specified with IGNORE, skip rows with soft
* errors
*/
- else if (!InputFunctionCallSafe(&in_functions[m],
- string,
- typioparams[m],
- att->atttypmod,
- (Node *) cstate->escontext,
- &values[m]))
+ else if (!InputFunctionCallSafeWithInfo(fcinfo,
+ &in_functions[m],
+ string,
+ typioparams[m],
+ att->atttypmod,
+ (Node *) cstate->escontext,
+ &values[m]))
{
cstate->num_errors++;
return true;
@@ -1966,7 +1968,7 @@ CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
if (fld_size == -1)
{
*isnull = true;
- return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
+ return ReceiveFunctionCallWithInfo(cstate->fcinfo, flinfo, NULL, typioparam, typmod);
}
if (fld_size < 0)
ereport(ERROR,
@@ -1987,8 +1989,8 @@ CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
cstate->attribute_buf.data[fld_size] = '\0';
/* Call the column type's binary input converter */
- result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
- typioparam, typmod);
+ result = ReceiveFunctionCallWithInfo(cstate->fcinfo, flinfo, &cstate->attribute_buf,
+ typioparam, typmod);
/* Trouble if it didn't eat the whole buffer */
if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index e48a86be54..14c3ed2bdb 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -1672,6 +1672,73 @@ DirectInputFunctionCallSafe(PGFunction func, char *str,
return true;
}
+/*
+ * Prepare callinfo for InputFunctionCallSafeWithInfo to reuse one callinfo
+ * instead of initializing it for each call. This is for performance.
+ */
+FunctionCallInfoBaseData *
+PrepareInputFunctionCallInfo(void)
+{
+ FunctionCallInfoBaseData *fcinfo;
+
+ fcinfo = (FunctionCallInfoBaseData *) palloc(SizeForFunctionCallInfo(3));
+ InitFunctionCallInfoData(*fcinfo, NULL, 3, InvalidOid, NULL, NULL);
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].isnull = false;
+ fcinfo->args[2].isnull = false;
+ return fcinfo;
+}
+
+/*
+ * Call a previously-looked-up datatype input function, with prepared callinfo
+ * and non-exception handling of "soft" errors.
+ *
+ * This is basically like InputFunctionCallSafe, but it reuses prepared
+ * callinfo.
+ */
+bool
+InputFunctionCallSafeWithInfo(FunctionCallInfoBaseData *fcinfo,
+ FmgrInfo *flinfo, char *str,
+ Oid typioparam, int32 typmod,
+ fmNodePtr escontext,
+ Datum *result)
+{
+ if (str == NULL && flinfo->fn_strict)
+ {
+ *result = (Datum) 0; /* just return null result */
+ return true;
+ }
+
+ fcinfo->flinfo = flinfo;
+ fcinfo->context = escontext;
+ fcinfo->isnull = false;
+ fcinfo->args[0].value = CStringGetDatum(str);
+ fcinfo->args[1].value = ObjectIdGetDatum(typioparam);
+ fcinfo->args[2].value = Int32GetDatum(typmod);
+
+ *result = FunctionCallInvoke(fcinfo);
+
+ /* Result value is garbage, and could be null, if an error was reported */
+ if (SOFT_ERROR_OCCURRED(escontext))
+ return false;
+
+ /* Otherwise, should get null result if and only if str is NULL */
+ if (str == NULL)
+ {
+ if (!fcinfo->isnull)
+ elog(ERROR, "input function %u returned non-NULL",
+ flinfo->fn_oid);
+ }
+ else
+ {
+ if (fcinfo->isnull)
+ elog(ERROR, "input function %u returned NULL",
+ flinfo->fn_oid);
+ }
+
+ return true;
+}
+
/*
* Call a previously-looked-up datatype output function.
*
@@ -1731,6 +1798,65 @@ ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf,
return result;
}
+/*
+ * Prepare callinfo for ReceiveFunctionCallWithInfo to reuse one callinfo
+ * instead of initializing it for each call. This is for performance.
+ */
+FunctionCallInfoBaseData *
+PrepareReceiveFunctionCallInfo(void)
+{
+ FunctionCallInfoBaseData *fcinfo;
+
+ fcinfo = (FunctionCallInfoBaseData *) palloc(SizeForFunctionCallInfo(3));
+ InitFunctionCallInfoData(*fcinfo, NULL, 3, InvalidOid, NULL, NULL);
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].isnull = false;
+ fcinfo->args[2].isnull = false;
+ return fcinfo;
+}
+
+/*
+ * Call a previously-looked-up datatype binary-input function, with prepared
+ * callinfo.
+ *
+ * This is basically like ReceiveFunctionCall, but it reuses prepared
+ * callinfo.
+ */
+Datum
+ReceiveFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo,
+ FmgrInfo *flinfo, StringInfo buf,
+ Oid typioparam, int32 typmod)
+{
+ Datum result;
+
+ if (buf == NULL && flinfo->fn_strict)
+ return (Datum) 0; /* just return null result */
+
+ fcinfo->flinfo = flinfo;
+ fcinfo->isnull = false;
+ fcinfo->args[0].value = PointerGetDatum(buf);
+ fcinfo->args[1].value = ObjectIdGetDatum(typioparam);
+ fcinfo->args[2].value = Int32GetDatum(typmod);
+
+ result = FunctionCallInvoke(fcinfo);
+
+ /* Should get null result if and only if buf is NULL */
+ if (buf == NULL)
+ {
+ if (!fcinfo->isnull)
+ elog(ERROR, "receive function %u returned non-NULL",
+ flinfo->fn_oid);
+ }
+ else
+ {
+ if (fcinfo->isnull)
+ elog(ERROR, "receive function %u returned NULL",
+ flinfo->fn_oid);
+ }
+
+ return result;
+}
+
/*
* Call a previously-looked-up datatype binary-output function.
*
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index cad52fcc78..8c1a227c02 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -97,6 +97,7 @@ typedef struct CopyFromStateData
Oid *typioparams; /* array of element types for in_functions */
ErrorSaveContext *escontext; /* soft error trapper during in_functions
* execution */
+ FunctionCallInfoBaseData *fcinfo; /* reusable callinfo for in_functions */
uint64 num_errors; /* total number of rows which contained soft
* errors */
int *defmap; /* array of default att numbers related to
diff --git a/src/include/fmgr.h b/src/include/fmgr.h
index ccb4070a25..3d3a12205b 100644
--- a/src/include/fmgr.h
+++ b/src/include/fmgr.h
@@ -708,12 +708,24 @@ extern bool DirectInputFunctionCallSafe(PGFunction func, char *str,
Oid typioparam, int32 typmod,
fmNodePtr escontext,
Datum *result);
+extern FunctionCallInfoBaseData *PrepareInputFunctionCallInfo(void);
+extern bool
+ InputFunctionCallSafeWithInfo(FunctionCallInfoBaseData *fcinfo,
+ FmgrInfo *flinfo, char *str,
+ Oid typioparam, int32 typmod,
+ fmNodePtr escontext,
+ Datum *result);
extern Datum OidInputFunctionCall(Oid functionId, char *str,
Oid typioparam, int32 typmod);
extern char *OutputFunctionCall(FmgrInfo *flinfo, Datum val);
extern char *OidOutputFunctionCall(Oid functionId, Datum val);
extern Datum ReceiveFunctionCall(FmgrInfo *flinfo, fmStringInfo buf,
Oid typioparam, int32 typmod);
+extern FunctionCallInfoBaseData *PrepareReceiveFunctionCallInfo(void);
+extern Datum
+ ReceiveFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo,
+ FmgrInfo *flinfo, fmStringInfo buf,
+ Oid typioparam, int32 typmod);
extern Datum OidReceiveFunctionCall(Oid functionId, fmStringInfo buf,
Oid typioparam, int32 typmod);
extern bytea *SendFunctionCall(FmgrInfo *flinfo, Datum val);
--
2.43.0
From dbf04dec457ad2c61d00538514cc5356e94074e1 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 15 Feb 2024 15:26:31 +0900
Subject: [PATCH v1] Reuse fcinfo used in COPY TO
Each CopyOneRowTo() calls output functions or binary-output
functions. We can reuse fcinfo for them instead of creating a local
fcinfo for each call. This will improve performance.
---
src/backend/commands/copyto.c | 14 +++++--
src/backend/utils/fmgr/fmgr.c | 79 +++++++++++++++++++++++++++++++++++
src/include/fmgr.h | 6 +++
3 files changed, 95 insertions(+), 4 deletions(-)
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 20ffc90363..21442861f3 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -97,6 +97,7 @@ typedef struct CopyToStateData
MemoryContext copycontext; /* per-copy execution context */
FmgrInfo *out_functions; /* lookup info for output functions */
+ FunctionCallInfoBaseData *fcinfo; /* reusable callinfo for out_functions */
MemoryContext rowcontext; /* per-row evaluation context */
uint64 bytes_processed; /* number of bytes processed so far */
} CopyToStateData;
@@ -786,6 +787,10 @@ DoCopyTo(CopyToState cstate)
&isvarlena);
fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
}
+ if (cstate->opts.binary)
+ cstate->fcinfo = PrepareSendFunctionCallInfo();
+ else
+ cstate->fcinfo = PrepareOutputFunctionCallInfo();
/*
* Create a temporary memory context that we can reset once per row to
@@ -909,6 +914,7 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
{
bool need_delim = false;
FmgrInfo *out_functions = cstate->out_functions;
+ FunctionCallInfoBaseData *fcinfo = cstate->fcinfo;
MemoryContext oldcontext;
ListCell *cur;
char *string;
@@ -949,8 +955,8 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
{
if (!cstate->opts.binary)
{
- string = OutputFunctionCall(&out_functions[attnum - 1],
- value);
+ string = OutputFunctionCallWithInfo(fcinfo, &out_functions[attnum - 1],
+ value);
if (cstate->opts.csv_mode)
CopyAttributeOutCSV(cstate, string,
cstate->opts.force_quote_flags[attnum - 1]);
@@ -961,8 +967,8 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
{
bytea *outputbytes;
- outputbytes = SendFunctionCall(&out_functions[attnum - 1],
- value);
+ outputbytes = SendFunctionCallWithInfo(fcinfo, &out_functions[attnum - 1],
+ value);
CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
CopySendData(cstate, VARDATA(outputbytes),
VARSIZE(outputbytes) - VARHDRSZ);
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index e48a86be54..ab74a643f2 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -1685,6 +1685,45 @@ OutputFunctionCall(FmgrInfo *flinfo, Datum val)
return DatumGetCString(FunctionCall1(flinfo, val));
}
+/*
+ * Prepare callinfo for OutputFunctionCallWithInfo to reuse one callinfo
+ * instead of initializing it for each call. This is for performance.
+ */
+FunctionCallInfoBaseData *
+PrepareOutputFunctionCallInfo(void)
+{
+ FunctionCallInfoBaseData *fcinfo;
+
+ fcinfo = (FunctionCallInfoBaseData *) palloc(SizeForFunctionCallInfo(1));
+ InitFunctionCallInfoData(*fcinfo, NULL, 1, InvalidOid, NULL, NULL);
+ fcinfo->args[0].isnull = false;
+ return fcinfo;
+}
+
+/*
+ * Call a previously-looked-up datatype output function, with prepared callinfo.
+ *
+ * This is basically like OutputFunctionCall, but it reuses prepared callinfo.
+ */
+char *
+OutputFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo,
+ FmgrInfo *flinfo, Datum val)
+{
+ Datum result;
+
+ fcinfo->flinfo = flinfo;
+ fcinfo->isnull = false;
+ fcinfo->args[0].value = val;
+
+ result = FunctionCallInvoke(fcinfo);
+
+ /* Check for null result, since caller is clearly not expecting one */
+ if (fcinfo->isnull)
+ elog(ERROR, "function %u returned NULL", flinfo->fn_oid);
+
+ return DatumGetCString(result);
+}
+
/*
* Call a previously-looked-up datatype binary-input function.
*
@@ -1746,6 +1785,46 @@ SendFunctionCall(FmgrInfo *flinfo, Datum val)
return DatumGetByteaP(FunctionCall1(flinfo, val));
}
+/*
+ * Prepare callinfo for SendFunctionCallWithInfo to reuse one callinfo
+ * instead of initializing it for each call. This is for performance.
+ */
+FunctionCallInfoBaseData *
+PrepareSendFunctionCallInfo(void)
+{
+ FunctionCallInfoBaseData *fcinfo;
+
+ fcinfo = (FunctionCallInfoBaseData *) palloc(SizeForFunctionCallInfo(1));
+ InitFunctionCallInfoData(*fcinfo, NULL, 1, InvalidOid, NULL, NULL);
+ fcinfo->args[0].isnull = false;
+ return fcinfo;
+}
+
+/*
+ * Call a previously-looked-up datatype binary-output function, with prepared
+ * callinfo.
+ *
+ * This is basically like SendFunctionCall, but it reuses prepared callinfo.
+ */
+bytea *
+SendFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo,
+ FmgrInfo *flinfo, Datum val)
+{
+ Datum result;
+
+ fcinfo->flinfo = flinfo;
+ fcinfo->isnull = false;
+ fcinfo->args[0].value = val;
+
+ result = FunctionCallInvoke(fcinfo);
+
+ /* Check for null result, since caller is clearly not expecting one */
+ if (fcinfo->isnull)
+ elog(ERROR, "function %u returned NULL", flinfo->fn_oid);
+
+ return DatumGetByteaP(result);
+}
+
/*
* As above, for I/O functions identified by OID. These are only to be used
* in seldom-executed code paths. They are not only slow but leak memory.
diff --git a/src/include/fmgr.h b/src/include/fmgr.h
index ccb4070a25..816ed31b05 100644
--- a/src/include/fmgr.h
+++ b/src/include/fmgr.h
@@ -711,12 +711,18 @@ extern bool DirectInputFunctionCallSafe(PGFunction func, char *str,
extern Datum OidInputFunctionCall(Oid functionId, char *str,
Oid typioparam, int32 typmod);
extern char *OutputFunctionCall(FmgrInfo *flinfo, Datum val);
+extern FunctionCallInfoBaseData *PrepareOutputFunctionCallInfo(void);
+extern char *OutputFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo,
+ FmgrInfo *flinfo, Datum val);
extern char *OidOutputFunctionCall(Oid functionId, Datum val);
extern Datum ReceiveFunctionCall(FmgrInfo *flinfo, fmStringInfo buf,
Oid typioparam, int32 typmod);
extern Datum OidReceiveFunctionCall(Oid functionId, fmStringInfo buf,
Oid typioparam, int32 typmod);
extern bytea *SendFunctionCall(FmgrInfo *flinfo, Datum val);
+extern FunctionCallInfoBaseData *PrepareSendFunctionCallInfo(void);
+extern bytea *SendFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo,
+ FmgrInfo *flinfo, Datum val);
extern bytea *OidSendFunctionCall(Oid functionId, Datum val);
--
2.43.0
pgsql-hackers by date: