Re: Make COPY format extendable: Extract COPY TO format implementations - Mailing list pgsql-hackers

From Sutou Kouhei
Subject Re: Make COPY format extendable: Extract COPY TO format implementations
Date
Msg-id 20240215.153421.96888103784986803.kou@clear-code.com
Whole thread Raw
In response to Re: Make COPY format extendable: Extract COPY TO format implementations  (Michael Paquier <michael@paquier.xyz>)
Responses Re: Make COPY format extendable: Extract COPY TO format implementations
Re: Make COPY format extendable: Extract COPY TO format implementations
List pgsql-hackers
Hi,

In <ZcxjNDtqNLvdz0f5@paquier.xyz>
  "Re: Make COPY format extendable: Extract COPY TO format implementations" on Wed, 14 Feb 2024 15:52:36 +0900,
  Michael Paquier <michael@paquier.xyz> wrote:

>> How about InputFunctionCallSafeWithInfo(),
>> InputFunctionCallSafeInfo() or
>> InputFunctionCallInfoCallSafe()?
> 
> WithInfo() would not be a new thing.  There are a couple of APIs named
> like this when manipulating catalogs, so that sounds kind of a good
> choice from here.

Thanks for the info. Let's use InputFunctionCallSafeWithInfo().
See that attached patch:
v2-0001-Reuse-fcinfo-used-in-COPY-FROM.patch

I also attach a patch for COPY TO:
v1-0001-Reuse-fcinfo-used-in-COPY-TO.patch

I measured the COPY TO patch on my environment with:
COPY (SELECT
1::int2,2::int2,3::int2,4::int2,5::int2,6::int2,7::int2,8::int2,9::int2,10::int2,11::int2,12::int2,13::int2,14::int2,15::int2,16::int2,17::int2,18::int2,19::int2,20::int2,
generate_series(1,1000000::int4)) TO '/dev/null' \watch c=5
 

master:
740.066ms
734.884ms
738.579ms
734.170ms
727.953ms

patched:
730.714ms
741.483ms
714.149ms
715.436ms
713.578ms

It seems that it improves performance a bit but my
environment isn't suitable for benchmark. So they may not
be valid numbers.


Thanks,
-- 
kou
From b677732f46f735a5601b8890000f79671e91be41 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 15 Feb 2024 15:01:08 +0900
Subject: [PATCH v2] Reuse fcinfo used in COPY FROM

Each NextCopyFrom() calls input functions or binary-input
functions. We can reuse fcinfo for them instead of creating a local
fcinfo for each call. This will improve performance.
---
 src/backend/commands/copyfrom.c          |   4 +
 src/backend/commands/copyfromparse.c     |  20 ++--
 src/backend/utils/fmgr/fmgr.c            | 126 +++++++++++++++++++++++
 src/include/commands/copyfrom_internal.h |   1 +
 src/include/fmgr.h                       |  12 +++
 5 files changed, 154 insertions(+), 9 deletions(-)

diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 1fe70b9133..ed375c012e 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1691,6 +1691,10 @@ BeginCopyFrom(ParseState *pstate,
     /* We keep those variables in cstate. */
     cstate->in_functions = in_functions;
     cstate->typioparams = typioparams;
+    if (cstate->opts.binary)
+        cstate->fcinfo = PrepareReceiveFunctionCallInfo();
+    else
+        cstate->fcinfo = PrepareInputFunctionCallInfo();
     cstate->defmap = defmap;
     cstate->defexprs = defexprs;
     cstate->volatile_defexprs = volatile_defexprs;
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 7cacd0b752..7907e16ea8 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -861,6 +861,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
                 num_defaults = cstate->num_defaults;
     FmgrInfo   *in_functions = cstate->in_functions;
     Oid           *typioparams = cstate->typioparams;
+    FunctionCallInfoBaseData *fcinfo = cstate->fcinfo;
     int            i;
     int           *defmap = cstate->defmap;
     ExprState **defexprs = cstate->defexprs;
@@ -961,12 +962,13 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
              * If ON_ERROR is specified with IGNORE, skip rows with soft
              * errors
              */
-            else if (!InputFunctionCallSafe(&in_functions[m],
-                                            string,
-                                            typioparams[m],
-                                            att->atttypmod,
-                                            (Node *) cstate->escontext,
-                                            &values[m]))
+            else if (!InputFunctionCallSafeWithInfo(fcinfo,
+                                                    &in_functions[m],
+                                                    string,
+                                                    typioparams[m],
+                                                    att->atttypmod,
+                                                    (Node *) cstate->escontext,
+                                                    &values[m]))
             {
                 cstate->num_errors++;
                 return true;
@@ -1966,7 +1968,7 @@ CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
     if (fld_size == -1)
     {
         *isnull = true;
-        return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
+        return ReceiveFunctionCallWithInfo(cstate->fcinfo, flinfo, NULL, typioparam, typmod);
     }
     if (fld_size < 0)
         ereport(ERROR,
@@ -1987,8 +1989,8 @@ CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
     cstate->attribute_buf.data[fld_size] = '\0';
 
     /* Call the column type's binary input converter */
-    result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
-                                 typioparam, typmod);
+    result = ReceiveFunctionCallWithInfo(cstate->fcinfo, flinfo, &cstate->attribute_buf,
+                                         typioparam, typmod);
 
     /* Trouble if it didn't eat the whole buffer */
     if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index e48a86be54..14c3ed2bdb 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -1672,6 +1672,73 @@ DirectInputFunctionCallSafe(PGFunction func, char *str,
     return true;
 }
 
+/*
+ * Prepare callinfo for InputFunctionCallSafeWithInfo to reuse one callinfo
+ * instead of initializing it for each call. This is for performance.
+ */
+FunctionCallInfoBaseData *
+PrepareInputFunctionCallInfo(void)
+{
+    FunctionCallInfoBaseData *fcinfo;
+
+    fcinfo = (FunctionCallInfoBaseData *) palloc(SizeForFunctionCallInfo(3));
+    InitFunctionCallInfoData(*fcinfo, NULL, 3, InvalidOid, NULL, NULL);
+    fcinfo->args[0].isnull = false;
+    fcinfo->args[1].isnull = false;
+    fcinfo->args[2].isnull = false;
+    return fcinfo;
+}
+
+/*
+ * Call a previously-looked-up datatype input function, with prepared callinfo
+ * and non-exception handling of "soft" errors.
+ *
+ * This is basically like InputFunctionCallSafe, but it reuses prepared
+ * callinfo.
+ */
+bool
+InputFunctionCallSafeWithInfo(FunctionCallInfoBaseData *fcinfo,
+                              FmgrInfo *flinfo, char *str,
+                              Oid typioparam, int32 typmod,
+                              fmNodePtr escontext,
+                              Datum *result)
+{
+    if (str == NULL && flinfo->fn_strict)
+    {
+        *result = (Datum) 0;    /* just return null result */
+        return true;
+    }
+
+    fcinfo->flinfo = flinfo;
+    fcinfo->context = escontext;
+    fcinfo->isnull = false;
+    fcinfo->args[0].value = CStringGetDatum(str);
+    fcinfo->args[1].value = ObjectIdGetDatum(typioparam);
+    fcinfo->args[2].value = Int32GetDatum(typmod);
+
+    *result = FunctionCallInvoke(fcinfo);
+
+    /* Result value is garbage, and could be null, if an error was reported */
+    if (SOFT_ERROR_OCCURRED(escontext))
+        return false;
+
+    /* Otherwise, should get null result if and only if str is NULL */
+    if (str == NULL)
+    {
+        if (!fcinfo->isnull)
+            elog(ERROR, "input function %u returned non-NULL",
+                 flinfo->fn_oid);
+    }
+    else
+    {
+        if (fcinfo->isnull)
+            elog(ERROR, "input function %u returned NULL",
+                 flinfo->fn_oid);
+    }
+
+    return true;
+}
+
 /*
  * Call a previously-looked-up datatype output function.
  *
@@ -1731,6 +1798,65 @@ ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf,
     return result;
 }
 
+/*
+ * Prepare callinfo for ReceiveFunctionCallWithInfo to reuse one callinfo
+ * instead of initializing it for each call. This is for performance.
+ */
+FunctionCallInfoBaseData *
+PrepareReceiveFunctionCallInfo(void)
+{
+    FunctionCallInfoBaseData *fcinfo;
+
+    fcinfo = (FunctionCallInfoBaseData *) palloc(SizeForFunctionCallInfo(3));
+    InitFunctionCallInfoData(*fcinfo, NULL, 3, InvalidOid, NULL, NULL);
+    fcinfo->args[0].isnull = false;
+    fcinfo->args[1].isnull = false;
+    fcinfo->args[2].isnull = false;
+    return fcinfo;
+}
+
+/*
+ * Call a previously-looked-up datatype binary-input function, with prepared
+ * callinfo.
+ *
+ * This is basically like ReceiveFunctionCall, but it reuses prepared
+ * callinfo.
+ */
+Datum
+ReceiveFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo,
+                            FmgrInfo *flinfo, StringInfo buf,
+                            Oid typioparam, int32 typmod)
+{
+    Datum        result;
+
+    if (buf == NULL && flinfo->fn_strict)
+        return (Datum) 0;        /* just return null result */
+
+    fcinfo->flinfo = flinfo;
+    fcinfo->isnull = false;
+    fcinfo->args[0].value = PointerGetDatum(buf);
+    fcinfo->args[1].value = ObjectIdGetDatum(typioparam);
+    fcinfo->args[2].value = Int32GetDatum(typmod);
+
+    result = FunctionCallInvoke(fcinfo);
+
+    /* Should get null result if and only if buf is NULL */
+    if (buf == NULL)
+    {
+        if (!fcinfo->isnull)
+            elog(ERROR, "receive function %u returned non-NULL",
+                 flinfo->fn_oid);
+    }
+    else
+    {
+        if (fcinfo->isnull)
+            elog(ERROR, "receive function %u returned NULL",
+                 flinfo->fn_oid);
+    }
+
+    return result;
+}
+
 /*
  * Call a previously-looked-up datatype binary-output function.
  *
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index cad52fcc78..8c1a227c02 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -97,6 +97,7 @@ typedef struct CopyFromStateData
     Oid           *typioparams;    /* array of element types for in_functions */
     ErrorSaveContext *escontext;    /* soft error trapper during in_functions
                                      * execution */
+    FunctionCallInfoBaseData *fcinfo;    /* reusable callinfo for in_functions */
     uint64        num_errors;        /* total number of rows which contained soft
                                  * errors */
     int           *defmap;            /* array of default att numbers related to
diff --git a/src/include/fmgr.h b/src/include/fmgr.h
index ccb4070a25..3d3a12205b 100644
--- a/src/include/fmgr.h
+++ b/src/include/fmgr.h
@@ -708,12 +708,24 @@ extern bool DirectInputFunctionCallSafe(PGFunction func, char *str,
                                         Oid typioparam, int32 typmod,
                                         fmNodePtr escontext,
                                         Datum *result);
+extern FunctionCallInfoBaseData *PrepareInputFunctionCallInfo(void);
+extern bool
+            InputFunctionCallSafeWithInfo(FunctionCallInfoBaseData *fcinfo,
+                                          FmgrInfo *flinfo, char *str,
+                                          Oid typioparam, int32 typmod,
+                                          fmNodePtr escontext,
+                                          Datum *result);
 extern Datum OidInputFunctionCall(Oid functionId, char *str,
                                   Oid typioparam, int32 typmod);
 extern char *OutputFunctionCall(FmgrInfo *flinfo, Datum val);
 extern char *OidOutputFunctionCall(Oid functionId, Datum val);
 extern Datum ReceiveFunctionCall(FmgrInfo *flinfo, fmStringInfo buf,
                                  Oid typioparam, int32 typmod);
+extern FunctionCallInfoBaseData *PrepareReceiveFunctionCallInfo(void);
+extern Datum
+            ReceiveFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo,
+                                        FmgrInfo *flinfo, fmStringInfo buf,
+                                        Oid typioparam, int32 typmod);
 extern Datum OidReceiveFunctionCall(Oid functionId, fmStringInfo buf,
                                     Oid typioparam, int32 typmod);
 extern bytea *SendFunctionCall(FmgrInfo *flinfo, Datum val);
-- 
2.43.0

From dbf04dec457ad2c61d00538514cc5356e94074e1 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 15 Feb 2024 15:26:31 +0900
Subject: [PATCH v1] Reuse fcinfo used in COPY TO

Each CopyOneRowTo() calls output functions or binary-output
functions. We can reuse fcinfo for them instead of creating a local
fcinfo for each call. This will improve performance.
---
 src/backend/commands/copyto.c | 14 +++++--
 src/backend/utils/fmgr/fmgr.c | 79 +++++++++++++++++++++++++++++++++++
 src/include/fmgr.h            |  6 +++
 3 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 20ffc90363..21442861f3 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -97,6 +97,7 @@ typedef struct CopyToStateData
     MemoryContext copycontext;    /* per-copy execution context */
 
     FmgrInfo   *out_functions;    /* lookup info for output functions */
+    FunctionCallInfoBaseData *fcinfo;    /* reusable callinfo for out_functions */
     MemoryContext rowcontext;    /* per-row evaluation context */
     uint64        bytes_processed;    /* number of bytes processed so far */
 } CopyToStateData;
@@ -786,6 +787,10 @@ DoCopyTo(CopyToState cstate)
                               &isvarlena);
         fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
     }
+    if (cstate->opts.binary)
+        cstate->fcinfo = PrepareSendFunctionCallInfo();
+    else
+        cstate->fcinfo = PrepareOutputFunctionCallInfo();
 
     /*
      * Create a temporary memory context that we can reset once per row to
@@ -909,6 +914,7 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 {
     bool        need_delim = false;
     FmgrInfo   *out_functions = cstate->out_functions;
+    FunctionCallInfoBaseData *fcinfo = cstate->fcinfo;
     MemoryContext oldcontext;
     ListCell   *cur;
     char       *string;
@@ -949,8 +955,8 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
         {
             if (!cstate->opts.binary)
             {
-                string = OutputFunctionCall(&out_functions[attnum - 1],
-                                            value);
+                string = OutputFunctionCallWithInfo(fcinfo, &out_functions[attnum - 1],
+                                                    value);
                 if (cstate->opts.csv_mode)
                     CopyAttributeOutCSV(cstate, string,
                                         cstate->opts.force_quote_flags[attnum - 1]);
@@ -961,8 +967,8 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
             {
                 bytea       *outputbytes;
 
-                outputbytes = SendFunctionCall(&out_functions[attnum - 1],
-                                               value);
+                outputbytes = SendFunctionCallWithInfo(fcinfo, &out_functions[attnum - 1],
+                                                       value);
                 CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
                 CopySendData(cstate, VARDATA(outputbytes),
                              VARSIZE(outputbytes) - VARHDRSZ);
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index e48a86be54..ab74a643f2 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -1685,6 +1685,45 @@ OutputFunctionCall(FmgrInfo *flinfo, Datum val)
     return DatumGetCString(FunctionCall1(flinfo, val));
 }
 
+/*
+ * Prepare callinfo for OutputFunctionCallWithInfo to reuse one callinfo
+ * instead of initializing it for each call. This is for performance.
+ */
+FunctionCallInfoBaseData *
+PrepareOutputFunctionCallInfo(void)
+{
+    FunctionCallInfoBaseData *fcinfo;
+
+    fcinfo = (FunctionCallInfoBaseData *) palloc(SizeForFunctionCallInfo(1));
+    InitFunctionCallInfoData(*fcinfo, NULL, 1, InvalidOid, NULL, NULL);
+    fcinfo->args[0].isnull = false;
+    return fcinfo;
+}
+
+/*
+ * Call a previously-looked-up datatype output function, with prepared callinfo.
+ *
+ * This is basically like OutputFunctionCall, but it reuses prepared callinfo.
+ */
+char *
+OutputFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo,
+                           FmgrInfo *flinfo, Datum val)
+{
+    Datum        result;
+
+    fcinfo->flinfo = flinfo;
+    fcinfo->isnull = false;
+    fcinfo->args[0].value = val;
+
+    result = FunctionCallInvoke(fcinfo);
+
+    /* Check for null result, since caller is clearly not expecting one */
+    if (fcinfo->isnull)
+        elog(ERROR, "function %u returned NULL", flinfo->fn_oid);
+
+    return DatumGetCString(result);
+}
+
 /*
  * Call a previously-looked-up datatype binary-input function.
  *
@@ -1746,6 +1785,46 @@ SendFunctionCall(FmgrInfo *flinfo, Datum val)
     return DatumGetByteaP(FunctionCall1(flinfo, val));
 }
 
+/*
+ * Prepare callinfo for SendFunctionCallWithInfo to reuse one callinfo
+ * instead of initializing it for each call. This is for performance.
+ */
+FunctionCallInfoBaseData *
+PrepareSendFunctionCallInfo(void)
+{
+    FunctionCallInfoBaseData *fcinfo;
+
+    fcinfo = (FunctionCallInfoBaseData *) palloc(SizeForFunctionCallInfo(1));
+    InitFunctionCallInfoData(*fcinfo, NULL, 1, InvalidOid, NULL, NULL);
+    fcinfo->args[0].isnull = false;
+    return fcinfo;
+}
+
+/*
+ * Call a previously-looked-up datatype binary-output function, with prepared
+ * callinfo.
+ *
+ * This is basically like SendFunctionCall, but it reuses prepared callinfo.
+ */
+bytea *
+SendFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo,
+                         FmgrInfo *flinfo, Datum val)
+{
+    Datum        result;
+
+    fcinfo->flinfo = flinfo;
+    fcinfo->isnull = false;
+    fcinfo->args[0].value = val;
+
+    result = FunctionCallInvoke(fcinfo);
+
+    /* Check for null result, since caller is clearly not expecting one */
+    if (fcinfo->isnull)
+        elog(ERROR, "function %u returned NULL", flinfo->fn_oid);
+
+    return DatumGetByteaP(result);
+}
+
 /*
  * As above, for I/O functions identified by OID.  These are only to be used
  * in seldom-executed code paths.  They are not only slow but leak memory.
diff --git a/src/include/fmgr.h b/src/include/fmgr.h
index ccb4070a25..816ed31b05 100644
--- a/src/include/fmgr.h
+++ b/src/include/fmgr.h
@@ -711,12 +711,18 @@ extern bool DirectInputFunctionCallSafe(PGFunction func, char *str,
 extern Datum OidInputFunctionCall(Oid functionId, char *str,
                                   Oid typioparam, int32 typmod);
 extern char *OutputFunctionCall(FmgrInfo *flinfo, Datum val);
+extern FunctionCallInfoBaseData *PrepareOutputFunctionCallInfo(void);
+extern char *OutputFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo,
+                                        FmgrInfo *flinfo, Datum val);
 extern char *OidOutputFunctionCall(Oid functionId, Datum val);
 extern Datum ReceiveFunctionCall(FmgrInfo *flinfo, fmStringInfo buf,
                                  Oid typioparam, int32 typmod);
 extern Datum OidReceiveFunctionCall(Oid functionId, fmStringInfo buf,
                                     Oid typioparam, int32 typmod);
 extern bytea *SendFunctionCall(FmgrInfo *flinfo, Datum val);
+extern FunctionCallInfoBaseData *PrepareSendFunctionCallInfo(void);
+extern bytea *SendFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo,
+                                       FmgrInfo *flinfo, Datum val);
 extern bytea *OidSendFunctionCall(Oid functionId, Datum val);
 
 
-- 
2.43.0


pgsql-hackers by date:

Previous
From: Kyotaro Horiguchi
Date:
Subject: Re: About a recently-added message
Next
From: "Zhijie Hou (Fujitsu)"
Date:
Subject: RE: Synchronizing slots from primary to standby