Re: Make COPY format extendable: Extract COPY TO format implementations - Mailing list pgsql-hackers

From Sutou Kouhei
Subject Re: Make COPY format extendable: Extract COPY TO format implementations
Date
Msg-id 20251014.111524.966856440891708928.kou@clear-code.com
Whole thread Raw
In response to Re: Make COPY format extendable: Extract COPY TO format implementations  (Masahiko Sawada <sawada.mshk@gmail.com>)
List pgsql-hackers
Hi,

In <CAD21AoBkA=g=PN17r_iieru+vLyLtGZ8WvohgANa2vzsMfMogQ@mail.gmail.com>
  "Re: Make COPY format extendable: Extract COPY TO format implementations" on Mon, 13 Oct 2025 14:40:31 -0700,
  Masahiko Sawada <sawada.mshk@gmail.com> wrote:

> The patch refactors the CopyToStateData so that we can both hide
> internal-use-only fields from extensions and extension can use its own
> state data, while not adding extra indirection layers. TBH I'm really
> not sure we must fully hide internal fields from extensions. Other
> extendable components seem not to strictly hide internal information
> from extensions. I'd suggest starting with only the latter point. That
> is, we merge fields in CopyToStateInternalData to CopyToStateData.
> What do you think?

OK. Let's follow the existing style. How about the attached
patch? It merges CopyToStateInternalData to CopyToStateData.


Thanks,
-- 
kou

From 325f56d4b4372f7b90b88c9c9068d253fcc9f39a Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Tue, 14 Oct 2025 11:08:23 +0900
Subject: [PATCH] Split CopyToStateData to CopyToState{,Builtin}Data

---
 src/backend/commands/copyto.c  | 170 ++++++++++++++++-----------------
 src/include/commands/copyapi.h |  66 +++++++++++++
 2 files changed, 148 insertions(+), 88 deletions(-)

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index e5781155cdf..176d98f866b 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -21,7 +21,6 @@
 #include "access/tableam.h"
 #include "commands/copyapi.h"
 #include "commands/progress.h"
-#include "executor/execdesc.h"
 #include "executor/executor.h"
 #include "executor/tuptable.h"
 #include "libpq/libpq.h"
@@ -37,19 +36,7 @@
 #include "utils/snapmgr.h"
 
 /*
- * Represents the different dest cases we need to worry about at
- * the bottom level
- */
-typedef enum CopyDest
-{
-    COPY_FILE,                    /* to file (or a piped program) */
-    COPY_FRONTEND,                /* to frontend */
-    COPY_CALLBACK,                /* to callback function */
-} CopyDest;
-
-/*
- * This struct contains all the state variables used throughout a COPY TO
- * operation.
+ * This struct contains the state variables used by built-in format routines.
  *
  * Multi-byte encodings: all supported client-side encodings encode multi-byte
  * characters by having the first byte's high bit set. Subsequent bytes of the
@@ -62,40 +49,16 @@ typedef enum CopyDest
  * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
  * when we have to do it the hard way.
  */
-typedef struct CopyToStateData
+typedef struct CopyToStateBuiltinData
 {
-    /* format-specific routines */
-    const CopyToRoutine *routine;
+    CopyToStateData parent;
 
     /* low-level state data */
-    CopyDest    copy_dest;        /* type of copy source/destination */
-    FILE       *copy_file;        /* used if copy_dest == COPY_FILE */
-    StringInfo    fe_msgbuf;        /* used for all dests during COPY TO */
-
     int            file_encoding;    /* file or remote side's character encoding */
     bool        need_transcoding;    /* file encoding diff from server? */
     bool        encoding_embeds_ascii;    /* ASCII can be non-first byte? */
-
-    /* parameters from the COPY command */
-    Relation    rel;            /* relation to copy to */
-    QueryDesc  *queryDesc;        /* executable query to copy from */
-    List       *attnumlist;        /* integer list of attnums to copy */
-    char       *filename;        /* filename, or NULL for STDOUT */
-    bool        is_program;        /* is 'filename' a program to popen? */
-    copy_data_dest_cb data_dest_cb; /* function for writing data */
-
-    CopyFormatOptions opts;
-    Node       *whereClause;    /* WHERE condition (or NULL) */
-
-    /*
-     * Working state
-     */
-    MemoryContext copycontext;    /* per-copy execution context */
-
-    FmgrInfo   *out_functions;    /* lookup info for output functions */
-    MemoryContext rowcontext;    /* per-row evaluation context */
-    uint64        bytes_processed;    /* number of bytes processed so far */
-} CopyToStateData;
+}            CopyToStateBuiltinData;
+typedef struct CopyToStateBuiltinData *CopyToStateBuiltin;
 
 /* DestReceiver for COPY (query) TO */
 typedef struct
@@ -118,6 +81,7 @@ static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
                                 bool use_quote);
 
 /* built-in format-specific routines */
+static size_t CopyToBuiltinGetStateSize(void);
 static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc);
 static void CopyToTextLikeOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo);
 static void CopyToTextOneRow(CopyToState cstate, TupleTableSlot *slot);
@@ -150,6 +114,7 @@ static void CopySendInt16(CopyToState cstate, int16 val);
 
 /* text format */
 static const CopyToRoutine CopyToRoutineText = {
+    .CopyToGetStateSize = CopyToBuiltinGetStateSize,
     .CopyToStart = CopyToTextLikeStart,
     .CopyToOutFunc = CopyToTextLikeOutFunc,
     .CopyToOneRow = CopyToTextOneRow,
@@ -158,6 +123,7 @@ static const CopyToRoutine CopyToRoutineText = {
 
 /* CSV format */
 static const CopyToRoutine CopyToRoutineCSV = {
+    .CopyToGetStateSize = CopyToBuiltinGetStateSize,
     .CopyToStart = CopyToTextLikeStart,
     .CopyToOutFunc = CopyToTextLikeOutFunc,
     .CopyToOneRow = CopyToCSVOneRow,
@@ -166,6 +132,7 @@ static const CopyToRoutine CopyToRoutineCSV = {
 
 /* binary format */
 static const CopyToRoutine CopyToRoutineBinary = {
+    .CopyToGetStateSize = CopyToBuiltinGetStateSize,
     .CopyToStart = CopyToBinaryStart,
     .CopyToOutFunc = CopyToBinaryOutFunc,
     .CopyToOneRow = CopyToBinaryOneRow,
@@ -185,18 +152,27 @@ CopyToGetRoutine(const CopyFormatOptions *opts)
     return &CopyToRoutineText;
 }
 
+/* Implementation of the allocate callback for all built-in formats */
+static size_t
+CopyToBuiltinGetStateSize(void)
+{
+    return sizeof(CopyToStateBuiltinData);
+}
+
 /* Implementation of the start callback for text and CSV formats */
 static void
 CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
 {
+    CopyToStateBuiltin cstate_builtin = (CopyToStateBuiltin) cstate;
+
     /*
      * For non-binary copy, we need to convert null_print to file encoding,
      * because it will be sent directly with CopySendString.
      */
-    if (cstate->need_transcoding)
+    if (cstate_builtin->need_transcoding)
         cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print,
                                                           cstate->opts.null_print_len,
-                                                          cstate->file_encoding);
+                                                          cstate_builtin->file_encoding);
 
     /* if a header has been requested send the line */
     if (cstate->opts.header_line == COPY_HEADER_TRUE)
@@ -401,7 +377,7 @@ SendCopyBegin(CopyToState cstate)
     for (i = 0; i < natts; i++)
         pq_sendint16(&buf, format); /* per-column formats */
     pq_endmessage(&buf);
-    cstate->copy_dest = COPY_FRONTEND;
+    cstate->copy_dest = COPY_DEST_FRONTEND;
 }
 
 static void
@@ -448,7 +424,7 @@ CopySendEndOfRow(CopyToState cstate)
 
     switch (cstate->copy_dest)
     {
-        case COPY_FILE:
+        case COPY_DEST_FILE:
             if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
                        cstate->copy_file) != 1 ||
                 ferror(cstate->copy_file))
@@ -482,11 +458,11 @@ CopySendEndOfRow(CopyToState cstate)
                              errmsg("could not write to COPY file: %m")));
             }
             break;
-        case COPY_FRONTEND:
+        case COPY_DEST_FRONTEND:
             /* Dump the accumulated row as one CopyData message */
             (void) pq_putmessage(PqMsg_CopyData, fe_msgbuf->data, fe_msgbuf->len);
             break;
-        case COPY_CALLBACK:
+        case COPY_DEST_CALLBACK:
             cstate->data_dest_cb(fe_msgbuf->data, fe_msgbuf->len);
             break;
     }
@@ -507,7 +483,7 @@ CopySendTextLikeEndOfRow(CopyToState cstate)
 {
     switch (cstate->copy_dest)
     {
-        case COPY_FILE:
+        case COPY_DEST_FILE:
             /* Default line termination depends on platform */
 #ifndef WIN32
             CopySendChar(cstate, '\n');
@@ -515,7 +491,7 @@ CopySendTextLikeEndOfRow(CopyToState cstate)
             CopySendString(cstate, "\r\n");
 #endif
             break;
-        case COPY_FRONTEND:
+        case COPY_DEST_FRONTEND:
             /* The FE/BE protocol uses \n as newline for all platforms */
             CopySendChar(cstate, '\n');
             break;
@@ -631,10 +607,14 @@ BeginCopyTo(ParseState *pstate,
             List *options)
 {
     CopyToState cstate;
+    CopyToStateBuiltin cstate_builtin;
     bool        pipe = (filename == NULL && data_dest_cb == NULL);
     TupleDesc    tupDesc;
     int            num_phys_attrs;
+    MemoryContext copycontext;
     MemoryContext oldcontext;
+    CopyFormatOptions opts = {0};
+    const CopyToRoutine *routine;
     const int    progress_cols[] = {
         PROGRESS_COPY_COMMAND,
         PROGRESS_COPY_TYPE
@@ -686,24 +666,33 @@ BeginCopyTo(ParseState *pstate,
     }
 
 
-    /* Allocate workspace and zero all fields */
-    cstate = (CopyToStateData *) palloc0(sizeof(CopyToStateData));
-
     /*
      * We allocate everything used by a cstate in a new memory context. This
      * avoids memory leaks during repeated use of COPY in a query.
      */
-    cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
-                                                "COPY",
-                                                ALLOCSET_DEFAULT_SIZES);
+    copycontext = AllocSetContextCreate(CurrentMemoryContext,
+                                        "COPY",
+                                        ALLOCSET_DEFAULT_SIZES);
 
-    oldcontext = MemoryContextSwitchTo(cstate->copycontext);
+    oldcontext = MemoryContextSwitchTo(copycontext);
 
     /* Extract options from the statement node tree */
-    ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options);
+    ProcessCopyOptions(pstate, &opts, false /* is_from */ , options);
 
-    /* Set format routine */
-    cstate->routine = CopyToGetRoutine(&cstate->opts);
+    /* Get format routine */
+    routine = CopyToGetRoutine(&opts);
+
+    /* Allocate workspace and set known values */
+    MemoryContextSwitchTo(oldcontext);
+    cstate = (CopyToState) palloc0(routine->CopyToGetStateSize());
+    MemoryContextSwitchTo(copycontext);
+    if (routine == &CopyToRoutineText || routine == &CopyToRoutineCSV || routine == &CopyToRoutineBinary)
+        cstate_builtin = (CopyToStateBuiltin) cstate;
+    else
+        cstate_builtin = NULL;
+    cstate->copycontext = copycontext;
+    cstate->opts = opts;
+    cstate->routine = routine;
 
     /* Process the source/target relation or query */
     if (rel)
@@ -883,31 +872,34 @@ BeginCopyTo(ParseState *pstate,
         }
     }
 
-    /* Use client encoding when ENCODING option is not specified. */
-    if (cstate->opts.file_encoding < 0)
-        cstate->file_encoding = pg_get_client_encoding();
-    else
-        cstate->file_encoding = cstate->opts.file_encoding;
+    if (cstate_builtin)
+    {
+        /* Use client encoding when ENCODING option is not specified. */
+        if (cstate->opts.file_encoding < 0)
+            cstate_builtin->file_encoding = pg_get_client_encoding();
+        else
+            cstate_builtin->file_encoding = cstate->opts.file_encoding;
 
-    /*
-     * Set up encoding conversion info if the file and server encodings differ
-     * (see also pg_server_to_any).
-     */
-    if (cstate->file_encoding == GetDatabaseEncoding() ||
-        cstate->file_encoding == PG_SQL_ASCII)
-        cstate->need_transcoding = false;
-    else
-        cstate->need_transcoding = true;
+        /*
+         * Set up encoding conversion info if the file and server encodings
+         * differ (see also pg_server_to_any).
+         */
+        if (cstate_builtin->file_encoding == GetDatabaseEncoding() ||
+            cstate_builtin->file_encoding == PG_SQL_ASCII)
+            cstate_builtin->need_transcoding = false;
+        else
+            cstate_builtin->need_transcoding = true;
 
-    /* See Multibyte encoding comment above */
-    cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
+        /* See Multibyte encoding comment above */
+        cstate_builtin->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate_builtin->file_encoding);
+    }
 
-    cstate->copy_dest = COPY_FILE;    /* default */
+    cstate->copy_dest = COPY_DEST_FILE; /* default */
 
     if (data_dest_cb)
     {
         progress_vals[1] = PROGRESS_COPY_TYPE_CALLBACK;
-        cstate->copy_dest = COPY_CALLBACK;
+        cstate->copy_dest = COPY_DEST_CALLBACK;
         cstate->data_dest_cb = data_dest_cb;
     }
     else if (pipe)
@@ -1146,13 +1138,14 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 static void
 CopyAttributeOutText(CopyToState cstate, const char *string)
 {
+    CopyToStateBuiltin cstate_builtin = (CopyToStateBuiltin) cstate;
     const char *ptr;
     const char *start;
     char        c;
     char        delimc = cstate->opts.delim[0];
 
-    if (cstate->need_transcoding)
-        ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
+    if (cstate_builtin->need_transcoding)
+        ptr = pg_server_to_any(string, strlen(string), cstate_builtin->file_encoding);
     else
         ptr = string;
 
@@ -1170,7 +1163,7 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
      * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
      * of the normal safe-encoding path.
      */
-    if (cstate->encoding_embeds_ascii)
+    if (cstate_builtin->encoding_embeds_ascii)
     {
         start = ptr;
         while ((c = *ptr) != '\0')
@@ -1225,7 +1218,7 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
                 start = ptr++;    /* we include char in next run */
             }
             else if (IS_HIGHBIT_SET(c))
-                ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
+                ptr += pg_encoding_mblen(cstate_builtin->file_encoding, ptr);
             else
                 ptr++;
         }
@@ -1300,6 +1293,7 @@ static void
 CopyAttributeOutCSV(CopyToState cstate, const char *string,
                     bool use_quote)
 {
+    CopyToStateBuiltin cstate_builtin = (CopyToStateBuiltin) cstate;
     const char *ptr;
     const char *start;
     char        c;
@@ -1312,8 +1306,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
     if (!use_quote && strcmp(string, cstate->opts.null_print) == 0)
         use_quote = true;
 
-    if (cstate->need_transcoding)
-        ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
+    if (cstate_builtin->need_transcoding)
+        ptr = pg_server_to_any(string, strlen(string), cstate_builtin->file_encoding);
     else
         ptr = string;
 
@@ -1342,8 +1336,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
                     use_quote = true;
                     break;
                 }
-                if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
-                    tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
+                if (IS_HIGHBIT_SET(c) && cstate_builtin->encoding_embeds_ascii)
+                    tptr += pg_encoding_mblen(cstate_builtin->file_encoding, tptr);
                 else
                     tptr++;
             }
@@ -1366,8 +1360,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
                 CopySendChar(cstate, escapec);
                 start = ptr;    /* we include char in next run */
             }
-            if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
-                ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
+            if (IS_HIGHBIT_SET(c) && cstate_builtin->encoding_embeds_ascii)
+                ptr += pg_encoding_mblen(cstate_builtin->file_encoding, ptr);
             else
                 ptr++;
         }
diff --git a/src/include/commands/copyapi.h b/src/include/commands/copyapi.h
index 2a2d2f9876b..aece73f4ca2 100644
--- a/src/include/commands/copyapi.h
+++ b/src/include/commands/copyapi.h
@@ -15,6 +15,7 @@
 #define COPYAPI_H
 
 #include "commands/copy.h"
+#include "executor/execdesc.h"
 
 /*
  * API structure for a COPY TO format implementation. Note this must be
@@ -22,6 +23,25 @@
  */
 typedef struct CopyToRoutine
 {
+    /* ---
+     * Return state size for this routine.
+     *
+     * If this routine uses CopyToStateData as-is, `return
+     * sizeof(CopyToStateData)` can be used.
+     *
+     * If this routine needs additional data than CopyToStateData, a new
+     * struct based on CopyToStateData can be used something like:
+     *
+     * typedef struct MyCopyToStateDate {
+     *     struct CopyToStateData parent;
+     *     int define_additional_members_here;
+     * } MyCopyToStateData;
+     *
+     * In the case, this callback returns `sizeof(MyCopyToStateData)`.
+     * ---
+     */
+    size_t        (*CopyToGetStateSize) (void);
+
     /*
      * Set output function information. This callback is called once at the
      * beginning of COPY TO.
@@ -54,6 +74,52 @@ typedef struct CopyToRoutine
     void        (*CopyToEnd) (CopyToState cstate);
 } CopyToRoutine;
 
+/*
+ * Represents the different dest cases we need to worry about at
+ * the bottom level
+ */
+typedef enum CopyDest
+{
+    COPY_DEST_FILE,                /* to file (or a piped program) */
+    COPY_DEST_FRONTEND,            /* to frontend */
+    COPY_DEST_CALLBACK,            /* to callback function */
+} CopyDest;
+
+/*
+ * This struct contains the state variables used by PostgreSQL, built-in
+ * format routines and custom format routines.
+ */
+typedef struct CopyToStateData
+{
+    /* format-specific routines */
+    const CopyToRoutine *routine;
+
+    /* low-level state data */
+    CopyDest    copy_dest;        /* type of copy source/destination */
+    FILE       *copy_file;        /* used if copy_dest == COPY_FILE */
+    StringInfo    fe_msgbuf;        /* used for all dests during COPY TO */
+
+    /* parameters from the COPY command */
+    Relation    rel;            /* relation to copy to */
+    QueryDesc  *queryDesc;        /* executable query to copy from */
+    List       *attnumlist;        /* integer list of attnums to copy */
+    char       *filename;        /* filename, or NULL for STDOUT */
+    bool        is_program;        /* is 'filename' a program to popen? */
+    copy_data_dest_cb data_dest_cb; /* function for writing data */
+
+    CopyFormatOptions opts;
+    Node       *whereClause;    /* WHERE condition (or NULL) */
+
+    /*
+     * Working state
+     */
+    MemoryContext copycontext;    /* per-copy execution context */
+
+    FmgrInfo   *out_functions;    /* lookup info for output functions */
+    MemoryContext rowcontext;    /* per-row evaluation context */
+    uint64        bytes_processed;    /* number of bytes processed so far */
+} CopyToStateData;
+
 /*
  * API structure for a COPY FROM format implementation. Note this must be
  * allocated in a server-lifetime manner, typically as a static const struct.
-- 
2.51.0


pgsql-hackers by date:

Previous
From: jian he
Date:
Subject: Re: CAST(... ON DEFAULT) - WIP build on top of Error-Safe User Functions
Next
From: Jeff Davis
Date:
Subject: Re: Clarification on Role Access Rights to Table Indexes