Re: Make COPY format extendable: Extract COPY TO format implementations - Mailing list pgsql-hackers
From | Sutou Kouhei |
---|---|
Subject | Re: Make COPY format extendable: Extract COPY TO format implementations |
Date | |
Msg-id | 20251014.111524.966856440891708928.kou@clear-code.com Whole thread Raw |
In response to | Re: Make COPY format extendable: Extract COPY TO format implementations (Masahiko Sawada <sawada.mshk@gmail.com>) |
List | pgsql-hackers |
Hi, In <CAD21AoBkA=g=PN17r_iieru+vLyLtGZ8WvohgANa2vzsMfMogQ@mail.gmail.com> "Re: Make COPY format extendable: Extract COPY TO format implementations" on Mon, 13 Oct 2025 14:40:31 -0700, Masahiko Sawada <sawada.mshk@gmail.com> wrote: > The patch refactors the CopyToStateData so that we can both hide > internal-use-only fields from extensions and extension can use its own > state data, while not adding extra indirection layers. TBH I'm really > not sure we must fully hide internal fields from extensions. Other > extendable components seem not to strictly hide internal information > from extensions. I'd suggest starting with only the latter point. That > is, we merge fields in CopyToStateInternalData to CopyToStateData. > What do you think? OK. Let's follow the existing style. How about the attached patch? It merges CopyToStateInternalData to CopyToStateData. Thanks, -- kou From 325f56d4b4372f7b90b88c9c9068d253fcc9f39a Mon Sep 17 00:00:00 2001 From: Sutou Kouhei <kou@clear-code.com> Date: Tue, 14 Oct 2025 11:08:23 +0900 Subject: [PATCH] Split CopyToStateData to CopyToState{,Builtin}Data --- src/backend/commands/copyto.c | 170 ++++++++++++++++----------------- src/include/commands/copyapi.h | 66 +++++++++++++ 2 files changed, 148 insertions(+), 88 deletions(-) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index e5781155cdf..176d98f866b 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -21,7 +21,6 @@ #include "access/tableam.h" #include "commands/copyapi.h" #include "commands/progress.h" -#include "executor/execdesc.h" #include "executor/executor.h" #include "executor/tuptable.h" #include "libpq/libpq.h" @@ -37,19 +36,7 @@ #include "utils/snapmgr.h" /* - * Represents the different dest cases we need to worry about at - * the bottom level - */ -typedef enum CopyDest -{ - COPY_FILE, /* to file (or a piped program) */ - COPY_FRONTEND, /* to frontend */ - COPY_CALLBACK, /* to callback function */ -} CopyDest; - -/* - * This struct contains all the state variables used throughout a COPY TO - * operation. + * This struct contains the state variables used by built-in format routines. * * Multi-byte encodings: all supported client-side encodings encode multi-byte * characters by having the first byte's high bit set. Subsequent bytes of the @@ -62,40 +49,16 @@ typedef enum CopyDest * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true * when we have to do it the hard way. */ -typedef struct CopyToStateData +typedef struct CopyToStateBuiltinData { - /* format-specific routines */ - const CopyToRoutine *routine; + CopyToStateData parent; /* low-level state data */ - CopyDest copy_dest; /* type of copy source/destination */ - FILE *copy_file; /* used if copy_dest == COPY_FILE */ - StringInfo fe_msgbuf; /* used for all dests during COPY TO */ - int file_encoding; /* file or remote side's character encoding */ bool need_transcoding; /* file encoding diff from server? */ bool encoding_embeds_ascii; /* ASCII can be non-first byte? */ - - /* parameters from the COPY command */ - Relation rel; /* relation to copy to */ - QueryDesc *queryDesc; /* executable query to copy from */ - List *attnumlist; /* integer list of attnums to copy */ - char *filename; /* filename, or NULL for STDOUT */ - bool is_program; /* is 'filename' a program to popen? */ - copy_data_dest_cb data_dest_cb; /* function for writing data */ - - CopyFormatOptions opts; - Node *whereClause; /* WHERE condition (or NULL) */ - - /* - * Working state - */ - MemoryContext copycontext; /* per-copy execution context */ - - FmgrInfo *out_functions; /* lookup info for output functions */ - MemoryContext rowcontext; /* per-row evaluation context */ - uint64 bytes_processed; /* number of bytes processed so far */ -} CopyToStateData; +} CopyToStateBuiltinData; +typedef struct CopyToStateBuiltinData *CopyToStateBuiltin; /* DestReceiver for COPY (query) TO */ typedef struct @@ -118,6 +81,7 @@ static void CopyAttributeOutCSV(CopyToState cstate, const char *string, bool use_quote); /* built-in format-specific routines */ +static size_t CopyToBuiltinGetStateSize(void); static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc); static void CopyToTextLikeOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo); static void CopyToTextOneRow(CopyToState cstate, TupleTableSlot *slot); @@ -150,6 +114,7 @@ static void CopySendInt16(CopyToState cstate, int16 val); /* text format */ static const CopyToRoutine CopyToRoutineText = { + .CopyToGetStateSize = CopyToBuiltinGetStateSize, .CopyToStart = CopyToTextLikeStart, .CopyToOutFunc = CopyToTextLikeOutFunc, .CopyToOneRow = CopyToTextOneRow, @@ -158,6 +123,7 @@ static const CopyToRoutine CopyToRoutineText = { /* CSV format */ static const CopyToRoutine CopyToRoutineCSV = { + .CopyToGetStateSize = CopyToBuiltinGetStateSize, .CopyToStart = CopyToTextLikeStart, .CopyToOutFunc = CopyToTextLikeOutFunc, .CopyToOneRow = CopyToCSVOneRow, @@ -166,6 +132,7 @@ static const CopyToRoutine CopyToRoutineCSV = { /* binary format */ static const CopyToRoutine CopyToRoutineBinary = { + .CopyToGetStateSize = CopyToBuiltinGetStateSize, .CopyToStart = CopyToBinaryStart, .CopyToOutFunc = CopyToBinaryOutFunc, .CopyToOneRow = CopyToBinaryOneRow, @@ -185,18 +152,27 @@ CopyToGetRoutine(const CopyFormatOptions *opts) return &CopyToRoutineText; } +/* Implementation of the allocate callback for all built-in formats */ +static size_t +CopyToBuiltinGetStateSize(void) +{ + return sizeof(CopyToStateBuiltinData); +} + /* Implementation of the start callback for text and CSV formats */ static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) { + CopyToStateBuiltin cstate_builtin = (CopyToStateBuiltin) cstate; + /* * For non-binary copy, we need to convert null_print to file encoding, * because it will be sent directly with CopySendString. */ - if (cstate->need_transcoding) + if (cstate_builtin->need_transcoding) cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print, cstate->opts.null_print_len, - cstate->file_encoding); + cstate_builtin->file_encoding); /* if a header has been requested send the line */ if (cstate->opts.header_line == COPY_HEADER_TRUE) @@ -401,7 +377,7 @@ SendCopyBegin(CopyToState cstate) for (i = 0; i < natts; i++) pq_sendint16(&buf, format); /* per-column formats */ pq_endmessage(&buf); - cstate->copy_dest = COPY_FRONTEND; + cstate->copy_dest = COPY_DEST_FRONTEND; } static void @@ -448,7 +424,7 @@ CopySendEndOfRow(CopyToState cstate) switch (cstate->copy_dest) { - case COPY_FILE: + case COPY_DEST_FILE: if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1, cstate->copy_file) != 1 || ferror(cstate->copy_file)) @@ -482,11 +458,11 @@ CopySendEndOfRow(CopyToState cstate) errmsg("could not write to COPY file: %m"))); } break; - case COPY_FRONTEND: + case COPY_DEST_FRONTEND: /* Dump the accumulated row as one CopyData message */ (void) pq_putmessage(PqMsg_CopyData, fe_msgbuf->data, fe_msgbuf->len); break; - case COPY_CALLBACK: + case COPY_DEST_CALLBACK: cstate->data_dest_cb(fe_msgbuf->data, fe_msgbuf->len); break; } @@ -507,7 +483,7 @@ CopySendTextLikeEndOfRow(CopyToState cstate) { switch (cstate->copy_dest) { - case COPY_FILE: + case COPY_DEST_FILE: /* Default line termination depends on platform */ #ifndef WIN32 CopySendChar(cstate, '\n'); @@ -515,7 +491,7 @@ CopySendTextLikeEndOfRow(CopyToState cstate) CopySendString(cstate, "\r\n"); #endif break; - case COPY_FRONTEND: + case COPY_DEST_FRONTEND: /* The FE/BE protocol uses \n as newline for all platforms */ CopySendChar(cstate, '\n'); break; @@ -631,10 +607,14 @@ BeginCopyTo(ParseState *pstate, List *options) { CopyToState cstate; + CopyToStateBuiltin cstate_builtin; bool pipe = (filename == NULL && data_dest_cb == NULL); TupleDesc tupDesc; int num_phys_attrs; + MemoryContext copycontext; MemoryContext oldcontext; + CopyFormatOptions opts = {0}; + const CopyToRoutine *routine; const int progress_cols[] = { PROGRESS_COPY_COMMAND, PROGRESS_COPY_TYPE @@ -686,24 +666,33 @@ BeginCopyTo(ParseState *pstate, } - /* Allocate workspace and zero all fields */ - cstate = (CopyToStateData *) palloc0(sizeof(CopyToStateData)); - /* * We allocate everything used by a cstate in a new memory context. This * avoids memory leaks during repeated use of COPY in a query. */ - cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext, - "COPY", - ALLOCSET_DEFAULT_SIZES); + copycontext = AllocSetContextCreate(CurrentMemoryContext, + "COPY", + ALLOCSET_DEFAULT_SIZES); - oldcontext = MemoryContextSwitchTo(cstate->copycontext); + oldcontext = MemoryContextSwitchTo(copycontext); /* Extract options from the statement node tree */ - ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options); + ProcessCopyOptions(pstate, &opts, false /* is_from */ , options); - /* Set format routine */ - cstate->routine = CopyToGetRoutine(&cstate->opts); + /* Get format routine */ + routine = CopyToGetRoutine(&opts); + + /* Allocate workspace and set known values */ + MemoryContextSwitchTo(oldcontext); + cstate = (CopyToState) palloc0(routine->CopyToGetStateSize()); + MemoryContextSwitchTo(copycontext); + if (routine == &CopyToRoutineText || routine == &CopyToRoutineCSV || routine == &CopyToRoutineBinary) + cstate_builtin = (CopyToStateBuiltin) cstate; + else + cstate_builtin = NULL; + cstate->copycontext = copycontext; + cstate->opts = opts; + cstate->routine = routine; /* Process the source/target relation or query */ if (rel) @@ -883,31 +872,34 @@ BeginCopyTo(ParseState *pstate, } } - /* Use client encoding when ENCODING option is not specified. */ - if (cstate->opts.file_encoding < 0) - cstate->file_encoding = pg_get_client_encoding(); - else - cstate->file_encoding = cstate->opts.file_encoding; + if (cstate_builtin) + { + /* Use client encoding when ENCODING option is not specified. */ + if (cstate->opts.file_encoding < 0) + cstate_builtin->file_encoding = pg_get_client_encoding(); + else + cstate_builtin->file_encoding = cstate->opts.file_encoding; - /* - * Set up encoding conversion info if the file and server encodings differ - * (see also pg_server_to_any). - */ - if (cstate->file_encoding == GetDatabaseEncoding() || - cstate->file_encoding == PG_SQL_ASCII) - cstate->need_transcoding = false; - else - cstate->need_transcoding = true; + /* + * Set up encoding conversion info if the file and server encodings + * differ (see also pg_server_to_any). + */ + if (cstate_builtin->file_encoding == GetDatabaseEncoding() || + cstate_builtin->file_encoding == PG_SQL_ASCII) + cstate_builtin->need_transcoding = false; + else + cstate_builtin->need_transcoding = true; - /* See Multibyte encoding comment above */ - cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding); + /* See Multibyte encoding comment above */ + cstate_builtin->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate_builtin->file_encoding); + } - cstate->copy_dest = COPY_FILE; /* default */ + cstate->copy_dest = COPY_DEST_FILE; /* default */ if (data_dest_cb) { progress_vals[1] = PROGRESS_COPY_TYPE_CALLBACK; - cstate->copy_dest = COPY_CALLBACK; + cstate->copy_dest = COPY_DEST_CALLBACK; cstate->data_dest_cb = data_dest_cb; } else if (pipe) @@ -1146,13 +1138,14 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot) static void CopyAttributeOutText(CopyToState cstate, const char *string) { + CopyToStateBuiltin cstate_builtin = (CopyToStateBuiltin) cstate; const char *ptr; const char *start; char c; char delimc = cstate->opts.delim[0]; - if (cstate->need_transcoding) - ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding); + if (cstate_builtin->need_transcoding) + ptr = pg_server_to_any(string, strlen(string), cstate_builtin->file_encoding); else ptr = string; @@ -1170,7 +1163,7 @@ CopyAttributeOutText(CopyToState cstate, const char *string) * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out * of the normal safe-encoding path. */ - if (cstate->encoding_embeds_ascii) + if (cstate_builtin->encoding_embeds_ascii) { start = ptr; while ((c = *ptr) != '\0') @@ -1225,7 +1218,7 @@ CopyAttributeOutText(CopyToState cstate, const char *string) start = ptr++; /* we include char in next run */ } else if (IS_HIGHBIT_SET(c)) - ptr += pg_encoding_mblen(cstate->file_encoding, ptr); + ptr += pg_encoding_mblen(cstate_builtin->file_encoding, ptr); else ptr++; } @@ -1300,6 +1293,7 @@ static void CopyAttributeOutCSV(CopyToState cstate, const char *string, bool use_quote) { + CopyToStateBuiltin cstate_builtin = (CopyToStateBuiltin) cstate; const char *ptr; const char *start; char c; @@ -1312,8 +1306,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string, if (!use_quote && strcmp(string, cstate->opts.null_print) == 0) use_quote = true; - if (cstate->need_transcoding) - ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding); + if (cstate_builtin->need_transcoding) + ptr = pg_server_to_any(string, strlen(string), cstate_builtin->file_encoding); else ptr = string; @@ -1342,8 +1336,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string, use_quote = true; break; } - if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii) - tptr += pg_encoding_mblen(cstate->file_encoding, tptr); + if (IS_HIGHBIT_SET(c) && cstate_builtin->encoding_embeds_ascii) + tptr += pg_encoding_mblen(cstate_builtin->file_encoding, tptr); else tptr++; } @@ -1366,8 +1360,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string, CopySendChar(cstate, escapec); start = ptr; /* we include char in next run */ } - if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii) - ptr += pg_encoding_mblen(cstate->file_encoding, ptr); + if (IS_HIGHBIT_SET(c) && cstate_builtin->encoding_embeds_ascii) + ptr += pg_encoding_mblen(cstate_builtin->file_encoding, ptr); else ptr++; } diff --git a/src/include/commands/copyapi.h b/src/include/commands/copyapi.h index 2a2d2f9876b..aece73f4ca2 100644 --- a/src/include/commands/copyapi.h +++ b/src/include/commands/copyapi.h @@ -15,6 +15,7 @@ #define COPYAPI_H #include "commands/copy.h" +#include "executor/execdesc.h" /* * API structure for a COPY TO format implementation. Note this must be @@ -22,6 +23,25 @@ */ typedef struct CopyToRoutine { + /* --- + * Return state size for this routine. + * + * If this routine uses CopyToStateData as-is, `return + * sizeof(CopyToStateData)` can be used. + * + * If this routine needs additional data than CopyToStateData, a new + * struct based on CopyToStateData can be used something like: + * + * typedef struct MyCopyToStateDate { + * struct CopyToStateData parent; + * int define_additional_members_here; + * } MyCopyToStateData; + * + * In the case, this callback returns `sizeof(MyCopyToStateData)`. + * --- + */ + size_t (*CopyToGetStateSize) (void); + /* * Set output function information. This callback is called once at the * beginning of COPY TO. @@ -54,6 +74,52 @@ typedef struct CopyToRoutine void (*CopyToEnd) (CopyToState cstate); } CopyToRoutine; +/* + * Represents the different dest cases we need to worry about at + * the bottom level + */ +typedef enum CopyDest +{ + COPY_DEST_FILE, /* to file (or a piped program) */ + COPY_DEST_FRONTEND, /* to frontend */ + COPY_DEST_CALLBACK, /* to callback function */ +} CopyDest; + +/* + * This struct contains the state variables used by PostgreSQL, built-in + * format routines and custom format routines. + */ +typedef struct CopyToStateData +{ + /* format-specific routines */ + const CopyToRoutine *routine; + + /* low-level state data */ + CopyDest copy_dest; /* type of copy source/destination */ + FILE *copy_file; /* used if copy_dest == COPY_FILE */ + StringInfo fe_msgbuf; /* used for all dests during COPY TO */ + + /* parameters from the COPY command */ + Relation rel; /* relation to copy to */ + QueryDesc *queryDesc; /* executable query to copy from */ + List *attnumlist; /* integer list of attnums to copy */ + char *filename; /* filename, or NULL for STDOUT */ + bool is_program; /* is 'filename' a program to popen? */ + copy_data_dest_cb data_dest_cb; /* function for writing data */ + + CopyFormatOptions opts; + Node *whereClause; /* WHERE condition (or NULL) */ + + /* + * Working state + */ + MemoryContext copycontext; /* per-copy execution context */ + + FmgrInfo *out_functions; /* lookup info for output functions */ + MemoryContext rowcontext; /* per-row evaluation context */ + uint64 bytes_processed; /* number of bytes processed so far */ +} CopyToStateData; + /* * API structure for a COPY FROM format implementation. Note this must be * allocated in a server-lifetime manner, typically as a static const struct. -- 2.51.0
pgsql-hackers by date: