From e6bafe5eab9463b253e9697e0fbd82246c316a12 Mon Sep 17 00:00:00 2001 From: jian he Date: Sun, 10 Aug 2025 23:13:38 +0800 Subject: [PATCH v18 2/3] json format for COPY TO JSON format is only supported with the COPY TO operation. It is incompatible with options such as HEADER, DEFAULT, NULL, DELIMITER, and several others. This has been thoroughly tested in src/test/regress/sql/copy.sql The CopyFormat enum was originally contributed by Joel Jacobson joel@compiler.org, later refactored by Jian He to address various issues, and further adapted by Junwang Zhao to support the newly introduced CopyToRoutine struct (commit 2e4127b6d2). Author: Joe Conway Reviewed-by: "Andrey M. Borodin" , Reviewed-by: Dean Rasheed , Reviewed-by: Daniel Verite , Reviewed-by: Andrew Dunstan , Reviewed-by: Davin Shearer , Reviewed-by: Masahiko Sawada , Reviewed-by: Alvaro Herrera discussion: https://postgr.es/m/CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU%3Dkcg%40mail.gmail.com discussion: https://postgr.es/m/6a04628d-0d53-41d9-9e35-5a8dc302c34c@joeconway.com --- doc/src/sgml/ref/copy.sgml | 13 +++-- src/backend/commands/copy.c | 72 +++++++++++++++++++++------- src/backend/commands/copyto.c | 76 ++++++++++++++++++++++++++---- src/backend/parser/gram.y | 8 ++++ src/backend/utils/adt/json.c | 5 +- src/bin/psql/tab-complete.in.c | 4 +- src/include/commands/copy.h | 1 + src/include/utils/json.h | 2 + src/test/regress/expected/copy.out | 76 ++++++++++++++++++++++++++++++ src/test/regress/sql/copy.sql | 47 ++++++++++++++++++ src/tools/pgindent/typedefs.list | 1 + 11 files changed, 271 insertions(+), 34 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index c2d1fbc1fbe..219604ad306 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -228,10 +228,15 @@ COPY { table_name [ ( text, csv (Comma Separated Values), + json (JavaScript Object Notation), or binary. The default is text. See below for details. + + The json option is allowed only in + COPY TO. + @@ -266,7 +271,7 @@ COPY { table_name [ ( CSV format. This must be a single one-byte character. - This option is not allowed when using binary format. + This option is not allowed when using binary or json format. @@ -280,7 +285,7 @@ COPY { table_name [ ( CSV format. You might prefer an empty string even in text format for cases where you don't want to distinguish nulls from empty strings. - This option is not allowed when using binary format. + This option is not allowed when using binary or json format. @@ -303,7 +308,7 @@ COPY { table_name [ ( COPY FROM, and only when - not using binary format. + not using binary or json format. @@ -330,7 +335,7 @@ COPY { table_name [ ( COPY FROM commands. - This option is not allowed when using binary format. + This option is not allowed when using binary or json format. diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 68f69cfb9df..7fd41dba250 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -542,6 +542,8 @@ ProcessCopyOptions(ParseState *pstate, opts_out->format = COPY_FORMAT_CSV; else if (strcmp(fmt, "binary") == 0) opts_out->format = COPY_FORMAT_BINARY; + else if (strcmp(fmt, "json") == 0) + opts_out->format = COPY_FORMAT_JSON; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -701,21 +703,42 @@ ProcessCopyOptions(ParseState *pstate, * Check for incompatible options (must do these three before inserting * defaults) */ - if (opts_out->format == COPY_FORMAT_BINARY && opts_out->delim) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ - errmsg("cannot specify %s in BINARY mode", "DELIMITER"))); + if (opts_out->delim) + { + if (opts_out->format == COPY_FORMAT_BINARY) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ + errmsg("cannot specify %s in BINARY mode", "DELIMITER")); + else if (opts_out->format == COPY_FORMAT_JSON) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in JSON mode", "DELIMITER")); + } - if (opts_out->format == COPY_FORMAT_BINARY && opts_out->null_print) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("cannot specify %s in BINARY mode", "NULL"))); + if (opts_out->null_print) + { + if (opts_out->format == COPY_FORMAT_BINARY) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in BINARY mode", "NULL")); + else if (opts_out->format == COPY_FORMAT_JSON) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in JSON mode", "NULL")); + } - if (opts_out->format == COPY_FORMAT_BINARY && opts_out->default_print) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("cannot specify %s in BINARY mode", "DEFAULT"))); + if (opts_out->default_print) + { + if (opts_out->format == COPY_FORMAT_BINARY) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in BINARY mode", "DEFAULT")); + else if (opts_out->format == COPY_FORMAT_JSON) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in JSON mode", "DEFAULT")); + } /* Set defaults for omitted options */ if (!opts_out->delim) @@ -781,11 +804,18 @@ ProcessCopyOptions(ParseState *pstate, errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim))); /* Check header */ - if (opts_out->format == COPY_FORMAT_BINARY && opts_out->header_line != COPY_HEADER_FALSE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ - errmsg("cannot specify %s in BINARY mode", "HEADER"))); + if (opts_out->header_line != COPY_HEADER_FALSE) + { + if (opts_out->format == COPY_FORMAT_BINARY) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ + errmsg("cannot specify %s in BINARY mode", "HEADER")); + else if(opts_out->format == COPY_FORMAT_JSON) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify %s in JSON mode", "HEADER")); + } /* Check quote */ if (opts_out->format != COPY_FORMAT_CSV && opts_out->quote != NULL) @@ -889,6 +919,12 @@ ProcessCopyOptions(ParseState *pstate, errmsg("COPY %s cannot be used with %s", "FREEZE", "COPY TO"))); + /* Check json format */ + if (opts_out->format == COPY_FORMAT_JSON && is_from) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY %s mode cannot be used with %s", "json", "COPY FROM")); + if (opts_out->default_print) { if (!is_from) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index e990343bab0..13eb14debbd 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -24,6 +24,7 @@ #include "executor/execdesc.h" #include "executor/executor.h" #include "executor/tuptable.h" +#include "funcapi.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "mb/pg_wchar.h" @@ -31,6 +32,7 @@ #include "pgstat.h" #include "storage/fd.h" #include "tcop/tcopprot.h" +#include "utils/json.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -125,6 +127,7 @@ static void CopyToCSVOneRow(CopyToState cstate, TupleTableSlot *slot); static void CopyToTextLikeOneRow(CopyToState cstate, TupleTableSlot *slot, bool is_csv); static void CopyToTextLikeEnd(CopyToState cstate); +static void CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot); static void CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc); static void CopyToBinaryOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo); static void CopyToBinaryOneRow(CopyToState cstate, TupleTableSlot *slot); @@ -144,7 +147,7 @@ static void CopySendInt16(CopyToState cstate, int16 val); /* * COPY TO routines for built-in formats. * - * CSV and text formats share the same TextLike routines except for the + * CSV and text, json formats share the same TextLike routines except for the * one-row callback. */ @@ -164,6 +167,14 @@ static const CopyToRoutine CopyToRoutineCSV = { .CopyToEnd = CopyToTextLikeEnd, }; +/* json format */ +static const CopyToRoutine CopyToRoutineJson = { + .CopyToStart = CopyToTextLikeStart, + .CopyToOutFunc = CopyToTextLikeOutFunc, + .CopyToOneRow = CopyToJsonOneRow, + .CopyToEnd = CopyToTextLikeEnd, +}; + /* binary format */ static const CopyToRoutine CopyToRoutineBinary = { .CopyToStart = CopyToBinaryStart, @@ -180,12 +191,14 @@ CopyToGetRoutine(const CopyFormatOptions *opts) return &CopyToRoutineCSV; else if (opts->format == COPY_FORMAT_BINARY) return &CopyToRoutineBinary; + else if (opts->format == COPY_FORMAT_JSON) + return &CopyToRoutineJson; /* default is text */ return &CopyToRoutineText; } -/* Implementation of the start callback for text and CSV formats */ +/* Implementation of the start callback for text, CSV, and json formats */ static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) { @@ -204,6 +217,8 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) ListCell *cur; bool hdr_delim = false; + Assert(cstate->opts.format != COPY_FORMAT_JSON); + foreach(cur, cstate->attnumlist) { int attnum = lfirst_int(cur); @@ -226,7 +241,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) } /* - * Implementation of the outfunc callback for text and CSV formats. Assign + * Implementation of the outfunc callback for text, CSV, and json formats. Assign * the output function data to the given *finfo. */ static void @@ -299,13 +314,46 @@ CopyToTextLikeOneRow(CopyToState cstate, CopySendTextLikeEndOfRow(cstate); } -/* Implementation of the end callback for text and CSV formats */ +/* Implementation of the end callback for text, CSV, and json formats */ static void CopyToTextLikeEnd(CopyToState cstate) { /* Nothing to do here */ } +/* Implementation of per-row callback for json format */ +static void +CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot) +{ + Datum rowdata; + StringInfo result; + + /* + * If COPY TO source data come from query rather than plain table, we need + * copy CopyToState->QueryDesc->TupleDesc to slot->tts_tupleDescriptor. + * This is necessary because the slot's TupleDesc may change during query + * execution, and we depend on it when calling composite_to_json. + */ + if (!cstate->rel) + { + memcpy(TupleDescAttr(slot->tts_tupleDescriptor, 0), + TupleDescAttr(cstate->queryDesc->tupDesc, 0), + cstate->queryDesc->tupDesc->natts * sizeof(FormData_pg_attribute)); + + for (int i = 0; i < cstate->queryDesc->tupDesc->natts; i++) + populate_compact_attribute(slot->tts_tupleDescriptor, i); + + BlessTupleDesc(slot->tts_tupleDescriptor); + } + rowdata = ExecFetchSlotHeapTupleDatum(slot); + result = makeStringInfo(); + composite_to_json(rowdata, result, false); + + CopySendData(cstate, result->data, result->len); + + CopySendTextLikeEndOfRow(cstate); +} + /* * Implementation of the start callback for binary format. Send a header * for a binary copy. @@ -397,9 +445,21 @@ SendCopyBegin(CopyToState cstate) pq_beginmessage(&buf, PqMsg_CopyOutResponse); pq_sendbyte(&buf, format); /* overall format */ - pq_sendint16(&buf, natts); - for (i = 0; i < natts; i++) - pq_sendint16(&buf, format); /* per-column formats */ + if (cstate->opts.format != COPY_FORMAT_JSON) + { + pq_sendint16(&buf, natts); + for (i = 0; i < natts; i++) + pq_sendint16(&buf, format); /* per-column formats */ + } + else + { + /* + * JSON format is always one non-binary column + */ + pq_sendint16(&buf, 1); + pq_sendint16(&buf, 0); + } + pq_endmessage(&buf); cstate->copy_dest = COPY_FRONTEND; } @@ -499,7 +559,7 @@ CopySendEndOfRow(CopyToState cstate) } /* - * Wrapper function of CopySendEndOfRow for text and CSV formats. Sends the + * Wrapper function of CopySendEndOfRow for text, CSV, and json formats. Sends the * line termination and do common appropriate things for the end of row. */ static inline void diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index db43034b9db..48e2242327e 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -3528,6 +3528,10 @@ copy_opt_item: { $$ = makeDefElem("format", (Node *) makeString("csv"), @1); } + | JSON + { + $$ = makeDefElem("format", (Node *) makeString("json"), @1); + } | HEADER_P { $$ = makeDefElem("header", (Node *) makeBoolean(true), @1); @@ -3610,6 +3614,10 @@ copy_generic_opt_elem: { $$ = makeDefElem($1, $2, @1); } + | FORMAT_LA copy_generic_opt_arg + { + $$ = makeDefElem("format", $2, @1); + } ; copy_generic_opt_arg: diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index e9d370cb3da..e517470bbc7 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -85,8 +85,6 @@ typedef struct JsonAggState JsonUniqueBuilderState unique_check; } JsonAggState; -static void composite_to_json(Datum composite, StringInfo result, - bool use_line_feeds); static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals, bool *nulls, int *valcount, JsonTypeCategory tcategory, Oid outfuncoid, @@ -516,8 +514,9 @@ array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds) /* * Turn a composite / record into JSON. + * Exported so COPY TO can use it. */ -static void +void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds) { HeapTupleHeader td; diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index 1f2ca946fc5..16db5373c5f 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -3344,8 +3344,10 @@ match_previous_words(int pattern_id, COMPLETE_WITH(Copy_to_options); /* Complete COPY FROM|TO filename WITH (FORMAT */ - else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT")) + else if (Matches("COPY|\\copy", MatchAny, "FROM", MatchAny, "WITH", "(", "FORMAT")) COMPLETE_WITH("binary", "csv", "text"); + else if (Matches("COPY|\\copy", MatchAny, "TO", MatchAny, "WITH", "(", "FORMAT")) + COMPLETE_WITH("binary", "csv", "text", "json"); /* Complete COPY FROM filename WITH (ON_ERROR */ else if (Matches("COPY|\\copy", MatchAny, "FROM", MatchAny, "WITH", "(", "ON_ERROR")) diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 686653233b2..85aedc267d6 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -56,6 +56,7 @@ typedef enum CopyFormat COPY_FORMAT_TEXT = 0, COPY_FORMAT_BINARY, COPY_FORMAT_CSV, + COPY_FORMAT_JSON, } CopyFormat; /* diff --git a/src/include/utils/json.h b/src/include/utils/json.h index 49bbda7ac06..1fa8e2ce8e2 100644 --- a/src/include/utils/json.h +++ b/src/include/utils/json.h @@ -17,6 +17,8 @@ #include "lib/stringinfo.h" /* functions in json.c */ +extern void composite_to_json(Datum composite, StringInfo result, + bool use_line_feeds); extern void escape_json(StringInfo buf, const char *str); extern void escape_json_with_len(StringInfo buf, const char *str, int len); extern void escape_json_text(StringInfo buf, const text *txt); diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index ac66eb55aee..0fc6e84352c 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -73,6 +73,82 @@ copy copytest3 to stdout csv header; c1,"col with , comma","col with "" quote" 1,a,1 2,b,2 +--- test copying in JSON mode with various styles +copy copytest to stdout json; +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +copy copytest to stdout (format json); +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +-- all of the following should yield error +copy copytest to stdout (format json, delimiter '|'); +ERROR: cannot specify DELIMITER in JSON mode +copy copytest to stdout (format json, null '\N'); +ERROR: cannot specify NULL in JSON mode +copy copytest to stdout (format json, default '|'); +ERROR: cannot specify DEFAULT in JSON mode +copy copytest to stdout (format json, header); +ERROR: cannot specify HEADER in JSON mode +copy copytest to stdout (format json, header 1); +ERROR: cannot specify HEADER in JSON mode +copy copytest to stdout (format json, quote '"'); +ERROR: COPY QUOTE requires CSV mode +copy copytest to stdout (format json, escape '"'); +ERROR: COPY ESCAPE requires CSV mode +copy copytest to stdout (format json, force_quote *); +ERROR: COPY FORCE_QUOTE requires CSV mode +copy copytest to stdout (format json, force_not_null *); +ERROR: COPY FORCE_NOT_NULL requires CSV mode +copy copytest to stdout (format json, force_null *); +ERROR: COPY FORCE_NULL requires CSV mode +copy copytest to stdout (format json, on_error ignore); +ERROR: COPY ON_ERROR cannot be used with COPY TO +LINE 1: copy copytest to stdout (format json, on_error ignore); + ^ +copy copytest from stdin(format json); +ERROR: COPY json mode cannot be used with COPY FROM +-- all of the above should yield error +-- embedded escaped characters +create temp table copyjsontest ( + id bigserial, + f1 text, + f2 timestamptz); +insert into copyjsontest + select g.i, + CASE WHEN g.i % 2 = 0 THEN + 'line with '' in it: ' || g.i::text + ELSE + 'line with " in it: ' || g.i::text + END, + 'Mon Feb 10 17:32:01 1997 PST' + from generate_series(1,5) as g(i); +insert into copyjsontest (f1) values +(E'aaa\"bbb'::text), +(E'aaa\\bbb'::text), +(E'aaa\/bbb'::text), +(E'aaa\bbbb'::text), +(E'aaa\fbbb'::text), +(E'aaa\nbbb'::text), +(E'aaa\rbbb'::text), +(E'aaa\tbbb'::text); +copy copyjsontest to stdout json; +{"id":1,"f1":"line with \" in it: 1","f2":"1997-02-10T17:32:01-08:00"} +{"id":2,"f1":"line with ' in it: 2","f2":"1997-02-10T17:32:01-08:00"} +{"id":3,"f1":"line with \" in it: 3","f2":"1997-02-10T17:32:01-08:00"} +{"id":4,"f1":"line with ' in it: 4","f2":"1997-02-10T17:32:01-08:00"} +{"id":5,"f1":"line with \" in it: 5","f2":"1997-02-10T17:32:01-08:00"} +{"id":1,"f1":"aaa\"bbb","f2":null} +{"id":2,"f1":"aaa\\bbb","f2":null} +{"id":3,"f1":"aaa/bbb","f2":null} +{"id":4,"f1":"aaa\bbbb","f2":null} +{"id":5,"f1":"aaa\fbbb","f2":null} +{"id":6,"f1":"aaa\nbbb","f2":null} +{"id":7,"f1":"aaa\rbbb","f2":null} +{"id":8,"f1":"aaa\tbbb","f2":null} create temp table copytest4 ( c1 int, "colname with tab: " text); diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index a1316c73bac..071986d427a 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -82,6 +82,53 @@ this is just a line full of junk that would error out if parsed copy copytest3 to stdout csv header; +--- test copying in JSON mode with various styles +copy copytest to stdout json; +copy copytest to stdout (format json); + +-- all of the following should yield error +copy copytest to stdout (format json, delimiter '|'); +copy copytest to stdout (format json, null '\N'); +copy copytest to stdout (format json, default '|'); +copy copytest to stdout (format json, header); +copy copytest to stdout (format json, header 1); +copy copytest to stdout (format json, quote '"'); +copy copytest to stdout (format json, escape '"'); +copy copytest to stdout (format json, force_quote *); +copy copytest to stdout (format json, force_not_null *); +copy copytest to stdout (format json, force_null *); +copy copytest to stdout (format json, on_error ignore); +copy copytest from stdin(format json); +-- all of the above should yield error + +-- embedded escaped characters +create temp table copyjsontest ( + id bigserial, + f1 text, + f2 timestamptz); + +insert into copyjsontest + select g.i, + CASE WHEN g.i % 2 = 0 THEN + 'line with '' in it: ' || g.i::text + ELSE + 'line with " in it: ' || g.i::text + END, + 'Mon Feb 10 17:32:01 1997 PST' + from generate_series(1,5) as g(i); + +insert into copyjsontest (f1) values +(E'aaa\"bbb'::text), +(E'aaa\\bbb'::text), +(E'aaa\/bbb'::text), +(E'aaa\bbbb'::text), +(E'aaa\fbbb'::text), +(E'aaa\nbbb'::text), +(E'aaa\rbbb'::text), +(E'aaa\tbbb'::text); + +copy copyjsontest to stdout json; + create temp table copytest4 ( c1 int, "colname with tab: " text); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index e6f2e93b2d6..374b40d14de 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -515,6 +515,7 @@ ConversionLocation ConvertRowtypeExpr CookedConstraint CopyDest +CopyFormat CopyFormatOptions CopyFromRoutine CopyFromState -- 2.34.1