From 365f340a0b733a3d9b5fdf540a2623c3ea9d4d8d Mon Sep 17 00:00:00 2001 From: jian he Date: Sun, 8 Mar 2026 23:58:29 +0800 Subject: [PATCH v28 4/4] COPY TO JSON: support column lists When a column list is specified (e.g. COPY t (a, b) TO ... FORMAT json), build a projected TupleDesc containing only the selected columns and form a new tuple per row via heap_form_tuple(), so that composite_to_json() emits the correct column names and values. Use HeapTupleGetDatum() directly on the formed tuple rather than heap_copy_tuple_as_datum(), since heap_form_tuple() already stamps the datum-length, type-id, and type-mod fields on t_data, avoiding an unnecessary palloc+memcpy per row. Add regression tests covering column lists with diverse data types including json, jsonb, int[], numeric, boolean, timestamp, and text, exercising various column subsets and NULL handling. Author: Andrew Dunstan Reviewed-by: jian he Discussion: https://postgr.es/m/CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU%3Dkcg%40mail.gmail.com Discussion: https://postgr.es/m/6a04628d-0d53-41d9-9e35-5a8dc302c34c@joeconway.com --- src/backend/commands/copyto.c | 105 ++++++++++++++++++++++++----- src/test/regress/expected/copy.out | 73 +++++++++++++++++++- src/test/regress/sql/copy.sql | 40 ++++++++++- 3 files changed, 197 insertions(+), 21 deletions(-) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 38fbf7d4424..faa8e323f56 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -88,8 +88,13 @@ typedef struct CopyToStateData char *filename; /* filename, or NULL for STDOUT */ bool is_program; /* is 'filename' a program to popen? 
*/ bool json_row_delim_needed; /* need delimiter before next row */ - StringInfo json_buf; /* reusable buffer for JSON output, it is - * initliazed in BeginCopyTo */ + StringInfo json_buf; /* reusable buffer for JSON output, + * initialized in BeginCopyTo */ + TupleDesc tupDesc; /* Descriptor for JSON output; for a column + * list this is a projected descriptor */ + Datum *json_projvalues; /* pre-allocated projection values, or + * NULL */ + bool *json_projnulls; /* pre-allocated projection nulls, or NULL */ copy_data_dest_cb data_dest_cb; /* function for writing data */ CopyFormatOptions opts; @@ -357,19 +362,53 @@ CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot) { Datum rowdata; - /* - * composite_to_json() requires a stable TupleDesc. Since the slot's - * descriptor (slot->tts_tupleDescriptor) can change during the execution - * of a SELECT query, we use cstate->queryDesc->tupDesc instead. This - * precaution is only necessary when the output slot's TupleDesc is of - * type RECORDOID. - */ - if (!cstate->rel && slot->tts_tupleDescriptor->tdtypeid == RECORDOID) - slot->tts_tupleDescriptor = cstate->queryDesc->tupDesc; - resetStringInfo(cstate->json_buf); - rowdata = ExecFetchSlotHeapTupleDatum(slot); + if (cstate->json_projvalues != NULL) + { + /* + * Column list case: project selected column values into sequential + * positions matching the custom TupleDesc, then form a new tuple. + */ + HeapTuple tup; + int i = 0; + + foreach_int(attnum, cstate->attnumlist) + { + cstate->json_projvalues[i] = slot->tts_values[attnum - 1]; + cstate->json_projnulls[i] = slot->tts_isnull[attnum - 1]; + i++; + } + + tup = heap_form_tuple(cstate->tupDesc, + cstate->json_projvalues, + cstate->json_projnulls); + + /* + * heap_form_tuple already stamps the datum-length, type-id, and + * type-mod fields on t_data, so we can use it directly as a composite + * Datum without the extra palloc+memcpy that heap_copy_tuple_as_datum + * would do. 
Any TOAST pointers in the projected values will be + * detoasted by the per-column output functions called from + * composite_to_json. + */ + rowdata = HeapTupleGetDatum(tup); + } + else + { + /* + * Full table or query without column list. Ensure the slot uses + * cstate->tupDesc so that the datum is stamped with the right type; + * for queries whose output type is RECORDOID this must be the blessed + * descriptor so that composite_to_json can look it up via + * lookup_rowtype_tupdesc. + */ + if (!cstate->rel && slot->tts_tupleDescriptor->tdtypeid == RECORDOID) + slot->tts_tupleDescriptor = cstate->queryDesc->tupDesc; + + rowdata = ExecFetchSlotHeapTupleDatum(slot); + } + composite_to_json(rowdata, cstate->json_buf, false); if (cstate->opts.force_array) @@ -841,6 +880,7 @@ BeginCopyTo(ParseState *pstate, tupDesc = RelationGetDescr(cstate->rel); cstate->partitions = children; + cstate->tupDesc = tupDesc; } else { @@ -978,20 +1018,49 @@ BeginCopyTo(ParseState *pstate, tupDesc = cstate->queryDesc->tupDesc; tupDesc = BlessTupleDesc(tupDesc); + cstate->tupDesc = tupDesc; } /* Generate or convert list of attributes to process */ cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist); - /* JSON outputs whole rows; a column list doesn't make sense */ + /* Set up JSON-specific state */ if (cstate->opts.format == COPY_FORMAT_JSON) { cstate->json_buf = makeStringInfo(); - if (attnamelist != NIL) - ereport(ERROR, - errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("column selection is not supported in JSON mode")); + if (attnamelist != NIL && rel) + { + int natts = list_length(cstate->attnumlist); + TupleDesc resultDesc; + + /* + * Build a TupleDesc describing only the selected columns so that + * composite_to_json() emits the right column names and types. 
+ */ + resultDesc = CreateTemplateTupleDesc(natts); + + foreach_int(attnum, cstate->attnumlist) + { + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + TupleDescInitEntry(resultDesc, + foreach_current_index(attnum) + 1, + NameStr(attr->attname), + attr->atttypid, + attr->atttypmod, + attr->attndims); + } + + cstate->tupDesc = BlessTupleDesc(resultDesc); + + /* + * Pre-allocate arrays for projecting selected column values into + * sequential positions matching the custom TupleDesc. + */ + cstate->json_projvalues = palloc_array(Datum, natts); + cstate->json_projnulls = palloc_array(bool, natts); + } } num_phys_attrs = tupDesc->natts; diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index e1d51335e33..e44b4a1d79d 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -77,6 +77,9 @@ c1,"col with , comma","col with "" quote" copy (select 1 union all select 2) to stdout with (format json); {"?column?":1} {"?column?":2} +copy (select 1 as foo union all select 2) to stdout with (format json); +{"foo":1} +{"foo":2} copy (values (1), (2)) TO stdout with (format json); {"column1":1} {"column1":2} @@ -134,8 +137,6 @@ copy copytest to stdout (format json, reject_limit 1); ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE copy copytest from stdin(format json); ERROR: COPY JSON mode cannot be used with COPY FROM -copy copytest (style) to stdout (format json); -ERROR: column selection is not supported in JSON mode -- all of the above should yield error -- should fail: force_array requires json format copy copytest to stdout (format csv, force_array true); @@ -160,6 +161,74 @@ copy copytest to stdout (format json, force_array false); {"style":"Unix","test":"abc\ndef","filler":2} {"style":"Mac","test":"abc\rdef","filler":3} {"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +-- column list with json format +copy copytest (style, filler) to stdout (format json); 
+{"style":"DOS","filler":1} +{"style":"Unix","filler":2} +{"style":"Mac","filler":3} +{"style":"esc\\ape","filler":4} +copy copytest (style, filler) to stdout (format json, force_array true); +[ + {"style":"DOS","filler":1} +,{"style":"Unix","filler":2} +,{"style":"Mac","filler":3} +,{"style":"esc\\ape","filler":4} +] +copy copytest (style, test, filler) to stdout (format json, force_array true); +[ + {"style":"DOS","test":"abc\r\ndef","filler":1} +,{"style":"Unix","test":"abc\ndef","filler":2} +,{"style":"Mac","test":"abc\rdef","filler":3} +,{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +] +-- column list with diverse data types +create temp table copyjsontest_types ( + id int, + js json, + jsb jsonb, + arr int[], + n numeric(10,2), + b boolean, + ts timestamp, + t text); +insert into copyjsontest_types values +(1, '{"a":1}', '{"b":2}', '{1,2,3}', 3.14, true, + '2024-01-15 10:30:00', 'hello'), +(2, '[1,null,"x"]', '{"nested":{"k":"v"}}', '{4,5}', -99.99, false, + '2024-06-30 23:59:59', 'world'), +(3, 'null', 'null', '{}', null, null, null, null); +-- full table +copy copyjsontest_types to stdout (format json); +{"id":1,"js":{"a":1},"jsb":{"b": 2},"arr":[1,2,3],"n":3.14,"b":true,"ts":"2024-01-15T10:30:00","t":"hello"} +{"id":2,"js":[1,null,"x"],"jsb":{"nested": {"k": "v"}},"arr":[4,5],"n":-99.99,"b":false,"ts":"2024-06-30T23:59:59","t":"world"} +{"id":3,"js":null,"jsb":null,"arr":[],"n":null,"b":null,"ts":null,"t":null} +-- column subsets exercising each type +copy copyjsontest_types (id, js, jsb) to stdout (format json); +{"id":1,"js":{"a":1},"jsb":{"b": 2}} +{"id":2,"js":[1,null,"x"],"jsb":{"nested": {"k": "v"}}} +{"id":3,"js":null,"jsb":null} +copy copyjsontest_types (id, arr, n, b) to stdout (format json); +{"id":1,"arr":[1,2,3],"n":3.14,"b":true} +{"id":2,"arr":[4,5],"n":-99.99,"b":false} +{"id":3,"arr":[],"n":null,"b":null} +copy copyjsontest_types (jsb, t) to stdout (format json); +{"jsb":{"b": 2},"t":"hello"} +{"jsb":{"nested": {"k": 
"v"}},"t":"world"} +{"jsb":null,"t":null} +copy copyjsontest_types (id, ts) to stdout (format json); +{"id":1,"ts":"2024-01-15T10:30:00"} +{"id":2,"ts":"2024-06-30T23:59:59"} +{"id":3,"ts":null} +-- single column: json and jsonb +copy copyjsontest_types (js) to stdout (format json); +{"js":{"a":1}} +{"js":[1,null,"x"]} +{"js":null} +copy copyjsontest_types (jsb) to stdout (format json); +{"jsb":{"b": 2}} +{"jsb":{"nested": {"k": "v"}}} +{"jsb":null} +drop table copyjsontest_types; -- embedded escaped characters create temp table copyjsontest ( id bigserial, diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index 764d19f4947..e4e70a82ecc 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -84,6 +84,7 @@ copy copytest3 to stdout csv header; --- test copying in JSON mode with various styles copy (select 1 union all select 2) to stdout with (format json); +copy (select 1 as foo union all select 2) to stdout with (format json); copy (values (1), (2)) TO stdout with (format json); copy (select 1 union all select 2) to stdout with (format json, force_array true); copy (values (1), (2)) TO stdout with (format json, force_array true); @@ -105,7 +106,6 @@ copy copytest to stdout (format json, force_null *); copy copytest to stdout (format json, on_error ignore); copy copytest to stdout (format json, reject_limit 1); copy copytest from stdin(format json); -copy copytest (style) to stdout (format json); -- all of the above should yield error -- should fail: force_array requires json format @@ -116,6 +116,44 @@ copy copytest to stdout (format json, force_array); copy copytest to stdout (format json, force_array true); copy copytest to stdout (format json, force_array false); +-- column list with json format +copy copytest (style, filler) to stdout (format json); +copy copytest (style, filler) to stdout (format json, force_array true); +copy copytest (style, test, filler) to stdout (format json, force_array true); + +-- 
column list with diverse data types +create temp table copyjsontest_types ( + id int, + js json, + jsb jsonb, + arr int[], + n numeric(10,2), + b boolean, + ts timestamp, + t text); + +insert into copyjsontest_types values +(1, '{"a":1}', '{"b":2}', '{1,2,3}', 3.14, true, + '2024-01-15 10:30:00', 'hello'), +(2, '[1,null,"x"]', '{"nested":{"k":"v"}}', '{4,5}', -99.99, false, + '2024-06-30 23:59:59', 'world'), +(3, 'null', 'null', '{}', null, null, null, null); + +-- full table +copy copyjsontest_types to stdout (format json); + +-- column subsets exercising each type +copy copyjsontest_types (id, js, jsb) to stdout (format json); +copy copyjsontest_types (id, arr, n, b) to stdout (format json); +copy copyjsontest_types (jsb, t) to stdout (format json); +copy copyjsontest_types (id, ts) to stdout (format json); + +-- single column: json and jsonb +copy copyjsontest_types (js) to stdout (format json); +copy copyjsontest_types (jsb) to stdout (format json); + +drop table copyjsontest_types; + -- embedded escaped characters create temp table copyjsontest ( id bigserial, -- 2.34.1