Re: Make COPY format extendable: Extract COPY TO format implementations - Mailing list pgsql-hackers
From | Sutou Kouhei |
---|---|
Subject | Re: Make COPY format extendable: Extract COPY TO format implementations |
Date | |
Msg-id | 20240215.153421.96888103784986803.kou@clear-code.com Whole thread Raw |
In response to | Re: Make COPY format extendable: Extract COPY TO format implementations (Michael Paquier <michael@paquier.xyz>) |
Responses |
Re: Make COPY format extendable: Extract COPY TO format implementations
Re: Make COPY format extendable: Extract COPY TO format implementations |
List | pgsql-hackers |
Hi, In <ZcxjNDtqNLvdz0f5@paquier.xyz> "Re: Make COPY format extendable: Extract COPY TO format implementations" on Wed, 14 Feb 2024 15:52:36 +0900, Michael Paquier <michael@paquier.xyz> wrote: >> How about InputFunctionCallSafeWithInfo(), >> InputFunctionCallSafeInfo() or >> InputFunctionCallInfoCallSafe()? > > WithInfo() would not be a new thing. There are a couple of APIs named > like this when manipulating catalogs, so that sounds kind of a good > choice from here. Thanks for the info. Let's use InputFunctionCallSafeWithInfo(). See that attached patch: v2-0001-Reuse-fcinfo-used-in-COPY-FROM.patch I also attach a patch for COPY TO: v1-0001-Reuse-fcinfo-used-in-COPY-TO.patch I measured the COPY TO patch on my environment with: COPY (SELECT 1::int2,2::int2,3::int2,4::int2,5::int2,6::int2,7::int2,8::int2,9::int2,10::int2,11::int2,12::int2,13::int2,14::int2,15::int2,16::int2,17::int2,18::int2,19::int2,20::int2, generate_series(1,1000000::int4)) TO '/dev/null' \watch c=5 master: 740.066ms 734.884ms 738.579ms 734.170ms 727.953ms patched: 730.714ms 741.483ms 714.149ms 715.436ms 713.578ms It seems that it improves performance a bit but my environment isn't suitable for benchmark. So they may not be valid numbers. Thanks, -- kou From b677732f46f735a5601b8890000f79671e91be41 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei <kou@clear-code.com> Date: Thu, 15 Feb 2024 15:01:08 +0900 Subject: [PATCH v2] Reuse fcinfo used in COPY FROM Each NextCopyFrom() calls input functions or binary-input functions. We can reuse fcinfo for them instead of creating a local fcinfo for each call. This will improve performance. --- src/backend/commands/copyfrom.c | 4 + src/backend/commands/copyfromparse.c | 20 ++-- src/backend/utils/fmgr/fmgr.c | 126 +++++++++++++++++++++++ src/include/commands/copyfrom_internal.h | 1 + src/include/fmgr.h | 12 +++ 5 files changed, 154 insertions(+), 9 deletions(-) diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 1fe70b9133..ed375c012e 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -1691,6 +1691,10 @@ BeginCopyFrom(ParseState *pstate, /* We keep those variables in cstate. */ cstate->in_functions = in_functions; cstate->typioparams = typioparams; + if (cstate->opts.binary) + cstate->fcinfo = PrepareReceiveFunctionCallInfo(); + else + cstate->fcinfo = PrepareInputFunctionCallInfo(); cstate->defmap = defmap; cstate->defexprs = defexprs; cstate->volatile_defexprs = volatile_defexprs; diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 7cacd0b752..7907e16ea8 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -861,6 +861,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, num_defaults = cstate->num_defaults; FmgrInfo *in_functions = cstate->in_functions; Oid *typioparams = cstate->typioparams; + FunctionCallInfoBaseData *fcinfo = cstate->fcinfo; int i; int *defmap = cstate->defmap; ExprState **defexprs = cstate->defexprs; @@ -961,12 +962,13 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, * If ON_ERROR is specified with IGNORE, skip rows with soft * errors */ - else if (!InputFunctionCallSafe(&in_functions[m], - string, - typioparams[m], - att->atttypmod, - (Node *) cstate->escontext, - &values[m])) + else if (!InputFunctionCallSafeWithInfo(fcinfo, + &in_functions[m], + string, + typioparams[m], + att->atttypmod, + (Node *) cstate->escontext, + &values[m])) { cstate->num_errors++; return true; @@ -1966,7 +1968,7 @@ CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, if (fld_size == -1) { *isnull = true; - return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod); + return ReceiveFunctionCallWithInfo(cstate->fcinfo, flinfo, NULL, typioparam, typmod); } if (fld_size < 0) ereport(ERROR, @@ -1987,8 +1989,8 @@ CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, cstate->attribute_buf.data[fld_size] = '\0'; /* Call the column type's binary input converter */ - result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf, - typioparam, typmod); + result = ReceiveFunctionCallWithInfo(cstate->fcinfo, flinfo, &cstate->attribute_buf, + typioparam, typmod); /* Trouble if it didn't eat the whole buffer */ if (cstate->attribute_buf.cursor != cstate->attribute_buf.len) diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index e48a86be54..14c3ed2bdb 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -1672,6 +1672,73 @@ DirectInputFunctionCallSafe(PGFunction func, char *str, return true; } +/* + * Prepare callinfo for InputFunctionCallSafeWithInfo to reuse one callinfo + * instead of initializing it for each call. This is for performance. + */ +FunctionCallInfoBaseData * +PrepareInputFunctionCallInfo(void) +{ + FunctionCallInfoBaseData *fcinfo; + + fcinfo = (FunctionCallInfoBaseData *) palloc(SizeForFunctionCallInfo(3)); + InitFunctionCallInfoData(*fcinfo, NULL, 3, InvalidOid, NULL, NULL); + fcinfo->args[0].isnull = false; + fcinfo->args[1].isnull = false; + fcinfo->args[2].isnull = false; + return fcinfo; +} + +/* + * Call a previously-looked-up datatype input function, with prepared callinfo + * and non-exception handling of "soft" errors. + * + * This is basically like InputFunctionCallSafe, but it reuses prepared + * callinfo. + */ +bool +InputFunctionCallSafeWithInfo(FunctionCallInfoBaseData *fcinfo, + FmgrInfo *flinfo, char *str, + Oid typioparam, int32 typmod, + fmNodePtr escontext, + Datum *result) +{ + if (str == NULL && flinfo->fn_strict) + { + *result = (Datum) 0; /* just return null result */ + return true; + } + + fcinfo->flinfo = flinfo; + fcinfo->context = escontext; + fcinfo->isnull = false; + fcinfo->args[0].value = CStringGetDatum(str); + fcinfo->args[1].value = ObjectIdGetDatum(typioparam); + fcinfo->args[2].value = Int32GetDatum(typmod); + + *result = FunctionCallInvoke(fcinfo); + + /* Result value is garbage, and could be null, if an error was reported */ + if (SOFT_ERROR_OCCURRED(escontext)) + return false; + + /* Otherwise, should get null result if and only if str is NULL */ + if (str == NULL) + { + if (!fcinfo->isnull) + elog(ERROR, "input function %u returned non-NULL", + flinfo->fn_oid); + } + else + { + if (fcinfo->isnull) + elog(ERROR, "input function %u returned NULL", + flinfo->fn_oid); + } + + return true; +} + /* * Call a previously-looked-up datatype output function. * @@ -1731,6 +1798,65 @@ ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf, return result; } +/* + * Prepare callinfo for ReceiveFunctionCallWithInfo to reuse one callinfo + * instead of initializing it for each call. This is for performance. + */ +FunctionCallInfoBaseData * +PrepareReceiveFunctionCallInfo(void) +{ + FunctionCallInfoBaseData *fcinfo; + + fcinfo = (FunctionCallInfoBaseData *) palloc(SizeForFunctionCallInfo(3)); + InitFunctionCallInfoData(*fcinfo, NULL, 3, InvalidOid, NULL, NULL); + fcinfo->args[0].isnull = false; + fcinfo->args[1].isnull = false; + fcinfo->args[2].isnull = false; + return fcinfo; +} + +/* + * Call a previously-looked-up datatype binary-input function, with prepared + * callinfo. + * + * This is basically like ReceiveFunctionCall, but it reuses prepared + * callinfo. + */ +Datum +ReceiveFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo, + FmgrInfo *flinfo, StringInfo buf, + Oid typioparam, int32 typmod) +{ + Datum result; + + if (buf == NULL && flinfo->fn_strict) + return (Datum) 0; /* just return null result */ + + fcinfo->flinfo = flinfo; + fcinfo->isnull = false; + fcinfo->args[0].value = PointerGetDatum(buf); + fcinfo->args[1].value = ObjectIdGetDatum(typioparam); + fcinfo->args[2].value = Int32GetDatum(typmod); + + result = FunctionCallInvoke(fcinfo); + + /* Should get null result if and only if buf is NULL */ + if (buf == NULL) + { + if (!fcinfo->isnull) + elog(ERROR, "receive function %u returned non-NULL", + flinfo->fn_oid); + } + else + { + if (fcinfo->isnull) + elog(ERROR, "receive function %u returned NULL", + flinfo->fn_oid); + } + + return result; +} + /* * Call a previously-looked-up datatype binary-output function. * diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h index cad52fcc78..8c1a227c02 100644 --- a/src/include/commands/copyfrom_internal.h +++ b/src/include/commands/copyfrom_internal.h @@ -97,6 +97,7 @@ typedef struct CopyFromStateData Oid *typioparams; /* array of element types for in_functions */ ErrorSaveContext *escontext; /* soft error trapper during in_functions * execution */ + FunctionCallInfoBaseData *fcinfo; /* reusable callinfo for in_functions */ uint64 num_errors; /* total number of rows which contained soft * errors */ int *defmap; /* array of default att numbers related to diff --git a/src/include/fmgr.h b/src/include/fmgr.h index ccb4070a25..3d3a12205b 100644 --- a/src/include/fmgr.h +++ b/src/include/fmgr.h @@ -708,12 +708,24 @@ extern bool DirectInputFunctionCallSafe(PGFunction func, char *str, Oid typioparam, int32 typmod, fmNodePtr escontext, Datum *result); +extern FunctionCallInfoBaseData *PrepareInputFunctionCallInfo(void); +extern bool + InputFunctionCallSafeWithInfo(FunctionCallInfoBaseData *fcinfo, + FmgrInfo *flinfo, char *str, + Oid typioparam, int32 typmod, + fmNodePtr escontext, + Datum *result); extern Datum OidInputFunctionCall(Oid functionId, char *str, Oid typioparam, int32 typmod); extern char *OutputFunctionCall(FmgrInfo *flinfo, Datum val); extern char *OidOutputFunctionCall(Oid functionId, Datum val); extern Datum ReceiveFunctionCall(FmgrInfo *flinfo, fmStringInfo buf, Oid typioparam, int32 typmod); +extern FunctionCallInfoBaseData *PrepareReceiveFunctionCallInfo(void); +extern Datum + ReceiveFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo, + FmgrInfo *flinfo, fmStringInfo buf, + Oid typioparam, int32 typmod); extern Datum OidReceiveFunctionCall(Oid functionId, fmStringInfo buf, Oid typioparam, int32 typmod); extern bytea *SendFunctionCall(FmgrInfo *flinfo, Datum val); -- 2.43.0 From dbf04dec457ad2c61d00538514cc5356e94074e1 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei <kou@clear-code.com> Date: Thu, 15 Feb 2024 15:26:31 +0900 Subject: [PATCH v1] Reuse fcinfo used in COPY TO Each CopyOneRowTo() calls output functions or binary-output functions. We can reuse fcinfo for them instead of creating a local fcinfo for each call. This will improve performance. --- src/backend/commands/copyto.c | 14 +++++-- src/backend/utils/fmgr/fmgr.c | 79 +++++++++++++++++++++++++++++++++++ src/include/fmgr.h | 6 +++ 3 files changed, 95 insertions(+), 4 deletions(-) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 20ffc90363..21442861f3 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -97,6 +97,7 @@ typedef struct CopyToStateData MemoryContext copycontext; /* per-copy execution context */ FmgrInfo *out_functions; /* lookup info for output functions */ + FunctionCallInfoBaseData *fcinfo; /* reusable callinfo for out_functions */ MemoryContext rowcontext; /* per-row evaluation context */ uint64 bytes_processed; /* number of bytes processed so far */ } CopyToStateData; @@ -786,6 +787,10 @@ DoCopyTo(CopyToState cstate) &isvarlena); fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]); } + if (cstate->opts.binary) + cstate->fcinfo = PrepareSendFunctionCallInfo(); + else + cstate->fcinfo = PrepareOutputFunctionCallInfo(); /* * Create a temporary memory context that we can reset once per row to @@ -909,6 +914,7 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot) { bool need_delim = false; FmgrInfo *out_functions = cstate->out_functions; + FunctionCallInfoBaseData *fcinfo = cstate->fcinfo; MemoryContext oldcontext; ListCell *cur; char *string; @@ -949,8 +955,8 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot) { if (!cstate->opts.binary) { - string = OutputFunctionCall(&out_functions[attnum - 1], - value); + string = OutputFunctionCallWithInfo(fcinfo, &out_functions[attnum - 1], + value); if (cstate->opts.csv_mode) CopyAttributeOutCSV(cstate, string, cstate->opts.force_quote_flags[attnum - 1]); @@ -961,8 +967,8 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot) { bytea *outputbytes; - outputbytes = SendFunctionCall(&out_functions[attnum - 1], - value); + outputbytes = SendFunctionCallWithInfo(fcinfo, &out_functions[attnum - 1], + value); CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ); CopySendData(cstate, VARDATA(outputbytes), VARSIZE(outputbytes) - VARHDRSZ); diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index e48a86be54..ab74a643f2 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -1685,6 +1685,45 @@ OutputFunctionCall(FmgrInfo *flinfo, Datum val) return DatumGetCString(FunctionCall1(flinfo, val)); } +/* + * Prepare callinfo for OutputFunctionCallWithInfo to reuse one callinfo + * instead of initializing it for each call. This is for performance. + */ +FunctionCallInfoBaseData * +PrepareOutputFunctionCallInfo(void) +{ + FunctionCallInfoBaseData *fcinfo; + + fcinfo = (FunctionCallInfoBaseData *) palloc(SizeForFunctionCallInfo(1)); + InitFunctionCallInfoData(*fcinfo, NULL, 1, InvalidOid, NULL, NULL); + fcinfo->args[0].isnull = false; + return fcinfo; +} + +/* + * Call a previously-looked-up datatype output function, with prepared callinfo. + * + * This is basically like OutputFunctionCall, but it reuses prepared callinfo. + */ +char * +OutputFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo, + FmgrInfo *flinfo, Datum val) +{ + Datum result; + + fcinfo->flinfo = flinfo; + fcinfo->isnull = false; + fcinfo->args[0].value = val; + + result = FunctionCallInvoke(fcinfo); + + /* Check for null result, since caller is clearly not expecting one */ + if (fcinfo->isnull) + elog(ERROR, "function %u returned NULL", flinfo->fn_oid); + + return DatumGetCString(result); +} + /* * Call a previously-looked-up datatype binary-input function. * @@ -1746,6 +1785,46 @@ SendFunctionCall(FmgrInfo *flinfo, Datum val) return DatumGetByteaP(FunctionCall1(flinfo, val)); } +/* + * Prepare callinfo for SendFunctionCallWithInfo to reuse one callinfo + * instead of initializing it for each call. This is for performance. + */ +FunctionCallInfoBaseData * +PrepareSendFunctionCallInfo(void) +{ + FunctionCallInfoBaseData *fcinfo; + + fcinfo = (FunctionCallInfoBaseData *) palloc(SizeForFunctionCallInfo(1)); + InitFunctionCallInfoData(*fcinfo, NULL, 1, InvalidOid, NULL, NULL); + fcinfo->args[0].isnull = false; + return fcinfo; +} + +/* + * Call a previously-looked-up datatype binary-output function, with prepared + * callinfo. + * + * This is basically like SendFunctionCall, but it reuses prepared callinfo. + */ +bytea * +SendFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo, + FmgrInfo *flinfo, Datum val) +{ + Datum result; + + fcinfo->flinfo = flinfo; + fcinfo->isnull = false; + fcinfo->args[0].value = val; + + result = FunctionCallInvoke(fcinfo); + + /* Check for null result, since caller is clearly not expecting one */ + if (fcinfo->isnull) + elog(ERROR, "function %u returned NULL", flinfo->fn_oid); + + return DatumGetByteaP(result); +} + /* * As above, for I/O functions identified by OID. These are only to be used * in seldom-executed code paths. They are not only slow but leak memory. diff --git a/src/include/fmgr.h b/src/include/fmgr.h index ccb4070a25..816ed31b05 100644 --- a/src/include/fmgr.h +++ b/src/include/fmgr.h @@ -711,12 +711,18 @@ extern bool DirectInputFunctionCallSafe(PGFunction func, char *str, extern Datum OidInputFunctionCall(Oid functionId, char *str, Oid typioparam, int32 typmod); extern char *OutputFunctionCall(FmgrInfo *flinfo, Datum val); +extern FunctionCallInfoBaseData *PrepareOutputFunctionCallInfo(void); +extern char *OutputFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo, + FmgrInfo *flinfo, Datum val); extern char *OidOutputFunctionCall(Oid functionId, Datum val); extern Datum ReceiveFunctionCall(FmgrInfo *flinfo, fmStringInfo buf, Oid typioparam, int32 typmod); extern Datum OidReceiveFunctionCall(Oid functionId, fmStringInfo buf, Oid typioparam, int32 typmod); extern bytea *SendFunctionCall(FmgrInfo *flinfo, Datum val); +extern FunctionCallInfoBaseData *PrepareSendFunctionCallInfo(void); +extern bytea *SendFunctionCallWithInfo(FunctionCallInfoBaseData *fcinfo, + FmgrInfo *flinfo, Datum val); extern bytea *OidSendFunctionCall(Oid functionId, Datum val); -- 2.43.0
pgsql-hackers by date: