From c447629f2a3290e74c5236dc1145b8f2afb1772a Mon Sep 17 00:00:00 2001 From: jian he Date: Sun, 7 Jan 2024 13:38:48 +0800 Subject: [PATCH v16 1/1] Improve performance of Jsonb extract (via key) and cast to numerical/bool data type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit optimize for the followings cases: (jsonb(object) -> 'key')::numeric (jsonb(object) -> 'key')::int2 (jsonb(object) -> 'key')::int4 (jsonb(object) -> 'key')::int8 (jsonb(object) -> 'key')::float4 (jsonb(object) -> 'key')::float8 (jsonb(object) -> 'key')::bool for these scarenio, in the master we need extract the jsonbvalue, covert to jsonb, then convert back to jsonbvalue then cast to numerical or bool type with the pacth. extract the jsonbvalue, doing logic on top of the extracted jsonbvalue. I think this optimization will work. because: 1. validate `jsonb(object)`  is really object type work as before. 2. test if the key is in the jsonb work as before. 3. assume the key exists. In the master, we first cast jsonb to jsonbvalue then validate the jsonbvalue type is jbvNumeric with the patch, we validate  jsonbvalue type is jbvNumeric in jsonb_finish_numeric. 4. from numeric data type to other numerical data type work as before. --- src/backend/utils/adt/jsonb.c | 152 ++++++++++++++++++++++++++++ src/backend/utils/adt/jsonfuncs.c | 59 ++++++++--- src/include/catalog/pg_proc.dat | 30 ++++-- src/include/utils/jsonb.h | 8 ++ src/test/regress/expected/jsonb.out | 108 +++++++++++++++++++- src/test/regress/sql/jsonb.sql | 57 ++++++++++- src/tools/pgindent/typedefs.list | 1 + 7 files changed, 389 insertions(+), 26 deletions(-) diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c index c10b3fbe..018717ab 100644 --- a/src/backend/utils/adt/jsonb.c +++ b/src/backend/utils/adt/jsonb.c @@ -17,11 +17,15 @@ #include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "funcapi.h" +#include "nodes/makefuncs.h" +#include "nodes/supportnodes.h" +#include "parser/parse_coerce.h" #include "libpq/pqformat.h" #include "miscadmin.h" #include "utils/builtins.h" #include "utils/date.h" #include "utils/datetime.h" +#include "utils/fmgroids.h" #include "utils/json.h" #include "utils/jsonb.h" #include "utils/jsonfuncs.h" @@ -2039,6 +2043,154 @@ cannotCastJsonbValue(enum jbvType type, const char *sqltype) elog(ERROR, "unknown jsonb type: %d", (int) type); } + +/* + * jsonb_cast_support() + * + * Planner support function for casting a jsonb extraction to a numeric + * or bool data type. Instead of converting a jsonbvalue to jsonb, the new + * method will cast the jsonbvalue to the desired data type directly. + */ +Datum +jsonb_cast_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestSimplify)) + { + SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq; + FuncExpr *fexpr = req->fcall; + FuncExpr *jsonb_start_func = NULL, + *jsonb_finish_func = NULL, + *final_func = NULL; + Node *input; + Oid new_func_id = InvalidOid; + List *args; + Oid input_func_id, + collid, + inputcollid; + bool retset = false, + variadic = false; + + Assert(list_length(fexpr->args) == 1); + input = (Node *) linitial(fexpr->args); + + if (IsA(input, OpExpr)) + { + OpExpr *opExpr = castNode(OpExpr, input); + + input_func_id = opExpr->opfuncid; + collid = opExpr->opcollid; + inputcollid = opExpr->inputcollid; + args = opExpr->args; + } + else if (IsA(input, FuncExpr)) + { + FuncExpr *funcExpr = castNode(FuncExpr, input); + + input_func_id = funcExpr->funcid; + collid = funcExpr->funccollid; + inputcollid = funcExpr->inputcollid; + args = funcExpr->args; + } + else + /* not the desired pattern. */ + PG_RETURN_POINTER(NULL); + + /* build a function to return the JsonbValue directly. */ + switch (input_func_id) + { + case F_JSONB_OBJECT_FIELD: + new_func_id = F_JSONB_OBJECT_FIELD_START; + break; + + default: + new_func_id = InvalidOid; + break; + } + + if (!OidIsValid(new_func_id)) + PG_RETURN_POINTER(NULL); + + jsonb_start_func = makeFuncExpr(new_func_id, INTERNALOID, args, + collid, inputcollid, + COERCE_EXPLICIT_CALL); + jsonb_start_func->funcretset = retset; + jsonb_start_func->funcvariadic = variadic; + + /* relabel the first argument as 'internal'. */ + linitial(jsonb_start_func->args) = makeRelabelType(linitial(jsonb_start_func->args), + INTERNALOID, -1, + InvalidOid, + COERCE_IMPLICIT_CAST); + switch (fexpr->funcresulttype) + { + case INT2OID: + case INT4OID: + case INT8OID: + case FLOAT4OID: + case FLOAT8OID: + case NUMERICOID: + /* build the function to turn the JsonbValue into numeric */ + jsonb_finish_func = makeFuncExpr(F_JSONB_FINISH_NUMERIC, NUMERICOID, + list_make2(jsonb_start_func, + makeConst(OIDOID, + -1, + InvalidOid, + sizeof(Oid), + ObjectIdGetDatum(fexpr->funcresulttype), + false, + true)), + collid, inputcollid, COERCE_EXPLICIT_CALL); + + if (fexpr->funcresulttype != NUMERICOID) + { + /* + * leverage the casting system to turn the numeric to + * desired type. + */ + final_func = (FuncExpr *) coerce_type(NULL, (Node *) jsonb_finish_func, NUMERICOID, + fexpr->funcresulttype, 0, COERCION_EXPLICIT, + COERCE_EXPLICIT_CAST, fexpr->location); + } + else + final_func = jsonb_finish_func; + + PG_RETURN_POINTER(final_func); + case BOOLOID: + final_func = makeFuncExpr(F_JSONB_FINISH_BOOL, BOOLOID, + list_make1(jsonb_start_func), collid, + inputcollid, COERCE_EXPLICIT_CALL); + PG_RETURN_POINTER(final_func); + default: + PG_RETURN_POINTER(NULL); + } + } + PG_RETURN_POINTER(NULL); +} + + +Datum +jsonb_finish_numeric(PG_FUNCTION_ARGS) +{ + JsonbValue *v = (JsonbValue *) PG_GETARG_POINTER(0); + Oid final_oid = PG_GETARG_OID(1); + + if (v->type != jbvNumeric) + cannotCastJsonbValue(v->type, format_type_be(final_oid)); + PG_RETURN_NUMERIC(v->val.numeric); +} + +Datum +jsonb_finish_bool(PG_FUNCTION_ARGS) +{ + JsonbValue *v = (JsonbValue *) PG_GETARG_POINTER(0); + + if (v->type != jbvBool) + cannotCastJsonbValue(v->type, "boolean"); + PG_RETURN_BOOL(v->val.boolean); +} + Datum jsonb_bool(PG_FUNCTION_ARGS) { diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index caaafb72..12a6f07c 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -492,6 +492,22 @@ static JsonParseErrorType transform_string_values_object_field_start(void *state static JsonParseErrorType transform_string_values_array_element_start(void *state, bool isnull); static JsonParseErrorType transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype); +Datum +jsonbvalue_covert(JsonbValue *jbv, JsonbValueTarget target) +{ + switch (target) + { + case JsonbValue_AsJsonbValue: + PG_RETURN_POINTER(jbv); + case JsonbValue_AsJsonb: + PG_RETURN_JSONB_P(JsonbValueToJsonb(jbv)); + case JsonbValue_AsText: + PG_RETURN_TEXT_P(JsonbValueAsText(jbv)); + default: + elog(ERROR, "invalid jsonbvalue cast target type"); + } + pg_unreachable(); +} /* * pg_parse_json_or_errsave @@ -847,26 +863,37 @@ json_object_field(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } +static Datum +jsonb_object_field_internal(FunctionCallInfo fcinfo, JsonbValueTarget target) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + text *key = PG_GETARG_TEXT_PP(1); + JsonbValue *v; + + if (!JB_ROOT_IS_OBJECT(jb)) + PG_RETURN_NULL(); + + v = getKeyJsonValueFromContainer(&jb->root, + VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key), + NULL); + + if (v != NULL) + return jsonbvalue_covert(v, target); + + PG_RETURN_NULL(); +} + Datum jsonb_object_field(PG_FUNCTION_ARGS) { - Jsonb *jb = PG_GETARG_JSONB_P(0); - text *key = PG_GETARG_TEXT_PP(1); - JsonbValue *v; - JsonbValue vbuf; + return jsonb_object_field_internal(fcinfo, JsonbValue_AsJsonb); +} - if (!JB_ROOT_IS_OBJECT(jb)) - PG_RETURN_NULL(); - - v = getKeyJsonValueFromContainer(&jb->root, - VARDATA_ANY(key), - VARSIZE_ANY_EXHDR(key), - &vbuf); - - if (v != NULL) - PG_RETURN_JSONB_P(JsonbValueToJsonb(v)); - - PG_RETURN_NULL(); +Datum +jsonb_object_field_start(PG_FUNCTION_ARGS) +{ + return jsonb_object_field_internal(fcinfo, JsonbValue_AsJsonbValue); } Datum diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 79793927..4c62c92d 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -4590,25 +4590,25 @@ proname => 'pg_lsn', prorettype => 'pg_lsn', proargtypes => 'numeric', prosrc => 'numeric_pg_lsn' }, -{ oid => '3556', descr => 'convert jsonb to boolean', +{ oid => '3556', descr => 'convert jsonb to boolean', prosupport => 'jsonb_cast_support', proname => 'bool', prorettype => 'bool', proargtypes => 'jsonb', prosrc => 'jsonb_bool' }, { oid => '3449', descr => 'convert jsonb to numeric', - proname => 'numeric', prorettype => 'numeric', proargtypes => 'jsonb', + proname => 'numeric', prorettype => 'numeric', proargtypes => 'jsonb', prosupport => 'jsonb_cast_support', prosrc => 'jsonb_numeric' }, -{ oid => '3450', descr => 'convert jsonb to int2', +{ oid => '3450', descr => 'convert jsonb to int2', prosupport => 'jsonb_cast_support', proname => 'int2', prorettype => 'int2', proargtypes => 'jsonb', prosrc => 'jsonb_int2' }, -{ oid => '3451', descr => 'convert jsonb to int4', +{ oid => '3451', descr => 'convert jsonb to int4', prosupport => 'jsonb_cast_support', proname => 'int4', prorettype => 'int4', proargtypes => 'jsonb', prosrc => 'jsonb_int4' }, -{ oid => '3452', descr => 'convert jsonb to int8', +{ oid => '3452', descr => 'convert jsonb to int8', prosupport => 'jsonb_cast_support', proname => 'int8', prorettype => 'int8', proargtypes => 'jsonb', prosrc => 'jsonb_int8' }, -{ oid => '3453', descr => 'convert jsonb to float4', +{ oid => '3453', descr => 'convert jsonb to float4', prosupport => 'jsonb_cast_support', proname => 'float4', prorettype => 'float4', proargtypes => 'jsonb', prosrc => 'jsonb_float4' }, -{ oid => '2580', descr => 'convert jsonb to float8', +{ oid => '2580', descr => 'convert jsonb to float8', prosupport => 'jsonb_cast_support', proname => 'float8', prorettype => 'float8', proargtypes => 'jsonb', prosrc => 'jsonb_float8' }, @@ -9983,6 +9983,22 @@ proname => 'jsonb_object_field_text', prorettype => 'text', proargtypes => 'jsonb text', proargnames => '{from_json, field_name}', prosrc => 'jsonb_object_field_text' }, +{ oid => '4552', descr => 'extract jsonbvalue from jsonb for the given field', + proname => 'jsonb_object_field_start', prorettype => 'internal', + proargtypes => 'internal text', proargnames => '{from_json, field_name}', + prosrc => 'jsonb_object_field_start' }, + +{ oid => '4553', descr => 'convert a jsonbvalue to numeric', + proname => 'jsonb_finish_numeric', prorettype => 'numeric', + proargtypes => 'internal oid', proargnames => '{from_jsonvalue,target_oid}', + prosrc => 'jsonb_finish_numeric' }, +{ oid => '4554', descr => 'convert a jsonbvalue to boolean', + proname => 'jsonb_finish_bool', prorettype => 'bool', + proargtypes => 'internal', proargnames => '{jsonvalue}', + prosrc => 'jsonb_finish_bool' }, +{ oid => '3814', descr => 'planner support for numeric(jsonb)', + proname => 'jsonb_cast_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'jsonb_cast_support' }, { oid => '3215', proname => 'jsonb_array_element', prorettype => 'jsonb', proargtypes => 'jsonb int4', proargnames => '{from_json, element_index}', diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h index e38dfd49..03e84500 100644 --- a/src/include/utils/jsonb.h +++ b/src/include/utils/jsonb.h @@ -294,6 +294,13 @@ struct JsonbValue } val; }; +typedef enum JsonbValueTarget +{ + JsonbValue_AsJsonbValue, + JsonbValue_AsJsonb, + JsonbValue_AsText, +} JsonbValueTarget; + #define IsAJsonbScalar(jsonbval) (((jsonbval)->type >= jbvNull && \ (jsonbval)->type <= jbvBool) || \ (jsonbval)->type == jbvDatetime) @@ -436,4 +443,5 @@ extern Datum jsonb_build_object_worker(int nargs, const Datum *args, const bool extern Datum jsonb_build_array_worker(int nargs, const Datum *args, const bool *nulls, const Oid *types, bool absent_on_null); +extern Datum jsonbvalue_covert(JsonbValue *jbv, JsonbValueTarget target); #endif /* __JSONB_H__ */ diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index b597d01a..96ac3399 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -457,8 +457,110 @@ CREATE TEMP TABLE test_jsonb ( ); INSERT INTO test_jsonb VALUES ('scalar','"a scalar"'), +('scalarint','2'), ('array','["zero", "one","two",null,"four","five", [1,2,3],{"f1":9}]'), -('object','{"field1":"val1","field2":"val2","field3":null, "field4": 4, "field5": [1,2,3], "field6": {"f1":9}}'); +('object','{"field1":"val1","field2":"val2","field3":null, "field4": 4, + "field5": [1,2,3], "field6": {"f1":9}, + "field7": true, "field8": [1,2,3,4,5]}'); +\pset null NULL +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field4')::numeric +FROM test_jsonb +WHERE json_type = 'object'; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------- + Seq Scan on pg_temp.test_jsonb + Output: jsonb_finish_numeric(jsonb_object_field_start((test_json)::internal, 'field4'::text), '1700'::oid) + Filter: (test_jsonb.json_type = 'object'::text) +(3 rows) + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field4')::int2 +FROM test_jsonb +WHERE json_type = 'object'; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Seq Scan on pg_temp.test_jsonb + Output: (jsonb_finish_numeric(jsonb_object_field_start((test_json)::internal, 'field4'::text), '21'::oid))::smallint + Filter: (test_jsonb.json_type = 'object'::text) +(3 rows) + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field4')::int4 +FROM test_jsonb +WHERE json_type = 'object'; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------- + Seq Scan on pg_temp.test_jsonb + Output: (jsonb_finish_numeric(jsonb_object_field_start((test_json)::internal, 'field4'::text), '23'::oid))::integer + Filter: (test_jsonb.json_type = 'object'::text) +(3 rows) + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field4')::int8 +FROM test_jsonb +WHERE json_type = 'object'; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------- + Seq Scan on pg_temp.test_jsonb + Output: (jsonb_finish_numeric(jsonb_object_field_start((test_json)::internal, 'field4'::text), '20'::oid))::bigint + Filter: (test_jsonb.json_type = 'object'::text) +(3 rows) + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field4')::float4 +FROM test_jsonb +WHERE json_type = 'object'; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Seq Scan on pg_temp.test_jsonb + Output: (jsonb_finish_numeric(jsonb_object_field_start((test_json)::internal, 'field4'::text), '700'::oid))::real + Filter: (test_jsonb.json_type = 'object'::text) +(3 rows) + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field4')::float8 +FROM test_jsonb +WHERE json_type = 'object'; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------- + Seq Scan on pg_temp.test_jsonb + Output: (jsonb_finish_numeric(jsonb_object_field_start((test_json)::internal, 'field4'::text), '701'::oid))::double precision + Filter: (test_jsonb.json_type = 'object'::text) +(3 rows) + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field7')::bool +FROM test_jsonb +WHERE json_type = 'object'; + QUERY PLAN +---------------------------------------------------------------------------------------------- + Seq Scan on pg_temp.test_jsonb + Output: jsonb_finish_bool(jsonb_object_field_start((test_json)::internal, 'field7'::text)) + Filter: (test_jsonb.json_type = 'object'::text) +(3 rows) + +SELECT +(test_json -> 'field4')::numeric, +(test_json -> 'field4')::int2, +(test_json -> 'field4')::int4, +(test_json -> 'field4')::int8, +(test_json -> 'field4')::float4, +(test_json -> 'field4')::float8, +(test_json -> 'field7')::bool +FROM test_jsonb +WHERE json_type = 'object'; + numeric | int2 | int4 | int8 | float4 | float8 | bool +---------+------+------+------+--------+--------+------ + 4 | 4 | 4 | 4 | 4 | 4 | t +(1 row) + +-- lets raise some errors. +SELECT (test_json -> 'field1')::int4 FROM test_jsonb WHERE json_type = 'object'; +ERROR: cannot cast jsonb string to type integer +SELECT (test_json -> 'field1')::bool FROM test_jsonb WHERE json_type = 'object'; +ERROR: cannot cast jsonb string to type boolean +\pset null '' SELECT test_json -> 'x' FROM test_jsonb WHERE json_type = 'scalar'; ?column? ---------- @@ -586,7 +688,9 @@ SELECT jsonb_object_keys(test_json) FROM test_jsonb WHERE json_type = 'object'; field4 field5 field6 -(6 rows) + field7 + field8 +(8 rows) -- nulls SELECT (test_json->'field3') IS NULL AS expect_false FROM test_jsonb WHERE json_type = 'object'; diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index 6dae715a..be29b2e7 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -154,8 +154,63 @@ CREATE TEMP TABLE test_jsonb ( INSERT INTO test_jsonb VALUES ('scalar','"a scalar"'), +('scalarint','2'), ('array','["zero", "one","two",null,"four","five", [1,2,3],{"f1":9}]'), -('object','{"field1":"val1","field2":"val2","field3":null, "field4": 4, "field5": [1,2,3], "field6": {"f1":9}}'); +('object','{"field1":"val1","field2":"val2","field3":null, "field4": 4, + "field5": [1,2,3], "field6": {"f1":9}, + "field7": true, "field8": [1,2,3,4,5]}'); + +\pset null NULL +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field4')::numeric +FROM test_jsonb +WHERE json_type = 'object'; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field4')::int2 +FROM test_jsonb +WHERE json_type = 'object'; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field4')::int4 +FROM test_jsonb +WHERE json_type = 'object'; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field4')::int8 +FROM test_jsonb +WHERE json_type = 'object'; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field4')::float4 +FROM test_jsonb +WHERE json_type = 'object'; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field4')::float8 +FROM test_jsonb +WHERE json_type = 'object'; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT (test_json -> 'field7')::bool +FROM test_jsonb +WHERE json_type = 'object'; + +SELECT +(test_json -> 'field4')::numeric, +(test_json -> 'field4')::int2, +(test_json -> 'field4')::int4, +(test_json -> 'field4')::int8, +(test_json -> 'field4')::float4, +(test_json -> 'field4')::float8, +(test_json -> 'field7')::bool +FROM test_jsonb +WHERE json_type = 'object'; + +-- lets raise some errors. +SELECT (test_json -> 'field1')::int4 FROM test_jsonb WHERE json_type = 'object'; +SELECT (test_json -> 'field1')::bool FROM test_jsonb WHERE json_type = 'object'; +\pset null '' SELECT test_json -> 'x' FROM test_jsonb WHERE json_type = 'scalar'; SELECT test_json -> 'x' FROM test_jsonb WHERE json_type = 'array'; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 5fd46b7b..4ca6679e 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1265,6 +1265,7 @@ JsonArrayAgg JsonArrayConstructor JsonArrayQueryConstructor JsonBaseObjectInfo +JsonbValueTarget JsonConstructorExpr JsonConstructorExprState JsonConstructorType -- 2.34.1