Thread: jsonb_set performance degradation / multiple jsonb_set on multiple documents
jsonb_set performance degradation / multiple jsonb_set on multiple documents
From
Alexandru Lazarev
Date:
Hi PostgreSQL Community.
I tried to rewrite some plv8 stored procedures, which process in bulk JSONB documents, to PL/pgSQL.
A SP usually has to delete/update/add multiple key with the same document and do it for multiple documents (~40K) in loop.
When updating a single key PL/pgSQL wins against plv8, but when I need to update multiple keys with jsonb_set, timing increase linearly with number of jsonb_sets and takes longer than similar SP in PLV8.
Below are test-cases I've used.
QUESTION: Is it expected behavior or I do something wrong or there are some better approaches or we can treat datum as object?
test case:
PG 9.6, CentOS 7
CREATE TABLE public.configurationj2b
(
id integer NOT NULL PRIMARY KEY,
config jsonb NOT NULL
(
id integer NOT NULL PRIMARY KEY,
config jsonb NOT NULL
);
Each jsonb column has 3 top keys, and one of top-key ('data') has another 700-900 key-value pairs e.g. {"OID1":"Value1"}
PL/pgSQL SP
end;CREATE OR REPLACE FUNCTION public.process_jsonb()
RETURNS void AS
$BODY$
DECLARE
r integer;
cfg jsonb;
BEGIN
RAISE NOTICE 'start';
FOR r IN
SELECT id as device_id FROM devices
LOOP
select config into cfg from configurationj2b c where c.id = r; --select jsonb one by one
-- MULTIPLE KEYs, Conditional Busiines Logic (BL) updates
cfg := jsonb_set(cfg, '{data,OID1}', '"pl/pgsql1"');
IF cfg@>'{"data" : { "OID1":"pl/pgsql1"} }' THEN
cfg := jsonb_set(cfg, '{data,OID2}', '"pl/pgsql2"');
END IF;
IF cfg@>'{"data" : { "OID2":"pl/pgsql2"} }' THEN
cfg := jsonb_set(cfg, '{data,OID3}', '"pl/pgsql3"');
END IF;
IF cfg@>'{"data" : { "OID3":"pl/pgsql3"} }' THEN
cfg := jsonb_set(cfg, '{data,OID4}', '"pl/pgsql4"');
END IF;
IF cfg@>'{"data" : { "OID4":"pl/pgsql4"} }' THEN
cfg := jsonb_set(cfg, '{data,OID5}', '"pl/pgsql5"');
END IF;
update configurationj2b c set config = cfg where c.id = r;
END LOOP;
RAISE NOTICE 'end';
RETURN;
END
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
RETURNS void AS
$BODY$
DECLARE
r integer;
cfg jsonb;
BEGIN
RAISE NOTICE 'start';
FOR r IN
SELECT id as device_id FROM devices
LOOP
select config into cfg from configurationj2b c where c.id = r; --select jsonb one by one
-- MULTIPLE KEYs, Conditional Busiines Logic (BL) updates
cfg := jsonb_set(cfg, '{data,OID1}', '"pl/pgsql1"');
IF cfg@>'{"data" : { "OID1":"pl/pgsql1"} }' THEN
cfg := jsonb_set(cfg, '{data,OID2}', '"pl/pgsql2"');
END IF;
IF cfg@>'{"data" : { "OID2":"pl/pgsql2"} }' THEN
cfg := jsonb_set(cfg, '{data,OID3}', '"pl/pgsql3"');
END IF;
IF cfg@>'{"data" : { "OID3":"pl/pgsql3"} }' THEN
cfg := jsonb_set(cfg, '{data,OID4}', '"pl/pgsql4"');
END IF;
IF cfg@>'{"data" : { "OID4":"pl/pgsql4"} }' THEN
cfg := jsonb_set(cfg, '{data,OID5}', '"pl/pgsql5"');
END IF;
update configurationj2b c set config = cfg where c.id = r;
END LOOP;
RAISE NOTICE 'end';
RETURN;
END
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
or in pseudo-code I would have
for-each child_jsonb do
begin
foreach (key-value in parent_jsonb) do
begin
child_jsonb := jsonb_set(child_jsonb , '{key}', '"value"');
end
update child_jsonb in db;
plv8 snippet:
$BODY$var ids = plv8.execute('select id from devices');
var CFG_TABLE_NAME = 'configurationj2b';
var selPlan = plv8.prepare( "select c.config from " + CFG_TABLE_NAME + " c where c.id = $1", ['int'] );
var updPlan = plv8.prepare( 'update ' + CFG_TABLE_NAME + ' set config = $1 where id = $2', ['json','int'] )
try {
for (var i = 0; i < ids.length; i++) {
var db_cfg = selPlan.execute([ids[i].id]);
var cfg = db_cfg[0].config;
var cfg_data = cfg['data'];
cfg_data['OID1'] = 'plv8_01';
if (cfg_data['OID1'] == 'plv8_01') {
cfg_data['OID2'] = 'plv8_02'
};
if (cfg_data['OID2'] == 'plv8_02') {
cfg_data['OID3'] = 'plv8_03'
}
if (cfg_data['OID3'] == 'plv8_03') {
cfg_data['OID4'] = 'plv8_04'
}
if (cfg_data['OID4'] == 'plv8_04') {
cfg_data['OID5'] = 'plv8_05'
}
updPlan.execute([cfg, ids[i].id]);
plv8.elog(NOTICE, "UPDATED = " + ids[i].id);
}
} finally {
selPlan.free();
updPlan.free();
}
return;$BODY$
var CFG_TABLE_NAME = 'configurationj2b';
var selPlan = plv8.prepare( "select c.config from " + CFG_TABLE_NAME + " c where c.id = $1", ['int'] );
var updPlan = plv8.prepare( 'update ' + CFG_TABLE_NAME + ' set config = $1 where id = $2', ['json','int'] )
try {
for (var i = 0; i < ids.length; i++) {
var db_cfg = selPlan.execute([ids[i].id]);
var cfg = db_cfg[0].config;
var cfg_data = cfg['data'];
cfg_data['OID1'] = 'plv8_01';
if (cfg_data['OID1'] == 'plv8_01') {
cfg_data['OID2'] = 'plv8_02'
};
if (cfg_data['OID2'] == 'plv8_02') {
cfg_data['OID3'] = 'plv8_03'
}
if (cfg_data['OID3'] == 'plv8_03') {
cfg_data['OID4'] = 'plv8_04'
}
if (cfg_data['OID4'] == 'plv8_04') {
cfg_data['OID5'] = 'plv8_05'
}
updPlan.execute([cfg, ids[i].id]);
plv8.elog(NOTICE, "UPDATED = " + ids[i].id);
}
} finally {
selPlan.free();
updPlan.free();
}
return;$BODY$
but for now plv8 has other issues related to resource consumption.
So could I get similar performance in PL/pgSQL?
Re: jsonb_set performance degradation / multiple jsonb_set onmultiple documents
From
Michel Pelletier
Date:
I don't know the details of jsonb_set, Perhaps the '||' operator will perform better for you, it will overwrite existing keys, so you can build your new values in a new object, and then || it to the original.
postgres=# select '{"a": 1, "b": 2, "c": 3}'::jsonb || '{"b": 4, "c": 5}'::jsonb;
?column?
--------------------------
{"a": 1, "b": 4, "c": 5}
(1 row)
-Michel
On Fri, Mar 15, 2019 at 9:02 AM Alexandru Lazarev <alexandru.lazarev@gmail.com> wrote:
Hi PostgreSQL Community.I tried to rewrite some plv8 stored procedures, which process in bulk JSONB documents, to PL/pgSQL.A SP usually has to delete/update/add multiple key with the same document and do it for multiple documents (~40K) in loop.When updating a single key PL/pgSQL wins against plv8, but when I need to update multiple keys with jsonb_set, timing increase linearly with number of jsonb_sets and takes longer than similar SP in PLV8.Below are test-cases I've used.QUESTION: Is it expected behavior or I do something wrong or there are some better approaches or we can treat datum as object?test case:PG 9.6, CentOS 7CREATE TABLE public.configurationj2b
(
id integer NOT NULL PRIMARY KEY,
config jsonb NOT NULL);Each jsonb column has 3 top keys, and one of top-key ('data') has another 700-900 key-value pairs e.g. {"OID1":"Value1"}PL/pgSQL SPend;CREATE OR REPLACE FUNCTION public.process_jsonb()
RETURNS void AS
$BODY$
DECLARE
r integer;
cfg jsonb;
BEGIN
RAISE NOTICE 'start';
FOR r IN
SELECT id as device_id FROM devices
LOOP
select config into cfg from configurationj2b c where c.id = r; --select jsonb one by one
-- MULTIPLE KEYs, Conditional Busiines Logic (BL) updates
cfg := jsonb_set(cfg, '{data,OID1}', '"pl/pgsql1"');
IF cfg@>'{"data" : { "OID1":"pl/pgsql1"} }' THEN
cfg := jsonb_set(cfg, '{data,OID2}', '"pl/pgsql2"');
END IF;
IF cfg@>'{"data" : { "OID2":"pl/pgsql2"} }' THEN
cfg := jsonb_set(cfg, '{data,OID3}', '"pl/pgsql3"');
END IF;
IF cfg@>'{"data" : { "OID3":"pl/pgsql3"} }' THEN
cfg := jsonb_set(cfg, '{data,OID4}', '"pl/pgsql4"');
END IF;
IF cfg@>'{"data" : { "OID4":"pl/pgsql4"} }' THEN
cfg := jsonb_set(cfg, '{data,OID5}', '"pl/pgsql5"');
END IF;
update configurationj2b c set config = cfg where c.id = r;
END LOOP;
RAISE NOTICE 'end';
RETURN;
END
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;or in pseudo-code I would havefor-each child_jsonb dobeginforeach (key-value in parent_jsonb) dobeginchild_jsonb := jsonb_set(child_jsonb , '{key}', '"value"');endupdate child_jsonb in db;plv8 snippet:$BODY$var ids = plv8.execute('select id from devices');
var CFG_TABLE_NAME = 'configurationj2b';
var selPlan = plv8.prepare( "select c.config from " + CFG_TABLE_NAME + " c where c.id = $1", ['int'] );
var updPlan = plv8.prepare( 'update ' + CFG_TABLE_NAME + ' set config = $1 where id = $2', ['json','int'] )
try {
for (var i = 0; i < ids.length; i++) {
var db_cfg = selPlan.execute([ids[i].id]);
var cfg = db_cfg[0].config;
var cfg_data = cfg['data'];
cfg_data['OID1'] = 'plv8_01';
if (cfg_data['OID1'] == 'plv8_01') {
cfg_data['OID2'] = 'plv8_02'
};
if (cfg_data['OID2'] == 'plv8_02') {
cfg_data['OID3'] = 'plv8_03'
}
if (cfg_data['OID3'] == 'plv8_03') {
cfg_data['OID4'] = 'plv8_04'
}
if (cfg_data['OID4'] == 'plv8_04') {
cfg_data['OID5'] = 'plv8_05'
}
updPlan.execute([cfg, ids[i].id]);
plv8.elog(NOTICE, "UPDATED = " + ids[i].id);
}
} finally {
selPlan.free();
updPlan.free();
}
return;$BODY$but for now plv8 has other issues related to resource consumption.So could I get similar performance in PL/pgSQL?