Thread: BUG #17596: "invalid attribute number 11" when updating partitioned table with a MULTIEXPR_SUBLINK
BUG #17596: "invalid attribute number 11" when updating partitioned table with a MULTIEXPR_SUBLINK
From
PG Bug reporting form
Date:
The following bug has been logged on the website: Bug reference: 17596 Logged by: Andre Lin Email address: 857348270@qq.com PostgreSQL version: 12.12 Operating system: Linux x86_64 GNU/Linux Description: Steps to reproduce this ERROR: drop procedure if exists insert_data_start(starts int,table_num int, table_name text); create or REPLACE procedure insert_data_start(starts int,table_num int, table_name text) as $$ declare v_sql text; BEGIN v_sql := 'insert into '||table_name||' select i, repeat((i%365)::text,5)::bytea, (i%10)::char, int8(i%550), int2(i%127), int4(i%789),round(i%20*2.5866,3)::numeric, bpchar(md5((i%300)::text)), (''aaaaa:''||(i%360))::varchar, ''bbbbb''||(i%400)||''a''::text, md5((i%300)::TEXT), (i%666)::bigint::oid, ((i%200)*2.35)::float4, float8((i%300)*2.215), ''dsa ''||(i%765), (i%35),''tinterval''||(i%44), concat(concat_ws(''.'',(i+100)%200, i%100,i%100,(i+100)%200),''/25'')::inet, date((''2022-02-17 19:12:40+08''::timestamp)), ''00:00:00'' + (i%400+1) * interval ''1 minute'', (date(''2022-02-18'')+(i%24) * interval ''1 day''+(i%24) * interval ''1 minute''+(i%24) * interval ''1 second'')::timestamp,timestamptz(date(date(''2022-02-18'')+(i%24) * interval ''1 day'')),(i%500) * interval ''1 day'',timetz(''2022-02-16 17:29:13.9+08''::timestamp + (i%500)*interval ''1 second''),concat(concat_ws(''.'',(i+100)%200, i%100,i%100,(i+100)%200),''/25'')::inet,int4(i%10)::bit(5), (i%100+1)::int::bit(5)::varbit(5), bool(i%2), concat_ws(''-'',to_hex((i%192)+1),to_hex((i%168)+1),to_hex((i%200)+1),to_hex((i%100)+1),to_hex((i%98)+1),to_hex((i%255)+1))::macaddr, point(ceil((i%68)+1),ceil((i%77)+1)),(i*987)::varchar::oidvector,polygon(box(point(i%550,i%360),point(i%50,i%22))),((i*3.156)%9999)::money,(i%798,i%666)::varchar::tid,circle(point(i%250,i%160),i%76),concat_ws(''-'',to_hex((i%192)+16),to_hex((i%168)+16),to_hex((i%200)+16),to_hex((i%100)+16),to_hex((i%98)+16),to_hex((i%239)+16))::macaddr8,line(point(i%93+1,i%120),point(i%450+2,i%36)),path(polygon(box(point(i%192,i%120),point(i%913,i%120)))),box(point(i%292+1,i%20),point(i%92+2,i%24)),lseg(point(i%92+1,i%220),point(i%238+2,i%120)),name(''ccc''||(i%12)),(''dfa in ddd''||(i%900))::text,((i*3.1123)%998)::numeric(10,2),''sdfwadsf''||(i%120),''dfewvew''||(i%30),(i%50)/13, (i%20)/17, to_hex(i*900%500),''dfjwedfv''||(i%40) from generate_series('||starts||','||table_num||') i'; execute v_sql; end; $$ language plpgsql; drop table if exists s; create table s(c0 serial,c1 BYTEA default '',c2 CHAR default 'a',c3 INT8 default 1999,c4 INT2 default -127,c5 INT4 default -1,c6 numeric default 3.14156,c7 BPCHAR default '',c8 VARCHAR default '',c9 text default 'text',c10 TEXT default '',c11 OID default '2',c12 FLOAT4 default 10.8,c13 FLOAT8 default 0.999,c14 text default 'tgose',c15 varchar default '00:05:04',c16 varchar default 'tinterval(abstime(now()), abstime(now()))',c17 INET default '148.85.65.189/25',c18 DATE default '2022-07-26 14:25:25',c19 TIME default '14:25:03',c20 TIMESTAMP default '2022-07-26 12:12:12',c21 TIMESTAMPTZ default '2022-07-26 12:12:12+08',c22 INTERVAL default interval '1 day',c23 TIMETZ default '14:36:04+08',c24 CIDR default '198.24.10.0/24',c25 bit varying(5) default '',c26 VARBIT default '00011',c27 bool default false,c28 macaddr default 'bf:77:bf:5b:5b:92',c29 point default '(10,10)',c30 oidvector default '',c31 polygon default '((2,2),(2,2),(2,2),(2,2))',c32 money default 10,c33 tid default '(2,2)',c34 circle default '<(2,2),2>',c35 macaddr8 default '12:12:12:ff:fe:12:12:12',c36 line default '{0,-1,2}',c37 path default '((2,2),(2,2),(2,2),(2,2))',c38 box default '(4,2),(3,2)',c39 lseg default '[(3,2),(4,2)]',c40 name default '',c41 text default '',c42 numeric default 1.99,c43 text default 'text', c44 text default 'text',c45 numeric default 3.141596, c46 numeric default 1.963489, c47 text default '',c48 text default 'text',c49 serial,c50 bigserial,c51 int[] default '{1,2,3}',c52 text default ''); call insert_data_start(1, 5000, 's'); analyze s; drop table if exists r; create table r(c0 serial,c1 BYTEA default '',c2 CHAR default 'a',c3 INT8 default 1999,c4 INT2 default -127,c5 INT4 default -1,c6 numeric default 3.14156,c7 BPCHAR default '',c8 VARCHAR default '',c9 text default 'text',c10 TEXT default '',c11 OID default '2',c12 FLOAT4 default 10.8,c13 FLOAT8 default 0.999,c14 text default 'tgose',c15 varchar default '00:05:04',c16 varchar default 'tinterval(abstime(now()), abstime(now()))',c17 INET default '148.85.65.189/25',c18 DATE default '2022-07-26 14:25:25',c19 TIME default '14:25:03',c20 TIMESTAMP default '2022-07-26 12:12:12',c21 TIMESTAMPTZ default '2022-07-26 12:12:12+08',c22 INTERVAL default interval '1 day',c23 TIMETZ default '14:36:04+08',c24 CIDR default '198.24.10.0/24',c25 bit varying(5) default '',c26 VARBIT default '00011',c27 bool default false,c28 macaddr default 'bf:77:bf:5b:5b:92',c29 point default '(10,10)',c30 oidvector default '',c31 polygon default '((2,2),(2,2),(2,2),(2,2))',c32 money default 10,c33 tid default '(2,2)',c34 circle default '<(2,2),2>',c35 macaddr8 default '12:12:12:ff:fe:12:12:12',c36 line default '{0,-1,2}',c37 path default '((2,2),(2,2),(2,2),(2,2))',c38 box default '(4,2),(3,2)',c39 lseg default '[(3,2),(4,2)]',c40 name default '',c41 text default '',c42 numeric default 1.99,c43 text default 'text', c44 text default 'text',c45 numeric default 3.141596, c46 numeric default 1.963489, c47 text default '',c48 text default 'text',c49 serial,c50 bigserial,c51 int[] default '{1,2,3}',c52 text default ''); call insert_data_start(1, 5000, 'r'); analyze r; drop table if exists t1; create table t1(c0 serial,c1 BYTEA default '',c2 CHAR default 'a',c3 INT8 default 1999,c4 INT2 default -127,c5 INT4 default -1,c6 numeric default 3.14156,c7 BPCHAR default '',c8 VARCHAR default '',c9 text default 'text',c10 TEXT default '',c11 OID default '2',c12 FLOAT4 default 10.8,c13 FLOAT8 default 0.999,c14 text default 'tgose',c15 varchar default '00:05:04',c16 varchar default 'tinterval(abstime(now()), abstime(now()))',c17 INET default '148.85.65.189/25',c18 DATE default '2022-07-26 14:25:25',c19 TIME default '14:25:03',c20 TIMESTAMP default '2022-07-26 12:12:12',c21 TIMESTAMPTZ default '2022-07-26 12:12:12+08',c22 INTERVAL default interval '1 day',c23 TIMETZ default '14:36:04+08',c24 CIDR default '198.24.10.0/24',c25 bit varying(5) default '',c26 VARBIT default '00011',c27 bool default false,c28 macaddr default 'bf:77:bf:5b:5b:92',c29 point default '(10,10)',c30 oidvector default '',c31 polygon default '((2,2),(2,2),(2,2),(2,2))',c32 money default 10,c33 tid default '(2,2)',c34 circle default '<(2,2),2>',c35 macaddr8 default '12:12:12:ff:fe:12:12:12',c36 line default '{0,-1,2}',c37 path default '((2,2),(2,2),(2,2),(2,2))',c38 box default '(4,2),(3,2)',c39 lseg default '[(3,2),(4,2)]',c40 name default '',c41 text default '',c42 numeric default 1.99,c43 text default 'text', c44 text default 'text',c45 numeric default 3.141596, c46 numeric default 1.963489, c47 text default '',c48 text default 'text',c49 serial,c50 bigserial,c51 int[] default '{1,2,3}',c52 text default '') partition by hash (c20); create table t1_part_0 partition of t1 for values with(modulus 5,remainder 0) partition by range (c18); create table t1_part_1 partition of t1 for values with(modulus 5,remainder 1) partition by range (c18); create table t1_part_2 partition of t1 for values with(modulus 5,remainder 2) partition by range (c18); create table t1_part_3 partition of t1 for values with(modulus 5,remainder 3) partition by range (c18); create table t1_part_4 partition of t1 for values with(modulus 5,remainder 4) partition by range (c18); create table t1_part_0_0 partition of t1_part_0 for values from ('2022-02-19 12:12:12') to ('2022-12-19 12:12:12'); create table t1_part_0_1 partition of t1_part_0 for values from ('2023-02-19 12:12:12') to ('2023-12-19 12:12:12'); create table t1_part_0_2 partition of t1_part_0 default; create table t1_part_1_0 partition of t1_part_1 for values from ('2022-02-19 12:12:12') to ('2022-12-19 12:12:12'); create table t1_part_1_1 partition of t1_part_1 for values from ('2023-02-19 12:12:12') to ('2023-12-19 12:12:12'); create table t1_part_1_2 partition of t1_part_1 default; create table t1_part_2_0 partition of t1_part_2 for values from ('2022-02-19 12:12:12') to ('2022-12-19 12:12:12'); create table t1_part_2_1 partition of t1_part_2 for values from ('2023-02-19 12:12:12') to ('2023-12-19 12:12:12'); create table t1_part_2_2 partition of t1_part_2 default; create table t1_part_3_0 partition of t1_part_3 for values from ('2022-02-19 12:12:12') to ('2022-12-19 12:12:12'); create table t1_part_3_1 partition of t1_part_3 for values from ('2023-02-19 12:12:12') to ('2023-12-19 12:12:12'); create table t1_part_3_2 partition of t1_part_3 default; create table t1_part_4_0 partition of t1_part_4 for values from ('2022-02-19 12:12:12') to ('2022-12-19 12:12:12'); create table t1_part_4_1 partition of t1_part_4 for values from ('2023-02-19 12:12:12') to ('2099-12-19 12:12:12'); create table t1_part_4_2 partition of t1_part_4 default; call insert_data_start(1, 5000, 't1'); analyze t1; update t1 set (c14) = (select t1.c10 from r limit 1) from r t2, s t3, (select c3,c12 from s t4 group by t4.c12, t4.c3) t4 where t1.c3=t2.c3 and t1.c12=t2.c12 and t2.c3=t3.c3 and t2.c12=t3.c12 and t3.c3=t4.c3 and t3.c12=t4.c12 and t1.c0 between 900 and 1500; The result is ERROR: XX000: invalid attribute number 11 LOCATION: slot_getsomeattrs_int, execTuples.c:1909 I did some investigations. The current implementation, the update plans of different partitions use the same param id for the subplan param, but they correspond to different SubPlanStates during execution, SubPlanState are generated during ExecInitSubPlan and linked to ParamExecData->execPlan. Since the plans of multiple partitions share one Param, according to the execution order, the final execPlan will be set to the SubPlanState of the last partition in the InitPlan phase. And because the order of joins in different partition plans is different, the final SubPlanState->args does not match the expectation (some are OUTER_VAR, some are INNER_VAR), and an error is occurred (failed assertion if "--enable-cassert" and coredump)
Re: BUG #17596: "invalid attribute number 11" when updating partitioned table with a MULTIEXPR_SUBLINK
From
Tom Lane
Date:
PG Bug reporting form <noreply@postgresql.org> writes: > ERROR: XX000: invalid attribute number 11 > LOCATION: slot_getsomeattrs_int, execTuples.c:1909 > I did some investigations. The current implementation, the update plans of > different partitions use the same param id for the subplan param, but they > correspond to different SubPlanStates during execution, SubPlanState are > generated during ExecInitSubPlan and linked to ParamExecData->execPlan. > Since the plans of multiple partitions share one Param, according to the > execution order, the final execPlan will be set to the SubPlanState of the > last partition in the InitPlan phase. > And because the order of joins in different partition plans is different, > the final SubPlanState->args does not match the expectation (some are > OUTER_VAR, some are INNER_VAR), and an error is occurred (failed assertion > if "--enable-cassert" and coredump) Thanks for the report! I find that this bug doesn't reproduce in v14 and up, now that we got rid of inheritance_planner(). It is a live issue before that though, because as you say MULTIEXPR_SUBLINK SubPlans can do the wrong thing if their "args" lists aren't all identical. I was confused for awhile as to why the problem hadn't emerged long before, because the business with setting ParamExecData.execPlan to point to a SubPlan is ancient. I eventually realized that it's safe in the context of initplans, because *those don't have any values to be passed in*, so their args lists are always empty. This means that it doesn't matter if we clone an initplan SubPlan and let the clones share output parameters, because they'll all be alike enough for ExecSetParamPlan's purposes. But it does matter for MULTIEXPR_SUBLINK, which abused that ancient design by adding the possibility of passed-in arguments. So what we need to do is guarantee that MULTIEXPR_SUBLINKs don't share output parameters. Fortunately, this isn't too hard, because that could only happen when inheritance_planner() clones an UPDATE targetlist, and the stuff we need to change will all be at top level of that targetlist. Proposed patch against v13 attached; it'll need to be back-patched as far as v10. We don't need any of this logic in v14 and up, but I'm thinking of putting a slightly modified version of the new comment in nodeSubplan.c into HEAD, just to memorialize the issue. regards, tom lane diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index fadd8ea731..c542464e5a 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -245,6 +245,21 @@ ExecScanSubPlan(SubPlanState *node, * ones, so this should be safe.) Unlike ExecReScanSetParamPlan, we do * *not* set bits in the parent plan node's chgParam, because we don't * want to cause a rescan of the parent. + * + * Note: we are also relying on MULTIEXPR SubPlans not sharing any output + * parameters with other SubPlans, because if one does then it is unclear + * which SubPlanState node the parameter's execPlan field will be pointing + * to when we come to evaluate the parameter. We can allow plain initplan + * SubPlans to share output parameters, because it doesn't actually matter + * which initplan SubPlan we reference as long as they all point to the + * same underlying subplan. However, that fails to hold for MULTIEXPRs + * because they can have non-empty args lists, and the "same" args might + * have mutated into different forms in different parts of a plan tree. + * There is not a problem in ordinary queries because MULTIEXPR will + * appear only in an UPDATE's top-level target list, so it won't get + * duplicated anyplace. However, when inheritance_planner clones a + * partially-planned targetlist it must take care to assign non-duplicate + * param IDs to the cloned copy. */ if (subLinkType == MULTIEXPR_SUBLINK) { diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 60e7fda6a9..27c665ac12 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1606,6 +1606,10 @@ inheritance_planner(PlannerInfo *root) /* and we haven't created PlaceHolderInfos, either */ Assert(subroot->placeholder_list == NIL); + /* Fix MULTIEXPR_SUBLINK params if any */ + if (root->multiexpr_params) + SS_make_multiexprs_unique(root, subroot); + /* Generate Path(s) for accessing this result relation */ grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ ); diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 91a8851b25..927d6a43bd 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -851,6 +851,101 @@ hash_ok_operator(OpExpr *expr) } } +/* + * SS_make_multiexprs_unique + * + * After cloning an UPDATE targetlist that contains MULTIEXPR_SUBLINK + * SubPlans, inheritance_planner() must call this to assign new, unique Param + * IDs to the cloned MULTIEXPR_SUBLINKs' output parameters. See notes in + * ExecScanSubPlan. + */ +void +SS_make_multiexprs_unique(PlannerInfo *root, PlannerInfo *subroot) +{ + List *new_multiexpr_params = NIL; + int offset; + ListCell *lc; + + /* + * Find MULTIEXPR SubPlans in the cloned query. We need only look at the + * top level of the targetlist. + */ + foreach(lc, subroot->parse->targetList) + { + TargetEntry *tent = (TargetEntry *) lfirst(lc); + SubPlan *splan; + Plan *plan; + List *params; + + if (!IsA(tent->expr, SubPlan)) + continue; + splan = (SubPlan *) tent->expr; + if (splan->subLinkType != MULTIEXPR_SUBLINK) + continue; + + /* Found one, get the associated subplan */ + plan = (Plan *) list_nth(root->glob->subplans, splan->plan_id - 1); + + /* + * Generate new PARAM_EXEC Param nodes, and overwrite splan->setParam + * with their IDs. This is just like what build_subplan did when it + * made the SubPlan node we're cloning. But because the param IDs are + * assigned globally, we'll get new IDs. (We assume here that the + * subroot's tlist is a clone we can scribble on.) + */ + params = generate_subquery_params(root, + plan->targetlist, + &splan->setParam); + + /* + * We will append the replacement-Params lists to + * root->multiexpr_params, but for the moment just make a local list. + * Since we lack easy access here to the original subLinkId, we have + * to fall back on the slightly shaky assumption that the MULTIEXPR + * SubPlans appear in the targetlist in subLinkId order. This should + * be safe enough given the way that the parser builds the targetlist + * today. I wouldn't want to rely on it going forward, but since this + * code has a limited lifespan it should be fine. We can partially + * protect against problems with assertions below. + */ + new_multiexpr_params = lappend(new_multiexpr_params, params); + } + + /* + * Now we must find the Param nodes that reference the MULTIEXPR outputs + * and update their sublink IDs so they'll reference the new outputs. + * Fortunately, those too must be at top level of the cloned targetlist. + */ + offset = list_length(root->multiexpr_params); + + foreach(lc, subroot->parse->targetList) + { + TargetEntry *tent = (TargetEntry *) lfirst(lc); + Param *p; + int subqueryid; + int colno; + + if (!IsA(tent->expr, Param)) + continue; + p = (Param *) tent->expr; + if (p->paramkind != PARAM_MULTIEXPR) + continue; + subqueryid = p->paramid >> 16; + colno = p->paramid & 0xFFFF; + Assert(subqueryid > 0 && + subqueryid <= list_length(new_multiexpr_params)); + Assert(colno > 0 && + colno <= list_length((List *) list_nth(new_multiexpr_params, + subqueryid - 1))); + subqueryid += offset; + p->paramid = (subqueryid << 16) + colno; + } + + /* Finally, attach new replacement lists to the global list */ + root->multiexpr_params = list_concat(root->multiexpr_params, + new_multiexpr_params); +} + /* * SS_process_ctes: process a query's WITH list diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h index d6a872bd2c..6f33578c8f 100644 --- a/src/include/optimizer/subselect.h +++ b/src/include/optimizer/subselect.h @@ -16,6 +16,7 @@ #include "nodes/pathnodes.h" #include "nodes/plannodes.h" +extern void SS_make_multiexprs_unique(PlannerInfo *root, PlannerInfo *subroot); extern void SS_process_ctes(PlannerInfo *root); extern JoinExpr *convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index 2b68aef654..6e134d53f6 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -1715,6 +1715,55 @@ reset enable_seqscan; reset enable_indexscan; reset enable_bitmapscan; -- +-- Check handling of MULTIEXPR SubPlans in inherited updates +-- +create table inhpar(f1 int, f2 name); +insert into inhpar select generate_series(1,10); +create table inhcld() inherits(inhpar); +insert into inhcld select generate_series(11,10000); +vacuum analyze inhcld; +vacuum analyze inhpar; +explain (verbose, costs off) +update inhpar set (f1, f2) = (select p2.unique2, p2.stringu1 + from int4_tbl limit 1) +from onek p2 where inhpar.f1 = p2.unique1; + QUERY PLAN +----------------------------------------------------------------------------- + Update on public.inhpar + Update on public.inhpar + Update on public.inhcld inhpar_1 + -> Merge Join + Output: $4, $5, (SubPlan 1 (returns $2,$3)), inhpar.ctid, p2.ctid + Merge Cond: (p2.unique1 = inhpar.f1) + -> Index Scan using onek_unique1 on public.onek p2 + Output: p2.unique2, p2.stringu1, p2.ctid, p2.unique1 + -> Sort + Output: inhpar.ctid, inhpar.f1 + Sort Key: inhpar.f1 + -> Seq Scan on public.inhpar + Output: inhpar.ctid, inhpar.f1 + SubPlan 1 (returns $2,$3) + -> Limit + Output: (p2.unique2), (p2.stringu1) + -> Seq Scan on public.int4_tbl + Output: p2.unique2, p2.stringu1 + -> Hash Join + Output: $6, $7, (SubPlan 1 (returns $2,$3)), inhpar_1.ctid, p2.ctid + Hash Cond: (inhpar_1.f1 = p2.unique1) + -> Seq Scan on public.inhcld inhpar_1 + Output: inhpar_1.ctid, inhpar_1.f1 + -> Hash + Output: p2.unique2, p2.stringu1, p2.ctid, p2.unique1 + -> Seq Scan on public.onek p2 + Output: p2.unique2, p2.stringu1, p2.ctid, p2.unique1 +(27 rows) + +update inhpar set (f1, f2) = (select p2.unique2, p2.stringu1 + from int4_tbl limit 1) +from onek p2 where inhpar.f1 = p2.unique1; +drop table inhpar cascade; +NOTICE: drop cascades to table inhcld +-- -- Check handling of a constant-null CHECK constraint -- create table cnullparent (f1 int); diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql index 64173a8738..fd4f252c29 100644 --- a/src/test/regress/sql/inherit.sql +++ b/src/test/regress/sql/inherit.sql @@ -629,6 +629,26 @@ reset enable_seqscan; reset enable_indexscan; reset enable_bitmapscan; +-- +-- Check handling of MULTIEXPR SubPlans in inherited updates +-- +create table inhpar(f1 int, f2 name); +insert into inhpar select generate_series(1,10); +create table inhcld() inherits(inhpar); +insert into inhcld select generate_series(11,10000); +vacuum analyze inhcld; +vacuum analyze inhpar; + +explain (verbose, costs off) +update inhpar set (f1, f2) = (select p2.unique2, p2.stringu1 + from int4_tbl limit 1) +from onek p2 where inhpar.f1 = p2.unique1; +update inhpar set (f1, f2) = (select p2.unique2, p2.stringu1 + from int4_tbl limit 1) +from onek p2 where inhpar.f1 = p2.unique1; + +drop table inhpar cascade; + -- -- Check handling of a constant-null CHECK constraint --