From 34e5992d122998b756b631ebf689267b03032504 Mon Sep 17 00:00:00 2001 From: amitlan Date: Thu, 3 Sep 2020 15:53:04 +0900 Subject: [PATCH v1] Check default partition's constraint even after tuple routing A partition's constraint is not checked if a row has been inserted into it via tuple routing, because the routing process ensures that only the tuples satisfying the constraint make it to a given partition. For a default partition however, whose constraint can change on-the-fly due to concurrent addition of partitions, this assumption does not always hold. So, check the constraint even when inserting into a default partition via tuple routing. This also adds an isolation test based on the reproduction steps described by Hao Wu. Reported by: Hao Wu --- src/backend/executor/execPartition.c | 62 ++++++++++++++++++++-- .../expected/partition-concurrent-attach.out | 21 ++++++++ src/test/isolation/isolation_schedule | 1 + .../specs/partition-concurrent-attach.spec | 34 ++++++++++++ 4 files changed, 113 insertions(+), 5 deletions(-) create mode 100644 src/test/isolation/expected/partition-concurrent-attach.out create mode 100644 src/test/isolation/specs/partition-concurrent-attach.spec diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 79fcbd6..e231b32 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -51,6 +51,13 @@ * PartitionDispatchData->indexes for details on how this array is * indexed. * + * nonleaf_partitions + * Array of 'max_dispatch' elements containing pointers to + * ResultRelInfos of nonleaf partitions touched by tuple routing. These + * are currently only used for checking the partition's constraint if + * the nonleaf partition happens to be a default partition of its + * parent + * * num_dispatch * The current number of items stored in the 'partition_dispatch_info' * array. 
Also serves as the index of the next free array element for @@ -89,6 +96,7 @@ struct PartitionTupleRouting { Relation partition_root; PartitionDispatch *partition_dispatch_info; + ResultRelInfo **nonleaf_partitions; int num_dispatch; int max_dispatch; ResultRelInfo **partitions; @@ -283,6 +291,7 @@ ExecFindPartition(ModifyTableState *mtstate, TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple; TupleTableSlot *myslot = NULL; MemoryContext oldcxt; + ResultRelInfo *rri = NULL; /* use per-tuple context here to avoid leaking memory */ oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); @@ -352,8 +361,6 @@ ExecFindPartition(ModifyTableState *mtstate, if (partdesc->is_leaf[partidx]) { - ResultRelInfo *rri; - /* * Look to see if we've already got a ResultRelInfo for this * partition. @@ -405,9 +412,8 @@ ExecFindPartition(ModifyTableState *mtstate, if (slot == myslot) ExecClearTuple(myslot); - MemoryContextSwitchTo(oldcxt); - ecxt->ecxt_scantuple = ecxt_scantuple_old; - return rri; + /* We've reached the leaf. */ + dispatch = NULL; } else { @@ -419,6 +425,8 @@ ExecFindPartition(ModifyTableState *mtstate, /* Already built. */ Assert(dispatch->indexes[partidx] < proute->num_dispatch); + rri = proute->nonleaf_partitions[dispatch->indexes[partidx]]; + /* * Move down to the next partition level and search again * until we find a leaf partition that matches this tuple @@ -440,10 +448,37 @@ ExecFindPartition(ModifyTableState *mtstate, dispatch, partidx); Assert(dispatch->indexes[partidx] >= 0 && dispatch->indexes[partidx] < proute->num_dispatch); + + rri = proute->nonleaf_partitions[dispatch->indexes[partidx]]; dispatch = subdispatch; } } + + /* + * If this partition is the default one, we must check its partition + * constraint, because it may have changed due to partitions being + * added to the parent concurrently. 
We do the check here instead of + in ExecInsert(), because we would not want to miss checking the + constraint of any nonleaf partitions as they never make it to + ExecInsert(). + */ + if (partidx == partdesc->boundinfo->default_index) + { + Assert(rri != NULL); + ExecPartitionCheck(rri, slot, estate, true); + } + + /* Break out and return if we've found the leaf partition. */ + if (dispatch == NULL) + break; } + + MemoryContextSwitchTo(oldcxt); + ecxt->ecxt_scantuple = ecxt_scantuple_old; + + Assert(rri != NULL); + + return rri; } /* @@ -992,6 +1027,7 @@ ExecInitPartitionDispatchInfo(EState *estate, Relation rel; PartitionDesc partdesc; PartitionDispatch pd; + ResultRelInfo *rri = NULL; int dispatchidx; MemoryContext oldcxt; @@ -1060,6 +1096,8 @@ ExecInitPartitionDispatchInfo(EState *estate, proute->max_dispatch = 4; proute->partition_dispatch_info = (PartitionDispatch *) palloc(sizeof(PartitionDispatch) * proute->max_dispatch); + proute->nonleaf_partitions = (ResultRelInfo **) + palloc(sizeof(ResultRelInfo *) * proute->max_dispatch); } else { @@ -1067,6 +1105,9 @@ ExecInitPartitionDispatchInfo(EState *estate, proute->partition_dispatch_info = (PartitionDispatch *) repalloc(proute->partition_dispatch_info, sizeof(PartitionDispatch) * proute->max_dispatch); + proute->nonleaf_partitions = (ResultRelInfo **) + repalloc(proute->nonleaf_partitions, + sizeof(ResultRelInfo *) * proute->max_dispatch); } } proute->partition_dispatch_info[dispatchidx] = pd; @@ -1081,6 +1122,17 @@ ExecInitPartitionDispatchInfo(EState *estate, parent_pd->indexes[partidx] = dispatchidx; } + /* + * Also create a minimally valid ResultRelInfo to check the + * partition's constraint. 
+ */ + if (rel->rd_rel->relispartition) + { + rri = makeNode(ResultRelInfo); + InitResultRelInfo(rri, rel, 1, proute->partition_root, 0); + } + proute->nonleaf_partitions[dispatchidx] = rri; + MemoryContextSwitchTo(oldcxt); return pd; diff --git a/src/test/isolation/expected/partition-concurrent-attach.out b/src/test/isolation/expected/partition-concurrent-attach.out new file mode 100644 index 0000000..553bb44 --- /dev/null +++ b/src/test/isolation/expected/partition-concurrent-attach.out @@ -0,0 +1,21 @@ +Parsed test spec with 2 sessions + +starting permutation: s1b s1a s2b s2i s1c s2c +step s1b: begin; +step s1a: alter table tpart attach partition tpart_2 for values from (100) to (200); +step s2b: begin; +step s2i: insert into tpart values (110,110),(120,120),(150,150); +step s1c: commit; +step s2i: <... completed> +error in steps s1c s2i: ERROR: new row for relation "tpart_default" violates partition constraint +step s2c: commit; + +starting permutation: s1b s2b s2i s1a s2c s1c +step s1b: begin; +step s2b: begin; +step s2i: insert into tpart values (110,110),(120,120),(150,150); +step s1a: alter table tpart attach partition tpart_2 for values from (100) to (200); +step s2c: commit; +step s1a: <... 
completed> +error in steps s2c s1a: ERROR: updated partition constraint for default partition "tpart_default" would be violated by some row +step s1c: commit; diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 218c87b..56c08fd 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -88,3 +88,4 @@ test: plpgsql-toast test: truncate-conflict test: serializable-parallel test: serializable-parallel-2 +test: partition-concurrent-attach diff --git a/src/test/isolation/specs/partition-concurrent-attach.spec b/src/test/isolation/specs/partition-concurrent-attach.spec new file mode 100644 index 0000000..5290e8a --- /dev/null +++ b/src/test/isolation/specs/partition-concurrent-attach.spec @@ -0,0 +1,34 @@ +# Verify that default partition constraint is enforced correctly +# in light of partitions being added concurrently to its parent +setup { +drop table if exists tpart; + create table tpart(i int, j int) partition by range(i); + create table tpart_1(like tpart); + create table tpart_2(like tpart); + create table tpart_default(like tpart); + alter table tpart attach partition tpart_1 for values from(0) to (100); + alter table tpart attach partition tpart_default default; + insert into tpart_2 values(110,110),(120,120),(150,150); +} + +session "s1" +step "s1b" { begin; } +step "s1a" { alter table tpart attach partition tpart_2 for values from (100) to (200); } +step "s1c" { commit; } + +session "s2" +step "s2b" { begin; } +step "s2i" { insert into tpart values (110,110),(120,120),(150,150); } +step "s2c" { commit; } + +teardown { drop table tpart; } + +# insert into tpart by s2 which routes to tpart_default due to not seeing +# concurrently added tpart_2 should fail, because the partition constraint +# of tpart_default would have changed due to tpart_2 having been added +permutation "s1b" "s1a" "s2b" "s2i" "s1c" "s2c" + +# reverse: now the insert into tpart_default by s2 occurs first 
followed by +# attach in s1, which should fail when it scans the default partitions to +# find the violating rows +permutation "s1b" "s2b" "s2i" "s1a" "s2c" "s1c" -- 1.8.3.1