From 01bc09bb547b992c9781df15b1ea9ff0e073b349 Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Thu, 2 Aug 2018 16:07:08 +1200 Subject: [PATCH v1 4/4] Allow partitions to be attached without blocking queries --- src/backend/commands/tablecmds.c | 231 +++++++++++++++++++++++++++++---- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/equalfuncs.c | 1 + src/backend/optimizer/plan/planner.c | 4 + src/backend/optimizer/prep/prepunion.c | 33 +++-- src/backend/optimizer/util/plancat.c | 3 + src/backend/optimizer/util/relnode.c | 19 +-- src/backend/parser/gram.y | 16 ++- src/backend/partitioning/partprune.c | 6 +- src/backend/utils/cache/partcache.c | 24 +++- src/backend/utils/cache/relcache.c | 6 +- src/bin/psql/describe.c | 21 ++- src/include/catalog/partition.h | 7 + src/include/nodes/parsenodes.h | 2 + src/include/nodes/relation.h | 11 ++ 15 files changed, 318 insertions(+), 67 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index e01ca8211a..d3db294665 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -87,6 +87,7 @@ #include "storage/lmgr.h" #include "storage/lock.h" #include "storage/predicate.h" +#include "storage/procarray.h" #include "storage/smgr.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -344,7 +345,7 @@ static void ATController(AlterTableStmt *parsetree, static void ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, bool recurse, bool recursing, LOCKMODE lockmode); static void ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode); -static void ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel, +static void ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation *relp, AlterTableCmd *cmd, LOCKMODE lockmode); static void ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode); @@ -479,19 +480,19 @@ static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partsp static void 
ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs, List **partexprs, Oid *partopclass, Oid *partcollation, char strategy); static void AttachPartition(Relation attachrel, Relation rel, - PartitionBoundSpec *bound); + PartitionBoundSpec *bound, bool valid); static void CreateInheritance(Relation child_rel, Relation parent_rel); static void RemoveInheritance(Relation child_rel, Relation parent_rel); -static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel, +static ObjectAddress ATExecAttachPartition(List **wqueue, Relation *relp, PartitionCmd *cmd); static void AttachPartitionEnsureIndexes(Relation rel, Relation attachrel); static void QueuePartitionConstraintValidation(List **wqueue, Relation scanrel, List *partConstraint, bool validate_default); static void CloneRowTriggersToPartition(Relation parent, Relation partition); -static ObjectAddress ATExecDetachPartition(Relation rel, RangeVar *name); +static ObjectAddress ATExecDetachPartition(Relation rel, PartitionCmd *cmd); static ObjectAddress ATExecAttachPartitionIdx(List **wqueue, Relation rel, - RangeVar *name); + PartitionCmd *cmd); static void validatePartitionedIndex(Relation partedIdx, Relation partedTbl); static void refuseDupeIndexAttach(Relation parentIdx, Relation partIdx, Relation partitionTbl); @@ -865,7 +866,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, } /* Add the pg_partition record */ - AttachPartition(rel, parent, bound); + AttachPartition(rel, parent, bound, true); /* Update the pg_class entry. 
*/ MarkRelationPartitioned(rel, parent, bound->is_default); @@ -3619,7 +3620,11 @@ AlterTableGetLockLevel(List *cmds) case AT_AttachPartition: case AT_DetachPartition: - cmd_lockmode = AccessExclusiveLock; + /* CONCURRENTLY option does not use an AccessExclusiveLock */ + if (IsA(cmd->def, PartitionCmd) && ((PartitionCmd *) cmd->def)->concurrently) + cmd_lockmode = ShareUpdateExclusiveLock; + else + cmd_lockmode = AccessExclusiveLock; break; default: /* oops */ @@ -4012,8 +4017,14 @@ ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode) */ rel = relation_open(tab->relid, NoLock); + /* + * We must pass a pointer to rel as some sub commands such as + * ATTACH PARTITION CONCURRENTLY commit the transaction and start + * a new one, meaning that rel must be closed and reopened. + * Without this we'd end up with a pointer to the closed copy. + */ foreach(lcmd, subcmds) - ATExecCmd(wqueue, tab, rel, + ATExecCmd(wqueue, tab, &rel, castNode(AlterTableCmd, lfirst(lcmd)), lockmode); @@ -4051,10 +4062,11 @@ ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode) * ATExecCmd: dispatch a subcommand to appropriate execution routine */ static void -ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel, +ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation *relp, AlterTableCmd *cmd, LOCKMODE lockmode) { ObjectAddress address = InvalidObjectAddress; + Relation rel = *relp; switch (cmd->subtype) { @@ -4306,15 +4318,15 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel, break; case AT_AttachPartition: if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) - ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def); + ATExecAttachPartition(wqueue, relp, (PartitionCmd *) cmd->def); else ATExecAttachPartitionIdx(wqueue, rel, - ((PartitionCmd *) cmd->def)->name); + (PartitionCmd *) cmd->def); break; case AT_DetachPartition: /* ATPrepCmd ensures it must be a table */ Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); - ATExecDetachPartition(rel, ((PartitionCmd 
*) cmd->def)->name); + ATExecDetachPartition(rel, (PartitionCmd *) cmd->def); break; default: /* oops */ elog(ERROR, "unrecognized alter table type: %d", @@ -11606,7 +11618,8 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode) } static void -AttachPartition(Relation attachrel, Relation rel, PartitionBoundSpec *bound) +AttachPartition(Relation attachrel, Relation rel, PartitionBoundSpec *bound, + bool valid) { Datum values[Natts_pg_partition]; bool nulls[Natts_pg_partition]; @@ -11624,6 +11637,7 @@ AttachPartition(Relation attachrel, Relation rel, PartitionBoundSpec *bound) */ values[Anum_pg_partition_partrelid - 1] = ObjectIdGetDatum(attachrelid); values[Anum_pg_partition_parentrelid - 1] = ObjectIdGetDatum(partedrelid); + values[Anum_pg_partition_partvalid - 1] = BoolGetDatum(valid); values[Anum_pg_partition_partbound - 1] = CStringGetTextDatum(nodeToString(bound)); memset(nulls, 0, sizeof(nulls)); @@ -14170,15 +14184,16 @@ QueuePartitionConstraintValidation(List **wqueue, Relation scanrel, } /* - * ALTER TABLE ATTACH PARTITION FOR VALUES + * ALTER TABLE ATTACH PARTITION [CONCURRENTLY] FOR VALUES * * Return the address of the newly attached partition. */ static ObjectAddress -ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd) +ATExecAttachPartition(List **wqueue, Relation *relp, PartitionCmd *cmd) { Relation attachrel, - catalog; + catalog, + rel; List *partConstraint; SysScanDesc scan; ScanKeyData skey; @@ -14192,6 +14207,10 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd) List *partBoundConstraint; List *cloned; ListCell *l; + LOCKMODE lockmode; + + lockmode = cmd->concurrently ? 
ShareUpdateExclusiveLock : AccessExclusiveLock; + rel = *relp; /* * We must lock the default partition if one exists, because attaching a @@ -14200,8 +14219,28 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd) defaultPartOid = get_default_oid_from_partdesc(RelationGetPartitionDesc(rel)); if (OidIsValid(defaultPartOid)) + { + /* + * When attaching a partition to a partitioned table which has a + * default partition, the default partition must be locked with an + * AccessExclusiveLock so that tuples which are in the default + * partition which should now belong to the newly attached partition + * can be moved. Moving these tuples while there is concurrent + * activity on the table is difficult to do transparently, so for now + * we'll just disallow the CONCURRENTLY option when there is a default + * partition. + */ + if (cmd->concurrently) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("concurrent attach to a partitioned table with a default partition is unsupported"))); + } + LockRelationOid(defaultPartOid, AccessExclusiveLock); + } + /* Always take an AccessExclusiveLock on the relation being attached */ attachrel = heap_openrv(cmd->name, AccessExclusiveLock); /* @@ -14275,7 +14314,7 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd) List *attachrel_children; attachrel_children = get_partition_descendants(RelationGetRelid(attachrel), - AccessExclusiveLock); + lockmode); if (list_member_oid(attachrel_children, RelationGetRelid(rel))) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_TABLE), @@ -14382,7 +14421,11 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd) check_new_partition_bound(RelationGetRelationName(attachrel), rel, cmd->bound); - AttachPartition(attachrel, rel, cmd->bound); + /* + * When the CONCURRENTLY option was not specified we mark the partition as + * valid right away. 
+ */ + AttachPartition(attachrel, rel, cmd->bound, !cmd->concurrently); /* Update the pg_class entry. */ MarkRelationPartitioned(attachrel, rel, cmd->bound->is_default); @@ -14489,6 +14532,138 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd) ObjectAddressSet(address, RelationRelationId, RelationGetRelid(attachrel)); + + if (cmd->concurrently) + { + LOCKTAG attachlocktag; + LockRelId attachrelid; + LOCKTAG partedlocktag; + LockRelId partedrelid; + Relation partRelation; + HeapTuple tuple; + Oid attachoid = RelationGetRelid(attachrel); + Oid relid = RelationGetRelid(rel); + TransactionId limitXmin; + Snapshot snapshot; + VirtualTransactionId *old_snapshots; + int n_old_snapshots; + int i; + + /* + * To allow the CONCURRENT ATTACH operation we need to make this + * partition visible to other transactions. To do that we must commit + * this transaction. In order to prevent another transaction dropping + * or detaching this newly attached partition we must obtain a session + * level lock on it. We must also maintain a ShareUpdateExclusiveLock + * on the partitioned table to prevent other sessions attaching any + * other partitions. XXX is that needed? + */ + attachrelid = attachrel->rd_lockInfo.lockRelId; + SET_LOCKTAG_RELATION(attachlocktag, attachrelid.dbId, attachrelid.relId); + heap_close(attachrel, NoLock); + + partedrelid = rel->rd_lockInfo.lockRelId; + SET_LOCKTAG_RELATION(partedlocktag, partedrelid.dbId, partedrelid.relId); + heap_close(rel, NoLock); + + LockRelationIdForSession(&partedrelid, ShareUpdateExclusiveLock); + LockRelationIdForSession(&attachrelid, AccessExclusiveLock); + + snapshot = GetTransactionSnapshot(); + limitXmin = snapshot->xmin; + + /* Now begin a new transaction */ + PopActiveSnapshot(); + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * Technically we're finished with 'rel' here, but we must re-open it + * again as the calling alter table code will try to close it. 
We must + * also ensure that we set *relp to point to this new rel. + */ + *relp = rel = heap_open(relid, ShareUpdateExclusiveLock); + + /* + * Open and lock the partition relation. The relation's Oid cannot + * have changed as we've been holding a session-level lock while the + * transaction was committed and the new one begun. + */ + attachrel = heap_open(attachoid, AccessExclusiveLock); + + /* + * Now we must wait until there are no transactions left which could see + * the old list of partitions. Some of these transactions may be + * REPEATABLE READ or above in isolation level, so we cannot just + * add a new partition during their transaction. + */ + old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false, + PROC_IS_AUTOVACUUM | PROC_IN_VACUUM, + &n_old_snapshots); + + for (i = 0; i < n_old_snapshots; i++) + { + if (!VirtualTransactionIdIsValid(old_snapshots[i])) + continue; /* found uninteresting in previous cycle */ + + if (i > 0) + { + /* see if anything's changed ... */ + VirtualTransactionId *newer_snapshots; + int n_newer_snapshots; + int j; + int k; + + newer_snapshots = GetCurrentVirtualXIDs(limitXmin, + true, false, + PROC_IS_AUTOVACUUM | PROC_IN_VACUUM, + &n_newer_snapshots); + for (j = i; j < n_old_snapshots; j++) + { + if (!VirtualTransactionIdIsValid(old_snapshots[j])) + continue; /* found uninteresting in previous cycle */ + for (k = 0; k < n_newer_snapshots; k++) + { + if (VirtualTransactionIdEquals(old_snapshots[j], + newer_snapshots[k])) + break; + } + if (k >= n_newer_snapshots) /* not there anymore */ + SetInvalidVirtualTransactionId(old_snapshots[j]); + } + pfree(newer_snapshots); + } + + if (VirtualTransactionIdIsValid(old_snapshots[i])) + VirtualXactLock(old_snapshots[i], true); + } + + partRelation = heap_open(PartitionRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(PARTSRELID, + ObjectIdGetDatum(attachoid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", attachoid); + + 
((Form_pg_partition) GETSTRUCT(tuple))->partvalid = true; + + CatalogTupleUpdate(partRelation, &tuple->t_self, tuple); + + heap_close(partRelation, RowExclusiveLock); + + /* + * Invalidate relcache entries for the partitioned table so that new + * queries pick up the new partition. + */ + CacheInvalidateRelcacheByRelid(relid); + + /* + * Last thing to do is release the session-level locks on both tables. + */ + UnlockRelationIdForSession(&partedrelid, ShareUpdateExclusiveLock); + UnlockRelationIdForSession(&attachrelid, AccessExclusiveLock); + } + /* keep our lock until commit */ heap_close(attachrel, NoLock); @@ -14770,12 +14945,12 @@ CloneRowTriggersToPartition(Relation parent, Relation partition) } /* - * ALTER TABLE DETACH PARTITION + * ALTER TABLE DETACH PARTITION [CONCURRENTLY] * * Return the address of the relation that is no longer a partition of rel. */ static ObjectAddress -ATExecDetachPartition(Relation rel, RangeVar *name) +ATExecDetachPartition(Relation rel, PartitionCmd *cmd) { Relation partRel, pgclass, @@ -14791,6 +14966,8 @@ ATExecDetachPartition(Relation rel, RangeVar *name) List *indexes; ListCell *cell; + if (cmd->concurrently) + elog(NOTICE, "Concurrently"); /* * We must lock the default partition, because detaching this partition @@ -14801,7 +14978,7 @@ ATExecDetachPartition(Relation rel, RangeVar *name) if (OidIsValid(defaultPartOid)) LockRelationOid(defaultPartOid, AccessExclusiveLock); - partRel = heap_openrv(name, AccessShareLock); + partRel = heap_openrv(cmd->name, AccessShareLock); /* Update pg_class tuple */ pgclass = heap_open(RelationRelationId, RowExclusiveLock); @@ -15017,7 +15194,7 @@ RangeVarCallbackForAttachIndex(const RangeVar *rv, Oid relOid, Oid oldRelOid, * ALTER INDEX i1 ATTACH PARTITION i2 */ static ObjectAddress -ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name) +ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, PartitionCmd *cmd) { Relation partIdx; Relation partTbl; @@ -15027,6 
+15204,12 @@ ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name) Oid currParent; struct AttachIndexCallbackState state; + /* ATTACH PARTITION CONCURRENTLY is only supported on tables */ + if (cmd->concurrently) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot attach index on partitioned index concurrently"))); + /* * We need to obtain lock on the index 'name' to modify it, but we also * need to read its owning table's tuple descriptor -- so we need to lock @@ -15038,14 +15221,14 @@ ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name) state.parentTblOid = parentIdx->rd_index->indrelid; state.lockedParentTbl = false; partIdxId = - RangeVarGetRelidExtended(name, AccessExclusiveLock, 0, + RangeVarGetRelidExtended(cmd->name, AccessExclusiveLock, 0, RangeVarCallbackForAttachIndex, (void *) &state); /* Not there? */ if (!OidIsValid(partIdxId)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), - errmsg("index \"%s\" does not exist", name->relname))); + errmsg("index \"%s\" does not exist", cmd->name->relname))); /* no deadlock risk: RangeVarGetRelidExtended already acquired the lock */ partIdx = relation_open(partIdxId, AccessExclusiveLock); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 7c8220cf65..376d7d0d24 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -4575,6 +4575,7 @@ _copyPartitionCmd(const PartitionCmd *from) COPY_NODE_FIELD(name); COPY_NODE_FIELD(bound); + COPY_SCALAR_FIELD(concurrently); return newnode; } diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 378f2facb8..5fc47fefdc 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2885,6 +2885,7 @@ _equalPartitionCmd(const PartitionCmd *a, const PartitionCmd *b) { COMPARE_NODE_FIELD(name); COMPARE_NODE_FIELD(bound); + COMPARE_SCALAR_FIELD(concurrently); return true; } diff --git 
a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index fd06da98b9..8b800151d7 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6928,6 +6928,10 @@ apply_scanjoin_target_to_paths(PlannerInfo *root, int nappinfos; List *child_scanjoin_targets = NIL; + /* Skip invalid partitions */ + if (!child_rel) + continue; + /* Translate scan/join targets for this child. */ appinfos = find_appinfos_by_relids(root, child_rel->relids, &nappinfos); diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 3896617760..a8ae993320 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -107,12 +107,13 @@ static void expand_partitioned_rtentry_recurse(PlannerInfo *root, RangeTblEntry *parentrte, Index parentRTindex, Relation parentrel, PlanRowMark *top_parentrc, LOCKMODE lockmode, - List **appinfos); + int partidx, List **appinfos); static void expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, Index parentRTindex, Relation parentrel, PlanRowMark *top_parentrc, Relation childrel, - List **appinfos, RangeTblEntry **childrte_p, + int partidx, List **appinfos, + RangeTblEntry **childrte_p, Index *childRTindex_p); static void make_inh_translation_list(Relation oldrelation, Relation newrelation, @@ -1642,7 +1643,7 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) } expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc, - newrelation, + newrelation, -1, &appinfos, &childrte, &childRTindex); @@ -1725,7 +1726,7 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) * partition key columns of all the partitioned tables. 
*/ expand_partitioned_rtentry_recurse(root, rte, rti, partrel, partrc, - lockmode, &root->append_rel_list); + lockmode, -1, &root->append_rel_list); heap_close(partrel, NoLock); } @@ -1741,7 +1742,7 @@ static void expand_partitioned_rtentry_recurse(PlannerInfo *root, RangeTblEntry *parentrte, Index parentRTindex, Relation parentrel, PlanRowMark *top_parentrc, LOCKMODE lockmode, - List **appinfos) + int partidx, List **appinfos) { int i; RangeTblEntry *childrte; @@ -1766,14 +1767,14 @@ expand_partitioned_rtentry_recurse(PlannerInfo *root, RangeTblEntry *parentrte, /* First expand the partitioned table itself. */ expand_single_inheritance_child(root, parentrte, parentRTindex, parentrel, - top_parentrc, parentrel, + top_parentrc, parentrel, partidx, appinfos, &childrte, &childRTindex); /* - * If the partitioned table has no partitions, treat this as the + * If the partitioned table has no valid partitions, treat this as the * non-inheritance case. */ - if (partdesc->nparts == 0) + if (partdesc->nvalidparts == 0) { parentrte->inh = false; return; @@ -1781,20 +1782,24 @@ expand_partitioned_rtentry_recurse(PlannerInfo *root, RangeTblEntry *parentrte, for (i = 0; i < partdesc->nparts; i++) { - Oid childOID = partdesc->oids[i]; Relation childrel; - childrel = heap_open(childOID, lockmode); + /* Skip invalid partitions */ + if (!partdesc->is_valid[i]) + continue; + + childrel = heap_open(partdesc->oids[i], lockmode); expand_single_inheritance_child(root, parentrte, parentRTindex, parentrel, top_parentrc, childrel, - appinfos, &childrte, &childRTindex); + i, appinfos, &childrte, + &childRTindex); /* If this child is itself partitioned, recurse */ if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) expand_partitioned_rtentry_recurse(root, childrte, childRTindex, childrel, top_parentrc, lockmode, - appinfos); + i, appinfos); /* Close child relation, but keep locks */ heap_close(childrel, NoLock); @@ -1826,7 +1831,8 @@ static void 
expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, Index parentRTindex, Relation parentrel, PlanRowMark *top_parentrc, Relation childrel, - List **appinfos, RangeTblEntry **childrte_p, + int partidx, List **appinfos, + RangeTblEntry **childrte_p, Index *childRTindex_p) { Query *parse = root->parse; @@ -1878,6 +1884,7 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, appinfo->child_relid = childRTindex; appinfo->parent_reltype = parentrel->rd_rel->reltype; appinfo->child_reltype = childrel->rd_rel->reltype; + appinfo->partidx = partidx; make_inh_translation_list(parentrel, childrel, childRTindex, &appinfo->translated_vars); appinfo->parent_reloid = parentOID; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 8369e3ad62..b95b4fe9d6 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1911,6 +1911,9 @@ set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, Assert(partdesc != NULL && rel->part_scheme != NULL); rel->boundinfo = partition_bounds_copy(partdesc->boundinfo, partkey); rel->nparts = partdesc->nparts; + rel->nvalidparts = partdesc->nvalidparts; + rel->part_valid = (bool *) palloc(rel->nparts * sizeof(bool)); + memcpy(rel->part_valid, partdesc->is_valid, rel->nparts * sizeof(bool)); set_baserel_partition_key_exprs(relation, rel); rel->partition_qual = RelationGetPartitionQual(relation); } diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index c69740eda6..98a54b40c5 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -190,9 +190,11 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->has_eclass_joins = false; rel->part_scheme = NULL; rel->nparts = 0; + rel->nvalidparts = 0; rel->boundinfo = NULL; rel->partition_qual = NIL; rel->part_rels = NULL; + rel->part_valid = NULL; rel->partexprs = NULL; 
rel->nullable_partexprs = NULL; rel->partitioned_child_rels = NIL; @@ -269,21 +271,22 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) { ListCell *l; int nparts = rel->nparts; - int cnt_parts = 0; if (nparts > 0) rel->part_rels = (RelOptInfo **) - palloc(sizeof(RelOptInfo *) * nparts); + palloc0(sizeof(RelOptInfo *) * nparts); foreach(l, root->append_rel_list) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); RelOptInfo *childrel; + int partidx; /* append_rel_list contains all append rels; ignore others */ if (appinfo->parent_relid != relid) continue; + partidx = appinfo->partidx; childrel = build_simple_rel(root, appinfo->child_relid, rel); @@ -291,18 +294,10 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) if (!rel->part_scheme) continue; - /* - * The order of partition OIDs in append_rel_list is the same as - * the order in the PartitionDesc, so the order of part_rels will - * also match the PartitionDesc. See expand_partitioned_rtentry. - */ - Assert(cnt_parts < nparts); - rel->part_rels[cnt_parts] = childrel; - cnt_parts++; + /* Record the RelOptInfo of this partition */ + rel->part_rels[partidx] = childrel; } - /* We should have seen all the child partitions. 
*/ - Assert(cnt_parts == nparts); } return rel; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 87f5e95827..b06f6f1216 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -2012,28 +2012,30 @@ alter_table_cmds: ; partition_cmd: - /* ALTER TABLE ATTACH PARTITION FOR VALUES */ - ATTACH PARTITION qualified_name PartitionBoundSpec + /* ALTER TABLE ATTACH PARTITION [CONCURRENTLY] FOR VALUES */ + ATTACH PARTITION opt_concurrently qualified_name PartitionBoundSpec { AlterTableCmd *n = makeNode(AlterTableCmd); PartitionCmd *cmd = makeNode(PartitionCmd); n->subtype = AT_AttachPartition; - cmd->name = $3; - cmd->bound = $4; + cmd->name = $4; + cmd->bound = $5; + cmd->concurrently = $3; n->def = (Node *) cmd; $$ = (Node *) n; } - /* ALTER TABLE DETACH PARTITION */ - | DETACH PARTITION qualified_name + /* ALTER TABLE DETACH PARTITION [CONCURRENTLY] */ + | DETACH PARTITION opt_concurrently qualified_name { AlterTableCmd *n = makeNode(AlterTableCmd); PartitionCmd *cmd = makeNode(PartitionCmd); n->subtype = AT_DetachPartition; - cmd->name = $3; + cmd->name = $4; cmd->bound = NULL; + cmd->concurrently = $3; n->def = (Node *) cmd; $$ = (Node *) n; diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 752810d0e4..32e70ca580 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -602,7 +602,11 @@ prune_append_rel_partitions(RelOptInfo *rel) i = -1; result = NULL; while ((i = bms_next_member(partindexes, i)) >= 0) - result = bms_add_member(result, rel->part_rels[i]->relid); + { + /* Skip invalid partitions */ + if (rel->part_rels[i]) + result = bms_add_member(result, rel->part_rels[i]->relid); + } return result; } diff --git a/src/backend/utils/cache/partcache.c b/src/backend/utils/cache/partcache.c index 51a21c4793..dcb3de9959 100644 --- a/src/backend/utils/cache/partcache.c +++ b/src/backend/utils/cache/partcache.c @@ -264,10 +264,13 @@ 
RelationBuildPartitionDesc(Relation rel) { List *partoids; Oid *oids = NULL; + bool *isvalid = NULL; List *boundspecs = NIL; + List *isvalidlist = NIL; ListCell *cell; int i, - nparts; + nparts, + nvalidparts; PartitionKey key = RelationGetPartitionKey(rel); PartitionDesc result; MemoryContext oldcxt; @@ -304,7 +307,9 @@ RelationBuildPartitionDesc(Relation rel) partoids = NIL; while ((partTuple = systable_getnext(scan)) != NULL) { - Oid partrelid = ((Form_pg_partition) GETSTRUCT(partTuple))->partrelid; + Form_pg_partition partform = (Form_pg_partition) GETSTRUCT(partTuple); + Oid partrelid = partform->partrelid; + bool valid = partform->partvalid; HeapTuple tuple; Datum datum; bool isnull; @@ -351,6 +356,7 @@ RelationBuildPartitionDesc(Relation rel) boundspecs = lappend(boundspecs, boundspec); partoids = lappend_oid(partoids, partrelid); + isvalidlist = lappend_int(isvalidlist, valid); /* XXX int List to store bools? */ ReleaseSysCache(tuple); } @@ -359,6 +365,7 @@ RelationBuildPartitionDesc(Relation rel) heap_close(pgpart, AccessShareLock); nparts = list_length(partoids); + nvalidparts = 0; if (nparts > 0) { @@ -367,6 +374,16 @@ RelationBuildPartitionDesc(Relation rel) foreach(cell, partoids) oids[i++] = lfirst_oid(cell); + isvalid = (bool *) palloc(nparts * sizeof(bool)); + i = 0; + foreach(cell, isvalidlist) + { + isvalid[i] = (bool) lfirst_int(cell); + if (isvalid[i]) + nvalidparts++; + i++; + } + /* Convert from node to the internal representation */ if (key->strategy == PARTITION_STRATEGY_HASH) { @@ -604,6 +621,7 @@ RelationBuildPartitionDesc(Relation rel) result = (PartitionDescData *) palloc0(sizeof(PartitionDescData)); result->nparts = nparts; + result->nvalidparts = nvalidparts; if (nparts > 0) { PartitionBoundInfo boundinfo; @@ -612,6 +630,7 @@ RelationBuildPartitionDesc(Relation rel) result->oids = (Oid *) palloc0(nparts * sizeof(Oid)); result->is_leaf = (bool *) palloc(nparts * sizeof(bool)); + result->is_valid = (bool *) palloc(nparts * sizeof(bool)); 
boundinfo = (PartitionBoundInfoData *) palloc0(sizeof(PartitionBoundInfoData)); @@ -807,6 +826,7 @@ RelationBuildPartitionDesc(Relation rel) /* Record if the partition is a leaf partition */ result->is_leaf[index] = (get_rel_relkind(oids[i]) != RELKIND_PARTITIONED_TABLE); + result->is_valid[index] = isvalid[i]; } pfree(mapping); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 6125421d39..feca620cff 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1024,12 +1024,16 @@ equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1, /* * Same oids? If the partitioning structure did not change, that is, * no partitions were added or removed to the relation, the oids array - * should still match element-by-element. + * should still match element-by-element. The is_valid flag must also + * match. */ for (i = 0; i < partdesc1->nparts; i++) { if (partdesc1->oids[i] != partdesc2->oids[i]) return false; + + if (partdesc1->is_valid[i] != partdesc2->is_valid[i]) + return false; } /* diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 7fde1114a0..518618ee92 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -3008,14 +3008,15 @@ describeOneTableDetails(const char *schemaname, printfPQExpBuffer(&buf, "SELECT c.oid::pg_catalog.regclass," " pg_catalog.pg_get_expr(p.partbound, p.partrelid)," - " c.relkind" + " c.relkind," + " p.partvalid" " FROM pg_catalog.pg_class c, pg_catalog.pg_partition p" " WHERE c.oid=p.partrelid AND p.parentrelid = '%s'" " ORDER BY pg_catalog.pg_get_expr(p.partbound, p.partrelid) = 'DEFAULT'," " c.oid::pg_catalog.regclass::pg_catalog.text;", oid); else printfPQExpBuffer(&buf, - "SELECT c.oid::pg_catalog.regclass,NULL,c.relkind" + "SELECT c.oid::pg_catalog.regclass,NULL,c.relkind,true" " FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i" " WHERE c.oid=i.inhrelid AND i.inhparent = '%s'" " ORDER BY 
c.oid::pg_catalog.regclass::pg_catalog.text;", oid); @@ -3024,7 +3025,7 @@ describeOneTableDetails(const char *schemaname, printfPQExpBuffer(&buf, "SELECT c.oid::pg_catalog.regclass," " pg_catalog.pg_get_expr(c.relpartbound, c.oid)," - " c.relkind" + " c.relkind,true" " FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i" " WHERE c.oid=i.inhrelid AND i.inhparent = '%s'" " ORDER BY pg_catalog.pg_get_expr(c.relpartbound, c.oid) = 'DEFAULT'," @@ -3092,20 +3093,26 @@ describeOneTableDetails(const char *schemaname, else { char *partitioned_note; + char *validity; if (*PQgetvalue(result, i, 2) == RELKIND_PARTITIONED_TABLE) partitioned_note = ", PARTITIONED"; else partitioned_note = ""; + if (strcmp(PQgetvalue(result, i, 3), "t") == 0) + validity = ""; + else + validity = " INVALID"; + if (i == 0) - printfPQExpBuffer(&buf, "%s: %s %s%s", + printfPQExpBuffer(&buf, "%s: %s %s%s%s", ct, PQgetvalue(result, i, 0), PQgetvalue(result, i, 1), - partitioned_note); + partitioned_note, validity); else - printfPQExpBuffer(&buf, "%*s %s %s%s", + printfPQExpBuffer(&buf, "%*s %s %s%s%s", ctw, "", PQgetvalue(result, i, 0), PQgetvalue(result, i, 1), - partitioned_note); + partitioned_note, validity); } if (i < tuples - 1) appendPQExpBufferChar(&buf, ','); diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 8cbbd227f4..e4448bbc56 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -27,10 +27,17 @@ typedef struct PartitionDescData { int nparts; /* Number of partitions */ + int nvalidparts; /* Number of partitions which are valid */ Oid *oids; /* Array of length 'nparts' containing * partition OIDs in order of the their bounds */ bool *is_leaf; /* Array of 'nparts' elements storing whether * a partition is a leaf partition or not */ + bool *is_valid; /* Array of 'nparts' elements storing whether + * a partition is ready for use by queries. 
+ * When not valid a partition is being + * concurrently attached, or a concurrent + * attach failed. XXX is it worth combining + * these two arrays into flags? */ PartitionBoundInfo boundinfo; /* collection of partition bounds */ } PartitionDescData; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 61b31fb2bb..1b85beb52b 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -857,6 +857,7 @@ typedef struct PartitionCmd NodeTag type; RangeVar *name; /* name of partition to attach/detach */ PartitionBoundSpec *bound; /* FOR VALUES, if attaching */ + bool concurrently; /* true if CONCURRENTLY keyword was used */ } PartitionCmd; /**************************************************************************** @@ -1813,6 +1814,7 @@ typedef struct AlterTableCmd /* one subcommand of an ALTER TABLE */ * constraint, or parent table */ DropBehavior behavior; /* RESTRICT or CASCADE for DROP cases */ bool missing_ok; /* skip error if missing? */ + bool concurrently; /* reduced lock level */ } AlterTableCmd; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 41caf873fb..f975a33d5b 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -693,10 +693,14 @@ typedef struct RelOptInfo /* used for partitioned relations */ PartitionScheme part_scheme; /* Partitioning scheme. */ int nparts; /* number of partitions */ + int nvalidparts; /* number of valid partitions */ struct PartitionBoundInfoData *boundinfo; /* Partition bounds */ List *partition_qual; /* partition constraint */ struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions, * stored in the same order of bounds */ + bool *part_valid; /* Array of 'nparts' elements set to true if + * the given partition's ATTACH is complete + * and is not concurrently being DETACHed */ List **partexprs; /* Non-nullable partition key expressions. */ List **nullable_partexprs; /* Nullable partition key expressions. 
*/ List *partitioned_child_rels; /* List of RT indexes. */ @@ -2138,6 +2142,13 @@ typedef struct AppendRelInfo Oid parent_reltype; /* OID of parent's composite type */ Oid child_reltype; /* OID of child's composite type */ + /* + * Index into PartitionDesc arrays of this partition, or -1 if the + * AppendRelInfo belongs to an inheritance child table or if it + * belongs to the top-level partitioned table. + */ + int partidx; + /* * The N'th element of this list is a Var or expression representing the * child column corresponding to the N'th column of the parent. This is -- 2.16.2.windows.1