From e18d275c59d949cfd04f4a3e2e5a52b1802f27ee Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Sun, 2 Dec 2018 12:25:56 +1300 Subject: [PATCH v1] Allow lock acquisitions for partitions to be delayed For partitions we can get away with only locking the partition when it's about to be accessed for the first time during query execution. When many partitions are run-time pruned, this can increase query performance significantly. --- src/backend/catalog/dependency.c | 1 + src/backend/commands/createas.c | 1 + src/backend/executor/execUtils.c | 20 +++++++++++--------- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/equalfuncs.c | 1 + src/backend/nodes/outfuncs.c | 1 + src/backend/nodes/readfuncs.c | 1 + src/backend/optimizer/plan/planner.c | 2 ++ src/backend/optimizer/prep/prepunion.c | 10 ++++++++++ src/backend/parser/parse_relation.c | 2 ++ src/backend/replication/logical/worker.c | 1 + src/backend/rewrite/rewriteHandler.c | 1 + src/backend/utils/adt/ri_triggers.c | 2 ++ src/backend/utils/adt/ruleutils.c | 3 +++ src/backend/utils/cache/plancache.c | 15 +++++++++++---- src/include/nodes/parsenodes.h | 2 ++ 16 files changed, 51 insertions(+), 13 deletions(-) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 7dfa3278a5..fec0c363d9 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1415,6 +1415,7 @@ recordDependencyOnSingleRelExpr(const ObjectAddress *depender, rte.rtekind = RTE_RELATION; rte.relid = relId; rte.relkind = RELKIND_RELATION; /* no need for exactness here */ + rte.delaylock = false; rte.rellockmode = AccessShareLock; context.rtables = list_make1(list_make1(&rte)); diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index d01b258b65..146d01ad49 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -515,6 +515,7 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) rte->rtekind = RTE_RELATION; 
rte->relid = intoRelationAddr.objectId; rte->relkind = relkind; + rte->delaylock = false; rte->rellockmode = RowExclusiveLock; rte->requiredPerms = ACL_INSERT; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 2a47abc02e..4f2dc95eea 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -770,14 +770,15 @@ ExecGetRangeTableRelation(EState *estate, Index rti) Assert(rte->rtekind == RTE_RELATION); - if (!IsParallelWorker()) + if (!rte->delaylock && !IsParallelWorker()) { /* - * In a normal query, we should already have the appropriate lock, - * but verify that through an Assert. Since there's already an - * Assert inside heap_open that insists on holding some lock, it - * seems sufficient to check this only when rellockmode is higher - * than the minimum. + * In a normal query, unless the planner set the delaylock flag, + * we should already have the appropriate lock, but verify that + * through an Assert. Since there's already an Assert inside + * heap_open that insists on holding some lock, it seems + * sufficient to check this only when rellockmode is higher than + * the minimum. */ rel = heap_open(rte->relid, NoLock); Assert(rte->rellockmode == AccessShareLock || @@ -786,9 +787,10 @@ ExecGetRangeTableRelation(EState *estate, Index rti) else { /* - * If we are a parallel worker, we need to obtain our own local - * lock on the relation. This ensures sane behavior in case the - * parent process exits before we do. + * If we are a parallel worker or delaylock is set, we need to + * obtain a lock on the relation. For parallel workers, this + * ensures sane behavior in case the parent process exits before + * we do. 
*/ rel = heap_open(rte->relid, rte->rellockmode); } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index db49968409..e7c02350a9 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2353,6 +2353,7 @@ _copyRangeTblEntry(const RangeTblEntry *from) COPY_SCALAR_FIELD(rtekind); COPY_SCALAR_FIELD(relid); COPY_SCALAR_FIELD(relkind); + COPY_SCALAR_FIELD(delaylock); COPY_SCALAR_FIELD(rellockmode); COPY_NODE_FIELD(tablesample); COPY_NODE_FIELD(subquery); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 3a084b4d1f..9a796ad9fe 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2629,6 +2629,7 @@ _equalRangeTblEntry(const RangeTblEntry *a, const RangeTblEntry *b) COMPARE_SCALAR_FIELD(rtekind); COMPARE_SCALAR_FIELD(relid); COMPARE_SCALAR_FIELD(relkind); + COMPARE_SCALAR_FIELD(delaylock); COMPARE_SCALAR_FIELD(rellockmode); COMPARE_NODE_FIELD(tablesample); COMPARE_NODE_FIELD(subquery); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index f0c396530d..f45d07d542 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -3126,6 +3126,7 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node) case RTE_RELATION: WRITE_OID_FIELD(relid); WRITE_CHAR_FIELD(relkind); + WRITE_BOOL_FIELD(delaylock); WRITE_INT_FIELD(rellockmode); WRITE_NODE_FIELD(tablesample); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index e117867de5..edcbb66ad6 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1361,6 +1361,7 @@ _readRangeTblEntry(void) case RTE_RELATION: READ_OID_FIELD(relid); READ_CHAR_FIELD(relkind); + READ_BOOL_FIELD(delaylock); READ_INT_FIELD(rellockmode); READ_NODE_FIELD(tablesample); break; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index c729a99f8b..a3ee6f57de 100644 --- a/src/backend/optimizer/plan/planner.c 
+++ b/src/backend/optimizer/plan/planner.c @@ -5995,6 +5995,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) rte->rtekind = RTE_RELATION; rte->relid = tableOid; rte->relkind = RELKIND_RELATION; /* Don't be too picky. */ + rte->delaylock = false; rte->rellockmode = AccessShareLock; rte->lateral = false; rte->inh = false; @@ -6118,6 +6119,7 @@ plan_create_index_workers(Oid tableOid, Oid indexOid) rte->rtekind = RTE_RELATION; rte->relid = tableOid; rte->relkind = RELKIND_RELATION; /* Don't be too picky. */ + rte->delaylock = false; rte->rellockmode = AccessShareLock; rte->lateral = false; rte->inh = true; diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 2a1c1cb2e1..91cd45ed01 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -1784,6 +1784,16 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, *childrte_p = childrte; childrte->relid = childOID; childrte->relkind = childrel->rd_rel->relkind; + + /* + * For partitions, we've no need to obtain the lock on the relation during + * query execution until the partition is first required. This can + * drastically reduce the number of partitions we must lock when many + * partitions are run-time pruned. + */ + childrte->delaylock = (childOID != parentOID && + parentrte->relkind == RELKIND_PARTITIONED_TABLE); + /* A partitioned child will need to be expanded further. 
*/ if (childOID != parentOID && childrte->relkind == RELKIND_PARTITIONED_TABLE) diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index 378cbcbf79..0e131a4796 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -1224,6 +1224,7 @@ addRangeTableEntry(ParseState *pstate, rel = parserOpenTable(pstate, relation, lockmode); rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; + rte->delaylock = false; rte->rellockmode = lockmode; /* @@ -1302,6 +1303,7 @@ addRangeTableEntryForRelation(ParseState *pstate, rte->alias = alias; rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; + rte->delaylock = false; rte->rellockmode = lockmode; /* diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 8d5e0946c4..5c8fece714 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -199,6 +199,7 @@ create_estate_for_relation(LogicalRepRelMapEntry *rel) rte->rtekind = RTE_RELATION; rte->relid = RelationGetRelid(rel->localrel); rte->relkind = rel->localrel->rd_rel->relkind; + rte->delaylock = false; rte->rellockmode = AccessShareLock; ExecInitRangeTable(estate, list_make1(rte)); diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 43815d26ff..945a43af92 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -1595,6 +1595,7 @@ ApplyRetrieveRule(Query *parsetree, /* Clear fields that should not be set in a subquery RTE */ rte->relid = InvalidOid; rte->relkind = 0; + rte->delaylock = false; rte->rellockmode = 0; rte->tablesample = NULL; rte->inh = false; /* must not be set for a subquery */ diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index cdda860e73..fc61e8ab72 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ 
-1730,6 +1730,7 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) pkrte->rtekind = RTE_RELATION; pkrte->relid = RelationGetRelid(pk_rel); pkrte->relkind = pk_rel->rd_rel->relkind; + pkrte->delaylock = false; pkrte->rellockmode = AccessShareLock; pkrte->requiredPerms = ACL_SELECT; @@ -1737,6 +1738,7 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) fkrte->rtekind = RTE_RELATION; fkrte->relid = RelationGetRelid(fk_rel); fkrte->relkind = fk_rel->rd_rel->relkind; + fkrte->delaylock = false; fkrte->rellockmode = AccessShareLock; fkrte->requiredPerms = ACL_SELECT; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 4857caecaa..f8c72c54d1 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1002,6 +1002,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) oldrte->rtekind = RTE_RELATION; oldrte->relid = trigrec->tgrelid; oldrte->relkind = relkind; + oldrte->delaylock = false; oldrte->rellockmode = AccessShareLock; oldrte->alias = makeAlias("old", NIL); oldrte->eref = oldrte->alias; @@ -1013,6 +1014,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) newrte->rtekind = RTE_RELATION; newrte->relid = trigrec->tgrelid; newrte->relkind = relkind; + newrte->delaylock = false; newrte->rellockmode = AccessShareLock; newrte->alias = makeAlias("new", NIL); newrte->eref = newrte->alias; @@ -3209,6 +3211,7 @@ deparse_context_for(const char *aliasname, Oid relid) rte->rtekind = RTE_RELATION; rte->relid = relid; rte->relkind = RELKIND_RELATION; /* no need for exactness here */ + rte->delaylock = false; rte->rellockmode = AccessShareLock; rte->alias = makeAlias(aliasname, NIL); rte->eref = rte->alias; diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 9ec81c5f36..a992b201ad 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -1519,10 +1519,17 @@ AcquireExecutorLocks(List *stmt_list, 
bool acquire) continue; /* - * Acquire the appropriate type of lock on each relation OID. Note - * that we don't actually try to open the rel, and hence will not - * fail if it's been dropped entirely --- we'll just transiently - * acquire a non-conflicting lock. + * delaylock relations will be locked only when they are going + * to be accessed for the first time. + */ + if (rte->delaylock) + continue; + + /* + * Otherwise, acquire the appropriate type of lock on the + * relation's OID. Note that we don't actually try to open the + * rel, and hence will not fail if it's been dropped entirely --- + * we'll just transiently acquire a non-conflicting lock. */ if (acquire) LockRelationOid(rte->relid, rte->rellockmode); diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index e5bdc1cec5..7a52d08a25 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -986,6 +986,8 @@ typedef struct RangeTblEntry */ Oid relid; /* OID of the relation */ char relkind; /* relation kind (see pg_class.relkind) */ + bool delaylock; /* delay locking until executor needs to + * access this relation */ int rellockmode; /* lock level that query requires on the rel */ struct TableSampleClause *tablesample; /* sampling info, or NULL */ -- 2.16.2.windows.1