From 4e760e497711d0e145500c77334b4ef643762dba Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 14 Nov 2018 12:15:44 -0500 Subject: [PATCH v4 1/4] Teach RelationBuildPartitionDesc to cope with concurrent ATTACH. If a partition is added concurrently, we might see it in the list of children even though the system cache doesn't know about the updated pg_class value yet; add logic to handle that case. This does not guarantee that the results produced by RelationBuildPartitionDesc will be stable from one call to the next; it only tries to make sure that they will be sane. --- src/backend/partitioning/partdesc.c | 94 +++++++++++++++++++++++------ 1 file changed, 76 insertions(+), 18 deletions(-) diff --git a/src/backend/partitioning/partdesc.c b/src/backend/partitioning/partdesc.c index 8a4b63aa26..e89d773261 100644 --- a/src/backend/partitioning/partdesc.c +++ b/src/backend/partitioning/partdesc.c @@ -14,11 +14,19 @@ #include "postgres.h" +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/table.h" +#include "catalog/indexing.h" #include "catalog/partition.h" #include "catalog/pg_inherits.h" #include "partitioning/partbounds.h" #include "partitioning/partdesc.h" +#include "storage/bufmgr.h" +#include "storage/sinval.h" #include "utils/builtins.h" +#include "utils/inval.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -47,43 +55,93 @@ RelationBuildPartitionDesc(Relation rel) MemoryContext oldcxt; int *mapping; - /* Get partition oids from pg_inherits */ + /* + * Get partition oids from pg_inherits. This uses a single snapshot to + * fetch the list of children, so while more children may be getting + * added concurrently, whatever this function returns will be accurate + * as of some well-defined point in time. + */ inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock); nparts = list_length(inhoids); + /* Allocate arrays for OIDs and boundspecs. */ if (nparts > 0) { oids = palloc(nparts * sizeof(Oid)); boundspecs = palloc(nparts * sizeof(PartitionBoundSpec *)); } - /* Collect bound spec nodes for each partition */ + /* Collect bound spec nodes for each partition. */ i = 0; foreach(cell, inhoids) { Oid inhrelid = lfirst_oid(cell); HeapTuple tuple; - Datum datum; - bool isnull; - PartitionBoundSpec *boundspec; + PartitionBoundSpec *boundspec = NULL; + /* Try fetching the tuple from the catcache, for speed. */ tuple = SearchSysCache1(RELOID, inhrelid); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "cache lookup failed for relation %u", inhrelid); - - datum = SysCacheGetAttr(RELOID, tuple, - Anum_pg_class_relpartbound, - &isnull); - if (isnull) - elog(ERROR, "null relpartbound for relation %u", inhrelid); - boundspec = stringToNode(TextDatumGetCString(datum)); + if (HeapTupleIsValid(tuple)) + { + Datum datum; + bool isnull; + + datum = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_relpartbound, + &isnull); + if (!isnull) + boundspec = stringToNode(TextDatumGetCString(datum)); + ReleaseSysCache(tuple); + } + + /* + * The system cache may be out of date; if so, we may find no pg_class + * tuple or an old one where relpartbound is NULL. In that case, try + * the table directly. We can't just AcceptInvalidationMessages() and + * retry the system cache lookup because it's possible that a + * concurrent ATTACH PARTITION operation has removed itself to the + * ProcArray but yet added invalidation messages to the shared queue; + * InvalidateSystemCaches() would work, but seems excessive. + * + * Note that this algorithm assumes that PartitionBoundSpec we manage + * to fetch is the right one -- so this is only good enough for + * concurrent ATTACH PARTITION, not concurrent DETACH PARTITION + * or some hypothetical operation that changes the partition bounds. + */ + if (boundspec == NULL) + { + Relation pg_class; + SysScanDesc scan; + ScanKeyData key[1]; + Datum datum; + bool isnull; + + pg_class = table_open(RelationRelationId, AccessShareLock); + ScanKeyInit(&key[0], + Anum_pg_class_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(inhrelid)); + scan = systable_beginscan(pg_class, ClassOidIndexId, true, + NULL, 1, key); + tuple = systable_getnext(scan); + datum = heap_getattr(tuple, Anum_pg_class_relpartbound, + RelationGetDescr(pg_class), &isnull); + if (!isnull) + boundspec = stringToNode(TextDatumGetCString(datum)); + systable_endscan(scan); + table_close(pg_class, AccessShareLock); + } + + /* Sanity checks. */ + if (!boundspec) + elog(ERROR, "missing relpartbound for relation %u", inhrelid); if (!IsA(boundspec, PartitionBoundSpec)) elog(ERROR, "invalid relpartbound for relation %u", inhrelid); /* - * Sanity check: If the PartitionBoundSpec says this is the default - * partition, its OID should correspond to whatever's stored in - * pg_partitioned_table.partdefid; if not, the catalog is corrupt. + * If the PartitionBoundSpec says this is the default partition, its + * OID should match pg_partitioned_table.partdefid; if not, the + * catalog is corrupt. */ if (boundspec->is_default) { @@ -95,10 +153,10 @@ RelationBuildPartitionDesc(Relation rel) inhrelid, partdefid); } + /* Save results. */ oids[i] = inhrelid; boundspecs[i] = boundspec; ++i; - ReleaseSysCache(tuple); } /* Now build the actual relcache partition descriptor */ -- 2.17.2 (Apple Git-113)