"SMgrRelation hashtable corrupted" fix, phase 1 - Mailing list pgsql-patches
From | Tom Lane |
---|---|
Subject | "SMgrRelation hashtable corrupted" fix, phase 1 |
Date | |
Msg-id | 24452.1105387173@sss.pgh.pa.us Whole thread Raw |
List | pgsql-patches |
Attached is a patch that fixes the recently isolated "SMgrRelation hashtable corrupted" bug by allowing smgrclose() to happen safely without being called directly by the relcache. The reason this is an issue is that the relcache might contain a pointer to the SMgrRelation entry, and if that pointer isn't cleared then we have a dangling pointer, which on use will cause the "hashtable corrupted" failure if not worse things. The fix is for smgr.c to be aware of the possible existence of a long-lived pointer to each SMgrRelation entry, and to zero out any such pointer when smgrclose() is called. I believe this is considerably more robust than the previous approach. It's a bit uglier than before, but it's not quite tantamount to smgr depending on relcache --- smgr only knows there is a pointer, not what sort of structure it's stored in. Note that this problem doesn't exist pre-8.0 since the separate smgr cache table didn't exist in prior releases. Basically what's at stake here is keeping the relcache and smgr cache in step. The bulk of the patch consists of replacing the common patterns for smgropen() and smgrclose() calls associated with relcache entries with two new macros RelationOpenSmgr and RelationCloseSmgr, which is doubtless the way it should have been coded in the first place. Ignoring that, the changes reduce to the addition of smgrsetowner(), changing smgrclose() to clear the reference pointer if set, simplifying RelationCacheInvalidateEntry back to the way it was in 7.4, and simplifying LocalExecuteInvalidationMessage to treat relcache and smgr cache inval as independent operations. With the last change, there is no longer a reason for shared inval messages to handle relcache and smgr invals as a single operation; we can send those as two separate messages instead. I want to do this because it'll allow sizeof(SharedInvalidationMessage) to go back to 16 bytes as it was in prior releases instead of 24 bytes as it is now, saving a fair amount of shared memory space. I'm going to post that as a separate patch though, since it's logically separate. regards, tom lane *** src/backend/access/transam/xlogutils.c.orig Fri Dec 31 17:45:32 2004 --- src/backend/access/transam/xlogutils.c Mon Jan 10 13:41:19 2005 *************** *** 125,132 **** if (hentry == NULL) elog(PANIC, "_xl_remove_hash_entry: file was not found in cache"); ! if (rdesc->reldata.rd_smgr != NULL) ! smgrclose(rdesc->reldata.rd_smgr); memset(rdesc, 0, sizeof(XLogRelDesc)); memset(tpgc, 0, sizeof(FormData_pg_class)); --- 125,131 ---- if (hentry == NULL) elog(PANIC, "_xl_remove_hash_entry: file was not found in cache"); ! RelationCloseSmgr(&(rdesc->reldata)); memset(rdesc, 0, sizeof(XLogRelDesc)); memset(tpgc, 0, sizeof(FormData_pg_class)); *************** *** 233,239 **** hentry->rdesc = res; res->reldata.rd_targblock = InvalidBlockNumber; ! res->reldata.rd_smgr = smgropen(res->reldata.rd_node); /* * Create the target file if it doesn't already exist. This lets --- 232,239 ---- hentry->rdesc = res; res->reldata.rd_targblock = InvalidBlockNumber; ! res->reldata.rd_smgr = NULL; ! RelationOpenSmgr(&(res->reldata)); /* * Create the target file if it doesn't already exist. This lets *************** *** 278,284 **** rdesc = hentry->rdesc; ! if (rdesc->reldata.rd_smgr != NULL) ! smgrclose(rdesc->reldata.rd_smgr); ! rdesc->reldata.rd_smgr = NULL; } --- 278,282 ---- rdesc = hentry->rdesc; ! RelationCloseSmgr(&(rdesc->reldata)); } *** src/backend/catalog/heap.c.orig Fri Dec 31 17:45:32 2004 --- src/backend/catalog/heap.c Mon Jan 10 12:38:30 2005 *************** *** 322,328 **** if (create_storage) { Assert(rel->rd_smgr == NULL); ! rel->rd_smgr = smgropen(rel->rd_node); smgrcreate(rel->rd_smgr, rel->rd_istemp, false); } --- 322,328 ---- if (create_storage) { Assert(rel->rd_smgr == NULL); ! RelationOpenSmgr(rel); smgrcreate(rel->rd_smgr, rel->rd_istemp, false); } *************** *** 1186,1195 **** if (rel->rd_rel->relkind != RELKIND_VIEW && rel->rd_rel->relkind != RELKIND_COMPOSITE_TYPE) { ! if (rel->rd_smgr == NULL) ! rel->rd_smgr = smgropen(rel->rd_node); smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp); - rel->rd_smgr = NULL; } /* --- 1186,1193 ---- if (rel->rd_rel->relkind != RELKIND_VIEW && rel->rd_rel->relkind != RELKIND_COMPOSITE_TYPE) { ! RelationOpenSmgr(rel); smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp); } /* *** src/backend/catalog/index.c.orig Fri Dec 31 17:45:33 2004 --- src/backend/catalog/index.c Mon Jan 10 12:38:30 2005 *************** *** 780,790 **** */ FlushRelationBuffers(userIndexRelation, (BlockNumber) 0); ! if (userIndexRelation->rd_smgr == NULL) ! userIndexRelation->rd_smgr = smgropen(userIndexRelation->rd_node); smgrscheduleunlink(userIndexRelation->rd_smgr, userIndexRelation->rd_istemp); - userIndexRelation->rd_smgr = NULL; /* * Close and flush the index's relcache entry, to ensure relcache --- 780,788 ---- */ FlushRelationBuffers(userIndexRelation, (BlockNumber) 0); ! RelationOpenSmgr(userIndexRelation); smgrscheduleunlink(userIndexRelation->rd_smgr, userIndexRelation->rd_istemp); /* * Close and flush the index's relcache entry, to ensure relcache *************** *** 1133,1142 **** smgrclose(srel); /* schedule unlinking old relfilenode */ ! if (relation->rd_smgr == NULL) ! relation->rd_smgr = smgropen(relation->rd_node); smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp); - relation->rd_smgr = NULL; /* update the pg_class row */ rd_rel->relfilenode = newrelfilenode; --- 1131,1138 ---- smgrclose(srel); /* schedule unlinking old relfilenode */ ! RelationOpenSmgr(relation); smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp); /* update the pg_class row */ rd_rel->relfilenode = newrelfilenode; *** src/backend/commands/tablecmds.c.orig Fri Dec 31 17:45:37 2004 --- src/backend/commands/tablecmds.c Mon Jan 10 12:38:24 2005 *************** *** 5521,5530 **** copy_relation_data(rel, dstrel); /* schedule unlinking old physical file */ ! if (rel->rd_smgr == NULL) ! rel->rd_smgr = smgropen(rel->rd_node); smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp); - rel->rd_smgr = NULL; /* * Now drop smgr references. The source was already dropped by --- 5521,5528 ---- copy_relation_data(rel, dstrel); /* schedule unlinking old physical file */ ! RelationOpenSmgr(rel); smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp); /* * Now drop smgr references. The source was already dropped by *** src/backend/postmaster/bgwriter.c.orig Fri Dec 31 17:46:06 2004 --- src/backend/postmaster/bgwriter.c Mon Jan 10 13:55:08 2005 *************** *** 349,357 **** /* * After any checkpoint, close all smgr files. This is so we * won't hang onto smgr references to deleted files ! * indefinitely. (It is safe to do this because this process ! * does not have a relcache, and so no dangling references ! * could remain.) */ smgrcloseall(); --- 349,355 ---- /* * After any checkpoint, close all smgr files. This is so we * won't hang onto smgr references to deleted files ! * indefinitely. */ smgrcloseall(); *** src/backend/rewrite/rewriteDefine.c.orig Fri Dec 31 17:46:07 2004 --- src/backend/rewrite/rewriteDefine.c Mon Jan 10 12:38:18 2005 *************** *** 484,493 **** */ if (RelisBecomingView) { ! if (event_relation->rd_smgr == NULL) ! event_relation->rd_smgr = smgropen(event_relation->rd_node); smgrscheduleunlink(event_relation->rd_smgr, event_relation->rd_istemp); - event_relation->rd_smgr = NULL; } /* Close rel, but keep lock till commit... */ --- 484,491 ---- */ if (RelisBecomingView) { ! RelationOpenSmgr(event_relation); smgrscheduleunlink(event_relation->rd_smgr, event_relation->rd_istemp); } /* Close rel, but keep lock till commit... */ *** src/backend/storage/buffer/bufmgr.c.orig Mon Jan 3 13:49:41 2005 --- src/backend/storage/buffer/bufmgr.c Mon Jan 10 12:38:11 2005 *************** *** 131,138 **** isLocalBuf = reln->rd_istemp; /* Open it at the smgr level if not already done */ ! if (reln->rd_smgr == NULL) ! reln->rd_smgr = smgropen(reln->rd_node); /* Substitute proper block number if caller asked for P_NEW */ if (isExtend) --- 131,137 ---- isLocalBuf = reln->rd_istemp; /* Open it at the smgr level if not already done */ ! RelationOpenSmgr(reln); /* Substitute proper block number if caller asked for P_NEW */ if (isExtend) *************** *** 1130,1137 **** RelationGetNumberOfBlocks(Relation relation) { /* Open it at the smgr level if not already done */ ! if (relation->rd_smgr == NULL) ! relation->rd_smgr = smgropen(relation->rd_node); return smgrnblocks(relation->rd_smgr); } --- 1129,1135 ---- RelationGetNumberOfBlocks(Relation relation) { /* Open it at the smgr level if not already done */ ! RelationOpenSmgr(relation); return smgrnblocks(relation->rd_smgr); } *************** *** 1147,1154 **** RelationTruncate(Relation rel, BlockNumber nblocks) { /* Open it at the smgr level if not already done */ ! if (rel->rd_smgr == NULL) ! rel->rd_smgr = smgropen(rel->rd_node); /* Make sure rd_targblock isn't pointing somewhere past end */ rel->rd_targblock = InvalidBlockNumber; --- 1145,1151 ---- RelationTruncate(Relation rel, BlockNumber nblocks) { /* Open it at the smgr level if not already done */ ! RelationOpenSmgr(rel); /* Make sure rd_targblock isn't pointing somewhere past end */ rel->rd_targblock = InvalidBlockNumber; *************** *** 1445,1452 **** BufferDesc *bufHdr; /* Open rel at the smgr level if not already done */ ! if (rel->rd_smgr == NULL) ! rel->rd_smgr = smgropen(rel->rd_node); if (rel->rd_istemp) { --- 1442,1448 ---- BufferDesc *bufHdr; /* Open rel at the smgr level if not already done */ ! RelationOpenSmgr(rel); if (rel->rd_istemp) { *** src/backend/storage/buffer/localbuf.c.orig Fri Dec 31 17:46:08 2004 --- src/backend/storage/buffer/localbuf.c Mon Jan 10 12:38:12 2005 *************** *** 108,120 **** */ if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { ! SMgrRelation reln; /* Find smgr relation for buffer */ ! reln = smgropen(bufHdr->tag.rnode); /* And write... */ ! smgrwrite(reln, bufHdr->tag.blockNum, (char *) MAKE_PTR(bufHdr->data), true); --- 108,120 ---- */ if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { ! SMgrRelation oreln; /* Find smgr relation for buffer */ ! oreln = smgropen(bufHdr->tag.rnode); /* And write... */ ! smgrwrite(oreln, bufHdr->tag.blockNum, (char *) MAKE_PTR(bufHdr->data), true); *** src/backend/storage/smgr/smgr.c.orig Fri Dec 31 17:46:11 2004 --- src/backend/storage/smgr/smgr.c Mon Jan 10 13:47:23 2005 *************** *** 216,221 **** --- 216,222 ---- if (!found) { /* hash_search already filled in the lookup key */ + reln->smgr_owner = NULL; reln->smgr_which = 0; /* we only have md.c at present */ reln->md_fd = NULL; /* mark it not open */ } *************** *** 224,238 **** } /* ! * smgrclose() -- Close and delete an SMgrRelation object. * ! * It is the caller's responsibility not to leave any dangling references ! * to the object. (Pointers should be cleared after successful return; ! * on the off chance of failure, the SMgrRelation object will still exist.) */ void smgrclose(SMgrRelation reln) { if (!(*(smgrsw[reln->smgr_which].smgr_close)) (reln)) ereport(ERROR, (errcode_for_file_access(), --- 225,260 ---- } /* ! * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object * ! * There can be only one owner at a time; this is sufficient since currently ! * the only such owners exist in the relcache. ! */ ! void ! smgrsetowner(SMgrRelation *owner, SMgrRelation reln) ! { ! /* ! * First, unhook any old owner. (Normally there shouldn't be any, but ! * it seems possible that this can happen during swap_relation_files() ! * depending on the order of processing. It's ok to close the old ! * relcache entry early in that case.) ! */ ! if (reln->smgr_owner) ! *(reln->smgr_owner) = NULL; ! ! /* Now establish the ownership relationship. */ ! reln->smgr_owner = owner; ! *owner = reln; ! } ! ! /* ! * smgrclose() -- Close and delete an SMgrRelation object. */ void smgrclose(SMgrRelation reln) { + SMgrRelation *owner; + if (!(*(smgrsw[reln->smgr_which].smgr_close)) (reln)) ereport(ERROR, (errcode_for_file_access(), *************** *** 241,256 **** reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode))); if (hash_search(SMgrRelationHash, (void *) &(reln->smgr_rnode), HASH_REMOVE, NULL) == NULL) elog(ERROR, "SMgrRelation hashtable corrupted"); } /* * smgrcloseall() -- Close all existing SMgrRelation objects. - * - * It is the caller's responsibility not to leave any dangling references. */ void smgrcloseall(void) --- 263,286 ---- reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode))); + owner = reln->smgr_owner; + if (hash_search(SMgrRelationHash, (void *) &(reln->smgr_rnode), HASH_REMOVE, NULL) == NULL) elog(ERROR, "SMgrRelation hashtable corrupted"); + + /* + * Unhook the owner pointer, if any. We do this last since in the + * remote possibility of failure above, the SMgrRelation object will still + * exist. + */ + if (owner) + *owner = NULL; } /* * smgrcloseall() -- Close all existing SMgrRelation objects. */ void smgrcloseall(void) *************** *** 275,282 **** * This has the same effects as smgrclose(smgropen(rnode)), but it avoids * uselessly creating a hashtable entry only to drop it again when no * such entry exists already. - * - * It is the caller's responsibility not to leave any dangling references. */ void smgrclosenode(RelFileNode rnode) --- 305,310 ---- *** src/backend/utils/cache/inval.c.orig Mon Jan 10 10:43:49 2005 --- src/backend/utils/cache/inval.c Mon Jan 10 12:54:00 2005 *************** *** 414,430 **** } else if (msg->id == SHAREDINVALRELCACHE_ID) { - /* - * If the message includes a valid relfilenode, we must ensure - * that smgr cache entry gets zapped. The relcache will handle - * this if called, otherwise we must do it directly. - */ if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == InvalidOid) { ! if (OidIsValid(msg->rc.physId.relNode)) ! RelationCacheInvalidateEntry(msg->rc.relId, &msg->rc.physId); ! else ! RelationCacheInvalidateEntry(msg->rc.relId, NULL); for (i = 0; i < cache_callback_count; i++) { --- 414,422 ---- } else if (msg->id == SHAREDINVALRELCACHE_ID) { if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == InvalidOid) { ! RelationCacheInvalidateEntry(msg->rc.relId); for (i = 0; i < cache_callback_count; i++) { *************** *** 434,445 **** (*ccitem->function) (ccitem->arg, msg->rc.relId); } } ! else ! { ! /* might have smgr entry even if not in our database */ ! if (OidIsValid(msg->rc.physId.relNode)) ! smgrclosenode(msg->rc.physId); ! } } else elog(FATAL, "unrecognized SI message id: %d", msg->id); --- 426,442 ---- (*ccitem->function) (ccitem->arg, msg->rc.relId); } } ! /* ! * If the message includes a valid relfilenode, we must ensure ! * the smgr cache entry gets zapped. This might not have happened ! * above since the relcache entry might not have existed or might ! * have been associated with a different relfilenode. ! * ! * XXX there is no real good reason for rnode inval to be in the ! * same message at all. FIXME in 8.1. ! */ ! if (OidIsValid(msg->rc.physId.relNode)) ! smgrclosenode(msg->rc.physId); } else elog(FATAL, "unrecognized SI message id: %d", msg->id); *** src/backend/utils/cache/relcache.c.orig Fri Dec 31 17:46:18 2004 --- src/backend/utils/cache/relcache.c Mon Jan 10 12:56:50 2005 *************** *** 1659,1669 **** * ensures that the low-level file access state is updated after, say, * a vacuum truncation. */ ! if (relation->rd_smgr) ! { ! smgrclose(relation->rd_smgr); ! relation->rd_smgr = NULL; ! } /* * Never, never ever blow away a nailed-in system relation, because --- 1659,1665 ---- * ensures that the low-level file access state is updated after, say, * a vacuum truncation. */ ! RelationCloseSmgr(relation); /* * Never, never ever blow away a nailed-in system relation, because *************** *** 1857,1872 **** * * Any relcache entry matching the relid must be flushed. (Note: caller has * already determined that the relid belongs to our database or is a shared ! * relation.) If rnode isn't NULL, we must also ensure that any smgr cache ! * entry matching that rnode is flushed. ! * ! * Ordinarily, if rnode is supplied then it will match the relfilenode of ! * the target relid. However, it's possible for rnode to be different if ! * someone is engaged in a relfilenode change. In that case we want to ! * make sure we clear the right cache entries. This has to be done here ! * to keep things in sync between relcache and smgr cache --- we can't have ! * someone flushing an smgr cache entry that a relcache entry still points ! * to. * * We used to skip local relations, on the grounds that they could * not be targets of cross-backend SI update messages; but it seems --- 1853,1859 ---- * * Any relcache entry matching the relid must be flushed. (Note: caller has * already determined that the relid belongs to our database or is a shared ! * relation.) * * We used to skip local relations, on the grounds that they could * not be targets of cross-backend SI update messages; but it seems *************** *** 1875,1881 **** * local and nonlocal relations. */ void ! RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode) { Relation relation; --- 1862,1868 ---- * local and nonlocal relations. */ void ! RelationCacheInvalidateEntry(Oid relationId) { Relation relation; *************** *** 1884,1903 **** if (PointerIsValid(relation)) { relcacheInvalsReceived++; - if (rnode) - { - /* Need to be sure smgr is flushed, but don't do it twice */ - if (relation->rd_smgr == NULL || - !RelFileNodeEquals(*rnode, relation->rd_node)) - smgrclosenode(*rnode); - } RelationFlushRelation(relation); } - else - { - if (rnode) - smgrclosenode(*rnode); - } } /* --- 1871,1878 ---- *************** *** 1946,1956 **** relation = idhentry->reldesc; /* Must close all smgr references to avoid leaving dangling ptrs */ ! if (relation->rd_smgr) ! { ! smgrclose(relation->rd_smgr); ! relation->rd_smgr = NULL; ! } /* Ignore new relations, since they are never SI targets */ if (relation->rd_createSubid != InvalidSubTransactionId) --- 1921,1927 ---- relation = idhentry->reldesc; /* Must close all smgr references to avoid leaving dangling ptrs */ ! RelationCloseSmgr(relation); /* Ignore new relations, since they are never SI targets */ if (relation->rd_createSubid != InvalidSubTransactionId) *** src/include/storage/smgr.h.orig Fri Dec 31 17:47:06 2004 --- src/include/storage/smgr.h Mon Jan 10 12:37:55 2005 *************** *** 27,38 **** --- 27,48 ---- * operations imply I/O, they just create or destroy a hashtable entry. * (But smgrclose() may release associated resources, such as OS-level file * descriptors.) + * + * An SMgrRelation may have an "owner", which is just a pointer to it from + * somewhere else; smgr.c will clear this pointer if the SMgrRelation is + * closed. We use this to avoid dangling pointers from relcache to smgr + * without having to make the smgr explicitly aware of relcache. There + * can't be more than one "owner" pointer per SMgrRelation, but that's + * all we need. */ typedef struct SMgrRelationData { /* rnode is the hashtable lookup key, so it must be first! */ RelFileNode smgr_rnode; /* relation physical identifier */ + /* pointer to owning pointer, or NULL if none */ + struct SMgrRelationData **smgr_owner; + /* additional public fields may someday exist here */ /* *************** *** 49,54 **** --- 59,65 ---- extern void smgrinit(void); extern SMgrRelation smgropen(RelFileNode rnode); + extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln); extern void smgrclose(SMgrRelation reln); extern void smgrcloseall(void); extern void smgrclosenode(RelFileNode rnode); *** src/include/utils/rel.h.orig Fri Dec 31 17:47:10 2004 --- src/include/utils/rel.h Mon Jan 10 12:37:49 2005 *************** *** 234,239 **** --- 234,264 ---- ((relation)->rd_rel->relnamespace) /* + * RelationOpenSmgr + * Open the relation at the smgr level, if not already done. + */ + #define RelationOpenSmgr(relation) \ + do { \ + if ((relation)->rd_smgr == NULL) \ + smgrsetowner(&((relation)->rd_smgr), smgropen((relation)->rd_node)); \ + } while (0) + + /* + * RelationCloseSmgr + * Close the relation at the smgr level, if not already done. + * + * Note: smgrclose should unhook from owner pointer, hence the Assert. + */ + #define RelationCloseSmgr(relation) \ + do { \ + if ((relation)->rd_smgr != NULL) \ + { \ + smgrclose((relation)->rd_smgr); \ + Assert((relation)->rd_smgr == NULL); \ + } \ + } while (0) + + /* * RELATION_IS_LOCAL * If a rel is either temp or newly created in the current transaction, * it can be assumed to be visible only to the current backend. *** src/include/utils/relcache.h.orig Fri Dec 31 17:47:10 2004 --- src/include/utils/relcache.h Mon Jan 10 12:37:49 2005 *************** *** 61,67 **** */ extern void RelationForgetRelation(Oid rid); ! extern void RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode); extern void RelationCacheInvalidate(void); --- 61,67 ---- */ extern void RelationForgetRelation(Oid rid); ! extern void RelationCacheInvalidateEntry(Oid relationId); extern void RelationCacheInvalidate(void);
pgsql-patches by date: