Re: Report: race conditions in WAL replay routines - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: Report: race conditions in WAL replay routines |
Date | |
Msg-id | 23714.1328483673@sss.pgh.pa.us Whole thread Raw |
In response to | Re: Report: race conditions in WAL replay routines (Simon Riggs <simon@2ndQuadrant.com>) |
Responses |
Re: Report: race conditions in WAL replay routines
|
List | pgsql-hackers |
Simon Riggs <simon@2ndQuadrant.com> writes: > Please post the patch rather than fixing directly. There's some subtle > stuff there and it would be best to discuss first. And here's a proposed patch for not throwing ERROR during replay of DROP TABLESPACE. I had originally thought this would be a one-liner s/ERROR/LOG/, but on inspection destroy_tablespace_directories() really needs to be changed too, so that it doesn't throw error for unremovable directories. regards, tom lane diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index 8ed6d06bb2370a54349f2a86ee13e08a381eacf2..ed0c9ea3ee5a372c6926f41fc0fc93ed7d80d6cd 100644 *** a/src/backend/commands/tablespace.c --- b/src/backend/commands/tablespace.c *************** create_tablespace_directories(const char *** 626,634 **** /* * destroy_tablespace_directories * ! * Attempt to remove filesystem infrastructure * ! * 'redo' indicates we are redoing a drop from XLOG; okay if nothing there * * Returns TRUE if successful, FALSE if some subdirectory is not empty */ --- 626,638 ---- /* * destroy_tablespace_directories * ! * Attempt to remove filesystem infrastructure for the tablespace. * ! * 'redo' indicates we are redoing a drop from XLOG; in that case we should ! * not throw an ERROR for problems, just LOG them. The worst consequence of ! * not removing files here would be failure to release some disk space, which ! * does not justify throwing an error that would require manual intervention ! * to get the database running again. * * Returns TRUE if successful, FALSE if some subdirectory is not empty */ *************** destroy_tablespace_directories(Oid table *** 681,686 **** --- 685,700 ---- pfree(linkloc_with_version_dir); return true; } + else if (redo) + { + /* in redo, just log other types of error */ + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not open directory \"%s\": %m", + linkloc_with_version_dir))); + pfree(linkloc_with_version_dir); + return false; + } /* else let ReadDir report the error */ } *************** destroy_tablespace_directories(Oid table *** 704,710 **** /* remove empty directory */ if (rmdir(subfile) < 0) ! ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove directory \"%s\": %m", subfile))); --- 718,724 ---- /* remove empty directory */ if (rmdir(subfile) < 0) ! ereport(redo ? LOG : ERROR, (errcode_for_file_access(), errmsg("could not remove directory \"%s\": %m", subfile))); *************** destroy_tablespace_directories(Oid table *** 716,738 **** /* remove version directory */ if (rmdir(linkloc_with_version_dir) < 0) ! ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove directory \"%s\": %m", linkloc_with_version_dir))); /* * Try to remove the symlink. We must however deal with the possibility * that it's a directory instead of a symlink --- this could happen during * WAL replay (see TablespaceCreateDbspace), and it is also the case on * Windows where junction points lstat() as directories. */ linkloc = pstrdup(linkloc_with_version_dir); get_parent_directory(linkloc); if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode)) { if (rmdir(linkloc) < 0) ! ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove directory \"%s\": %m", linkloc))); --- 730,759 ---- /* remove version directory */ if (rmdir(linkloc_with_version_dir) < 0) ! { ! ereport(redo ? LOG : ERROR, (errcode_for_file_access(), errmsg("could not remove directory \"%s\": %m", linkloc_with_version_dir))); + pfree(linkloc_with_version_dir); + return false; + } /* * Try to remove the symlink. We must however deal with the possibility * that it's a directory instead of a symlink --- this could happen during * WAL replay (see TablespaceCreateDbspace), and it is also the case on * Windows where junction points lstat() as directories. + * + * Note: in the redo case, we'll return true if this final step fails; + * there's no point in retrying it. */ linkloc = pstrdup(linkloc_with_version_dir); get_parent_directory(linkloc); if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode)) { if (rmdir(linkloc) < 0) ! ereport(redo ? LOG : ERROR, (errcode_for_file_access(), errmsg("could not remove directory \"%s\": %m", linkloc))); *************** destroy_tablespace_directories(Oid table *** 740,746 **** else { if (unlink(linkloc) < 0) ! ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove symbolic link \"%s\": %m", linkloc))); --- 761,767 ---- else { if (unlink(linkloc) < 0) ! ereport(redo ? LOG : ERROR, (errcode_for_file_access(), errmsg("could not remove symbolic link \"%s\": %m", linkloc))); *************** tblspc_redo(XLogRecPtr lsn, XLogRecord * *** 1451,1464 **** xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record); /* ! * If we issued a WAL record for a drop tablespace it is because there ! * were no files in it at all. That means that no permanent objects ! * can exist in it at this point. * * It is possible for standby users to be using this tablespace as a * location for their temporary files, so if we fail to remove all * files then do conflict processing and try again, if currently * enabled. */ if (!destroy_tablespace_directories(xlrec->ts_id, true)) { --- 1472,1490 ---- xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record); /* ! * If we issued a WAL record for a drop tablespace it implies that ! * there were no files in it at all when the DROP was done. That means ! * that no permanent objects can exist in it at this point. * * It is possible for standby users to be using this tablespace as a * location for their temporary files, so if we fail to remove all * files then do conflict processing and try again, if currently * enabled. + * + * Other possible reasons for failure include bollixed file permissions + * on a standby server when they were okay on the primary, etc etc. + * There's not much we can do about that, so just remove what we can + * and press on. */ if (!destroy_tablespace_directories(xlrec->ts_id, true)) { *************** tblspc_redo(XLogRecPtr lsn, XLogRecord * *** 1466,1480 **** /* * If we did recovery processing then hopefully the backends who ! * wrote temp files should have cleaned up and exited by now. So ! * lets recheck before we throw an error. If !process_conflicts ! * then this will just fail again. */ if (!destroy_tablespace_directories(xlrec->ts_id, true)) ! ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("tablespace %u is not empty", ! xlrec->ts_id))); } } else --- 1492,1509 ---- /* * If we did recovery processing then hopefully the backends who ! * wrote temp files should have cleaned up and exited by now. So ! * retry before complaining. If we fail again, this is just a LOG ! * condition, because it's not worth throwing an ERROR for (as ! * that would crash the database and require manual intervention ! * before we could get past this WAL record on restart). */ if (!destroy_tablespace_directories(xlrec->ts_id, true)) ! ereport(LOG, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("tablespace %u is not empty", ! xlrec->ts_id), ! errdetail("Continuing even though some files could not be removed."))); } } else
pgsql-hackers by date: