Re: pgsql: Add kqueue(2) support to the WaitEventSet API. - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: pgsql: Add kqueue(2) support to the WaitEventSet API. |
Date | |
Msg-id | 27649.1582500247@sss.pgh.pa.us Whole thread Raw |
In response to | Re: pgsql: Add kqueue(2) support to the WaitEventSet API. (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses |
Re: pgsql: Add kqueue(2) support to the WaitEventSet API.
|
List | pgsql-hackers |
I wrote: > Here's a draft patch that does it like that. On reflection, trying to make ReserveExternalFD serve two different use-cases was pretty messy. Here's a version that splits it into two functions. I also took the trouble to fix dblink. regards, tom lane diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c index c1155e3..a5f69fc 100644 --- a/contrib/dblink/dblink.c +++ b/contrib/dblink/dblink.c @@ -200,12 +200,27 @@ dblink_get_conn(char *conname_or_str, if (connstr == NULL) connstr = conname_or_str; dblink_connstr_check(connstr); + + /* + * We must obey fd.c's limit on non-virtual file descriptors. Assume + * that a PGconn represents one long-lived FD. (Doing this here also + * ensures that VFDs are closed if needed to make room.) + */ + if (!AcquireExternalFD()) + ereport(ERROR, + (errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION), + errmsg("could not establish connection"), + errdetail("There are too many open files."))); + + /* OK to make connection */ conn = PQconnectdb(connstr); + if (PQstatus(conn) == CONNECTION_BAD) { char *msg = pchomp(PQerrorMessage(conn)); PQfinish(conn); + ReleaseExternalFD(); ereport(ERROR, (errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION), errmsg("could not establish connection"), @@ -282,12 +297,26 @@ dblink_connect(PG_FUNCTION_ARGS) /* check password in connection string if not superuser */ dblink_connstr_check(connstr); + + /* + * We must obey fd.c's limit on non-virtual file descriptors. Assume that + * a PGconn represents one long-lived FD. (Doing this here also ensures + * that VFDs are closed if needed to make room.) + */ + if (!AcquireExternalFD()) + ereport(ERROR, + (errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION), + errmsg("could not establish connection"), + errdetail("There are too many open files."))); + + /* OK to make connection */ conn = PQconnectdb(connstr); if (PQstatus(conn) == CONNECTION_BAD) { msg = pchomp(PQerrorMessage(conn)); PQfinish(conn); + ReleaseExternalFD(); if (rconn) pfree(rconn); @@ -312,7 +341,10 @@ dblink_connect(PG_FUNCTION_ARGS) else { if (pconn->conn) + { PQfinish(pconn->conn); + ReleaseExternalFD(); + } pconn->conn = conn; } @@ -346,6 +378,7 @@ dblink_disconnect(PG_FUNCTION_ARGS) dblink_conn_not_avail(conname); PQfinish(conn); + ReleaseExternalFD(); if (rconn) { deleteConnection(conname); @@ -780,7 +813,10 @@ dblink_record_internal(FunctionCallInfo fcinfo, bool is_async) { /* if needed, close the connection to the database */ if (freeconn) + { PQfinish(conn); + ReleaseExternalFD(); + } } PG_END_TRY(); @@ -1458,7 +1494,10 @@ dblink_exec(PG_FUNCTION_ARGS) { /* if needed, close the connection to the database */ if (freeconn) + { PQfinish(conn); + ReleaseExternalFD(); + } } PG_END_TRY(); @@ -2563,6 +2602,7 @@ createNewConnection(const char *name, remoteConn *rconn) if (found) { PQfinish(rconn->conn); + ReleaseExternalFD(); pfree(rconn); ereport(ERROR, @@ -2604,6 +2644,7 @@ dblink_security_check(PGconn *conn, remoteConn *rconn) if (!PQconnectionUsedPassword(conn)) { PQfinish(conn); + ReleaseExternalFD(); if (rconn) pfree(rconn); diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index 29c811a..74576d7 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -20,6 +20,7 @@ #include "miscadmin.h" #include "pgstat.h" #include "postgres_fdw.h" +#include "storage/fd.h" #include "storage/latch.h" #include "utils/hsearch.h" #include "utils/inval.h" @@ -259,10 +260,27 @@ connect_pg_server(ForeignServer *server, UserMapping *user) keywords[n] = values[n] = NULL; - /* verify connection parameters and make connection */ + /* verify the set of connection parameters */ check_conn_params(keywords, values, user); + /* + * We must obey fd.c's limit on non-virtual file descriptors. Assume + * that a PGconn represents one long-lived FD. (Doing this here also + * ensures that VFDs are closed if needed to make room.) + */ + if (!AcquireExternalFD()) + ereport(ERROR, + (errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION), + errmsg("could not connect to server \"%s\"", + server->servername), + errdetail("There are too many open files."))); + + /* OK to make connection */ conn = PQconnectdbParams(keywords, values, false); + + if (!conn) + ReleaseExternalFD(); /* because the PG_CATCH block won't */ + if (!conn || PQstatus(conn) != CONNECTION_OK) ereport(ERROR, (errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION), @@ -294,7 +312,10 @@ connect_pg_server(ForeignServer *server, UserMapping *user) { /* Release PGconn data structure if we managed to create one */ if (conn) + { PQfinish(conn); + ReleaseExternalFD(); + } PG_RE_THROW(); } PG_END_TRY(); @@ -312,6 +333,7 @@ disconnect_pg_server(ConnCacheEntry *entry) { PQfinish(entry->conn); entry->conn = NULL; + ReleaseExternalFD(); } } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index fd527f2..d19408b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -774,6 +774,7 @@ static const char *const xlogSourceNames[] = {"any", "archive", "pg_wal", "strea * openLogFile is -1 or a kernel FD for an open log file segment. * openLogSegNo identifies the segment. These variables are only used to * write the XLOG, and so will normally refer to the active segment. + * Note: call Reserve/ReleaseExternalFD to track consumption of this FD. */ static int openLogFile = -1; static XLogSegNo openLogSegNo = 0; @@ -785,6 +786,9 @@ static XLogSegNo openLogSegNo = 0; * will be just past that page. readLen indicates how much of the current * page has been read into readBuf, and readSource indicates where we got * the currently open file from. + * Note: we could use Reserve/ReleaseExternalFD to track consumption of + * this FD too; but it doesn't currently seem worthwhile, since the XLOG is + * not read by general-purpose sessions. */ static int readFile = -1; static XLogSegNo readSegNo = 0; @@ -2447,6 +2451,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) /* create/use new log file */ use_existent = true; openLogFile = XLogFileInit(openLogSegNo, &use_existent, true); + ReserveExternalFD(); } /* Make sure we have the current logfile open */ @@ -2455,6 +2460,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo, wal_segment_size); openLogFile = XLogFileOpen(openLogSegNo); + ReserveExternalFD(); } /* Add current page to the set of pending pages-to-dump */ @@ -2605,6 +2611,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo, wal_segment_size); openLogFile = XLogFileOpen(openLogSegNo); + ReserveExternalFD(); } issue_xlog_fsync(openLogFile, openLogSegNo); @@ -3811,6 +3818,7 @@ XLogFileClose(void) } openLogFile = -1; + ReleaseExternalFD(); } /* @@ -5224,6 +5232,11 @@ BootStrapXLOG(void) use_existent = false; openLogFile = XLogFileInit(1, &use_existent, false); + /* + * We needn't bother with Reserve/ReleaseExternalFD here, since we'll + * close the file again in a moment. + */ + /* Write the first page with the initial record */ errno = 0; pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE); diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 59dc4f3..462b4d7 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -602,6 +602,9 @@ retry2: pg_freeaddrinfo_all(hints.ai_family, addrs); + /* Now that we have a long-lived socket, tell fd.c about it. */ + ReserveExternalFD(); + return; startup_failed: diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index b3986be..cd61665 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2554,9 +2554,14 @@ ClosePostmasterPorts(bool am_syslogger) (errcode_for_file_access(), errmsg_internal("could not close postmaster death monitoring pipe in child process: %m"))); postmaster_alive_fds[POSTMASTER_FD_OWN] = -1; + /* Notify fd.c that we released one pipe FD. */ + ReleaseExternalFD(); #endif - /* Close the listen sockets */ + /* + * Close the postmaster's listen sockets. These aren't tracked by fd.c, + * so we don't call ReleaseExternalFD() here. + */ for (i = 0; i < MAXLISTEN; i++) { if (ListenSocket[i] != PGINVALID_SOCKET) @@ -2566,7 +2571,10 @@ ClosePostmasterPorts(bool am_syslogger) } } - /* If using syslogger, close the read side of the pipe */ + /* + * If using syslogger, close the read side of the pipe. We don't bother + * tracking this in fd.c, either. + */ if (!am_syslogger) { #ifndef WIN32 @@ -4279,6 +4287,9 @@ BackendInitialize(Port *port) /* Save port etc. for ps status */ MyProcPort = port; + /* Tell fd.c about the long-lived FD associated with the port */ + ReserveExternalFD(); + /* * PreAuthDelay is a debugging aid for investigating problems in the * authentication cycle: it can be set in postgresql.conf to allow time to @@ -6442,6 +6453,20 @@ restore_backend_variables(BackendParameters *param, Port *port) strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH); strlcpy(ExtraOptions, param->ExtraOptions, MAXPGPATH); + + /* + * We need to restore fd.c's counts of externally-opened FDs; to avoid + * confusion, be sure to do this after restoring max_safe_fds. (Note: + * BackendInitialize will handle this for port->sock.) + */ +#ifndef WIN32 + if (postmaster_alive_fds[0] >= 0) + ReserveExternalFD(); + if (postmaster_alive_fds[1] >= 0) + ReserveExternalFD(); +#endif + if (pgStatSock != PGINVALID_SOCKET) + ReserveExternalFD(); } @@ -6584,6 +6609,10 @@ InitPostmasterDeathWatchHandle(void) (errcode_for_file_access(), errmsg_internal("could not create pipe to monitor postmaster death: %m"))); + /* Notify fd.c that we've eaten two FDs for the pipe. */ + ReserveExternalFD(); + ReserveExternalFD(); + /* * Set O_NONBLOCK to allow testing for the fd's presence with a read() * call. diff --git a/src/backend/postmaster/syslogger.c b/src/backend/postmaster/syslogger.c index b2b69a7..cf7b535 100644 --- a/src/backend/postmaster/syslogger.c +++ b/src/backend/postmaster/syslogger.c @@ -562,6 +562,11 @@ SysLogger_Start(void) * This means the postmaster must continue to hold the read end of the * pipe open, so we can pass it down to the reincarnated syslogger. This * is a bit klugy but we have little choice. + * + * Also note that we don't bother counting the pipe FDs by calling + * Reserve/ReleaseExternalFD. There's no real need to account for them + * accurately in the postmaster or syslogger process, and both ends of the + * pipe will wind up closed in all other postmaster children. */ #ifndef WIN32 if (syslogPipe[0] < 0) diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index b5f4df6..34f7443 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -61,6 +61,12 @@ * BasicOpenFile, it is solely the caller's responsibility to close the file * descriptor by calling close(2). * + * If a non-virtual file descriptor needs to be held open for any length of + * time, report it to fd.c by calling AcquireExternalFD or ReserveExternalFD + * (and eventually ReleaseExternalFD), so that we can take it into account + * while deciding how many VFDs can be open. This applies to FDs obtained + * with BasicOpenFile as well as those obtained without use of any fd.c API. + * *------------------------------------------------------------------------- */ @@ -103,8 +109,8 @@ /* * We must leave some file descriptors free for system(), the dynamic loader, * and other code that tries to open files without consulting fd.c. This - * is the number left free. (While we can be pretty sure we won't get - * EMFILE, there's never any guarantee that we won't get ENFILE due to + * is the number left free. (While we try fairly hard to prevent EMFILE + * errors, there's never any guarantee that we won't get ENFILE due to * other processes chewing up FDs. So it's a bad idea to try to open files * without consulting fd.c. Nonetheless we cannot control all code.) * @@ -119,9 +125,12 @@ /* * If we have fewer than this many usable FDs after allowing for the reserved - * ones, choke. + * ones, choke. (This value is chosen to work with "ulimit -n 64", but not + * much less than that. Note that this value ensures numExternalFDs can be + * at least 16; as of this writing, the contrib/postgres_fdw regression tests + * will not pass unless that can grow to at least 14.) */ -#define FD_MINFREE 10 +#define FD_MINFREE 48 /* * A number of platforms allow individual processes to open many more files @@ -132,8 +141,8 @@ int max_files_per_process = 1000; /* - * Maximum number of file descriptors to open for either VFD entries or - * AllocateFile/AllocateDir/OpenTransientFile operations. This is initialized + * Maximum number of file descriptors to open for operations that fd.c knows + * about (VFDs, AllocateFile etc, or "external" FDs). This is initialized * to a conservative value, and remains that way indefinitely in bootstrap or * standalone-backend cases. In normal postmaster operation, the postmaster * calls set_max_safe_fds() late in initialization to update the value, and @@ -142,7 +151,7 @@ int max_files_per_process = 1000; * Note: the value of max_files_per_process is taken into account while * setting this variable, and so need not be tested separately. */ -int max_safe_fds = 32; /* default if not changed */ +int max_safe_fds = FD_MINFREE; /* default if not changed */ /* Whether it is safe to continue running after fsync() fails. */ bool data_sync_retry = false; @@ -244,6 +253,11 @@ static int maxAllocatedDescs = 0; static AllocateDesc *allocatedDescs = NULL; /* + * Number of open "external" FDs reported to Reserve/ReleaseExternalFD. + */ +static int numExternalFDs = 0; + +/* * Number of temporary files opened during the current session; * this is used in generation of tempfile names. */ @@ -1025,6 +1039,80 @@ tryAgain: return -1; /* failure */ } +/* + * AcquireExternalFD - attempt to reserve an external file descriptor + * + * This should be used by callers that need to hold a file descriptor open + * over more than a short interval, but cannot use any of the other facilities + * provided by this module. + * + * The difference between this and the underlying ReserveExternalFD function + * is that this will report failure (by setting errno and returning false) + * if "too many" external FDs are already reserved. This should be used in + * any code where the total number of FDs to be reserved is not predictable + * and small. + */ +bool +AcquireExternalFD(void) +{ + /* + * We don't want more than max_safe_fds / 3 FDs to be consumed for + * "external" FDs. + */ + if (numExternalFDs < max_safe_fds / 3) + { + ReserveExternalFD(); + return true; + } + errno = EMFILE; + return false; +} + +/* + * ReserveExternalFD - report external consumption of a file descriptor + * + * This should be used by callers that need to hold a file descriptor open + * over more than a short interval, but cannot use any of the other facilities + * provided by this module. This just tracks the use of the FD and closes + * VFDs if needed to ensure we keep NUM_RESERVED_FDS FDs available. + * + * Call this directly only in code where failure to reserve the FD would be + * fatal; for example, the WAL-writing code does so, since the alternative is + * session failure. Also, it's very unwise to do so in code that could + * consume more than one FD per process. + * + * Note: as long as everybody plays nice so that NUM_RESERVED_FDS FDs remain + * available, it doesn't matter too much whether this is called before or + * after actually opening the FD; but doing so beforehand reduces the risk of + * an EMFILE failure if not everybody played nice. In any case, it's solely + * caller's responsibility to keep the external-FD count in sync with reality. + */ +void +ReserveExternalFD(void) +{ + /* + * Release VFDs if needed to stay safe. Because we do this before + * incrementing numExternalFDs, the final state will be as desired, i.e., + * nfile + numAllocatedDescs + numExternalFDs <= max_safe_fds. + */ + ReleaseLruFiles(); + + numExternalFDs++; +} + +/* + * ReleaseExternalFD - report release of an external file descriptor + * + * This is guaranteed not to change errno, so it can be used in failure paths. + */ +void +ReleaseExternalFD(void) +{ + Assert(numExternalFDs > 0); + numExternalFDs--; +} + + #if defined(FDDEBUG) static void @@ -1185,7 +1273,7 @@ ReleaseLruFile(void) static void ReleaseLruFiles(void) { - while (nfile + numAllocatedDescs >= max_safe_fds) + while (nfile + numAllocatedDescs + numExternalFDs >= max_safe_fds) { if (!ReleaseLruFile()) break; @@ -2176,13 +2264,13 @@ reserveAllocatedDesc(void) /* * If the array hasn't yet been created in the current process, initialize - * it with FD_MINFREE / 2 elements. In many scenarios this is as many as + * it with FD_MINFREE / 3 elements. In many scenarios this is as many as * we will ever need, anyway. We don't want to look at max_safe_fds * immediately because set_max_safe_fds() may not have run yet. */ if (allocatedDescs == NULL) { - newMax = FD_MINFREE / 2; + newMax = FD_MINFREE / 3; newDescs = (AllocateDesc *) malloc(newMax * sizeof(AllocateDesc)); /* Out of memory already? Treat as fatal error. */ if (newDescs == NULL) @@ -2200,10 +2288,12 @@ reserveAllocatedDesc(void) * * We mustn't let allocated descriptors hog all the available FDs, and in * practice we'd better leave a reasonable number of FDs for VFD use. So - * set the maximum to max_safe_fds / 2. (This should certainly be at - * least as large as the initial size, FD_MINFREE / 2.) + * set the maximum to max_safe_fds / 3. (This should certainly be at + * least as large as the initial size, FD_MINFREE / 3, so we aren't + * tightening the restriction here.) Recall that "external" FDs are + * allowed to consume another third of max_safe_fds. */ - newMax = max_safe_fds / 2; + newMax = max_safe_fds / 3; if (newMax > maxAllocatedDescs) { newDescs = (AllocateDesc *) realloc(allocatedDescs, diff --git a/src/backend/storage/ipc/dsm_impl.c b/src/backend/storage/ipc/dsm_impl.c index 138bdec..1972aec 100644 --- a/src/backend/storage/ipc/dsm_impl.c +++ b/src/backend/storage/ipc/dsm_impl.c @@ -247,14 +247,17 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size, /* * Create new segment or open an existing one for attach. * - * Even though we're not going through fd.c, we should be safe against - * running out of file descriptors, because of NUM_RESERVED_FDS. We're - * only opening one extra descriptor here, and we'll close it before - * returning. + * Even though we will close the FD before returning, it seems desirable + * to use Reserve/ReleaseExternalFD, to reduce the probability of EMFILE + * failure. The fact that we won't hold the FD open long justifies using + * ReserveExternalFD rather than AcquireExternalFD, though. */ + ReserveExternalFD(); + flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0); if ((fd = shm_open(name, flags, PG_FILE_MODE_OWNER)) == -1) { + ReleaseExternalFD(); if (errno != EEXIST) ereport(elevel, (errcode_for_dynamic_shared_memory(), @@ -278,6 +281,7 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size, /* Back out what's already been done. */ save_errno = errno; close(fd); + ReleaseExternalFD(); errno = save_errno; ereport(elevel, @@ -295,6 +299,7 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size, /* Back out what's already been done. */ save_errno = errno; close(fd); + ReleaseExternalFD(); shm_unlink(name); errno = save_errno; @@ -323,6 +328,7 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size, /* Back out what's already been done. */ save_errno = errno; close(fd); + ReleaseExternalFD(); if (op == DSM_OP_CREATE) shm_unlink(name); errno = save_errno; @@ -336,6 +342,7 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size, *mapped_address = address; *mapped_size = request_size; close(fd); + ReleaseExternalFD(); return true; } diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c index cbd4952..046ca5c 100644 --- a/src/backend/storage/ipc/latch.c +++ b/src/backend/storage/ipc/latch.c @@ -51,6 +51,7 @@ #include "port/atomics.h" #include "portability/instr_time.h" #include "postmaster/postmaster.h" +#include "storage/fd.h" #include "storage/ipc.h" #include "storage/latch.h" #include "storage/pmsignal.h" @@ -187,6 +188,9 @@ InitializeLatchSupport(void) /* Clean up, just for safety's sake; we'll set these below */ selfpipe_readfd = selfpipe_writefd = -1; selfpipe_owner_pid = 0; + /* Keep fd.c's accounting straight */ + ReleaseExternalFD(); + ReleaseExternalFD(); } else { @@ -194,6 +198,7 @@ InitializeLatchSupport(void) * Postmaster didn't create a self-pipe ... or else we're in an * EXEC_BACKEND build, in which case it doesn't matter since the * postmaster's pipe FDs were closed by the action of FD_CLOEXEC. + * fd.c won't have state to clean up, either. */ Assert(selfpipe_readfd == -1); } @@ -228,6 +233,10 @@ InitializeLatchSupport(void) selfpipe_readfd = pipefd[0]; selfpipe_writefd = pipefd[1]; selfpipe_owner_pid = MyProcPid; + + /* Tell fd.c about these two long-lived FDs */ + ReserveExternalFD(); + ReserveExternalFD(); #else /* currently, nothing to do here for Windows */ #endif @@ -604,24 +613,57 @@ CreateWaitEventSet(MemoryContext context, int nevents) set->exit_on_postmaster_death = false; #if defined(WAIT_USE_EPOLL) + if (!AcquireExternalFD()) + { + /* treat this as though epoll_create1 itself returned EMFILE */ + elog(ERROR, "epoll_create1 failed: %m"); + } #ifdef EPOLL_CLOEXEC set->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (set->epoll_fd < 0) + { + ReleaseExternalFD(); elog(ERROR, "epoll_create1 failed: %m"); + } #else /* cope with ancient glibc lacking epoll_create1 (e.g., RHEL5) */ set->epoll_fd = epoll_create(nevents); if (set->epoll_fd < 0) + { + ReleaseExternalFD(); elog(ERROR, "epoll_create failed: %m"); + } if (fcntl(set->epoll_fd, F_SETFD, FD_CLOEXEC) == -1) + { + int save_errno = errno; + + close(set->epoll_fd); + ReleaseExternalFD(); + errno = save_errno; elog(ERROR, "fcntl(F_SETFD) failed on epoll descriptor: %m"); + } #endif /* EPOLL_CLOEXEC */ #elif defined(WAIT_USE_KQUEUE) + if (!AcquireExternalFD()) + { + /* treat this as though kqueue itself returned EMFILE */ + elog(ERROR, "kqueue failed: %m"); + } set->kqueue_fd = kqueue(); if (set->kqueue_fd < 0) + { + ReleaseExternalFD(); elog(ERROR, "kqueue failed: %m"); + } if (fcntl(set->kqueue_fd, F_SETFD, FD_CLOEXEC) == -1) + { + int save_errno = errno; + + close(set->kqueue_fd); + ReleaseExternalFD(); + errno = save_errno; elog(ERROR, "fcntl(F_SETFD) failed on kqueue descriptor: %m"); + } set->report_postmaster_not_running = false; #elif defined(WAIT_USE_WIN32) @@ -655,8 +697,10 @@ FreeWaitEventSet(WaitEventSet *set) { #if defined(WAIT_USE_EPOLL) close(set->epoll_fd); + ReleaseExternalFD(); #elif defined(WAIT_USE_KQUEUE) close(set->kqueue_fd); + ReleaseExternalFD(); #elif defined(WAIT_USE_WIN32) WaitEvent *cur_event; diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 51e2ece..2085c62 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -35,6 +35,10 @@ * Likewise, use AllocateDir/FreeDir, not opendir/closedir, to allocate * open directories (DIR*), and OpenTransientFile/CloseTransientFile for an * unbuffered file descriptor. + * + * If you really can't use any of the above, at least call AcquireExternalFD + * or ReserveExternalFD to report any file descriptors that are held for any + * length of time. Failure to do so risks unnecessary EMFILE errors. */ #ifndef FD_H #define FD_H @@ -120,7 +124,12 @@ extern int CloseTransientFile(int fd); extern int BasicOpenFile(const char *fileName, int fileFlags); extern int BasicOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode); - /* Make a directory with default permissions */ +/* Use these for other cases, and also for long-lived BasicOpenFile FDs */ +extern bool AcquireExternalFD(void); +extern void ReserveExternalFD(void); +extern void ReleaseExternalFD(void); + +/* Make a directory with default permissions */ extern int MakePGDirectory(const char *directoryName); /* Miscellaneous support routines */
pgsql-hackers by date: