Thread: Expansion of our checks for connection-loss errors
Over in the thread at [1], we've tentatively determined that the reason buildfarm member lorikeet is currently failing is that its network stack returns ECONNABORTED for (some?) connection failures, whereas our code is only expecting ECONNRESET. Fujii Masao therefore proposes that we treat ECONNABORTED the same as ECONNRESET. I think this is a good idea, but after a bit of research I feel it does not go far enough. I find these POSIX-standard errnos that also seem likely candidates to be returned for a hard loss of connection: ECONNABORTED EHOSTUNREACH ENETDOWN ENETUNREACH All of these have been in POSIX since SUSv2, so it seems unlikely that we need to #ifdef any of them. (It is in any case pretty silly that we have #ifdefs around a very small minority of our references to ECONNRESET :-(.) There are some other related errnos, such as ECONNREFUSED, that don't seem like they'd be returned for a failure of a pre-existing connection, so we don't need to include them in such tests. Accordingly, I propose the attached patch (an expansion of Fujii-san's) that causes us to test for all five errnos anyplace we had been checking for ECONNRESET. I felt that this was getting to the point where we'd better centralize the knowledge of what to check, so the patch does that, via an inline function and an admittedly hacky macro. I also upgraded some places such as strerror.c to have full support for these symbols. All of the machines I have (even as far back as HPUX 10.20) also define ENETRESET and EHOSTDOWN. However, those symbols do not appear in SUSv2. ENETRESET was added at some later point, but EHOSTDOWN is still not in POSIX. For the moment I've left these second-tier symbols out of the patch, but there's a case for adding them. I'm not sure whether there'd be any point in trying to #ifdef them. BTW, I took out the conditional defines of some of these errnos in libpq's win32.h; AFAICS that's been dead code ever since we added #define's for them to win32_port.h. Am I missing something? This seems like a bug fix to me, so I'm inclined to back-patch. regards, tom lane [1] https://www.postgresql.org/message-id/flat/E1kPc9v-0005L4-2l%40gemulon.postgresql.org diff --git a/src/backend/port/win32/socket.c b/src/backend/port/win32/socket.c index 6fbd1ed6fb..0f28c07ed1 100644 --- a/src/backend/port/win32/socket.c +++ b/src/backend/port/win32/socket.c @@ -123,10 +123,14 @@ TranslateSocketError(void) case WSAEHOSTUNREACH: case WSAEHOSTDOWN: case WSAHOST_NOT_FOUND: + errno = EHOSTUNREACH; + break; case WSAENETDOWN: - case WSAENETUNREACH: case WSAENETRESET: - errno = EHOSTUNREACH; + errno = ENETDOWN; + break; + case WSAENETUNREACH: + errno = ENETUNREACH; break; case WSAENOTCONN: case WSAESHUTDOWN: diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index d0b368530e..8937f223a8 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -712,9 +712,7 @@ errcode_for_socket_access(void) { /* Loss of connection */ case EPIPE: -#ifdef ECONNRESET - case ECONNRESET: -#endif + case ALL_CONNECTION_LOSS_ERRNOS: edata->sqlerrcode = ERRCODE_CONNECTION_FAILURE; break; diff --git a/src/bin/pg_dump/parallel.c b/src/bin/pg_dump/parallel.c index f0587f41e4..b8ab939179 100644 --- a/src/bin/pg_dump/parallel.c +++ b/src/bin/pg_dump/parallel.c @@ -1825,7 +1825,7 @@ piperead(int s, char *buf, int len) { int ret = recv(s, buf, len, 0); - if (ret < 0 && WSAGetLastError() == WSAECONNRESET) + if (ret < 0 && errno_is_connection_loss(WSAGetLastError())) { /* EOF on the pipe! */ ret = 0; diff --git a/src/include/port.h b/src/include/port.h index 84bf2c363f..ffa21e782a 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -99,6 +99,30 @@ extern void pgfnames_cleanup(char **filenames); ) #endif +/* Test for all errnos that report loss of an established TCP connection */ +static inline bool +errno_is_connection_loss(int e) +{ + if (e == ECONNRESET || + e == ECONNABORTED || + e == EHOSTUNREACH || + e == ENETDOWN || + e == ENETUNREACH) + return true; + return false; +} + +/* + * To test for connection-loss errnos in a switch statement, write + * "case ALL_CONNECTION_LOSS_ERRNOS:". + */ +#define ALL_CONNECTION_LOSS_ERRNOS \ + ECONNRESET: \ + case ECONNABORTED: \ + case EHOSTUNREACH: \ + case ENETDOWN: \ + case ENETUNREACH + /* Portable locale initialization (in exec.c) */ extern void set_pglocale_pgservice(const char *argv0, const char *app); diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h index 8b6576b23d..7ce8c87e8d 100644 --- a/src/include/port/win32_port.h +++ b/src/include/port/win32_port.h @@ -351,6 +351,10 @@ extern int pgwin32_safestat(const char *path, struct stat *buf); #define EADDRNOTAVAIL WSAEADDRNOTAVAIL #undef EHOSTUNREACH #define EHOSTUNREACH WSAEHOSTUNREACH +#undef ENETDOWN +#define ENETDOWN WSAENETDOWN +#undef ENETUNREACH +#define ENETUNREACH WSAENETUNREACH #undef ENOTCONN #define ENOTCONN WSAENOTCONN diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c index ff840b7730..ecc3b4ba0b 100644 --- a/src/interfaces/libpq/fe-misc.c +++ b/src/interfaces/libpq/fe-misc.c @@ -679,11 +679,9 @@ retry3: if (SOCK_ERRNO == EWOULDBLOCK) return someread; #endif - /* We might get ECONNRESET here if using TCP and backend died */ -#ifdef ECONNRESET - if (SOCK_ERRNO == ECONNRESET) + /* We might get ECONNRESET etc here if using TCP and backend died */ + if (errno_is_connection_loss(SOCK_ERRNO)) goto definitelyFailed; -#endif /* pqsecure_read set the error message for us */ return -1; } @@ -769,11 +767,9 @@ retry4: if (SOCK_ERRNO == EWOULDBLOCK) return 0; #endif - /* We might get ECONNRESET here if using TCP and backend died */ -#ifdef ECONNRESET - if (SOCK_ERRNO == ECONNRESET) + /* We might get ECONNRESET etc here if using TCP and backend died */ + if (errno_is_connection_loss(SOCK_ERRNO)) goto definitelyFailed; -#endif /* pqsecure_read set the error message for us */ return -1; } diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c index d609a38bbe..3c89c34f80 100644 --- a/src/interfaces/libpq/fe-secure-openssl.c +++ b/src/interfaces/libpq/fe-secure-openssl.c @@ -204,7 +204,7 @@ rloop: { result_errno = SOCK_ERRNO; if (result_errno == EPIPE || - result_errno == ECONNRESET) + errno_is_connection_loss(result_errno)) printfPQExpBuffer(&conn->errorMessage, libpq_gettext("server closed the connection unexpectedly\n" "\tThis probably means the server terminated abnormally\n" @@ -311,7 +311,8 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len) if (n < 0) { result_errno = SOCK_ERRNO; - if (result_errno == EPIPE || result_errno == ECONNRESET) + if (result_errno == EPIPE || + errno_is_connection_loss(result_errno)) printfPQExpBuffer(&conn->errorMessage, libpq_gettext("server closed the connection unexpectedly\n" "\tThis probably means the server terminated abnormally\n" diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c index 3311fd7a5b..f13651b544 100644 --- a/src/interfaces/libpq/fe-secure.c +++ b/src/interfaces/libpq/fe-secure.c @@ -261,14 +261,12 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len) /* no error message, caller is expected to retry */ break; -#ifdef ECONNRESET - case ECONNRESET: + case ALL_CONNECTION_LOSS_ERRNOS: printfPQExpBuffer(&conn->errorMessage, libpq_gettext("server closed the connection unexpectedly\n" "\tThis probably means the server terminated abnormally\n" "\tbefore or while processing the request.\n")); break; -#endif default: printfPQExpBuffer(&conn->errorMessage, @@ -374,11 +372,9 @@ retry_masked: /* Set flag for EPIPE */ REMEMBER_EPIPE(spinfo, true); -#ifdef ECONNRESET /* FALL THRU */ - case ECONNRESET: -#endif + case ALL_CONNECTION_LOSS_ERRNOS: printfPQExpBuffer(&conn->errorMessage, libpq_gettext("server closed the connection unexpectedly\n" "\tThis probably means the server terminated abnormally\n" diff --git a/src/interfaces/libpq/win32.h b/src/interfaces/libpq/win32.h index c42d7abfe3..eba4c2a358 100644 --- a/src/interfaces/libpq/win32.h +++ b/src/interfaces/libpq/win32.h @@ -16,15 +16,6 @@ #undef EAGAIN /* doesn't apply on sockets */ #undef EINTR #define EINTR WSAEINTR -#ifndef EWOULDBLOCK -#define EWOULDBLOCK WSAEWOULDBLOCK -#endif -#ifndef ECONNRESET -#define ECONNRESET WSAECONNRESET -#endif -#ifndef EINPROGRESS -#define EINPROGRESS WSAEINPROGRESS -#endif /* * support for handling Windows Socket errors diff --git a/src/port/strerror.c b/src/port/strerror.c index 375edb0f5a..c179c9ae60 100644 --- a/src/port/strerror.c +++ b/src/port/strerror.c @@ -146,16 +146,12 @@ get_errno_symbol(int errnum) return "EBUSY"; case ECHILD: return "ECHILD"; -#ifdef ECONNABORTED case ECONNABORTED: return "ECONNABORTED"; -#endif case ECONNREFUSED: return "ECONNREFUSED"; -#ifdef ECONNRESET case ECONNRESET: return "ECONNRESET"; -#endif case EDEADLK: return "EDEADLK"; case EDOM: @@ -166,10 +162,8 @@ get_errno_symbol(int errnum) return "EFAULT"; case EFBIG: return "EFBIG"; -#ifdef EHOSTUNREACH case EHOSTUNREACH: return "EHOSTUNREACH"; -#endif case EIDRM: return "EIDRM"; case EINPROGRESS: @@ -198,6 +192,10 @@ get_errno_symbol(int errnum) return "EMSGSIZE"; case ENAMETOOLONG: return "ENAMETOOLONG"; + case ENETDOWN: + return "ENETDOWN"; + case ENETUNREACH: + return "ENETUNREACH"; case ENFILE: return "ENFILE"; case ENOBUFS:
At Thu, 08 Oct 2020 15:15:54 -0400, Tom Lane <tgl@sss.pgh.pa.us> wrote in > Over in the thread at [1], we've tentatively determined that the > reason buildfarm member lorikeet is currently failing is that its > network stack returns ECONNABORTED for (some?) connection failures, > whereas our code is only expecting ECONNRESET. Fujii Masao therefore > proposes that we treat ECONNABORTED the same as ECONNRESET. I think > this is a good idea, but after a bit of research I feel it does not > go far enough. I find these POSIX-standard errnos that also seem > likely candidates to be returned for a hard loss of connection: > > ECONNABORTED > EHOSTUNREACH > ENETDOWN > ENETUNREACH > > All of these have been in POSIX since SUSv2, so it seems unlikely > that we need to #ifdef any of them. (It is in any case pretty silly > that we have #ifdefs around a very small minority of our references > to ECONNRESET :-(.) > > There are some other related errnos, such as ECONNREFUSED, that > don't seem like they'd be returned for a failure of a pre-existing > connection, so we don't need to include them in such tests. > > Accordingly, I propose the attached patch (an expansion of > Fujii-san's) that causes us to test for all five errnos anyplace > we had been checking for ECONNRESET. I felt that this was getting to > the point where we'd better centralize the knowledge of what to check, > so the patch does that, via an inline function and an admittedly hacky > macro. I also upgraded some places such as strerror.c to have full > support for these symbols. > > All of the machines I have (even as far back as HPUX 10.20) also > define ENETRESET and EHOSTDOWN. However, those symbols do not appear > in SUSv2. ENETRESET was added at some later point, but EHOSTDOWN is > still not in POSIX. For the moment I've left these second-tier > symbols out of the patch, but there's a case for adding them. I'm > not sure whether there'd be any point in trying to #ifdef them. > > BTW, I took out the conditional defines of some of these errnos in > libpq's win32.h; AFAICS that's been dead code ever since we added > #define's for them to win32_port.h. Am I missing something? > > This seems like a bug fix to me, so I'm inclined to back-patch. > > regards, tom lane > > [1] https://www.postgresql.org/message-id/flat/E1kPc9v-0005L4-2l%40gemulon.postgresql.org +1 for the direction. In terms of connection errors, connect(2) and bind(2) can return EADDRNOTAVAIL. bind(2) and listen(2) can return EADDRINUSE. FWIW I recetnly saw pgbench getting EADDRNOTAVAIL. (They have mapping from respective WSA errors in TranslateSocketError()) I'm not sure how we should treat EMFILE/ENFILE/ENOBUFS/ENOMEM from accept(2). (select(2) can return ENOMEM.) I'd make errno_is_connection_loss use ALL_CONNECTION_LOSS_ERRNOS to avoid duplication definition of the errno list. - if (ret < 0 && WSAGetLastError() == WSAECONNRESET) + if (ret < 0 && errno_is_connection_loss(WSAGetLastError())) Don't we need to use TranslateSocketError() before? + /* We might get ECONNRESET etc here if using TCP and backend died */ + if (errno_is_connection_loss(SOCK_ERRNO)) Perhaps I'm confused but SOCK_ERROR doesn't seem portable between Windows and Linux. ===== /* * These macros are needed to let error-handling code be portable between * Unix and Windows. (ugh) */ #ifdef WIN32 #define SOCK_ERRNO (WSAGetLastError()) #define SOCK_STRERROR winsock_strerror #define SOCK_ERRNO_SET(e) WSASetLastError(e) #else #define SOCK_ERRNO errno #define SOCK_STRERROR strerror_r #define SOCK_ERRNO_SET(e) (errno = (e)) #endif ===== AFAICS SOCK_ERRNO is intended to be used idiomatically as: > SOCK_STRERROR(SOCK_ERRNO, ...) The WSAE values from WSAGetLastError() and E values in errno are not compatible and needs translation by TranslateSocketError()? regards. -- Kyotaro Horiguchi NTT Open Source Software Center
Kyotaro Horiguchi <horikyota.ntt@gmail.com> writes: > At Thu, 08 Oct 2020 15:15:54 -0400, Tom Lane <tgl@sss.pgh.pa.us> wrote in >> Accordingly, I propose the attached patch (an expansion of >> Fujii-san's) that causes us to test for all five errnos anyplace >> we had been checking for ECONNRESET. > +1 for the direction. > In terms of connection errors, connect(2) and bind(2) can return > EADDRNOTAVAIL. bind(2) and listen(2) can return EADDRINUSE. FWIW I > recetnly saw pgbench getting EADDRNOTAVAIL. (They have mapping from > respective WSA errors in TranslateSocketError()) I do not think we have any issues with connection-time errors; or at least, if we do, the spots being touched here certainly shouldn't need to worry about them. These places are dealing with already-established connections. > I'd make errno_is_connection_loss use ALL_CONNECTION_LOSS_ERRNOS to > avoid duplication definition of the errno list. Hmm, might be worth doing, but I'm not sure. I am worried about whether compilers will generate equally good code that way. > - if (ret < 0 && WSAGetLastError() == WSAECONNRESET) > + if (ret < 0 && errno_is_connection_loss(WSAGetLastError())) > Don't we need to use TranslateSocketError() before? Oh, I missed that. But: > Perhaps I'm confused but SOCK_ERROR doesn't seem portable between > Windows and Linux. In that case, nothing would have worked on Windows for the last ten years, so you're mistaken. I think the actual explanation why this works, and why that test in parallel.c probably still works even with my mistake, is that win32_port.h makes sure that our values of ECONNRESET etc match WSAECONNRESET etc. IOW, we'd not actually need TranslateSocketError at all, except that it maps some not-similarly-named error codes for conditions that don't exist in Unix into ones that do. We probably do want TranslateSocketError in this parallel.c test so that anything that it maps to one of the errno_is_connection_loss codes will be recognized; but the basic cases would work anyway, unless I misunderstand this stuff entirely. regards, tom lane
At Thu, 08 Oct 2020 21:41:55 -0400, Tom Lane <tgl@sss.pgh.pa.us> wrote in > Kyotaro Horiguchi <horikyota.ntt@gmail.com> writes: > > At Thu, 08 Oct 2020 15:15:54 -0400, Tom Lane <tgl@sss.pgh.pa.us> wrote in > >> Accordingly, I propose the attached patch (an expansion of > >> Fujii-san's) that causes us to test for all five errnos anyplace > >> we had been checking for ECONNRESET. > > > +1 for the direction. > > > In terms of connection errors, connect(2) and bind(2) can return > > EADDRNOTAVAIL. bind(2) and listen(2) can return EADDRINUSE. FWIW I > > recetnly saw pgbench getting EADDRNOTAVAIL. (They have mapping from > > respective WSA errors in TranslateSocketError()) > > I do not think we have any issues with connection-time errors; > or at least, if we do, the spots being touched here certainly > shouldn't need to worry about them. These places are dealing > with already-established connections. errcode_for_socket_access() is called for connect, bind and lesten but I understand we don't consider the case since we don't have an actual issue related to the functions. > > I'd make errno_is_connection_loss use ALL_CONNECTION_LOSS_ERRNOS to > > avoid duplication definition of the errno list. > > Hmm, might be worth doing, but I'm not sure. I am worried about > whether compilers will generate equally good code that way. The two are placed side-by-side so either will do for me. > > - if (ret < 0 && WSAGetLastError() == WSAECONNRESET) > > + if (ret < 0 && errno_is_connection_loss(WSAGetLastError())) > > > Don't we need to use TranslateSocketError() before? > > Oh, I missed that. But: > > > Perhaps I'm confused but SOCK_ERROR doesn't seem portable between > > Windows and Linux. > > In that case, nothing would have worked on Windows for the last > ten years, so you're mistaken. I think the actual explanation > why this works, and why that test in parallel.c probably still > works even with my mistake, is that win32_port.h makes sure that > our values of ECONNRESET etc match WSAECONNRESET etc. Mmmmmmmmmm. Sure. > IOW, we'd not actually need TranslateSocketError at all, except > that it maps some not-similarly-named error codes for conditions > that don't exist in Unix into ones that do. We probably do want > TranslateSocketError in this parallel.c test so that anything that > it maps to one of the errno_is_connection_loss codes will be > recognized; but the basic cases would work anyway, unless I > misunderstand this stuff entirely. Yeah, that seems to work. regards. -- Kyotaro Horiguchi NTT Open Source Software Center
On 2020/10/09 4:15, Tom Lane wrote: > Over in the thread at [1], we've tentatively determined that the > reason buildfarm member lorikeet is currently failing is that its > network stack returns ECONNABORTED for (some?) connection failures, > whereas our code is only expecting ECONNRESET. Fujii Masao therefore > proposes that we treat ECONNABORTED the same as ECONNRESET. I think > this is a good idea, but after a bit of research I feel it does not > go far enough. I find these POSIX-standard errnos that also seem > likely candidates to be returned for a hard loss of connection: > > ECONNABORTED > EHOSTUNREACH > ENETDOWN > ENETUNREACH > > All of these have been in POSIX since SUSv2, so it seems unlikely > that we need to #ifdef any of them. (It is in any case pretty silly > that we have #ifdefs around a very small minority of our references > to ECONNRESET :-(.) > > There are some other related errnos, such as ECONNREFUSED, that > don't seem like they'd be returned for a failure of a pre-existing > connection, so we don't need to include them in such tests. > > Accordingly, I propose the attached patch (an expansion of > Fujii-san's) that causes us to test for all five errnos anyplace > we had been checking for ECONNRESET. +1 Thanks for expanding the patch! -#ifdef ECONNRESET - case ECONNRESET: + case ALL_CONNECTION_LOSS_ERRNOS: printfPQExpBuffer(&conn->errorMessage, libpq_gettext("server closed the connection unexpectedly\n" "\tThis probably means the server terminated abnormally\n" "\tbefore or while processing the request.\n")); This change causes the same error message to be reported for those five errno. That is, we cannot identify which errno is actually reported, from the error message. But I just wonder if it's more helpful for the troubleshooting if we, for example, append strerror() into the message so that we can easily identify errno. Thought? > I felt that this was getting to > the point where we'd better centralize the knowledge of what to check, > so the patch does that, via an inline function and an admittedly hacky > macro. I also upgraded some places such as strerror.c to have full > support for these symbols. > > All of the machines I have (even as far back as HPUX 10.20) also > define ENETRESET and EHOSTDOWN. However, those symbols do not appear > in SUSv2. ENETRESET was added at some later point, but EHOSTDOWN is > still not in POSIX. For the moment I've left these second-tier > symbols out of the patch, but there's a case for adding them. I'm > not sure whether there'd be any point in trying to #ifdef them. > > BTW, I took out the conditional defines of some of these errnos in > libpq's win32.h; AFAICS that's been dead code ever since we added > #define's for them to win32_port.h. Am I missing something? > > This seems like a bug fix to me, so I'm inclined to back-patch. +1 Regards, -- Fujii Masao Advanced Computing Technology Center Research and Development Headquarters NTT DATA CORPORATION
Fujii Masao <masao.fujii@oss.nttdata.com> writes: > On 2020/10/09 4:15, Tom Lane wrote: >> -#ifdef ECONNRESET >> - case ECONNRESET: >> + case ALL_CONNECTION_LOSS_ERRNOS: >> printfPQExpBuffer(&conn->errorMessage, >> libpq_gettext("server closed the connection unexpectedly\n" >> "\tThis probably means the server terminated abnormally\n" >> "\tbefore or while processing the request.\n")); > This change causes the same error message to be reported for those five errno. > That is, we cannot identify which errno is actually reported, from the error > message. But I just wonder if it's more helpful for the troubleshooting if we, > for example, append strerror() into the message so that we can easily > identify errno. Thought? Hmm, excellent point. While our code response to all these errors should be the same, you are right that that doesn't extend to emitting identical error texts. For EHOSTUNREACH/ENETDOWN/ENETUNREACH, we should say something like "connection to server lost", without claiming that the server crashed. It is less clear what to do with ECONNABORTED, but I'm inclined to put it in the network-problem bucket not the server-crash bucket, despite lorikeet's behavior. Thoughts? This also destroys the patch's idea that switch statements should be able to handle these all alike. If we group things as "ECONNRESET means server crash and the others are all network failures", then I'd be inclined to leave the ECONNRESET cases alone and just introduce new infrastructure to recognize all the network-failure errnos. regards, tom lane
I wrote: > Hmm, excellent point. While our code response to all these errors > should be the same, you are right that that doesn't extend to emitting > identical error texts. For EHOSTUNREACH/ENETDOWN/ENETUNREACH, we > should say something like "connection to server lost", without claiming > that the server crashed. It is less clear what to do with ECONNABORTED, > but I'm inclined to put it in the network-problem bucket not the > server-crash bucket, despite lorikeet's behavior. Thoughts? > This also destroys the patch's idea that switch statements should be > able to handle these all alike. If we group things as "ECONNRESET means > server crash and the others are all network failures", then I'd be > inclined to leave the ECONNRESET cases alone and just introduce > new infrastructure to recognize all the network-failure errnos. Actually, treating it that way seems like a good thing because it nets out as (nearly) no change to our error message behavior. The connection failure errnos fall through to the default case, which produces a perfectly reasonable report that includes strerror(). The only big thing we're changing is the set of errnos that errcode_for_socket_access will map to ERRCODE_CONNECTION_FAILURE, so this is spiritually closer to your original patch. Some other changes in the attached v2: * I incorporated Kyotaro-san's suggested improvements. * I went ahead and included ENETRESET and EHOSTDOWN, figuring that if they exist we definitely want to class them as network failures. We can worry about ifdef'ing them when and if we find a platform that hasn't got them. (I don't see any non-ugly way to make the ALL_NETWORK_FAILURE_ERRNOS macro vary for missing symbols, so I'd rather not deal with that unless it's proven necessary.) * I noticed that we were not terribly consistent about whether EPIPE is regarded as indicating a server failure like ECONNRESET does. So this patch also makes sure that EPIPE is treated like ECONNRESET everywhere. (Hence, pqsecure_raw_read's error reporting does change, since it'll now report EPIPE as server failure.) I lack a way to test this on Windows, but otherwise it feels like it's about ready. regards, tom lane diff --git a/src/backend/port/win32/socket.c b/src/backend/port/win32/socket.c index 6fbd1ed6fb..b4ef9fbd8f 100644 --- a/src/backend/port/win32/socket.c +++ b/src/backend/port/win32/socket.c @@ -121,12 +121,20 @@ TranslateSocketError(void) errno = EADDRNOTAVAIL; break; case WSAEHOSTUNREACH: - case WSAEHOSTDOWN: case WSAHOST_NOT_FOUND: + errno = EHOSTUNREACH; + break; + case WSAEHOSTDOWN: + errno = EHOSTDOWN; + break; case WSAENETDOWN: + errno = ENETDOWN; + break; case WSAENETUNREACH: + errno = ENETUNREACH; + break; case WSAENETRESET: - errno = EHOSTUNREACH; + errno = ENETRESET; break; case WSAENOTCONN: case WSAESHUTDOWN: diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index d0b368530e..637768d776 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -712,9 +712,8 @@ errcode_for_socket_access(void) { /* Loss of connection */ case EPIPE: -#ifdef ECONNRESET case ECONNRESET: -#endif + case ALL_NETWORK_FAILURE_ERRNOS: edata->sqlerrcode = ERRCODE_CONNECTION_FAILURE; break; diff --git a/src/bin/pg_dump/parallel.c b/src/bin/pg_dump/parallel.c index f0587f41e4..fa798431ea 100644 --- a/src/bin/pg_dump/parallel.c +++ b/src/bin/pg_dump/parallel.c @@ -1825,10 +1825,15 @@ piperead(int s, char *buf, int len) { int ret = recv(s, buf, len, 0); - if (ret < 0 && WSAGetLastError() == WSAECONNRESET) + if (ret < 0) { - /* EOF on the pipe! */ - ret = 0; + int werrno = TranslateSocketError(WSAGetLastError()); + + if (errno_is_connection_loss(werrno)) + { + /* EOF on the pipe! */ + ret = 0; + } } return ret; } diff --git a/src/include/port.h b/src/include/port.h index 84bf2c363f..766bb3dd6d 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -99,6 +99,36 @@ extern void pgfnames_cleanup(char **filenames); ) #endif +/* + * This macro provides a centralized list of all errnos that identify + * network-level failure of a previously-established TCP connection. This + * excludes ECONNRESET, which we treat as reporting a probable server crash. + * (EPIPE is also excluded, since it's likewise a server-crash indication, + * and not TCP either.) The macro is intended to be used in a switch + * statement, in the form "case ALL_NETWORK_FAILURE_ERRNOS:". + */ +#define ALL_NETWORK_FAILURE_ERRNOS \ + ECONNABORTED: \ + case EHOSTDOWN: \ + case EHOSTUNREACH: \ + case ENETDOWN: \ + case ENETRESET: \ + case ENETUNREACH + +/* Test for all connection-loss errnos, whether server or network failure */ +static inline bool +errno_is_connection_loss(int e) +{ + switch (e) + { + case EPIPE: + case ECONNRESET: + case ALL_NETWORK_FAILURE_ERRNOS: + return true; + } + return false; +} + /* Portable locale initialization (in exec.c) */ extern void set_pglocale_pgservice(const char *argv0, const char *app); diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h index 8b6576b23d..e07baa555d 100644 --- a/src/include/port/win32_port.h +++ b/src/include/port/win32_port.h @@ -349,8 +349,16 @@ extern int pgwin32_safestat(const char *path, struct stat *buf); #define EADDRINUSE WSAEADDRINUSE #undef EADDRNOTAVAIL #define EADDRNOTAVAIL WSAEADDRNOTAVAIL +#undef EHOSTDOWN +#define EHOSTDOWN WSAEHOSTDOWN #undef EHOSTUNREACH #define EHOSTUNREACH WSAEHOSTUNREACH +#undef ENETDOWN +#define ENETDOWN WSAENETDOWN +#undef ENETRESET +#define ENETRESET WSAENETRESET +#undef ENETUNREACH +#define ENETUNREACH WSAENETUNREACH #undef ENOTCONN #define ENOTCONN WSAENOTCONN diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c index ff840b7730..9bcbda7eb7 100644 --- a/src/interfaces/libpq/fe-misc.c +++ b/src/interfaces/libpq/fe-misc.c @@ -679,11 +679,9 @@ retry3: if (SOCK_ERRNO == EWOULDBLOCK) return someread; #endif - /* We might get ECONNRESET here if using TCP and backend died */ -#ifdef ECONNRESET - if (SOCK_ERRNO == ECONNRESET) + /* We might get ECONNRESET etc here if connection failed */ + if (errno_is_connection_loss(SOCK_ERRNO)) goto definitelyFailed; -#endif /* pqsecure_read set the error message for us */ return -1; } @@ -769,11 +767,9 @@ retry4: if (SOCK_ERRNO == EWOULDBLOCK) return 0; #endif - /* We might get ECONNRESET here if using TCP and backend died */ -#ifdef ECONNRESET - if (SOCK_ERRNO == ECONNRESET) + /* We might get ECONNRESET etc here if connection failed */ + if (errno_is_connection_loss(SOCK_ERRNO)) goto definitelyFailed; -#endif /* pqsecure_read set the error message for us */ return -1; } diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c index 3311fd7a5b..97c3805303 100644 --- a/src/interfaces/libpq/fe-secure.c +++ b/src/interfaces/libpq/fe-secure.c @@ -261,14 +261,13 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len) /* no error message, caller is expected to retry */ break; -#ifdef ECONNRESET + case EPIPE: case ECONNRESET: printfPQExpBuffer(&conn->errorMessage, libpq_gettext("server closed the connection unexpectedly\n" "\tThis probably means the server terminated abnormally\n" "\tbefore or while processing the request.\n")); break; -#endif default: printfPQExpBuffer(&conn->errorMessage, @@ -374,11 +373,9 @@ retry_masked: /* Set flag for EPIPE */ REMEMBER_EPIPE(spinfo, true); -#ifdef ECONNRESET /* FALL THRU */ case ECONNRESET: -#endif printfPQExpBuffer(&conn->errorMessage, libpq_gettext("server closed the connection unexpectedly\n" "\tThis probably means the server terminated abnormally\n" diff --git a/src/interfaces/libpq/win32.h b/src/interfaces/libpq/win32.h index c42d7abfe3..fcce1e0544 100644 --- a/src/interfaces/libpq/win32.h +++ b/src/interfaces/libpq/win32.h @@ -14,17 +14,6 @@ #define write(a,b,c) _write(a,b,c) #undef EAGAIN /* doesn't apply on sockets */ -#undef EINTR -#define EINTR WSAEINTR -#ifndef EWOULDBLOCK -#define EWOULDBLOCK WSAEWOULDBLOCK -#endif -#ifndef ECONNRESET -#define ECONNRESET WSAECONNRESET -#endif -#ifndef EINPROGRESS -#define EINPROGRESS WSAEINPROGRESS -#endif /* * support for handling Windows Socket errors diff --git a/src/port/strerror.c b/src/port/strerror.c index 375edb0f5a..43a9761d90 100644 --- a/src/port/strerror.c +++ b/src/port/strerror.c @@ -146,16 +146,12 @@ get_errno_symbol(int errnum) return "EBUSY"; case ECHILD: return "ECHILD"; -#ifdef ECONNABORTED case ECONNABORTED: return "ECONNABORTED"; -#endif case ECONNREFUSED: return "ECONNREFUSED"; -#ifdef ECONNRESET case ECONNRESET: return "ECONNRESET"; -#endif case EDEADLK: return "EDEADLK"; case EDOM: @@ -166,10 +162,10 @@ get_errno_symbol(int errnum) return "EFAULT"; case EFBIG: return "EFBIG"; -#ifdef EHOSTUNREACH + case EHOSTDOWN: + return "EHOSTDOWN"; case EHOSTUNREACH: return "EHOSTUNREACH"; -#endif case EIDRM: return "EIDRM"; case EINPROGRESS: @@ -198,6 +194,12 @@ get_errno_symbol(int errnum) return "EMSGSIZE"; case ENAMETOOLONG: return "ENAMETOOLONG"; + case ENETDOWN: + return "ENETDOWN"; + case ENETRESET: + return "ENETRESET"; + case ENETUNREACH: + return "ENETUNREACH"; case ENFILE: return "ENFILE"; case ENOBUFS: