Thread: Expansion of our checks for connection-loss errors

Expansion of our checks for connection-loss errors

From
Tom Lane
Date:
Over in the thread at [1], we've tentatively determined that the
reason buildfarm member lorikeet is currently failing is that its
network stack returns ECONNABORTED for (some?) connection failures,
whereas our code is only expecting ECONNRESET.  Fujii Masao therefore
proposes that we treat ECONNABORTED the same as ECONNRESET.  I think
this is a good idea, but after a bit of research I feel it does not
go far enough.  I find these POSIX-standard errnos that also seem
likely candidates to be returned for a hard loss of connection:

    ECONNABORTED
    EHOSTUNREACH
    ENETDOWN
    ENETUNREACH

All of these have been in POSIX since SUSv2, so it seems unlikely
that we need to #ifdef any of them.  (It is in any case pretty silly
that we have #ifdefs around a very small minority of our references
to ECONNRESET :-(.)

There are some other related errnos, such as ECONNREFUSED, that
don't seem like they'd be returned for a failure of a pre-existing
connection, so we don't need to include them in such tests.

Accordingly, I propose the attached patch (an expansion of
Fujii-san's) that causes us to test for all five errnos anyplace
we had been checking for ECONNRESET.  I felt that this was getting to
the point where we'd better centralize the knowledge of what to check,
so the patch does that, via an inline function and an admittedly hacky
macro.  I also upgraded some places such as strerror.c to have full
support for these symbols.

All of the machines I have (even as far back as HPUX 10.20) also
define ENETRESET and EHOSTDOWN.  However, those symbols do not appear
in SUSv2.  ENETRESET was added at some later point, but EHOSTDOWN is
still not in POSIX.  For the moment I've left these second-tier
symbols out of the patch, but there's a case for adding them.  I'm
not sure whether there'd be any point in trying to #ifdef them.

BTW, I took out the conditional defines of some of these errnos in
libpq's win32.h; AFAICS that's been dead code ever since we added
#define's for them to win32_port.h.  Am I missing something?

This seems like a bug fix to me, so I'm inclined to back-patch.

            regards, tom lane

[1] https://www.postgresql.org/message-id/flat/E1kPc9v-0005L4-2l%40gemulon.postgresql.org

diff --git a/src/backend/port/win32/socket.c b/src/backend/port/win32/socket.c
index 6fbd1ed6fb..0f28c07ed1 100644
--- a/src/backend/port/win32/socket.c
+++ b/src/backend/port/win32/socket.c
@@ -123,10 +123,14 @@ TranslateSocketError(void)
         case WSAEHOSTUNREACH:
         case WSAEHOSTDOWN:
         case WSAHOST_NOT_FOUND:
+            errno = EHOSTUNREACH;
+            break;
         case WSAENETDOWN:
-        case WSAENETUNREACH:
         case WSAENETRESET:
-            errno = EHOSTUNREACH;
+            errno = ENETDOWN;
+            break;
+        case WSAENETUNREACH:
+            errno = ENETUNREACH;
             break;
         case WSAENOTCONN:
         case WSAESHUTDOWN:
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index d0b368530e..8937f223a8 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -712,9 +712,7 @@ errcode_for_socket_access(void)
     {
             /* Loss of connection */
         case EPIPE:
-#ifdef ECONNRESET
-        case ECONNRESET:
-#endif
+        case ALL_CONNECTION_LOSS_ERRNOS:
             edata->sqlerrcode = ERRCODE_CONNECTION_FAILURE;
             break;

diff --git a/src/bin/pg_dump/parallel.c b/src/bin/pg_dump/parallel.c
index f0587f41e4..b8ab939179 100644
--- a/src/bin/pg_dump/parallel.c
+++ b/src/bin/pg_dump/parallel.c
@@ -1825,7 +1825,7 @@ piperead(int s, char *buf, int len)
 {
     int            ret = recv(s, buf, len, 0);

-    if (ret < 0 && WSAGetLastError() == WSAECONNRESET)
+    if (ret < 0 && errno_is_connection_loss(WSAGetLastError()))
     {
         /* EOF on the pipe! */
         ret = 0;
diff --git a/src/include/port.h b/src/include/port.h
index 84bf2c363f..ffa21e782a 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -99,6 +99,30 @@ extern void pgfnames_cleanup(char **filenames);
 )
 #endif

+/* Test for all errnos that report loss of an established TCP connection */
+static inline bool
+errno_is_connection_loss(int e)
+{
+    if (e == ECONNRESET ||
+        e == ECONNABORTED ||
+        e == EHOSTUNREACH ||
+        e == ENETDOWN ||
+        e == ENETUNREACH)
+        return true;
+    return false;
+}
+
+/*
+ * To test for connection-loss errnos in a switch statement, write
+ * "case ALL_CONNECTION_LOSS_ERRNOS:".
+ */
+#define ALL_CONNECTION_LOSS_ERRNOS \
+    ECONNRESET: \
+    case ECONNABORTED: \
+    case EHOSTUNREACH: \
+    case ENETDOWN: \
+    case ENETUNREACH
+
 /* Portable locale initialization (in exec.c) */
 extern void set_pglocale_pgservice(const char *argv0, const char *app);

diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h
index 8b6576b23d..7ce8c87e8d 100644
--- a/src/include/port/win32_port.h
+++ b/src/include/port/win32_port.h
@@ -351,6 +351,10 @@ extern int    pgwin32_safestat(const char *path, struct stat *buf);
 #define EADDRNOTAVAIL WSAEADDRNOTAVAIL
 #undef EHOSTUNREACH
 #define EHOSTUNREACH WSAEHOSTUNREACH
+#undef ENETDOWN
+#define ENETDOWN WSAENETDOWN
+#undef ENETUNREACH
+#define ENETUNREACH WSAENETUNREACH
 #undef ENOTCONN
 #define ENOTCONN WSAENOTCONN

diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c
index ff840b7730..ecc3b4ba0b 100644
--- a/src/interfaces/libpq/fe-misc.c
+++ b/src/interfaces/libpq/fe-misc.c
@@ -679,11 +679,9 @@ retry3:
         if (SOCK_ERRNO == EWOULDBLOCK)
             return someread;
 #endif
-        /* We might get ECONNRESET here if using TCP and backend died */
-#ifdef ECONNRESET
-        if (SOCK_ERRNO == ECONNRESET)
+        /* We might get ECONNRESET etc here if using TCP and backend died */
+        if (errno_is_connection_loss(SOCK_ERRNO))
             goto definitelyFailed;
-#endif
         /* pqsecure_read set the error message for us */
         return -1;
     }
@@ -769,11 +767,9 @@ retry4:
         if (SOCK_ERRNO == EWOULDBLOCK)
             return 0;
 #endif
-        /* We might get ECONNRESET here if using TCP and backend died */
-#ifdef ECONNRESET
-        if (SOCK_ERRNO == ECONNRESET)
+        /* We might get ECONNRESET etc here if using TCP and backend died */
+        if (errno_is_connection_loss(SOCK_ERRNO))
             goto definitelyFailed;
-#endif
         /* pqsecure_read set the error message for us */
         return -1;
     }
diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c
index d609a38bbe..3c89c34f80 100644
--- a/src/interfaces/libpq/fe-secure-openssl.c
+++ b/src/interfaces/libpq/fe-secure-openssl.c
@@ -204,7 +204,7 @@ rloop:
             {
                 result_errno = SOCK_ERRNO;
                 if (result_errno == EPIPE ||
-                    result_errno == ECONNRESET)
+                    errno_is_connection_loss(result_errno))
                     printfPQExpBuffer(&conn->errorMessage,
                                       libpq_gettext("server closed the connection unexpectedly\n"
                                                     "\tThis probably means the server terminated abnormally\n"
@@ -311,7 +311,8 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
             if (n < 0)
             {
                 result_errno = SOCK_ERRNO;
-                if (result_errno == EPIPE || result_errno == ECONNRESET)
+                if (result_errno == EPIPE ||
+                    errno_is_connection_loss(result_errno))
                     printfPQExpBuffer(&conn->errorMessage,
                                       libpq_gettext("server closed the connection unexpectedly\n"
                                                     "\tThis probably means the server terminated abnormally\n"
diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c
index 3311fd7a5b..f13651b544 100644
--- a/src/interfaces/libpq/fe-secure.c
+++ b/src/interfaces/libpq/fe-secure.c
@@ -261,14 +261,12 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
                 /* no error message, caller is expected to retry */
                 break;

-#ifdef ECONNRESET
-            case ECONNRESET:
+            case ALL_CONNECTION_LOSS_ERRNOS:
                 printfPQExpBuffer(&conn->errorMessage,
                                   libpq_gettext("server closed the connection unexpectedly\n"
                                                 "\tThis probably means the server terminated abnormally\n"
                                                 "\tbefore or while processing the request.\n"));
                 break;
-#endif

             default:
                 printfPQExpBuffer(&conn->errorMessage,
@@ -374,11 +372,9 @@ retry_masked:
                 /* Set flag for EPIPE */
                 REMEMBER_EPIPE(spinfo, true);

-#ifdef ECONNRESET
                 /* FALL THRU */

-            case ECONNRESET:
-#endif
+            case ALL_CONNECTION_LOSS_ERRNOS:
                 printfPQExpBuffer(&conn->errorMessage,
                                   libpq_gettext("server closed the connection unexpectedly\n"
                                                 "\tThis probably means the server terminated abnormally\n"
diff --git a/src/interfaces/libpq/win32.h b/src/interfaces/libpq/win32.h
index c42d7abfe3..eba4c2a358 100644
--- a/src/interfaces/libpq/win32.h
+++ b/src/interfaces/libpq/win32.h
@@ -16,15 +16,6 @@
 #undef EAGAIN                    /* doesn't apply on sockets */
 #undef EINTR
 #define EINTR WSAEINTR
-#ifndef EWOULDBLOCK
-#define EWOULDBLOCK WSAEWOULDBLOCK
-#endif
-#ifndef ECONNRESET
-#define ECONNRESET WSAECONNRESET
-#endif
-#ifndef EINPROGRESS
-#define EINPROGRESS WSAEINPROGRESS
-#endif

 /*
  * support for handling Windows Socket errors
diff --git a/src/port/strerror.c b/src/port/strerror.c
index 375edb0f5a..c179c9ae60 100644
--- a/src/port/strerror.c
+++ b/src/port/strerror.c
@@ -146,16 +146,12 @@ get_errno_symbol(int errnum)
             return "EBUSY";
         case ECHILD:
             return "ECHILD";
-#ifdef ECONNABORTED
         case ECONNABORTED:
             return "ECONNABORTED";
-#endif
         case ECONNREFUSED:
             return "ECONNREFUSED";
-#ifdef ECONNRESET
         case ECONNRESET:
             return "ECONNRESET";
-#endif
         case EDEADLK:
             return "EDEADLK";
         case EDOM:
@@ -166,10 +162,8 @@ get_errno_symbol(int errnum)
             return "EFAULT";
         case EFBIG:
             return "EFBIG";
-#ifdef EHOSTUNREACH
         case EHOSTUNREACH:
             return "EHOSTUNREACH";
-#endif
         case EIDRM:
             return "EIDRM";
         case EINPROGRESS:
@@ -198,6 +192,10 @@ get_errno_symbol(int errnum)
             return "EMSGSIZE";
         case ENAMETOOLONG:
             return "ENAMETOOLONG";
+        case ENETDOWN:
+            return "ENETDOWN";
+        case ENETUNREACH:
+            return "ENETUNREACH";
         case ENFILE:
             return "ENFILE";
         case ENOBUFS:

Re: Expansion of our checks for connection-loss errors

From
Kyotaro Horiguchi
Date:
At Thu, 08 Oct 2020 15:15:54 -0400, Tom Lane <tgl@sss.pgh.pa.us> wrote in 
> Over in the thread at [1], we've tentatively determined that the
> reason buildfarm member lorikeet is currently failing is that its
> network stack returns ECONNABORTED for (some?) connection failures,
> whereas our code is only expecting ECONNRESET.  Fujii Masao therefore
> proposes that we treat ECONNABORTED the same as ECONNRESET.  I think
> this is a good idea, but after a bit of research I feel it does not
> go far enough.  I find these POSIX-standard errnos that also seem
> likely candidates to be returned for a hard loss of connection:
> 
>     ECONNABORTED
>     EHOSTUNREACH
>     ENETDOWN
>     ENETUNREACH
> 
> All of these have been in POSIX since SUSv2, so it seems unlikely
> that we need to #ifdef any of them.  (It is in any case pretty silly
> that we have #ifdefs around a very small minority of our references
> to ECONNRESET :-(.)
> 
> There are some other related errnos, such as ECONNREFUSED, that
> don't seem like they'd be returned for a failure of a pre-existing
> connection, so we don't need to include them in such tests.
> 
> Accordingly, I propose the attached patch (an expansion of
> Fujii-san's) that causes us to test for all five errnos anyplace
> we had been checking for ECONNRESET.  I felt that this was getting to
> the point where we'd better centralize the knowledge of what to check,
> so the patch does that, via an inline function and an admittedly hacky
> macro.  I also upgraded some places such as strerror.c to have full
> support for these symbols.
> 
> All of the machines I have (even as far back as HPUX 10.20) also
> define ENETRESET and EHOSTDOWN.  However, those symbols do not appear
> in SUSv2.  ENETRESET was added at some later point, but EHOSTDOWN is
> still not in POSIX.  For the moment I've left these second-tier
> symbols out of the patch, but there's a case for adding them.  I'm
> not sure whether there'd be any point in trying to #ifdef them.
> 
> BTW, I took out the conditional defines of some of these errnos in
> libpq's win32.h; AFAICS that's been dead code ever since we added
> #define's for them to win32_port.h.  Am I missing something?
> 
> This seems like a bug fix to me, so I'm inclined to back-patch.
> 
>             regards, tom lane
> 
> [1] https://www.postgresql.org/message-id/flat/E1kPc9v-0005L4-2l%40gemulon.postgresql.org

+1 for the direction.

In terms of connection errors, connect(2) and bind(2) can return
EADDRNOTAVAIL. bind(2) and listen(2) can return EADDRINUSE. FWIW I
recetnly saw pgbench getting EADDRNOTAVAIL. (They have mapping from
respective WSA errors in TranslateSocketError())

I'm not sure how we should treat EMFILE/ENFILE/ENOBUFS/ENOMEM from
accept(2). (select(2) can return ENOMEM.)

I'd make errno_is_connection_loss use ALL_CONNECTION_LOSS_ERRNOS to
avoid duplication definition of the errno list.

-    if (ret < 0 && WSAGetLastError() == WSAECONNRESET)
+    if (ret < 0 && errno_is_connection_loss(WSAGetLastError()))

Don't we need to use TranslateSocketError() before?

+        /* We might get ECONNRESET etc here if using TCP and backend died */
+        if (errno_is_connection_loss(SOCK_ERRNO))

Perhaps I'm confused but SOCK_ERROR doesn't seem portable between
Windows and Linux.

=====
/*
 * These macros are needed to let error-handling code be portable between
 * Unix and Windows.  (ugh)
 */
#ifdef WIN32
#define SOCK_ERRNO (WSAGetLastError())
#define SOCK_STRERROR winsock_strerror
#define SOCK_ERRNO_SET(e) WSASetLastError(e)
#else
#define SOCK_ERRNO errno
#define SOCK_STRERROR strerror_r
#define SOCK_ERRNO_SET(e) (errno = (e))
#endif
=====

AFAICS SOCK_ERRNO is intended to be used idiomatically as:

> SOCK_STRERROR(SOCK_ERRNO, ...)

The WSAE values from WSAGetLastError() and E values in errno are not
compatible and needs translation by TranslateSocketError()?

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center



Re: Expansion of our checks for connection-loss errors

From
Tom Lane
Date:
Kyotaro Horiguchi <horikyota.ntt@gmail.com> writes:
> At Thu, 08 Oct 2020 15:15:54 -0400, Tom Lane <tgl@sss.pgh.pa.us> wrote in 
>> Accordingly, I propose the attached patch (an expansion of
>> Fujii-san's) that causes us to test for all five errnos anyplace
>> we had been checking for ECONNRESET.

> +1 for the direction.

> In terms of connection errors, connect(2) and bind(2) can return
> EADDRNOTAVAIL. bind(2) and listen(2) can return EADDRINUSE. FWIW I
> recetnly saw pgbench getting EADDRNOTAVAIL. (They have mapping from
> respective WSA errors in TranslateSocketError())

I do not think we have any issues with connection-time errors;
or at least, if we do, the spots being touched here certainly
shouldn't need to worry about them.  These places are dealing
with already-established connections.

> I'd make errno_is_connection_loss use ALL_CONNECTION_LOSS_ERRNOS to
> avoid duplication definition of the errno list.

Hmm, might be worth doing, but I'm not sure.  I am worried about
whether compilers will generate equally good code that way.

> -    if (ret < 0 && WSAGetLastError() == WSAECONNRESET)
> +    if (ret < 0 && errno_is_connection_loss(WSAGetLastError()))

> Don't we need to use TranslateSocketError() before?

Oh, I missed that.  But:

> Perhaps I'm confused but SOCK_ERROR doesn't seem portable between
> Windows and Linux.

In that case, nothing would have worked on Windows for the last
ten years, so you're mistaken.  I think the actual explanation
why this works, and why that test in parallel.c probably still
works even with my mistake, is that win32_port.h makes sure that
our values of ECONNRESET etc match WSAECONNRESET etc.

IOW, we'd not actually need TranslateSocketError at all, except
that it maps some not-similarly-named error codes for conditions
that don't exist in Unix into ones that do.  We probably do want
TranslateSocketError in this parallel.c test so that anything that
it maps to one of the errno_is_connection_loss codes will be
recognized; but the basic cases would work anyway, unless I
misunderstand this stuff entirely.

            regards, tom lane



Re: Expansion of our checks for connection-loss errors

From
Kyotaro Horiguchi
Date:
At Thu, 08 Oct 2020 21:41:55 -0400, Tom Lane <tgl@sss.pgh.pa.us> wrote in 
> Kyotaro Horiguchi <horikyota.ntt@gmail.com> writes:
> > At Thu, 08 Oct 2020 15:15:54 -0400, Tom Lane <tgl@sss.pgh.pa.us> wrote in 
> >> Accordingly, I propose the attached patch (an expansion of
> >> Fujii-san's) that causes us to test for all five errnos anyplace
> >> we had been checking for ECONNRESET.
> 
> > +1 for the direction.
> 
> > In terms of connection errors, connect(2) and bind(2) can return
> > EADDRNOTAVAIL. bind(2) and listen(2) can return EADDRINUSE. FWIW I
> > recetnly saw pgbench getting EADDRNOTAVAIL. (They have mapping from
> > respective WSA errors in TranslateSocketError())
> 
> I do not think we have any issues with connection-time errors;
> or at least, if we do, the spots being touched here certainly
> shouldn't need to worry about them.  These places are dealing
> with already-established connections.

errcode_for_socket_access() is called for connect, bind and lesten but
I understand we don't consider the case since we don't have an actual
issue related to the functions.

> > I'd make errno_is_connection_loss use ALL_CONNECTION_LOSS_ERRNOS to
> > avoid duplication definition of the errno list.
> 
> Hmm, might be worth doing, but I'm not sure.  I am worried about
> whether compilers will generate equally good code that way.

The two are placed side-by-side so either will do for me.

> > -    if (ret < 0 && WSAGetLastError() == WSAECONNRESET)
> > +    if (ret < 0 && errno_is_connection_loss(WSAGetLastError()))
> 
> > Don't we need to use TranslateSocketError() before?
> 
> Oh, I missed that.  But:
> 
> > Perhaps I'm confused but SOCK_ERROR doesn't seem portable between
> > Windows and Linux.
> 
> In that case, nothing would have worked on Windows for the last
> ten years, so you're mistaken.  I think the actual explanation
> why this works, and why that test in parallel.c probably still
> works even with my mistake, is that win32_port.h makes sure that
> our values of ECONNRESET etc match WSAECONNRESET etc.

Mmmmmmmmmm. Sure.

> IOW, we'd not actually need TranslateSocketError at all, except
> that it maps some not-similarly-named error codes for conditions
> that don't exist in Unix into ones that do.  We probably do want
> TranslateSocketError in this parallel.c test so that anything that
> it maps to one of the errno_is_connection_loss codes will be
> recognized; but the basic cases would work anyway, unless I
> misunderstand this stuff entirely.

Yeah, that seems to work.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center



Re: Expansion of our checks for connection-loss errors

From
Fujii Masao
Date:

On 2020/10/09 4:15, Tom Lane wrote:
> Over in the thread at [1], we've tentatively determined that the
> reason buildfarm member lorikeet is currently failing is that its
> network stack returns ECONNABORTED for (some?) connection failures,
> whereas our code is only expecting ECONNRESET.  Fujii Masao therefore
> proposes that we treat ECONNABORTED the same as ECONNRESET.  I think
> this is a good idea, but after a bit of research I feel it does not
> go far enough.  I find these POSIX-standard errnos that also seem
> likely candidates to be returned for a hard loss of connection:
> 
>     ECONNABORTED
>     EHOSTUNREACH
>     ENETDOWN
>     ENETUNREACH
> 
> All of these have been in POSIX since SUSv2, so it seems unlikely
> that we need to #ifdef any of them.  (It is in any case pretty silly
> that we have #ifdefs around a very small minority of our references
> to ECONNRESET :-(.)
> 
> There are some other related errnos, such as ECONNREFUSED, that
> don't seem like they'd be returned for a failure of a pre-existing
> connection, so we don't need to include them in such tests.
> 
> Accordingly, I propose the attached patch (an expansion of
> Fujii-san's) that causes us to test for all five errnos anyplace
> we had been checking for ECONNRESET.

+1

Thanks for expanding the patch!

-#ifdef ECONNRESET
-            case ECONNRESET:
+            case ALL_CONNECTION_LOSS_ERRNOS:
                  printfPQExpBuffer(&conn->errorMessage,
                                    libpq_gettext("server closed the connection unexpectedly\n"
                                                  "\tThis probably means the server terminated abnormally\n"
                                                  "\tbefore or while processing the request.\n"));

This change causes the same error message to be reported for those five errno.
That is, we cannot identify which errno is actually reported, from the error
message. But I just wonder if it's more helpful for the troubleshooting if we,
for example, append strerror() into the message so that we can easily
identify errno. Thought?


> I felt that this was getting to
> the point where we'd better centralize the knowledge of what to check,
> so the patch does that, via an inline function and an admittedly hacky
> macro.  I also upgraded some places such as strerror.c to have full
> support for these symbols.
> 
> All of the machines I have (even as far back as HPUX 10.20) also
> define ENETRESET and EHOSTDOWN.  However, those symbols do not appear
> in SUSv2.  ENETRESET was added at some later point, but EHOSTDOWN is
> still not in POSIX.  For the moment I've left these second-tier
> symbols out of the patch, but there's a case for adding them.  I'm
> not sure whether there'd be any point in trying to #ifdef them.
> 
> BTW, I took out the conditional defines of some of these errnos in
> libpq's win32.h; AFAICS that's been dead code ever since we added
> #define's for them to win32_port.h.  Am I missing something?
> 
> This seems like a bug fix to me, so I'm inclined to back-patch.

+1

Regards,

-- 
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION



Re: Expansion of our checks for connection-loss errors

From
Tom Lane
Date:
Fujii Masao <masao.fujii@oss.nttdata.com> writes:
> On 2020/10/09 4:15, Tom Lane wrote:
>> -#ifdef ECONNRESET
>> -            case ECONNRESET:
>> +            case ALL_CONNECTION_LOSS_ERRNOS:
>>                   printfPQExpBuffer(&conn->errorMessage,
>>                                     libpq_gettext("server closed the connection unexpectedly\n"
>>                                                   "\tThis probably means the server terminated abnormally\n"
>>                                                   "\tbefore or while processing the request.\n"));

> This change causes the same error message to be reported for those five errno.
> That is, we cannot identify which errno is actually reported, from the error
> message. But I just wonder if it's more helpful for the troubleshooting if we,
> for example, append strerror() into the message so that we can easily
> identify errno. Thought?

Hmm, excellent point.  While our code response to all these errors
should be the same, you are right that that doesn't extend to emitting
identical error texts.  For EHOSTUNREACH/ENETDOWN/ENETUNREACH, we
should say something like "connection to server lost", without claiming
that the server crashed.  It is less clear what to do with ECONNABORTED,
but I'm inclined to put it in the network-problem bucket not the
server-crash bucket, despite lorikeet's behavior.  Thoughts?

This also destroys the patch's idea that switch statements should be
able to handle these all alike.  If we group things as "ECONNRESET means
server crash and the others are all network failures", then I'd be
inclined to leave the ECONNRESET cases alone and just introduce
new infrastructure to recognize all the network-failure errnos.

            regards, tom lane



Re: Expansion of our checks for connection-loss errors

From
Tom Lane
Date:
I wrote:
> Hmm, excellent point.  While our code response to all these errors
> should be the same, you are right that that doesn't extend to emitting
> identical error texts.  For EHOSTUNREACH/ENETDOWN/ENETUNREACH, we
> should say something like "connection to server lost", without claiming
> that the server crashed.  It is less clear what to do with ECONNABORTED,
> but I'm inclined to put it in the network-problem bucket not the
> server-crash bucket, despite lorikeet's behavior.  Thoughts?

> This also destroys the patch's idea that switch statements should be
> able to handle these all alike.  If we group things as "ECONNRESET means
> server crash and the others are all network failures", then I'd be
> inclined to leave the ECONNRESET cases alone and just introduce
> new infrastructure to recognize all the network-failure errnos.

Actually, treating it that way seems like a good thing because it nets
out as (nearly) no change to our error message behavior.  The connection
failure errnos fall through to the default case, which produces a
perfectly reasonable report that includes strerror().  The only big thing
we're changing is the set of errnos that errcode_for_socket_access will
map to ERRCODE_CONNECTION_FAILURE, so this is spiritually closer to your
original patch.

Some other changes in the attached v2:

* I incorporated Kyotaro-san's suggested improvements.

* I went ahead and included ENETRESET and EHOSTDOWN, figuring that
if they exist we definitely want to class them as network failures.
We can worry about ifdef'ing them when and if we find a platform
that hasn't got them.  (I don't see any non-ugly way to make the
ALL_NETWORK_FAILURE_ERRNOS macro vary for missing symbols, so I'd
rather not deal with that unless it's proven necessary.)

* I noticed that we were not terribly consistent about whether
EPIPE is regarded as indicating a server failure like ECONNRESET
does.  So this patch also makes sure that EPIPE is treated like
ECONNRESET everywhere.  (Hence, pqsecure_raw_read's error reporting
does change, since it'll now report EPIPE as server failure.)

I lack a way to test this on Windows, but otherwise it feels
like it's about ready.

            regards, tom lane

diff --git a/src/backend/port/win32/socket.c b/src/backend/port/win32/socket.c
index 6fbd1ed6fb..b4ef9fbd8f 100644
--- a/src/backend/port/win32/socket.c
+++ b/src/backend/port/win32/socket.c
@@ -121,12 +121,20 @@ TranslateSocketError(void)
             errno = EADDRNOTAVAIL;
             break;
         case WSAEHOSTUNREACH:
-        case WSAEHOSTDOWN:
         case WSAHOST_NOT_FOUND:
+            errno = EHOSTUNREACH;
+            break;
+        case WSAEHOSTDOWN:
+            errno = EHOSTDOWN;
+            break;
         case WSAENETDOWN:
+            errno = ENETDOWN;
+            break;
         case WSAENETUNREACH:
+            errno = ENETUNREACH;
+            break;
         case WSAENETRESET:
-            errno = EHOSTUNREACH;
+            errno = ENETRESET;
             break;
         case WSAENOTCONN:
         case WSAESHUTDOWN:
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index d0b368530e..637768d776 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -712,9 +712,8 @@ errcode_for_socket_access(void)
     {
             /* Loss of connection */
         case EPIPE:
-#ifdef ECONNRESET
         case ECONNRESET:
-#endif
+        case ALL_NETWORK_FAILURE_ERRNOS:
             edata->sqlerrcode = ERRCODE_CONNECTION_FAILURE;
             break;

diff --git a/src/bin/pg_dump/parallel.c b/src/bin/pg_dump/parallel.c
index f0587f41e4..fa798431ea 100644
--- a/src/bin/pg_dump/parallel.c
+++ b/src/bin/pg_dump/parallel.c
@@ -1825,10 +1825,15 @@ piperead(int s, char *buf, int len)
 {
     int            ret = recv(s, buf, len, 0);

-    if (ret < 0 && WSAGetLastError() == WSAECONNRESET)
+    if (ret < 0)
     {
-        /* EOF on the pipe! */
-        ret = 0;
+        int            werrno = TranslateSocketError(WSAGetLastError());
+
+        if (errno_is_connection_loss(werrno))
+        {
+            /* EOF on the pipe! */
+            ret = 0;
+        }
     }
     return ret;
 }
diff --git a/src/include/port.h b/src/include/port.h
index 84bf2c363f..766bb3dd6d 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -99,6 +99,36 @@ extern void pgfnames_cleanup(char **filenames);
 )
 #endif

+/*
+ * This macro provides a centralized list of all errnos that identify
+ * network-level failure of a previously-established TCP connection.  This
+ * excludes ECONNRESET, which we treat as reporting a probable server crash.
+ * (EPIPE is also excluded, since it's likewise a server-crash indication,
+ * and not TCP either.)  The macro is intended to be used in a switch
+ * statement, in the form "case ALL_NETWORK_FAILURE_ERRNOS:".
+ */
+#define ALL_NETWORK_FAILURE_ERRNOS \
+    ECONNABORTED: \
+    case EHOSTDOWN: \
+    case EHOSTUNREACH: \
+    case ENETDOWN: \
+    case ENETRESET: \
+    case ENETUNREACH
+
+/* Test for all connection-loss errnos, whether server or network failure */
+static inline bool
+errno_is_connection_loss(int e)
+{
+    switch (e)
+    {
+        case EPIPE:
+        case ECONNRESET:
+        case ALL_NETWORK_FAILURE_ERRNOS:
+            return true;
+    }
+    return false;
+}
+
 /* Portable locale initialization (in exec.c) */
 extern void set_pglocale_pgservice(const char *argv0, const char *app);

diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h
index 8b6576b23d..e07baa555d 100644
--- a/src/include/port/win32_port.h
+++ b/src/include/port/win32_port.h
@@ -349,8 +349,16 @@ extern int    pgwin32_safestat(const char *path, struct stat *buf);
 #define EADDRINUSE WSAEADDRINUSE
 #undef EADDRNOTAVAIL
 #define EADDRNOTAVAIL WSAEADDRNOTAVAIL
+#undef EHOSTDOWN
+#define EHOSTDOWN WSAEHOSTDOWN
 #undef EHOSTUNREACH
 #define EHOSTUNREACH WSAEHOSTUNREACH
+#undef ENETDOWN
+#define ENETDOWN WSAENETDOWN
+#undef ENETRESET
+#define ENETRESET WSAENETRESET
+#undef ENETUNREACH
+#define ENETUNREACH WSAENETUNREACH
 #undef ENOTCONN
 #define ENOTCONN WSAENOTCONN

diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c
index ff840b7730..9bcbda7eb7 100644
--- a/src/interfaces/libpq/fe-misc.c
+++ b/src/interfaces/libpq/fe-misc.c
@@ -679,11 +679,9 @@ retry3:
         if (SOCK_ERRNO == EWOULDBLOCK)
             return someread;
 #endif
-        /* We might get ECONNRESET here if using TCP and backend died */
-#ifdef ECONNRESET
-        if (SOCK_ERRNO == ECONNRESET)
+        /* We might get ECONNRESET etc here if connection failed */
+        if (errno_is_connection_loss(SOCK_ERRNO))
             goto definitelyFailed;
-#endif
         /* pqsecure_read set the error message for us */
         return -1;
     }
@@ -769,11 +767,9 @@ retry4:
         if (SOCK_ERRNO == EWOULDBLOCK)
             return 0;
 #endif
-        /* We might get ECONNRESET here if using TCP and backend died */
-#ifdef ECONNRESET
-        if (SOCK_ERRNO == ECONNRESET)
+        /* We might get ECONNRESET etc here if connection failed */
+        if (errno_is_connection_loss(SOCK_ERRNO))
             goto definitelyFailed;
-#endif
         /* pqsecure_read set the error message for us */
         return -1;
     }
diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c
index 3311fd7a5b..97c3805303 100644
--- a/src/interfaces/libpq/fe-secure.c
+++ b/src/interfaces/libpq/fe-secure.c
@@ -261,14 +261,13 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
                 /* no error message, caller is expected to retry */
                 break;

-#ifdef ECONNRESET
+            case EPIPE:
             case ECONNRESET:
                 printfPQExpBuffer(&conn->errorMessage,
                                   libpq_gettext("server closed the connection unexpectedly\n"
                                                 "\tThis probably means the server terminated abnormally\n"
                                                 "\tbefore or while processing the request.\n"));
                 break;
-#endif

             default:
                 printfPQExpBuffer(&conn->errorMessage,
@@ -374,11 +373,9 @@ retry_masked:
                 /* Set flag for EPIPE */
                 REMEMBER_EPIPE(spinfo, true);

-#ifdef ECONNRESET
                 /* FALL THRU */

             case ECONNRESET:
-#endif
                 printfPQExpBuffer(&conn->errorMessage,
                                   libpq_gettext("server closed the connection unexpectedly\n"
                                                 "\tThis probably means the server terminated abnormally\n"
diff --git a/src/interfaces/libpq/win32.h b/src/interfaces/libpq/win32.h
index c42d7abfe3..fcce1e0544 100644
--- a/src/interfaces/libpq/win32.h
+++ b/src/interfaces/libpq/win32.h
@@ -14,17 +14,6 @@
 #define write(a,b,c) _write(a,b,c)

 #undef EAGAIN                    /* doesn't apply on sockets */
-#undef EINTR
-#define EINTR WSAEINTR
-#ifndef EWOULDBLOCK
-#define EWOULDBLOCK WSAEWOULDBLOCK
-#endif
-#ifndef ECONNRESET
-#define ECONNRESET WSAECONNRESET
-#endif
-#ifndef EINPROGRESS
-#define EINPROGRESS WSAEINPROGRESS
-#endif

 /*
  * support for handling Windows Socket errors
diff --git a/src/port/strerror.c b/src/port/strerror.c
index 375edb0f5a..43a9761d90 100644
--- a/src/port/strerror.c
+++ b/src/port/strerror.c
@@ -146,16 +146,12 @@ get_errno_symbol(int errnum)
             return "EBUSY";
         case ECHILD:
             return "ECHILD";
-#ifdef ECONNABORTED
         case ECONNABORTED:
             return "ECONNABORTED";
-#endif
         case ECONNREFUSED:
             return "ECONNREFUSED";
-#ifdef ECONNRESET
         case ECONNRESET:
             return "ECONNRESET";
-#endif
         case EDEADLK:
             return "EDEADLK";
         case EDOM:
@@ -166,10 +162,10 @@ get_errno_symbol(int errnum)
             return "EFAULT";
         case EFBIG:
             return "EFBIG";
-#ifdef EHOSTUNREACH
+        case EHOSTDOWN:
+            return "EHOSTDOWN";
         case EHOSTUNREACH:
             return "EHOSTUNREACH";
-#endif
         case EIDRM:
             return "EIDRM";
         case EINPROGRESS:
@@ -198,6 +194,12 @@ get_errno_symbol(int errnum)
             return "EMSGSIZE";
         case ENAMETOOLONG:
             return "ENAMETOOLONG";
+        case ENETDOWN:
+            return "ENETDOWN";
+        case ENETRESET:
+            return "ENETRESET";
+        case ENETUNREACH:
+            return "ENETUNREACH";
         case ENFILE:
             return "ENFILE";
         case ENOBUFS: