Re: A couple of random BF failures in kerberosCheck - Mailing list pgsql-hackers

From Tom Lane
Subject Re: A couple of random BF failures in kerberosCheck
Date
Msg-id 3397.1564872168@sss.pgh.pa.us
Whole thread Raw
In response to Re: A couple of random BF failures in kerberosCheck  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: A couple of random BF failures in kerberosCheck
List pgsql-hackers
I wrote:
> * kerberos/t/001_auth.pl just blithely assumes that it can pick
> any random port above 48K and that's guaranteed to be free.
> Maybe we should split out the code in get_new_node for finding
> a free TCP port, so we can call it here?

I've confirmed that the reason it's failing on my machine is exactly
that krb5kdc tries to bind to a socket that is still in TIME_WAIT state.
Also, it looks like the socket is typically one that was used by the
GSSAPI client side (no surprise, the test leaves a lot more of those
than the one server socket), so we'd have no record of it even if we
were somehow saving state from prior runs.

So I propose the attached patch, which seems to fix this for me.

The particular case I'm looking at (running these tests in a tight
loop) is of course not that interesting, but I argue that it's just
increasing the odds of failure enough that I can isolate the cause.
A buildfarm animal running both kerberos and ldap tests is almost
certainly at risk of such a failure with low probability.

(Still don't know what actually happened in those two buildfarm
failures, though.)

            regards, tom lane

diff --git a/src/test/kerberos/t/001_auth.pl b/src/test/kerberos/t/001_auth.pl
index 237b6fb..34845a7 100644
--- a/src/test/kerberos/t/001_auth.pl
+++ b/src/test/kerberos/t/001_auth.pl
@@ -69,7 +69,7 @@ my $krb5_conf   = "${TestLib::tmp_check}/krb5.conf";
 my $kdc_conf    = "${TestLib::tmp_check}/kdc.conf";
 my $krb5_log    = "${TestLib::tmp_check}/krb5libs.log";
 my $kdc_log     = "${TestLib::tmp_check}/krb5kdc.log";
-my $kdc_port    = int(rand() * 16384) + 49152;
+my $kdc_port    = get_free_port();
 my $kdc_datadir = "${TestLib::tmp_check}/krb5kdc";
 my $kdc_pidfile = "${TestLib::tmp_check}/krb5kdc.pid";
 my $keytab      = "${TestLib::tmp_check}/krb5.keytab";
diff --git a/src/test/ldap/t/001_auth.pl b/src/test/ldap/t/001_auth.pl
index 84a3300..47bc090 100644
--- a/src/test/ldap/t/001_auth.pl
+++ b/src/test/ldap/t/001_auth.pl
@@ -55,7 +55,7 @@ my $slapd_pidfile = "${TestLib::tmp_check}/slapd.pid";
 my $slapd_logfile = "${TestLib::tmp_check}/slapd.log";
 my $ldap_conf     = "${TestLib::tmp_check}/ldap.conf";
 my $ldap_server   = 'localhost';
-my $ldap_port     = int(rand() * 16384) + 49152;
+my $ldap_port     = get_free_port();
 my $ldaps_port    = $ldap_port + 1;
 my $ldap_url      = "ldap://$ldap_server:$ldap_port";
 my $ldaps_url     = "ldaps://$ldap_server:$ldaps_port";
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index 6019f37..1481160 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -63,6 +63,9 @@ PostgresNode - class representing PostgreSQL server instance
   # Stop the server
   $node->stop('fast');
 
+  # Find a free, unprivileged TCP port to bind some other service to
+  my $port = PostgresNode->get_free_port();
+
 =head1 DESCRIPTION
 
 PostgresNode contains a set of routines able to work on a PostgreSQL node,
@@ -102,6 +105,7 @@ use Scalar::Util qw(blessed);
 
 our @EXPORT = qw(
   get_new_node
+  get_free_port
 );
 
 our ($use_tcp, $test_localhost, $test_pghost, $last_host_assigned,
@@ -1071,9 +1075,68 @@ sub get_new_node
     my $class = 'PostgresNode';
     $class = shift if scalar(@_) % 2 != 1;
     my ($name, %params) = @_;
-    my $port_is_forced = defined $params{port};
-    my $found          = $port_is_forced;
-    my $port = $port_is_forced ? $params{port} : $last_port_assigned;
+
+    # Select a port.
+    my $port;
+    if (defined $params{port})
+    {
+        $port = $params{port};
+    }
+    else
+    {
+        # When selecting a port, we look for an unassigned TCP port number,
+        # even if we intend to use only Unix-domain sockets.  This is clearly
+        # necessary on $use_tcp (Windows) configurations, and it seems like a
+        # good idea on Unixen as well.
+        $port = get_free_port();
+    }
+
+    # Select a host.
+    my $host = $test_pghost;
+    if ($params{own_host})
+    {
+        if ($use_tcp)
+        {
+            $last_host_assigned++;
+            $last_host_assigned > 254 and BAIL_OUT("too many own_host nodes");
+            $host = '127.0.0.' . $last_host_assigned;
+        }
+        else
+        {
+            $host = "$test_pghost/$name"; # Assume $name =~ /^[-_a-zA-Z0-9]+$/
+            mkdir $host;
+        }
+    }
+
+    # Lock port number found by creating a new node
+    my $node = $class->new($name, $host, $port);
+
+    # Add node to list of nodes
+    push(@all_nodes, $node);
+
+    return $node;
+}
+
+=pod
+
+=item PostgresNode->get_free_port()
+
+Locate an unprivileged (high) TCP port that's not currently bound to
+anything.  This is used by get_new_node, and is also exported for use
+by test cases that need to start other, non-Postgres servers.
+
+Ports assigned to existing PostgresNode objects are automatically
+excluded, even if those servers are not currently running.
+
+XXX A port available now may become unavailable by the time we start
+the desired service.
+
+=cut
+
+sub get_free_port
+{
+    my $found = 0;
+    my $port  = $last_port_assigned;
 
     while ($found == 0)
     {
@@ -1090,24 +1153,18 @@ sub get_new_node
             $found = 0 if ($node->port == $port);
         }
 
-        # Check to see if anything else is listening on this TCP port.  This
-        # is *necessary* on $use_tcp (Windows) configurations.  Seek a port
-        # available for all possible listen_addresses values, for own_host
-        # nodes and so the caller can harness this port for the widest range
-        # of purposes.  The 0.0.0.0 test achieves that for post-2006 Cygwin,
-        # which automatically sets SO_EXCLUSIVEADDRUSE.  The same holds for
-        # MSYS (a Cygwin fork).  Testing 0.0.0.0 is insufficient for Windows
-        # native Perl (https://stackoverflow.com/a/14388707), so we also test
+        # Check to see if anything else is listening on this TCP port.
+        # Seek a port available for all possible listen_addresses values,
+        # so callers can harness this port for the widest range of purposes.
+        # The 0.0.0.0 test achieves that for post-2006 Cygwin, which
+        # automatically sets SO_EXCLUSIVEADDRUSE.  The same holds for MSYS (a
+        # Cygwin fork).  Testing 0.0.0.0 is insufficient for Windows native
+        # Perl (https://stackoverflow.com/a/14388707), so we also test
         # individual addresses.
         #
-        # This seems like a good idea on Unixen as well, even though we don't
-        # ask the postmaster to open a TCP port on Unix.  On Non-Linux,
-        # non-Windows kernels, binding to 127.0.0.1/24 addresses other than
-        # 127.0.0.1 might fail with EADDRNOTAVAIL.  Binding to 0.0.0.0 is
-        # unnecessary on non-Windows systems.
-        #
-        # XXX A port available now may become unavailable by the time we start
-        # the postmaster.
+        # On non-Linux, non-Windows kernels, binding to 127.0.0/24 addresses
+        # other than 127.0.0.1 might fail with EADDRNOTAVAIL.  Binding to
+        # 0.0.0.0 is unnecessary on non-Windows systems.
         if ($found == 1)
         {
             foreach my $addr (qw(127.0.0.1),
@@ -1120,33 +1177,10 @@ sub get_new_node
 
     print "# Found port $port\n";
 
-    # Select a host.
-    my $host = $test_pghost;
-    if ($params{own_host})
-    {
-        if ($use_tcp)
-        {
-            $last_host_assigned++;
-            $last_host_assigned > 254 and BAIL_OUT("too many own_host nodes");
-            $host = '127.0.0.' . $last_host_assigned;
-        }
-        else
-        {
-            $host = "$test_pghost/$name"; # Assume $name =~ /^[-_a-zA-Z0-9]+$/
-            mkdir $host;
-        }
-    }
-
-    # Lock port number found by creating a new node
-    my $node = $class->new($name, $host, $port);
-
-    # Add node to list of nodes
-    push(@all_nodes, $node);
+    # Update port for next time
+    $last_port_assigned = $port;
 
-    # And update port for next time
-    $port_is_forced or $last_port_assigned = $port;
-
-    return $node;
+    return $port;
 }
 
 # Internal routine to check whether a host:port is available to bind

pgsql-hackers by date:

Previous
From: Julien Rouhaud
Date:
Subject: Re: Feature improvement: can we add queryId for pg_catalog.pg_stat_activityview?
Next
From: Andres Freund
Date:
Subject: Re: Redacting information from logs