Re: Postgres 7.4.7 hang in async_notify - Mailing list pgsql-bugs

From Tom Lane
Subject Re: Postgres 7.4.7 hang in async_notify
Date
Msg-id 28071.1117746362@sss.pgh.pa.us
Whole thread Raw
In response to Postgres 7.4.7 hang in async_notify  (pgsql-bugs@counterstorm.com)
Responses Re: Postgres 7.4.7 hang in async_notify
List pgsql-bugs
pgsql-bugs@counterstorm.com writes:
> We saw the problem with async_notify again (See thread with subject
> "Postgres 7.4.6 hang in async_notify" in first message to this list
> dated "Mon, 25 Apr 2005 15:42:35 -0400") in a production setting.

I've applied a patch that (I think) will fix this.  Attached is the
7.4-branch version of it.

            regards, tom lane

Index: src/backend/libpq/be-secure.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/libpq/be-secure.c,v
retrieving revision 1.43.2.1
diff -c -r1.43.2.1 be-secure.c
*** src/backend/libpq/be-secure.c    18 Dec 2003 22:49:34 -0000    1.43.2.1
--- src/backend/libpq/be-secure.c    2 Jun 2005 20:43:58 -0000
***************
*** 79,85 ****
  #include <sys/stat.h>
  #include <signal.h>
  #include <fcntl.h>
- #include <errno.h>
  #include <ctype.h>
  #include <sys/socket.h>
  #include <unistd.h>
--- 79,84 ----
***************
*** 97,102 ****
--- 96,103 ----

  #include "libpq/libpq.h"
  #include "miscadmin.h"
+ #include "tcop/tcopprot.h"
+

  #ifdef USE_SSL
  static DH  *load_dh_file(int keylength);
***************
*** 301,308 ****
--- 302,315 ----
      }
      else
  #endif
+     {
+         prepare_for_client_read();
+
          n = recv(port->sock, ptr, len, 0);

+         client_read_ended();
+     }
+
      return n;
  }

***************
*** 395,400 ****
--- 402,474 ----
  /*                          SSL specific code                        */
  /* ------------------------------------------------------------ */
  #ifdef USE_SSL
+
+ /*
+  * Private substitute BIO: this wraps the SSL library's standard socket BIO
+  * so that we can enable and disable interrupts just while calling recv().
+  * We cannot have interrupts occurring while the bulk of openssl runs,
+  * because it uses malloc() and possibly other non-reentrant libc facilities.
+  *
+  * As of openssl 0.9.7, we can use the reasonably clean method of interposing
+  * a wrapper around the standard socket BIO's sock_read() method.  This relies
+  * on the fact that sock_read() doesn't call anything non-reentrant, in fact
+  * not much of anything at all except recv().  If this ever changes we'd
+  * probably need to duplicate the code of sock_read() in order to push the
+  * interrupt enable/disable down yet another level.
+  */
+
+ static bool my_bio_initialized = false;
+ static BIO_METHOD my_bio_methods;
+ static int (*std_sock_read) (BIO *h, char *buf, int size);
+
+ static int
+ my_sock_read(BIO *h, char *buf, int size)
+ {
+     int        res;
+
+     prepare_for_client_read();
+
+     res = std_sock_read(h, buf, size);
+
+     client_read_ended();
+
+     return res;
+ }
+
+ static BIO_METHOD *
+ my_BIO_s_socket(void)
+ {
+     if (!my_bio_initialized)
+     {
+         memcpy(&my_bio_methods, BIO_s_socket(), sizeof(BIO_METHOD));
+         std_sock_read = my_bio_methods.bread;
+         my_bio_methods.bread = my_sock_read;
+         my_bio_initialized = true;
+     }
+     return &my_bio_methods;
+ }
+
+ /* This should exactly match openssl's SSL_set_fd except for using my BIO */
+ static int
+ my_SSL_set_fd(SSL *s, int fd)
+ {
+     int ret=0;
+     BIO *bio=NULL;
+
+     bio=BIO_new(my_BIO_s_socket());
+
+     if (bio == NULL)
+     {
+         SSLerr(SSL_F_SSL_SET_FD,ERR_R_BUF_LIB);
+         goto err;
+     }
+     BIO_set_fd(bio,fd,BIO_NOCLOSE);
+     SSL_set_bio(s,bio,bio);
+     ret=1;
+ err:
+     return(ret);
+ }
+
  /*
   *    Load precomputed DH parameters.
   *
***************
*** 718,724 ****
      Assert(!port->peer);

      if (!(port->ssl = SSL_new(SSL_context)) ||
!         !SSL_set_fd(port->ssl, port->sock) ||
          SSL_accept(port->ssl) <= 0)
      {
          ereport(COMMERROR,
--- 792,798 ----
      Assert(!port->peer);

      if (!(port->ssl = SSL_new(SSL_context)) ||
!         !my_SSL_set_fd(port->ssl, port->sock) ||
          SSL_accept(port->ssl) <= 0)
      {
          ereport(COMMERROR,
Index: src/backend/tcop/postgres.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/tcop/postgres.c,v
retrieving revision 1.375.2.2
diff -c -r1.375.2.2 postgres.c
*** src/backend/tcop/postgres.c    26 Sep 2004 00:26:53 -0000    1.375.2.2
--- src/backend/tcop/postgres.c    2 Jun 2005 20:43:58 -0000
***************
*** 109,114 ****
--- 109,121 ----
  static bool xact_started = false;

  /*
+  * Flag to indicate that we are doing the outer loop's read-from-client,
+  * as opposed to any random read from client that might happen within
+  * commands like COPY FROM STDIN.
+  */
+ static bool DoingCommandRead = false;
+
+ /*
   * Flags to implement skip-till-Sync-after-error behavior for messages of
   * the extended query protocol.
   */
***************
*** 400,405 ****
--- 407,456 ----
      return result;
  }

+ /*
+  * prepare_for_client_read -- set up to possibly block on client input
+  *
+  * This must be called immediately before any low-level read from the
+  * client connection.  It is necessary to do it at a sufficiently low level
+  * that there won't be any other operations except the read kernel call
+  * itself between this call and the subsequent client_read_ended() call.
+  * In particular there mustn't be use of malloc() or other potentially
+  * non-reentrant libc functions.  This restriction makes it safe for us
+  * to allow interrupt service routines to execute nontrivial code while
+  * we are waiting for input.
+  */
+ void
+ prepare_for_client_read(void)
+ {
+     if (DoingCommandRead)
+     {
+         /* Enable immediate processing of asynchronous signals */
+         EnableNotifyInterrupt();
+
+         /* Allow "die" interrupt to be processed while waiting */
+         ImmediateInterruptOK = true;
+
+         /* And don't forget to detect one that already arrived */
+         QueryCancelPending = false;
+         CHECK_FOR_INTERRUPTS();
+     }
+ }
+
+ /*
+  * client_read_ended -- get out of the client-input state
+  */
+ void
+ client_read_ended(void)
+ {
+     if (DoingCommandRead)
+     {
+         ImmediateInterruptOK = false;
+         QueryCancelPending = false;        /* forget any CANCEL signal */
+
+         DisableNotifyInterrupt();
+     }
+ }
+

  /*
   * Parse a query string and pass it through the rewriter.
***************
*** 2706,2711 ****
--- 2757,2763 ----
          CritSectionCount = 0;    /* should be unnecessary, but... */
          disable_sig_alarm(true);
          QueryCancelPending = false;        /* again in case timeout occurred */
+         DoingCommandRead = false;
          DisableNotifyInterrupt();
          debug_query_string = NULL;

***************
*** 2814,2833 ****
          }

          /*
!          * (2) deal with pending asynchronous NOTIFY from other backends,
!          * and enable async.c's signal handler to execute NOTIFY directly.
!          * Then set up other stuff needed before blocking for input.
           */
!         QueryCancelPending = false;        /* forget any earlier CANCEL
!                                          * signal */
!
!         EnableNotifyInterrupt();
!
!         /* Allow "die" interrupt to be processed while waiting */
!         ImmediateInterruptOK = true;
!         /* and don't forget to detect one that already arrived */
!         QueryCancelPending = false;
!         CHECK_FOR_INTERRUPTS();

          /*
           * (3) read a command (loop blocks here)
--- 2866,2878 ----
          }

          /*
!          * (2) Allow asynchronous signals to be executed immediately
!          * if they come in while we are waiting for client input.
!          * (This must be conditional since we don't want, say, reads on
!          * behalf of COPY FROM STDIN doing the same thing.)
           */
!         QueryCancelPending = false;        /* forget any earlier CANCEL signal */
!         DoingCommandRead = true;

          /*
           * (3) read a command (loop blocks here)
***************
*** 2837,2846 ****
          /*
           * (4) disable async signal conditions again.
           */
!         ImmediateInterruptOK = false;
!         QueryCancelPending = false;        /* forget any CANCEL signal */
!
!         DisableNotifyInterrupt();

          /*
           * (5) check for any other interesting events that happened while
--- 2882,2888 ----
          /*
           * (4) disable async signal conditions again.
           */
!         DoingCommandRead = false;

          /*
           * (5) check for any other interesting events that happened while
Index: src/include/tcop/tcopprot.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/tcop/tcopprot.h,v
retrieving revision 1.60
diff -c -r1.60 tcopprot.h
*** src/include/tcop/tcopprot.h    4 Aug 2003 02:40:15 -0000    1.60
--- src/include/tcop/tcopprot.h    2 Jun 2005 20:43:58 -0000
***************
*** 49,54 ****
--- 49,56 ----
  extern void die(SIGNAL_ARGS);
  extern void quickdie(SIGNAL_ARGS);
  extern void authdie(SIGNAL_ARGS);
+ extern void prepare_for_client_read(void);
+ extern void client_read_ended(void);
  extern int    PostgresMain(int argc, char *argv[], const char *username);
  extern void ResetUsage(void);
  extern void ShowUsage(const char *title);

pgsql-bugs by date:

Previous
From: Tom Lane
Date:
Subject: Re: Postgres 7.4.7 hang in async_notify
Next
From: pgsql-bugs@counterstorm.com
Date:
Subject: Re: Postgres 7.4.7 hang in async_notify