Thread: Re: [BUGS] BUG #2195: log_min_messages crash server when in DEBUG3 to

Re: [BUGS] BUG #2195: log_min_messages crash server when in DEBUG3 to

From
Bruce Momjian
Date:
Tom Lane wrote:
> Bruce Momjian <pgman@candle.pha.pa.us> writes:
> > configure \
> >     --with-x \
> >     --with-tcl \
> >     --with-perl \
> >     --enable-pltcl-unknown \
> >     --with-tclconfig=/u/lib \
> >     --with-tkconfig=/u/lib \
> >     --with-includes="/usr/local/include/readline /usr/contrib/include" \
> >     --with-libraries="/usr/local/lib /usr/contrib/lib" \
> >     --enable-locale \
> >     --enable-multibyte \
> >     --with-openssl \
> >     --enable-cassert \
> >     --enable-thread-safety \
> >     --enable-nls
>
> Still can't reproduce it here.  What stack trace do you get from the
> crash?

Sorry it took me so long to get you a backtrace.  I have found the
problem.   The backtrace:

    #0  0x285119b5 in strlen () from /shlib/libc.so.2
    #1  0x8307bec in fmtstr (value=0x0, leftjust=0, minlen=0, maxwidth=0,
    pointflag=0, target=0x8046d9c) at snprintf.c:775
    #2  0x83079c8 in dopr (target=0x8046d9c, format=0x84e501e " child[%d]:
    starting with (", args=0x8046e40 "�]") at snprintf.c:710
    #3  0x830669a in pg_vsnprintf (str=0x84e5128 "", count=255,
    fmt=0x84e501c "%s child[%d]: starting with (", args=0x8046e3c "")
        at snprintf.c:157
    #4  0x81a4508 in appendStringInfoVA (str=0x8046e14, fmt=0x84e501c "%s
    child[%d]: starting with (", args=0x8046e3c "")
        at stringinfo.c:125
    #5  0x82dafba in errmsg_internal (fmt=0x83f2a21 "%s child[%d]: starting
    with (") at elog.c:673
    #6  0x820a4fb in BackendRun (port=0x8046ecc) at postmaster.c:2912
    #7  0x820ac89 in SubPostmasterMain (argc=3, argv=0x8047168) at
    postmaster.c:3349
    #8  0x81af04a in main (argc=3, argv=0x8047168) at main.c:262
    #9  0x808b513 in __start ()

and here is the line in postmaster.c:

    /*
     * Debug: print arguments being passed to backend
     */
    ereport(DEBUG3,
            (errmsg_internal("%s child[%d]: starting with (",
                             progname, (int) getpid())));

The problem is that for EXEC_BACKEND, which is the default for WIN32 and
what I used for testing, progname is NULL because we do not set it after
the fork/exec.  Most versions of printf report "(null)" for a null
argument to printf("%s"), but our port/snprintf.c version aborts, which
is probably better in terms of finding bugs.  If you set EXEC_BACKEND
and run log_min_messages='debug5' in postgresql.conf and start a
backend, you will see:

    DEBUG:  (null) child[8349]: starting with (

while on Win32 and my OS, you get a crash.  This problem exists back to
8.0 when we added EXEC_BACKEND.  My guess is few Win32 users set
log_min_messages to debug3 or lower in postgresql.conf.  If it is set
via SET, the backend has already started and things should be OK.
However, there are several error messages that print progname, so having
it NULL is bad.

The attached patch fixes the problem by setting progname early in
main(), like we do for all other applications.  We could try a more
limited patch in earlier branches, but I am unsure how we can be sure
that would be hit all use cases.  By putting it right at the top of
main(), we are sure to have that variable set.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
Index: src/backend/main/main.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/main/main.c,v
retrieving revision 1.99
diff -c -c -r1.99 main.c
*** src/backend/main/main.c    5 Jan 2006 03:01:34 -0000    1.99
--- src/backend/main/main.c    31 Jan 2006 18:54:18 -0000
***************
*** 45,51 ****
  #include "libpq/pqsignal.h"
  #endif

!

  int
  main(int argc, char *argv[])
--- 45,51 ----
  #include "libpq/pqsignal.h"
  #endif

! const char *progname;

  int
  main(int argc, char *argv[])
***************
*** 77,82 ****
--- 77,84 ----
      char       *env_locale;
  #endif

+     progname = get_progname(argv[0]);
+
      /*
       * On some platforms, unaligned memory accesses result in a kernel trap;
       * the default kernel behavior is to emulate the memory access, but this
***************
*** 246,252 ****
       * possibly first argument) we were called with. The lack of consistency
       * here is historical.
       */
!     if (strcmp(get_progname(argv[0]), "postmaster") == 0)
      {
          /* Called as "postmaster" */
          exit(PostmasterMain(argc, argv));
--- 248,254 ----
       * possibly first argument) we were called with. The lack of consistency
       * here is historical.
       */
!     if (strcmp(progname, "postmaster") == 0)
      {
          /* Called as "postmaster" */
          exit(PostmasterMain(argc, argv));
Index: src/backend/postmaster/postmaster.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v
retrieving revision 1.479
diff -c -c -r1.479 postmaster.c
*** src/backend/postmaster/postmaster.c    6 Jan 2006 02:58:25 -0000    1.479
--- src/backend/postmaster/postmaster.c    31 Jan 2006 18:54:19 -0000
***************
*** 171,179 ****
   */
  int            ReservedBackends;

-
- static const char *progname = NULL;
-
  /* The socket(s) we're listening to. */
  #define MAXLISTEN    64
  static int    ListenSocket[MAXLISTEN];
--- 171,176 ----
***************
*** 383,391 ****
      char       *userDoption = NULL;
      int            i;

-     /* This will call exit() if strdup() fails. */
-     progname = get_progname(argv[0]);
-
      MyProcPid = PostmasterPid = getpid();

      IsPostmasterEnvironment = true;
--- 380,385 ----
Index: src/include/postmaster/postmaster.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/postmaster/postmaster.h,v
retrieving revision 1.11
diff -c -c -r1.11 postmaster.h
*** src/include/postmaster/postmaster.h    20 Aug 2005 23:26:33 -0000    1.11
--- src/include/postmaster/postmaster.h    31 Jan 2006 18:54:20 -0000
***************
*** 34,39 ****
--- 34,40 ----
  extern HANDLE PostmasterHandle;
  #endif

+ extern const char *progname;

  extern int    PostmasterMain(int argc, char *argv[]);
  extern void ClosePostmasterPorts(bool am_syslogger);

Re: [BUGS] BUG #2195: log_min_messages crash server when in DEBUG3 to

From
Tom Lane
Date:
Bruce Momjian <pgman@candle.pha.pa.us> writes:
> The problem is that for EXEC_BACKEND, which is the default for WIN32 and
> what I used for testing, progname is NULL because we do not set it after
> the fork/exec.  Most versions of printf report "(null)" for a null
> argument to printf("%s"), but our port/snprintf.c version aborts, which
> is probably better in terms of finding bugs.

Good catch.

> The attached patch fixes the problem by setting progname early in
> main(), like we do for all other applications.

It'd probably be best to strdup() argv[0], because of the likelihood
that the argv area will get clobbered at some point by ps_status.c.
It was OK to have a private static in postmaster.c, because the
postmaster doesn't ever call ps_status.c, but if we are going to expose
the variable in child processes it needs to be more globally valid.

I see that get_progname strdup's its argument in some cases but not
all --- so simplest fix is to make it always do so.

            regards, tom lane

Re: [BUGS] BUG #2195: log_min_messages crash server when in DEBUG3 to

From
Bruce Momjian
Date:
Tom Lane wrote:
> Bruce Momjian <pgman@candle.pha.pa.us> writes:
> > The problem is that for EXEC_BACKEND, which is the default for WIN32 and
> > what I used for testing, progname is NULL because we do not set it after
> > the fork/exec.  Most versions of printf report "(null)" for a null
> > argument to printf("%s"), but our port/snprintf.c version aborts, which
> > is probably better in terms of finding bugs.
>
> Good catch.
>
> > The attached patch fixes the problem by setting progname early in
> > main(), like we do for all other applications.
>
> It'd probably be best to strdup() argv[0], because of the likelihood
> that the argv area will get clobbered at some point by ps_status.c.
> It was OK to have a private static in postmaster.c, because the
> postmaster doesn't ever call ps_status.c, but if we are going to expose
> the variable in child processes it needs to be more globally valid.
>
> I see that get_progname strdup's its argument in some cases but not
> all --- so simplest fix is to make it always do so.

OK, new version that updates get_progname to always strdup().

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
Index: src/backend/main/main.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/main/main.c,v
retrieving revision 1.99
diff -c -c -r1.99 main.c
*** src/backend/main/main.c    5 Jan 2006 03:01:34 -0000    1.99
--- src/backend/main/main.c    31 Jan 2006 21:03:01 -0000
***************
*** 45,51 ****
  #include "libpq/pqsignal.h"
  #endif

!

  int
  main(int argc, char *argv[])
--- 45,51 ----
  #include "libpq/pqsignal.h"
  #endif

! const char *progname;

  int
  main(int argc, char *argv[])
***************
*** 77,82 ****
--- 77,84 ----
      char       *env_locale;
  #endif

+     progname = get_progname(argv[0]);
+
      /*
       * On some platforms, unaligned memory accesses result in a kernel trap;
       * the default kernel behavior is to emulate the memory access, but this
***************
*** 246,252 ****
       * possibly first argument) we were called with. The lack of consistency
       * here is historical.
       */
!     if (strcmp(get_progname(argv[0]), "postmaster") == 0)
      {
          /* Called as "postmaster" */
          exit(PostmasterMain(argc, argv));
--- 248,254 ----
       * possibly first argument) we were called with. The lack of consistency
       * here is historical.
       */
!     if (strcmp(progname, "postmaster") == 0)
      {
          /* Called as "postmaster" */
          exit(PostmasterMain(argc, argv));
Index: src/backend/postmaster/postmaster.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v
retrieving revision 1.479
diff -c -c -r1.479 postmaster.c
*** src/backend/postmaster/postmaster.c    6 Jan 2006 02:58:25 -0000    1.479
--- src/backend/postmaster/postmaster.c    31 Jan 2006 21:03:04 -0000
***************
*** 171,179 ****
   */
  int            ReservedBackends;

-
- static const char *progname = NULL;
-
  /* The socket(s) we're listening to. */
  #define MAXLISTEN    64
  static int    ListenSocket[MAXLISTEN];
--- 171,176 ----
***************
*** 383,391 ****
      char       *userDoption = NULL;
      int            i;

-     /* This will call exit() if strdup() fails. */
-     progname = get_progname(argv[0]);
-
      MyProcPid = PostmasterPid = getpid();

      IsPostmasterEnvironment = true;
--- 380,385 ----
Index: src/include/postmaster/postmaster.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/postmaster/postmaster.h,v
retrieving revision 1.11
diff -c -c -r1.11 postmaster.h
*** src/include/postmaster/postmaster.h    20 Aug 2005 23:26:33 -0000    1.11
--- src/include/postmaster/postmaster.h    31 Jan 2006 21:03:06 -0000
***************
*** 34,39 ****
--- 34,40 ----
  extern HANDLE PostmasterHandle;
  #endif

+ extern const char *progname;

  extern int    PostmasterMain(int argc, char *argv[]);
  extern void ClosePostmasterPorts(bool am_syslogger);
Index: src/port/path.c
===================================================================
RCS file: /cvsroot/pgsql/src/port/path.c,v
retrieving revision 1.63
diff -c -c -r1.63 path.c
*** src/port/path.c    23 Dec 2005 22:34:22 -0000    1.63
--- src/port/path.c    31 Jan 2006 21:03:06 -0000
***************
*** 388,394 ****
  const char *
  get_progname(const char *argv0)
  {
!     const char *nodir_name;

      nodir_name = last_dir_separator(argv0);
      if (nodir_name)
--- 388,395 ----
  const char *
  get_progname(const char *argv0)
  {
!     const char  *nodir_name;
!     const char    *progname;

      nodir_name = last_dir_separator(argv0);
      if (nodir_name)
***************
*** 396,420 ****
      else
          nodir_name = skip_drive(argv0);

! #if defined(__CYGWIN__) || defined(WIN32)
!     /* strip .exe suffix, regardless of case */
!     if (strlen(nodir_name) > sizeof(EXE) - 1 &&
!         pg_strcasecmp(nodir_name + strlen(nodir_name) - (sizeof(EXE) - 1), EXE) == 0)
      {
!         char       *progname;

!         progname = strdup(nodir_name);    /* leaks memory, but called only once */
!         if (progname == NULL)
!         {
!             fprintf(stderr, "%s: out of memory\n", nodir_name);
!             exit(1);            /* This could exit the postmaster */
!         }
          progname[strlen(progname) - (sizeof(EXE) - 1)] = '\0';
-         nodir_name = progname;
-     }
  #endif

!     return nodir_name;
  }


--- 397,421 ----
      else
          nodir_name = skip_drive(argv0);

!     /*
!      *    Make a copy in case argv[0] is modified by ps_status.
!      *    Leaks memory, but called only once.
!      */
!     progname = strdup(nodir_name);
!     if (progname == NULL)
      {
!         fprintf(stderr, "%s: out of memory\n", nodir_name);
!         exit(1);            /* This could exit the postmaster */
!     }

! #if defined(__CYGWIN__) || defined(WIN32)
!     /* strip ".exe" suffix, regardless of case */
!     if (strlen(progname) > sizeof(EXE) - 1 &&
!         pg_strcasecmp(progname + strlen(progname) - (sizeof(EXE) - 1), EXE) == 0)
          progname[strlen(progname) - (sizeof(EXE) - 1)] = '\0';
  #endif

!     return progname;
  }



Re: [BUGS] BUG #2195: log_min_messages crash server when in

From
Bruce Momjian
Date:
Applied to HEAD, 8.1.X and 8.0.X.

---------------------------------------------------------------------------

Bruce Momjian wrote:
> Tom Lane wrote:
> > Bruce Momjian <pgman@candle.pha.pa.us> writes:
> > > The problem is that for EXEC_BACKEND, which is the default for WIN32 and
> > > what I used for testing, progname is NULL because we do not set it after
> > > the fork/exec.  Most versions of printf report "(null)" for a null
> > > argument to printf("%s"), but our port/snprintf.c version aborts, which
> > > is probably better in terms of finding bugs.
> >
> > Good catch.
> >
> > > The attached patch fixes the problem by setting progname early in
> > > main(), like we do for all other applications.
> >
> > It'd probably be best to strdup() argv[0], because of the likelihood
> > that the argv area will get clobbered at some point by ps_status.c.
> > It was OK to have a private static in postmaster.c, because the
> > postmaster doesn't ever call ps_status.c, but if we are going to expose
> > the variable in child processes it needs to be more globally valid.
> >
> > I see that get_progname strdup's its argument in some cases but not
> > all --- so simplest fix is to make it always do so.
>
> OK, new version that updates get_progname to always strdup().
>
> --
>   Bruce Momjian                        |  http://candle.pha.pa.us
>   pgman@candle.pha.pa.us               |  (610) 359-1001
>   +  If your life is a hard drive,     |  13 Roberts Road
>   +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

> Index: src/backend/main/main.c
> ===================================================================
> RCS file: /cvsroot/pgsql/src/backend/main/main.c,v
> retrieving revision 1.99
> diff -c -c -r1.99 main.c
> *** src/backend/main/main.c    5 Jan 2006 03:01:34 -0000    1.99
> --- src/backend/main/main.c    31 Jan 2006 21:03:01 -0000
> ***************
> *** 45,51 ****
>   #include "libpq/pqsignal.h"
>   #endif
>
> !
>
>   int
>   main(int argc, char *argv[])
> --- 45,51 ----
>   #include "libpq/pqsignal.h"
>   #endif
>
> ! const char *progname;
>
>   int
>   main(int argc, char *argv[])
> ***************
> *** 77,82 ****
> --- 77,84 ----
>       char       *env_locale;
>   #endif
>
> +     progname = get_progname(argv[0]);
> +
>       /*
>        * On some platforms, unaligned memory accesses result in a kernel trap;
>        * the default kernel behavior is to emulate the memory access, but this
> ***************
> *** 246,252 ****
>        * possibly first argument) we were called with. The lack of consistency
>        * here is historical.
>        */
> !     if (strcmp(get_progname(argv[0]), "postmaster") == 0)
>       {
>           /* Called as "postmaster" */
>           exit(PostmasterMain(argc, argv));
> --- 248,254 ----
>        * possibly first argument) we were called with. The lack of consistency
>        * here is historical.
>        */
> !     if (strcmp(progname, "postmaster") == 0)
>       {
>           /* Called as "postmaster" */
>           exit(PostmasterMain(argc, argv));
> Index: src/backend/postmaster/postmaster.c
> ===================================================================
> RCS file: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v
> retrieving revision 1.479
> diff -c -c -r1.479 postmaster.c
> *** src/backend/postmaster/postmaster.c    6 Jan 2006 02:58:25 -0000    1.479
> --- src/backend/postmaster/postmaster.c    31 Jan 2006 21:03:04 -0000
> ***************
> *** 171,179 ****
>    */
>   int            ReservedBackends;
>
> -
> - static const char *progname = NULL;
> -
>   /* The socket(s) we're listening to. */
>   #define MAXLISTEN    64
>   static int    ListenSocket[MAXLISTEN];
> --- 171,176 ----
> ***************
> *** 383,391 ****
>       char       *userDoption = NULL;
>       int            i;
>
> -     /* This will call exit() if strdup() fails. */
> -     progname = get_progname(argv[0]);
> -
>       MyProcPid = PostmasterPid = getpid();
>
>       IsPostmasterEnvironment = true;
> --- 380,385 ----
> Index: src/include/postmaster/postmaster.h
> ===================================================================
> RCS file: /cvsroot/pgsql/src/include/postmaster/postmaster.h,v
> retrieving revision 1.11
> diff -c -c -r1.11 postmaster.h
> *** src/include/postmaster/postmaster.h    20 Aug 2005 23:26:33 -0000    1.11
> --- src/include/postmaster/postmaster.h    31 Jan 2006 21:03:06 -0000
> ***************
> *** 34,39 ****
> --- 34,40 ----
>   extern HANDLE PostmasterHandle;
>   #endif
>
> + extern const char *progname;
>
>   extern int    PostmasterMain(int argc, char *argv[]);
>   extern void ClosePostmasterPorts(bool am_syslogger);
> Index: src/port/path.c
> ===================================================================
> RCS file: /cvsroot/pgsql/src/port/path.c,v
> retrieving revision 1.63
> diff -c -c -r1.63 path.c
> *** src/port/path.c    23 Dec 2005 22:34:22 -0000    1.63
> --- src/port/path.c    31 Jan 2006 21:03:06 -0000
> ***************
> *** 388,394 ****
>   const char *
>   get_progname(const char *argv0)
>   {
> !     const char *nodir_name;
>
>       nodir_name = last_dir_separator(argv0);
>       if (nodir_name)
> --- 388,395 ----
>   const char *
>   get_progname(const char *argv0)
>   {
> !     const char  *nodir_name;
> !     const char    *progname;
>
>       nodir_name = last_dir_separator(argv0);
>       if (nodir_name)
> ***************
> *** 396,420 ****
>       else
>           nodir_name = skip_drive(argv0);
>
> ! #if defined(__CYGWIN__) || defined(WIN32)
> !     /* strip .exe suffix, regardless of case */
> !     if (strlen(nodir_name) > sizeof(EXE) - 1 &&
> !         pg_strcasecmp(nodir_name + strlen(nodir_name) - (sizeof(EXE) - 1), EXE) == 0)
>       {
> !         char       *progname;
>
> !         progname = strdup(nodir_name);    /* leaks memory, but called only once */
> !         if (progname == NULL)
> !         {
> !             fprintf(stderr, "%s: out of memory\n", nodir_name);
> !             exit(1);            /* This could exit the postmaster */
> !         }
>           progname[strlen(progname) - (sizeof(EXE) - 1)] = '\0';
> -         nodir_name = progname;
> -     }
>   #endif
>
> !     return nodir_name;
>   }
>
>
> --- 397,421 ----
>       else
>           nodir_name = skip_drive(argv0);
>
> !     /*
> !      *    Make a copy in case argv[0] is modified by ps_status.
> !      *    Leaks memory, but called only once.
> !      */
> !     progname = strdup(nodir_name);
> !     if (progname == NULL)
>       {
> !         fprintf(stderr, "%s: out of memory\n", nodir_name);
> !         exit(1);            /* This could exit the postmaster */
> !     }
>
> ! #if defined(__CYGWIN__) || defined(WIN32)
> !     /* strip ".exe" suffix, regardless of case */
> !     if (strlen(progname) > sizeof(EXE) - 1 &&
> !         pg_strcasecmp(progname + strlen(progname) - (sizeof(EXE) - 1), EXE) == 0)
>           progname[strlen(progname) - (sizeof(EXE) - 1)] = '\0';
>   #endif
>
> !     return progname;
>   }
>
>

>
> ---------------------------(end of broadcast)---------------------------
> TIP 3: Have you checked our extensive FAQ?
>
>                http://www.postgresql.org/docs/faq

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073