pg_ctl -w (wait) option on Windows - Mailing list pgsql-patches

From Dave Page
Subject pg_ctl -w (wait) option on Windows
Date
Msg-id 46816B7A.40101@postgresql.org
Whole thread Raw
Responses Re: pg_ctl -w (wait) option on Windows  (Magnus Hagander <magnus@hagander.net>)
List pgsql-patches
The attached patch implements the following fixes to pg_ctl on Windows:

- Fix the -w (wait) option to work in Windows service mode, per bug
#3382. This is required on Windows because pg_ctl reports running status
to the service control manager when actually still in recovery/startup,
causing any dependent services to start too early.

- Prevent the -w option being passed to the postmaster.

- Read the postmaster options file when starting as a Windows service.

As a side note, I noticed whilst testing this that the -w option relies
on libpq to provide the username with which to connect to the server to
test the connection. This will fail if that user is not the one that
initialized the cluster, or if the superuser name has been changed. I
hit the former case because I initdb'ed in my dev environment, and then
setup the server to run as a service under a dedicated user account. I
realise this won't affect normal users, and falls neatly into the 'don't
do that then' category, but I thought I'd mention it :-)

Regards, Dave
Index: pg_ctl.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/bin/pg_ctl/pg_ctl.c,v
retrieving revision 1.80
diff -c -r1.80 pg_ctl.c
*** pg_ctl.c    31 May 2007 15:13:04 -0000    1.80
--- pg_ctl.c    26 Jun 2007 19:24:06 -0000
***************
*** 126,136 ****
  static void WINAPI pgwin32_ServiceMain(DWORD, LPTSTR *);
  static void pgwin32_doRunAsService(void);
  static int    CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION * processInfo);
  #endif
  static pgpid_t get_pgpid(void);
  static char **readfile(const char *path);
! static int    start_postmaster(void);
! static bool test_postmaster_connection(void);
  static bool postmaster_is_alive(pid_t pid);

  static char def_postopts_file[MAXPGPATH];
--- 126,147 ----
  static void WINAPI pgwin32_ServiceMain(DWORD, LPTSTR *);
  static void pgwin32_doRunAsService(void);
  static int    CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION * processInfo);
+
+ static SERVICE_STATUS status;
+ static SERVICE_STATUS_HANDLE hStatus = (SERVICE_STATUS_HANDLE) 0;
+ static HANDLE shutdownHandles[2];
+ static pid_t postmasterPID = -1;
+
+ #define shutdownEvent      shutdownHandles[0]
+ #define postmasterProcess shutdownHandles[1]
  #endif
+
  static pgpid_t get_pgpid(void);
  static char **readfile(const char *path);
! static int start_postmaster(void);
! static void read_post_opts(void);
!
! static bool test_postmaster_connection(bool);
  static bool postmaster_is_alive(pid_t pid);

  static char def_postopts_file[MAXPGPATH];
***************
*** 391,405 ****



! /* Find the pgport and try a connection */
  static bool
! test_postmaster_connection(void)
  {
!     PGconn       *conn;
!     bool        success = false;
!     int            i;
!     char        portstr[32];
!     char       *p;

      *portstr = '\0';

--- 402,421 ----



! /*
!  * Find the pgport and try a connection
!  * Note that the checkpoint parameter enables a Windows service control
!  * manager checkpoint, it's got nothing to do with database checkpoints!!
!  */
  static bool
! test_postmaster_connection(bool do_checkpoint)
  {
!     PGconn    *conn;
!     bool    success = false;
!     int    i;
!     char    portstr[32];
!     char    *p;
!     char    connstr[128]; /* Should be way more than enough! */

      *portstr = '\0';

***************
*** 464,473 ****
      if (!*portstr)
          snprintf(portstr, sizeof(portstr), "%d", DEF_PGPORT);

      for (i = 0; i < wait_seconds; i++)
      {
!         if ((conn = PQsetdbLogin(NULL, portstr, NULL, NULL,
!                                  "postgres", NULL, NULL)) != NULL &&
              (PQstatus(conn) == CONNECTION_OK ||
               (strcmp(PQerrorMessage(conn),
                       PQnoPasswordSupplied) == 0)))
--- 480,491 ----
      if (!*portstr)
          snprintf(portstr, sizeof(portstr), "%d", DEF_PGPORT);

+     /* We need to set a connect timeout otherwise on Windows the SCM will probably timeout first */
+     snprintf(connstr, sizeof(connstr), "dbname=postgres port=%s connect_timeout=5", portstr);
+
      for (i = 0; i < wait_seconds; i++)
      {
!         if ((conn = PQconnectdb(connstr)) != NULL &&
              (PQstatus(conn) == CONNECTION_OK ||
               (strcmp(PQerrorMessage(conn),
                       PQnoPasswordSupplied) == 0)))
***************
*** 479,485 ****
          else
          {
              PQfinish(conn);
!             print_msg(".");
              pg_usleep(1000000); /* 1 sec */
          }
      }
--- 497,521 ----
          else
          {
              PQfinish(conn);
!
! #if defined(WIN32)
!             if (do_checkpoint)
!             {
!                 /*
!                  * Increment the wait hint by 6 secs (connection timeout + sleep)
!                  * We must do this to indicate to the SCM that our startup time is
!                  * changing, otherwise it'll usually send a stop signal after 20
!                  * seconds, despite incrementing the checkpoint counter.
!                  */
!                 status.dwWaitHint += 6000;
!                 status.dwCheckPoint++;
!                 SetServiceStatus(hStatus, (LPSERVICE_STATUS) & status);
!             }
!
!             else
! #endif
!                 print_msg(".");
!
              pg_usleep(1000000); /* 1 sec */
          }
      }
***************
*** 508,531 ****
  }
  #endif

-
-
  static void
! do_start(void)
  {
-     pgpid_t        pid;
-     pgpid_t        old_pid = 0;
      char       *optline = NULL;
-     int            exitcode;
-
-     if (ctl_command != RESTART_COMMAND)
-     {
-         old_pid = get_pgpid();
-         if (old_pid != 0)
-             write_stderr(_("%s: another server might be running; "
-                            "trying to start server anyway\n"),
-                          progname);
-     }

      if (post_opts == NULL)
      {
--- 544,553 ----
  }
  #endif

  static void
! read_post_opts(void)
  {
      char       *optline = NULL;

      if (post_opts == NULL)
      {
***************
*** 536,542 ****
                              postopts_file : def_postopts_file);
          if (optlines == NULL)
          {
!             if (ctl_command == START_COMMAND)
                  post_opts = "";
              else
              {
--- 558,564 ----
                              postopts_file : def_postopts_file);
          if (optlines == NULL)
          {
!             if (ctl_command == START_COMMAND || ctl_command == RUN_AS_SERVICE_COMMAND)
                  post_opts = "";
              else
              {
***************
*** 576,581 ****
--- 598,622 ----
                  post_opts = optline;
          }
      }
+ }
+
+ static void
+ do_start(void)
+ {
+     pgpid_t        pid;
+     pgpid_t        old_pid = 0;
+     int            exitcode;
+
+     if (ctl_command != RESTART_COMMAND)
+     {
+         old_pid = get_pgpid();
+         if (old_pid != 0)
+             write_stderr(_("%s: another server might be running; "
+                            "trying to start server anyway\n"),
+                          progname);
+     }
+
+     read_post_opts();

      /* No -D or -D already added during server start */
      if (ctl_command == RESTART_COMMAND || pgdata_opt == NULL)
***************
*** 642,648 ****
      {
          print_msg(_("waiting for server to start..."));

!         if (test_postmaster_connection() == false)
          {
              printf(_("could not start server\n"));
              exit(1);
--- 683,689 ----
      {
          print_msg(_("waiting for server to start..."));

!         if (test_postmaster_connection(false) == false)
          {
              printf(_("could not start server\n"));
              exit(1);
***************
*** 982,988 ****
          strcat(cmdLine, "\"");
      }

!     if (do_wait)
          strcat(cmdLine, " -w");

      if (post_opts)
--- 1023,1029 ----
          strcat(cmdLine, "\"");
      }

!     if (registration && do_wait)
          strcat(cmdLine, " -w");

      if (post_opts)
***************
*** 1065,1079 ****
      CloseServiceHandle(hSCM);
  }

-
- static SERVICE_STATUS status;
- static SERVICE_STATUS_HANDLE hStatus = (SERVICE_STATUS_HANDLE) 0;
- static HANDLE shutdownHandles[2];
- static pid_t postmasterPID = -1;
-
- #define shutdownEvent      shutdownHandles[0]
- #define postmasterProcess shutdownHandles[1]
-
  static void
  pgwin32_SetServiceStatus(DWORD currentState)
  {
--- 1106,1111 ----
***************
*** 1118,1123 ****
--- 1150,1156 ----
  {
      PROCESS_INFORMATION pi;
      DWORD        ret;
+     DWORD           check_point_start;

      /* Initialize variables */
      status.dwWin32ExitCode = S_OK;
***************
*** 1130,1142 ****

      memset(&pi, 0, sizeof(pi));

      /* Register the control request handler */
      if ((hStatus = RegisterServiceCtrlHandler(register_servicename, pgwin32_ServiceHandler)) ==
(SERVICE_STATUS_HANDLE)0) 
          return;

      if ((shutdownEvent = CreateEvent(NULL, true, false, NULL)) == NULL)
          return;
!
      /* Start the postmaster */
      pgwin32_SetServiceStatus(SERVICE_START_PENDING);
      if (!CreateRestrictedProcess(pgwin32_CommandLine(false), &pi))
--- 1163,1177 ----

      memset(&pi, 0, sizeof(pi));

+         read_post_opts();
+
      /* Register the control request handler */
      if ((hStatus = RegisterServiceCtrlHandler(register_servicename, pgwin32_ServiceHandler)) ==
(SERVICE_STATUS_HANDLE)0) 
          return;

      if ((shutdownEvent = CreateEvent(NULL, true, false, NULL)) == NULL)
          return;
!
      /* Start the postmaster */
      pgwin32_SetServiceStatus(SERVICE_START_PENDING);
      if (!CreateRestrictedProcess(pgwin32_CommandLine(false), &pi))
***************
*** 1147,1156 ****
--- 1182,1208 ----
      postmasterPID = pi.dwProcessId;
      postmasterProcess = pi.hProcess;
      CloseHandle(pi.hThread);
+
+     if (do_wait)
+     {
+         write_eventlog(EVENTLOG_INFORMATION_TYPE, _("Waiting for server startup...\n"));
+         if (test_postmaster_connection(true) == false)
+         {
+                     write_eventlog(EVENTLOG_INFORMATION_TYPE, _("Timed out waiting for server startup\n"));
+              pgwin32_SetServiceStatus(SERVICE_STOPPED);
+             return;
+         }
+         write_eventlog(EVENTLOG_INFORMATION_TYPE, _("Server started and accepting connections\n"));
+     }
+
+         /* Save the checkpoint value as it might have been incremented in test_postmaster_connection */
+         check_point_start = status.dwCheckPoint;
+
      pgwin32_SetServiceStatus(SERVICE_RUNNING);

      /* Wait for quit... */
      ret = WaitForMultipleObjects(2, shutdownHandles, FALSE, INFINITE);
+
      pgwin32_SetServiceStatus(SERVICE_STOP_PENDING);
      switch (ret)
      {

pgsql-patches by date:

Previous
From: Heikki Linnakangas
Date:
Subject: Re: Load Distributed Checkpoints, final patch
Next
From: Gregory Stark
Date:
Subject: Re: Load Distributed Checkpoints, final patch