Re: archive_keepalive_command - Mailing list pgsql-hackers

From Bruce Momjian
Subject Re: archive_keepalive_command
Date
Msg-id 20120827134822.GE11088@momjian.us
Whole thread Raw
In response to Re: archive_keepalive_command  (Simon Riggs <simon@2ndQuadrant.com>)
Responses Re: archive_keepalive_command
List pgsql-hackers
Where are we on this?

---------------------------------------------------------------------------

On Mon, Jan 16, 2012 at 01:52:35AM +0000, Simon Riggs wrote:
> On Fri, Dec 16, 2011 at 3:01 PM, Simon Riggs <simon@2ndquadrant.com> wrote:
> > archive_command and restore_command describe how to ship WAL files
> > to/from an archive.
> >
> > When there is nothing to ship, we delay sending WAL files. When no WAL
> > files arrive, the standby has no information at all.
> >
> > To provide some form of keepalive on quiet systems the
> > archive_keepalive_command provides a generic hook to implement
> > keepalives. This is implemented as a separate command to avoid storing
> > keepalive messages in the archive, or at least allow overwrites using
> > a single filename like "keepalive".
> >
> > Examples
> > archive_keepalive_command = 'arch_cmd keepalive'   # sends a file
> > called "keepalive" to archive, overwrites allowed
> > archive_keepalive_command = 'arch_cmd %f.%t.keepalive'  # sends a file
> > like 000000010000000AB00000000FE.20111216143517.keepalive
> >
> > If there is no WAL file to send, then we send a keepalive file
> > instead. Keepalive is a small file that contains the same contents as a
> > streaming keepalive message (re: other patch on that).
> >
> > If no WAL file is available and we are attempting to restore in
> > standby_mode, then we execute restore_keepalive_command to see if a
> > keepalive file is available. It checks for a file in the specific
> > keepalive format and then uses that to update the last received info
> > from the master.
> >
> > e.g.
> > restore_keepalive_command = 'restore_cmd keepalive'   # gets a file
> > called "keepalive" from archive, overwrites allowed
> 
> Patch.
> 
> -- 
>  Simon Riggs                   http://www.2ndQuadrant.com/
>  PostgreSQL Development, 24x7 Support, Training & Services

> diff --git a/src/backend/access/transam/recovery.conf.sample b/src/backend/access/transam/recovery.conf.sample
> index 5acfa57..fab288c 100644
> --- a/src/backend/access/transam/recovery.conf.sample
> +++ b/src/backend/access/transam/recovery.conf.sample
> @@ -43,6 +43,13 @@
>  #
>  #restore_command = ''        # e.g. 'cp /mnt/server/archivedir/%f %p'
>  #
> +# restore_keepalive_command
> +#
> +# specifies an optional shell command to download keepalive files
> +#  e.g. archive_keepalive_command = 'cp -f %p $ARCHIVE/keepalive </dev/null'
> +#  e.g. restore_keepalive_command = 'cp $ARCHIVE/keepalive %p'
> +#
> +#restore_keepalive_command = ''
>  #
>  # archive_cleanup_command
>  #
> diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
> index ce659ec..2729141 100644
> --- a/src/backend/access/transam/xlog.c
> +++ b/src/backend/access/transam/xlog.c
> @@ -73,8 +73,10 @@ int            CheckPointSegments = 3;
>  int            wal_keep_segments = 0;
>  int            XLOGbuffers = -1;
>  int            XLogArchiveTimeout = 0;
> +int            XLogArchiveKeepaliveTimeout = 10;    /* XXX set to 60 before commit */
>  bool        XLogArchiveMode = false;
>  char       *XLogArchiveCommand = NULL;
> +char       *XLogArchiveKeepaliveCommand = NULL;
>  bool        EnableHotStandby = false;
>  bool        fullPageWrites = true;
>  bool        log_checkpoints = false;
> @@ -188,6 +190,7 @@ static bool restoredFromArchive = false;
>  
>  /* options taken from recovery.conf for archive recovery */
>  static char *recoveryRestoreCommand = NULL;
> +static char *recoveryRestoreKeepaliveCommand = NULL;
>  static char *recoveryEndCommand = NULL;
>  static char *archiveCleanupCommand = NULL;
>  static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
> @@ -634,6 +637,7 @@ static int    emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
>  static void XLogFileClose(void);
>  static bool RestoreArchivedFile(char *path, const char *xlogfname,
>                      const char *recovername, off_t expectedSize);
> +static void RestoreKeepaliveFile(void);
>  static void ExecuteRecoveryCommand(char *command, char *commandName,
>                         bool failOnerror);
>  static void PreallocXlogFiles(XLogRecPtr endptr);
> @@ -2718,7 +2722,10 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
>                                                        "RECOVERYXLOG",
>                                                        XLogSegSize);
>              if (!restoredFromArchive)
> +            {
> +                RestoreKeepaliveFile();
>                  return -1;
> +            }
>              break;
>  
>          case XLOG_FROM_PG_XLOG:
> @@ -3179,6 +3186,192 @@ not_available:
>      return false;
>  }
>  
> +static void
> +RestoreKeepaliveFile(void)
> +{
> +    char        keepalivepath[MAXPGPATH];
> +    char        keepaliveRestoreCmd[MAXPGPATH];
> +    char       *dp;
> +    char       *endp;
> +    const char *sp;
> +    int            rc;
> +    bool        signaled;
> +    struct stat stat_buf;
> +
> +    /* In standby mode, restore_keepalive_command might not be supplied */
> +    if (recoveryRestoreKeepaliveCommand == NULL)
> +        return;
> +
> +    snprintf(keepalivepath, MAXPGPATH, XLOGDIR "/archive_status/KEEPALIVE");
> +
> +    /*
> +     * Make sure there is no existing file in keepalivepath
> +     */
> +    if (stat(keepalivepath, &stat_buf) == 0)
> +    {
> +        if (unlink(keepalivepath) != 0)
> +            ereport(FATAL,
> +                    (errcode_for_file_access(),
> +                     errmsg("could not remove file \"%s\": %m",
> +                            keepalivepath)));
> +    }
> +
> +    /*
> +     * construct the command to be executed
> +     */
> +    dp = keepaliveRestoreCmd;
> +    endp = keepaliveRestoreCmd + MAXPGPATH - 1;
> +    *endp = '\0';
> +
> +    for (sp = recoveryRestoreKeepaliveCommand; *sp; sp++)
> +    {
> +        if (*sp == '%')
> +        {
> +            switch (sp[1])
> +            {
> +                case 'p':
> +                    /* %p: relative path of target file */
> +                    sp++;
> +                    StrNCpy(dp, keepalivepath, endp - dp);
> +                    make_native_path(dp);
> +                    dp += strlen(dp);
> +                    break;
> +                case '%':
> +                    /* convert %% to a single % */
> +                    sp++;
> +                    if (dp < endp)
> +                        *dp++ = *sp;
> +                    break;
> +                default:
> +                    /* otherwise treat the % as not special */
> +                    if (dp < endp)
> +                        *dp++ = *sp;
> +                    break;
> +            }
> +        }
> +        else
> +        {
> +            if (dp < endp)
> +                *dp++ = *sp;
> +        }
> +    }
> +    *dp = '\0';
> +
> +    ereport(DEBUG2,
> +            (errmsg_internal("executing restore keepalive command \"%s\"",
> +                             keepaliveRestoreCmd)));
> +
> +    /*
> +     * Check signals before restore command and reset afterwards.
> +     */
> +    PreRestoreCommand();
> +
> +    /*
> +     * Copy keepalive from archival storage to archive_status dir
> +     */
> +    rc = system(keepaliveRestoreCmd);
> +
> +    PostRestoreCommand();
> +
> +    if (rc == 0)
> +    {
> +        /*
> +         * command apparently succeeded, but let's check the file is there
> +         */
> +        if (stat(keepalivepath, &stat_buf) == 0)
> +        {
> +            char    kptime[15];
> +            char    kptimezone[4];
> +            char    *kdata;
> +            char    ch;
> +            int        r;
> +            FILE    *fd;
> +
> +            fd = AllocateFile(keepalivepath, "r");
> +            if (!fd)
> +            {
> +                ereport(ERROR,
> +                        (errcode_for_file_access(),
> +                         errmsg("could not read file \"%s\": %m",
> +                                keepalivepath)));
> +            }
> +            kdata = palloc(stat_buf.st_size + 1);
> +            r = fread(kdata, stat_buf.st_size, 1, fd);
> +            kdata[stat_buf.st_size] = '\0';
> +
> +            /*
> +             * Close and remove the keepalive file
> +             */
> +            if (r != 1 || ferror(fd) || FreeFile(fd))
> +                ereport(ERROR,
> +                            (errcode_for_file_access(),
> +                         errmsg("could not read file \"%s\": %m",
> +                                keepalivepath)));
> +
> +            /*
> +             * Parse the keepalive file
> +             */
> +            if (sscanf(kdata, "KEEPALIVE TIME: %14s%3s%c",
> +                        kptime, kptimezone, &ch) != 3 || ch != '\n')
> +                ereport(ERROR,
> +                        (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
> +                         errmsg("invalid data in file \"%s\"", keepalivepath)));
> +            kptime[14] = '\0';
> +            kptimezone[3] = '\0';
> +
> +            ereport(DEBUG2,
> +                    (errmsg("restored keepalive from archive %s%s", kptime, kptimezone)));
> +
> +            XLogReceiptSource = XLOG_FROM_ARCHIVE;
> +            XLogReceiptTime = GetCurrentTimestamp();
> +            SetCurrentChunkStartTime(XLogReceiptTime);
> +
> +            if (unlink(keepalivepath) != 0)
> +                ereport(ERROR,
> +                        (errcode_for_file_access(),
> +                         errmsg("could not remove file \"%s\": %m",
> +                                keepalivepath)));
> +            return;
> +        }
> +    }
> +
> +    /*
> +     * Remember, we rollforward UNTIL the restore fails so failure here is
> +     * just part of the process... that makes it difficult to determine
> +     * whether the restore failed because there isn't an archive to restore,
> +     * or because the administrator has specified the restore program
> +     * incorrectly.  We have to assume the former.
> +     *
> +     * However, if the failure was due to any sort of signal, it's best to
> +     * punt and abort recovery.  (If we "return false" here, upper levels will
> +     * assume that recovery is complete and start up the database!) It's
> +     * essential to abort on child SIGINT and SIGQUIT, because per spec
> +     * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
> +     * those it's a good bet we should have gotten it too.
> +     *
> +     * On SIGTERM, assume we have received a fast shutdown request, and exit
> +     * cleanly. It's pure chance whether we receive the SIGTERM first, or the
> +     * child process. If we receive it first, the signal handler will call
> +     * proc_exit, otherwise we do it here. If we or the child process received
> +     * SIGTERM for any other reason than a fast shutdown request, postmaster
> +     * will perform an immediate shutdown when it sees us exiting
> +     * unexpectedly.
> +     *
> +     * Per the Single Unix Spec, shells report exit status > 128 when a called
> +     * command died on a signal.  Also, 126 and 127 are used to report
> +     * problems such as an unfindable command; treat those as fatal errors
> +     * too.
> +     */
> +    if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
> +        proc_exit(1);
> +
> +    signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
> +
> +    ereport(signaled ? FATAL : DEBUG2,
> +        (errmsg("could not restore keepalive file from archive: return code %d",
> +                    rc)));
> +}
> +
>  /*
>   * Attempt to execute an external shell command during recovery.
>   *
> @@ -5304,6 +5497,13 @@ readRecoveryCommandFile(void)
>                      (errmsg_internal("restore_command = '%s'",
>                                       recoveryRestoreCommand)));
>          }
> +        else if (strcmp(item->name, "restore_keepalive_command") == 0)
> +        {
> +            recoveryRestoreKeepaliveCommand = pstrdup(item->value);
> +            ereport(DEBUG2,
> +                    (errmsg_internal("restore_keepalive_command = '%s'",
> +                                     recoveryRestoreKeepaliveCommand)));
> +        }
>          else if (strcmp(item->name, "recovery_end_command") == 0)
>          {
>              recoveryEndCommand = pstrdup(item->value);
> @@ -10102,3 +10302,52 @@ WALWriterLatch(void)
>  {
>      return &XLogCtl->WALWriterLatch;
>  }
> +
> +/*
> + * Write a keepalive file and notify the archiver that it has work to do
> + */
> +void
> +XLogWriteKeepaliveFile(void)
> +{
> +    char        keepalivepath[MAXPGPATH];
> +    char        xlogfname[MAXFNAMELEN];
> +    XLogRecPtr    lastFlushRecPtr = GetFlushRecPtr();
> +    pg_time_t    stamp_time;
> +    char        strfbuf[128];
> +    uint32        log;
> +    uint32        seg;
> +    FILE       *fd;
> +
> +    XLByteToSeg(lastFlushRecPtr, log, seg);
> +    XLogFileName(xlogfname, ThisTimeLineID, log, seg);
> +
> +    /* Use the log timezone here, not the session timezone */
> +    stamp_time = (pg_time_t) time(NULL);
> +    pg_strftime(strfbuf, sizeof(strfbuf),
> +                "%Y%m%d%H%M%S%Z",
> +                pg_localtime(&stamp_time, log_timezone));
> +
> +    KeepaliveFilePath(keepalivepath, xlogfname, strfbuf);
> +
> +    elog(DEBUG4, "keepalive %s", keepalivepath);
> +
> +    fd = AllocateFile(keepalivepath, "w");
> +    if (fd == NULL)
> +    {
> +        ereport(LOG,
> +                (errcode_for_file_access(),
> +                 errmsg("could not create archive keepalive file \"%s\": %m",
> +                        keepalivepath)));
> +        return;
> +    }
> +    fprintf(fd, "KEEPALIVE TIME: %s\n", strfbuf);
> +    if (fflush(fd) || ferror(fd) || FreeFile(fd))
> +        ereport(ERROR,
> +                (errcode_for_file_access(),
> +                 errmsg("could not write file \"%s\": %m",
> +                        keepalivepath)));
> +
> +    /* Notify archiver that it's got something to do */
> +    if (IsUnderPostmaster)
> +        SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
> +}
> diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
> index 0b792d2..29882b1 100644
> --- a/src/backend/postmaster/checkpointer.c
> +++ b/src/backend/postmaster/checkpointer.c
> @@ -164,6 +164,7 @@ static double ckpt_cached_elapsed;
>  
>  static pg_time_t last_checkpoint_time;
>  static pg_time_t last_xlog_switch_time;
> +static pg_time_t last_xlog_keepalive_time;
>  
>  /* Prototypes for private functions */
>  
> @@ -241,7 +242,7 @@ CheckpointerMain(void)
>      /*
>       * Initialize so that first time-driven event happens at the correct time.
>       */
> -    last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL);
> +    last_xlog_keepalive_time = last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL);
>  
>      /*
>       * Create a resource owner to keep track of our resources (currently only
> @@ -546,6 +547,7 @@ CheckpointerMain(void)
>  
>  /*
>   * CheckArchiveTimeout -- check for archive_timeout and switch xlog files
> + *                            or write keepalive files
>   *
>   * This will switch to a new WAL file and force an archive file write
>   * if any activity is recorded in the current WAL file, including just
> @@ -556,47 +558,83 @@ CheckArchiveTimeout(void)
>  {
>      pg_time_t    now;
>      pg_time_t    last_time;
> +    bool        switched = false;
>  
> -    if (XLogArchiveTimeout <= 0 || RecoveryInProgress())
> +    if (RecoveryInProgress())
>          return;
>  
>      now = (pg_time_t) time(NULL);
>  
> +    if (XLogArchiveTimeout > 0)
> +    {
> +        /* First we do a quick check using possibly-stale local state. */
> +        if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout)
> +        {
> +            /*
> +             * Update local state ... note that last_xlog_switch_time is the last time
> +             * a switch was performed *or requested*.
> +             */
> +            last_time = GetLastSegSwitchTime();
> +
> +            last_xlog_switch_time = Max(last_xlog_switch_time, last_time);
> +
> +            /* Now we can do the real check */
> +            if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout)
> +            {
> +                XLogRecPtr    switchpoint;
> +
> +                /* OK, it's time to switch */
> +                switchpoint = RequestXLogSwitch();
> +
> +                /*
> +                 * If the returned pointer points exactly to a segment boundary,
> +                 * assume nothing happened.
> +                 */
> +                if ((switchpoint.xrecoff % XLogSegSize) != 0)
> +                    ereport(DEBUG1,
> +                        (errmsg("transaction log switch forced (archive_timeout=%d)",
> +                                XLogArchiveTimeout)));
> +
> +                /*
> +                 * Update state in any case, so we don't retry constantly when the
> +                 * system is idle.
> +                 */
> +                last_xlog_switch_time = now;
> +                switched = true;
> +            }
> +        }
> +    }
> +
> +    if (switched || !XLogArchiveKeepaliveCommandSet())
> +        return;
> +
>      /* First we do a quick check using possibly-stale local state. */
> -    if ((int) (now - last_xlog_switch_time) < XLogArchiveTimeout)
> +    if ((int) (now - last_xlog_switch_time) < XLogArchiveKeepaliveTimeout)
>          return;
>  
>      /*
> -     * Update local state ... note that last_xlog_switch_time is the last time
> -     * a switch was performed *or requested*.
> +     * Update local state if we didn't do it already.
>       */
> -    last_time = GetLastSegSwitchTime();
> -
> -    last_xlog_switch_time = Max(last_xlog_switch_time, last_time);
> +    if (XLogArchiveTimeout <= 0)
> +        last_time = GetLastSegSwitchTime();
>  
>      /* Now we can do the real check */
> -    if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout)
> -    {
> -        XLogRecPtr    switchpoint;
> +    if ((int) (now - last_xlog_switch_time) < XLogArchiveKeepaliveTimeout)
> +        return;
>  
> -        /* OK, it's time to switch */
> -        switchpoint = RequestXLogSwitch();
> +    if ((int) (now - last_xlog_keepalive_time) < XLogArchiveKeepaliveTimeout)
> +        return;
>  
> -        /*
> -         * If the returned pointer points exactly to a segment boundary,
> -         * assume nothing happened.
> -         */
> -        if ((switchpoint.xrecoff % XLogSegSize) != 0)
> -            ereport(DEBUG1,
> -                (errmsg("transaction log switch forced (archive_timeout=%d)",
> -                        XLogArchiveTimeout)));
> +    /*
> +     * Write a keepalive file for archive_keepalive_command
> +     */
> +    XLogWriteKeepaliveFile();
>  
> -        /*
> -         * Update state in any case, so we don't retry constantly when the
> -         * system is idle.
> -         */
> -        last_xlog_switch_time = now;
> -    }
> +    /*
> +     * We don't log a message to say keepalive sent
> +     */
> +
> +    last_xlog_keepalive_time = now;
>  }
>  
>  /*
> diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
> index 37fc735..e8c19bb 100644
> --- a/src/backend/postmaster/pgarch.c
> +++ b/src/backend/postmaster/pgarch.c
> @@ -51,7 +51,8 @@
>   * Timer definitions.
>   * ----------
>   */
> -#define PGARCH_AUTOWAKE_INTERVAL 60        /* How often to force a poll of the
> +/* XXX change only for testing */
> +#define PGARCH_AUTOWAKE_INTERVAL 10        /* How often to force a poll of the
>                                           * archive status directory; in
>                                           * seconds. */
>  #define PGARCH_RESTART_INTERVAL 10        /* How often to attempt to restart a
> @@ -108,10 +109,14 @@ static void ArchSigTermHandler(SIGNAL_ARGS);
>  static void pgarch_waken(SIGNAL_ARGS);
>  static void pgarch_waken_stop(SIGNAL_ARGS);
>  static void pgarch_MainLoop(void);
> -static void pgarch_ArchiverCopyLoop(void);
> +static void pgarch_ArchiverCopyLoop(bool timedout);
>  static bool pgarch_archiveXlog(char *xlog);
> +static void pgarch_archiveKeepalive(void);
>  static bool pgarch_readyXlog(char *xlog);
>  static void pgarch_archiveDone(char *xlog);
> +static void constructArchiveCommand(char *archcmd, const char *archcmdtemplate,
> +                        const char *filepath, const char *filename);
> +static bool executeArchiveCommand(const char *archcmd, const char *description);
>  
>  
>  /* ------------------------------------------------------------
> @@ -351,6 +356,7 @@ pgarch_MainLoop(void)
>  {
>      pg_time_t    last_copy_time = 0;
>      bool        time_to_stop;
> +    bool        timedout = false;
>  
>      /*
>       * We run the copy loop immediately upon entry, in case there are
> @@ -401,7 +407,8 @@ pgarch_MainLoop(void)
>          if (wakened || time_to_stop)
>          {
>              wakened = false;
> -            pgarch_ArchiverCopyLoop();
> +            pgarch_ArchiverCopyLoop(timedout);
> +            timedout = false;
>              last_copy_time = time(NULL);
>          }
>  
> @@ -424,7 +431,10 @@ pgarch_MainLoop(void)
>                                 WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
>                                 timeout * 1000L);
>                  if (rc & WL_TIMEOUT)
> +                {
> +                    timedout = true;
>                      wakened = true;
> +                }
>              }
>              else
>                  wakened = true;
> @@ -444,9 +454,10 @@ pgarch_MainLoop(void)
>   * Archives all outstanding xlogs then returns
>   */
>  static void
> -pgarch_ArchiverCopyLoop(void)
> +pgarch_ArchiverCopyLoop(bool timedout)
>  {
>      char        xlog[MAX_XFN_CHARS + 1];
> +    bool        sentfile = false;
>  
>      /*
>       * loop through all xlogs with archive_status of .ready and archive
> @@ -486,6 +497,8 @@ pgarch_ArchiverCopyLoop(void)
>              {
>                  ereport(WARNING,
>                          (errmsg("archive_mode enabled, yet archive_command is not set")));
> +                if (!sentfile && timedout)
> +                    pgarch_archiveKeepalive();
>                  return;
>              }
>  
> @@ -493,6 +506,7 @@ pgarch_ArchiverCopyLoop(void)
>              {
>                  /* successful */
>                  pgarch_archiveDone(xlog);
> +                sentfile = true;
>                  break;            /* out of inner retry loop */
>              }
>              else
> @@ -508,151 +522,117 @@ pgarch_ArchiverCopyLoop(void)
>              }
>          }
>      }
> +
> +    if (!sentfile && timedout)
> +        pgarch_archiveKeepalive();
>  }
>  
>  /*
> - * pgarch_archiveXlog
> - *
> - * Invokes system(3) to copy one archive file to wherever it should go
> - *
> - * Returns true if successful
> + * pgarch_archiveXlog - executes archive_command for latest WAL file
>   */
>  static bool
>  pgarch_archiveXlog(char *xlog)
>  {
>      char        xlogarchcmd[MAXPGPATH];
> -    char        pathname[MAXPGPATH];
>      char        activitymsg[MAXFNAMELEN + 16];
> -    char       *dp;
> -    char       *endp;
> -    const char *sp;
> -    int            rc;
> +    char        xlogfilepath[MAXPGPATH];
> +
> +    snprintf(xlogfilepath, MAXPGPATH, XLOGDIR "/%s", xlog);
> +
> +    constructArchiveCommand(xlogarchcmd, XLogArchiveCommand,
> +                            xlogfilepath, xlog);
> +
> +    /* Report archive activity in PS display */
> +    snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
> +    set_ps_display(activitymsg, false);
> +
> +    if (!executeArchiveCommand(xlogarchcmd, "archive command"))
> +        return false;
> +
> +    ereport(DEBUG1,
> +            (errmsg("archived transaction log file \"%s\"", xlog)));
> +
> +    snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
> +    set_ps_display(activitymsg, false);
> +
> +    return true;
> +}
> +
> +/*
> + * pgarch_archiveKeepalive - executes archive_keepalive_command
> + */
> +static void
> +pgarch_archiveKeepalive(void)
> +{
> +#define    LENGTH_DOT_KEEPALIVE    10
> +    char        keepalivearchcmd[MAXPGPATH];
> +    char        keepalivepath[MAXPGPATH];
> +    char        XLogArchiveStatusDir[MAXPGPATH];
> +    char        keepalive[MAX_XFN_CHARS + LENGTH_DOT_KEEPALIVE + 1];
> +    DIR           *rldir;
> +    struct dirent *rlde;
> +    bool        found = false;
>  
> -    snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);
> +    if (!XLogArchiveKeepaliveCommandSet())
> +        return;
>  
>      /*
> -     * construct the command to be executed
> +     * open xlog status directory and read through list of keepalives,
> +     * looking for latest file. It is possible to optimise this code
> +     * though only a single file is expected on the vast majority
> +     * of calls, so....
>       */
> -    dp = xlogarchcmd;
> -    endp = xlogarchcmd + MAXPGPATH - 1;
> -    *endp = '\0';
>  
> -    for (sp = XLogArchiveCommand; *sp; sp++)
> +    snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
> +    rldir = AllocateDir(XLogArchiveStatusDir);
> +    if (rldir == NULL)
> +        ereport(ERROR,
> +                (errcode_for_file_access(),
> +                 errmsg("could not open archive status directory \"%s\": %m",
> +                        XLogArchiveStatusDir)));
> +
> +    while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL)
>      {
> -        if (*sp == '%')
> +        int            basenamelen = (int) strlen(rlde->d_name) - LENGTH_DOT_KEEPALIVE;
> +
> +        if (strcmp(rlde->d_name + basenamelen, ".keepalive") == 0)
>          {
> -            switch (sp[1])
> +            if (!found)
>              {
> -                case 'p':
> -                    /* %p: relative path of source file */
> -                    sp++;
> -                    strlcpy(dp, pathname, endp - dp);
> -                    make_native_path(dp);
> -                    dp += strlen(dp);
> -                    break;
> -                case 'f':
> -                    /* %f: filename of source file */
> -                    sp++;
> -                    strlcpy(dp, xlog, endp - dp);
> -                    dp += strlen(dp);
> -                    break;
> -                case '%':
> -                    /* convert %% to a single % */
> -                    sp++;
> -                    if (dp < endp)
> -                        *dp++ = *sp;
> -                    break;
> -                default:
> -                    /* otherwise treat the % as not special */
> -                    if (dp < endp)
> -                        *dp++ = *sp;
> -                    break;
> +                strcpy(keepalive, rlde->d_name);
> +                found = true;
> +            }
> +            else
> +            {
> +                if (strcmp(rlde->d_name, keepalive) > 0)
> +                {
> +                    sprintf(keepalivepath, "%s/%s", XLogArchiveStatusDir, keepalive);
> +                    unlink(keepalivepath);
> +                    strcpy(keepalive, rlde->d_name);
> +                }
> +                else
> +                {
> +                    sprintf(keepalivepath, "%s/%s", XLogArchiveStatusDir, rlde->d_name);
> +                    unlink(keepalivepath);
> +                }
>              }
> -        }
> -        else
> -        {
> -            if (dp < endp)
> -                *dp++ = *sp;
>          }
>      }
> -    *dp = '\0';
> -
> -    ereport(DEBUG3,
> -            (errmsg_internal("executing archive command \"%s\"",
> -                             xlogarchcmd)));
> -
> -    /* Report archive activity in PS display */
> -    snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
> -    set_ps_display(activitymsg, false);
> +    FreeDir(rldir);
>  
> -    rc = system(xlogarchcmd);
> -    if (rc != 0)
> -    {
> -        /*
> -         * If either the shell itself, or a called command, died on a signal,
> -         * abort the archiver.    We do this because system() ignores SIGINT and
> -         * SIGQUIT while waiting; so a signal is very likely something that
> -         * should have interrupted us too.    If we overreact it's no big deal,
> -         * the postmaster will just start the archiver again.
> -         *
> -         * Per the Single Unix Spec, shells report exit status > 128 when a
> -         * called command died on a signal.
> -         */
> -        int            lev = (WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128) ? FATAL : LOG;
> +    if (!found)
> +        return;
>  
> -        if (WIFEXITED(rc))
> -        {
> -            ereport(lev,
> -                    (errmsg("archive command failed with exit code %d",
> -                            WEXITSTATUS(rc)),
> -                     errdetail("The failed archive command was: %s",
> -                               xlogarchcmd)));
> -        }
> -        else if (WIFSIGNALED(rc))
> -        {
> -#if defined(WIN32)
> -            ereport(lev,
> -                  (errmsg("archive command was terminated by exception 0x%X",
> -                          WTERMSIG(rc)),
> -                   errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
> -                   errdetail("The failed archive command was: %s",
> -                             xlogarchcmd)));
> -#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
> -            ereport(lev,
> -                    (errmsg("archive command was terminated by signal %d: %s",
> -                            WTERMSIG(rc),
> -              WTERMSIG(rc) < NSIG ? sys_siglist[WTERMSIG(rc)] : "(unknown)"),
> -                     errdetail("The failed archive command was: %s",
> -                               xlogarchcmd)));
> -#else
> -            ereport(lev,
> -                    (errmsg("archive command was terminated by signal %d",
> -                            WTERMSIG(rc)),
> -                     errdetail("The failed archive command was: %s",
> -                               xlogarchcmd)));
> -#endif
> -        }
> -        else
> -        {
> -            ereport(lev,
> -                (errmsg("archive command exited with unrecognized status %d",
> -                        rc),
> -                 errdetail("The failed archive command was: %s",
> -                           xlogarchcmd)));
> -        }
> +    sprintf(keepalivepath, "%s/%s", XLogArchiveStatusDir, keepalive);
> +    constructArchiveCommand(keepalivearchcmd, XLogArchiveKeepaliveCommand,
> +                            keepalivepath, keepalive);
> +    if (!executeArchiveCommand(keepalivearchcmd, "archive keepalive command"))
> +        return;
>  
> -        snprintf(activitymsg, sizeof(activitymsg), "failed on %s", xlog);
> -        set_ps_display(activitymsg, false);
> +    unlink(keepalivepath);
>  
> -        return false;
> -    }
>      ereport(DEBUG1,
> -            (errmsg("archived transaction log file \"%s\"", xlog)));
> -
> -    snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
> -    set_ps_display(activitymsg, false);
> -
> -    return true;
> +            (errmsg("archived keepalive file \"%s\"", keepalive)));
>  }
>  
>  /*
> @@ -753,3 +733,138 @@ pgarch_archiveDone(char *xlog)
>                   errmsg("could not rename file \"%s\" to \"%s\": %m",
>                          rlogready, rlogdone)));
>  }
> +
> +/*
> + * Constructs the executable archive command from a template for a given file
> + */
> +static void
> +constructArchiveCommand(char *archcmd, const char *archcmdtemplate,
> +                        const char *filepath, const char *filename)
> +{
> +    char       *dp;
> +    char       *endp;
> +    const char *sp;
> +
> +    /*
> +     * construct the command to be executed
> +     */
> +    dp = archcmd;
> +    endp = archcmd + MAXPGPATH - 1;
> +    *endp = '\0';
> +
> +    for (sp = archcmdtemplate; *sp; sp++)
> +    {
> +        if (*sp == '%')
> +        {
> +            switch (sp[1])
> +            {
> +                case 'p':
> +                    /* %p: relative path of source file */
> +                    sp++;
> +                    strlcpy(dp, filepath, endp - dp);
> +                    make_native_path(dp);
> +                    dp += strlen(dp);
> +                    break;
> +                case 'f':
> +                    /* %f: filename of source file */
> +                    sp++;
> +                    strlcpy(dp, filename, endp - dp);
> +                    dp += strlen(dp);
> +                    break;
> +                case '%':
> +                    /* convert %% to a single % */
> +                    sp++;
> +                    if (dp < endp)
> +                        *dp++ = *sp;
> +                    break;
> +                default:
> +                    /* otherwise treat the % as not special */
> +                    if (dp < endp)
> +                        *dp++ = *sp;
> +                    break;
> +            }
> +        }
> +        else
> +        {
> +            if (dp < endp)
> +                *dp++ = *sp;
> +        }
> +    }
> +    *dp = '\0';
> +}
> +
> +/*
> + * Invokes system(3) to execute the supplied archive command
> + *
> + * Returns true if successful
> + */
> +static bool
> +executeArchiveCommand(const char *archcmd, const char *description)
> +{
> +    int            rc;
> +
> +    ereport(DEBUG3,
> +            (errmsg_internal("executing %s \"%s\"",
> +                             description, archcmd)));
> +
> +    rc = system(archcmd);
> +    if (rc != 0)
> +    {
> +        /*
> +         * If either the shell itself, or a called command, died on a signal,
> +         * abort the archiver.    We do this because system() ignores SIGINT and
> +         * SIGQUIT while waiting; so a signal is very likely something that
> +         * should have interrupted us too.    If we overreact it's no big deal,
> +         * the postmaster will just start the archiver again.
> +         *
> +         * Per the Single Unix Spec, shells report exit status > 128 when a
> +         * called command died on a signal.
> +         */
> +        int            lev = (WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128) ? FATAL : LOG;
> +
> +        if (WIFEXITED(rc))
> +        {
> +            ereport(lev,
> +                    (errmsg("%s failed with exit code %d",
> +                            description, WEXITSTATUS(rc)),
> +                     errdetail("The failed archive command was: %s",
> +                               archcmd)));
> +        }
> +        else if (WIFSIGNALED(rc))
> +        {
> +#if defined(WIN32)
> +            ereport(lev,
> +                  (errmsg("%s was terminated by exception 0x%X",
> +                          description, WTERMSIG(rc)),
> +                   errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
> +                   errdetail("The failed archive command was: %s",
> +                             archcmd)));
> +#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
> +            ereport(lev,
> +                    (errmsg("%s was terminated by signal %d: %s",
> +                            description, WTERMSIG(rc),
> +              WTERMSIG(rc) < NSIG ? sys_siglist[WTERMSIG(rc)] : "(unknown)"),
> +                     errdetail("The failed archive command was: %s",
> +                               archcmd)));
> +#else
> +            ereport(lev,
> +                    (errmsg("%s was terminated by signal %d",
> +                            description, WTERMSIG(rc)),
> +                     errdetail("The failed archive command was: %s",
> +                               archcmd)));
> +#endif
> +        }
> +        else
> +        {
> +            ereport(lev,
> +                (errmsg("%s exited with unrecognized status %d",
> +                        description, rc),
> +                 errdetail("The failed archive command was: %s",
> +                           archcmd)));
> +        }
> +
> +        return false;
> +    }
> +
> +    return true;
> +}
> diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
> index 5c910dd..16bd77f 100644
> --- a/src/backend/utils/misc/guc.c
> +++ b/src/backend/utils/misc/guc.c
> @@ -189,6 +189,7 @@ static bool check_timezone_abbreviations(char **newval, void **extra, GucSource
>  static void assign_timezone_abbreviations(const char *newval, void *extra);
>  static void pg_timezone_abbrev_initialize(void);
>  static const char *show_archive_command(void);
> +static const char *show_archive_keepalive_command(void);
>  static void assign_tcp_keepalives_idle(int newval, void *extra);
>  static void assign_tcp_keepalives_interval(int newval, void *extra);
>  static void assign_tcp_keepalives_count(int newval, void *extra);
> @@ -2531,6 +2532,16 @@ static struct config_string ConfigureNamesString[] =
>      },
>  
>      {
> +        {"archive_keepalive_command", PGC_SIGHUP, WAL_ARCHIVING,
> +            gettext_noop("Sets the shell command that will be called to send a keepalive file."),
> +            NULL
> +        },
> +        &XLogArchiveKeepaliveCommand,
> +        "",
> +        NULL, NULL, show_archive_keepalive_command
> +    },
> +
> +    {
>          {"client_encoding", PGC_USERSET, CLIENT_CONN_LOCALE,
>              gettext_noop("Sets the client's character set encoding."),
>              NULL,
> @@ -8490,6 +8501,15 @@ show_archive_command(void)
>          return "(disabled)";
>  }
>  
> +static const char *
> +show_archive_keepalive_command(void)
> +{
> +    if (XLogArchivingActive())
> +        return XLogArchiveKeepaliveCommand;
> +    else
> +        return "(disabled)";
> +}
> +
>  static void
>  assign_tcp_keepalives_idle(int newval, void *extra)
>  {
> diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
> index 315db46..085d5bb 100644
> --- a/src/backend/utils/misc/postgresql.conf.sample
> +++ b/src/backend/utils/misc/postgresql.conf.sample
> @@ -189,6 +189,10 @@
>                  # placeholders: %p = path of file to archive
>                  #               %f = file name only
>                  # e.g. 'test ! -f /mnt/server/archivedir/%f && cp %p /mnt/server/archivedir/%f'
> +#archive_keepalive_command = ''    # command to use to archive keepalive message files
> +                # placeholders: %p = path of keepalive file
> +                #               %f = keepalive file name only
> +                # e.g. 'cp %p /mnt/server/archivedir/%f'
>  #archive_timeout = 0        # force a logfile segment switch after this
>                  # number of seconds; 0 disables
>  
> diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
> index 1ddf4bf..63174c5 100644
> --- a/src/include/access/xlog.h
> +++ b/src/include/access/xlog.h
> @@ -191,6 +191,8 @@ extern int    XLOGbuffers;
>  extern int    XLogArchiveTimeout;
>  extern bool XLogArchiveMode;
>  extern char *XLogArchiveCommand;
> +extern char *XLogArchiveKeepaliveCommand;
> +extern int XLogArchiveKeepaliveTimeout;
>  extern bool EnableHotStandby;
>  extern bool log_checkpoints;
>  
> @@ -205,6 +207,7 @@ extern int    wal_level;
>  
>  #define XLogArchivingActive()    (XLogArchiveMode && wal_level >= WAL_LEVEL_ARCHIVE)
>  #define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0')
> +#define XLogArchiveKeepaliveCommandSet() (XLogArchiveKeepaliveCommand[0] != '\0')
>  
>  /*
>   * Is WAL-logging necessary for archival or log-shipping, or can we skip
> diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
> index db6380f..51e6558 100644
> --- a/src/include/access/xlog_internal.h
> +++ b/src/include/access/xlog_internal.h
> @@ -233,6 +233,9 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
>  #define StatusFilePath(path, xlog, suffix)    \
>      snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s%s", xlog, suffix)
>  
> +#define KeepaliveFilePath(path, kfname, timestr)    \
> +    snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s.%s.keepalive", kfname, timestr)
> +
>  #define BackupHistoryFileName(fname, tli, log, seg, offset) \
>      snprintf(fname, MAXFNAMELEN, "%08X%08X%08X.%08X.backup", tli, log, seg, offset)
>  
> @@ -258,6 +261,11 @@ typedef struct RmgrData
>  extern const RmgrData RmgrTable[];
>  
>  /*
> + * Exported to support writing keepalives from archiver
> + */
> +extern void XLogWriteKeepaliveFile(void);
> +
> +/*
>   * Exported to support xlog switching from checkpointer
>   */
>  extern pg_time_t GetLastSegSwitchTime(void);

> 
> -- 
> Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
> To make changes to your subscription:
> http://www.postgresql.org/mailpref/pgsql-hackers


--  Bruce Momjian  <bruce@momjian.us>        http://momjian.us EnterpriseDB
http://enterprisedb.com
 + It's impossible for everything to be true. +



pgsql-hackers by date:

Previous
From: Bruce Momjian
Date:
Subject: Re: pgindent README correction
Next
From: Bruce Momjian
Date:
Subject: Re: Caching for stable expressions with constant arguments v6