Re: patch for new feature: Buffer Cache Hibernation - Mailing list pgsql-hackers

From Bruce Momjian
Subject Re: patch for new feature: Buffer Cache Hibernation
Date
Msg-id 201110140002.p9E02HB11710@momjian.us
Whole thread Raw
In response to Re: patch for new feature: Buffer Cache Hibernation  (Mitsuru IWASAKI <iwasaki@jp.FreeBSD.org>)
Responses Re: patch for new feature: Buffer Cache Hibernation
List pgsql-hackers
Should this be marked as TODO?

---------------------------------------------------------------------------

Mitsuru IWASAKI wrote:
> Hi,
> 
> > On 05/07/2011 03:32 AM, Mitsuru IWASAKI wrote:
> > > For 1, I've just finished my work.  The latest patch is available at:
> > > http://people.freebsd.org/~iwasaki/postgres/buffer-cache-hibernation-postgresql-20110507.patch
> > >    
> > 
> > Reminder here--we can't accept code based on it being published to a web 
> > page.  You'll need to e-mail it to the pgsql-hackers mailing list to be 
> > considered for the next PostgreSQL CommitFest, which is starting in a 
> > few weeks.  Code submitted to the mailing list is considered a release 
> > of it to the project under the PostgreSQL license, which we can't just 
> > assume for things when given only a URL to them.
> 
> Sorry about that, but I had enough time to revise my patches this weekend.
> I have attached the patches to this mail, and will update the CommitFest page soon.
> 
> > Also, you suggested you were out of time to work on this.  If that's the 
> > case, we'd like to know that so we don't keep cc'ing you about things in 
> > expectation of an answer.  Someone else may pick this up as a project to 
> > continue working on.  But it's going to need a fair amount of revision 
> > before it matches what people want here, and I'm not sure how much of 
> > what you've written is going to end up in any commit that may happen 
> > from this idea.
> 
> It seems that I don't have enough time to complete this work.
> You don't need to keep cc'ing me, and I would be very happy if PostgreSQL became
> the first DBMS to support a buffer cache hibernation feature.
> 
> Thanks!
> 
> 
> diff --git src/backend/access/transam/xlog.c src/backend/access/transam/xlog.c
> index b0e4c41..7a3a207 100644
> --- src/backend/access/transam/xlog.c
> +++ src/backend/access/transam/xlog.c
> @@ -4834,6 +4834,19 @@ ReadControlFile(void)
>  #endif
>  }
>  
> +bool
> +GetControlFile(ControlFileData *controlFile)
> +{
> +    if (ControlFile == NULL)
> +    {
> +        return false;
> +    }
> +
> +    memcpy(controlFile, ControlFile, sizeof(ControlFileData));
> +
> +    return true;
> +}
> +
>  void
>  UpdateControlFile(void)
>  {
> diff --git src/backend/bootstrap/bootstrap.c src/backend/bootstrap/bootstrap.c
> index fc093cc..7ecf6bb 100644
> --- src/backend/bootstrap/bootstrap.c
> +++ src/backend/bootstrap/bootstrap.c
> @@ -360,6 +360,15 @@ AuxiliaryProcessMain(int argc, char *argv[])
>      BaseInit();
>  
>      /*
> +     * Only StartupProcess can call ResumeBufferCacheHibernation() after
> +     * InitFileAccess() and smgrinit().
> +     */
> +    if (auxType == StartupProcess && BufferCacheHibernationLevel > 0)
> +    {
> +        ResumeBufferCacheHibernation();
> +    }
> +
> +    /*
>       * When we are an auxiliary process, we aren't going to do the full
>       * InitPostgres pushups, but there are a couple of things that need to get
>       * lit up even in an auxiliary process.
> diff --git src/backend/storage/buffer/buf_init.c src/backend/storage/buffer/buf_init.c
> index dadb49d..52eb51a 100644
> --- src/backend/storage/buffer/buf_init.c
> +++ src/backend/storage/buffer/buf_init.c
> @@ -127,6 +127,14 @@ InitBufferPool(void)
>  
>      /* Init other shared buffer-management stuff */
>      StrategyInitialize(!foundDescs);
> +
> +    if (BufferCacheHibernationLevel > 0)
> +    {
> +        ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS,
> +            (char *)BufferDescriptors, sizeof(BufferDesc), NBuffers);
> +        ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS,
> +            (char *)BufferBlocks, BLCKSZ, NBuffers);
> +    }
>  }
>  
>  /*
> diff --git src/backend/storage/buffer/bufmgr.c src/backend/storage/buffer/bufmgr.c
> index f96685d..dba8ebf 100644
> --- src/backend/storage/buffer/bufmgr.c
> +++ src/backend/storage/buffer/bufmgr.c
> @@ -31,6 +31,7 @@
>  #include "postgres.h"
>  
>  #include <sys/file.h>
> +#include <sys/stat.h>
>  #include <unistd.h>
>  
>  #include "catalog/catalog.h"
> @@ -61,6 +62,13 @@
>  #define BUF_WRITTEN                0x01
>  #define BUF_REUSABLE            0x02
>  
> +/*
> + * Buffer Cache Hibernation stuff.
> + */
> +/* enable this to debug buffer cache hibernation. */
> +#if 0
> +#define DEBUG_BUFFER_CACHE_HIBERNATION
> +#endif
>  
>  /* GUC variables */
>  bool        zero_damaged_pages = false;
> @@ -765,6 +773,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
>                  }
>              }
>  
> +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
> +            elog(DEBUG5,
> +                "alloc  [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
> +                    buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
> +                    buf->wait_backend_pid, buf->freeNext,
> +                    newHash, newTag.rnode.spcNode,
> +                    newTag.rnode.dbNode, newTag.rnode.relNode,
> +                    newTag.forkNum, newTag.blockNum);
> +#endif
> +
>              return buf;
>          }
>  
> @@ -800,6 +818,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
>       * the old content is no longer relevant.  (The usage_count starts out at
>       * 1 so that the buffer can survive one clock-sweep pass.)
>       */
> +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
> +    elog(DEBUG5,
> +        "rename [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
> +            buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
> +            buf->wait_backend_pid, buf->freeNext,
> +            oldHash, oldTag.rnode.spcNode,
> +            oldTag.rnode.dbNode, oldTag.rnode.relNode,
> +            oldTag.forkNum, oldTag.blockNum);
> +#endif
> +
>      buf->tag = newTag;
>      buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
>      if (relpersistence == RELPERSISTENCE_PERMANENT)
> @@ -2772,3 +2800,716 @@ local_buffer_write_error_callback(void *arg)
>          pfree(path);
>      }
>  }
> +
> +/* ----------------------------------------------------------------
> + *        Buffer Cache Hibernation support stuff
> + *
> + * Suspend/resume buffer cache data structure using hibernation files
> + * at shutdown/startup.
> + * ----------------------------------------------------------------
> + */
> +
> +int    BufferCacheHibernationLevel = 0;
> +
> +#define    BUFFER_CACHE_HIBERNATION_FILE_STRATEGY        "global/pg_buffer_cache_hibernation_strategy"
> +#define    BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS    "global/pg_buffer_cache_hibernation_descriptors"
> +#define    BUFFER_CACHE_HIBERNATION_FILE_BLOCKS        "global/pg_buffer_cache_hibernation_blocks"
> +#define    BUFFER_CACHE_HIBERNATION_FILE_CRC32            "global/pg_buffer_cache_hibernation_crc32"
> +
> +static struct
> +{
> +    char        *hibernation_file;
> +    char        *data_ptr;
> +    Size        record_length;    
> +    Size        num_records;    
> +    pg_crc32    crc;
> +} BufferCacheHibernationData[] =
> +{
> +    /* BufferStrategyControl */
> +    {
> +        BUFFER_CACHE_HIBERNATION_FILE_STRATEGY,
> +        NULL, 0, 0, 0
> +    },
> +
> +    /* BufferDescriptors */
> +    {
> +        BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS,
> +        NULL, 0, 0, 0
> +    },
> +
> +    /* BufferBlocks */
> +    {
> +        BUFFER_CACHE_HIBERNATION_FILE_BLOCKS,
> +        NULL, 0, 0, 0
> +    },
> +
> +    /* End-of-list marker */
> +    {
> +        NULL,
> +        NULL, 0, 0, 0
> +    },
> +};
> +
> +static ControlFileData    controlFile;
> +static bool                controlFileInitialized = false;
> +
> +/*
> + * AtProcExit_BufferCacheHibernation:
> + *         store the buffer cache into hibernation files at shutdown.
> + */
> +static void
> +AtProcExit_BufferCacheHibernation(int code, Datum arg)
> +{
> +    BufferHibernationFileType    id;
> +    int                            i;
> +    int                            fd;
> +
> +    if (BufferCacheHibernationLevel == 0)
> +    {
> +        return;
> +    }
> +
> +    /*
> +     * get the control file to check the system state validation.
> +     */
> +    if (GetControlFile(&controlFile) == false)
> +    {
> +        elog(WARNING,
> +            "could not get control file, "
> +            "aborting buffer cache hibernation");
> +        return;
> +    }
> +
> +    if (controlFile.state != DB_SHUTDOWNED)
> +    {
> +        elog(WARNING,
> +            "database system was not shut down normally, "
> +            "aborting buffer cache hibernation");
> +        return;
> +    }
> +
> +    /*
> +     * suspend buffer cache data structure into hibernation files.
> +     */
> +    for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> +    {
> +        Size        record_length;
> +        Size        num_records;
> +        char        *ptr;
> +        pg_crc32    crc;
> +
> +        if (BufferCacheHibernationLevel < 2 &&
> +            id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +        {
> +            continue;
> +        }
> +
> +        if (BufferCacheHibernationData[id].data_ptr == NULL ||
> +            BufferCacheHibernationData[id].record_length == 0 ||
> +            BufferCacheHibernationData[id].num_records == 0)
> +        {
> +            elog(WARNING,
> +                "ResisterBufferCacheHibernation() was not called for %s",
> +                BufferCacheHibernationData[id].hibernation_file);
> +            goto cleanup;
> +        }
> +
> +        fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
> +                O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | S_IWUSR);
> +        if (fd < 0)
> +        {
> +            elog(WARNING,
> +                "could not open %s",
> +                BufferCacheHibernationData[id].hibernation_file);
> +            goto cleanup;
> +        }
> +
> +        record_length = BufferCacheHibernationData[id].record_length;
> +        num_records = BufferCacheHibernationData[id].num_records;
> +
> +        elog(NOTICE,
> +            "buffer cache hibernate into %s",
> +            BufferCacheHibernationData[id].hibernation_file);
> +
> +        INIT_CRC32(crc);
> +        for (i = 0; i < num_records; i++)
> +        {
> +            ptr = BufferCacheHibernationData[id].data_ptr + (i * record_length);
> +            if (write(fd, (void *)ptr, record_length) != record_length)
> +            {
> +                elog(WARNING,
> +                    "could not write %s",
> +                    BufferCacheHibernationData[id].hibernation_file);
> +                goto cleanup;
> +            }
> +
> +            COMP_CRC32(crc, ptr, record_length);
> +        }
> +
> +        FIN_CRC32(crc);
> +        close(fd);
> +
> +        BufferCacheHibernationData[id].crc = crc;
> +    }
> +
> +    /*
> +     * save the computed crc values for the validations at resuming.
> +     */
> +    fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32,
> +            O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | S_IWUSR);
> +    if (fd < 0)
> +    {
> +        elog(WARNING,
> +            "could not open %s",
> +            BUFFER_CACHE_HIBERNATION_FILE_CRC32);
> +        goto cleanup;
> +    }
> +
> +    for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> +    {
> +        pg_crc32    crc;
> +
> +        if (BufferCacheHibernationLevel < 2 &&
> +            id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +        {
> +            continue;
> +        }
> +
> +        crc = BufferCacheHibernationData[id].crc;
> +        if (write(fd, (void *)&crc, sizeof(pg_crc32)) != sizeof(pg_crc32))
> +        {
> +            elog(WARNING,
> +                "could not write %s for %s",
> +                BUFFER_CACHE_HIBERNATION_FILE_CRC32,
> +                BufferCacheHibernationData[id].hibernation_file);
> +            goto cleanup;
> +        }
> +    }
> +    close(fd);
> +
> +    elog(NOTICE,
> +        "buffer cache suspended successfully");
> +
> +    return;
> +
> +cleanup:
> +    for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> +    {
> +        unlink(BufferCacheHibernationData[id].hibernation_file);
> +    }
> +
> +    return;
> +}
> +
> +/*
> + * ResisterBufferCacheHibernation:
> + *         register the buffer cache data structure info.
> + */
> +void
> +ResisterBufferCacheHibernation(BufferHibernationFileType id, char *ptr, Size record_length, Size num_records)
> +{
> +    static bool                    first_time = true;
> +
> +    if (BufferCacheHibernationLevel == 0)
> +    {
> +        return;
> +    }
> +
> +    if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY &&
> +        id != BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS &&
> +        id != BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +    {
> +        return;
> +    }
> +
> +    if (first_time)
> +    {
> +        /*
> +         * AtProcExit_BufferCacheHibernation to be called at shutdown.
> +         */
> +        on_shmem_exit(AtProcExit_BufferCacheHibernation, 0);
> +        first_time = false;
> +    }
> +
> +    /*
> +     * get the control file to check the system state and
> +     * hibernation file validations.
> +     */
> +    if (controlFileInitialized == false)
> +    {
> +        if (GetControlFile(&controlFile) == true)
> +        {
> +            controlFileInitialized = true;
> +        }
> +    }
> +
> +    BufferCacheHibernationData[id].data_ptr = ptr;
> +    BufferCacheHibernationData[id].record_length = record_length;
> +    BufferCacheHibernationData[id].num_records = num_records;
> +}
> +
> +/*
> + * ResumeBufferCacheHibernation:
> + *         resume the buffer cache from hibernation file at startup.
> + */
> +void
> +ResumeBufferCacheHibernation(void)
> +{
> +    BufferHibernationFileType    id;
> +    int                            i;
> +    int                            fd;
> +    Size                        num_records;
> +    Size                        record_length;
> +    char                        *buf_common;
> +    int                            oldNBuffers;
> +    bool                        buffer_block_processed;
> +
> +    if (BufferCacheHibernationLevel == 0)
> +    {
> +        return;
> +    }
> +
> +    buf_common = NULL;
> +    buffer_block_processed = false;
> +
> +    /*
> +     * lock all buffer descriptors to prevent other processes from
> +     * updating buffers.
> +     */
> +    for (i = 0; i < NBuffers; i++)
> +    {
> +        BufferDesc    *buf;
> +
> +        buf = &BufferDescriptors[i];
> +        LockBufHdr(buf);
> +    }
> +
> +    /*
> +     * get the control file to check the system state and
> +     * hibernation file validations.
> +     */
> +    if (controlFileInitialized == false)
> +    {
> +        elog(WARNING,
> +            "could not get control file, "
> +            "aborting buffer cache hibernation");
> +        goto cleanup;
> +    }
> +
> +    if (controlFile.state != DB_SHUTDOWNED)
> +    {
> +        elog(WARNING,
> +            "database system was not shut down normally, "
> +            "aborting buffer cache hibernation");
> +        goto cleanup;
> +    }
> +
> +    /*
> +     * read the crc values which was computed when the hibernation
> +     * files were created.
> +     */
> +    fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32,
> +            O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
> +    if (fd < 0)
> +    {
> +        elog(WARNING,
> +            "could not open %s",
> +            BUFFER_CACHE_HIBERNATION_FILE_CRC32);
> +        goto cleanup;
> +    }
> +
> +    for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> +    {
> +        pg_crc32    crc;
> +
> +        if (BufferCacheHibernationLevel < 2 &&
> +            id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +        {
> +            continue;
> +        }
> +
> +        if (read(fd, (void *)&crc, sizeof(pg_crc32)) != sizeof(pg_crc32))
> +        {
> +            if (BufferCacheHibernationLevel == 2 &&
> +                id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +            {
> +                /*
> +                 * if buffer_cache_hibernation_level changes 1 to 2,
> +                 * the crc value of buffer block hibernation file may not exist.
> +                 * just ignore it here.
> +                 */
> +                continue;
> +            }
> +
> +            elog(WARNING,
> +                "could not read %s for %s",
> +                BUFFER_CACHE_HIBERNATION_FILE_CRC32,
> +                BufferCacheHibernationData[id].hibernation_file);
> +            close(fd);
> +            goto cleanup;
> +        }
> +        BufferCacheHibernationData[id].crc = crc;
> +    }
> +
> +    close(fd);
> +
> +    /*
> +     * allocate a buffer to read the contents of the hibernation files
> +     * for validations.
> +     */
> +    record_length = 0;
> +    for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> +    {
> +        if (record_length < BufferCacheHibernationData[id].record_length)
> +        {
> +            record_length = BufferCacheHibernationData[id].record_length;
> +        }
> +    }
> +
> +    buf_common = malloc(record_length);
> +    Assert(buf_common != NULL);
> +
> +    /* assume that the number of buffers have not changed. */
> +    oldNBuffers = NBuffers;
> +
> +    /*
> +     * check if all hibernation files are valid.
> +     */
> +    for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> +    {
> +        struct stat    sb;
> +        pg_crc32    crc;
> +
> +        if (BufferCacheHibernationLevel < 2 &&
> +            id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +        {
> +            continue;
> +        }
> +
> +        if (BufferCacheHibernationData[id].data_ptr == NULL ||
> +            BufferCacheHibernationData[id].record_length == 0 ||
> +            BufferCacheHibernationData[id].num_records == 0)
> +        {
> +            elog(WARNING,
> +                "ResisterBufferCacheHibernation() was not called for %s",
> +                BufferCacheHibernationData[id].hibernation_file);
> +            goto cleanup;
> +        }
> +
> +        fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
> +                O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
> +        if (fd < 0)
> +        {
> +            if (BufferCacheHibernationLevel == 2 &&
> +                id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +            {
> +                /*
> +                 * if buffer_cache_hibernation_level changes 1 to 2,
> +                 * the buffer block hibernation file may not exist.
> +                 * just ignore it here.
> +                 */
> +                continue;
> +            }
> +
> +            goto cleanup;
> +        }
> +
> +        if (fstat(fd, &sb) < 0)
> +        {
> +            elog(WARNING,
> +                "could not get stats of the buffer cache hibernation file: %s",
> +                BufferCacheHibernationData[id].hibernation_file);
> +            close(fd);
> +            goto cleanup;
> +        }
> +
> +        record_length = BufferCacheHibernationData[id].record_length;
> +        num_records = BufferCacheHibernationData[id].num_records;
> +
> +        if (sb.st_size != (record_length * num_records))
> +        {
> +            /* The size of StrategyControl should be the same always. */
> +            if (id == BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY ||
> +                (sb.st_size % record_length) > 0)
> +            {
> +                elog(WARNING,
> +                    "size mismatch on the buffer cache hibernation file: %s",
> +                    BufferCacheHibernationData[id].hibernation_file);
> +                close(fd);
> +                goto cleanup;
> +            }
> +
> +            /*
> +             * The number of records of buffer descriptors and blocks
> +             * should be the same.
> +             */
> +            if (oldNBuffers != NBuffers &&
> +                oldNBuffers != (sb.st_size / record_length))
> +            {
> +                elog(WARNING,
> +                    "size mismatch on the buffer cache hibernation file: %s",
> +                    BufferCacheHibernationData[id].hibernation_file);
> +                close(fd);
> +                goto cleanup;
> +            }
> +            
> +            oldNBuffers = sb.st_size / record_length;
> +
> +            elog(NOTICE,
> +                "shared_buffers have changed from %d to %d: %s",
> +                oldNBuffers, NBuffers,
> +                BufferCacheHibernationData[id].hibernation_file);
> +
> +            /* use the original size to compute CRC of the hibernation file. */
> +            num_records = oldNBuffers;
> +        }
> +
> +        if ((pg_time_t)sb.st_mtime < controlFile.time)
> +        {
> +            elog(WARNING,
> +                "the hibernation file is older than control file: %s",
> +                BufferCacheHibernationData[id].hibernation_file);
> +            close(fd);
> +            goto cleanup;
> +        }
> +
> +        INIT_CRC32(crc);
> +        for (i = 0; i < num_records; i++)
> +        {
> +            if (read(fd, (void *)buf_common, record_length) != record_length)
> +            {
> +                elog(WARNING,
> +                    "could not read the buffer cache hibernation file: %s",
> +                    BufferCacheHibernationData[id].hibernation_file);
> +                close(fd);
> +                goto cleanup;
> +            }
> +
> +            COMP_CRC32(crc, buf_common, record_length);
> +
> +            /*
> +             * buffer descriptors validations.
> +             */
> +            if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS)
> +            {
> +                BufferDesc    *buf;
> +                BufFlags    abnormal_flags;
> +
> +                if (i >= NBuffers)
> +                {
> +                    continue;
> +                }
> +
> +                abnormal_flags = (BM_DIRTY | BM_IO_IN_PROGRESS | BM_IO_ERROR |
> +                                  BM_JUST_DIRTIED | BM_PIN_COUNT_WAITER);
> +
> +                buf = (BufferDesc *)buf_common;
> +
> +                if (buf->flags & abnormal_flags)
> +                {
> +                    elog(WARNING,
> +                        "abnormal flags in buffer descriptors: %d",
> +                        buf->flags);
> +                    close(fd);
> +                    goto cleanup;
> +                }
> +
> +                if (buf->usage_count > BM_MAX_USAGE_COUNT)
> +                {
> +                    elog(WARNING,
> +                        "invalid usage count in buffer descriptors: %d",
> +                        buf->usage_count);
> +                    close(fd);
> +                    goto cleanup;
> +                }
> +
> +                if (buf->buf_id < 0 || buf->buf_id >= num_records)
> +                {
> +                    elog(WARNING,
> +                        "invalid buffer id in buffer descriptors: %d",
> +                        buf->buf_id);
> +                    close(fd);
> +                    goto cleanup;
> +                }
> +            }
> +        }
> +
> +        FIN_CRC32(crc);
> +        close(fd);
> +
> +        if (!EQ_CRC32(BufferCacheHibernationData[id].crc, crc))
> +        {
> +            elog(WARNING,
> +                "crc mismatch on the buffer cache hibernation file: %s",
> +                BufferCacheHibernationData[id].hibernation_file);
> +            close(fd);
> +            goto cleanup;
> +        }
> +    }
> +
> +    /*
> +     * resume the buffer cache data structure from the hibernation files.
> +     */
> +    for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> +    {
> +        int            fd;
> +        char        *ptr;
> +
> +        if (BufferCacheHibernationLevel < 2 &&
> +            id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +        {
> +            continue;
> +        }
> +
> +        record_length = BufferCacheHibernationData[id].record_length;
> +        num_records = BufferCacheHibernationData[id].num_records;
> +
> +        if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY)
> +        {
> +            /* use the smaller number of buffers. */
> +            num_records = (oldNBuffers < NBuffers)? oldNBuffers : NBuffers;
> +        }
> +
> +        fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
> +                O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
> +        if (fd < 0)
> +        {
> +            if (BufferCacheHibernationLevel == 2 &&
> +                id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +            {
> +                /*
> +                 * if buffer_cache_hibernation_level changes 1 to 2,
> +                 * the buffer block hibernation file may not exist.
> +                 * just ignore it here.
> +                 */
> +                continue;
> +            }
> +
> +            goto cleanup;
> +        }
> +
> +        elog(NOTICE,
> +            "buffer cache resume from %s(%d bytes * %d records)",
> +            BufferCacheHibernationData[id].hibernation_file,
> +            record_length, num_records);
> +
> +        for (i = 0; i < num_records; i++)
> +        {
> +            ptr = BufferCacheHibernationData[id].data_ptr + (i * record_length);
> +            read(fd, (void *)ptr, record_length);
> +
> +            /* Re-lock the buffer descriptor if necessary. */
> +            if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS)
> +            {
> +                BufferDesc    *buf;
> +
> +                buf = (BufferDesc *)ptr;
> +                if (IsUnlockBufHdr(buf))
> +                {
> +                    LockBufHdr(buf);
> +                }
> +            }
> +        }
> +
> +        close(fd);
> +
> +        if (id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +        {
> +            buffer_block_processed = true;
> +        }
> +    }
> +
> +    if (buffer_block_processed == false)
> +    {
> +        /* we didn't use the buffer block hibernation file, so delete it now. */
> +        id = BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS;
> +        unlink(BufferCacheHibernationData[id].hibernation_file);
> +    }
> +
> +    /*
> +     * set the rest data structures (eg. lookup hashtable) up
> +     * based on the buffer descriptors.
> +     */
> +    num_records = (oldNBuffers < NBuffers)? oldNBuffers : NBuffers;
> +    for (i = 0; i < num_records; i++)
> +    {
> +        BufferDesc        *buf;
> +        BufferTag        newTag;
> +        uint32            newHash;
> +        int                buf_id;
> +
> +        buf = &BufferDescriptors[i];
> +        if (buf->tag.rnode.spcNode    == InvalidOid &&
> +            buf->tag.rnode.dbNode    == InvalidOid &&
> +            buf->tag.rnode.relNode    == InvalidOid)
> +        {
> +            continue;
> +        }
> +
> +        INIT_BUFFERTAG(newTag, buf->tag.rnode, buf->tag.forkNum, buf->tag.blockNum);
> +        newHash = BufTableHashCode(&newTag);
> +
> +        if (buffer_block_processed == false)
> +        {
> +            Block            bufBlock;
> +            SMgrRelation    smgr;
> +
> +            /*
> +             * re-read buffer block.
> +             */
> +            bufBlock = BufHdrGetBlock(buf);
> +            smgr = smgropen(buf->tag.rnode, InvalidBackendId);
> +            smgrread(smgr, newTag.forkNum, newTag.blockNum, (char *) bufBlock);
> +        }
> +
> +        buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
> +        if (buf_id != -1)
> +        {
> +            /* the entry exists already, return it to the freelist. */
> +            buf->refcount = 0;
> +            buf->flags = 0;
> +            InvalidateBuffer(buf);
> +            continue;
> +        }
> +
> +        /* clear wait_backend_pid because the process was terminated already. */
> +        buf->wait_backend_pid = 0;
> +
> +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
> +        elog(DEBUG5,
> +            "resume [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
> +                buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
> +                buf->wait_backend_pid, buf->freeNext,
> +                newHash, newTag.rnode.spcNode,
> +                newTag.rnode.dbNode, newTag.rnode.relNode,
> +                newTag.forkNum, newTag.blockNum);
> +#endif
> +    }
> +
> +    /*
> +     * adjust StrategyControl based on the change of shared_buffers.
> +     */
> +    if (oldNBuffers != NBuffers)
> +    {
> +        AdjustStrategyControl(oldNBuffers);
> +    }
> +
> +    elog(NOTICE,
> +        "buffer cache resumed successfully");
> +
> +cleanup:
> +    for (i = 0; i < NBuffers; i++)
> +    {
> +        BufferDesc    *buf;
> +
> +        buf = &BufferDescriptors[i];
> +        UnlockBufHdr(buf);
> +    }
> +
> +    if (buf_common != NULL)
> +    {
> +        free(buf_common);
> +    }
> +
> +    return;
> +}
> diff --git src/backend/storage/buffer/freelist.c src/backend/storage/buffer/freelist.c
> index bf9903b..ffc101d 100644
> --- src/backend/storage/buffer/freelist.c
> +++ src/backend/storage/buffer/freelist.c
> @@ -347,6 +347,12 @@ StrategyInitialize(bool init)
>      }
>      else
>          Assert(!init);
> +
> +    if (BufferCacheHibernationLevel > 0)
> +    {
> +        ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY,
> +            (char *)StrategyControl, sizeof(BufferStrategyControl), 1);
> +    }
>  }
>  
>  
> @@ -521,3 +527,47 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf)
>  
>      return true;
>  }
> +
> +/*
> + * AdjustStrategyControl -- adjust the member variables of StrategyControl
> + *
> + * If the shared_buffers setting had changed, restored StrategyControl
> + * needs to be adjusted for in both cases of shrinking and enlarging.
> + * This is called only from bufmgr.c:ResumeBufferCacheHibernation().
> + */
> +void
> +AdjustStrategyControl(int oldNBuffers)
> +{
> +    if (oldNBuffers == NBuffers)
> +    {
> +        return;
> +    }
> +
> +    /* enlarge or shrink the free buffer based on current NBuffers. */
> +    StrategyControl->lastFreeBuffer = NBuffers - 1;
> +
> +    /* shared_buffers shrunk. */
> +    if (oldNBuffers > NBuffers)
> +    {
> +        if (StrategyControl->nextVictimBuffer >= NBuffers)
> +        {
> +            /* set the tail of buffers. */
> +            StrategyControl->nextVictimBuffer = NBuffers - 1;
> +        }
> +
> +        if (StrategyControl->firstFreeBuffer >= NBuffers)
> +        {
> +            /* set FREENEXT_END_OF_LIST(-1). */
> +            StrategyControl->firstFreeBuffer = FREENEXT_END_OF_LIST;
> +        }
> +    }
> +    else
> +    /* shared_buffers enlarged. */
> +    {
> +        if (StrategyControl->firstFreeBuffer < 0)
> +        {
> +            /* set the next entry of the tail of old buffers. */
> +            StrategyControl->firstFreeBuffer = oldNBuffers;
> +        }
> +    }
> +}
> diff --git src/backend/utils/misc/guc.c src/backend/utils/misc/guc.c
> index 738e215..5affc6e 100644
> --- src/backend/utils/misc/guc.c
> +++ src/backend/utils/misc/guc.c
> @@ -2361,6 +2361,18 @@ static struct config_int ConfigureNamesInt[] =
>          NULL, NULL, NULL
>      },
>  
> +    {
> +        {"buffer_cache_hibernation_level", PGC_POSTMASTER, UNGROUPED,
> +            gettext_noop("Sets buffer cache hibernation level."),
> +            gettext_noop("0 to disable(default), "
> +                         "1 for saving buffer descriptors only(recommended), "
> +                         "2 for saving buffer descriptors and buffer blocks(slower at shutdown).")
> +        },
> +        &BufferCacheHibernationLevel,
> +        0, 0, 2,
> +        NULL, NULL, NULL
> +    },
> +
>      /* End-of-list marker */
>      {
>          {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
> diff --git src/backend/utils/misc/postgresql.conf.sample src/backend/utils/misc/postgresql.conf.sample
> index b8a1582..44b6ff3 100644
> --- src/backend/utils/misc/postgresql.conf.sample
> +++ src/backend/utils/misc/postgresql.conf.sample
> @@ -119,6 +119,17 @@
>  #maintenance_work_mem = 16MB        # min 1MB
>  #max_stack_depth = 2MB            # min 100kB
>  
> +
> +# Buffer Cache Hibernation:
> +#  Suspend/resume buffer cache data structure using hibernation files
> +#  at shutdown/startup.
> +#buffer_cache_hibernation_level = 0    # Sets buffer cache hibernation level.
> +                    # 0 to disable(default),
> +                    # 1 for saving buffer descriptors only
> +                    #   (recommended),
> +                    # 2 for saving buffer descriptors and
> +                    #   buffer blocks(slower at shutdown).
> +
>  # - Kernel Resource Usage -
>  
>  #max_files_per_process = 1000        # min 25
> diff --git src/include/access/xlog.h src/include/access/xlog.h
> index 7056fd6..7a9fb99 100644
> --- src/include/access/xlog.h
> +++ src/include/access/xlog.h
> @@ -13,6 +13,7 @@
>  
>  #include "access/rmgr.h"
>  #include "access/xlogdefs.h"
> +#include "catalog/pg_control.h"
>  #include "lib/stringinfo.h"
>  #include "storage/buf.h"
>  #include "utils/pg_crc.h"
> @@ -294,6 +295,7 @@ extern bool XLogInsertAllowed(void);
>  extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
>  extern XLogRecPtr GetXLogReplayRecPtr(void);
>  
> +extern bool GetControlFile(ControlFileData *controlFile);
>  extern void UpdateControlFile(void);
>  extern uint64 GetSystemIdentifier(void);
>  extern Size XLOGShmemSize(void);
> diff --git src/include/storage/buf_internals.h src/include/storage/buf_internals.h
> index b7d4ea5..d537ef1 100644
> --- src/include/storage/buf_internals.h
> +++ src/include/storage/buf_internals.h
> @@ -167,6 +167,7 @@ typedef struct sbufdesc
>   */
>  #define LockBufHdr(bufHdr)        SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
>  #define UnlockBufHdr(bufHdr)    SpinLockRelease(&(bufHdr)->buf_hdr_lock)
> +#define IsUnlockBufHdr(bufHdr)    SpinLockFree(&(bufHdr)->buf_hdr_lock)
>  
>  
>  /* in buf_init.c */
> @@ -190,6 +191,7 @@ extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
>  extern int    StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
>  extern Size StrategyShmemSize(void);
>  extern void StrategyInitialize(bool init);
> +extern void AdjustStrategyControl(int oldNBuffers);
>  
>  /* buf_table.c */
>  extern Size BufTableShmemSize(int size);
> diff --git src/include/storage/bufmgr.h src/include/storage/bufmgr.h
> index b8fc87e..ddfeb9d 100644
> --- src/include/storage/bufmgr.h
> +++ src/include/storage/bufmgr.h
> @@ -211,6 +211,20 @@ extern void BgBufferSync(void);
>  
>  extern void AtProcExit_LocalBuffers(void);
>  
> +/* buffer cache hibernation support stuff */
> +extern int    BufferCacheHibernationLevel;
> +
> +typedef enum BufferHibernationFileType
> +{   
> +    BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY,
> +    BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS,
> +    BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS
> +} BufferHibernationFileType;
> +
> +extern void ResisterBufferCacheHibernation(BufferHibernationFileType id,
> +                char *ptr, Size record_length, Size num_records);
> +extern void ResumeBufferCacheHibernation(void);
> +
>  /* in freelist.c */
>  extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
>  extern void FreeAccessStrategy(BufferAccessStrategy strategy);
> 
> -- 
> Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
> To make changes to your subscription:
> http://www.postgresql.org/mailpref/pgsql-hackers

--  Bruce Momjian  <bruce@momjian.us>        http://momjian.us EnterpriseDB
http://enterprisedb.com
 + It's impossible for everything to be true. +


pgsql-hackers by date:

Previous
From: Bruce Momjian
Date:
Subject: Re: Remove support for 'userlocks'?
Next
From: Bruce Momjian
Date:
Subject: Re: WIP: AuthenticationMD5 protocol documentation clarification