Re: patch for new feature: Buffer Cache Hibernation - Mailing list pgsql-hackers
From | Bruce Momjian |
---|---|
Subject | Re: patch for new feature: Buffer Cache Hibernation |
Date | |
Msg-id | 201110140002.p9E02HB11710@momjian.us Whole thread Raw |
In response to | Re: patch for new feature: Buffer Cache Hibernation (Mitsuru IWASAKI <iwasaki@jp.FreeBSD.org>) |
Responses | Re: patch for new feature: Buffer Cache Hibernation |
List | pgsql-hackers |
Should this be marked as TODO? --------------------------------------------------------------------------- Mitsuru IWASAKI wrote: > Hi, > > > On 05/07/2011 03:32 AM, Mitsuru IWASAKI wrote: > > > For 1, I've just finish my work. The latest patch is available at: > > > http://people.freebsd.org/~iwasaki/postgres/buffer-cache-hibernation-postgresql-20110507.patch > > > > > > > Reminder here--we can't accept code based on it being published to a web > > page. You'll need to e-mail it to the pgsql-hackers mailing list to be > > considered for the next PostgreSQL CommitFest, which is starting in a > > few weeks. Code submitted to the mailing list is considered a release > > of it to the project under the PostgreSQL license, which we can't just > > assume for things when given only a URL to them. > > Sorry about that, but I had enough time to revise my patches this week-end. > I attached the patches in this mail, and will update CommitFest page soon. > > > Also, you suggested you were out of time to work on this. If that's the > > case, we'd like to know that so we don't keep cc'ing you about things in > > expectation of an answer. Someone else may pick this up as a project to > > continue working on. But it's going to need a fair amount of revision > > before it matches what people want here, and I'm not sure how much of > > what you've written is going to end up in any commit that may happen > > from this idea. > > It seems that I don't have enough time to complete this work. > You don't need to keep cc'ing me, and I'm very happy if postgres to be > the first DBMS which support buffer cache hibernation feature. > > Thanks! 
> > > diff --git src/backend/access/transam/xlog.c src/backend/access/transam/xlog.c > index b0e4c41..7a3a207 100644 > --- src/backend/access/transam/xlog.c > +++ src/backend/access/transam/xlog.c > @@ -4834,6 +4834,19 @@ ReadControlFile(void) > #endif > } > > +bool > +GetControlFile(ControlFileData *controlFile) > +{ > + if (ControlFile == NULL) > + { > + return false; > + } > + > + memcpy(controlFile, ControlFile, sizeof(ControlFileData)); > + > + return true; > +} > + > void > UpdateControlFile(void) > { > diff --git src/backend/bootstrap/bootstrap.c src/backend/bootstrap/bootstrap.c > index fc093cc..7ecf6bb 100644 > --- src/backend/bootstrap/bootstrap.c > +++ src/backend/bootstrap/bootstrap.c > @@ -360,6 +360,15 @@ AuxiliaryProcessMain(int argc, char *argv[]) > BaseInit(); > > /* > + * Only StartupProcess can call ResumeBufferCacheHibernation() after > + * InitFileAccess() and smgrinit(). > + */ > + if (auxType == StartupProcess && BufferCacheHibernationLevel > 0) > + { > + ResumeBufferCacheHibernation(); > + } > + > + /* > * When we are an auxiliary process, we aren't going to do the full > * InitPostgres pushups, but there are a couple of things that need to get > * lit up even in an auxiliary process. 
> diff --git src/backend/storage/buffer/buf_init.c src/backend/storage/buffer/buf_init.c > index dadb49d..52eb51a 100644 > --- src/backend/storage/buffer/buf_init.c > +++ src/backend/storage/buffer/buf_init.c > @@ -127,6 +127,14 @@ InitBufferPool(void) > > /* Init other shared buffer-management stuff */ > StrategyInitialize(!foundDescs); > + > + if (BufferCacheHibernationLevel > 0) > + { > + ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS, > + (char *)BufferDescriptors, sizeof(BufferDesc), NBuffers); > + ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS, > + (char *)BufferBlocks, BLCKSZ, NBuffers); > + } > } > > /* > diff --git src/backend/storage/buffer/bufmgr.c src/backend/storage/buffer/bufmgr.c > index f96685d..dba8ebf 100644 > --- src/backend/storage/buffer/bufmgr.c > +++ src/backend/storage/buffer/bufmgr.c > @@ -31,6 +31,7 @@ > #include "postgres.h" > > #include <sys/file.h> > +#include <sys/stat.h> > #include <unistd.h> > > #include "catalog/catalog.h" > @@ -61,6 +62,13 @@ > #define BUF_WRITTEN 0x01 > #define BUF_REUSABLE 0x02 > > +/* > + * Buffer Cache Hibernation stuff. > + */ > +/* enable this to debug buffer cache hibernation. */ > +#if 0 > +#define DEBUG_BUFFER_CACHE_HIBERNATION > +#endif > > /* GUC variables */ > bool zero_damaged_pages = false; > @@ -765,6 +773,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, > } > } > > +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION > + elog(DEBUG5, > + "alloc [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d", > + buf->buf_id, buf->flags, buf->usage_count, buf->refcount, > + buf->wait_backend_pid, buf->freeNext, > + newHash, newTag.rnode.spcNode, > + newTag.rnode.dbNode, newTag.rnode.relNode, > + newTag.forkNum, newTag.blockNum); > +#endif > + > return buf; > } > > @@ -800,6 +818,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, > * the old content is no longer relevant. 
(The usage_count starts out at > * 1 so that the buffer can survive one clock-sweep pass.) > */ > +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION > + elog(DEBUG5, > + "rename [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d", > + buf->buf_id, buf->flags, buf->usage_count, buf->refcount, > + buf->wait_backend_pid, buf->freeNext, > + oldHash, oldTag.rnode.spcNode, > + oldTag.rnode.dbNode, oldTag.rnode.relNode, > + oldTag.forkNum, oldTag.blockNum); > +#endif > + > buf->tag = newTag; > buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT); > if (relpersistence == RELPERSISTENCE_PERMANENT) > @@ -2772,3 +2800,716 @@ local_buffer_write_error_callback(void *arg) > pfree(path); > } > } > + > +/* ---------------------------------------------------------------- > + * Buffer Cache Hibernation support stuff > + * > + * Suspend/resume buffer cache data structure using hibernation files > + * at shutdown/startup. > + * ---------------------------------------------------------------- > + */ > + > +int BufferCacheHibernationLevel = 0; > + > +#define BUFFER_CACHE_HIBERNATION_FILE_STRATEGY "global/pg_buffer_cache_hibernation_strategy" > +#define BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS "global/pg_buffer_cache_hibernation_descriptors" > +#define BUFFER_CACHE_HIBERNATION_FILE_BLOCKS "global/pg_buffer_cache_hibernation_blocks" > +#define BUFFER_CACHE_HIBERNATION_FILE_CRC32 "global/pg_buffer_cache_hibernation_crc32" > + > +static struct > +{ > + char *hibernation_file; > + char *data_ptr; > + Size record_length; > + Size num_records; > + pg_crc32 crc; > +} BufferCacheHibernationData[] = > +{ > + /* BufferStrategyControl */ > + { > + BUFFER_CACHE_HIBERNATION_FILE_STRATEGY, > + NULL, 0, 0, 0 > + }, > + > + /* BufferDescriptors */ > + { > + BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS, > + NULL, 0, 0, 0 > + }, > + > + /* BufferBlocks */ > + { > + BUFFER_CACHE_HIBERNATION_FILE_BLOCKS, > + NULL, 0, 0, 0 > + }, > + > + /* End-of-list marker */ > + { > + 
NULL, > + NULL, 0, 0, 0 > + }, > +}; > + > +static ControlFileData controlFile; > +static bool controlFileInitialized = false; > + > +/* > + * AtProcExit_BufferCacheHibernation: > + * store the buffer cache into hibernation files at shutdown. > + */ > +static void > +AtProcExit_BufferCacheHibernation(int code, Datum arg) > +{ > + BufferHibernationFileType id; > + int i; > + int fd; > + > + if (BufferCacheHibernationLevel == 0) > + { > + return; > + } > + > + /* > + * get the control file to check the system state validation. > + */ > + if (GetControlFile(&controlFile) == false) > + { > + elog(WARNING, > + "could not get control file, " > + "aborting buffer cache hibernation"); > + return; > + } > + > + if (controlFile.state != DB_SHUTDOWNED) > + { > + elog(WARNING, > + "database system was not shut down normally, " > + "aborting buffer cache hibernation"); > + return; > + } > + > + /* > + * suspend buffer cache data structure into hibernation files. > + */ > + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) > + { > + Size record_length; > + Size num_records; > + char *ptr; > + pg_crc32 crc; > + > + if (BufferCacheHibernationLevel < 2 && > + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS) > + { > + continue; > + } > + > + if (BufferCacheHibernationData[id].data_ptr == NULL || > + BufferCacheHibernationData[id].record_length == 0 || > + BufferCacheHibernationData[id].num_records == 0) > + { > + elog(WARNING, > + "ResisterBufferCacheHibernation() was not called for %s", > + BufferCacheHibernationData[id].hibernation_file); > + goto cleanup; > + } > + > + fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file, > + O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | S_IWUSR); > + if (fd < 0) > + { > + elog(WARNING, > + "could not open %s", > + BufferCacheHibernationData[id].hibernation_file); > + goto cleanup; > + } > + > + record_length = BufferCacheHibernationData[id].record_length; > + num_records = 
BufferCacheHibernationData[id].num_records; > + > + elog(NOTICE, > + "buffer cache hibernate into %s", > + BufferCacheHibernationData[id].hibernation_file); > + > + INIT_CRC32(crc); > + for (i = 0; i < num_records; i++) > + { > + ptr = BufferCacheHibernationData[id].data_ptr + (i * record_length); > + if (write(fd, (void *)ptr, record_length) != record_length) > + { > + elog(WARNING, > + "could not write %s", > + BufferCacheHibernationData[id].hibernation_file); > + goto cleanup; > + } > + > + COMP_CRC32(crc, ptr, record_length); > + } > + > + FIN_CRC32(crc); > + close(fd); > + > + BufferCacheHibernationData[id].crc = crc; > + } > + > + /* > + * save the computed crc values for the validations at resuming. > + */ > + fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32, > + O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | S_IWUSR); > + if (fd < 0) > + { > + elog(WARNING, > + "could not open %s", > + BUFFER_CACHE_HIBERNATION_FILE_CRC32); > + goto cleanup; > + } > + > + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) > + { > + pg_crc32 crc; > + > + if (BufferCacheHibernationLevel < 2 && > + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS) > + { > + continue; > + } > + > + crc = BufferCacheHibernationData[id].crc; > + if (write(fd, (void *)&crc, sizeof(pg_crc32)) != sizeof(pg_crc32)) > + { > + elog(WARNING, > + "could not write %s for %s", > + BUFFER_CACHE_HIBERNATION_FILE_CRC32, > + BufferCacheHibernationData[id].hibernation_file); > + goto cleanup; > + } > + } > + close(fd); > + > + elog(NOTICE, > + "buffer cache suspended successfully"); > + > + return; > + > +cleanup: > + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) > + { > + unlink(BufferCacheHibernationData[id].hibernation_file); > + } > + > + return; > +} > + > +/* > + * ResisterBufferCacheHibernation: > + * register the buffer cache data structure info. 
> + */ > +void > +ResisterBufferCacheHibernation(BufferHibernationFileType id, char *ptr, Size record_length, Size num_records) > +{ > + static bool first_time = true; > + > + if (BufferCacheHibernationLevel == 0) > + { > + return; > + } > + > + if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY && > + id != BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS && > + id != BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS) > + { > + return; > + } > + > + if (first_time) > + { > + /* > + * AtProcExit_BufferCacheHibernation to be called at shutdown. > + */ > + on_shmem_exit(AtProcExit_BufferCacheHibernation, 0); > + first_time = false; > + } > + > + /* > + * get the control file to check the system state and > + * hibernation file validations. > + */ > + if (controlFileInitialized == false) > + { > + if (GetControlFile(&controlFile) == true) > + { > + controlFileInitialized = true; > + } > + } > + > + BufferCacheHibernationData[id].data_ptr = ptr; > + BufferCacheHibernationData[id].record_length = record_length; > + BufferCacheHibernationData[id].num_records = num_records; > +} > + > +/* > + * ResumeBufferCacheHibernation: > + * resume the buffer cache from hibernation file at startup. > + */ > +void > +ResumeBufferCacheHibernation(void) > +{ > + BufferHibernationFileType id; > + int i; > + int fd; > + Size num_records; > + Size record_length; > + char *buf_common; > + int oldNBuffers; > + bool buffer_block_processed; > + > + if (BufferCacheHibernationLevel == 0) > + { > + return; > + } > + > + buf_common = NULL; > + buffer_block_processed = false; > + > + /* > + * lock all buffer descriptors to prevent other processes from > + * updating buffers. > + */ > + for (i = 0; i < NBuffers; i++) > + { > + BufferDesc *buf; > + > + buf = &BufferDescriptors[i]; > + LockBufHdr(buf); > + } > + > + /* > + * get the control file to check the system state and > + * hibernation file validations. 
> + */ > + if (controlFileInitialized == false) > + { > + elog(WARNING, > + "could not get control file, " > + "aborting buffer cache hibernation"); > + goto cleanup; > + } > + > + if (controlFile.state != DB_SHUTDOWNED) > + { > + elog(WARNING, > + "database system was not shut down normally, " > + "aborting buffer cache hibernation"); > + goto cleanup; > + } > + > + /* > + * read the crc values which was computed when the hibernation > + * files were created. > + */ > + fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32, > + O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR); > + if (fd < 0) > + { > + elog(WARNING, > + "could not open %s", > + BUFFER_CACHE_HIBERNATION_FILE_CRC32); > + goto cleanup; > + } > + > + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) > + { > + pg_crc32 crc; > + > + if (BufferCacheHibernationLevel < 2 && > + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS) > + { > + continue; > + } > + > + if (read(fd, (void *)&crc, sizeof(pg_crc32)) != sizeof(pg_crc32)) > + { > + if (BufferCacheHibernationLevel == 2 && > + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS) > + { > + /* > + * if buffer_cache_hibernation_level changes 1 to 2, > + * the crc value of buffer block hibernation file may not exist. > + * just ignore it here. > + */ > + continue; > + } > + > + elog(WARNING, > + "could not read %s for %s", > + BUFFER_CACHE_HIBERNATION_FILE_CRC32, > + BufferCacheHibernationData[id].hibernation_file); > + close(fd); > + goto cleanup; > + } > + BufferCacheHibernationData[id].crc = crc; > + } > + > + close(fd); > + > + /* > + * allocate a buffer to read the contents of the hibernation files > + * for validations. 
> + */ > + record_length = 0; > + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) > + { > + if (record_length < BufferCacheHibernationData[id].record_length) > + { > + record_length = BufferCacheHibernationData[id].record_length; > + } > + } > + > + buf_common = malloc(record_length); > + Assert(buf_common != NULL); > + > + /* assume that the number of buffers have not changed. */ > + oldNBuffers = NBuffers; > + > + /* > + * check if all hibernation files are valid. > + */ > + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) > + { > + struct stat sb; > + pg_crc32 crc; > + > + if (BufferCacheHibernationLevel < 2 && > + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS) > + { > + continue; > + } > + > + if (BufferCacheHibernationData[id].data_ptr == NULL || > + BufferCacheHibernationData[id].record_length == 0 || > + BufferCacheHibernationData[id].num_records == 0) > + { > + elog(WARNING, > + "ResisterBufferCacheHibernation() was not called for %s", > + BufferCacheHibernationData[id].hibernation_file); > + goto cleanup; > + } > + > + fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file, > + O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR); > + if (fd < 0) > + { > + if (BufferCacheHibernationLevel == 2 && > + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS) > + { > + /* > + * if buffer_cache_hibernation_level changes 1 to 2, > + * the buffer block hibernation file may not exist. > + * just ignore it here. 
> + */ > + continue; > + } > + > + goto cleanup; > + } > + > + if (fstat(fd, &sb) < 0) > + { > + elog(WARNING, > + "could not get stats of the buffer cache hibernation file: %s", > + BufferCacheHibernationData[id].hibernation_file); > + close(fd); > + goto cleanup; > + } > + > + record_length = BufferCacheHibernationData[id].record_length; > + num_records = BufferCacheHibernationData[id].num_records; > + > + if (sb.st_size != (record_length * num_records)) > + { > + /* The size of StrategyControl should be the same always. */ > + if (id == BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY || > + (sb.st_size % record_length) > 0) > + { > + elog(WARNING, > + "size mismatch on the buffer cache hibernation file: %s", > + BufferCacheHibernationData[id].hibernation_file); > + close(fd); > + goto cleanup; > + } > + > + /* > + * The number of records of buffer descriptors and blocks > + * should be the same. > + */ > + if (oldNBuffers != NBuffers && > + oldNBuffers != (sb.st_size / record_length)) > + { > + elog(WARNING, > + "size mismatch on the buffer cache hibernation file: %s", > + BufferCacheHibernationData[id].hibernation_file); > + close(fd); > + goto cleanup; > + } > + > + oldNBuffers = sb.st_size / record_length; > + > + elog(NOTICE, > + "shared_buffers have changed from %d to %d: %s", > + oldNBuffers, NBuffers, > + BufferCacheHibernationData[id].hibernation_file); > + > + /* use the original size to compute CRC of the hibernation file. 
*/ > + num_records = oldNBuffers; > + } > + > + if ((pg_time_t)sb.st_mtime < controlFile.time) > + { > + elog(WARNING, > + "the hibernation file is older than control file: %s", > + BufferCacheHibernationData[id].hibernation_file); > + close(fd); > + goto cleanup; > + } > + > + INIT_CRC32(crc); > + for (i = 0; i < num_records; i++) > + { > + if (read(fd, (void *)buf_common, record_length) != record_length) > + { > + elog(WARNING, > + "could not read the buffer cache hibernation file: %s", > + BufferCacheHibernationData[id].hibernation_file); > + close(fd); > + goto cleanup; > + } > + > + COMP_CRC32(crc, buf_common, record_length); > + > + /* > + * buffer descriptors validations. > + */ > + if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS) > + { > + BufferDesc *buf; > + BufFlags abnormal_flags; > + > + if (i >= NBuffers) > + { > + continue; > + } > + > + abnormal_flags = (BM_DIRTY | BM_IO_IN_PROGRESS | BM_IO_ERROR | > + BM_JUST_DIRTIED | BM_PIN_COUNT_WAITER); > + > + buf = (BufferDesc *)buf_common; > + > + if (buf->flags & abnormal_flags) > + { > + elog(WARNING, > + "abnormal flags in buffer descriptors: %d", > + buf->flags); > + close(fd); > + goto cleanup; > + } > + > + if (buf->usage_count > BM_MAX_USAGE_COUNT) > + { > + elog(WARNING, > + "invalid usage count in buffer descriptors: %d", > + buf->usage_count); > + close(fd); > + goto cleanup; > + } > + > + if (buf->buf_id < 0 || buf->buf_id >= num_records) > + { > + elog(WARNING, > + "invalid buffer id in buffer descriptors: %d", > + buf->buf_id); > + close(fd); > + goto cleanup; > + } > + } > + } > + > + FIN_CRC32(crc); > + close(fd); > + > + if (!EQ_CRC32(BufferCacheHibernationData[id].crc, crc)) > + { > + elog(WARNING, > + "crc mismatch on the buffer cache hibernation file: %s", > + BufferCacheHibernationData[id].hibernation_file); > + close(fd); > + goto cleanup; > + } > + } > + > + /* > + * resume the buffer cache data structure from the hibernation files. 
> + */ > + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) > + { > + int fd; > + char *ptr; > + > + if (BufferCacheHibernationLevel < 2 && > + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS) > + { > + continue; > + } > + > + record_length = BufferCacheHibernationData[id].record_length; > + num_records = BufferCacheHibernationData[id].num_records; > + > + if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY) > + { > + /* use the smaller number of buffers. */ > + num_records = (oldNBuffers < NBuffers)? oldNBuffers : NBuffers; > + } > + > + fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file, > + O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR); > + if (fd < 0) > + { > + if (BufferCacheHibernationLevel == 2 && > + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS) > + { > + /* > + * if buffer_cache_hibernation_level changes 1 to 2, > + * the buffer block hibernation file may not exist. > + * just ignore it here. > + */ > + continue; > + } > + > + goto cleanup; > + } > + > + elog(NOTICE, > + "buffer cache resume from %s(%d bytes * %d records)", > + BufferCacheHibernationData[id].hibernation_file, > + record_length, num_records); > + > + for (i = 0; i < num_records; i++) > + { > + ptr = BufferCacheHibernationData[id].data_ptr + (i * record_length); > + read(fd, (void *)ptr, record_length); > + > + /* Re-lock the buffer descriptor if necessary. */ > + if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS) > + { > + BufferDesc *buf; > + > + buf = (BufferDesc *)ptr; > + if (IsUnlockBufHdr(buf)) > + { > + LockBufHdr(buf); > + } > + } > + } > + > + close(fd); > + > + if (id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS) > + { > + buffer_block_processed = true; > + } > + } > + > + if (buffer_block_processed == false) > + { > + /* we didn't use the buffer block hibernation file, so delete it now. */ > + id = BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS; > + unlink(BufferCacheHibernationData[id].hibernation_file); > + } > + > + /* > + * set the rest data structures (eg. 
lookup hashtable) up > + * based on the buffer descriptors. > + */ > + num_records = (oldNBuffers < NBuffers)? oldNBuffers : NBuffers; > + for (i = 0; i < num_records; i++) > + { > + BufferDesc *buf; > + BufferTag newTag; > + uint32 newHash; > + int buf_id; > + > + buf = &BufferDescriptors[i]; > + if (buf->tag.rnode.spcNode == InvalidOid && > + buf->tag.rnode.dbNode == InvalidOid && > + buf->tag.rnode.relNode == InvalidOid) > + { > + continue; > + } > + > + INIT_BUFFERTAG(newTag, buf->tag.rnode, buf->tag.forkNum, buf->tag.blockNum); > + newHash = BufTableHashCode(&newTag); > + > + if (buffer_block_processed == false) > + { > + Block bufBlock; > + SMgrRelation smgr; > + > + /* > + * re-read buffer block. > + */ > + bufBlock = BufHdrGetBlock(buf); > + smgr = smgropen(buf->tag.rnode, InvalidBackendId); > + smgrread(smgr, newTag.forkNum, newTag.blockNum, (char *) bufBlock); > + } > + > + buf_id = BufTableInsert(&newTag, newHash, buf->buf_id); > + if (buf_id != -1) > + { > + /* the entry exists already, return it to the freelist. */ > + buf->refcount = 0; > + buf->flags = 0; > + InvalidateBuffer(buf); > + continue; > + } > + > + /* clear wait_backend_pid because the process was terminated already. */ > + buf->wait_backend_pid = 0; > + > +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION > + elog(DEBUG5, > + "resume [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d", > + buf->buf_id, buf->flags, buf->usage_count, buf->refcount, > + buf->wait_backend_pid, buf->freeNext, > + newHash, newTag.rnode.spcNode, > + newTag.rnode.dbNode, newTag.rnode.relNode, > + newTag.forkNum, newTag.blockNum); > +#endif > + } > + > + /* > + * adjust StrategyControl based on the change of shared_buffers. 
> + */ > + if (oldNBuffers != NBuffers) > + { > + AdjustStrategyControl(oldNBuffers); > + } > + > + elog(NOTICE, > + "buffer cache resumed successfully"); > + > +cleanup: > + for (i = 0; i < NBuffers; i++) > + { > + BufferDesc *buf; > + > + buf = &BufferDescriptors[i]; > + UnlockBufHdr(buf); > + } > + > + if (buf_common != NULL) > + { > + free(buf_common); > + } > + > + return; > +} > diff --git src/backend/storage/buffer/freelist.c src/backend/storage/buffer/freelist.c > index bf9903b..ffc101d 100644 > --- src/backend/storage/buffer/freelist.c > +++ src/backend/storage/buffer/freelist.c > @@ -347,6 +347,12 @@ StrategyInitialize(bool init) > } > else > Assert(!init); > + > + if (BufferCacheHibernationLevel > 0) > + { > + ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY, > + (char *)StrategyControl, sizeof(BufferStrategyControl), 1); > + } > } > > > @@ -521,3 +527,47 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf) > > return true; > } > + > +/* > + * AdjustStrategyControl -- adjust the member variables of StrategyControl > + * > + * If the shared_buffers setting had changed, restored StrategyControl > + * needs to be adjusted for in both cases of shrinking and enlarging. > + * This is called only from bufmgr.c:ResumeBufferCacheHibernation(). > + */ > +void > +AdjustStrategyControl(int oldNBuffers) > +{ > + if (oldNBuffers == NBuffers) > + { > + return; > + } > + > + /* enlarge or shrink the free buffer based on current NBuffers. */ > + StrategyControl->lastFreeBuffer = NBuffers - 1; > + > + /* shared_buffers shrunk. */ > + if (oldNBuffers > NBuffers) > + { > + if (StrategyControl->nextVictimBuffer >= NBuffers) > + { > + /* set the tail of buffers. */ > + StrategyControl->nextVictimBuffer = NBuffers - 1; > + } > + > + if (StrategyControl->firstFreeBuffer >= NBuffers) > + { > + /* set FREENEXT_END_OF_LIST(-1). 
*/ > + StrategyControl->firstFreeBuffer = FREENEXT_END_OF_LIST; > + } > + } > + else > + /* shared_buffers enlarged. */ > + { > + if (StrategyControl->firstFreeBuffer < 0) > + { > + /* set the next entry of the tail of old buffers. */ > + StrategyControl->firstFreeBuffer = oldNBuffers; > + } > + } > +} > diff --git src/backend/utils/misc/guc.c src/backend/utils/misc/guc.c > index 738e215..5affc6e 100644 > --- src/backend/utils/misc/guc.c > +++ src/backend/utils/misc/guc.c > @@ -2361,6 +2361,18 @@ static struct config_int ConfigureNamesInt[] = > NULL, NULL, NULL > }, > > + { > + {"buffer_cache_hibernation_level", PGC_POSTMASTER, UNGROUPED, > + gettext_noop("Sets buffer cache hibernation level."), > + gettext_noop("0 to disable(default), " > + "1 for saving buffer descriptors only(recommended), " > + "2 for saving buffer descriptors and buffer blocks(slower at shutdown).") > + }, > + &BufferCacheHibernationLevel, > + 0, 0, 2, > + NULL, NULL, NULL > + }, > + > /* End-of-list marker */ > { > {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL > diff --git src/backend/utils/misc/postgresql.conf.sample src/backend/utils/misc/postgresql.conf.sample > index b8a1582..44b6ff3 100644 > --- src/backend/utils/misc/postgresql.conf.sample > +++ src/backend/utils/misc/postgresql.conf.sample > @@ -119,6 +119,17 @@ > #maintenance_work_mem = 16MB # min 1MB > #max_stack_depth = 2MB # min 100kB > > + > +# Buffer Cache Hibernation: > +# Suspend/resume buffer cache data structure using hibernation files > +# at shutdown/startup. > +#buffer_cache_hibernation_level = 0 # Sets buffer cache hibernation level. > + # 0 to disable(default), > + # 1 for saving buffer descriptors only > + # (recommended), > + # 2 for saving buffer descriptors and > + # buffer blocks(slower at shutdown). 
> + > # - Kernel Resource Usage - > > #max_files_per_process = 1000 # min 25 > diff --git src/include/access/xlog.h src/include/access/xlog.h > index 7056fd6..7a9fb99 100644 > --- src/include/access/xlog.h > +++ src/include/access/xlog.h > @@ -13,6 +13,7 @@ > > #include "access/rmgr.h" > #include "access/xlogdefs.h" > +#include "catalog/pg_control.h" > #include "lib/stringinfo.h" > #include "storage/buf.h" > #include "utils/pg_crc.h" > @@ -294,6 +295,7 @@ extern bool XLogInsertAllowed(void); > extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream); > extern XLogRecPtr GetXLogReplayRecPtr(void); > > +extern bool GetControlFile(ControlFileData *controlFile); > extern void UpdateControlFile(void); > extern uint64 GetSystemIdentifier(void); > extern Size XLOGShmemSize(void); > diff --git src/include/storage/buf_internals.h src/include/storage/buf_internals.h > index b7d4ea5..d537ef1 100644 > --- src/include/storage/buf_internals.h > +++ src/include/storage/buf_internals.h > @@ -167,6 +167,7 @@ typedef struct sbufdesc > */ > #define LockBufHdr(bufHdr) SpinLockAcquire(&(bufHdr)->buf_hdr_lock) > #define UnlockBufHdr(bufHdr) SpinLockRelease(&(bufHdr)->buf_hdr_lock) > +#define IsUnlockBufHdr(bufHdr) SpinLockFree(&(bufHdr)->buf_hdr_lock) > > > /* in buf_init.c */ > @@ -190,6 +191,7 @@ extern bool StrategyRejectBuffer(BufferAccessStrategy strategy, > extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc); > extern Size StrategyShmemSize(void); > extern void StrategyInitialize(bool init); > +extern void AdjustStrategyControl(int oldNBuffers); > > /* buf_table.c */ > extern Size BufTableShmemSize(int size); > diff --git src/include/storage/bufmgr.h src/include/storage/bufmgr.h > index b8fc87e..ddfeb9d 100644 > --- src/include/storage/bufmgr.h > +++ src/include/storage/bufmgr.h > @@ -211,6 +211,20 @@ extern void BgBufferSync(void); > > extern void AtProcExit_LocalBuffers(void); > > +/* buffer cache hibernation support stuff */ > +extern int 
BufferCacheHibernationLevel; > + > +typedef enum BufferHibernationFileType > +{ > + BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY, > + BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS, > + BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS > +} BufferHibernationFileType; > + > +extern void ResisterBufferCacheHibernation(BufferHibernationFileType id, > + char *ptr, Size record_length, Size num_records); > +extern void ResumeBufferCacheHibernation(void); > + > /* in freelist.c */ > extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype); > extern void FreeAccessStrategy(BufferAccessStrategy strategy); > > -- > Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) > To make changes to your subscription: > http://www.postgresql.org/mailpref/pgsql-hackers -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + It's impossible for everything to be true. +
pgsql-hackers by date: