From: | Cédric Villemain <cedric(dot)villemain(dot)debian(at)gmail(dot)com> |
---|---|
To: | Bruce Momjian <bruce(at)momjian(dot)us> |
Cc: | Mitsuru IWASAKI <iwasaki(at)jp(dot)freebsd(dot)org>, greg(at)2ndquadrant(dot)com, pgsql-hackers(at)postgresql(dot)org |
Subject: | Re: patch for new feature: Buffer Cache Hibernation |
Date: | 2011-10-14 08:44:20 |
Message-ID: | CAF6yO=0FmfKJqFRhwbiUrmwFQp+wkOa4CmavYQ7AFdf+zEvbSQ@mail.gmail.com |
Views: | Raw Message | Whole Thread | Download mbox | Resend email |
Thread: | |
Lists: | pgsql-hackers |
2011/10/14 Bruce Momjian <bruce(at)momjian(dot)us>:
>
> Should this be marked as TODO?
I suppose TODO items *are* wanted, so working on one of them should spare
the author the pain of convincing people here to accept the feature, shouldn't it?
>
> ---------------------------------------------------------------------------
>
> Mitsuru IWASAKI wrote:
>> Hi,
>>
>> > On 05/07/2011 03:32 AM, Mitsuru IWASAKI wrote:
>> > > For 1, I've just finished my work. The latest patch is available at:
>> > > http://people.freebsd.org/~iwasaki/postgres/buffer-cache-hibernation-postgresql-20110507.patch
>> > >
>> >
>> > Reminder here--we can't accept code based on it being published to a web
>> > page. You'll need to e-mail it to the pgsql-hackers mailing list to be
>> > considered for the next PostgreSQL CommitFest, which is starting in a
>> > few weeks. Code submitted to the mailing list is considered a release
>> > of it to the project under the PostgreSQL license, which we can't just
>> > assume for things when given only a URL to them.
>>
>> Sorry about that, but I had enough time to revise my patches this weekend.
>> I have attached the patches to this mail, and will update the CommitFest page soon.
>>
>> > Also, you suggested you were out of time to work on this. If that's the
>> > case, we'd like to know that so we don't keep cc'ing you about things in
>> > expectation of an answer. Someone else may pick this up as a project to
>> > continue working on. But it's going to need a fair amount of revision
>> > before it matches what people want here, and I'm not sure how much of
>> > what you've written is going to end up in any commit that may happen
>> > from this idea.
>>
>> It seems that I don't have enough time to complete this work.
>> You don't need to keep cc'ing me, and I would be very happy if PostgreSQL became
>> the first DBMS to support a buffer cache hibernation feature.
>>
>> Thanks!
>>
>>
>> diff --git src/backend/access/transam/xlog.c src/backend/access/transam/xlog.c
>> index b0e4c41..7a3a207 100644
>> --- src/backend/access/transam/xlog.c
>> +++ src/backend/access/transam/xlog.c
>> @@ -4834,6 +4834,19 @@ ReadControlFile(void)
>> #endif
>> }
>>
>> +bool
>> +GetControlFile(ControlFileData *controlFile)
>> +{
>> + if (ControlFile == NULL)
>> + {
>> + return false;
>> + }
>> +
>> + memcpy(controlFile, ControlFile, sizeof(ControlFileData));
>> +
>> + return true;
>> +}
>> +
>> void
>> UpdateControlFile(void)
>> {
>> diff --git src/backend/bootstrap/bootstrap.c src/backend/bootstrap/bootstrap.c
>> index fc093cc..7ecf6bb 100644
>> --- src/backend/bootstrap/bootstrap.c
>> +++ src/backend/bootstrap/bootstrap.c
>> @@ -360,6 +360,15 @@ AuxiliaryProcessMain(int argc, char *argv[])
>> BaseInit();
>>
>> /*
>> + * Only StartupProcess can call ResumeBufferCacheHibernation() after
>> + * InitFileAccess() and smgrinit().
>> + */
>> + if (auxType == StartupProcess && BufferCacheHibernationLevel > 0)
>> + {
>> + ResumeBufferCacheHibernation();
>> + }
>> +
>> + /*
>> * When we are an auxiliary process, we aren't going to do the full
>> * InitPostgres pushups, but there are a couple of things that need to get
>> * lit up even in an auxiliary process.
>> diff --git src/backend/storage/buffer/buf_init.c src/backend/storage/buffer/buf_init.c
>> index dadb49d..52eb51a 100644
>> --- src/backend/storage/buffer/buf_init.c
>> +++ src/backend/storage/buffer/buf_init.c
>> @@ -127,6 +127,14 @@ InitBufferPool(void)
>>
>> /* Init other shared buffer-management stuff */
>> StrategyInitialize(!foundDescs);
>> +
>> + if (BufferCacheHibernationLevel > 0)
>> + {
>> + ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS,
>> + (char *)BufferDescriptors, sizeof(BufferDesc), NBuffers);
>> + ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS,
>> + (char *)BufferBlocks, BLCKSZ, NBuffers);
>> + }
>> }
>>
>> /*
>> diff --git src/backend/storage/buffer/bufmgr.c src/backend/storage/buffer/bufmgr.c
>> index f96685d..dba8ebf 100644
>> --- src/backend/storage/buffer/bufmgr.c
>> +++ src/backend/storage/buffer/bufmgr.c
>> @@ -31,6 +31,7 @@
>> #include "postgres.h"
>>
>> #include <sys/file.h>
>> +#include <sys/stat.h>
>> #include <unistd.h>
>>
>> #include "catalog/catalog.h"
>> @@ -61,6 +62,13 @@
>> #define BUF_WRITTEN 0x01
>> #define BUF_REUSABLE 0x02
>>
>> +/*
>> + * Buffer Cache Hibernation stuff.
>> + */
>> +/* enable this to debug buffer cache hibernation. */
>> +#if 0
>> +#define DEBUG_BUFFER_CACHE_HIBERNATION
>> +#endif
>>
>> /* GUC variables */
>> bool zero_damaged_pages = false;
>> @@ -765,6 +773,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
>> }
>> }
>>
>> +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
>> + elog(DEBUG5,
>> + "alloc [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
>> + buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
>> + buf->wait_backend_pid, buf->freeNext,
>> + newHash, newTag.rnode.spcNode,
>> + newTag.rnode.dbNode, newTag.rnode.relNode,
>> + newTag.forkNum, newTag.blockNum);
>> +#endif
>> +
>> return buf;
>> }
>>
>> @@ -800,6 +818,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
>> * the old content is no longer relevant. (The usage_count starts out at
>> * 1 so that the buffer can survive one clock-sweep pass.)
>> */
>> +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
>> + elog(DEBUG5,
>> + "rename [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
>> + buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
>> + buf->wait_backend_pid, buf->freeNext,
>> + oldHash, oldTag.rnode.spcNode,
>> + oldTag.rnode.dbNode, oldTag.rnode.relNode,
>> + oldTag.forkNum, oldTag.blockNum);
>> +#endif
>> +
>> buf->tag = newTag;
>> buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
>> if (relpersistence == RELPERSISTENCE_PERMANENT)
>> @@ -2772,3 +2800,716 @@ local_buffer_write_error_callback(void *arg)
>> pfree(path);
>> }
>> }
>> +
>> +/* ----------------------------------------------------------------
>> + * Buffer Cache Hibernation support stuff
>> + *
>> + * Suspend/resume buffer cache data structure using hibernation files
>> + * at shutdown/startup.
>> + * ----------------------------------------------------------------
>> + */
>> +
>> +int BufferCacheHibernationLevel = 0;
>> +
>> +#define BUFFER_CACHE_HIBERNATION_FILE_STRATEGY "global/pg_buffer_cache_hibernation_strategy"
>> +#define BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS "global/pg_buffer_cache_hibernation_descriptors"
>> +#define BUFFER_CACHE_HIBERNATION_FILE_BLOCKS "global/pg_buffer_cache_hibernation_blocks"
>> +#define BUFFER_CACHE_HIBERNATION_FILE_CRC32 "global/pg_buffer_cache_hibernation_crc32"
>> +
>> +static struct
>> +{
>> + char *hibernation_file;
>> + char *data_ptr;
>> + Size record_length;
>> + Size num_records;
>> + pg_crc32 crc;
>> +} BufferCacheHibernationData[] =
>> +{
>> + /* BufferStrategyControl */
>> + {
>> + BUFFER_CACHE_HIBERNATION_FILE_STRATEGY,
>> + NULL, 0, 0, 0
>> + },
>> +
>> + /* BufferDescriptors */
>> + {
>> + BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS,
>> + NULL, 0, 0, 0
>> + },
>> +
>> + /* BufferBlocks */
>> + {
>> + BUFFER_CACHE_HIBERNATION_FILE_BLOCKS,
>> + NULL, 0, 0, 0
>> + },
>> +
>> + /* End-of-list marker */
>> + {
>> + NULL,
>> + NULL, 0, 0, 0
>> + },
>> +};
>> +
>> +static ControlFileData controlFile;
>> +static bool controlFileInitialized = false;
>> +
>> +/*
>> + * AtProcExit_BufferCacheHibernation:
>> + * store the buffer cache into hibernation files at shutdown.
>> + */
>> +static void
>> +AtProcExit_BufferCacheHibernation(int code, Datum arg)
>> +{
>> + BufferHibernationFileType id;
>> + int i;
>> + int fd;
>> +
>> + if (BufferCacheHibernationLevel == 0)
>> + {
>> + return;
>> + }
>> +
>> + /*
>> + * get the control file to check the system state validation.
>> + */
>> + if (GetControlFile(&controlFile) == false)
>> + {
>> + elog(WARNING,
>> + "could not get control file, "
>> + "aborting buffer cache hibernation");
>> + return;
>> + }
>> +
>> + if (controlFile.state != DB_SHUTDOWNED)
>> + {
>> + elog(WARNING,
>> + "database system was not shut down normally, "
>> + "aborting buffer cache hibernation");
>> + return;
>> + }
>> +
>> + /*
>> + * suspend buffer cache data structure into hibernation files.
>> + */
>> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
>> + {
>> + Size record_length;
>> + Size num_records;
>> + char *ptr;
>> + pg_crc32 crc;
>> +
>> + if (BufferCacheHibernationLevel < 2 &&
>> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
>> + {
>> + continue;
>> + }
>> +
>> + if (BufferCacheHibernationData[id].data_ptr == NULL ||
>> + BufferCacheHibernationData[id].record_length == 0 ||
>> + BufferCacheHibernationData[id].num_records == 0)
>> + {
>> + elog(WARNING,
>> + "ResisterBufferCacheHibernation() was not called for %s",
>> + BufferCacheHibernationData[id].hibernation_file);
>> + goto cleanup;
>> + }
>> +
>> + fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
>> + O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | S_IWUSR);
>> + if (fd < 0)
>> + {
>> + elog(WARNING,
>> + "could not open %s",
>> + BufferCacheHibernationData[id].hibernation_file);
>> + goto cleanup;
>> + }
>> +
>> + record_length = BufferCacheHibernationData[id].record_length;
>> + num_records = BufferCacheHibernationData[id].num_records;
>> +
>> + elog(NOTICE,
>> + "buffer cache hibernate into %s",
>> + BufferCacheHibernationData[id].hibernation_file);
>> +
>> + INIT_CRC32(crc);
>> + for (i = 0; i < num_records; i++)
>> + {
>> + ptr = BufferCacheHibernationData[id].data_ptr + (i * record_length);
>> + if (write(fd, (void *)ptr, record_length) != record_length)
>> + {
>> + elog(WARNING,
>> + "could not write %s",
>> + BufferCacheHibernationData[id].hibernation_file);
>> + goto cleanup;
>> + }
>> +
>> + COMP_CRC32(crc, ptr, record_length);
>> + }
>> +
>> + FIN_CRC32(crc);
>> + close(fd);
>> +
>> + BufferCacheHibernationData[id].crc = crc;
>> + }
>> +
>> + /*
>> + * save the computed crc values for the validations at resuming.
>> + */
>> + fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32,
>> + O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | S_IWUSR);
>> + if (fd < 0)
>> + {
>> + elog(WARNING,
>> + "could not open %s",
>> + BUFFER_CACHE_HIBERNATION_FILE_CRC32);
>> + goto cleanup;
>> + }
>> +
>> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
>> + {
>> + pg_crc32 crc;
>> +
>> + if (BufferCacheHibernationLevel < 2 &&
>> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
>> + {
>> + continue;
>> + }
>> +
>> + crc = BufferCacheHibernationData[id].crc;
>> + if (write(fd, (void *)&crc, sizeof(pg_crc32)) != sizeof(pg_crc32))
>> + {
>> + elog(WARNING,
>> + "could not write %s for %s",
>> + BUFFER_CACHE_HIBERNATION_FILE_CRC32,
>> + BufferCacheHibernationData[id].hibernation_file);
>> + goto cleanup;
>> + }
>> + }
>> + close(fd);
>> +
>> + elog(NOTICE,
>> + "buffer cache suspended successfully");
>> +
>> + return;
>> +
>> +cleanup:
>> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
>> + {
>> + unlink(BufferCacheHibernationData[id].hibernation_file);
>> + }
>> +
>> + return;
>> +}
>> +
>> +/*
>> + * ResisterBufferCacheHibernation:
>> + * register the buffer cache data structure info.
>> + */
>> +void
>> +ResisterBufferCacheHibernation(BufferHibernationFileType id, char *ptr, Size record_length, Size num_records)
>> +{
>> + static bool first_time = true;
>> +
>> + if (BufferCacheHibernationLevel == 0)
>> + {
>> + return;
>> + }
>> +
>> + if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY &&
>> + id != BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS &&
>> + id != BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
>> + {
>> + return;
>> + }
>> +
>> + if (first_time)
>> + {
>> + /*
>> + * AtProcExit_BufferCacheHibernation to be called at shutdown.
>> + */
>> + on_shmem_exit(AtProcExit_BufferCacheHibernation, 0);
>> + first_time = false;
>> + }
>> +
>> + /*
>> + * get the control file to check the system state and
>> + * hibernation file validations.
>> + */
>> + if (controlFileInitialized == false)
>> + {
>> + if (GetControlFile(&controlFile) == true)
>> + {
>> + controlFileInitialized = true;
>> + }
>> + }
>> +
>> + BufferCacheHibernationData[id].data_ptr = ptr;
>> + BufferCacheHibernationData[id].record_length = record_length;
>> + BufferCacheHibernationData[id].num_records = num_records;
>> +}
>> +
>> +/*
>> + * ResumeBufferCacheHibernation:
>> + * resume the buffer cache from hibernation file at startup.
>> + */
>> +void
>> +ResumeBufferCacheHibernation(void)
>> +{
>> + BufferHibernationFileType id;
>> + int i;
>> + int fd;
>> + Size num_records;
>> + Size record_length;
>> + char *buf_common;
>> + int oldNBuffers;
>> + bool buffer_block_processed;
>> +
>> + if (BufferCacheHibernationLevel == 0)
>> + {
>> + return;
>> + }
>> +
>> + buf_common = NULL;
>> + buffer_block_processed = false;
>> +
>> + /*
>> + * lock all buffer descriptors to prevent other processes from
>> + * updating buffers.
>> + */
>> + for (i = 0; i < NBuffers; i++)
>> + {
>> + BufferDesc *buf;
>> +
>> + buf = &BufferDescriptors[i];
>> + LockBufHdr(buf);
>> + }
>> +
>> + /*
>> + * get the control file to check the system state and
>> + * hibernation file validations.
>> + */
>> + if (controlFileInitialized == false)
>> + {
>> + elog(WARNING,
>> + "could not get control file, "
>> + "aborting buffer cache hibernation");
>> + goto cleanup;
>> + }
>> +
>> + if (controlFile.state != DB_SHUTDOWNED)
>> + {
>> + elog(WARNING,
>> + "database system was not shut down normally, "
>> + "aborting buffer cache hibernation");
>> + goto cleanup;
>> + }
>> +
>> + /*
>> + * read the crc values which was computed when the hibernation
>> + * files were created.
>> + */
>> + fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32,
>> + O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
>> + if (fd < 0)
>> + {
>> + elog(WARNING,
>> + "could not open %s",
>> + BUFFER_CACHE_HIBERNATION_FILE_CRC32);
>> + goto cleanup;
>> + }
>> +
>> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
>> + {
>> + pg_crc32 crc;
>> +
>> + if (BufferCacheHibernationLevel < 2 &&
>> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
>> + {
>> + continue;
>> + }
>> +
>> + if (read(fd, (void *)&crc, sizeof(pg_crc32)) != sizeof(pg_crc32))
>> + {
>> + if (BufferCacheHibernationLevel == 2 &&
>> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
>> + {
>> + /*
>> + * if buffer_cache_hibernation_level changes 1 to 2,
>> + * the crc value of buffer block hibernation file may not exist.
>> + * just ignore it here.
>> + */
>> + continue;
>> + }
>> +
>> + elog(WARNING,
>> + "could not read %s for %s",
>> + BUFFER_CACHE_HIBERNATION_FILE_CRC32,
>> + BufferCacheHibernationData[id].hibernation_file);
>> + close(fd);
>> + goto cleanup;
>> + }
>> + BufferCacheHibernationData[id].crc = crc;
>> + }
>> +
>> + close(fd);
>> +
>> + /*
>> + * allocate a buffer to read the contents of the hibernation files
>> + * for validations.
>> + */
>> + record_length = 0;
>> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
>> + {
>> + if (record_length < BufferCacheHibernationData[id].record_length)
>> + {
>> + record_length = BufferCacheHibernationData[id].record_length;
>> + }
>> + }
>> +
>> + buf_common = malloc(record_length);
>> + Assert(buf_common != NULL);
>> +
>> + /* assume that the number of buffers have not changed. */
>> + oldNBuffers = NBuffers;
>> +
>> + /*
>> + * check if all hibernation files are valid.
>> + */
>> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
>> + {
>> + struct stat sb;
>> + pg_crc32 crc;
>> +
>> + if (BufferCacheHibernationLevel < 2 &&
>> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
>> + {
>> + continue;
>> + }
>> +
>> + if (BufferCacheHibernationData[id].data_ptr == NULL ||
>> + BufferCacheHibernationData[id].record_length == 0 ||
>> + BufferCacheHibernationData[id].num_records == 0)
>> + {
>> + elog(WARNING,
>> + "ResisterBufferCacheHibernation() was not called for %s",
>> + BufferCacheHibernationData[id].hibernation_file);
>> + goto cleanup;
>> + }
>> +
>> + fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
>> + O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
>> + if (fd < 0)
>> + {
>> + if (BufferCacheHibernationLevel == 2 &&
>> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
>> + {
>> + /*
>> + * if buffer_cache_hibernation_level changes 1 to 2,
>> + * the buffer block hibernation file may not exist.
>> + * just ignore it here.
>> + */
>> + continue;
>> + }
>> +
>> + goto cleanup;
>> + }
>> +
>> + if (fstat(fd, &sb) < 0)
>> + {
>> + elog(WARNING,
>> + "could not get stats of the buffer cache hibernation file: %s",
>> + BufferCacheHibernationData[id].hibernation_file);
>> + close(fd);
>> + goto cleanup;
>> + }
>> +
>> + record_length = BufferCacheHibernationData[id].record_length;
>> + num_records = BufferCacheHibernationData[id].num_records;
>> +
>> + if (sb.st_size != (record_length * num_records))
>> + {
>> + /* The size of StrategyControl should be the same always. */
>> + if (id == BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY ||
>> + (sb.st_size % record_length) > 0)
>> + {
>> + elog(WARNING,
>> + "size mismatch on the buffer cache hibernation file: %s",
>> + BufferCacheHibernationData[id].hibernation_file);
>> + close(fd);
>> + goto cleanup;
>> + }
>> +
>> + /*
>> + * The number of records of buffer descriptors and blocks
>> + * should be the same.
>> + */
>> + if (oldNBuffers != NBuffers &&
>> + oldNBuffers != (sb.st_size / record_length))
>> + {
>> + elog(WARNING,
>> + "size mismatch on the buffer cache hibernation file: %s",
>> + BufferCacheHibernationData[id].hibernation_file);
>> + close(fd);
>> + goto cleanup;
>> + }
>> +
>> + oldNBuffers = sb.st_size / record_length;
>> +
>> + elog(NOTICE,
>> + "shared_buffers have changed from %d to %d: %s",
>> + oldNBuffers, NBuffers,
>> + BufferCacheHibernationData[id].hibernation_file);
>> +
>> + /* use the original size to compute CRC of the hibernation file. */
>> + num_records = oldNBuffers;
>> + }
>> +
>> + if ((pg_time_t)sb.st_mtime < controlFile.time)
>> + {
>> + elog(WARNING,
>> + "the hibernation file is older than control file: %s",
>> + BufferCacheHibernationData[id].hibernation_file);
>> + close(fd);
>> + goto cleanup;
>> + }
>> +
>> + INIT_CRC32(crc);
>> + for (i = 0; i < num_records; i++)
>> + {
>> + if (read(fd, (void *)buf_common, record_length) != record_length)
>> + {
>> + elog(WARNING,
>> + "could not read the buffer cache hibernation file: %s",
>> + BufferCacheHibernationData[id].hibernation_file);
>> + close(fd);
>> + goto cleanup;
>> + }
>> +
>> + COMP_CRC32(crc, buf_common, record_length);
>> +
>> + /*
>> + * buffer descriptors validations.
>> + */
>> + if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS)
>> + {
>> + BufferDesc *buf;
>> + BufFlags abnormal_flags;
>> +
>> + if (i >= NBuffers)
>> + {
>> + continue;
>> + }
>> +
>> + abnormal_flags = (BM_DIRTY | BM_IO_IN_PROGRESS | BM_IO_ERROR |
>> + BM_JUST_DIRTIED | BM_PIN_COUNT_WAITER);
>> +
>> + buf = (BufferDesc *)buf_common;
>> +
>> + if (buf->flags & abnormal_flags)
>> + {
>> + elog(WARNING,
>> + "abnormal flags in buffer descriptors: %d",
>> + buf->flags);
>> + close(fd);
>> + goto cleanup;
>> + }
>> +
>> + if (buf->usage_count > BM_MAX_USAGE_COUNT)
>> + {
>> + elog(WARNING,
>> + "invalid usage count in buffer descriptors: %d",
>> + buf->usage_count);
>> + close(fd);
>> + goto cleanup;
>> + }
>> +
>> + if (buf->buf_id < 0 || buf->buf_id >= num_records)
>> + {
>> + elog(WARNING,
>> + "invalid buffer id in buffer descriptors: %d",
>> + buf->buf_id);
>> + close(fd);
>> + goto cleanup;
>> + }
>> + }
>> + }
>> +
>> + FIN_CRC32(crc);
>> + close(fd);
>> +
>> + if (!EQ_CRC32(BufferCacheHibernationData[id].crc, crc))
>> + {
>> + elog(WARNING,
>> + "crc mismatch on the buffer cache hibernation file: %s",
>> + BufferCacheHibernationData[id].hibernation_file);
>> + close(fd);
>> + goto cleanup;
>> + }
>> + }
>> +
>> + /*
>> + * resume the buffer cache data structure from the hibernation files.
>> + */
>> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
>> + {
>> + int fd;
>> + char *ptr;
>> +
>> + if (BufferCacheHibernationLevel < 2 &&
>> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
>> + {
>> + continue;
>> + }
>> +
>> + record_length = BufferCacheHibernationData[id].record_length;
>> + num_records = BufferCacheHibernationData[id].num_records;
>> +
>> + if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY)
>> + {
>> + /* use the smaller number of buffers. */
>> + num_records = (oldNBuffers < NBuffers)? oldNBuffers : NBuffers;
>> + }
>> +
>> + fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
>> + O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
>> + if (fd < 0)
>> + {
>> + if (BufferCacheHibernationLevel == 2 &&
>> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
>> + {
>> + /*
>> + * if buffer_cache_hibernation_level changes 1 to 2,
>> + * the buffer block hibernation file may not exist.
>> + * just ignore it here.
>> + */
>> + continue;
>> + }
>> +
>> + goto cleanup;
>> + }
>> +
>> + elog(NOTICE,
>> + "buffer cache resume from %s(%d bytes * %d records)",
>> + BufferCacheHibernationData[id].hibernation_file,
>> + record_length, num_records);
>> +
>> + for (i = 0; i < num_records; i++)
>> + {
>> + ptr = BufferCacheHibernationData[id].data_ptr + (i * record_length);
>> + read(fd, (void *)ptr, record_length);
>> +
>> + /* Re-lock the buffer descriptor if necessary. */
>> + if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS)
>> + {
>> + BufferDesc *buf;
>> +
>> + buf = (BufferDesc *)ptr;
>> + if (IsUnlockBufHdr(buf))
>> + {
>> + LockBufHdr(buf);
>> + }
>> + }
>> + }
>> +
>> + close(fd);
>> +
>> + if (id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
>> + {
>> + buffer_block_processed = true;
>> + }
>> + }
>> +
>> + if (buffer_block_processed == false)
>> + {
>> + /* we didn't use the buffer block hibernation file, so delete it now. */
>> + id = BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS;
>> + unlink(BufferCacheHibernationData[id].hibernation_file);
>> + }
>> +
>> + /*
>> + * set the rest data structures (eg. lookup hashtable) up
>> + * based on the buffer descriptors.
>> + */
>> + num_records = (oldNBuffers < NBuffers)? oldNBuffers : NBuffers;
>> + for (i = 0; i < num_records; i++)
>> + {
>> + BufferDesc *buf;
>> + BufferTag newTag;
>> + uint32 newHash;
>> + int buf_id;
>> +
>> + buf = &BufferDescriptors[i];
>> + if (buf->tag.rnode.spcNode == InvalidOid &&
>> + buf->tag.rnode.dbNode == InvalidOid &&
>> + buf->tag.rnode.relNode == InvalidOid)
>> + {
>> + continue;
>> + }
>> +
>> + INIT_BUFFERTAG(newTag, buf->tag.rnode, buf->tag.forkNum, buf->tag.blockNum);
>> + newHash = BufTableHashCode(&newTag);
>> +
>> + if (buffer_block_processed == false)
>> + {
>> + Block bufBlock;
>> + SMgrRelation smgr;
>> +
>> + /*
>> + * re-read buffer block.
>> + */
>> + bufBlock = BufHdrGetBlock(buf);
>> + smgr = smgropen(buf->tag.rnode, InvalidBackendId);
>> + smgrread(smgr, newTag.forkNum, newTag.blockNum, (char *) bufBlock);
>> + }
>> +
>> + buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
>> + if (buf_id != -1)
>> + {
>> + /* the entry exists already, return it to the freelist. */
>> + buf->refcount = 0;
>> + buf->flags = 0;
>> + InvalidateBuffer(buf);
>> + continue;
>> + }
>> +
>> + /* clear wait_backend_pid because the process was terminated already. */
>> + buf->wait_backend_pid = 0;
>> +
>> +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
>> + elog(DEBUG5,
>> + "resume [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
>> + buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
>> + buf->wait_backend_pid, buf->freeNext,
>> + newHash, newTag.rnode.spcNode,
>> + newTag.rnode.dbNode, newTag.rnode.relNode,
>> + newTag.forkNum, newTag.blockNum);
>> +#endif
>> + }
>> +
>> + /*
>> + * adjust StrategyControl based on the change of shared_buffers.
>> + */
>> + if (oldNBuffers != NBuffers)
>> + {
>> + AdjustStrategyControl(oldNBuffers);
>> + }
>> +
>> + elog(NOTICE,
>> + "buffer cache resumed successfully");
>> +
>> +cleanup:
>> + for (i = 0; i < NBuffers; i++)
>> + {
>> + BufferDesc *buf;
>> +
>> + buf = &BufferDescriptors[i];
>> + UnlockBufHdr(buf);
>> + }
>> +
>> + if (buf_common != NULL)
>> + {
>> + free(buf_common);
>> + }
>> +
>> + return;
>> +}
>> diff --git src/backend/storage/buffer/freelist.c src/backend/storage/buffer/freelist.c
>> index bf9903b..ffc101d 100644
>> --- src/backend/storage/buffer/freelist.c
>> +++ src/backend/storage/buffer/freelist.c
>> @@ -347,6 +347,12 @@ StrategyInitialize(bool init)
>> }
>> else
>> Assert(!init);
>> +
>> + if (BufferCacheHibernationLevel > 0)
>> + {
>> + ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY,
>> + (char *)StrategyControl, sizeof(BufferStrategyControl), 1);
>> + }
>> }
>>
>>
>> @@ -521,3 +527,47 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf)
>>
>> return true;
>> }
>> +
>> +/*
>> + * AdjustStrategyControl -- adjust the member variables of StrategyControl
>> + *
>> + * If the shared_buffers setting had changed, restored StrategyControl
>> + * needs to be adjusted for in both cases of shrinking and enlarging.
>> + * This is called only from bufmgr.c:ResumeBufferCacheHibernation().
>> + */
>> +void
>> +AdjustStrategyControl(int oldNBuffers)
>> +{
>> + if (oldNBuffers == NBuffers)
>> + {
>> + return;
>> + }
>> +
>> + /* enlarge or shrink the free buffer based on current NBuffers. */
>> + StrategyControl->lastFreeBuffer = NBuffers - 1;
>> +
>> + /* shared_buffers shrunk. */
>> + if (oldNBuffers > NBuffers)
>> + {
>> + if (StrategyControl->nextVictimBuffer >= NBuffers)
>> + {
>> + /* set the tail of buffers. */
>> + StrategyControl->nextVictimBuffer = NBuffers - 1;
>> + }
>> +
>> + if (StrategyControl->firstFreeBuffer >= NBuffers)
>> + {
>> + /* set FREENEXT_END_OF_LIST(-1). */
>> + StrategyControl->firstFreeBuffer = FREENEXT_END_OF_LIST;
>> + }
>> + }
>> + else
>> + /* shared_buffers enlarged. */
>> + {
>> + if (StrategyControl->firstFreeBuffer < 0)
>> + {
>> + /* set the next entry of the tail of old buffers. */
>> + StrategyControl->firstFreeBuffer = oldNBuffers;
>> + }
>> + }
>> +}
>> diff --git src/backend/utils/misc/guc.c src/backend/utils/misc/guc.c
>> index 738e215..5affc6e 100644
>> --- src/backend/utils/misc/guc.c
>> +++ src/backend/utils/misc/guc.c
>> @@ -2361,6 +2361,18 @@ static struct config_int ConfigureNamesInt[] =
>> NULL, NULL, NULL
>> },
>>
>> + {
>> + {"buffer_cache_hibernation_level", PGC_POSTMASTER, UNGROUPED,
>> + gettext_noop("Sets buffer cache hibernation level."),
>> + gettext_noop("0 to disable(default), "
>> + "1 for saving buffer descriptors only(recommended), "
>> + "2 for saving buffer descriptors and buffer blocks(slower at shutdown).")
>> + },
>> + &BufferCacheHibernationLevel,
>> + 0, 0, 2,
>> + NULL, NULL, NULL
>> + },
>> +
>> /* End-of-list marker */
>> {
>> {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
>> diff --git src/backend/utils/misc/postgresql.conf.sample src/backend/utils/misc/postgresql.conf.sample
>> index b8a1582..44b6ff3 100644
>> --- src/backend/utils/misc/postgresql.conf.sample
>> +++ src/backend/utils/misc/postgresql.conf.sample
>> @@ -119,6 +119,17 @@
>> #maintenance_work_mem = 16MB # min 1MB
>> #max_stack_depth = 2MB # min 100kB
>>
>> +
>> +# Buffer Cache Hibernation:
>> +# Suspend/resume buffer cache data structure using hibernation files
>> +# at shutdown/startup.
>> +#buffer_cache_hibernation_level = 0 # Sets buffer cache hibernation level.
>> + # 0 to disable(default),
>> + # 1 for saving buffer descriptors only
>> + # (recommended),
>> + # 2 for saving buffer descriptors and
>> + # buffer blocks(slower at shutdown).
>> +
>> # - Kernel Resource Usage -
>>
>> #max_files_per_process = 1000 # min 25
>> diff --git src/include/access/xlog.h src/include/access/xlog.h
>> index 7056fd6..7a9fb99 100644
>> --- src/include/access/xlog.h
>> +++ src/include/access/xlog.h
>> @@ -13,6 +13,7 @@
>>
>> #include "access/rmgr.h"
>> #include "access/xlogdefs.h"
>> +#include "catalog/pg_control.h"
>> #include "lib/stringinfo.h"
>> #include "storage/buf.h"
>> #include "utils/pg_crc.h"
>> @@ -294,6 +295,7 @@ extern bool XLogInsertAllowed(void);
>> extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
>> extern XLogRecPtr GetXLogReplayRecPtr(void);
>>
>> +extern bool GetControlFile(ControlFileData *controlFile);
>> extern void UpdateControlFile(void);
>> extern uint64 GetSystemIdentifier(void);
>> extern Size XLOGShmemSize(void);
>> diff --git src/include/storage/buf_internals.h src/include/storage/buf_internals.h
>> index b7d4ea5..d537ef1 100644
>> --- src/include/storage/buf_internals.h
>> +++ src/include/storage/buf_internals.h
>> @@ -167,6 +167,7 @@ typedef struct sbufdesc
>> */
>> #define LockBufHdr(bufHdr) SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
>> #define UnlockBufHdr(bufHdr) SpinLockRelease(&(bufHdr)->buf_hdr_lock)
>> +#define IsUnlockBufHdr(bufHdr) SpinLockFree(&(bufHdr)->buf_hdr_lock)
>>
>>
>> /* in buf_init.c */
>> @@ -190,6 +191,7 @@ extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
>> extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
>> extern Size StrategyShmemSize(void);
>> extern void StrategyInitialize(bool init);
>> +extern void AdjustStrategyControl(int oldNBuffers);
>>
>> /* buf_table.c */
>> extern Size BufTableShmemSize(int size);
>> diff --git src/include/storage/bufmgr.h src/include/storage/bufmgr.h
>> index b8fc87e..ddfeb9d 100644
>> --- src/include/storage/bufmgr.h
>> +++ src/include/storage/bufmgr.h
>> @@ -211,6 +211,20 @@ extern void BgBufferSync(void);
>>
>> extern void AtProcExit_LocalBuffers(void);
>>
>> +/* buffer cache hibernation support stuff */
>> +extern int BufferCacheHibernationLevel;
>> +
>> +typedef enum BufferHibernationFileType
>> +{
>> + BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY,
>> + BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS,
>> + BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS
>> +} BufferHibernationFileType;
>> +
>> +extern void ResisterBufferCacheHibernation(BufferHibernationFileType id,
>> + char *ptr, Size record_length, Size num_records);
>> +extern void ResumeBufferCacheHibernation(void);
>> +
>> /* in freelist.c */
>> extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
>> extern void FreeAccessStrategy(BufferAccessStrategy strategy);
>>
>> --
>> Sent via pgsql-hackers mailing list (pgsql-hackers(at)postgresql(dot)org)
>> To make changes to your subscription:
>> http://www.postgresql.org/mailpref/pgsql-hackers
>
> --
> Bruce Momjian <bruce(at)momjian(dot)us> http://momjian.us
> EnterpriseDB http://enterprisedb.com
>
> + It's impossible for everything to be true. +
>
> --
> Sent via pgsql-hackers mailing list (pgsql-hackers(at)postgresql(dot)org)
> To make changes to your subscription:
> http://www.postgresql.org/mailpref/pgsql-hackers
>
--
Cédric Villemain +33 (0)6 20 30 22 52
http://2ndQuadrant.fr/
PostgreSQL: Support 24x7 - Développement, Expertise et Formation
From | Date | Subject | |
---|---|---|---|
Next Message | Willy-Bas Loos | 2011-10-14 10:20:12 | [HACKERS] register creation date of table |
Previous Message | Devrim GÜNDÜZ | 2011-10-14 06:57:13 | Re: Will Index-only-scan be in 9.2 |