From 987611777fdae13a2fc75faa3015d8adabcafb85 Mon Sep 17 00:00:00 2001 From: David Christensen Date: Thu, 20 Oct 2022 17:05:33 -0400 Subject: [PATCH 4/6] Add page_checksums32 page feature This is an example page feature which utilizes 2 bytes of the reserved page space and the existing 2-byte pd_checksum to store the total 32-bit page checksum that we currently calculate and throw half away. It also serves as an illustration of writing/using a page feature. --- src/backend/access/transam/xlog.c | 4 +- src/backend/backup/basebackup.c | 28 +++++++++--- src/backend/storage/page/bufpage.c | 53 +++++++++++++++++++---- src/backend/utils/misc/guc_tables.c | 11 +++++ src/bin/initdb/initdb.c | 18 ++++++-- src/bin/pg_controldata/pg_controldata.c | 3 ++ src/common/pagefeat.c | 14 +++++- src/include/common/pagefeat.h | 5 ++- src/include/storage/checksum.h | 1 + src/include/storage/checksum_impl.h | 57 +++++++++++++++++++++++++ 10 files changed, 172 insertions(+), 22 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 21a134a663..a1379efd49 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4233,7 +4233,9 @@ bool DataChecksumsEnabled(void) { Assert(ControlFile != NULL); - return (ControlFile->data_checksum_version > 0); + return (ControlFile->data_checksum_version > 0) || \ + PageFeatureSetHasFeature(ControlFile->page_features, PF_PAGE_CHECKSUMS32); + } /* diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c index 74fb529380..90cd97b938 100644 --- a/src/backend/backup/basebackup.c +++ b/src/backend/backup/basebackup.c @@ -25,6 +25,7 @@ #include "commands/defrem.h" #include "common/compression.h" #include "common/file_perm.h" +#include "common/pagefeat.h" #include "lib/stringinfo.h" #include "miscadmin.h" #include "nodes/pg_list.h" @@ -1492,7 +1493,7 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, int fd; BlockNumber blkno = 0; bool block_retry = false; - uint16 checksum; + uint32 checksum, page_checksum; int checksum_failures = 0; off_t cnt; int i; @@ -1608,9 +1609,24 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, */ if (!PageIsNew(page) && PageGetLSN(page) < sink->bbs_state->startptr) { - checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE); - phdr = (PageHeader) page; - if (phdr->pd_checksum != checksum) + char *extended_checksum_loc = NULL; + + /* are we using extended checksums? */ + if ((extended_checksum_loc = ClusterPageFeatureOffset(page, PF_PAGE_CHECKSUMS32))) + { + page_checksum = *(uint16*)extended_checksum_loc; + page_checksum <<= 16; + page_checksum += ((PageHeader)page)->pd_checksum; + checksum = (uint32)pg_checksum32_page(page, blkno + segmentno * RELSEG_SIZE, extended_checksum_loc); + } + else + { + phdr = (PageHeader) page; + page_checksum = (uint32)phdr->pd_checksum; + checksum = pg_checksum_page(page, blkno + segmentno * RELSEG_SIZE); + } + + if (page_checksum != checksum) { /* * Retry the block on the first failure. It's @@ -1661,9 +1677,9 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, ereport(WARNING, (errmsg("checksum verification failed in " "file \"%s\", block %u: calculated " - "%X but expected %X", + "%lu but expected %lu", readfilename, blkno, checksum, - phdr->pd_checksum))); + page_checksum))); if (checksum_failures == 5) ereport(WARNING, (errmsg("further checksum verification " diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 74a7bdce33..b50b4e76a0 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -93,8 +93,9 @@ PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags) bool checksum_failure = false; bool header_sane = false; bool all_zeroes = false; - uint16 checksum = 0; - + uint32 checksum = 0; + uint32 page_checksum = 0; + char *extended_checksum_loc = NULL; /* * Don't verify page data unless the page passes basic non-zero test */ @@ -102,9 +103,22 @@ PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags) { if (DataChecksumsEnabled()) { - checksum = pg_checksum_page((char *) page, blkno); - - if (checksum != p->pd_checksum) + /* are we using extended checksums? */ + if ((extended_checksum_loc = ClusterPageFeatureOffset(page, PF_PAGE_CHECKSUMS32))) + { + /* high bits of the existing checksum are stored as a uint16 at extended_checksum_loc, low bits in pd_checksum */ + page_checksum = *((uint16*)extended_checksum_loc); + page_checksum <<= 16; + page_checksum += p->pd_checksum; + checksum = (uint32)pg_checksum32_page(page, blkno, extended_checksum_loc); + } + else + { + /* traditional checksums in the pd_checksum field */ + page_checksum = (uint32)p->pd_checksum; + checksum = (uint32)pg_checksum_page((char *) page, blkno); + } + if (checksum != page_checksum) checksum_failure = true; } @@ -150,7 +164,7 @@ PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags) ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("page verification failed, calculated checksum %u but expected %u", - checksum, p->pd_checksum))); + checksum, page_checksum))); if ((flags & PIV_REPORT_STAT) != 0) pgstat_report_checksum_failure(); @@ -1510,6 +1524,7 @@ char * PageSetChecksumCopy(Page page, BlockNumber blkno) { static char *pageCopy = NULL; + char *extended_checksum_loc = NULL; /* If we don't need a checksum, just return the passed-in data */ if (PageIsNew(page) || !DataChecksumsEnabled()) @@ -1525,7 +1540,18 @@ PageSetChecksumCopy(Page page, BlockNumber blkno) pageCopy = MemoryContextAlloc(TopMemoryContext, BLCKSZ); memcpy(pageCopy, (char *) page, BLCKSZ); - ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno); + + if ((extended_checksum_loc = ClusterPageFeatureOffset(pageCopy, PF_PAGE_CHECKSUMS32))) + { + /* 32-bit checksums split storage between pd_checksum and page feature offset */ + uint32 checksum = pg_checksum32_page((char*)pageCopy, blkno, extended_checksum_loc); + + *(uint16*)extended_checksum_loc = (uint16)(checksum >> 16); + ((PageHeader) pageCopy)->pd_checksum = (uint16)(checksum & 0xFFFF); + + } + else + ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno); return pageCopy; } @@ -1538,9 +1564,20 @@ PageSetChecksumCopy(Page page, BlockNumber blkno) void PageSetChecksumInplace(Page page, BlockNumber blkno) { + char *extended_checksum_loc = NULL; + /* If we don't need a checksum, just return */ if (PageIsNew(page) || !DataChecksumsEnabled()) return; - ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno); + /* are we using extended checksums? */ + if ((extended_checksum_loc = ClusterPageFeatureOffset(page, PF_PAGE_CHECKSUMS32))) + { + /* 32-bit checksums split storage between pd_checksum and page feature offset */ + uint32 checksum = pg_checksum32_page((char*)page, blkno, extended_checksum_loc); + *(uint16*)extended_checksum_loc = (uint16)(checksum >> 16); + ((PageHeader) page)->pd_checksum = (uint16)(checksum & 0xFFFF); + } + else + ((PageHeader) page)->pd_checksum = pg_checksum_page(page, blkno); } diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index a2d10d149d..c2fc61f81e 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -1798,6 +1798,17 @@ struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"page_checksums32", PGC_INTERNAL, PRESET_OPTIONS, + gettext_noop("Shows whether 32-bit extended checksums are turned on for this cluster."), + NULL, + GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE | GUC_RUNTIME_COMPUTED + }, + &page_feature_page_checksums32, + false, + NULL, NULL, NULL + }, + { {"syslog_sequence_numbers", PGC_SIGHUP, LOGGING_WHERE, gettext_noop("Add sequence number to syslog messages to avoid duplicate suppression."), diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 40561d5d61..43311f0b79 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -150,6 +150,7 @@ static bool do_sync = true; static bool sync_only = false; static bool show_setting = false; static bool data_checksums = false; +static bool page_checksums32 = false; static char *xlog_dir = NULL; static char *str_wal_segment_size_mb = NULL; static int wal_segment_size_mb; @@ -1322,10 +1323,11 @@ bootstrap_template1(void) unsetenv("PGCLIENTENCODING"); snprintf(cmd, sizeof(cmd), - "\"%s\" --boot -X %d %s %s %s %s", + "\"%s\" --boot -X %d %s %s %s %s %s", backend_exec, wal_segment_size_mb * (1024 * 1024), data_checksums ? "-k" : "", + page_checksums32 ? "-e page_checksums32" : "", boot_options, extra_options, debug ? "-d 5" : ""); @@ -2148,6 +2150,7 @@ usage(const char *progname) printf(_(" -g, --allow-group-access allow group read/execute on data directory\n")); printf(_(" --icu-locale=LOCALE set ICU locale ID for new databases\n")); printf(_(" -k, --data-checksums use data page checksums\n")); + printf(_(" -K, --extended-checksums use extended data page checksums\n")); printf(_(" --locale=LOCALE set default locale for new databases\n")); printf(_(" --lc-collate=, --lc-ctype=, --lc-messages=LOCALE\n" " --lc-monetary=, --lc-numeric=, --lc-time=LOCALE\n" @@ -2803,6 +2806,7 @@ main(int argc, char *argv[]) {"waldir", required_argument, NULL, 'X'}, {"wal-segsize", required_argument, NULL, 12}, {"data-checksums", no_argument, NULL, 'k'}, + {"extended-checksums", no_argument, NULL, 'K'}, {"allow-group-access", no_argument, NULL, 'g'}, {"discard-caches", no_argument, NULL, 14}, {"locale-provider", required_argument, NULL, 15}, @@ -2848,7 +2852,7 @@ main(int argc, char *argv[]) /* process command-line options */ - while ((c = getopt_long(argc, argv, "A:dD:E:gkL:nNsST:U:WX:", long_options, &option_index)) != -1) + while ((c = getopt_long(argc, argv, "A:dD:E:gkKL:nNsST:U:WX:", long_options, &option_index)) != -1) { switch (c) { @@ -2900,6 +2904,9 @@ main(int argc, char *argv[]) case 'k': data_checksums = true; break; + case 'K': + page_checksums32 = true; + break; case 'L': share_path = pg_strdup(optarg); break; @@ -3015,6 +3022,9 @@ main(int argc, char *argv[]) if (pwprompt && pwfilename) pg_fatal("password prompt and password file cannot be specified together"); + if (data_checksums && page_checksums32) + pg_fatal("data checksums and extended data checksums cannot be specified together"); + check_authmethod_unspecified(&authmethodlocal); check_authmethod_unspecified(&authmethodhost); @@ -3068,7 +3078,9 @@ main(int argc, char *argv[]) printf("\n"); - if (data_checksums) + if (page_checksums32) + printf(_("Extended data page checksums are enabled.\n")); + else if (data_checksums) printf(_("Data page checksums are enabled.\n")); else printf(_("Data page checksums are disabled.\n")); diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 22e6122458..56eb8d88a2 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -331,5 +331,8 @@ main(int argc, char *argv[]) mock_auth_nonce_str); printf(_("Reserved page size for features: %d\n"), CalculateReservedPageSize(ControlFile->page_features)); + printf(_("Using extended checksums: %s\n"), + PageFeatureSetHasFeature(ControlFile->page_features, PF_PAGE_CHECKSUMS32) \ + ? _("yes") : _("no")); return 0; } diff --git a/src/common/pagefeat.c b/src/common/pagefeat.c index 75f589714b..a3e3e6686e 100644 --- a/src/common/pagefeat.c +++ b/src/common/pagefeat.c @@ -16,10 +16,13 @@ #include "common/pagefeat.h" #include "utils/guc.h" -/* global variable to store the reserved_page_size */ +/* global variables */ int reserved_page_size; PageFeatureSet cluster_page_features; +/* status GUCs, display only. set by XLog startup */ +bool page_feature_page_checksums32; + /* * A "page feature" is an optional cluster-defined additional data field that * is stored in the "reserved_page_size" area in the footer of a given Page. @@ -37,8 +40,15 @@ typedef struct PageFeatureDesc char *guc_name; } PageFeatureDesc; -/* these are the fixed widths for each feature type, indexed by feature */ +/* These are the fixed widths for each feature type, indexed by feature. This + * is also used to lookup page features by the bootstrap process and expose + * the state of this page feature as a readonly boolean GUC, so when adding a + * named feature here ensure you also update the guc_tables file to add this, + * or the attempt to set the GUC will fail. */ + static PageFeatureDesc feature_descs[PF_MAX_FEATURE] = { + /* PF_PAGE_CHECKSUMS32 */ + { 2, "page_checksums32" }, }; diff --git a/src/include/common/pagefeat.h b/src/include/common/pagefeat.h index 18e3249522..baff058de0 100644 --- a/src/include/common/pagefeat.h +++ b/src/include/common/pagefeat.h @@ -16,6 +16,7 @@ /* revealed for GUCs */ extern int reserved_page_size; +extern bool page_feature_page_checksums32; /* forward declaration to avoid circular includes */ typedef Pointer Page; @@ -27,8 +28,8 @@ extern PageFeatureSet cluster_page_features; /* bit offset for features flags */ typedef enum { - /* TODO: add features here */ - PF_MAX_FEATURE = 0 /* must be last */ + PF_PAGE_CHECKSUMS32 = 0, + PF_MAX_FEATURE /* must be last */ } PageFeature; /* prototypes */ diff --git a/src/include/storage/checksum.h b/src/include/storage/checksum.h index 1904fabd5a..d0bcc01ca6 100644 --- a/src/include/storage/checksum.h +++ b/src/include/storage/checksum.h @@ -20,5 +20,6 @@ * 4-byte boundary. */ extern uint16 pg_checksum_page(char *page, BlockNumber blkno); +extern uint32 pg_checksum32_page(char *page, BlockNumber blkno, char*offset); #endif /* CHECKSUM_H */ diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h index d2eb75f769..87d9f96484 100644 --- a/src/include/storage/checksum_impl.h +++ b/src/include/storage/checksum_impl.h @@ -213,3 +213,60 @@ pg_checksum_page(char *page, BlockNumber blkno) */ return (uint16) ((checksum % 65535) + 1); } + + +/* + * Compute and return a 32-bit checksum for a Postgres page. + * + * Beware that the 16-bit portion of the page that cksum points to is + * transiently zeroed, as is the pd_checksums field. The storage location for + * this is determined by the PageFeatures in play for cluster, so we are + * storing the + * + * The checksum includes the block number (to detect the case where a page is + * somehow moved to a different location), the page header (excluding the + * checksum itself), and the page data. + * + * The high bits of this are stored in the overflow storage area of the page + * pointed to by *cksum, leaving the pd_checksum field with the same checksum + * you'd expect if running the pg_checksum_page function. + */ +uint32 +pg_checksum32_page(char *page, BlockNumber blkno, char *cksum) +{ + PGChecksummablePage *cpage = (PGChecksummablePage *) page; + uint16 save_pd,save_ext,*ptr; + uint32 checksum; + + /* We only calculate the checksum for properly-initialized pages */ + Assert(!PageIsNew((Page) page)); + /* Ensure that the cksum pointer is in the page range on this page */ + Assert(cksum >= page && cksum <= (page + BLCKSZ - sizeof(uint16))); + + ptr = (uint16*)cksum; + + /* + * Save the existing checksum locations and temporarily set it to zero, so + * that the checksum calculation isn't affected by the old checksum stored + * on the page. Restore it after, because actually updating the checksum + * is NOT part of the API of this function. + */ + + save_ext = *ptr; + save_pd = cpage->phdr.pd_checksum; + *ptr = 0; + cpage->phdr.pd_checksum = 0; + + checksum = pg_checksum_block(cpage); + + /* restore */ + *ptr = save_ext; + cpage->phdr.pd_checksum = save_pd; + + /* Mix in the block number to detect transposed pages */ + checksum ^= blkno; + + /* ensure we have non-zero return value here; this does double-up on our + * coset for group 1 here, but it's a nice property to preserve */ + return (checksum == 0 ? 1 : checksum); +} -- 2.37.0 (Apple Git-136)