Re: pg_verify_checksums and -fno-strict-aliasing - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: pg_verify_checksums and -fno-strict-aliasing |
Date | |
Msg-id | 21037.1535745351@sss.pgh.pa.us Whole thread Raw |
In response to | Re: pg_verify_checksums and -fno-strict-aliasing (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses |
Re: pg_verify_checksums and -fno-strict-aliasing
|
List | pgsql-hackers |
I wrote: > Some of these places might be performance-critical enough that adding > a palloc/pfree cycle would not be nice. What I was considering doing > was inventing something like > > typedef union PGAlignedBuffer > { > char data[BLCKSZ]; > double force_align; > } PGAlignedBuffer; Here's a proposed patch using that approach. Although some of the places that were using "char buf[BLCKSZ]" variables weren't doing anything that really requires better alignment, I made almost all of them use PGAlignedBuffer variables anyway, on the grounds that you typically get better memcpy speed for aligned than unaligned transfers. I also fixed a few places that were using the palloc solution, and one that was actually doing hand-alignment of the pointer (ugh). I didn't try to be thorough about getting rid of such pallocs, just fix places that seemed likely to be performance-critical. This needs to be back-patched as relevant, of course. regards, tom lane diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c index 4afdea7..5ae1dda 100644 --- a/contrib/bloom/blinsert.c +++ b/contrib/bloom/blinsert.c @@ -36,7 +36,7 @@ typedef struct int64 indtuples; /* total number of tuples indexed */ MemoryContext tmpCtx; /* temporary memory context reset after each * tuple */ - char data[BLCKSZ]; /* cached page */ + PGAlignedBuffer data; /* cached page */ int count; /* number of tuples in cached page */ } BloomBuildState; @@ -52,7 +52,7 @@ flushCachedPage(Relation index, BloomBuildState *buildstate) state = GenericXLogStart(index); page = GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE); - memcpy(page, buildstate->data, BLCKSZ); + memcpy(page, buildstate->data.data, BLCKSZ); GenericXLogFinish(state); UnlockReleaseBuffer(buffer); } @@ -63,8 +63,8 @@ flushCachedPage(Relation index, BloomBuildState *buildstate) static void initCachedPage(BloomBuildState *buildstate) { - memset(buildstate->data, 0, BLCKSZ); - BloomInitPage(buildstate->data, 0); + memset(buildstate->data.data, 0, BLCKSZ); + BloomInitPage(buildstate->data.data, 0); buildstate->count = 0; } @@ -84,7 +84,7 @@ bloomBuildCallback(Relation index, HeapTuple htup, Datum *values, itup = BloomFormTuple(&buildstate->blstate, &htup->t_self, values, isnull); /* Try to add next item to cached page */ - if (BloomPageAddItem(&buildstate->blstate, buildstate->data, itup)) + if (BloomPageAddItem(&buildstate->blstate, buildstate->data.data, itup)) { /* Next item was added successfully */ buildstate->count++; @@ -98,7 +98,7 @@ bloomBuildCallback(Relation index, HeapTuple htup, Datum *values, initCachedPage(buildstate); - if (!BloomPageAddItem(&buildstate->blstate, buildstate->data, itup)) + if (!BloomPageAddItem(&buildstate->blstate, buildstate->data.data, itup)) { /* We shouldn't be here since we're inserting to the empty page */ elog(ERROR, "could not add new bloom tuple to empty page"); diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c index 3cbb7c2..1a00c59 100644 --- a/contrib/pg_prewarm/pg_prewarm.c +++ b/contrib/pg_prewarm/pg_prewarm.c @@ -36,7 +36,7 @@ typedef enum PREWARM_BUFFER } PrewarmType; -static char blockbuffer[BLCKSZ]; +static PGAlignedBuffer blockbuffer; /* * pg_prewarm(regclass, mode text, fork text, @@ -178,7 +178,7 @@ pg_prewarm(PG_FUNCTION_ARGS) for (block = first_block; block <= last_block; ++block) { CHECK_FOR_INTERRUPTS(); - smgrread(rel->rd_smgr, forkNumber, block, blockbuffer); + smgrread(rel->rd_smgr, forkNumber, block, blockbuffer.data); ++blocks_done; } } diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index 8107697..ef5c367 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -616,7 +616,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf, Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf)); Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf)); Size pageSize = PageGetPageSize(lpage); - char tupstore[2 * BLCKSZ]; + PGAlignedBuffer tupstore[2]; /* could need 2 pages' worth of tuples */ entryPreparePage(btree, lpage, off, insertData, updateblkno); @@ -625,7 +625,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf, * one after another in a temporary workspace. */ maxoff = PageGetMaxOffsetNumber(lpage); - ptr = tupstore; + ptr = tupstore[0].data; for (i = FirstOffsetNumber; i <= maxoff; i++) { if (i == off) @@ -658,7 +658,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf, GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); - ptr = tupstore; + ptr = tupstore[0].data; maxoff++; lsize = 0; diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index e32807e..915b9ca 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -64,18 +64,15 @@ writeListPage(Relation index, Buffer buffer, size = 0; OffsetNumber l, off; - char *workspace; + PGAlignedBuffer workspace; char *ptr; - /* workspace could be a local array; we use palloc for alignment */ - workspace = palloc(BLCKSZ); - START_CRIT_SECTION(); GinInitBuffer(buffer, GIN_LIST); off = FirstOffsetNumber; - ptr = workspace; + ptr = workspace.data; for (i = 0; i < ntuples; i++) { @@ -127,7 +124,7 @@ writeListPage(Relation index, Buffer buffer, XLogRegisterData((char *) &data, sizeof(ginxlogInsertListPage)); XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT); - XLogRegisterBufData(0, workspace, size); + XLogRegisterBufData(0, workspace.data, size); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE); PageSetLSN(page, recptr); @@ -140,8 +137,6 @@ writeListPage(Relation index, Buffer buffer, END_CRIT_SECTION(); - pfree(workspace); - return freesize; } diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 3ec29a5..27d86b9 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -1000,7 +1000,7 @@ static bool _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks) { BlockNumber lastblock; - char zerobuf[BLCKSZ]; + PGAlignedBuffer zerobuf; Page page; HashPageOpaque ovflopaque; @@ -1013,7 +1013,7 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks) if (lastblock < firstblock || lastblock == InvalidBlockNumber) return false; - page = (Page) zerobuf; + page = (Page) zerobuf.data; /* * Initialize the page. Just zeroing the page won't work; see @@ -1034,11 +1034,11 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks) log_newpage(&rel->rd_node, MAIN_FORKNUM, lastblock, - zerobuf, + zerobuf.data, true); RelationOpenSmgr(rel); - smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf, false); + smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf.data, false); return true; } diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index b8bfe23..27e8b01 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2709,7 +2709,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, HeapTuple *heaptuples; int i; int ndone; - char *scratch = NULL; + PGAlignedBuffer scratch; Page page; bool needwal; Size saveFreeSpace; @@ -2727,14 +2727,6 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, xid, cid, options); /* - * Allocate some memory to use for constructing the WAL record. Using - * palloc() within a critical section is not safe, so we allocate this - * beforehand. - */ - if (needwal) - scratch = palloc(BLCKSZ); - - /* * We're about to do the actual inserts -- but check for conflict first, * to minimize the possibility of having to roll back work we've just * done. @@ -2826,7 +2818,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, uint8 info = XLOG_HEAP2_MULTI_INSERT; char *tupledata; int totaldatalen; - char *scratchptr = scratch; + char *scratchptr = scratch.data; bool init; int bufflags = 0; @@ -2885,7 +2877,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, scratchptr += datalen; } totaldatalen = scratchptr - tupledata; - Assert((scratchptr - scratch) < BLCKSZ); + Assert((scratchptr - scratch.data) < BLCKSZ); if (need_tuple_data) xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE; @@ -2912,7 +2904,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, bufflags |= REGBUF_KEEP_DATA; XLogBeginInsert(); - XLogRegisterData((char *) xlrec, tupledata - scratch); + XLogRegisterData((char *) xlrec, tupledata - scratch.data); XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags); XLogRegisterBufData(0, tupledata, totaldatalen); diff --git a/src/backend/access/transam/generic_xlog.c b/src/backend/access/transam/generic_xlog.c index ce02354..e421f29 100644 --- a/src/backend/access/transam/generic_xlog.c +++ b/src/backend/access/transam/generic_xlog.c @@ -61,14 +61,11 @@ typedef struct /* State of generic xlog record construction */ struct GenericXLogState { - /* - * page's images. Should be first in this struct to have MAXALIGN'ed - * images addresses, because some code working with pages directly aligns - * addresses, not offsets from beginning of page - */ - char images[MAX_GENERIC_XLOG_PAGES * BLCKSZ]; + /* Info about each page, see above */ PageData pages[MAX_GENERIC_XLOG_PAGES]; bool isLogged; + /* Page images (properly aligned) */ + PGAlignedBuffer images[MAX_GENERIC_XLOG_PAGES]; }; static void writeFragment(PageData *pageData, OffsetNumber offset, @@ -251,12 +248,12 @@ computeDelta(PageData *pageData, Page curpage, Page targetpage) #ifdef WAL_DEBUG if (XLOG_DEBUG) { - char tmp[BLCKSZ]; + PGAlignedBuffer tmp; - memcpy(tmp, curpage, BLCKSZ); - applyPageRedo(tmp, pageData->delta, pageData->deltaLen); - if (memcmp(tmp, targetpage, targetLower) != 0 || - memcmp(tmp + targetUpper, targetpage + targetUpper, + memcpy(tmp.data, curpage, BLCKSZ); + applyPageRedo(tmp.data, pageData->delta, pageData->deltaLen); + if (memcmp(tmp.data, targetpage, targetLower) != 0 || + memcmp(tmp.data + targetUpper, targetpage + targetUpper, BLCKSZ - targetUpper) != 0) elog(ERROR, "result of generic xlog apply does not match"); } @@ -277,7 +274,7 @@ GenericXLogStart(Relation relation) for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++) { - state->pages[i].image = state->images + BLCKSZ * i; + state->pages[i].image = state->images[i].data; state->pages[i].buffer = InvalidBuffer; } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 65db2e4..29d792f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -3210,8 +3210,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) { char path[MAXPGPATH]; char tmppath[MAXPGPATH]; - char zbuffer_raw[XLOG_BLCKSZ + MAXIMUM_ALIGNOF]; - char *zbuffer; + PGAlignedXLogBuffer zbuffer; XLogSegNo installed_segno; XLogSegNo max_segno; int fd; @@ -3263,17 +3262,13 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) * fsync below) that all the indirect blocks are down on disk. Therefore, * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the * log file. - * - * Note: ensure the buffer is reasonably well-aligned; this may save a few - * cycles transferring data to the kernel. */ - zbuffer = (char *) MAXALIGN(zbuffer_raw); - memset(zbuffer, 0, XLOG_BLCKSZ); + memset(zbuffer.data, 0, XLOG_BLCKSZ); for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ) { errno = 0; pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); - if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ) + if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ) { int save_errno = errno; @@ -3380,7 +3375,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, { char path[MAXPGPATH]; char tmppath[MAXPGPATH]; - char buffer[XLOG_BLCKSZ]; + PGAlignedXLogBuffer buffer; int srcfd; int fd; int nbytes; @@ -3423,7 +3418,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, * zeros. */ if (nread < sizeof(buffer)) - memset(buffer, 0, sizeof(buffer)); + memset(buffer.data, 0, sizeof(buffer)); if (nread > 0) { @@ -3432,7 +3427,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, if (nread > sizeof(buffer)) nread = sizeof(buffer); pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ); - r = read(srcfd, buffer, nread); + r = read(srcfd, buffer.data, nread); if (r != nread) { if (r < 0) @@ -3450,7 +3445,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, } errno = 0; pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE); - if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer)) + if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer)) { int save_errno = errno; diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index 5bea073..c819204 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -809,12 +809,12 @@ XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length, int32 len; int32 extra_bytes = 0; char *source; - char tmp[BLCKSZ]; + PGAlignedBuffer tmp; if (hole_length != 0) { /* must skip the hole */ - source = tmp; + source = tmp.data; memcpy(source, page, hole_offset); memcpy(source + hole_offset, page + (hole_offset + hole_length), @@ -917,7 +917,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std) if (lsn <= RedoRecPtr) { int flags; - char copied_buffer[BLCKSZ]; + PGAlignedBuffer copied_buffer; char *origdata = (char *) BufferGetBlock(buffer); RelFileNode rnode; ForkNumber forkno; @@ -935,11 +935,11 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std) uint16 lower = ((PageHeader) page)->pd_lower; uint16 upper = ((PageHeader) page)->pd_upper; - memcpy(copied_buffer, origdata, lower); - memcpy(copied_buffer + upper, origdata + upper, BLCKSZ - upper); + memcpy(copied_buffer.data, origdata, lower); + memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper); } else - memcpy(copied_buffer, origdata, BLCKSZ); + memcpy(copied_buffer.data, origdata, BLCKSZ); XLogBeginInsert(); @@ -948,7 +948,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std) flags |= REGBUF_STANDARD; BufferGetTag(buffer, &rnode, &forkno, &blkno); - XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer, flags); + XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer.data, flags); recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI_FOR_HINT); } diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 4c633c6..d5daafd 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -1412,7 +1412,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) { DecodedBkpBlock *bkpb; char *ptr; - char tmp[BLCKSZ]; + PGAlignedBuffer tmp; if (!record->blocks[block_id].in_use) return false; @@ -1425,7 +1425,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) if (bkpb->bimg_info & BKPIMAGE_IS_COMPRESSED) { /* If a backup block image is compressed, decompress it */ - if (pglz_decompress(ptr, bkpb->bimg_len, tmp, + if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data, BLCKSZ - bkpb->hole_length) < 0) { report_invalid_record(record, "invalid compressed image at %X/%X, block %d", @@ -1434,7 +1434,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) block_id); return false; } - ptr = tmp; + ptr = tmp.data; } /* generate page, taking into account hole if necessary */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 48743db..4d60787 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -11276,21 +11276,14 @@ static void copy_relation_data(SMgrRelation src, SMgrRelation dst, ForkNumber forkNum, char relpersistence) { - char *buf; + PGAlignedBuffer buf; Page page; bool use_wal; bool copying_initfork; BlockNumber nblocks; BlockNumber blkno; - /* - * palloc the buffer so that it's MAXALIGN'd. If it were just a local - * char[] array, the compiler might align it on any byte boundary, which - * can seriously hurt transfer speed to and from the kernel; not to - * mention possibly making log_newpage's accesses to the page header fail. - */ - buf = (char *) palloc(BLCKSZ); - page = (Page) buf; + page = (Page) buf.data; /* * The init fork for an unlogged relation in many respects has to be @@ -11314,7 +11307,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, /* If we got a cancel signal during the copy of the data, quit */ CHECK_FOR_INTERRUPTS(); - smgrread(src, forkNum, blkno, buf); + smgrread(src, forkNum, blkno, buf.data); if (!PageIsVerified(page, blkno)) ereport(ERROR, @@ -11340,11 +11333,9 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, * rel, because there's no need for smgr to schedule an fsync for this * write; we'll do it ourselves below. */ - smgrextend(dst, forkNum, blkno, buf, true); + smgrextend(dst, forkNum, blkno, buf.data, true); } - pfree(buf); - /* * If the rel is WAL-logged, must fsync before commit. We use heap_sync * to ensure that the toast table gets fsync'd too. (For a temp or diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index c83ff3b..8005f11 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -496,11 +496,11 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd) bytesleft = histfilelen; while (bytesleft > 0) { - char rbuf[BLCKSZ]; + PGAlignedBuffer rbuf; int nread; pgstat_report_wait_start(WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ); - nread = read(fd, rbuf, sizeof(rbuf)); + nread = read(fd, rbuf.data, sizeof(rbuf)); pgstat_report_wait_end(); if (nread < 0) ereport(ERROR, @@ -513,7 +513,7 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd) errmsg("could not read file \"%s\": read %d of %zu", path, nread, (Size) bytesleft))); - pq_sendbytes(&buf, rbuf, nread); + pq_sendbytes(&buf, rbuf.data, nread); bytesleft -= nread; } CloseTransientFile(fd); diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index efbede7..4a52284 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -96,7 +96,7 @@ struct BufFile off_t curOffset; /* offset part of current pos */ int pos; /* next read/write position in buffer */ int nbytes; /* total # of valid bytes in buffer */ - char buffer[BLCKSZ]; + PGAlignedBuffer buffer; }; static BufFile *makeBufFileCommon(int nfiles); @@ -437,7 +437,7 @@ BufFileLoadBuffer(BufFile *file) * Read whatever we can get, up to a full bufferload. */ file->nbytes = FileRead(thisfile, - file->buffer, + file->buffer.data, sizeof(file->buffer), WAIT_EVENT_BUFFILE_READ); if (file->nbytes < 0) @@ -502,7 +502,7 @@ BufFileDumpBuffer(BufFile *file) file->offsets[file->curFile] = file->curOffset; } bytestowrite = FileWrite(thisfile, - file->buffer + wpos, + file->buffer.data + wpos, bytestowrite, WAIT_EVENT_BUFFILE_WRITE); if (bytestowrite <= 0) @@ -572,7 +572,7 @@ BufFileRead(BufFile *file, void *ptr, size_t size) nthistime = size; Assert(nthistime > 0); - memcpy(ptr, file->buffer + file->pos, nthistime); + memcpy(ptr, file->buffer.data + file->pos, nthistime); file->pos += nthistime; ptr = (void *) ((char *) ptr + nthistime); @@ -621,7 +621,7 @@ BufFileWrite(BufFile *file, void *ptr, size_t size) nthistime = size; Assert(nthistime > 0); - memcpy(file->buffer + file->pos, ptr, nthistime); + memcpy(file->buffer.data + file->pos, ptr, nthistime); file->dirty = true; file->pos += nthistime; diff --git a/src/backend/utils/sort/logtape.c b/src/backend/utils/sort/logtape.c index 62d7797..756c1ff 100644 --- a/src/backend/utils/sort/logtape.c +++ b/src/backend/utils/sort/logtape.c @@ -240,11 +240,11 @@ ltsWriteBlock(LogicalTapeSet *lts, long blocknum, void *buffer) */ while (blocknum > lts->nBlocksWritten) { - char zerobuf[BLCKSZ]; + PGAlignedBuffer zerobuf; - MemSet(zerobuf, 0, sizeof(zerobuf)); + MemSet(zerobuf.data, 0, sizeof(zerobuf)); - ltsWriteBlock(lts, lts->nBlocksWritten, zerobuf); + ltsWriteBlock(lts, lts->nBlocksWritten, zerobuf.data); } /* Write the requested block */ diff --git a/src/bin/pg_rewind/copy_fetch.c b/src/bin/pg_rewind/copy_fetch.c index 160a912..6a3c03a 100644 --- a/src/bin/pg_rewind/copy_fetch.c +++ b/src/bin/pg_rewind/copy_fetch.c @@ -156,7 +156,7 @@ recurse_dir(const char *datadir, const char *parentpath, static void rewind_copy_file_range(const char *path, off_t begin, off_t end, bool trunc) { - char buf[BLCKSZ]; + PGAlignedBuffer buf; char srcpath[MAXPGPATH]; int srcfd; @@ -182,7 +182,7 @@ rewind_copy_file_range(const char *path, off_t begin, off_t end, bool trunc) else len = end - begin; - readlen = read(srcfd, buf, len); + readlen = read(srcfd, buf.data, len); if (readlen < 0) pg_fatal("could not read file \"%s\": %s\n", @@ -190,7 +190,7 @@ rewind_copy_file_range(const char *path, off_t begin, off_t end, bool trunc) else if (readlen == 0) pg_fatal("unexpected EOF while reading file \"%s\"\n", srcpath); - write_target_range(buf, begin, readlen); + write_target_range(buf.data, begin, readlen); begin += readlen; } diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c index bf7feed..922d4dd 100644 --- a/src/bin/pg_verify_checksums/pg_verify_checksums.c +++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c @@ -75,8 +75,8 @@ skipfile(const char *fn) static void scan_file(const char *fn, BlockNumber segmentno) { - char buf[BLCKSZ]; - PageHeader header = (PageHeader) buf; + PGAlignedBuffer buf; + PageHeader header = (PageHeader) buf.data; int f; BlockNumber blockno; @@ -93,7 +93,7 @@ scan_file(const char *fn, BlockNumber segmentno) for (blockno = 0;; blockno++) { uint16 csum; - int r = read(f, buf, BLCKSZ); + int r = read(f, buf.data, BLCKSZ); if (r == 0) break; @@ -109,7 +109,7 @@ scan_file(const char *fn, BlockNumber segmentno) if (PageIsNew(header)) continue; - csum = pg_checksum_page(buf, blockno + segmentno * RELSEG_SIZE); + csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE); if (csum != header->pd_checksum) { if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION) diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 2a557b3..4affbb9 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -209,13 +209,13 @@ search_directory(const char *directory, const char *fname) /* set WalSegSz if file is successfully opened */ if (fd >= 0) { - char buf[XLOG_BLCKSZ]; + PGAlignedXLogBuffer buf; int r; - r = read(fd, buf, XLOG_BLCKSZ); + r = read(fd, buf.data, XLOG_BLCKSZ); if (r == XLOG_BLCKSZ) { - XLogLongPageHeader longhdr = (XLogLongPageHeader) buf; + XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data; WalSegSz = longhdr->xlp_seg_size; diff --git a/src/include/c.h b/src/include/c.h index 1e50103..193e9ac 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -989,6 +989,32 @@ extern void ExceptionalCondition(const char *conditionName, * ---------------------------------------------------------------- */ +/* + * Use this, not "char buf[BLCKSZ]", to declare a field or local variable + * holding a page buffer, if that page might be accessed as a page and not + * just a string of bytes. Otherwise the variable might be under-aligned, + * causing problems on alignment-picky hardware. (In some places, we use + * this to declare buffers even though we only pass them to read() and + * write(), because copying to/from aligned buffers is usually faster than + * using unaligned buffers.) We include both "double" and "int64" in the + * union to ensure that the compiler knows the value must be MAXALIGN'ed + * (cf. configure's computation of MAXIMUM_ALIGNOF). + */ +typedef union PGAlignedBuffer +{ + char data[BLCKSZ]; + double force_align_d; + int64 force_align_i64; +} PGAlignedBuffer; + +/* Same, but for an XLOG_BLCKSZ-sized buffer */ +typedef union PGAlignedXLogBuffer +{ + char data[XLOG_BLCKSZ]; + double force_align_d; + int64 force_align_i64; +} PGAlignedXLogBuffer; + /* msb for char */ #define HIGHBIT (0x80) #define IS_HIGHBIT_SET(ch) ((unsigned char)(ch) & HIGHBIT)
pgsql-hackers by date: