From fa9a18dfa478f88b66e3f831057ecd9e986147b3 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Mon, 20 Jun 2022 10:21:22 +0200 Subject: [PATCH v6] Add protections in xlog record APIs against large numbers and overflows. Before this; it was possible for an extension to create malicious WAL records that were too large to replay; or that would overflow the xl_tot_len field, causing potential corruption in WAL record IO ops. Emitting invalid records was also possible through pg_logical_emit_message(), which allowed you to emit arbitrary amounts of data up to 2GB, much higher than the replay limit of 1GB. --- src/backend/access/transam/xloginsert.c | 59 +++++++++++++++++++++---- src/backend/access/transam/xlogreader.c | 12 ++++- src/include/access/xlogreader.h | 36 ++++++++++++++- src/include/access/xlogrecord.h | 4 ++ 4 files changed, 101 insertions(+), 10 deletions(-) diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index 2ce9be2cc7..a8ac036cbd 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -141,7 +141,18 @@ static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info, bool *topxid_included); static bool XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length, char *dest, uint16 *dlen); +static inline void XLogErrorDataLimitExceeded(); +/* + * Error due to exceeding the maximum size of a WAL record, or registering + * more datas than are being accounted for by the XLog infrastructure. + */ +static inline void +XLogErrorDataLimitExceeded() +{ + elog(ERROR, "too much WAL data"); +} + /* * Begin constructing a WAL record. This must be called before the * XLogRegister* functions and XLogInsert(). @@ -352,10 +363,21 @@ XLogRegisterData(char *data, int len) { XLogRecData *rdata; - Assert(begininsert_called); + Assert(begininsert_called && len >= 0 && XLogRecLengthIsValid(len)); + + /* + * Check against max_rdatas; and ensure we don't fill a record with + * more data than can be replayed. Records are allocated in one chunk + * with some overhead, so ensure XLogRecLengthIsValid() for that size + * of record. + * + * We also protect against overflows by first casting to uint64 - with only + * uint32-operations this could still be an issue. + */ + if (unlikely(num_rdatas >= max_rdatas) || + unlikely(!XLogRecLengthIsValid((uint64) mainrdata_len + (uint64) len))) + XLogErrorDataLimitExceeded(); - if (num_rdatas >= max_rdatas) - elog(ERROR, "too much WAL data"); rdata = &rdatas[num_rdatas++]; rdata->data = data; @@ -391,7 +413,7 @@ XLogRegisterBufData(uint8 block_id, char *data, int len) registered_buffer *regbuf; XLogRecData *rdata; - Assert(begininsert_called); + Assert(begininsert_called && len >= 0 && len <= UINT16_MAX); /* find the registered buffer struct */ regbuf = ®istered_buffers[block_id]; @@ -399,8 +421,16 @@ XLogRegisterBufData(uint8 block_id, char *data, int len) elog(ERROR, "no block with id %d registered with WAL insertion", block_id); - if (num_rdatas >= max_rdatas) - elog(ERROR, "too much WAL data"); + /* + * Check against max_rdatas; and ensure we don't register more data per + * buffer than can be handled by the physical data format; + * i.e. that regbuf->rdata_len does not grow beyond what + * XLogRecordBlockHeader->data_length can hold. + */ + if (unlikely(num_rdatas >= max_rdatas) || + unlikely(regbuf->rdata_len + len > UINT16_MAX)) + XLogErrorDataLimitExceeded(); + rdata = &rdatas[num_rdatas++]; rdata->data = data; @@ -519,7 +549,7 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, XLogRecPtr *fpw_lsn, int *num_fpi, bool *topxid_included) { XLogRecData *rdt; - uint32 total_len = 0; + uint64 total_len = 0; int block_id; pg_crc32c rdata_crc; registered_buffer *prev_regbuf = NULL; @@ -756,12 +786,18 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, if (needs_data) { + /* + * When copying to XLogRecordBlockHeader, the length is narrowed + * to an uint16. We double-check that that is still correct. + */ + Assert(regbuf->rdata_len <= UINT16_MAX); + /* * Link the caller-supplied rdata chain for this buffer to the * overall list. */ bkpb.fork_flags |= BKPBLOCK_HAS_DATA; - bkpb.data_length = regbuf->rdata_len; + bkpb.data_length = (uint16) regbuf->rdata_len; total_len += regbuf->rdata_len; rdt_datas_last->next = regbuf->rdata_head; @@ -858,6 +894,13 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, for (rdt = hdr_rdt.next; rdt != NULL; rdt = rdt->next) COMP_CRC32C(rdata_crc, rdt->data, rdt->len); + /* + * Ensure that xlogreader.c can read the record by ensuring that the + * data section of the WAL record (plus infra overhead) can be allocated. + */ + if (unlikely(!XLogRecLengthIsValid(total_len))) + XLogErrorDataLimitExceeded(); + /* * Fill in the fields in the record header. Prev-link is filled in later, * once we know where in the WAL the record will be inserted. The CRC does diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index cf5db23cb8..63a4ff0a0d 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -669,7 +669,17 @@ restart: report_invalid_record(state, "invalid record length at %X/%X: wanted %u, got %u", LSN_FORMAT_ARGS(RecPtr), - (uint32) SizeOfXLogRecord, total_len); + (uint32) SizeOfXLogRecord, + total_len); + goto err; + } + if (!XLogRecLengthIsValid(total_len)) + { + report_invalid_record(state, + "invalid record length at %X/%X: wanted %u, got %u", + LSN_FORMAT_ARGS(RecPtr), + (uint32) XLogRecMaxLength, + total_len); goto err; } gotheader = false; diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index e73ea4a840..b2939b13c5 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -36,6 +36,7 @@ #ifndef FRONTEND #include "access/transam.h" +#include "utils/memutils.h" #endif #include "access/xlogrecord.h" @@ -425,7 +426,40 @@ extern bool DecodeXLogRecord(XLogReaderState *state, #ifndef FRONTEND extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record); -#endif +#else +#define MaxAllocSize (Size) 0x3FFFFFFF +#endif /* ifndef FRONTEND */ + +/* + * XLogRecMaxSize - the maximum WAL record size that the xlogreader + * infrastructure can handle. + * + * Defined by MaxAllocSize and the various overheads used in the + * DecodedXLogRecord infrastructure. + * + * see DecodeXLogRecordRequiredSpace and allocate_recordbuf for places where + * allocations of full records + overhead are done. + */ +#define XLogRecMaxLength \ +( \ + Min( \ + (MaxAllocSize - ( \ + offsetof(DecodedXLogRecord, blocks[0]) + \ + (sizeof(DecodedBkpBlock) * (XLR_MAX_BLOCK_ID + 1)) + \ + (MAXIMUM_ALIGNOF - 1) + \ + (MAXIMUM_ALIGNOF - 1) + \ + (MAXIMUM_ALIGNOF - 1) \ + )), \ + MaxAllocSize - (MaxAllocSize % XLOG_BLCKSZ) \ + ) \ +) + +/* + * XLogRecLengthIsValid + * + * Check that this length does not exceed the maximum xlog record length + */ +#define XLogRecLengthIsValid(xlr_size) ((xlr_size) <= XLogRecMaxLength) extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page); extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len); diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h index 052ac6817a..3575363e84 100644 --- a/src/include/access/xlogrecord.h +++ b/src/include/access/xlogrecord.h @@ -37,6 +37,10 @@ * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's * used to distinguish between block references, and the main data structs. + * + * Note that xlogreader.c limits the total size of the xl_tot_len to + * MaxAllocSize (1GB - 1). This means that this is also the maximum size + * of a single WAL record - we would be unable to replay any larger record. */ typedef struct XLogRecord { -- 2.30.2