From ef08eb591f0d539ca4603686302a2f454c960662 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Wed, 22 Jan 2025 16:09:51 -0500 Subject: [PATCH v2.6 28/34] bufmgr: Implement AIO write support As of this commit there are no users of these AIO facilities, that'll come in later commits. Author: Reviewed-By: Discussion: https://postgr.es/m/ Backpatch: --- src/include/storage/aio.h | 2 + src/include/storage/bufmgr.h | 2 + src/backend/storage/aio/aio_callback.c | 2 + src/backend/storage/buffer/bufmgr.c | 88 ++++++++++++++++++++++++++ 4 files changed, 94 insertions(+) diff --git a/src/include/storage/aio.h b/src/include/storage/aio.h index cdf54b90b15..b53aa9748c3 100644 --- a/src/include/storage/aio.h +++ b/src/include/storage/aio.h @@ -182,8 +182,10 @@ typedef enum PgAioHandleCallbackID PGAIO_HCB_MD_WRITEV, PGAIO_HCB_SHARED_BUFFER_READV, + PGAIO_HCB_SHARED_BUFFER_WRITEV, PGAIO_HCB_LOCAL_BUFFER_READV, + PGAIO_HCB_LOCAL_BUFFER_WRITEV, } PgAioHandleCallbackID; diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index db9a4673097..a2bff99fb55 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -176,7 +176,9 @@ extern PGDLLIMPORT int backend_flush_after; extern PGDLLIMPORT int bgwriter_flush_after; extern const PgAioHandleCallbacks aio_shared_buffer_readv_cb; +extern const PgAioHandleCallbacks aio_shared_buffer_writev_cb; extern const PgAioHandleCallbacks aio_local_buffer_readv_cb; +extern const PgAioHandleCallbacks aio_local_buffer_writev_cb; /* in buf_init.c */ extern PGDLLIMPORT char *BufferBlocks; diff --git a/src/backend/storage/aio/aio_callback.c b/src/backend/storage/aio/aio_callback.c index fb6ac058a09..7162f722e3c 100644 --- a/src/backend/storage/aio/aio_callback.c +++ b/src/backend/storage/aio/aio_callback.c @@ -44,8 +44,10 @@ static const PgAioHandleCallbacksEntry aio_handle_cbs[] = { CALLBACK_ENTRY(PGAIO_HCB_MD_WRITEV, aio_md_writev_cb), CALLBACK_ENTRY(PGAIO_HCB_SHARED_BUFFER_READV, aio_shared_buffer_readv_cb), + 
CALLBACK_ENTRY(PGAIO_HCB_SHARED_BUFFER_WRITEV, aio_shared_buffer_writev_cb),

 	CALLBACK_ENTRY(PGAIO_HCB_LOCAL_BUFFER_READV, aio_local_buffer_readv_cb),
+	CALLBACK_ENTRY(PGAIO_HCB_LOCAL_BUFFER_WRITEV, aio_local_buffer_writev_cb),
 #undef CALLBACK_ENTRY
 };

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 20544b39ef9..1f47edaa7b9 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -6467,6 +6467,58 @@ SharedBufferCompleteRead(int buf_off, Buffer buffer, uint8 flags, bool failed)
 	return result;
 }

+/*
+ * Complete an asynchronous write of a single shared buffer.
+ *
+ * Terminates the buffer's IO via TerminateBufferIO(). If "failed" is set,
+ * BM_IO_ERROR is passed along and clearing of the dirty flag is not
+ * requested; otherwise clearing of the dirty flag is requested.
+ *
+ * If "release_lock" is set, the buffer's content lock, which the initiator
+ * of the IO disowned, is released here.
+ *
+ * Currently always returns 0.
+ */
+static uint64
+BufferCompleteWriteShared(Buffer buffer, bool release_lock, bool failed)
+{
+	BufferDesc *bufHdr;
+	uint64		result = 0;
+
+	Assert(BufferIsValid(buffer));
+
+	bufHdr = GetBufferDescriptor(buffer - 1);
+
+#ifdef USE_ASSERT_CHECKING
+	{
+		uint32		buf_state = pg_atomic_read_u32(&bufHdr->state);
+
+		Assert(buf_state & BM_VALID);
+		Assert(buf_state & BM_TAG_VALID);
+		Assert(buf_state & BM_IO_IN_PROGRESS);
+		Assert(buf_state & BM_DIRTY);
+	}
+#endif
+
+	/*
+	 * Only ask for the dirty flag to be cleared if the write succeeded,
+	 * otherwise the buffer would look clean despite not having been written.
+	 */
+	TerminateBufferIO(bufHdr, /* clear_dirty = */ !failed,
+					  failed ? BM_IO_ERROR : 0,
+					  /* forget_owner = */ false,
+					  /* syncio = */ false);
+
+	/*
+	 * The initiator of IO is not managing the lock (i.e. called
+	 * LWLockDisown()), we are.
+	 */
+	if (release_lock)
+		LWLockReleaseDisowned(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
+
+	return result;
+}
+
 /*
  * Helper to prepare IO on shared buffers for execution, shared between reads
  * and writes.
@@ -6641,6 +6677,55 @@ shared_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 c
 	return buffer_readv_complete_common(ioh, prior_result, false, cb_data);
 }

+/*
+ * Stage callback for AIO writes of shared buffers.
+ *
+ * Defers to shared_buffer_stage_common(); cb_data is unused.
+ */
+static void
+shared_buffer_writev_stage(PgAioHandle *ioh, uint8 cb_data)
+{
+	shared_buffer_stage_common(ioh, true);
+}
+
+/*
+ * Shared completion callback for AIO writes of shared buffers.
+ *
+ * Completes the write for every buffer referenced in the handle data and
+ * passes prior_result through unchanged.
+ *
+ * NB: Errors and short writes are not handled yet (see the FIXMEs below),
+ * every buffer is treated as successfully written and its content lock is
+ * released unconditionally.
+ */
+static PgAioResult
+shared_buffer_writev_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
+{
+	PgAioResult result = prior_result;
+	uint64	   *io_data;
+	uint8		handle_data_len;
+
+	/* debugging output, not worth translating */
+	ereport(DEBUG5,
+			errmsg_internal("%s: %d %d", __func__, prior_result.status, prior_result.result),
+			errhidestmt(true), errhidecontext(true));
+
+	io_data = pgaio_io_get_handle_data(ioh, &handle_data_len);
+
+	/* FIXME: handle outright errors */
+
+	for (int io_data_off = 0; io_data_off < handle_data_len; io_data_off++)
+	{
+		Buffer		buf = (Buffer) io_data[io_data_off];
+
+		/* FIXME: handle short writes / failures */
+		/* FIXME: ioh->target_data.shared_buffer.release_lock */
+		BufferCompleteWriteShared(buf, true, false);
+	}
+
+	return result;
+}
+
 /*
  * Helper to stage IO on local buffers for execution, shared between reads
  * and writes.
@@ -6685,6 +6770,16 @@ local_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb
 	return buffer_readv_complete_common(ioh, prior_result, true, cb_data);
 }

+static void
+local_buffer_writev_stage(PgAioHandle *ioh, uint8 cb_data)
+{
+	/*
+	 * Currently this is unreachable as the only write support is for
+	 * checkpointer / bgwriter, which don't deal with local buffers.
+	 */
+	elog(ERROR, "not yet");		/* AIO writes of local buffers are not implemented */
+}
+

 /* readv callback is is passed READ_BUFFERS_* flags as callback data */
 const PgAioHandleCallbacks aio_shared_buffer_readv_cb = {
@@ -6693,6 +6772,11 @@ const PgAioHandleCallbacks aio_shared_buffer_readv_cb = {
 	.report = buffer_readv_report,
 };

+const PgAioHandleCallbacks aio_shared_buffer_writev_cb = {
+	.stage = shared_buffer_writev_stage,
+	.complete_shared = shared_buffer_writev_complete,	/* no .complete_local / .report is set */
+};
+
 /* readv callback is is passed READ_BUFFERS_* flags as callback data */
 const PgAioHandleCallbacks aio_local_buffer_readv_cb = {
 	.stage = local_buffer_readv_stage,
@@ -6706,3 +6790,7 @@ const PgAioHandleCallbacks aio_local_buffer_readv_cb = {
 	.complete_local = local_buffer_readv_complete,
 	.report = buffer_readv_report,
 };
+
+const PgAioHandleCallbacks aio_local_buffer_writev_cb = {
+	.stage = local_buffer_writev_stage,	/* stage-only; the callback just raises an error */
+};
-- 
2.48.1.76.g4e746b1a31.dirty