From cc21c2b363d6bfc1826b764ae94b33dcf1e60aaa Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Fri, 5 Apr 2024 19:57:43 +0200 Subject: [PATCH v20240405 2/3] Try copying larger chunks of data from the same file When reconstructing a file from incremental backups, try copying data in larger chunks, not individual blocks, to reduce overhead. This applies to all copy methods, including copy_file_range() - or rather especially that, as the overhead may be particularly significant. This is implemented by looking for runs of up to 128 blocks (1MB) to be copied from the same source file, and processing them at once. This commit only applies this to copy_file_range, the read/write copy and checksum calculation are still done block-by-block. Try copying larger chunks of data from the same file (cleanup) This is primarily a cleanup/simplification of the previous commit, to make the code cleaner and easier to understand. It also extends the batching to the regular read/write calls, and checksum calculation (it only worked for copy_file_range before). 
--- src/bin/pg_combinebackup/reconstruct.c | 91 ++++++++++++++++---------- 1 file changed, 57 insertions(+), 34 deletions(-) diff --git a/src/bin/pg_combinebackup/reconstruct.c b/src/bin/pg_combinebackup/reconstruct.c index ea740fd6232..dd7a8841a55 100644 --- a/src/bin/pg_combinebackup/reconstruct.c +++ b/src/bin/pg_combinebackup/reconstruct.c @@ -63,10 +63,10 @@ static void write_reconstructed_file(char *input_filename, bool debug, bool dry_run); static void read_bytes(rfile *rf, void *buffer, unsigned length); -static void write_block(int wfd, char *output_filename, - uint8 *buffer, - pg_checksum_context *checksum_ctx); -static void read_block(rfile *s, off_t off, uint8 *buffer); +static void write_blocks(int wfd, char *output_filename, + uint8 *buffer, int nblocks, + pg_checksum_context *checksum_ctx); +static void read_blocks(rfile *s, off_t off, uint8 *buffer, int nblocks); /* * state of the asynchronous prefetcher @@ -570,7 +570,7 @@ write_reconstructed_file(char *input_filename, bool dry_run) { int wfd = -1; - unsigned i; + unsigned next_idx; unsigned zero_blocks = 0; prefetch_state prefetch; @@ -653,19 +653,42 @@ write_reconstructed_file(char *input_filename, pg_fatal("could not open file \"%s\": %m", output_filename); /* Read and write the blocks as required. */ - for (i = 0; i < block_length; ++i) + next_idx = 0; + while (next_idx < block_length) { - uint8 buffer[BLCKSZ]; - rfile *s = sourcemap[i]; +#define BLOCK_COUNT(first, last) ((last) - (first) + 1) +#define BATCH_SIZE 128 /* 1MB */ + uint8 buffer[BATCH_SIZE * BLCKSZ]; + int first_idx = next_idx; + int last_idx = next_idx; + rfile *s = sourcemap[first_idx]; + int nblocks; + + /* + * Determine the range of blocks coming from the same source file, + * but not more than BATCH_SIZE blocks (1MB) at a time. The range starts + * at first_idx, ends with last_idx (both are inclusive). 
+ */ + while ((last_idx + 1 < block_length) && /* valid block */ + (sourcemap[last_idx+1] == s) && /* same file */ + (BLOCK_COUNT(first_idx, last_idx) < BATCH_SIZE)) /* 1MB */ + last_idx += 1; + + /* Calculate batch size, set start of the next loop. */ + nblocks = BLOCK_COUNT(first_idx, last_idx); + next_idx += nblocks; + + Assert(nblocks <= BATCH_SIZE); + Assert(next_idx == (last_idx + 1)); /* Update accounting information. */ if (s == NULL) - ++zero_blocks; + zero_blocks += nblocks; else { - s->num_blocks_read++; + s->num_blocks_read += nblocks; s->highest_offset_read = Max(s->highest_offset_read, - offsetmap[i] + BLCKSZ); + offsetmap[last_idx] + BLCKSZ); } /* Skip the rest of this in dry-run mode. */ @@ -673,7 +696,7 @@ write_reconstructed_file(char *input_filename, continue; /* do prefetching if enabled */ - prefetch_blocks(&prefetch, i, block_length, sourcemap, offsetmap); + prefetch_blocks(&prefetch, last_idx, block_length, sourcemap, offsetmap); /* Read or zero-fill the block as appropriate. */ if (s == NULL) @@ -684,28 +707,28 @@ write_reconstructed_file(char *input_filename, */ memset(buffer, 0, BLCKSZ); - /* Write out the block, update the checksum if needed. */ - write_block(wfd, output_filename, buffer, checksum_ctx); + /* Write out the block(s), update the checksum if needed. */ + write_blocks(wfd, output_filename, buffer, nblocks, checksum_ctx); /* Nothing else to do for zero-filled blocks. */ continue; } - /* Copy the block using the appropriate copy method. */ + /* Copy the block(s) using the appropriate copy method. */ if (copy_method != COPY_METHOD_COPY_FILE_RANGE) { /* - * Read the block from the correct source file, and then write it - * out, possibly with a checksum update. + * Read the batch of blocks from the correct source file, and then + * write it out, possibly with a checksum update. 
*/ - read_block(s, offsetmap[i], buffer); - write_block(wfd, output_filename, buffer, checksum_ctx); + read_blocks(s, offsetmap[first_idx], buffer, nblocks); + write_blocks(wfd, output_filename, buffer, nblocks, checksum_ctx); } else /* use copy_file_range */ { #if defined(HAVE_COPY_FILE_RANGE) /* copy_file_range modifies the offset, so use a local copy */ - off_t off = offsetmap[i]; + off_t off = offsetmap[first_idx]; size_t nwritten = 0; /* @@ -716,7 +739,7 @@ write_reconstructed_file(char *input_filename, { int wb; - wb = copy_file_range(s->fd, &off, wfd, NULL, BLCKSZ - nwritten, 0); + wb = copy_file_range(s->fd, &off, wfd, NULL, (BLCKSZ * nblocks) - nwritten, 0); if (wb < 0) pg_fatal("error while copying file range from \"%s\" to \"%s\": %m", @@ -724,18 +747,18 @@ write_reconstructed_file(char *input_filename, nwritten += wb; - } while (BLCKSZ > nwritten); + } while ((nblocks * BLCKSZ) > nwritten); /* * When checksum calculation not needed, we're done, otherwise - * read the block and pass it to the checksum calculation. + * read the blocks and pass them to the checksum calculation. */ if (checksum_ctx->type == CHECKSUM_TYPE_NONE) continue; - read_block(s, offsetmap[i], buffer); + read_blocks(s, offsetmap[first_idx], buffer, nblocks); - if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0) + if (pg_checksum_update(checksum_ctx, buffer, (nblocks * BLCKSZ)) < 0) pg_fatal("could not update checksum of file \"%s\"", output_filename); #else @@ -774,22 +797,22 @@ write_reconstructed_file(char *input_filename, * provided only for the error message. 
*/ static void -write_block(int fd, char *output_filename, - uint8 *buffer, pg_checksum_context *checksum_ctx) +write_blocks(int fd, char *output_filename, + uint8 *buffer, int nblocks, pg_checksum_context *checksum_ctx) { int wb; - if ((wb = write(fd, buffer, BLCKSZ)) != BLCKSZ) + if ((wb = write(fd, buffer, nblocks * BLCKSZ)) != (nblocks * BLCKSZ)) { if (wb < 0) pg_fatal("could not write file \"%s\": %m", output_filename); else pg_fatal("could not write file \"%s\": wrote only %d of %d bytes", - output_filename, wb, BLCKSZ); + output_filename, wb, (nblocks * BLCKSZ)); } /* Update the checksum computation. */ - if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0) + if (pg_checksum_update(checksum_ctx, buffer, (nblocks * BLCKSZ)) < 0) pg_fatal("could not update checksum of file \"%s\"", output_filename); } @@ -798,19 +821,19 @@ write_block(int fd, char *output_filename, * Read a block of data (BLCKSZ bytes) into the the buffer. */ static void -read_block(rfile *s, off_t off, uint8 *buffer) +read_blocks(rfile *s, off_t off, uint8 *buffer, int nblocks) { int rb; /* Read the block from the correct source, except if dry-run. */ - rb = pg_pread(s->fd, buffer, BLCKSZ, off); - if (rb != BLCKSZ) + rb = pg_pread(s->fd, buffer, (nblocks * BLCKSZ), off); + if (rb != (nblocks * BLCKSZ)) { if (rb < 0) pg_fatal("could not read file \"%s\": %m", s->filename); else pg_fatal("could not read file \"%s\": read only %d of %d bytes at offset %llu", - s->filename, rb, BLCKSZ, + s->filename, rb, (nblocks * BLCKSZ), (unsigned long long) off); } } -- 2.44.0