Re: [PATCH] add relation and block-level filtering to pg_waldump - Mailing list pgsql-hackers

From David Christensen
Subject Re: [PATCH] add relation and block-level filtering to pg_waldump
Date
Msg-id lzwnhiai4i.fsf@veeddrois.attlocal.net
Whole thread Raw
In response to Re: [PATCH] add relation and block-level filtering to pg_waldump  (Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>)
Responses Re: [PATCH] add relation and block-level filtering to pg_waldump
Re: [PATCH] add relation and block-level filtering to pg_waldump
List pgsql-hackers
Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> writes:

> On Fri, Feb 25, 2022 at 12:36 AM David Christensen
> <david.christensen@crunchydata.com> wrote:
>>
>> Greetings,
>>
>> This patch adds the ability to specify a RelFileNode and optional BlockNum to limit output of
>> pg_waldump records to only those which match the given criteria.  This should be more performant
>> than `pg_waldump | grep` as well as more reliable given specific variations in output style
>> depending on how the blocks are specified.
>>
>> This currently affects only the main fork, but we could presumably add the option to filter by fork
>> as well, if that is considered useful.
>
> Thanks for the patch. This is not adding something that users can't do
> right now, but definitely improves the usability of the pg_waldump as
> it avoids external filterings. Also, it can give the stats/info at
> table and block level. So, +1 from my side.

Attached is V2 with additional feedback from this email, as well as the specification of the
ForkNumber and FPW as specifiable options.

Best,

David

From 1b04f04317d364006371bdab0db9086f79138b25 Mon Sep 17 00:00:00 2001
From: David Christensen <david.christensen@crunchydata.com>
Date: Fri, 25 Feb 2022 12:52:56 -0600
Subject: [PATCH] Add additional filtering options to pg_waldump

This feature allows you to only output records that are targeting a specific RelFileNode and optional
BlockNumber within this relation, while specifying which ForkNum you want to filter to.

We also add the independent ability to filter via Full Page Write.
---
 doc/src/sgml/ref/pg_waldump.sgml |  48 ++++++++++++
 src/bin/pg_waldump/pg_waldump.c  | 128 ++++++++++++++++++++++++++++++-
 2 files changed, 175 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/ref/pg_waldump.sgml b/doc/src/sgml/ref/pg_waldump.sgml
index 5735a161ce..f157175764 100644
--- a/doc/src/sgml/ref/pg_waldump.sgml
+++ b/doc/src/sgml/ref/pg_waldump.sgml
@@ -100,6 +100,44 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>-k <replaceable>block</replaceable></option></term>
+      <term><option>--block=<replaceable>block</replaceable></option></term>
+      <listitem>
+       <para>
+        Display only records touching the given block. (Requires also
+        providing the relation via <option>--relation</option>.)
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--fork=<replaceable>fork</replaceable></option></term>
+      <listitem>
+       <para>
+        When using the <option>--relation</option> filter, output only records
+        from the given fork.  The valid values here are <literal>0</literal>
+        for the main fork, <literal>1</literal> for the Free Space
+        Map, <literal>2</literal> for the Visibility Map,
+        and <literal>3</literal> for the Init fork.  If unspecified, defaults
+        to the main fork.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>-l
<replaceable>tbl</replaceable>/<replaceable>db</replaceable>/<replaceable>rel</replaceable></option></term>
+
<term><option>--relation=<replaceable>tbl</replaceable>/<replaceable>db</replaceable>/<replaceable>rel</replaceable></option></term>
+      <listitem>
+       <para>
+        Display only records touching the given relation.  The relation is
+        specified via tablespace OID, database OID, and relfilenode separated
+        by slashes, for instance, <literal>1234/12345/12345</literal>.  This
+        is the same format used for relations in the WAL dump output.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-n <replaceable>limit</replaceable></option></term>
       <term><option>--limit=<replaceable>limit</replaceable></option></term>
@@ -183,6 +221,16 @@ PostgreSQL documentation
        </listitem>
      </varlistentry>
 
+     <varlistentry>
+       <term><option>-w</option></term>
+       <term><option>--fullpage</option></term>
+       <listitem>
+       <para>
+       Filter records to only those which have full page writes.
+       </para>
+       </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-x <replaceable>xid</replaceable></option></term>
       <term><option>--xid=<replaceable>xid</replaceable></option></term>
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index a6251e1a96..a527cd4dc6 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -55,6 +55,12 @@ typedef struct XLogDumpConfig
     bool        filter_by_rmgr_enabled;
     TransactionId filter_by_xid;
     bool        filter_by_xid_enabled;
+    RelFileNode filter_by_relation;
+    bool        filter_by_relation_enabled;
+    BlockNumber    filter_by_relation_block;
+    bool        filter_by_relation_block_enabled;
+    ForkNumber    filter_by_relation_forknum;
+    bool        filter_by_fpw;
 } XLogDumpConfig;
 
 typedef struct Stats
@@ -394,6 +400,56 @@ WALDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
     return count;
 }
 
+/*
+ * Boolean to return whether the given WAL record matches a specific relation and optional block
+ */
+static bool
+XLogRecordMatchesRelationBlock(XLogReaderState *record, RelFileNode matchRnode, BlockNumber matchBlock, ForkNumber
matchFork)
+{
+    int            block_id;
+
+    for (block_id = 0; block_id <= record->max_block_id; block_id++)
+    {
+        RelFileNode rnode;
+        ForkNumber    forknum;
+        BlockNumber blk;
+
+        if (!XLogRecHasBlockRef(record, block_id))
+            continue;
+
+        XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
+
+        if (forknum == matchFork &&
+            matchRnode.spcNode == rnode.spcNode &&
+            matchRnode.dbNode == rnode.dbNode &&
+            matchRnode.relNode == rnode.relNode &&
+            (matchBlock == InvalidBlockNumber || matchBlock == blk))
+            return true;
+    }
+
+    return false;
+}
+
+/*
+ * Boolean to return whether the given WAL record contains a full page write
+ */
+static bool
+XLogRecordHasFPW(XLogReaderState *record)
+{
+    int            block_id;
+
+    for (block_id = 0; block_id <= record->max_block_id; block_id++)
+    {
+        if (!XLogRecHasBlockRef(record, block_id))
+            continue;
+
+        if (XLogRecHasBlockImage(record, block_id))
+            return true;
+    }
+
+    return false;
+}
+
 /*
  * Calculate the size of a record, split into !FPI and FPI parts.
  */
@@ -767,6 +823,10 @@ usage(void)
     printf(_("  -b, --bkp-details      output detailed information about backup blocks\n"));
     printf(_("  -e, --end=RECPTR       stop reading at WAL location RECPTR\n"));
     printf(_("  -f, --follow           keep retrying after reaching end of WAL\n"));
+    printf(_("  -k, --block=N          with --relation, only show records matching this block\n"));
+    printf(_("      --fork=N           with --relation, only show records matching this fork\n"
+             "                         (defaults to 0, the main fork)\n"));
+    printf(_("  -l, --relation=N/N/N   only show records that touch a specific relation\n"));
     printf(_("  -n, --limit=N          number of records to display\n"));
     printf(_("  -p, --path=PATH        directory in which to find log segment files or a\n"
              "                         directory with a ./pg_wal that contains such files\n"
@@ -779,6 +839,7 @@ usage(void)
              "                         (default: 1 or the value used in STARTSEG)\n"));
     printf(_("  -V, --version          output version information, then exit\n"));
     printf(_("  -x, --xid=XID          only show records with transaction ID XID\n"));
+    printf(_("  -w, --fullpage         only show records with a full page write\n"));
     printf(_("  -z, --stats[=record]   show statistics instead of records\n"
              "                         (optionally, show per-record statistics)\n"));
     printf(_("  -?, --help             show this help, then exit\n"));
@@ -802,12 +863,16 @@ main(int argc, char **argv)
 
     static struct option long_options[] = {
         {"bkp-details", no_argument, NULL, 'b'},
+        {"block", required_argument, NULL, 'k'},
         {"end", required_argument, NULL, 'e'},
         {"follow", no_argument, NULL, 'f'},
+        {"fork", required_argument, NULL, 1},
+        {"fullpage", no_argument, NULL, 'w'},
         {"help", no_argument, NULL, '?'},
         {"limit", required_argument, NULL, 'n'},
         {"path", required_argument, NULL, 'p'},
         {"quiet", no_argument, NULL, 'q'},
+        {"relation", required_argument, NULL, 'l'},
         {"rmgr", required_argument, NULL, 'r'},
         {"start", required_argument, NULL, 's'},
         {"timeline", required_argument, NULL, 't'},
@@ -860,6 +925,10 @@ main(int argc, char **argv)
     config.filter_by_rmgr_enabled = false;
     config.filter_by_xid = InvalidTransactionId;
     config.filter_by_xid_enabled = false;
+    config.filter_by_relation_enabled = false;
+    config.filter_by_relation_block_enabled = false;
+    config.filter_by_relation_forknum = MAIN_FORKNUM;
+    config.filter_by_fpw = false;
     config.stats = false;
     config.stats_per_record = false;
 
@@ -872,7 +941,7 @@ main(int argc, char **argv)
         goto bad_argument;
     }
 
-    while ((option = getopt_long(argc, argv, "be:fn:p:qr:s:t:x:z",
+    while ((option = getopt_long(argc, argv, "be:fk:l:n:p:qr:s:t:wx:z",
                                  long_options, &optindex)) != -1)
     {
         switch (option)
@@ -892,6 +961,41 @@ main(int argc, char **argv)
             case 'f':
                 config.follow = true;
                 break;
+             case 1: /* fork number */
+                if (sscanf(optarg, "%u", &config.filter_by_relation_forknum) != 1 ||
+                    config.filter_by_relation_forknum >= MAX_FORKNUM)
+                {
+                    pg_log_error("could not parse valid fork number (0..%d) \"%s\"",
+                                 MAX_FORKNUM - 1, optarg);
+                    goto bad_argument;
+                }
+                break;
+            case 'k':
+                if (sscanf(optarg, "%u", &config.filter_by_relation_block) != 1 ||
+                    !BlockNumberIsValid(config.filter_by_relation_block))
+                {
+                    pg_log_error("could not parse valid block number \"%s\"", optarg);
+                    goto bad_argument;
+                }
+                config.filter_by_relation_block_enabled = true;
+                break;
+            case 'l':
+                if (sscanf(optarg, "%u/%u/%u",
+                           &config.filter_by_relation.spcNode,
+                           &config.filter_by_relation.dbNode,
+                           &config.filter_by_relation.relNode) != 3 ||
+                    !OidIsValid(config.filter_by_relation.spcNode) ||
+                    !OidIsValid(config.filter_by_relation.dbNode) ||
+                    !OidIsValid(config.filter_by_relation.relNode)
+                    )
+                {
+                    pg_log_error("could not parse valid relation from \"%s\"/"
+                                 " (expecting \"tablespace OID/database OID/"
+                                 "relation filenode\")", optarg);
+                    goto bad_argument;
+                }
+                config.filter_by_relation_enabled = true;
+                break;
             case 'n':
                 if (sscanf(optarg, "%d", &config.stop_after_records) != 1)
                 {
@@ -949,6 +1053,9 @@ main(int argc, char **argv)
                     goto bad_argument;
                 }
                 break;
+            case 'w':
+                config.filter_by_fpw = true;
+                break;
             case 'x':
                 if (sscanf(optarg, "%u", &config.filter_by_xid) != 1)
                 {
@@ -978,6 +1085,12 @@ main(int argc, char **argv)
         }
     }
 
+    if (config.filter_by_relation_block_enabled && !config.filter_by_relation_enabled)
+    {
+        pg_log_error("--block option requires --relation option to be specified");
+        goto bad_argument;
+    }
+
     if ((optind + 2) < argc)
     {
         pg_log_error("too many command-line arguments (first is \"%s\")",
@@ -1150,6 +1263,19 @@ main(int argc, char **argv)
             config.filter_by_xid != record->xl_xid)
             continue;
 
+        /* check for extended filtering */
+        if (config.filter_by_relation_enabled &&
+            !XLogRecordMatchesRelationBlock(
+                xlogreader_state,
+                config.filter_by_relation,
+                config.filter_by_relation_block_enabled ? config.filter_by_relation_block : InvalidBlockNumber,
+                config.filter_by_relation_forknum
+                ))
+            continue;
+
+        if (config.filter_by_fpw && !XLogRecordHasFPW(xlogreader_state))
+            continue;
+
         /* perform any per-record work */
         if (!config.quiet)
         {
-- 
2.32.0 (Apple Git-132)


--

pgsql-hackers by date:

Previous
From: Andres Freund
Date:
Subject: Re: Two noncritical bugs of pg_waldump
Next
From: Jeff Davis
Date:
Subject: Re: Proposal: Support custom authentication methods using hooks