Re: Making empty Bitmapsets always be NULL - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Making empty Bitmapsets always be NULL
Date
Msg-id 2686153.1677881312@sss.pgh.pa.us
Whole thread Raw
In response to Re: Making empty Bitmapsets always be NULL  (David Rowley <dgrowleyml@gmail.com>)
Responses Re: Making empty Bitmapsets always be NULL  (David Rowley <dgrowleyml@gmail.com>)
List pgsql-hackers
David Rowley <dgrowleyml@gmail.com> writes:
> On Fri, 3 Mar 2023 at 15:17, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> Another point here is that I'm pretty sure that just about all
>> bitmapsets we deal with are only one or two words, so I'm not
>> convinced you're going to get any performance win to justify
>> the added management overhead.

> It's true that the majority of Bitmapsets are going to be just 1 word,
> but it's important to acknowledge that we do suffer in some more
> extreme cases when Bitmapsets become large. Partition with large
> numbers of partitions is one such case.

Maybe, but optimizing for that while pessimizing every other case
doesn't sound very attractive from here.  I think we need some
benchmarks on normal-size bitmapsets before considering doing much
in this area.

Also, if we're going to make any sort of changes here it'd probably
behoove us to make struct Bitmapset private in bitmapset.c, so that
we can have confidence that nobody is playing games with them.
I had a go at that and was pleasantly surprised to find that
actually nobody has; the attached passes check-world.  It'd probably
be smart to commit this as a follow-on to 00b41463c, whether or not
we go any further.

Also, given that we do this, I don't think that check_bitmapset_invariants
as you propose it is worth the trouble.  The reason we've gone to such
lengths with checking List invariants is that initially we had a large
number of places doing creative and not-too-structured things with Lists,
plus we've made several absolutely fundamental changes to that data
structure.  Despite the far larger bug surface, I don't recall that those
invariant checks ever found anything after the initial rounds of changes.
So I don't buy that there's an argument for a similarly expensive set
of checks here.  bitmapset.c is small enough that we should be able to
pretty much prove it correct by eyeball.

            regards, tom lane

diff --git a/src/backend/nodes/Makefile b/src/backend/nodes/Makefile
index af12c64878..a6eb2a87bb 100644
--- a/src/backend/nodes/Makefile
+++ b/src/backend/nodes/Makefile
@@ -54,7 +54,6 @@ node_headers = \
     commands/trigger.h \
     executor/tuptable.h \
     foreign/fdwapi.h \
-    nodes/bitmapset.h \
     nodes/extensible.h \
     nodes/lockoptions.h \
     nodes/miscnodes.h \
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 7ba3cf635b..c11daeb303 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -24,6 +24,34 @@
 #include "port/pg_bitutils.h"


+/*
+ * Data representation
+ *
+ * Larger bitmap word sizes generally give better performance, so long as
+ * they're not wider than the processor can handle efficiently.  We use
+ * 64-bit words if pointers are that large, else 32-bit words.
+ */
+#if SIZEOF_VOID_P >= 8
+
+#define BITS_PER_BITMAPWORD 64
+typedef uint64 bitmapword;        /* must be an unsigned type */
+typedef int64 signedbitmapword; /* must be the matching signed type */
+
+#else
+
+#define BITS_PER_BITMAPWORD 32
+typedef uint32 bitmapword;        /* must be an unsigned type */
+typedef int32 signedbitmapword; /* must be the matching signed type */
+
+#endif
+
+struct Bitmapset
+{
+    NodeTag        type;            /* it's a Node */
+    int            nwords;            /* number of words in array */
+    bitmapword    words[FLEXIBLE_ARRAY_MEMBER];    /* really [nwords] */
+};
+
 #define WORDNUM(x)    ((x) / BITS_PER_BITMAPWORD)
 #define BITNUM(x)    ((x) % BITS_PER_BITMAPWORD)

diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index ecbcadb8bf..fd5d61bfd4 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -65,7 +65,6 @@ my @all_input_files = qw(
   commands/trigger.h
   executor/tuptable.h
   foreign/fdwapi.h
-  nodes/bitmapset.h
   nodes/extensible.h
   nodes/lockoptions.h
   nodes/miscnodes.h
@@ -132,6 +131,19 @@ my @special_read_write;
 # node types we don't want any support functions for, just node tags
 my @nodetag_only;

+# Nodes with custom copy/equal implementations are skipped from
+# .funcs.c but need case statements in .switch.c.
+my @custom_copy_equal;
+
+# Similarly for custom read/write implementations.
+my @custom_read_write;
+
+# Similarly for custom query jumble implementation.
+my @custom_query_jumble;
+
+# Track node types with manually assigned NodeTag numbers.
+my %manual_nodetag_number;
+
 # types that are copied by straight assignment
 my @scalar_types = qw(
   bits32 bool char double int int8 int16 int32 int64 long uint8 uint16 uint32 uint64
@@ -166,18 +178,12 @@ push @no_equal,           qw(List);
 push @no_query_jumble,    qw(List);
 push @special_read_write, qw(List);

-# Nodes with custom copy/equal implementations are skipped from
-# .funcs.c but need case statements in .switch.c.
-my @custom_copy_equal;
-
-# Similarly for custom read/write implementations.
-my @custom_read_write;
-
-# Similarly for custom query jumble implementation.
-my @custom_query_jumble;
-
-# Track node types with manually assigned NodeTag numbers.
-my %manual_nodetag_number;
+# Likewise, Bitmapset is specially handled rather than being parsed
+# out of a header file.  This supports making it an opaque struct.
+push @node_types,         qw(Bitmapset);
+push @custom_copy_equal,  qw(Bitmapset);
+push @no_query_jumble,    qw(Bitmapset);
+push @special_read_write, qw(Bitmapset);

 # This is a struct, so we can copy it by assignment.  Equal support is
 # currently not required.
diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index 29a1858441..541626e697 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -42,7 +42,6 @@

 #include "access/htup_details.h"
 #include "common/hashfn.h"
-#include "nodes/bitmapset.h"
 #include "nodes/tidbitmap.h"
 #include "storage/lwlock.h"
 #include "utils/dsa.h"
@@ -72,7 +71,14 @@
  */
 #define PAGES_PER_CHUNK  (BLCKSZ / 32)

-/* We use BITS_PER_BITMAPWORD and typedef bitmapword from nodes/bitmapset.h */
+/* We use the same BITS_PER_BITMAPWORD and typedef bitmapword as bitmapset.c */
+#if SIZEOF_VOID_P >= 8
+#define BITS_PER_BITMAPWORD 64
+typedef uint64 bitmapword;        /* must be an unsigned type */
+#else
+#define BITS_PER_BITMAPWORD 32
+typedef uint32 bitmapword;        /* must be an unsigned type */
+#endif

 #define WORDNUM(x)    ((x) / BITS_PER_BITMAPWORD)
 #define BITNUM(x)    ((x) % BITS_PER_BITMAPWORD)
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 14de6a9ff1..3f5be0b9a3 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -18,43 +18,14 @@
 #ifndef BITMAPSET_H
 #define BITMAPSET_H

-#include "nodes/nodes.h"
-
 /*
  * Forward decl to save including pg_list.h
  */
 struct List;

-/*
- * Data representation
- *
- * Larger bitmap word sizes generally give better performance, so long as
- * they're not wider than the processor can handle efficiently.  We use
- * 64-bit words if pointers are that large, else 32-bit words.
- */
-#if SIZEOF_VOID_P >= 8
-
-#define BITS_PER_BITMAPWORD 64
-typedef uint64 bitmapword;        /* must be an unsigned type */
-typedef int64 signedbitmapword; /* must be the matching signed type */
-
-#else
-
-#define BITS_PER_BITMAPWORD 32
-typedef uint32 bitmapword;        /* must be an unsigned type */
-typedef int32 signedbitmapword; /* must be the matching signed type */
-
-#endif
-
-typedef struct Bitmapset
-{
-    pg_node_attr(custom_copy_equal, special_read_write, no_query_jumble)
-
-    NodeTag        type;
-    int            nwords;            /* number of words in array */
-    bitmapword    words[FLEXIBLE_ARRAY_MEMBER];    /* really [nwords] */
-} Bitmapset;

+/* struct Bitmapset is private in bitmapset.c */
+typedef struct Bitmapset Bitmapset;

 /* result of bms_subset_compare */
 typedef enum
diff --git a/src/include/nodes/meson.build b/src/include/nodes/meson.build
index efe0834afb..81f1e7a890 100644
--- a/src/include/nodes/meson.build
+++ b/src/include/nodes/meson.build
@@ -15,7 +15,6 @@ node_support_input_i = [
   'commands/trigger.h',
   'executor/tuptable.h',
   'foreign/fdwapi.h',
-  'nodes/bitmapset.h',
   'nodes/extensible.h',
   'nodes/lockoptions.h',
   'nodes/miscnodes.h',

pgsql-hackers by date:

Previous
From: Thomas Munro
Date:
Subject: Re: odd buildfarm failure - "pg_ctl: control file appears to be corrupt"
Next
From: Daniel Gustafsson
Date:
Subject: Re: Raising the SCRAM iteration count