Re: initdb / bootstrap design - Mailing list pgsql-hackers

From Tom Lane
Subject Re: initdb / bootstrap design
Date
Msg-id 203900.1645321586@sss.pgh.pa.us
Whole thread Raw
In response to Re: initdb / bootstrap design  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: initdb / bootstrap design
List pgsql-hackers
I wrote:
> However, redirection does sound like a very easy answer ...

I tried it like that (full patch attached) and the results are
intensely disappointing.  On my Mac laptop, the time needed for
50 iterations of initdb drops from 16.8 sec to 16.75 sec.
On my RHEL8 workstation, the change is actually in the wrong
direction, from 18.75s to 18.9s.  I conclude that the time
spent on postgres.bki data transfer is so far down in the noise
as to be overwhelmed by irrelevancies.  (Which, in fact, is
what perf told me before I started --- but I'd hoped that the
number of system calls would diminish noticeably.  Seems not.)

Not sure that this is worth pursuing any further.

            regards, tom lane

diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 9fa8fdd4cf..667c829064 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -635,6 +635,8 @@ InsertOneTuple(void)

 /* ----------------
  *        InsertOneValue
+ *
+ * Fill the i'th column of the current tuple with the given value.
  * ----------------
  */
 void
@@ -653,6 +655,21 @@ InsertOneValue(char *value, int i)

     elog(DEBUG4, "inserting column %d value \"%s\"", i, value);

+    /*
+     * In order to make the contents of postgres.bki architecture-independent,
+     * certain values in it are represented symbolically, and we perform the
+     * necessary replacements here.
+     */
+    if (strcmp(value, "NAMEDATALEN") == 0)
+        value = CppAsString2(NAMEDATALEN);
+    else if (strcmp(value, "SIZEOF_POINTER") == 0)
+        value = CppAsString2(SIZEOF_VOID_P);
+    else if (strcmp(value, "ALIGNOF_POINTER") == 0)
+        value = (SIZEOF_VOID_P == 4) ? "i" : "d";
+    else if (strcmp(value, "FLOAT8PASSBYVAL") == 0)
+        value = FLOAT8PASSBYVAL ? "true" : "false";
+
+    /* Now convert the value to internal form */
     typoid = TupleDescAttr(boot_reldesc->rd_att, i)->atttypid;

     boot_get_type_io_data(typoid,
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 97f15971e2..9850f342bf 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -265,13 +265,13 @@ static void setup_privileges(FILE *cmdfd);
 static void set_info_version(void);
 static void setup_schema(FILE *cmdfd);
 static void load_plpgsql(FILE *cmdfd);
+static void set_remaining_details(FILE *cmdfd);
 static void vacuum_db(FILE *cmdfd);
 static void make_template0(FILE *cmdfd);
 static void make_postgres(FILE *cmdfd);
 static void trapsig(int signum);
 static void check_ok(void);
 static char *escape_quotes(const char *src);
-static char *escape_quotes_bki(const char *src);
 static int    locale_date_order(const char *locale);
 static void check_locale_name(int category, const char *locale,
                               char **canonname);
@@ -336,32 +336,6 @@ escape_quotes(const char *src)
     return result;
 }

-/*
- * Escape a field value to be inserted into the BKI data.
- * Run the value through escape_quotes (which will be inverted
- * by the backend's DeescapeQuotedString() function), then wrap
- * the value in single quotes, even if that isn't strictly necessary.
- */
-static char *
-escape_quotes_bki(const char *src)
-{
-    char       *result;
-    char       *data = escape_quotes(src);
-    char       *resultp;
-    char       *datap;
-
-    result = (char *) pg_malloc(strlen(data) + 3);
-    resultp = result;
-    *resultp++ = '\'';
-    for (datap = data; *datap; datap++)
-        *resultp++ = *datap;
-    *resultp++ = '\'';
-    *resultp = '\0';
-
-    free(data);
-    return result;
-}
-
 /*
  * make a copy of the array of lines, with token replaced by replacement
  * the first time it occurs on each line.
@@ -1354,22 +1328,26 @@ static void
 bootstrap_template1(void)
 {
     PG_CMD_DECL;
-    char      **line;
-    char      **bki_lines;
+    FILE       *infile;
     char        headerline[MAXPGPATH];
-    char        buf[64];
+    char        firstline[MAXPGPATH];

     printf(_("running bootstrap script ... "));
     fflush(stdout);

-    bki_lines = readfile(bki_file);
-
     /* Check that bki file appears to be of the right version */

     snprintf(headerline, sizeof(headerline), "# PostgreSQL %s\n",
              PG_MAJORVERSION);

-    if (strcmp(headerline, *bki_lines) != 0)
+    if ((infile = fopen(bki_file, "r")) == NULL)
+    {
+        pg_log_error("could not open file \"%s\" for reading: %m", bki_file);
+        exit(1);
+    }
+
+    if (fgets(firstline, sizeof(firstline), infile) == NULL ||
+        strcmp(headerline, firstline) != 0)
     {
         pg_log_error("input file \"%s\" does not belong to PostgreSQL %s",
                      bki_file, PG_VERSION);
@@ -1379,56 +1357,24 @@ bootstrap_template1(void)
         exit(1);
     }

-    /* Substitute for various symbols used in the BKI file */
-
-    sprintf(buf, "%d", NAMEDATALEN);
-    bki_lines = replace_token(bki_lines, "NAMEDATALEN", buf);
-
-    sprintf(buf, "%d", (int) sizeof(Pointer));
-    bki_lines = replace_token(bki_lines, "SIZEOF_POINTER", buf);
-
-    bki_lines = replace_token(bki_lines, "ALIGNOF_POINTER",
-                              (sizeof(Pointer) == 4) ? "i" : "d");
-
-    bki_lines = replace_token(bki_lines, "FLOAT8PASSBYVAL",
-                              FLOAT8PASSBYVAL ? "true" : "false");
-
-    bki_lines = replace_token(bki_lines, "POSTGRES",
-                              escape_quotes_bki(username));
-
-    bki_lines = replace_token(bki_lines, "ENCODING",
-                              encodingid_to_string(encodingid));
-
-    bki_lines = replace_token(bki_lines, "LC_COLLATE",
-                              escape_quotes_bki(lc_collate));
-
-    bki_lines = replace_token(bki_lines, "LC_CTYPE",
-                              escape_quotes_bki(lc_ctype));
+    fclose(infile);

     /* Also ensure backend isn't confused by this environment var: */
     unsetenv("PGCLIENTENCODING");

     snprintf(cmd, sizeof(cmd),
-             "\"%s\" --boot -X %d %s %s %s %s",
+             "\"%s\" --boot -X %d %s %s %s %s <\"%s\"",
              backend_exec,
              wal_segment_size_mb * (1024 * 1024),
              data_checksums ? "-k" : "",
              boot_options, extra_options,
-             debug ? "-d 5" : "");
-
+             debug ? "-d 5" : "",
+             bki_file);

     PG_CMD_OPEN;
-
-    for (line = bki_lines; *line != NULL; line++)
-    {
-        PG_CMD_PUTS(*line);
-        free(*line);
-    }
-
+    /* Nothing to write, since backend reads bki_file directly */
     PG_CMD_CLOSE;

-    free(bki_lines);
-
     check_ok();
 }

@@ -1622,12 +1568,11 @@ setup_collation(FILE *cmdfd)
 static void
 setup_privileges(FILE *cmdfd)
 {
-    char      **line;
-    char      **priv_lines;
-    static char *privileges_setup[] = {
+    const char *const *line;
+    static const char *const privileges_setup[] = {
         "UPDATE pg_class "
         "  SET relacl = (SELECT array_agg(a.acl) FROM "
-        " (SELECT E'=r/\"$POSTGRES_SUPERUSERNAME\"' as acl "
+        " (SELECT '=r/\"POSTGRES\"' as acl "
         "  UNION SELECT unnest(pg_catalog.acldefault("
         "    CASE WHEN relkind = " CppAsString2(RELKIND_SEQUENCE) " THEN 's' "
         "         ELSE 'r' END::\"char\"," CppAsString2(BOOTSTRAP_SUPERUSERID) "::oid))"
@@ -1759,9 +1704,7 @@ setup_privileges(FILE *cmdfd)
         NULL
     };

-    priv_lines = replace_token(privileges_setup, "$POSTGRES_SUPERUSERNAME",
-                               escape_quotes(username));
-    for (line = priv_lines; *line != NULL; line++)
+    for (line = privileges_setup; *line != NULL; line++)
         PG_CMD_PUTS(*line);
 }

@@ -1822,6 +1765,48 @@ load_plpgsql(FILE *cmdfd)
     PG_CMD_PUTS("CREATE EXTENSION plpgsql;\n\n");
 }

+/*
+ * Set some remaining details that aren't known when postgres.bki is made.
+ *
+ * Up to now, the bootstrap superuser has been named "POSTGRES".
+ * Replace that with the user-specified name (often "postgres").
+ * Also, insert the desired locale and encoding details in pg_database.
+ *
+ * Note: this must run after setup_privileges(), which expects the superuser
+ * name to still be "POSTGRES".
+ */
+static void
+set_remaining_details(FILE *cmdfd)
+{
+    char      **line;
+    char      **detail_lines;
+
+    /*
+     * Ideally we'd change the superuser name with ALTER USER, but the backend
+     * will reject that with "session user cannot be renamed", so we must
+     * cheat.  (In any case, we'd need a function to escape an identifier, not
+     * a string literal.)  Likewise, we can't change template1's
+     * locale/encoding without cheating.
+     */
+    static char *final_details[] = {
+        "UPDATE pg_authid SET rolname = E'SUPERUSER_NAME' WHERE rolname = 'POSTGRES';\n\n",
+        "UPDATE pg_database SET encoding = E'ENCODING', datcollate = E'LC_COLLATE', datctype = E'LC_CTYPE';\n\n",
+        NULL
+    };
+
+    detail_lines = replace_token(final_details, "SUPERUSER_NAME",
+                                 escape_quotes(username));
+    detail_lines = replace_token(detail_lines, "ENCODING",
+                                 encodingid_to_string(encodingid));
+    detail_lines = replace_token(detail_lines, "LC_COLLATE",
+                                 escape_quotes(lc_collate));
+    detail_lines = replace_token(detail_lines, "LC_CTYPE",
+                                 escape_quotes(lc_ctype));
+
+    for (line = detail_lines; *line != NULL; line++)
+        PG_CMD_PUTS(*line);
+}
+
 /*
  * clean everything up in template1
  */
@@ -2851,6 +2836,8 @@ initialize_data_directory(void)

     load_plpgsql(cmdfd);

+    set_remaining_details(cmdfd);
+
     vacuum_db(cmdfd);

     make_template0(cmdfd);
diff --git a/src/include/catalog/pg_database.dat b/src/include/catalog/pg_database.dat
index e7e42d6023..87aad30146 100644
--- a/src/include/catalog/pg_database.dat
+++ b/src/include/catalog/pg_database.dat
@@ -12,11 +12,15 @@

 [

+# We initialize template1's encoding as PG_SQL_ASCII and its locales as C.
+# initdb will change that during database initialization; however, the
+# post-bootstrap initialization session will run with those values.
+
 { oid => '1', oid_symbol => 'TemplateDbOid',
   descr => 'default template for new databases',
-  datname => 'template1', encoding => 'ENCODING', datistemplate => 't',
+  datname => 'template1', encoding => 'PG_SQL_ASCII', datistemplate => 't',
   datallowconn => 't', datconnlimit => '-1', datfrozenxid => '0',
-  datminmxid => '1', dattablespace => 'pg_default', datcollate => 'LC_COLLATE',
-  datctype => 'LC_CTYPE', datacl => '_null_' },
+  datminmxid => '1', dattablespace => 'pg_default', datcollate => 'C',
+  datctype => 'C', datacl => '_null_' },

 ]
diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h
index 76adbd4aad..9fd424d58d 100644
--- a/src/include/catalog/pg_database.h
+++ b/src/include/catalog/pg_database.h
@@ -38,7 +38,7 @@ CATALOG(pg_database,1262,DatabaseRelationId) BKI_SHARED_RELATION BKI_ROWTYPE_OID
     Oid            datdba BKI_DEFAULT(POSTGRES) BKI_LOOKUP(pg_authid);

     /* character encoding */
-    int32        encoding;
+    int32        encoding BKI_LOOKUP(encoding);

     /* allowed as CREATE DATABASE template? */
     bool        datistemplate;

pgsql-hackers by date:

Previous
From: Peter Geoghegan
Date:
Subject: Re: Removing more vacuumlazy.c special cases, relfrozenxid optimizations
Next
From: Andres Freund
Date:
Subject: Re: Removing more vacuumlazy.c special cases, relfrozenxid optimizations