Getting rid of regression test input/ and output/ files - Mailing list pgsql-hackers

I did some desultory investigation of the idea proposed at [1]
that we should refactor the regression test scripts to try to
reduce their interdependencies.  I soon realized that one of
the stumbling blocks to this is that we've tended to concentrate
data-loading COPY commands, as well as C function creation
commands, into a few files to reduce the notational cruft of
substituting path names and the like into the test scripts.
That is, we don't want to have even more scripts that have to be
translated from input/foo.source and output/foo.source into
runnable scripts and test results.

This led me to wonder why we couldn't get rid of that entire
mechanism in favor of some less-painful way of getting that
information into the scripts.  If we had the desired values in
psql variables, we could do what we need easily, for example
instead of

CREATE FUNCTION check_primary_key ()
    RETURNS trigger
    AS '@libdir@/refint@DLSUFFIX@'
    LANGUAGE C;

something like

CREATE FUNCTION check_primary_key ()
    RETURNS trigger
    AS :'LIBDIR'
    '/refint'
    :'DLSUFFIX'
    LANGUAGE C;

(The extra line breaks are needed to convince SQL that the
adjacent string literals should be concatenated.  We couldn't
have done this so easily before psql had the :'variable'
notation, but that came in in 9.0.)

I see two ways we could get the info from pg_regress into psql
variables:

1. Add "-v VARIABLE=VALUE" switches to the psql invocations.
This requires no new psql capability, but it does introduce
the problem of getting correct shell quoting of the values.
I think we'd need to either duplicate appendShellString in
pg_regress.c, or start linking both libpq and libpgfeutils.a
into pg_regress to be able to use appendShellString itself.
In the past we've not wanted to link libpq into pg_regress
(though I admit I've forgotten the argument for not doing so).

2. Export the values from pg_regress as environment variables,
and then add a way for the test scripts to read those variables.
I was a bit surprised to realize that we didn't have any way
to do that already --- psql has \setenv, so why did we never
invent \getenv?

On the whole I prefer #2, as it seems cleaner and it adds some
actually useful-to-end-users psql functionality.

Attached is a really incomplete, quick-n-dirty POC showing that
this can be made to work.  If there aren't objections or better
ideas, I'll see about fleshing this out.

            regards, tom lane

[1] https://www.postgresql.org/message-id/20211217182518.GA2529654%40rfd.leadboat.com

diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
index ccd7b48108..fb3bab9494 100644
--- a/src/bin/psql/command.c
+++ b/src/bin/psql/command.c
@@ -98,6 +98,8 @@ static backslashResult process_command_g_options(char *first_option,
                                                  bool active_branch,
                                                  const char *cmd);
 static backslashResult exec_command_gdesc(PsqlScanState scan_state, bool active_branch);
+static backslashResult exec_command_getenv(PsqlScanState scan_state, bool active_branch,
+                                           const char *cmd);
 static backslashResult exec_command_gexec(PsqlScanState scan_state, bool active_branch);
 static backslashResult exec_command_gset(PsqlScanState scan_state, bool active_branch);
 static backslashResult exec_command_help(PsqlScanState scan_state, bool active_branch);
@@ -348,6 +350,8 @@ exec_command(const char *cmd,
         status = exec_command_g(scan_state, active_branch, cmd);
     else if (strcmp(cmd, "gdesc") == 0)
         status = exec_command_gdesc(scan_state, active_branch);
+    else if (strcmp(cmd, "getenv") == 0)
+        status = exec_command_getenv(scan_state, active_branch, cmd);
     else if (strcmp(cmd, "gexec") == 0)
         status = exec_command_gexec(scan_state, active_branch);
     else if (strcmp(cmd, "gset") == 0)
@@ -1481,6 +1485,43 @@ exec_command_gdesc(PsqlScanState scan_state, bool active_branch)
     return status;
 }

+/*
+ * \getenv -- set variable from environment variable
+ */
+static backslashResult
+exec_command_getenv(PsqlScanState scan_state, bool active_branch,
+                    const char *cmd)
+{
+    bool        success = true;
+
+    if (active_branch)
+    {
+        char       *myvar = psql_scan_slash_option(scan_state,
+                                                   OT_NORMAL, NULL, false);
+        char       *envvar = psql_scan_slash_option(scan_state,
+                                                    OT_NORMAL, NULL, false);
+
+        if (!myvar || !envvar)
+        {
+            pg_log_error("\\%s: missing required argument", cmd);
+            success = false;
+        }
+        else
+        {
+            char       *envval = getenv(envvar);
+
+            if (envval && !SetVariable(pset.vars, myvar, envval))
+                success = false;
+        }
+        free(myvar);
+        free(envvar);
+    }
+    else
+        ignore_slash_options(scan_state);
+
+    return success ? PSQL_CMD_SKIP_LINE : PSQL_CMD_ERROR;
+}
+
 /*
  * \gexec -- send query and execute each field of result
  */
diff --git a/src/test/regress/input/copy.source b/src/test/regress/input/copy.source
index 8acb516801..ad42d02a22 100644
--- a/src/test/regress/input/copy.source
+++ b/src/test/regress/input/copy.source
@@ -2,14 +2,20 @@
 -- COPY
 --

+-- directory paths are passed to us in environment variables
+\getenv ABS_SRCDIR ABS_SRCDIR
+\getenv ABS_BUILDDIR ABS_BUILDDIR
+
 -- CLASS POPULATION
 --    (any resemblance to real life is purely coincidental)
 --
-COPY aggtest FROM '@abs_srcdir@/data/agg.data';
+COPY aggtest FROM :'ABS_SRCDIR'
+    '/data/agg.data';

 COPY onek FROM '@abs_srcdir@/data/onek.data';

-COPY onek TO '@abs_builddir@/results/onek.data';
+COPY onek TO :'ABS_BUILDDIR'
+    '/results/onek.data';

 DELETE FROM onek;

diff --git a/src/test/regress/input/create_function_0.source b/src/test/regress/input/create_function_0.source
index f47f635789..122771fda8 100644
--- a/src/test/regress/input/create_function_0.source
+++ b/src/test/regress/input/create_function_0.source
@@ -2,11 +2,17 @@
 -- CREATE_FUNCTION_0
 --

+-- directory path and DLSUFFIX are passed to us in environment variables
+\getenv LIBDIR LIBDIR
+\getenv DLSUFFIX DLSUFFIX
+
 -- Create a bunch of C functions that will be used by later tests:

 CREATE FUNCTION check_primary_key ()
     RETURNS trigger
-    AS '@libdir@/refint@DLSUFFIX@'
+    AS :'LIBDIR'
+    '/refint'
+    :'DLSUFFIX'
     LANGUAGE C;

 CREATE FUNCTION check_foreign_key ()
diff --git a/src/test/regress/output/copy.source b/src/test/regress/output/copy.source
index 25bdec6c60..d21b150d1a 100644
--- a/src/test/regress/output/copy.source
+++ b/src/test/regress/output/copy.source
@@ -1,12 +1,17 @@
 --
 -- COPY
 --
+-- directory paths are passed to us in environment variables
+\getenv ABS_SRCDIR ABS_SRCDIR
+\getenv ABS_BUILDDIR ABS_BUILDDIR
 -- CLASS POPULATION
 --    (any resemblance to real life is purely coincidental)
 --
-COPY aggtest FROM '@abs_srcdir@/data/agg.data';
+COPY aggtest FROM :'ABS_SRCDIR'
+    '/data/agg.data';
 COPY onek FROM '@abs_srcdir@/data/onek.data';
-COPY onek TO '@abs_builddir@/results/onek.data';
+COPY onek TO :'ABS_BUILDDIR'
+    '/results/onek.data';
 DELETE FROM onek;
 COPY onek FROM '@abs_builddir@/results/onek.data';
 COPY tenk1 FROM '@abs_srcdir@/data/tenk.data';
diff --git a/src/test/regress/output/create_function_0.source b/src/test/regress/output/create_function_0.source
index 342bc40e11..b602543433 100644
--- a/src/test/regress/output/create_function_0.source
+++ b/src/test/regress/output/create_function_0.source
@@ -1,10 +1,15 @@
 --
 -- CREATE_FUNCTION_0
 --
+-- directory path and DLSUFFIX are passed to us in environment variables
+\getenv LIBDIR LIBDIR
+\getenv DLSUFFIX DLSUFFIX
 -- Create a bunch of C functions that will be used by later tests:
 CREATE FUNCTION check_primary_key ()
     RETURNS trigger
-    AS '@libdir@/refint@DLSUFFIX@'
+    AS :'LIBDIR'
+    '/refint'
+    :'DLSUFFIX'
     LANGUAGE C;
 CREATE FUNCTION check_foreign_key ()
     RETURNS trigger
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index 2c8a600bad..a8c877aa2e 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -509,6 +509,12 @@ convert_sourcefiles_in(const char *source_subdir, const char *dest_dir, const ch
     /* We might need to replace @testtablespace@ */
     snprintf(testtablespace, MAXPGPATH, "%s/testtablespace", outputdir);

+    setenv("ABS_SRCDIR", inputdir, 1);
+    setenv("ABS_BUILDDIR", outputdir, 1);
+    setenv("TESTTABLESPACE", testtablespace, 1);
+    setenv("LIBDIR", dlpath, 1);
+    setenv("DLSUFFIX", DLSUFFIX, 1);
+
     /* finally loop on each file and do the replacement */
     for (name = names; *name; name++)
     {

pgsql-hackers by date:

Previous
From: Tom Lane
Date:
Subject: Re: sequences vs. synchronous replication
Next
From: Maciek Sakrejda
Date:
Subject: Re: should we document an example to set multiple libraries in shared_preload_libraries?