Re: [PATCH] - Provide robust alternatives for replace_string - Mailing list pgsql-hackers

From Tom Lane
Subject Re: [PATCH] - Provide robust alternatives for replace_string
Date
Msg-id 1315832.1599345736@sss.pgh.pa.us
Whole thread Raw
In response to Re: [PATCH] - Provide robust alternatives for replace_string  (Alvaro Herrera <alvherre@2ndquadrant.com>)
Responses Re: [PATCH] - Provide robust alternatives for replace_string  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
Alvaro Herrera <alvherre@2ndquadrant.com> writes:
> Note that starting with commit 67a472d71c98 you can use pg_get_line and
> not worry about the hard part of this anymore :-)

pg_get_line as it stands isn't quite suitable, because it just hands
back a "char *" string, not a StringInfo that you can do further
processing on.

However, I'd already grown a bit dissatisfied with exposing only that
API, because the code 8f8154a50 added to hba.c couldn't use pg_get_line
either, and had to duplicate the logic.  So the attached revised patch
splits pg_get_line into two pieces, one with the existing char * API
and one that appends to a caller-provided StringInfo.  (hba.c needs the
append-rather-than-reset behavior, and it might be useful elsewhere
too.)

While here, I couldn't resist getting rid of ecpg_filter()'s hard-wired
line length limit too.

This version looks committable to me, though perhaps someone has
further thoughts?

            regards, tom lane

diff --git a/src/backend/libpq/hba.c b/src/backend/libpq/hba.c
index 5991a21cf2..9f106653f3 100644
--- a/src/backend/libpq/hba.c
+++ b/src/backend/libpq/hba.c
@@ -502,33 +502,8 @@ tokenize_file(const char *filename, FILE *file, List **tok_lines, int elevel)
         /* Collect the next input line, handling backslash continuations */
         resetStringInfo(&buf);

-        while (!feof(file) && !ferror(file))
+        while (pg_get_line_append(file, &buf))
         {
-            /* Make sure there's a reasonable amount of room in the buffer */
-            enlargeStringInfo(&buf, 128);
-
-            /* Read some data, appending it to what we already have */
-            if (fgets(buf.data + buf.len, buf.maxlen - buf.len, file) == NULL)
-            {
-                int            save_errno = errno;
-
-                if (!ferror(file))
-                    break;        /* normal EOF */
-                /* I/O error! */
-                ereport(elevel,
-                        (errcode_for_file_access(),
-                         errmsg("could not read file \"%s\": %m", filename)));
-                err_msg = psprintf("could not read file \"%s\": %s",
-                                   filename, strerror(save_errno));
-                resetStringInfo(&buf);
-                break;
-            }
-            buf.len += strlen(buf.data + buf.len);
-
-            /* If we haven't got a whole line, loop to read more */
-            if (!(buf.len > 0 && buf.data[buf.len - 1] == '\n'))
-                continue;
-
             /* Strip trailing newline, including \r in case we're on Windows */
             buf.len = pg_strip_crlf(buf.data);

@@ -551,6 +526,19 @@ tokenize_file(const char *filename, FILE *file, List **tok_lines, int elevel)
             break;
         }

+        if (ferror(file))
+        {
+            /* I/O error! */
+            int            save_errno = errno;
+
+            ereport(elevel,
+                    (errcode_for_file_access(),
+                     errmsg("could not read file \"%s\": %m", filename)));
+            err_msg = psprintf("could not read file \"%s\": %s",
+                               filename, strerror(save_errno));
+            break;
+        }
+
         /* Parse fields */
         lineptr = buf.data;
         while (*lineptr && err_msg == NULL)
diff --git a/src/common/pg_get_line.c b/src/common/pg_get_line.c
index 38433675d4..eae02b9596 100644
--- a/src/common/pg_get_line.c
+++ b/src/common/pg_get_line.c
@@ -49,21 +49,7 @@ pg_get_line(FILE *stream)

     initStringInfo(&buf);

-    /* Read some data, appending it to whatever we already have */
-    while (fgets(buf.data + buf.len, buf.maxlen - buf.len, stream) != NULL)
-    {
-        buf.len += strlen(buf.data + buf.len);
-
-        /* Done if we have collected a newline */
-        if (buf.len > 0 && buf.data[buf.len - 1] == '\n')
-            return buf.data;
-
-        /* Make some more room in the buffer, and loop to read more data */
-        enlargeStringInfo(&buf, 128);
-    }
-
-    /* Did fgets() fail because of an I/O error? */
-    if (ferror(stream))
+    if (!pg_get_line_append(stream, &buf))
     {
         /* ensure that free() doesn't mess up errno */
         int            save_errno = errno;
@@ -73,13 +59,49 @@ pg_get_line(FILE *stream)
         return NULL;
     }

-    /* If we read no data before reaching EOF, we should return NULL */
-    if (buf.len == 0)
+    return buf.data;
+}
+
+/*
+ * pg_get_line_append()
+ *
+ * This has similar behavior to pg_get_line(), and thence to fgets(),
+ * except that the collected data is appended to whatever is in *buf.
+ *
+ * Returns true if a line was successfully collected (including the
+ * case of a non-newline-terminated line at EOF).  Returns false if
+ * there was an I/O error or no data was available before EOF.
+ * (Check ferror(stream) to distinguish these cases.)
+ *
+ * In the false-result case, the contents of *buf are logically unmodified,
+ * though it's possible that the buffer has been resized.
+ */
+bool
+pg_get_line_append(FILE *stream, StringInfo buf)
+{
+    int            orig_len = buf->len;
+
+    /* Read some data, appending it to whatever we already have */
+    while (fgets(buf->data + buf->len, buf->maxlen - buf->len, stream) != NULL)
     {
-        pfree(buf.data);
-        return NULL;
+        buf->len += strlen(buf->data + buf->len);
+
+        /* Done if we have collected a newline */
+        if (buf->len > orig_len && buf->data[buf->len - 1] == '\n')
+            return true;
+
+        /* Make some more room in the buffer, and loop to read more data */
+        enlargeStringInfo(buf, 128);
     }

-    /* No newline at EOF ... so return what we have */
-    return buf.data;
+    /* Check for I/O errors and EOF */
+    if (ferror(stream) || buf->len == orig_len)
+    {
+        /* Discard any data we collected before detecting error */
+        buf->len = orig_len;
+        return false;
+    }
+
+    /* No newline at EOF, but we did collect some data */
+    return true;
 }
diff --git a/src/include/common/string.h b/src/include/common/string.h
index 18aa1dc5aa..50c241a811 100644
--- a/src/include/common/string.h
+++ b/src/include/common/string.h
@@ -10,6 +10,8 @@
 #ifndef COMMON_STRING_H
 #define COMMON_STRING_H

+struct StringInfoData;            /* avoid including stringinfo.h here */
+
 /* functions in src/common/string.c */
 extern bool pg_str_endswith(const char *str, const char *end);
 extern int    strtoint(const char *pg_restrict str, char **pg_restrict endptr,
@@ -19,6 +21,7 @@ extern int    pg_strip_crlf(char *str);

 /* functions in src/common/pg_get_line.c */
 extern char *pg_get_line(FILE *stream);
+extern bool pg_get_line_append(FILE *stream, struct StringInfoData *buf);

 /* functions in src/common/sprompt.c */
 extern char *simple_prompt(const char *prompt, bool echo);
diff --git a/src/interfaces/ecpg/test/pg_regress_ecpg.c b/src/interfaces/ecpg/test/pg_regress_ecpg.c
index 46b9e78fe5..a2d7b70d9a 100644
--- a/src/interfaces/ecpg/test/pg_regress_ecpg.c
+++ b/src/interfaces/ecpg/test/pg_regress_ecpg.c
@@ -19,8 +19,9 @@
 #include "postgres_fe.h"

 #include "pg_regress.h"
+#include "common/string.h"
+#include "lib/stringinfo.h"

-#define LINEBUFSIZE 300

 static void
 ecpg_filter(const char *sourcefile, const char *outfile)
@@ -31,7 +32,7 @@ ecpg_filter(const char *sourcefile, const char *outfile)
      */
     FILE       *s,
                *t;
-    char        linebuf[LINEBUFSIZE];
+    StringInfoData linebuf;

     s = fopen(sourcefile, "r");
     if (!s)
@@ -46,13 +47,14 @@ ecpg_filter(const char *sourcefile, const char *outfile)
         exit(2);
     }

-    while (fgets(linebuf, LINEBUFSIZE, s))
+    initStringInfo(&linebuf);
+
+    while (pg_get_line_append(s, &linebuf))
     {
         /* check for "#line " in the beginning */
-        if (strstr(linebuf, "#line ") == linebuf)
+        if (strstr(linebuf.data, "#line ") == linebuf.data)
         {
-            char       *p = strchr(linebuf, '"');
-            char       *n;
+            char       *p = strchr(linebuf.data, '"');
             int            plen = 1;

             while (*p && (*(p + plen) == '.' || strchr(p + plen, '/') != NULL))
@@ -62,13 +64,15 @@ ecpg_filter(const char *sourcefile, const char *outfile)
             /* plen is one more than the number of . and / characters */
             if (plen > 1)
             {
-                n = (char *) malloc(plen);
-                strlcpy(n, p + 1, plen);
-                replace_string(linebuf, n, "");
+                memmove(p + 1, p + plen, strlen(p + plen) + 1);
+                /* we don't bother to fix up linebuf.len */
             }
         }
-        fputs(linebuf, t);
+        fputs(linebuf.data, t);
+        resetStringInfo(&linebuf);
     }
+
+    pfree(linebuf.data);
     fclose(s);
     fclose(t);
 }
@@ -87,40 +91,42 @@ ecpg_start_test(const char *testname,
     PID_TYPE    pid;
     char        inprg[MAXPGPATH];
     char        insource[MAXPGPATH];
-    char       *outfile_stdout,
+    StringInfoData testname_dash;
+    char        outfile_stdout[MAXPGPATH],
                 expectfile_stdout[MAXPGPATH];
-    char       *outfile_stderr,
+    char        outfile_stderr[MAXPGPATH],
                 expectfile_stderr[MAXPGPATH];
-    char       *outfile_source,
+    char        outfile_source[MAXPGPATH],
                 expectfile_source[MAXPGPATH];
     char        cmd[MAXPGPATH * 3];
-    char       *testname_dash;
     char       *appnameenv;

     snprintf(inprg, sizeof(inprg), "%s/%s", inputdir, testname);
+    snprintf(insource, sizeof(insource), "%s.c", testname);
+
+    initStringInfo(&testname_dash);
+    appendStringInfoString(&testname_dash, testname);
+    replace_string(&testname_dash, "/", "-");

-    testname_dash = strdup(testname);
-    replace_string(testname_dash, "/", "-");
     snprintf(expectfile_stdout, sizeof(expectfile_stdout),
              "%s/expected/%s.stdout",
-             outputdir, testname_dash);
+             outputdir, testname_dash.data);
     snprintf(expectfile_stderr, sizeof(expectfile_stderr),
              "%s/expected/%s.stderr",
-             outputdir, testname_dash);
+             outputdir, testname_dash.data);
     snprintf(expectfile_source, sizeof(expectfile_source),
              "%s/expected/%s.c",
-             outputdir, testname_dash);
-
-    /*
-     * We can use replace_string() here because the replacement string does
-     * not occupy more space than the replaced one.
-     */
-    outfile_stdout = strdup(expectfile_stdout);
-    replace_string(outfile_stdout, "/expected/", "/results/");
-    outfile_stderr = strdup(expectfile_stderr);
-    replace_string(outfile_stderr, "/expected/", "/results/");
-    outfile_source = strdup(expectfile_source);
-    replace_string(outfile_source, "/expected/", "/results/");
+             outputdir, testname_dash.data);
+
+    snprintf(outfile_stdout, sizeof(outfile_stdout),
+             "%s/results/%s.stdout",
+             outputdir, testname_dash.data);
+    snprintf(outfile_stderr, sizeof(outfile_stderr),
+             "%s/results/%s.stderr",
+             outputdir, testname_dash.data);
+    snprintf(outfile_source, sizeof(outfile_source),
+             "%s/results/%s.c",
+             outputdir, testname_dash.data);

     add_stringlist_item(resultfiles, outfile_stdout);
     add_stringlist_item(expectfiles, expectfile_stdout);
@@ -134,18 +140,15 @@ ecpg_start_test(const char *testname,
     add_stringlist_item(expectfiles, expectfile_source);
     add_stringlist_item(tags, "source");

-    snprintf(insource, sizeof(insource), "%s.c", testname);
     ecpg_filter(insource, outfile_source);

-    snprintf(inprg, sizeof(inprg), "%s/%s", inputdir, testname);
-
     snprintf(cmd, sizeof(cmd),
              "\"%s\" >\"%s\" 2>\"%s\"",
              inprg,
              outfile_stdout,
              outfile_stderr);

-    appnameenv = psprintf("PGAPPNAME=ecpg/%s", testname_dash);
+    appnameenv = psprintf("PGAPPNAME=ecpg/%s", testname_dash.data);
     putenv(appnameenv);

     pid = spawn_process(cmd);
@@ -160,10 +163,7 @@ ecpg_start_test(const char *testname,
     unsetenv("PGAPPNAME");
     free(appnameenv);

-    free(testname_dash);
-    free(outfile_stdout);
-    free(outfile_stderr);
-    free(outfile_source);
+    free(testname_dash.data);

     return pid;
 }
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index d82e0189dc..d83442f467 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -31,8 +31,10 @@

 #include "common/logging.h"
 #include "common/restricted_token.h"
+#include "common/string.h"
 #include "common/username.h"
 #include "getopt_long.h"
+#include "lib/stringinfo.h"
 #include "libpq/pqcomm.h"        /* needed for UNIXSOCK_PATH() */
 #include "pg_config_paths.h"
 #include "pg_regress.h"
@@ -435,22 +437,32 @@ string_matches_pattern(const char *str, const char *pattern)
 }

 /*
- * Replace all occurrences of a string in a string with a different string.
- * NOTE: Assumes there is enough room in the target buffer!
+ * Replace all occurrences of "replace" in "string" with "replacement".
+ * The StringInfo will be suitably enlarged if necessary.
+ *
+ * Note: this is optimized on the assumption that most calls will find
+ * no more than one occurrence of "replace", and quite likely none.
  */
 void
-replace_string(char *string, const char *replace, const char *replacement)
+replace_string(StringInfo string, const char *replace, const char *replacement)
 {
+    int            pos = 0;
     char       *ptr;

-    while ((ptr = strstr(string, replace)) != NULL)
+    while ((ptr = strstr(string->data + pos, replace)) != NULL)
     {
-        char       *dup = pg_strdup(string);
+        /* Must copy the remainder of the string out of the StringInfo */
+        char       *suffix = pg_strdup(ptr + strlen(replace));

-        strlcpy(string, dup, ptr - string + 1);
-        strcat(string, replacement);
-        strcat(string, dup + (ptr - string) + strlen(replace));
-        free(dup);
+        /* Truncate StringInfo at start of found string ... */
+        string->len = ptr - string->data;
+        /* ... and append the replacement */
+        appendStringInfoString(string, replacement);
+        /* Next search should start after the replacement */
+        pos = string->len;
+        /* Put back the remainder of the string */
+        appendStringInfoString(string, suffix);
+        free(suffix);
     }
 }

@@ -521,7 +533,7 @@ convert_sourcefiles_in(const char *source_subdir, const char *dest_dir, const ch
         char        prefix[MAXPGPATH];
         FILE       *infile,
                    *outfile;
-        char        line[1024];
+        StringInfoData line;

         /* reject filenames not finishing in ".source" */
         if (strlen(*name) < 8)
@@ -551,15 +563,21 @@ convert_sourcefiles_in(const char *source_subdir, const char *dest_dir, const ch
                     progname, destfile, strerror(errno));
             exit(2);
         }
-        while (fgets(line, sizeof(line), infile))
+
+        initStringInfo(&line);
+
+        while (pg_get_line_append(infile, &line))
         {
-            replace_string(line, "@abs_srcdir@", inputdir);
-            replace_string(line, "@abs_builddir@", outputdir);
-            replace_string(line, "@testtablespace@", testtablespace);
-            replace_string(line, "@libdir@", dlpath);
-            replace_string(line, "@DLSUFFIX@", DLSUFFIX);
-            fputs(line, outfile);
+            replace_string(&line, "@abs_srcdir@", inputdir);
+            replace_string(&line, "@abs_builddir@", outputdir);
+            replace_string(&line, "@testtablespace@", testtablespace);
+            replace_string(&line, "@libdir@", dlpath);
+            replace_string(&line, "@DLSUFFIX@", DLSUFFIX);
+            fputs(line.data, outfile);
+            resetStringInfo(&line);
         }
+
+        pfree(line.data);
         fclose(infile);
         fclose(outfile);
     }
diff --git a/src/test/regress/pg_regress.h b/src/test/regress/pg_regress.h
index ee6e0d42f4..726f9c9048 100644
--- a/src/test/regress/pg_regress.h
+++ b/src/test/regress/pg_regress.h
@@ -18,6 +18,8 @@
 #define INVALID_PID INVALID_HANDLE_VALUE
 #endif

+struct StringInfoData;            /* avoid including stringinfo.h here */
+
 /* simple list of strings */
 typedef struct _stringlist
 {
@@ -49,5 +51,6 @@ int            regression_main(int argc, char *argv[],
                             init_function ifunc, test_function tfunc);
 void        add_stringlist_item(_stringlist **listhead, const char *str);
 PID_TYPE    spawn_process(const char *cmdline);
-void        replace_string(char *string, const char *replace, const char *replacement);
+void        replace_string(struct StringInfoData *string,
+                           const char *replace, const char *replacement);
 bool        file_exists(const char *file);

pgsql-hackers by date:

Previous
From: Justin Pryzby
Date:
Subject: Re: v13: show extended stats target in \d
Next
From: Tomas Vondra
Date:
Subject: Re: WIP: BRIN multi-range indexes