Re: Lift line-length limit for pg_service.conf - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Lift line-length limit for pg_service.conf
Date
Msg-id 1384110.1600823668@sss.pgh.pa.us
Whole thread Raw
In response to Re: Lift line-length limit for pg_service.conf  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: Lift line-length limit for pg_service.conf
List pgsql-hackers
I wrote:
> Yeah.  In a quick scan, it appears that there is only one caller that
> tries to save the result directly.  So I considered making that caller
> do a pstrdup and eliminating the extra thrashing in t_readline itself.
> But it seemed too fragile; somebody would get it wrong and then have
> excess space consumption for their dictionary.

I had a better idea: if we get rid of t_readline() itself, which has
been deprecated for years anyway, we can have the calling layer
tsearch_readline_xxx maintain a StringInfo across the whole file
read process and thus get rid of alloc/free cycles for the StringInfo.

However ... while working on that, I realized that the usage of
the "curline" field in that code is completely unsafe.  We save
a pointer to the result of tsearch_readline() for possible use
by the error context callback, *but the caller is likely to free that
string at some point*.  So there is a window where an error would
result in trying to print already-freed data.

It looks like all of the core-code dictionaries free the result
string at the bottoms of their loops, so that the window for
trouble is pretty much empty.  But contrib/dict_xsyn doesn't
do it like that, and so it's surely at risk.  Any external
dictionaries that have copied that code would also be at risk.

So the attached adds a pstrdup/pfree to ensure that "curline"
has its own storage, putting us right back at two palloc/pfree
cycles per line.  I don't think there's a lot of choice though;
in fact, I'm leaning to the idea that we need to back-patch
that part of this.  The odds of trouble in a production build
probably aren't high, but still...

            regards, tom lane

diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c
index cb0835982d..64c979086d 100644
--- a/src/backend/tsearch/dict_thesaurus.c
+++ b/src/backend/tsearch/dict_thesaurus.c
@@ -286,11 +286,6 @@ thesaurusRead(const char *filename, DictThesaurus *d)
                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
                      errmsg("unexpected end of line")));

-        /*
-         * Note: currently, tsearch_readline can't return lines exceeding 4KB,
-         * so overflow of the word counts is impossible.  But that may not
-         * always be true, so let's check.
-         */
         if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
             ereport(ERROR,
                     (errcode(ERRCODE_CONFIG_FILE_ERROR),
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index a916dd6cb6..c85939631e 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -14,6 +14,7 @@
 #include "postgres.h"

 #include "catalog/pg_collation.h"
+#include "common/string.h"
 #include "storage/fd.h"
 #include "tsearch/ts_locale.h"
 #include "tsearch/ts_public.h"
@@ -128,6 +129,7 @@ tsearch_readline_begin(tsearch_readline_state *stp,
         return false;
     stp->filename = filename;
     stp->lineno = 0;
+    initStringInfo(&stp->buf);
     stp->curline = NULL;
     /* Setup error traceback support for ereport() */
     stp->cb.callback = tsearch_readline_callback;
@@ -146,11 +148,44 @@ char *
 tsearch_readline(tsearch_readline_state *stp)
 {
     char       *result;
+    int            len;

+    /* Advance line number to use in error reports */
     stp->lineno++;
-    stp->curline = NULL;
-    result = t_readline(stp->fp);
-    stp->curline = result;
+
+    /* Clear curline, it's no longer relevant */
+    if (stp->curline)
+    {
+        pfree(stp->curline);
+        stp->curline = NULL;
+    }
+
+    /* Collect next line, if there is one */
+    if (!pg_get_line_buf(stp->fp, &stp->buf))
+        return NULL;
+
+    /* Make sure the input is valid UTF-8 */
+    len = stp->buf.len;
+    (void) pg_verify_mbstr(PG_UTF8, stp->buf.data, len, false);
+
+    /* And convert */
+    result = pg_any_to_server(stp->buf.data, len, PG_UTF8);
+    if (result == stp->buf.data)
+    {
+        /*
+         * Conversion didn't pstrdup, so we must.  We can use the length of
+         * the original string, because no conversion was done.
+         */
+        result = pnstrdup(result, len);
+    }
+
+    /*
+     * Save a copy of the line for possible use in error reports.  (We cannot
+     * just save "result", since it's likely to get pfree'd at some point by
+     * the caller; an error after that would try to access freed data.)
+     */
+    stp->curline = pstrdup(result);
+
     return result;
 }

@@ -160,7 +195,16 @@ tsearch_readline(tsearch_readline_state *stp)
 void
 tsearch_readline_end(tsearch_readline_state *stp)
 {
+    /* Suppress use of curline in any error reported below */
+    if (stp->curline)
+    {
+        pfree(stp->curline);
+        stp->curline = NULL;
+    }
+
+    pfree(stp->buf.data);
     FreeFile(stp->fp);
+
     /* Pop the error context stack */
     error_context_stack = stp->cb.previous;
 }
@@ -176,8 +220,7 @@ tsearch_readline_callback(void *arg)

     /*
      * We can't include the text of the config line for errors that occur
-     * during t_readline() itself.  This is only partly a consequence of our
-     * arms-length use of that routine: the major cause of such errors is
+     * during tsearch_readline() itself.  The major cause of such errors is
      * encoding violations, and we daren't try to print error messages
      * containing badly-encoded data.
      */
@@ -193,43 +236,6 @@ tsearch_readline_callback(void *arg)
 }


-/*
- * Read the next line from a tsearch data file (expected to be in UTF-8), and
- * convert it to database encoding if needed. The returned string is palloc'd.
- * NULL return means EOF.
- *
- * Note: direct use of this function is now deprecated.  Go through
- * tsearch_readline() to provide better error reporting.
- */
-char *
-t_readline(FILE *fp)
-{
-    int            len;
-    char       *recoded;
-    char        buf[4096];        /* lines must not be longer than this */
-
-    if (fgets(buf, sizeof(buf), fp) == NULL)
-        return NULL;
-
-    len = strlen(buf);
-
-    /* Make sure the input is valid UTF-8 */
-    (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
-
-    /* And convert */
-    recoded = pg_any_to_server(buf, len, PG_UTF8);
-    if (recoded == buf)
-    {
-        /*
-         * conversion didn't pstrdup, so we must. We can use the length of the
-         * original string, because no conversion was done.
-         */
-        recoded = pnstrdup(recoded, len);
-    }
-
-    return recoded;
-}
-
 /*
  * lowerstr --- fold null-terminated string to lower case
  *
diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h
index cc4bd9ab20..3c2e635f86 100644
--- a/src/include/tsearch/ts_locale.h
+++ b/src/include/tsearch/ts_locale.h
@@ -15,6 +15,7 @@
 #include <ctype.h>
 #include <limits.h>

+#include "lib/stringinfo.h"
 #include "mb/pg_wchar.h"
 #include "utils/pg_locale.h"

@@ -33,7 +34,8 @@ typedef struct
     FILE       *fp;
     const char *filename;
     int            lineno;
-    char       *curline;
+    StringInfoData buf;            /* current input line, in UTF-8 */
+    char       *curline;        /* input line in DB's encoding, or NULL */
     ErrorContextCallback cb;
 } tsearch_readline_state;

@@ -57,6 +59,4 @@ extern bool tsearch_readline_begin(tsearch_readline_state *stp,
 extern char *tsearch_readline(tsearch_readline_state *stp);
 extern void tsearch_readline_end(tsearch_readline_state *stp);

-extern char *t_readline(FILE *fp);
-
 #endif                            /* __TSLOCALE_H__ */

pgsql-hackers by date:

Previous
From: "tsunakawa.takay@fujitsu.com"
Date:
Subject: RE: Global snapshots
Next
From: Tom Lane
Date:
Subject: Re: new heapcheck contrib module