to_char() vs. long numeric formatting strings from locale - Mailing list pgsql-bugs

From Tom Lane
Subject to_char() vs. long numeric formatting strings from locale
Date
Msg-id 638232.1776790821@sss.pgh.pa.us
Whole thread
List pgsql-bugs
The numeric variants of to_char() allocate an output buffer of 8 bytes
per format character, reasoning that no format code can produce more
than that much output.  In general this is true, but there is a
potential exception: format codes L, G, and so on emit verbatim copies
of the currency_symbol, thousands_sep, etc. strings from the active
LC_NUMERIC locale, and there's no a priori upper limit on the
lengths of those strings.  So in principle you could get a buffer
overrun.

I'm not aware of any real-world locales having such strings that
exceed 8 bytes, so it seems like we can close off this risk with
minimal effort by just truncating the locale's strings at 8 bytes,
as attached.

This bug was reported to pgsql-security by Xint Code as a potential
security issue.  However, we decided it doesn't seem worth the CVE
treatment, because exploiting it would require getting a malicious
locale definition installed underneath a PG server.  That's a big ask
considering that locale definitions normally come from platform-owned
directories.  (On some platforms you might be able to point a program
at some other locale data source using an environment variable ... but
if you can control the server's environment then there are far more
powerful attacks available, e.g. via changing PATH or LD_LIBRARY_PATH.)

Despite that, it seems worth fixing as a run-of-the-mill bug.
Any objections to the attached?

            regards, tom lane

From efccedf261ab519b2b5ebe9767d0d78a35be7727 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 8 Apr 2026 17:09:16 -0400
Subject: [PATCH v1] Guard against overly-long numeric formatting symbols from
 locale.

to_char() allocates its output buffer with 8 bytes per formatting
code in the pattern.  If the locale's currency symbol, thousands
separator, or decimal or sign symbol is more than 8 bytes long,
in principle we could overrun the output buffer.  No such locales
exist in the real world, so it seems sufficient to truncate the
symbol if we do see it's too long.

Reported-by: Xint Code
Author: Tom Lane <tgl@sss.pgh.pa.us>
Backpatch-through: 14
---
 src/backend/utils/adt/formatting.c | 61 +++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 18 deletions(-)

diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 0716aff22b6..9a8c99336b5 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1114,6 +1114,7 @@ static void NUM_prepare_locale(NUMProc *Np);
 static const char *get_last_relevant_decnum(const char *num);
 static void NUM_numpart_from_char(NUMProc *Np, int id, size_t input_len);
 static void NUM_numpart_to_char(NUMProc *Np, int id);
+static void NUM_add_locale_symbol(NUMProc *Np, const char *pattern);
 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
                            char *number, size_t input_len, int to_char_out_pre_spaces,
                            int sign, bool is_to_char, Oid collid);
@@ -5568,11 +5569,9 @@ NUM_numpart_to_char(NUMProc *Np, int id)
         {
             if (Np->Num->lsign == NUM_LSIGN_PRE)
             {
-                if (Np->sign == '-')
-                    strcpy(Np->inout_p, Np->L_negative_sign);
-                else
-                    strcpy(Np->inout_p, Np->L_positive_sign);
-                Np->inout_p += strlen(Np->inout_p);
+                NUM_add_locale_symbol(Np, (Np->sign == '-') ?
+                                      Np->L_negative_sign :
+                                      Np->L_positive_sign);
                 Np->sign_wrote = true;
             }
         }
@@ -5637,8 +5636,7 @@ NUM_numpart_to_char(NUMProc *Np, int id)
             {
                 if (!Np->last_relevant || *Np->last_relevant != '.')
                 {
-                    strcpy(Np->inout_p, Np->decimal);    /* Write DEC/D */
-                    Np->inout_p += strlen(Np->inout_p);
+                    NUM_add_locale_symbol(Np, Np->decimal); /* Write DEC/D */
                 }

                 /*
@@ -5647,8 +5645,7 @@ NUM_numpart_to_char(NUMProc *Np, int id)
                 else if (IS_FILLMODE(Np->Num) &&
                          Np->last_relevant && *Np->last_relevant == '.')
                 {
-                    strcpy(Np->inout_p, Np->decimal);    /* Write DEC/D */
-                    Np->inout_p += strlen(Np->inout_p);
+                    NUM_add_locale_symbol(Np, Np->decimal); /* Write DEC/D */
                 }
             }
             else
@@ -5706,11 +5703,9 @@ NUM_numpart_to_char(NUMProc *Np, int id)
             }
             else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
             {
-                if (Np->sign == '-')
-                    strcpy(Np->inout_p, Np->L_negative_sign);
-                else
-                    strcpy(Np->inout_p, Np->L_positive_sign);
-                Np->inout_p += strlen(Np->inout_p);
+                NUM_add_locale_symbol(Np, (Np->sign == '-') ?
+                                      Np->L_negative_sign :
+                                      Np->L_positive_sign);
             }
         }
     }
@@ -5718,6 +5713,23 @@ NUM_numpart_to_char(NUMProc *Np, int id)
     ++Np->num_curr;
 }

+/*
+ * Append locale-specific symbol to Np->inout.
+ * Note we don't null-terminate the output
+ */
+static void
+NUM_add_locale_symbol(NUMProc *Np, const char *pattern)
+{
+    size_t        pattern_len = strlen(pattern);
+
+    /* Truncate symbol if it's potentially too long */
+    if (unlikely(pattern_len > NUM_MAX_ITEM_SIZ))
+        pattern_len = pg_mbcliplen(pattern, pattern_len,
+                                   NUM_MAX_ITEM_SIZ);
+    memcpy(Np->inout_p, pattern, pattern_len);
+    Np->inout_p += pattern_len;
+}
+
 /*
  * Skip over "n" input characters, but only if they aren't numeric data
  */
@@ -5965,6 +5977,10 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
                     pattern_len = strlen(pattern);
                     if (Np->is_to_char)
                     {
+                        /* Truncate symbol if it's potentially too long */
+                        if (unlikely(pattern_len > NUM_MAX_ITEM_SIZ))
+                            pattern_len = pg_mbcliplen(pattern, pattern_len,
+                                                       NUM_MAX_ITEM_SIZ);
                         if (!Np->num_in)
                         {
                             if (IS_FILLMODE(Np->Num))
@@ -5972,19 +5988,21 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
                             else
                             {
                                 /* just in case there are MB chars */
-                                pattern_len = pg_mbstrlen(pattern);
+                                pattern_len = pg_mbstrlen_with_len(pattern,
+                                                                   pattern_len);
                                 memset(Np->inout_p, ' ', pattern_len);
                                 Np->inout_p += pattern_len - 1;
                             }
                         }
                         else
                         {
-                            strcpy(Np->inout_p, pattern);
+                            memcpy(Np->inout_p, pattern, pattern_len);
                             Np->inout_p += pattern_len - 1;
                         }
                     }
                     else
                     {
+                        /* Here we do not truncate the symbol ... */
                         if (!Np->num_in)
                         {
                             if (IS_FILLMODE(Np->Num))
@@ -6009,11 +6027,18 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
                     pattern = Np->L_currency_symbol;
                     if (Np->is_to_char)
                     {
-                        strcpy(Np->inout_p, pattern);
-                        Np->inout_p += strlen(pattern) - 1;
+                        /* Truncate symbol if it's potentially too long */
+                        pattern_len = strlen(pattern);
+                        if (unlikely(pattern_len > NUM_MAX_ITEM_SIZ))
+                            pattern_len = pg_mbcliplen(pattern, pattern_len,
+                                                       NUM_MAX_ITEM_SIZ);
+
+                        memcpy(Np->inout_p, pattern, pattern_len);
+                        Np->inout_p += pattern_len - 1;
                     }
                     else
                     {
+                        /* Here we do not truncate the symbol ... */
                         NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
                         continue;
                     }
--
2.43.7


pgsql-bugs by date:

Previous
From: Srinath Reddy Sadipiralla
Date:
Subject: Re: Bug in CREATE TABLE .. LIKE .. INCLUDING STATISTICS?
Next
From: Tom Lane
Date:
Subject: Potential buffer overrun in spell.c's CheckAffix()