Better sanity checking for message length words - Mailing list pgsql-hackers

From Tom Lane
Subject Better sanity checking for message length words
Date
Msg-id 2003757.1619373089@sss.pgh.pa.us
Whole thread Raw
Responses Re: Better sanity checking for message length words  (Aleksander Alekseev <aleksander@timescale.com>)
List pgsql-hackers
We had a report [1] of a case where a broken client application
sent some garbage to the server, which then hung up because it
misinterpreted the garbage as a very long message length, and was
sitting waiting for data that would never arrive.  There is already
a sanity check on the message type byte in postgres.c's SocketBackend
(which the trouble case accidentally got past because 'S' is a valid
type code); but the only check on the message length is that it be
at least 4.

pq_getmessage() does have the ability to enforce an upper limit on
message length, but we only use that capability for authentication
messages, and not entirely consistently even there.

Meanwhile on the client side, libpq has had simple message-length
sanity checking for ages: it disbelieves message lengths greater
than 30000 bytes unless the message type is one of a short list
of types that can be long.

So the attached proposed patch changes things to make it required
not optional for callers of pq_getmessage to provide an upper length
bound, and installs the same sort of short-vs-long message heuristic
as libpq has in the server.  This is also a good opportunity for
other callers to absorb the lesson SocketBackend learned many years
ago: we should validate the message type code *before* believing
anything about the message length.  All of this is just heuristic
of course, but I think it makes for a substantial reduction in the
trouble surface.

Given the small number of complaints to date, I'm hesitant to
back-patch this: if there's anybody out there with valid use for
long messages that I didn't think should be long, this might break
things for them.  But I think it'd be reasonable to sneak it into
v14, since we've not started beta yet.

Thoughts?

            regards, tom lane

[1] https://www.postgresql.org/message-id/flat/YIKCCXcEozx9iiBU%40c720-r368166.fritz.box

diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 0813424768..fdf57f1556 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -265,6 +265,7 @@ CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
                 {
                     /* Try to receive another message */
                     int            mtype;
+                    int            maxmsglen;

             readmessage:
                     HOLD_CANCEL_INTERRUPTS();
@@ -274,11 +275,33 @@ CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
                         ereport(ERROR,
                                 (errcode(ERRCODE_CONNECTION_FAILURE),
                                  errmsg("unexpected EOF on client connection with an open transaction")));
-                    if (pq_getmessage(cstate->fe_msgbuf, 0))
+                    /* Validate message type and set packet size limit */
+                    switch (mtype)
+                    {
+                        case 'd':    /* CopyData */
+                            maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
+                            break;
+                        case 'c':    /* CopyDone */
+                        case 'f':    /* CopyFail */
+                        case 'H':    /* Flush */
+                        case 'S':    /* Sync */
+                            maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
+                            break;
+                        default:
+                            ereport(ERROR,
+                                    (errcode(ERRCODE_PROTOCOL_VIOLATION),
+                                     errmsg("unexpected message type 0x%02X during COPY from stdin",
+                                            mtype)));
+                            maxmsglen = 0;    /* keep compiler quiet */
+                            break;
+                    }
+                    /* Now collect the message body */
+                    if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
                         ereport(ERROR,
                                 (errcode(ERRCODE_CONNECTION_FAILURE),
                                  errmsg("unexpected EOF on client connection with an open transaction")));
                     RESUME_CANCEL_INTERRUPTS();
+                    /* ... and process it */
                     switch (mtype)
                     {
                         case 'd':    /* CopyData */
@@ -304,11 +327,7 @@ CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
                              */
                             goto readmessage;
                         default:
-                            ereport(ERROR,
-                                    (errcode(ERRCODE_PROTOCOL_VIOLATION),
-                                     errmsg("unexpected message type 0x%02X during COPY from stdin",
-                                            mtype)));
-                            break;
+                            Assert(false);    /* NOT REACHED */
                     }
                 }
                 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
diff --git a/src/backend/libpq/auth.c b/src/backend/libpq/auth.c
index 27865b14a0..45a91235a4 100644
--- a/src/backend/libpq/auth.c
+++ b/src/backend/libpq/auth.c
@@ -210,6 +210,7 @@ static int    PerformRadiusTransaction(const char *server, const char *secret, cons

 /*
  * Maximum accepted size of GSS and SSPI authentication tokens.
+ * We also use this as a limit on ordinary password packet lengths.
  *
  * Kerberos tickets are usually quite small, but the TGTs issued by Windows
  * domain controllers include an authorization field known as the Privilege
@@ -724,7 +725,7 @@ recv_password_packet(Port *port)
     }

     initStringInfo(&buf);
-    if (pq_getmessage(&buf, 0)) /* receive password */
+    if (pq_getmessage(&buf, PG_MAX_AUTH_TOKEN_LENGTH))    /* receive password */
     {
         /* EOF - pq_getmessage already logged a suitable message */
         pfree(buf.data);
diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c
index 8066ee1d1e..b9ccd4473f 100644
--- a/src/backend/libpq/pqcomm.c
+++ b/src/backend/libpq/pqcomm.c
@@ -1203,7 +1203,7 @@ pq_is_reading_msg(void)
  *        is removed.  Also, s->cursor is initialized to zero for convenience
  *        in scanning the message contents.
  *
- *        If maxlen is not zero, it is an upper limit on the length of the
+ *        maxlen is the upper limit on the length of the
  *        message we are willing to accept.  We abort the connection (by
  *        returning EOF) if client tries to send more than that.
  *
@@ -1230,8 +1230,7 @@ pq_getmessage(StringInfo s, int maxlen)

     len = pg_ntoh32(len);

-    if (len < 4 ||
-        (maxlen > 0 && len > maxlen))
+    if (len < 4 || len > maxlen)
     {
         ereport(COMMERROR,
                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 52fe9aba66..6fefc3bedc 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -1704,6 +1704,7 @@ static void
 ProcessRepliesIfAny(void)
 {
     unsigned char firstchar;
+    int            maxmsglen;
     int            r;
     bool        received = false;

@@ -1733,9 +1734,28 @@ ProcessRepliesIfAny(void)
             break;
         }

+        /* Validate message type and set packet size limit */
+        switch (firstchar)
+        {
+            case 'd':
+                maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
+                break;
+            case 'c':
+            case 'X':
+                maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
+                break;
+            default:
+                ereport(FATAL,
+                        (errcode(ERRCODE_PROTOCOL_VIOLATION),
+                         errmsg("invalid standby message type \"%c\"",
+                                firstchar)));
+                maxmsglen = 0;    /* keep compiler quiet */
+                break;
+        }
+
         /* Read the message contents */
         resetStringInfo(&reply_message);
-        if (pq_getmessage(&reply_message, 0))
+        if (pq_getmessage(&reply_message, maxmsglen))
         {
             ereport(COMMERROR,
                     (errcode(ERRCODE_PROTOCOL_VIOLATION),
@@ -1743,7 +1763,7 @@ ProcessRepliesIfAny(void)
             proc_exit(0);
         }

-        /* Handle the very limited subset of commands expected in this phase */
+        /* ... and process it */
         switch (firstchar)
         {
                 /*
@@ -1776,10 +1796,7 @@ ProcessRepliesIfAny(void)
                 proc_exit(0);

             default:
-                ereport(FATAL,
-                        (errcode(ERRCODE_PROTOCOL_VIOLATION),
-                         errmsg("invalid standby message type \"%c\"",
-                                firstchar)));
+                Assert(false);    /* NOT REACHED */
         }
     }

diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 1216a2b397..2d6d145ecc 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -343,6 +343,7 @@ static int
 SocketBackend(StringInfo inBuf)
 {
     int            qtype;
+    int            maxmsglen;

     /*
      * Get message type code from the frontend.
@@ -375,7 +376,9 @@ SocketBackend(StringInfo inBuf)
     /*
      * Validate message type code before trying to read body; if we have lost
      * sync, better to say "command unknown" than to run out of memory because
-     * we used garbage as a length word.
+     * we used garbage as a length word.  We can also select a type-dependent
+     * limit on what a sane length word could be.  (The limit could be chosen
+     * more granularly, but it's not clear it's worth fussing over.)
      *
      * This also gives us a place to set the doing_extended_query_message flag
      * as soon as possible.
@@ -383,28 +386,37 @@ SocketBackend(StringInfo inBuf)
     switch (qtype)
     {
         case 'Q':                /* simple query */
+            maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
             doing_extended_query_message = false;
             break;

         case 'F':                /* fastpath function call */
+            maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
             doing_extended_query_message = false;
             break;

         case 'X':                /* terminate */
+            maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
             doing_extended_query_message = false;
             ignore_till_sync = false;
             break;

         case 'B':                /* bind */
+        case 'P':                /* parse */
+            maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
+            doing_extended_query_message = true;
+            break;
+
         case 'C':                /* close */
         case 'D':                /* describe */
         case 'E':                /* execute */
         case 'H':                /* flush */
-        case 'P':                /* parse */
+            maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
             doing_extended_query_message = true;
             break;

         case 'S':                /* sync */
+            maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
             /* stop any active skip-till-Sync */
             ignore_till_sync = false;
             /* mark not-extended, so that a new error doesn't begin skip */
@@ -412,8 +424,13 @@ SocketBackend(StringInfo inBuf)
             break;

         case 'd':                /* copy data */
+            maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
+            doing_extended_query_message = false;
+            break;
+
         case 'c':                /* copy done */
         case 'f':                /* copy fail */
+            maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
             doing_extended_query_message = false;
             break;

@@ -427,6 +444,7 @@ SocketBackend(StringInfo inBuf)
             ereport(FATAL,
                     (errcode(ERRCODE_PROTOCOL_VIOLATION),
                      errmsg("invalid frontend message type %d", qtype)));
+            maxmsglen = 0;        /* keep compiler quiet */
             break;
     }

@@ -435,7 +453,7 @@ SocketBackend(StringInfo inBuf)
      * after the type code; we can read the message contents independently of
      * the type.
      */
-    if (pq_getmessage(inBuf, 0))
+    if (pq_getmessage(inBuf, maxmsglen))
         return EOF;            /* suitable message already logged */
     RESUME_CANCEL_INTERRUPTS();

diff --git a/src/include/libpq/libpq.h b/src/include/libpq/libpq.h
index 3ebbc8d665..a2be9a9bfe 100644
--- a/src/include/libpq/libpq.h
+++ b/src/include/libpq/libpq.h
@@ -21,6 +21,15 @@
 #include "storage/latch.h"


+/*
+ * Callers of pq_getmessage() must supply a maximum expected message size.
+ * By convention, if there's not any specific reason to use another value,
+ * use PQ_SMALL_MESSAGE_LIMIT for messages that shouldn't be too long, and
+ * PQ_LARGE_MESSAGE_LIMIT for messages that can be long.
+ */
+#define PQ_SMALL_MESSAGE_LIMIT    30000    /* frontend libpq uses this value too */
+#define PQ_LARGE_MESSAGE_LIMIT    MaxAllocSize
+
 typedef struct
 {
     void        (*comm_reset) (void);

pgsql-hackers by date:

Previous
From: Julien Rouhaud
Date:
Subject: Re: compute_query_id and pg_stat_statements
Next
From: Stephen Frost
Date:
Subject: Re: Allowing to create LEAKPROOF functions to non-superuser