Re: [BUGS] BUG #14512: Backslashes in LIKE - Mailing list pgsql-bugs

From Tom Lane
Subject Re: [BUGS] BUG #14512: Backslashes in LIKE
Date
Msg-id 10287.1485286334@sss.pgh.pa.us
Whole thread Raw
In response to Re: [BUGS] BUG #14512: Backslashes in LIKE  ("David G. Johnston" <david.g.johnston@gmail.com>)
List pgsql-bugs
"David G. Johnston" <david.g.johnston@gmail.com> writes:
> Then consider a feature request that a malformed pattern be detected and
> fail independent of the data being checked. Such non-deterministic failure
> is at least a POLA violation and makes what should be a basically
> compile-time error into a run-time one.

Meh.  We could do something like the attached, but I think it would be a
net performance drag in practically all cases, and I doubt it is worth it.

            regards, tom lane

diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 91fe109..e94e64a 100644
*** a/src/backend/utils/adt/like.c
--- b/src/backend/utils/adt/like.c
*************** SB_lower_char(unsigned char c, pg_locale
*** 146,155 ****
--- 146,181 ----

  #include "like_match.c"

+ /*
+  * Check that pattern is legal (which reduces to checking that there is no
+  * backslash at the end).  We do this separately so that we'll throw an error
+  * for any invalid pattern, even if the matching logic doesn't ever examine
+  * all of it.  In consequence, the matching logic need not defend itself
+  * against invalid patterns.
+  */
+ static inline void
+ check_like_pattern(char *p, int plen)
+ {
+     /* This can be stupid even in multibyte encodings. */
+     while (plen-- > 0)
+     {
+         if (*p++ == '\\')
+         {
+             if (unlikely(plen == 0))
+                 ereport(ERROR,
+                         (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+                  errmsg("LIKE pattern must not end with escape character")));
+             /* otherwise ignore next character, even if it's backslash */
+             p++, plen--;
+         }
+     }
+ }
+
  /* Generic for all cases not requiring inline case-folding */
  static inline int
  GenericMatchText(char *s, int slen, char *p, int plen)
  {
+     check_like_pattern(p, plen);
      if (pg_database_encoding_max_length() == 1)
          return SB_MatchText(s, slen, p, plen, 0, true);
      else if (GetDatabaseEncoding() == PG_UTF8)
*************** Generic_Text_IC_like(text *str, text *pa
*** 179,184 ****
--- 205,211 ----
                                                      PointerGetDatum(pat)));
          p = VARDATA(pat);
          plen = (VARSIZE(pat) - VARHDRSZ);
+         check_like_pattern(p, plen);
          str = DatumGetTextP(DirectFunctionCall1Coll(lower, collation,
                                                      PointerGetDatum(str)));
          s = VARDATA(str);
*************** Generic_Text_IC_like(text *str, text *pa
*** 217,222 ****
--- 244,250 ----

          p = VARDATA_ANY(pat);
          plen = VARSIZE_ANY_EXHDR(pat);
+         check_like_pattern(p, plen);
          s = VARDATA_ANY(str);
          slen = VARSIZE_ANY_EXHDR(str);
          return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
*************** bytealike(PG_FUNCTION_ARGS)
*** 326,331 ****
--- 354,360 ----
      slen = VARSIZE_ANY_EXHDR(str);
      p = VARDATA_ANY(pat);
      plen = VARSIZE_ANY_EXHDR(pat);
+     check_like_pattern(p, plen);

      result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);

*************** byteanlike(PG_FUNCTION_ARGS)
*** 347,352 ****
--- 376,382 ----
      slen = VARSIZE_ANY_EXHDR(str);
      p = VARDATA_ANY(pat);
      plen = VARSIZE_ANY_EXHDR(pat);
+     check_like_pattern(p, plen);

      result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);

diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c
index 1c37229..a69b41b 100644
*** a/src/backend/utils/adt/like_match.c
--- b/src/backend/utils/adt/like_match.c
*************** MatchText(char *t, int tlen, char *p, in
*** 99,110 ****
          if (*p == '\\')
          {
              /* Next pattern byte must match literally, whatever it is */
              NextByte(p, plen);
-             /* ... and there had better be one, per SQL standard */
-             if (plen <= 0)
-                 ereport(ERROR,
-                         (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
-                  errmsg("LIKE pattern must not end with escape character")));
              if (GETCHAR(*p) != GETCHAR(*t))
                  return LIKE_FALSE;
          }
--- 99,106 ----
          if (*p == '\\')
          {
              /* Next pattern byte must match literally, whatever it is */
+             /* (and check_like_pattern() checked that there is one) */
              NextByte(p, plen);
              if (GETCHAR(*p) != GETCHAR(*t))
                  return LIKE_FALSE;
          }
*************** MatchText(char *t, int tlen, char *p, in
*** 160,172 ****
               * end of the text.
               */
              if (*p == '\\')
!             {
!                 if (plen < 2)
!                     ereport(ERROR,
!                             (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
!                              errmsg("LIKE pattern must not end with escape character")));
!                 firstpat = GETCHAR(p[1]);
!             }
              else
                  firstpat = GETCHAR(*p);

--- 156,162 ----
               * end of the text.
               */
              if (*p == '\\')
!                 firstpat = GETCHAR(p[1]);        /* we know this is safe */
              else
                  firstpat = GETCHAR(*p);


-- 
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

pgsql-bugs by date:

Previous
From: "David G. Johnston"
Date:
Subject: Re: [BUGS] BUG #14512: Backslashes in LIKE
Next
From: shawn.inder@waltzapp.com
Date:
Subject: [BUGS] BUG #14513: Missing column names when viewing tables