Re: [BUGS] BUG #14512: Backslashes in LIKE - Mailing list pgsql-bugs
| From | Tom Lane |
|---|---|
| Subject | Re: [BUGS] BUG #14512: Backslashes in LIKE |
| Date | |
| Msg-id | 10287.1485286334@sss.pgh.pa.us Whole thread Raw |
| In response to | Re: [BUGS] BUG #14512: Backslashes in LIKE ("David G. Johnston" <david.g.johnston@gmail.com>) |
| List | pgsql-bugs |
"David G. Johnston" <david.g.johnston@gmail.com> writes:
> Then consider a feature request that a malformed pattern be detected and
> fail independent of the data being checked. Such non-deterministic failure
> is at least a POLA violation and makes what should be a basically
> compile-time error into a run-time one.
Meh. We could do something like the attached, but I think it would be a
net performance drag in practically all cases, and I doubt it is worth it.
regards, tom lane
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 91fe109..e94e64a 100644
*** a/src/backend/utils/adt/like.c
--- b/src/backend/utils/adt/like.c
*************** SB_lower_char(unsigned char c, pg_locale
*** 146,155 ****
--- 146,181 ----
#include "like_match.c"
+ /*
+ * Check that pattern is legal (which reduces to checking that there is no
+ * backslash at the end). We do this separately so that we'll throw an error
+ * for any invalid pattern, even if the matching logic doesn't ever examine
+ * all of it. In consequence, the matching logic need not defend itself
+ * against invalid patterns.
+ */
+ static inline void
+ check_like_pattern(char *p, int plen)
+ {
+ /* A naive byte-by-byte scan is good enough, even in multibyte encodings. */
+ while (plen-- > 0)
+ {
+ if (*p++ == '\\')
+ {
+ if (unlikely(plen == 0))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+ errmsg("LIKE pattern must not end with escape character")));
+ /* otherwise ignore next character, even if it's backslash */
+ p++, plen--;
+ }
+ }
+ }
+
/* Generic for all cases not requiring inline case-folding */
static inline int
GenericMatchText(char *s, int slen, char *p, int plen)
{
+ check_like_pattern(p, plen);
if (pg_database_encoding_max_length() == 1)
return SB_MatchText(s, slen, p, plen, 0, true);
else if (GetDatabaseEncoding() == PG_UTF8)
*************** Generic_Text_IC_like(text *str, text *pa
*** 179,184 ****
--- 205,211 ----
PointerGetDatum(pat)));
p = VARDATA(pat);
plen = (VARSIZE(pat) - VARHDRSZ);
+ check_like_pattern(p, plen);
str = DatumGetTextP(DirectFunctionCall1Coll(lower, collation,
PointerGetDatum(str)));
s = VARDATA(str);
*************** Generic_Text_IC_like(text *str, text *pa
*** 217,222 ****
--- 244,250 ----
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
+ check_like_pattern(p, plen);
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
*************** bytealike(PG_FUNCTION_ARGS)
*** 326,331 ****
--- 354,360 ----
slen = VARSIZE_ANY_EXHDR(str);
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
+ check_like_pattern(p, plen);
result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
*************** byteanlike(PG_FUNCTION_ARGS)
*** 347,352 ****
--- 376,382 ----
slen = VARSIZE_ANY_EXHDR(str);
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
+ check_like_pattern(p, plen);
result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c
index 1c37229..a69b41b 100644
*** a/src/backend/utils/adt/like_match.c
--- b/src/backend/utils/adt/like_match.c
*************** MatchText(char *t, int tlen, char *p, in
*** 99,110 ****
if (*p == '\\')
{
/* Next pattern byte must match literally, whatever it is */
NextByte(p, plen);
- /* ... and there had better be one, per SQL standard */
- if (plen <= 0)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
- errmsg("LIKE pattern must not end with escape character")));
if (GETCHAR(*p) != GETCHAR(*t))
return LIKE_FALSE;
}
--- 99,106 ----
if (*p == '\\')
{
/* Next pattern byte must match literally, whatever it is */
+ /* (and check_like_pattern() checked that there is one) */
NextByte(p, plen);
if (GETCHAR(*p) != GETCHAR(*t))
return LIKE_FALSE;
}
*************** MatchText(char *t, int tlen, char *p, in
*** 160,172 ****
* end of the text.
*/
if (*p == '\\')
! {
! if (plen < 2)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
! errmsg("LIKE pattern must not end with escape character")));
! firstpat = GETCHAR(p[1]);
! }
else
firstpat = GETCHAR(*p);
--- 156,162 ----
* end of the text.
*/
if (*p == '\\')
! firstpat = GETCHAR(p[1]); /* we know this is safe */
else
firstpat = GETCHAR(*p);
--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs
pgsql-bugs by date: