Re: UTF8MatchText - Mailing list pgsql-patches
From | Andrew Dunstan |
---|---|
Subject | Re: UTF8MatchText |
Date | |
Msg-id | 46505986.1020005@dunslane.net Whole thread Raw |
In response to | Re: UTF8MatchText (Andrew Dunstan <andrew@dunslane.net>) |
Responses | Re: UTF8MatchText |
List | pgsql-patches |
Oops. Patch attached this time. Andrew Dunstan wrote: > > > I wrote: >> >> >>> >>> It is only when you have a pattern like '%_' when this is a problem >>> and we could detect this and do byte by byte when it's not. Now we >>> check (*p == '\\') || (*p == '_') in each iteration when we scan >>> over characters for '%', and we could do it once and have different >>> loops for the two cases. >>> >>> Other than this part that I think can be optimized I don't see >>> anything wrong with the idea behind the patch. To make the '%' case >>> fast might be an important optimization for a lot of use cases. It's >>> not uncommon that '%' matches a bigger part of the string than the >>> rest of the pattern. >>> >> >> >> Are you sure? The big remaining char-matching bottleneck will surely >> be in the code that scans for a place to start matching a %. But >> that's exactly where we can't use byte matching for cases where the >> charset might include AB and BA as characters - the pattern might >> contain %BA and the string AB. However, this isn't a danger for UTF8, >> which leads me to think that we do indeed need a special case for >> UTF8, but for a different improvement from that proposed in the >> original patch. I'll post an updated patch shortly. >> > > Here is a patch that implements this. Please analyse for possible > breakage. > > cheers > > andrew > > > > ---------------------------(end of broadcast)--------------------------- > TIP 9: In versions below 8.0, the planner will ignore your desire to > choose an index scan if your joining column's datatypes do not > match > Index: src/backend/utils/adt/like.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/like.c,v retrieving revision 1.68 diff -c -r1.68 like.c *** src/backend/utils/adt/like.c 27 Feb 2007 23:48:08 -0000 1.68 --- src/backend/utils/adt/like.c 20 May 2007 14:16:22 -0000 *************** *** 28,48 **** #define LIKE_ABORT (-1) ! 
static int MatchText(char *t, int tlen, char *p, int plen); ! static int MatchTextIC(char *t, int tlen, char *p, int plen); ! static int MatchBytea(char *t, int tlen, char *p, int plen); ! static text *do_like_escape(text *, text *); ! static int MBMatchText(char *t, int tlen, char *p, int plen); ! static int MBMatchTextIC(char *t, int tlen, char *p, int plen); static text *MB_do_like_escape(text *, text *); /*-------------------- * Support routine for MatchText. Compares given multibyte streams * as wide characters. If they match, returns 1 otherwise returns 0. *-------------------- */ ! static int wchareq(char *p1, char *p2) { int p1_len; --- 28,51 ---- #define LIKE_ABORT (-1) ! static int SB_MatchText(char *t, int tlen, char *p, int plen); ! static int SB_MatchTextIC(char *t, int tlen, char *p, int plen); ! static text *SB_do_like_escape(text *, text *); ! static int MB_MatchText(char *t, int tlen, char *p, int plen); static text *MB_do_like_escape(text *, text *); + static int UTF8_MatchText(char *t, int tlen, char *p, int plen); + + static int GenericMatchText(char *s, int slen, char* p, int plen); + static int mbtexticlike(text *str, text *pat); + /*-------------------- * Support routine for MatchText. Compares given multibyte streams * as wide characters. If they match, returns 1 otherwise returns 0. *-------------------- */ ! static inline int wchareq(char *p1, char *p2) { int p1_len; *************** *** 72,86 **** * of getting a single character transformed to the system's wchar_t format. * So now, we just downcase the strings using lower() and apply regular LIKE * comparison. This should be revisited when we install better locale support. - * - * Note that MBMatchText and MBMatchTextIC do exactly the same thing now. - * Is it worth refactoring to avoid duplicated code? They might become - * different again in the future. 
*/ /* Set up to compile like_match.c for multibyte characters */ #define CHAREQ(p1, p2) wchareq(p1, p2) - #define ICHAREQ(p1, p2) wchareq(p1, p2) #define NextChar(p, plen) \ do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0) #define CopyAdvChar(dst, src, srclen) \ --- 75,87 ---- * of getting a single character transformed to the system's wchar_t format. * So now, we just downcase the strings using lower() and apply regular LIKE * comparison. This should be revisited when we install better locale support. */ + #define NextByte(p, plen) ((p)++, (plen)--) + #define BYTEEQ(p1, p2) (*(p1) == *(p2)) + /* Set up to compile like_match.c for multibyte characters */ #define CHAREQ(p1, p2) wchareq(p1, p2) #define NextChar(p, plen) \ do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0) #define CopyAdvChar(dst, src, srclen) \ *************** *** 89,122 **** while (__l-- > 0) \ *(dst)++ = *(src)++; \ } while (0) ! #define MatchText MBMatchText ! #define MatchTextIC MBMatchTextIC #define do_like_escape MB_do_like_escape #include "like_match.c" ! #undef CHAREQ ! #undef ICHAREQ ! #undef NextChar ! #undef CopyAdvChar ! #undef MatchText ! #undef MatchTextIC ! #undef do_like_escape /* Set up to compile like_match.c for single-byte characters */ ! #define CHAREQ(p1, p2) (*(p1) == *(p2)) ! #define ICHAREQ(p1, p2) (tolower((unsigned char) *(p1)) == tolower((unsigned char) *(p2))) ! #define NextChar(p, plen) ((p)++, (plen)--) #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) #include "like_match.c" ! /* And some support for BYTEA */ ! #define BYTEA_CHAREQ(p1, p2) (*(p1) == *(p2)) ! #define BYTEA_NextChar(p, plen) ((p)++, (plen)--) ! #define BYTEA_CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) /* * interface routines called by the function manager --- 90,173 ---- while (__l-- > 0) \ *(dst)++ = *(src)++; \ } while (0) + #define BYTEEQIC(b1, b2) BYTEEQ(b1, b2) ! 
#define MatchText MB_MatchText #define do_like_escape MB_do_like_escape #include "like_match.c" ! /* Set up to compile like_match.c for UTF8 characters */ ! #define CHAREQ(p1, p2) wchareq(p1, p2) ! #define NextChar(p, plen) \ ! do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0) ! #define CopyAdvChar(dst, src, srclen) \ ! do { int __l = pg_mblen(src); \ ! (srclen) -= __l; \ ! while (__l-- > 0) \ ! *(dst)++ = *(src)++; \ ! } while (0) ! #define BYTEEQIC(b1, b2) BYTEEQ(b1, b2) ! #define UTF8_OPT ! ! #define MatchText UTF8_MatchText ! ! #include "like_match.c" /* Set up to compile like_match.c for single-byte characters */ ! #define CHAREQ(p1, p2) BYTEEQ(p1, p2) ! #define NextChar(p, plen) NextByte(p, plen) #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) + #define BYTEEQIC(b1, b2) BYTEEQ(b1, b2) + + #define MatchText SB_MatchText + #define do_like_escape SB_do_like_escape #include "like_match.c" ! /* set up to compile like_match.c for single byte case insensitive matching */ + #define CHAREQ(p1, p2) (tolower((unsigned char) *(p1)) == tolower((unsigned char) *(p2))) + #define NextChar(p, plen) NextByte(p, plen) + #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) + #define BYTEEQIC(b1, b2) CHAREQ(b1, b2) + + #define MatchText SB_MatchTextIC + + #include "like_match.c" + + static inline int + GenericMatchText(char *s, int slen, char* p, int plen) + { + if (pg_database_encoding_max_length() == 1) + return SB_MatchText(s, slen, p, plen); + else if (GetDatabaseEncoding() == PG_UTF8) + return UTF8_MatchText(s, slen, p, plen); + else + return MB_MatchText(s, slen, p, plen); + } + + static inline int + mbtexticlike(text *str, text *pat) + { + char *s, + *p; + int slen, + plen; + + /* Force inputs to lower case to achieve case insensitivity */ + str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str))); + pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat))); + s = VARDATA(str); + slen = 
(VARSIZE(str) - VARHDRSZ); + p = VARDATA(pat); + plen = (VARSIZE(pat) - VARHDRSZ); + + if (GetDatabaseEncoding() == PG_UTF8) + return UTF8_MatchText(s, slen, p, plen); + else + return MB_MatchText(s, slen, p, plen); + } /* * interface routines called by the function manager *************** *** 138,147 **** p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! if (pg_database_encoding_max_length() == 1) ! result = (MatchText(s, slen, p, plen) == LIKE_TRUE); ! else ! result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } --- 189,195 ---- p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } *************** *** 162,171 **** p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! if (pg_database_encoding_max_length() == 1) ! result = (MatchText(s, slen, p, plen) != LIKE_TRUE); ! else ! result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } --- 210,216 ---- p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } *************** *** 186,195 **** p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! if (pg_database_encoding_max_length() == 1) ! result = (MatchText(s, slen, p, plen) == LIKE_TRUE); ! else ! result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } --- 231,237 ---- p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } *************** *** 210,219 **** p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! if (pg_database_encoding_max_length() == 1) ! result = (MatchText(s, slen, p, plen) != LIKE_TRUE); ! else ! result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } --- 252,258 ---- p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! 
result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } *************** *** 234,240 **** p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MatchBytea(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } --- 273,279 ---- p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (SB_MatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } *************** *** 255,261 **** p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MatchBytea(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } --- 294,300 ---- p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (SB_MatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } *************** *** 281,305 **** slen = strlen(s); p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE); } else { - /* Force inputs to lower case to achieve case insensitivity */ text *strtext; strtext = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(str))); ! strtext = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(strtext))); ! pat = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(pat))); ! ! s = VARDATA(strtext); ! slen = (VARSIZE(strtext) - VARHDRSZ); ! p = VARDATA(pat); ! plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE); } PG_RETURN_BOOL(result); --- 320,334 ---- slen = strlen(s); p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (SB_MatchTextIC(s, slen, p, plen) == LIKE_TRUE); } else { text *strtext; strtext = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(str))); ! result = (mbtexticlike(strtext, pat) == LIKE_TRUE); } PG_RETURN_BOOL(result); *************** *** 322,346 **** slen = strlen(s); p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! 
result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE); } else { - /* Force inputs to lower case to achieve case insensitivity */ text *strtext; strtext = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(str))); ! strtext = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(strtext))); ! pat = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(pat))); ! ! s = VARDATA(strtext); ! slen = (VARSIZE(strtext) - VARHDRSZ); ! p = VARDATA(pat); ! plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE); } PG_RETURN_BOOL(result); --- 351,365 ---- slen = strlen(s); p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (SB_MatchTextIC(s, slen, p, plen) != LIKE_TRUE); } else { text *strtext; strtext = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(str))); ! result = (mbtexticlike(strtext, pat) != LIKE_TRUE); } PG_RETURN_BOOL(result); *************** *** 363,383 **** slen = (VARSIZE(str) - VARHDRSZ); p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE); } else ! { ! /* Force inputs to lower case to achieve case insensitivity */ ! str = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(str))); ! pat = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(pat))); ! s = VARDATA(str); ! slen = (VARSIZE(str) - VARHDRSZ); ! p = VARDATA(pat); ! plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE); ! } PG_RETURN_BOOL(result); } --- 382,391 ---- slen = (VARSIZE(str) - VARHDRSZ); p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (SB_MatchTextIC(s, slen, p, plen) == LIKE_TRUE); } else ! result = (mbtexticlike(str, pat) == LIKE_TRUE); PG_RETURN_BOOL(result); } *************** *** 399,419 **** slen = (VARSIZE(str) - VARHDRSZ); p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE); } else ! { ! 
/* Force inputs to lower case to achieve case insensitivity */ ! str = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(str))); ! pat = DatumGetTextP(DirectFunctionCall1(lower, ! PointerGetDatum(pat))); ! s = VARDATA(str); ! slen = (VARSIZE(str) - VARHDRSZ); ! p = VARDATA(pat); ! plen = (VARSIZE(pat) - VARHDRSZ); ! result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE); ! } PG_RETURN_BOOL(result); } --- 407,416 ---- slen = (VARSIZE(str) - VARHDRSZ); p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); ! result = (SB_MatchTextIC(s, slen, p, plen) != LIKE_TRUE); } else ! result = (mbtexticlike(str, pat) != LIKE_TRUE); PG_RETURN_BOOL(result); } *************** *** 430,436 **** text *result; if (pg_database_encoding_max_length() == 1) ! result = do_like_escape(pat, esc); else result = MB_do_like_escape(pat, esc); --- 427,433 ---- text *result; if (pg_database_encoding_max_length() == 1) ! result = SB_do_like_escape(pat, esc); else result = MB_do_like_escape(pat, esc); *************** *** 446,624 **** { bytea *pat = PG_GETARG_BYTEA_P(0); bytea *esc = PG_GETARG_BYTEA_P(1); ! bytea *result; ! char *p, ! *e, ! *r; ! int plen, ! elen; ! bool afterescape; ! ! p = VARDATA(pat); ! plen = (VARSIZE(pat) - VARHDRSZ); ! e = VARDATA(esc); ! elen = (VARSIZE(esc) - VARHDRSZ); ! ! /* ! * Worst-case pattern growth is 2x --- unlikely, but it's hardly worth ! * trying to calculate the size more accurately than that. ! */ ! result = (text *) palloc(plen * 2 + VARHDRSZ); ! r = VARDATA(result); ! ! if (elen == 0) ! { ! /* ! * No escape character is wanted. Double any backslashes in the ! * pattern to make them act like ordinary characters. ! */ ! while (plen > 0) ! { ! if (*p == '\\') ! *r++ = '\\'; ! BYTEA_CopyAdvChar(r, p, plen); ! } ! } ! else ! { ! /* ! * The specified escape must be only a single character. ! */ ! BYTEA_NextChar(e, elen); ! if (elen != 0) ! ereport(ERROR, ! (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), ! errmsg("invalid escape string"), ! 
errhint("Escape string must be empty or one character."))); ! ! e = VARDATA(esc); ! ! /* ! * If specified escape is '\', just copy the pattern as-is. ! */ ! if (*e == '\\') ! { ! memcpy(result, pat, VARSIZE(pat)); ! PG_RETURN_BYTEA_P(result); ! } ! ! /* ! * Otherwise, convert occurrences of the specified escape character to ! * '\', and double occurrences of '\' --- unless they immediately ! * follow an escape character! ! */ ! afterescape = false; ! while (plen > 0) ! { ! if (BYTEA_CHAREQ(p, e) && !afterescape) ! { ! *r++ = '\\'; ! BYTEA_NextChar(p, plen); ! afterescape = true; ! } ! else if (*p == '\\') ! { ! *r++ = '\\'; ! if (!afterescape) ! *r++ = '\\'; ! BYTEA_NextChar(p, plen); ! afterescape = false; ! } ! else ! { ! BYTEA_CopyAdvChar(r, p, plen); ! afterescape = false; ! } ! } ! } ! SET_VARSIZE(result, r - ((char *) result)); ! ! PG_RETURN_BYTEA_P(result); } - /* - * Same as above, but specifically for bytea (binary) datatype - */ - static int - MatchBytea(char *t, int tlen, char *p, int plen) - { - /* Fast path for match-everything pattern */ - if ((plen == 1) && (*p == '%')) - return LIKE_TRUE; - - while ((tlen > 0) && (plen > 0)) - { - if (*p == '\\') - { - /* Next pattern char must match literally, whatever it is */ - BYTEA_NextChar(p, plen); - if ((plen <= 0) || !BYTEA_CHAREQ(t, p)) - return LIKE_FALSE; - } - else if (*p == '%') - { - /* %% is the same as % according to the SQL standard */ - /* Advance past all %'s */ - while ((plen > 0) && (*p == '%')) - BYTEA_NextChar(p, plen); - /* Trailing percent matches everything. */ - if (plen <= 0) - return LIKE_TRUE; - - /* - * Otherwise, scan for a text position at which we can match the - * rest of the pattern. - */ - while (tlen > 0) - { - /* - * Optimization to prevent most recursion: don't recurse - * unless first pattern char might match this text char. 
- */ - if (BYTEA_CHAREQ(t, p) || (*p == '\\') || (*p == '_')) - { - int matched = MatchBytea(t, tlen, p, plen); - - if (matched != LIKE_FALSE) - return matched; /* TRUE or ABORT */ - } - - BYTEA_NextChar(t, tlen); - } - - /* - * End of text with no match, so no point in trying later places - * to start matching this pattern. - */ - return LIKE_ABORT; - } - else if ((*p != '_') && !BYTEA_CHAREQ(t, p)) - { - /* - * Not the single-character wildcard and no explicit match? Then - * time to quit... - */ - return LIKE_FALSE; - } - - BYTEA_NextChar(t, tlen); - BYTEA_NextChar(p, plen); - } - - if (tlen > 0) - return LIKE_FALSE; /* end of pattern, but not of text */ - - /* End of input string. Do we have matching pattern remaining? */ - while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of - * pattern */ - BYTEA_NextChar(p, plen); - if (plen <= 0) - return LIKE_TRUE; - - /* - * End of text with no match, so no point in trying later places to start - * matching this pattern. - */ - return LIKE_ABORT; - } /* MatchBytea() */ --- 443,450 ---- { bytea *pat = PG_GETARG_BYTEA_P(0); bytea *esc = PG_GETARG_BYTEA_P(1); ! bytea *result = SB_do_like_escape((text *)pat, (text *)esc); ! PG_RETURN_BYTEA_P((bytea *)result); } Index: src/backend/utils/adt/like_match.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/like_match.c,v retrieving revision 1.15 diff -c -r1.15 like_match.c *** src/backend/utils/adt/like_match.c 27 Feb 2007 23:48:08 -0000 1.15 --- src/backend/utils/adt/like_match.c 20 May 2007 14:16:24 -0000 *************** *** 9,19 **** * Before the inclusion, we need to define following macros: * * CHAREQ ! * ICHAREQ * NextChar * CopyAdvChar * MatchText (MBMatchText) - * MatchTextIC (MBMatchTextIC) * do_like_escape (MB_do_like_escape) * * Copyright (c) 1996-2007, PostgreSQL Global Development Group --- 9,18 ---- * Before the inclusion, we need to define following macros: * * CHAREQ ! 
* BYTEEQIC * NextChar * CopyAdvChar * MatchText (MBMatchText) * do_like_escape (MB_do_like_escape) * * Copyright (c) 1996-2007, PostgreSQL Global Development Group *************** *** 70,75 **** --- 69,82 ---- *-------------------- */ + #ifdef UTF8_OPT + #define PCT_CHAREQ(c1, c2) BYTEEQ(c1, c2) + #define PCT_NEXT(a, b) NextByte(a, b) + #else + #define PCT_CHAREQ(c1, c2) CHAREQ(c1, c2) + #define PCT_NEXT(a, b) NextChar(a, b) + #endif + static int MatchText(char *t, int tlen, char *p, int plen) { *************** *** 81,89 **** { if (*p == '\\') { ! /* Next pattern char must match literally, whatever it is */ ! NextChar(p, plen); ! if ((plen <= 0) || !CHAREQ(t, p)) return LIKE_FALSE; } else if (*p == '%') --- 88,96 ---- { if (*p == '\\') { ! /* Next byte must match literally, whatever it is */ ! NextByte(p, plen); ! if ((plen <= 0) || !BYTEEQ(t, p)) return LIKE_FALSE; } else if (*p == '%') *************** *** 91,97 **** /* %% is the same as % according to the SQL standard */ /* Advance past all %'s */ while ((plen > 0) && (*p == '%')) ! NextChar(p, plen); /* Trailing percent matches everything. */ if (plen <= 0) return LIKE_TRUE; --- 98,104 ---- /* %% is the same as % according to the SQL standard */ /* Advance past all %'s */ while ((plen > 0) && (*p == '%')) ! NextByte(p, plen); /* Trailing percent matches everything. */ if (plen <= 0) return LIKE_TRUE; *************** *** 106,112 **** * Optimization to prevent most recursion: don't recurse * unless first pattern char might match this text char. */ ! if (CHAREQ(t, p) || (*p == '\\') || (*p == '_')) { int matched = MatchText(t, tlen, p, plen); --- 113,119 ---- * Optimization to prevent most recursion: don't recurse * unless first pattern char might match this text char. */ ! if (PCT_CHAREQ(t, p) || (*p == '\\') || (*p == '_')) { int matched = MatchText(t, tlen, p, plen); *************** *** 114,120 **** return matched; /* TRUE or ABORT */ } ! 
NextChar(t, tlen); } /* --- 121,127 ---- return matched; /* TRUE or ABORT */ } ! PCT_NEXT(t, tlen); } /* *************** *** 123,215 **** */ return LIKE_ABORT; } ! else if ((*p != '_') && !CHAREQ(t, p)) ! { ! /* ! * Not the single-character wildcard and no explicit match? Then ! * time to quit... ! */ ! return LIKE_FALSE; ! } ! ! NextChar(t, tlen); ! NextChar(p, plen); ! } ! ! if (tlen > 0) ! return LIKE_FALSE; /* end of pattern, but not of text */ ! ! /* End of input string. Do we have matching pattern remaining? */ ! while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of ! * pattern */ ! NextChar(p, plen); ! if (plen <= 0) ! return LIKE_TRUE; ! ! /* ! * End of text with no match, so no point in trying later places to start ! * matching this pattern. ! */ ! return LIKE_ABORT; ! } /* MatchText() */ ! ! /* ! * Same as above, but ignore case ! */ ! static int ! MatchTextIC(char *t, int tlen, char *p, int plen) ! { ! /* Fast path for match-everything pattern */ ! if ((plen == 1) && (*p == '%')) ! return LIKE_TRUE; ! ! while ((tlen > 0) && (plen > 0)) ! { ! if (*p == '\\') ! { ! /* Next pattern char must match literally, whatever it is */ ! NextChar(p, plen); ! if ((plen <= 0) || !ICHAREQ(t, p)) ! return LIKE_FALSE; ! } ! else if (*p == '%') { ! /* %% is the same as % according to the SQL standard */ ! /* Advance past all %'s */ ! while ((plen > 0) && (*p == '%')) ! NextChar(p, plen); ! /* Trailing percent matches everything. */ ! if (plen <= 0) ! return LIKE_TRUE; ! ! /* ! * Otherwise, scan for a text position at which we can match the ! * rest of the pattern. ! */ ! while (tlen > 0) ! { ! /* ! * Optimization to prevent most recursion: don't recurse ! * unless first pattern char might match this text char. ! */ ! if (ICHAREQ(t, p) || (*p == '\\') || (*p == '_')) ! { ! int matched = MatchTextIC(t, tlen, p, plen); ! ! if (matched != LIKE_FALSE) ! return matched; /* TRUE or ABORT */ ! } ! ! NextChar(t, tlen); ! } ! ! /* ! 
* End of text with no match, so no point in trying later places ! * to start matching this pattern. ! */ ! return LIKE_ABORT; } ! else if ((*p != '_') && !ICHAREQ(t, p)) { /* * Not the single-character wildcard and no explicit match? Then --- 130,142 ---- */ return LIKE_ABORT; } ! else if (*p == '_') { ! NextChar(t, tlen); ! NextByte(p, plen); ! continue; } ! else if (!BYTEEQIC(t, p)) { /* * Not the single-character wildcard and no explicit match? Then *************** *** 217,225 **** */ return LIKE_FALSE; } ! ! NextChar(t, tlen); ! NextChar(p, plen); } if (tlen > 0) --- 144,156 ---- */ return LIKE_FALSE; } ! /* ! * It is safe to use NextByte instead of NextChar here, even for ! * multi-byte character sets, because we are not following ! * immediately after a wildcard character. ! */ ! NextByte(t, tlen); ! NextByte(p, plen); } if (tlen > 0) *************** *** 228,234 **** /* End of input string. Do we have matching pattern remaining? */ while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of * pattern */ ! NextChar(p, plen); if (plen <= 0) return LIKE_TRUE; --- 159,165 ---- /* End of input string. Do we have matching pattern remaining? */ while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of * pattern */ ! NextByte(p, plen); if (plen <= 0) return LIKE_TRUE; *************** *** 237,248 **** * matching this pattern. */ return LIKE_ABORT; ! } /* MatchTextIC() */ /* * like_escape() --- given a pattern and an ESCAPE string, * convert the pattern to use Postgres' standard backslash escape convention. */ static text * do_like_escape(text *pat, text *esc) { --- 168,181 ---- * matching this pattern. */ return LIKE_ABORT; ! } /* MatchText() */ /* * like_escape() --- given a pattern and an ESCAPE string, * convert the pattern to use Postgres' standard backslash escape convention. 
*/ + #ifdef do_like_escape + static text * do_like_escape(text *pat, text *esc) { *************** *** 336,338 **** --- 269,288 ---- return result; } + #endif /* do_like_escape */ + + #undef CHAREQ + #undef BYTEEQIC + #undef PCT_CHAREQ + #undef PCT_NEXT + #undef NextChar + #undef CopyAdvChar + #undef MatchText + + #ifdef do_like_escape + #undef do_like_escape + #endif + + #ifdef UTF8_OPT + #undef UTF8_OPT + #endif
pgsql-patches by date: