Minor regexp hacking: code coverage, moveins() and friends - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Minor regexp hacking: code coverage, moveins() and friends |
Date | |
Msg-id | 810272.1629064063@sss.pgh.pa.us Whole thread Raw |
List | pgsql-hackers |
While trying to improve the code-coverage report for backend/regex/, I found that there are portions of copyins() and copyouts() that are just plain unreachable, because those functions are over-engineered. In point of fact, the only uses of copyins() and copyouts() are in places where the target state is brand new and cannot have any pre-existing in-arcs (resp. out-arcs). This means that all the trouble we're going to in order to de-duplicate the copied arcs is entirely wasted; we could just copy the source arcs without extra checking.

A fairly significant fraction, though by no means all, of the calls of moveins() and moveouts() are likewise working with new target states, and so don't really need to do any de-duplication. Hence I propose 0001 attached, which creates simplified functions copyinstonew() and so on, for use when the target state is known not to have any existing arcs.

I'd thought that this might show a useful improvement in regexp compilation speed, but it's pretty hard to measure any noticeable change on typical regexps such as Jacobson's web corpus. (I do see maybe a 1% improvement on that, but that's below the noise threshold so I don't take it too seriously.) It is possible to demonstrate noticeable improvement on handpicked regexes, for example on HEAD:

regression=# SELECT regexp_matches('foo', 'abcdefghijklmnopq((\y|.?)+)+','');
 regexp_matches
----------------
(0 rows)

Time: 6.297 ms

versus with patch:

regression=# SELECT regexp_matches('foo', 'abcdefghijklmnopq((\y|.?)+)+','');
 regexp_matches
----------------
(0 rows)

Time: 5.506 ms

So this isn't entirely a waste of time, but it is marginal. Improving the code-coverage numbers is probably a better argument. (0001 also adds some test cases that exercise nearly everything that's reachable without OOM conditions or cancels in regc_nfa.c.)

0002 below adds some additional test cases to improve code coverage in regc_locale.c and regc_pg_locale.c.
(regc_pg_locale.c is still not great, but that's mostly because much of the code is only reachable for particular choices of database encoding, so any one coverage run hits just some of it.) Barring objections, I plan to push this in a day or two; I don't think it needs much review. regards, tom lane diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index 6d77c59e12..2b5ffcba8f 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -867,9 +867,37 @@ moveins(struct nfa *nfa, assert(oldState->ins == NULL); } +/* + * moveinstonew - move all in arcs of a state to another state + * + * The newState must not have any existing in-arcs. + */ +static void +moveinstonew(struct nfa *nfa, + struct state *oldState, + struct state *newState) +{ + struct arc *a; + + assert(oldState != newState); + assert(newState->ins == NULL && newState->nins == 0); + + /* + * Since there is no risk of creating duplicate arcs (given that none + * exist already), we can just use createarc directly and not spend any + * time de-duplicating. + */ + while ((a = oldState->ins) != NULL) + { + createarc(nfa, a->type, a->co, a->from, newState); + freearc(nfa, a); + } +} + /* * copyins - copy in arcs of a state to another state */ +#ifdef NOT_USED /* not currently needed */ static void copyins(struct nfa *nfa, struct state *oldState, @@ -945,6 +973,31 @@ copyins(struct nfa *nfa, } } } +#endif /* NOT_USED */ + +/* + * copyinstonew - copy in arcs of a state to another state + * + * The newState must not have any existing in-arcs. + */ +static void +copyinstonew(struct nfa *nfa, + struct state *oldState, + struct state *newState) +{ + struct arc *a; + + assert(oldState != newState); + assert(newState->ins == NULL && newState->nins == 0); + + /* + * Since there is no risk of creating duplicate arcs (given that none + * exist already), we can just use createarc directly and not spend any + * time de-duplicating. 
+ */ + for (a = oldState->ins; a != NULL; a = a->inchain) + createarc(nfa, a->type, a->co, a->from, newState); +} /* * mergeins - merge a list of inarcs into a state @@ -1140,9 +1193,37 @@ moveouts(struct nfa *nfa, assert(oldState->outs == NULL); } +/* + * moveoutstonew - move all out arcs of a state to another state + * + * The newState must not have any existing out-arcs. + */ +static void +moveoutstonew(struct nfa *nfa, + struct state *oldState, + struct state *newState) +{ + struct arc *a; + + assert(oldState != newState); + assert(newState->outs == NULL && newState->nouts == 0); + + /* + * Since there is no risk of creating duplicate arcs (given that none + * exist already), we can just use createarc directly and not spend any + * time de-duplicating. + */ + while ((a = oldState->outs) != NULL) + { + createarc(nfa, a->type, a->co, newState, a->to); + freearc(nfa, a); + } +} + /* * copyouts - copy out arcs of a state to another state */ +#ifdef NOT_USED /* not currently needed */ static void copyouts(struct nfa *nfa, struct state *oldState, @@ -1218,6 +1299,31 @@ copyouts(struct nfa *nfa, } } } +#endif /* NOT_USED */ + +/* + * copyoutstonew - copy out arcs of a state to another state + * + * The newState must not have any existing out-arcs. + */ +static void +copyoutstonew(struct nfa *nfa, + struct state *oldState, + struct state *newState) +{ + struct arc *a; + + assert(oldState != newState); + assert(newState->outs == NULL && newState->nouts == 0); + + /* + * Since there is no risk of creating duplicate arcs (given that none + * exist already), we can just use createarc directly and not spend any + * time de-duplicating. 
+ */ + for (a = oldState->outs; a != NULL; a = a->outchain) + createarc(nfa, a->type, a->co, newState, a->to); +} /* * cloneouts - copy out arcs of a state to another state pair, modifying type @@ -1712,7 +1818,7 @@ pull(struct nfa *nfa, s = newstate(nfa); if (NISERR()) return 0; - copyins(nfa, from, s); /* duplicate inarcs */ + copyinstonew(nfa, from, s); /* duplicate inarcs */ cparc(nfa, con, s, to); /* move constraint arc */ freearc(nfa, con); if (NISERR()) @@ -1883,7 +1989,7 @@ push(struct nfa *nfa, s = newstate(nfa); if (NISERR()) return 0; - copyouts(nfa, to, s); /* duplicate outarcs */ + copyoutstonew(nfa, to, s); /* duplicate outarcs */ cparc(nfa, con, from, s); /* move constraint arc */ freearc(nfa, con); if (NISERR()) @@ -1975,6 +2081,7 @@ combine(struct nfa *nfa, else if (a->co == RAINBOW) { /* con is incompatible if it's for a pseudocolor */ + /* (this is hypothetical; we make no such constraints today) */ if (nfa->cm->cd[con->co].flags & PSEUDO) return INCOMPATIBLE; /* otherwise, constraint constrains arc to be only its color */ @@ -2001,6 +2108,7 @@ combine(struct nfa *nfa, else if (a->co == RAINBOW) { /* con is incompatible if it's for a pseudocolor */ + /* (this is hypothetical; we make no such constraints today) */ if (nfa->cm->cd[con->co].flags & PSEUDO) return INCOMPATIBLE; /* otherwise, constraint constrains arc to be only its color */ @@ -3562,6 +3670,7 @@ carc_cmp(const void *a, const void *b) return -1; if (aa->to > bb->to) return +1; + /* This is unreached, since there should be no duplicate arcs now: */ return 0; } diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index ae3a7b6a38..979619f9a0 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -141,10 +141,12 @@ static int sortins_cmp(const void *, const void *); static void sortouts(struct nfa *, struct state *); static int sortouts_cmp(const void *, const void *); static void moveins(struct nfa *, struct state *, struct state *); -static void 
copyins(struct nfa *, struct state *, struct state *); +static void moveinstonew(struct nfa *, struct state *, struct state *); +static void copyinstonew(struct nfa *, struct state *, struct state *); static void mergeins(struct nfa *, struct state *, struct arc **, int); static void moveouts(struct nfa *, struct state *, struct state *); -static void copyouts(struct nfa *, struct state *, struct state *); +static void moveoutstonew(struct nfa *, struct state *, struct state *); +static void copyoutstonew(struct nfa *, struct state *, struct state *); static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int); static void delsub(struct nfa *, struct state *, struct state *); static void deltraverse(struct nfa *, struct state *, struct state *); @@ -639,7 +641,7 @@ makesearch(struct vars *v, { s2 = newstate(nfa); NOERR(); - copyouts(nfa, s, s2); + copyoutstonew(nfa, s, s2); NOERR(); for (a = s->ins; a != NULL; a = b) { @@ -752,7 +754,7 @@ parsebranch(struct vars *v, { /* implicit concat operator */ lp = newstate(v->nfa); NOERRN(); - moveins(v->nfa, right, lp); + moveinstonew(v->nfa, right, lp); } seencontent = 1; @@ -1137,8 +1139,8 @@ parseqatom(struct vars *v, s = newstate(v->nfa); s2 = newstate(v->nfa); NOERRN(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s2); + moveoutstonew(v->nfa, lp, s); + moveinstonew(v->nfa, rp, s2); atom->begin = s; atom->end = s2; } @@ -1290,7 +1292,7 @@ parseqatom(struct vars *v, /* general case: need an iteration node */ s2 = newstate(v->nfa); NOERRN(); - moveouts(v->nfa, atom->end, s2); + moveoutstonew(v->nfa, atom->end, s2); NOERRN(); dupnfa(v->nfa, atom->begin, atom->end, s, s2); repeat(v, s, s2, m, n); @@ -1552,8 +1554,8 @@ repeat(struct vars *v, case PAIR(0, INF): /* loop x around */ s = newstate(v->nfa); NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s); + moveoutstonew(v->nfa, lp, s); + moveinstonew(v->nfa, rp, s); EMPTYARC(lp, s); EMPTYARC(s, rp); break; @@ -1562,7 +1564,7 @@ 
repeat(struct vars *v, case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ s = newstate(v->nfa); NOERR(); - moveouts(v->nfa, lp, s); + moveoutstonew(v->nfa, lp, s); dupnfa(v->nfa, s, rp, lp, s); NOERR(); repeat(v, lp, s, 1, n - 1); @@ -1573,8 +1575,8 @@ repeat(struct vars *v, s = newstate(v->nfa); s2 = newstate(v->nfa); NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s2); + moveoutstonew(v->nfa, lp, s); + moveinstonew(v->nfa, rp, s2); EMPTYARC(lp, s); EMPTYARC(s2, rp); EMPTYARC(s2, s); @@ -1582,7 +1584,7 @@ repeat(struct vars *v, case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ s = newstate(v->nfa); NOERR(); - moveouts(v->nfa, lp, s); + moveoutstonew(v->nfa, lp, s); dupnfa(v->nfa, s, rp, lp, s); NOERR(); repeat(v, lp, s, m - 1, n - 1); @@ -1590,7 +1592,7 @@ repeat(struct vars *v, case PAIR(SOME, INF): /* do as x{m-1,}x */ s = newstate(v->nfa); NOERR(); - moveouts(v->nfa, lp, s); + moveoutstonew(v->nfa, lp, s); dupnfa(v->nfa, s, rp, lp, s); NOERR(); repeat(v, lp, s, m - 1, n); diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out index 5a6cdf47c2..611b6c7243 100644 --- a/src/test/modules/test_regex/expected/test_regex.out +++ b/src/test/modules/test_regex/expected/test_regex.out @@ -4926,3 +4926,59 @@ select * from test_regex('(\Y)+', 'foo', 'LNP'); {"",""} (2 rows) +-- and now, tests not from either Spencer or the Tcl project +-- These cases exercise additional code paths in pushfwd()/push()/combine() +select * from test_regex('a\Y(?=45)', 'a45', 'HLP'); + test_regex +----------------------------------------------- + {0,REG_ULOOKAROUND,REG_UNONPOSIX,REG_ULOCALE} + {a} +(2 rows) + +select * from test_regex('a(?=.)c', 'ac', 'HP'); + test_regex +----------------------------------- + {0,REG_ULOOKAROUND,REG_UNONPOSIX} + {ac} +(2 rows) + +select * from test_regex('a(?=.).*(?=3)3*', 'azz33', 'HP'); + test_regex +----------------------------------- + {0,REG_ULOOKAROUND,REG_UNONPOSIX} + {azz33} 
+(2 rows) + +select * from test_regex('a(?=\w)\w*(?=.).*', 'az3%', 'HLP'); + test_regex +----------------------------------------------- + {0,REG_ULOOKAROUND,REG_UNONPOSIX,REG_ULOCALE} + {az3%} +(2 rows) + +-- These exercise the bulk-arc-movement paths in moveins() and moveouts(); +-- you may need to make them longer if you change BULK_ARC_OP_USE_SORT() +select * from test_regex('ABCDEFGHIJKLMNOPQRSTUVWXYZ(?:\w|a|b|c|d|e|f|0|1|2|3|4|5|6|Q)', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ3', 'LP'); + test_regex +------------------------------- + {0,REG_UNONPOSIX,REG_ULOCALE} + {ABCDEFGHIJKLMNOPQRSTUVWXYZ3} +(2 rows) + +select * from test_regex('ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789(\Y\Y)+', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789Z', 'LP'); + test_regex +------------------------------------------- + {1,REG_UNONPOSIX,REG_ULOCALE} + {ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,""} +(2 rows) + +select * from test_regex('((x|xabcdefghijklmnopqrstuvwxyz0123456789)x*|[^y]z)$', + 'az', ''); + test_regex +-------------- + {2} + {az,az,NULL} +(2 rows) + diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql index 3419564203..13a0586272 100644 --- a/src/test/modules/test_regex/sql/test_regex.sql +++ b/src/test/modules/test_regex/sql/test_regex.sql @@ -1741,3 +1741,21 @@ select * from test_regex(repeat('x*y*z*', 200), 'x', 'N'); -- regexp {(\Y)+} foo -- } 1 select * from test_regex('(\Y)+', 'foo', 'LNP'); + + +-- and now, tests not from either Spencer or the Tcl project + +-- These cases exercise additional code paths in pushfwd()/push()/combine() +select * from test_regex('a\Y(?=45)', 'a45', 'HLP'); +select * from test_regex('a(?=.)c', 'ac', 'HP'); +select * from test_regex('a(?=.).*(?=3)3*', 'azz33', 'HP'); +select * from test_regex('a(?=\w)\w*(?=.).*', 'az3%', 'HLP'); + +-- These exercise the bulk-arc-movement paths in moveins() and moveouts(); +-- you may need to make them longer if you change BULK_ARC_OP_USE_SORT() +select * from 
test_regex('ABCDEFGHIJKLMNOPQRSTUVWXYZ(?:\w|a|b|c|d|e|f|0|1|2|3|4|5|6|Q)', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ3', 'LP'); +select * from test_regex('ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789(\Y\Y)+', + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789Z', 'LP'); +select * from test_regex('((x|xabcdefghijklmnopqrstuvwxyz0123456789)x*|[^y]z)$', + 'az', ''); diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out index 611b6c7243..6242d0baa9 100644 --- a/src/test/modules/test_regex/expected/test_regex.out +++ b/src/test/modules/test_regex/expected/test_regex.out @@ -937,6 +937,34 @@ select * from test_regex('a[[=x=]]', 'az', '+Lb'); {0,REG_ULOCALE} (1 row) +-- expectMatch 9.9b &iL {a[[=Y=]]} ay ay +select * from test_regex('a[[=Y=]]', 'ay', 'iL'); + test_regex +----------------- + {0,REG_ULOCALE} + {ay} +(2 rows) + +select * from test_regex('a[[=Y=]]', 'ay', 'iLb'); + test_regex +----------------- + {0,REG_ULOCALE} + {ay} +(2 rows) + +-- expectNomatch 9.9c &L {a[[=Y=]]} ay +select * from test_regex('a[[=Y=]]', 'ay', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} +(1 row) + +select * from test_regex('a[[=Y=]]', 'ay', 'Lb'); + test_regex +----------------- + {0,REG_ULOCALE} +(1 row) + -- expectError 9.10 & {a[0-[=x=]]} ERANGE select * from test_regex('a[0-[=x=]]', '', ''); ERROR: invalid regular expression: invalid character range @@ -2932,6 +2960,34 @@ select * from test_regex('a[^b-d]', 'aC', 'iMb'); {0,REG_UUNPORT} (1 row) +-- expectMatch 19.6 &iM {a[B-Z]} aC aC +select * from test_regex('a[B-Z]', 'aC', 'iM'); + test_regex +----------------- + {0,REG_UUNPORT} + {aC} +(2 rows) + +select * from test_regex('a[B-Z]', 'aC', 'iMb'); + test_regex +----------------- + {0,REG_UUNPORT} + {aC} +(2 rows) + +-- expectNomatch 19.7 &iM {a[^B-Z]} aC +select * from test_regex('a[^B-Z]', 'aC', 'iM'); + test_regex +----------------- + {0,REG_UUNPORT} +(1 row) + +select * from test_regex('a[^B-Z]', 'aC', 'iMb'); + test_regex 
+----------------- + {0,REG_UUNPORT} +(1 row) + -- doing 20 "directors and embedded options" -- expectError 20.1 & ***? BADPAT select * from test_regex('***?', '', ''); @@ -3850,6 +3906,14 @@ select * from test_regex('^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', 'foo/bar/baz', {foo/bar/baz,foo,bar,baz} (2 rows) +-- expectMatch 24.14 PRT {^(.+?)(?:/(.+?))(?:/(.+?)\3)?$} {foo/bar/baz/quux} {foo/bar/baz/quux} {foo} {bar/baz/quux} {} +select * from test_regex('^(.+?)(?:/(.+?))(?:/(.+?)\3)?$', 'foo/bar/baz/quux', 'PRT'); + test_regex +---------------------------------------------- + {3,REG_UBACKREF,REG_UNONPOSIX,REG_USHORTEST} + {foo/bar/baz/quux,foo,bar/baz/quux,NULL} +(2 rows) + -- doing 25 "mixed quantifiers" -- # this is very incomplete as yet -- # should include | diff --git a/src/test/modules/test_regex/expected/test_regex_utf8.out b/src/test/modules/test_regex/expected/test_regex_utf8.out index 112698ac61..3b56f36c07 100644 --- a/src/test/modules/test_regex/expected/test_regex_utf8.out +++ b/src/test/modules/test_regex/expected/test_regex_utf8.out @@ -98,3 +98,109 @@ select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237', {0,REG_UBBS,REG_UNONPOSIX,REG_UUNPORT,REG_ULOCALE} (1 row) +select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237', + E'\u1500\u1237', 'iELMP'); + test_regex +---------------------------------------------------- + {0,REG_UBBS,REG_UNONPOSIX,REG_UUNPORT,REG_ULOCALE} + {ᔀሷ} +(2 rows) + +-- systematically test char classes +select * from test_regex('[[:alnum:]]+', E'x\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {xᔀሷ} +(2 rows) + +select * from test_regex('[[:alpha:]]+', E'x\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {xᔀሷ} +(2 rows) + +select * from test_regex('[[:ascii:]]+', E'x\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {x} +(2 rows) + +select * from test_regex('[[:blank:]]+', E'x \t\u1500\u1237', 'L'); + test_regex 
+----------------- + {0,REG_ULOCALE} + {" "} +(2 rows) + +select * from test_regex('[[:cntrl:]]+', E'x\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} +(1 row) + +select * from test_regex('[[:digit:]]+', E'x9\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {9} +(2 rows) + +select * from test_regex('[[:graph:]]+', E'x\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {xᔀሷ} +(2 rows) + +select * from test_regex('[[:lower:]]+', E'x\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {x} +(2 rows) + +select * from test_regex('[[:print:]]+', E'x\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {xᔀሷ} +(2 rows) + +select * from test_regex('[[:punct:]]+', E'x.\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {.} +(2 rows) + +select * from test_regex('[[:space:]]+', E'x \t\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {" "} +(2 rows) + +select * from test_regex('[[:upper:]]+', E'xX\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {X} +(2 rows) + +select * from test_regex('[[:xdigit:]]+', E'xa9\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {a9} +(2 rows) + +select * from test_regex('[[:word:]]+', E'x_\u1500\u1237', 'L'); + test_regex +----------------- + {0,REG_ULOCALE} + {x_ᔀሷ} +(2 rows) + diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql index 13a0586272..389b8b61b3 100644 --- a/src/test/modules/test_regex/sql/test_regex.sql +++ b/src/test/modules/test_regex/sql/test_regex.sql @@ -304,6 +304,12 @@ select * from test_regex('a[[=x=]]', 'ay', '+Lb'); -- expectNomatch 9.9 &+L {a[[=x=]]} az select * from test_regex('a[[=x=]]', 'az', '+L'); select * from test_regex('a[[=x=]]', 'az', '+Lb'); +-- expectMatch 9.9b &iL {a[[=Y=]]} ay ay +select * from test_regex('a[[=Y=]]', 'ay', 'iL'); +select * from 
test_regex('a[[=Y=]]', 'ay', 'iLb'); +-- expectNomatch 9.9c &L {a[[=Y=]]} ay +select * from test_regex('a[[=Y=]]', 'ay', 'L'); +select * from test_regex('a[[=Y=]]', 'ay', 'Lb'); -- expectError 9.10 & {a[0-[=x=]]} ERANGE select * from test_regex('a[0-[=x=]]', '', ''); select * from test_regex('a[0-[=x=]]', '', 'b'); @@ -864,6 +870,12 @@ select * from test_regex('a[b-d]', 'aC', 'iMb'); -- expectNomatch 19.5 &iM {a[^b-d]} aC select * from test_regex('a[^b-d]', 'aC', 'iM'); select * from test_regex('a[^b-d]', 'aC', 'iMb'); +-- expectMatch 19.6 &iM {a[B-Z]} aC aC +select * from test_regex('a[B-Z]', 'aC', 'iM'); +select * from test_regex('a[B-Z]', 'aC', 'iMb'); +-- expectNomatch 19.7 &iM {a[^B-Z]} aC +select * from test_regex('a[^B-Z]', 'aC', 'iM'); +select * from test_regex('a[^B-Z]', 'aC', 'iMb'); -- doing 20 "directors and embedded options" @@ -1171,6 +1183,8 @@ select * from test_regex('z*4', '123zzzz456', '-'); select * from test_regex('z*?4', '123zzzz456', 'PT'); -- expectMatch 24.13 PT {^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$} {foo/bar/baz} {foo/bar/baz} {foo} {bar} {baz} select * from test_regex('^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', 'foo/bar/baz', 'PT'); +-- expectMatch 24.14 PRT {^(.+?)(?:/(.+?))(?:/(.+?)\3)?$} {foo/bar/baz/quux} {foo/bar/baz/quux} {foo} {bar/baz/quux} {} +select * from test_regex('^(.+?)(?:/(.+?))(?:/(.+?)\3)?$', 'foo/bar/baz/quux', 'PRT'); -- doing 25 "mixed quantifiers" -- # this is very incomplete as yet diff --git a/src/test/modules/test_regex/sql/test_regex_utf8.sql b/src/test/modules/test_regex/sql/test_regex_utf8.sql index cfd9396194..f23907162e 100644 --- a/src/test/modules/test_regex/sql/test_regex_utf8.sql +++ b/src/test/modules/test_regex/sql/test_regex_utf8.sql @@ -58,3 +58,21 @@ select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237', E'\u1500\u1237', 'ELMP'); select * from test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237', E'A\u1239', 'ELMP'); +select * from 
test_regex('[[:alnum:]]*[[:upper:]]*[\u1000-\u2000]*\u1237', + E'\u1500\u1237', 'iELMP'); + +-- systematically test char classes +select * from test_regex('[[:alnum:]]+', E'x\u1500\u1237', 'L'); +select * from test_regex('[[:alpha:]]+', E'x\u1500\u1237', 'L'); +select * from test_regex('[[:ascii:]]+', E'x\u1500\u1237', 'L'); +select * from test_regex('[[:blank:]]+', E'x \t\u1500\u1237', 'L'); +select * from test_regex('[[:cntrl:]]+', E'x\u1500\u1237', 'L'); +select * from test_regex('[[:digit:]]+', E'x9\u1500\u1237', 'L'); +select * from test_regex('[[:graph:]]+', E'x\u1500\u1237', 'L'); +select * from test_regex('[[:lower:]]+', E'x\u1500\u1237', 'L'); +select * from test_regex('[[:print:]]+', E'x\u1500\u1237', 'L'); +select * from test_regex('[[:punct:]]+', E'x.\u1500\u1237', 'L'); +select * from test_regex('[[:space:]]+', E'x \t\u1500\u1237', 'L'); +select * from test_regex('[[:upper:]]+', E'xX\u1500\u1237', 'L'); +select * from test_regex('[[:xdigit:]]+', E'xa9\u1500\u1237', 'L'); +select * from test_regex('[[:word:]]+', E'x_\u1500\u1237', 'L');
pgsql-hackers by date: