diff options
-rw-r--r-- | src/backend/regex/regexec.c | 48 | ||||
-rw-r--r-- | src/test/modules/test_regex/expected/test_regex.out | 14 | ||||
-rw-r--r-- | src/test/modules/test_regex/sql/test_regex.sql | 4 | ||||
-rw-r--r-- | src/test/regress/expected/regex.out | 13 | ||||
-rw-r--r-- | src/test/regress/sql/regex.sql | 4 |
5 files changed, 71 insertions, 12 deletions
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index aa5ba85514..2411e6561d 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -216,14 +216,14 @@ pg_regexec(regex_t *re, if (backref && nmatch <= v->g->nsub) { /* need larger work area */ - if (v->g->nsub + 1 <= LOCALMAT) + v->nmatch = v->g->nsub + 1; + if (v->nmatch <= LOCALMAT) v->pmatch = mat; else - v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) * - sizeof(regmatch_t)); + v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t)); if (v->pmatch == NULL) return REG_ESPACE; - v->nmatch = v->g->nsub + 1; + zapallsubs(v->pmatch, v->nmatch); } else { @@ -488,7 +488,6 @@ find(struct vars *v, return REG_OKAY; /* find submatches */ - zapallsubs(v->pmatch, v->nmatch); return cdissect(v, v->g->tree, begin, end); } @@ -599,7 +598,6 @@ cfindloop(struct vars *v, break; /* no match with this begin point, try next */ MDEBUG(("tentative end %ld\n", LOFF(end))); /* Dissect the potential match to see if it really matches */ - zapallsubs(v->pmatch, v->nmatch); er = cdissect(v, v->g->tree, begin, end); if (er == REG_OKAY) { @@ -647,6 +645,8 @@ cfindloop(struct vars *v, /* * zapallsubs - initialize all subexpression matches to "no match" + * + * Note that p[0], the overall-match location, is not touched. */ static void zapallsubs(regmatch_t *p, @@ -716,8 +716,30 @@ subset(struct vars *v, * DFA and found that the proposed substring satisfies the DFA. (We make * the caller do that because in concatenation and iteration nodes, it's * much faster to check all the substrings against the child DFAs before we - * recurse.) Also, caller must have cleared subexpression match data via - * zaptreesubs (or zapallsubs at the top level). + * recurse.) + * + * A side-effect of a successful match is to save match locations for + * capturing subexpressions in v->pmatch[]. This is a little bit tricky, + * so we make the following rules: + * 1. Before initial entry to cdissect, all match data must have been + * cleared (this is seen to by zapallsubs). + * 2. Before any recursive entry to cdissect, the match data for that + * subexpression tree must be guaranteed clear (see zaptreesubs). + * 3. When returning REG_OKAY, each level of cdissect will have saved + * any relevant match locations. + * 4. When returning REG_NOMATCH, each level of cdissect will guarantee + * that its subexpression match locations are again clear. + * 5. No guarantees are made for error cases (i.e., other result codes). + * 6. When a level of cdissect abandons a successful sub-match, it will + * clear that subtree's match locations with zaptreesubs before trying + * any new DFA match or cdissect call for that subtree or any subtree + * to its right (that is, any subtree that could have a backref into the + * abandoned match). + * This may seem overly complicated, but it's difficult to simplify it + * because of the provision that match locations must be reset before + * any fresh DFA match (a rule that is needed to make dfa_backref safe). + * That means it won't work to just reset relevant match locations at the + * start of each cdissect level. */ static int /* regexec return code */ cdissect(struct vars *v, @@ -841,6 +863,8 @@ ccondissect(struct vars *v, MDEBUG(("%d: successful\n", t->id)); return REG_OKAY; } + /* Reset left's matches (right should have done so itself) */ + zaptreesubs(v, left); } if (er != REG_NOMATCH) return er; @@ -863,8 +887,6 @@ ccondissect(struct vars *v, return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); - zaptreesubs(v, left); - zaptreesubs(v, right); } /* can't get here */ @@ -922,6 +944,8 @@ crevcondissect(struct vars *v, MDEBUG(("%d: successful\n", t->id)); return REG_OKAY; } + /* Reset left's matches (right should have done so itself) */ + zaptreesubs(v, left); } if (er != REG_NOMATCH) return er; @@ -944,8 +968,6 @@ crevcondissect(struct vars *v, return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); - zaptreesubs(v, left); - zaptreesubs(v, right); } /* can't get here */ @@ -1214,6 +1236,7 @@ citerdissect(struct vars *v, for (i = nverified + 1; i <= k; i++) { + /* zap any match data from a non-last iteration */ zaptreesubs(v, t->child); er = cdissect(v, t->child, endpts[i - 1], endpts[i]); if (er == REG_OKAY) @@ -1426,6 +1449,7 @@ creviterdissect(struct vars *v, for (i = nverified + 1; i <= k; i++) { + /* zap any match data from a non-last iteration */ zaptreesubs(v, t->child); er = cdissect(v, t->child, endpts[i - 1], endpts[i]); if (er == REG_OKAY) diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out index 6242d0baa9..4886858d66 100644 --- a/src/test/modules/test_regex/expected/test_regex.out +++ b/src/test/modules/test_regex/expected/test_regex.out @@ -2664,6 +2664,20 @@ select * from test_regex('^(.+)( \1)+$', 'abc abc abd', 'RP'); {2,REG_UBACKREF,REG_UNONPOSIX} (1 row) +-- expectNomatch 14.30 RP {^(.)\1|\1.} {abcdef} +select * from test_regex('^(.)\1|\1.', 'abcdef', 'RP'); + test_regex +-------------------------------- + {1,REG_UBACKREF,REG_UNONPOSIX} +(1 row) + +-- expectNomatch 14.31 RP {^((.)\2|..)\2} {abadef} +select * from test_regex('^((.)\2|..)\2', 'abadef', 'RP'); + test_regex +-------------------------------- + {2,REG_UBACKREF,REG_UNONPOSIX} +(1 row) + -- back reference only matches the string, not any constraints select * from test_regex('(^\w+).*\1', 'abc abc abc', 'LRP'); test_regex diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql index 389b8b61b3..418527da3d 100644 --- a/src/test/modules/test_regex/sql/test_regex.sql +++ b/src/test/modules/test_regex/sql/test_regex.sql @@ -775,6 +775,10 @@ select * from test_regex('^(.+)( \1)+$', 'abc abc abc', 'RP'); select * from test_regex('^(.+)( \1)+$', 'abc abd abc', 'RP'); -- expectNomatch 14.29 RP {^(.+)( \1)+$} {abc abc abd} select * from test_regex('^(.+)( \1)+$', 'abc abc abd', 'RP'); +-- expectNomatch 14.30 RP {^(.)\1|\1.} {abcdef} +select * from test_regex('^(.)\1|\1.', 'abcdef', 'RP'); +-- expectNomatch 14.31 RP {^((.)\2|..)\2} {abadef} +select * from test_regex('^((.)\2|..)\2', 'abadef', 'RP'); -- back reference only matches the string, not any constraints select * from test_regex('(^\w+).*\1', 'abc abc abc', 'LRP'); diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out index 86477cc506..cbe2cfc3ea 100644 --- a/src/test/regress/expected/regex.out +++ b/src/test/regress/expected/regex.out @@ -567,6 +567,19 @@ select 'a' ~ '()+\1'; t (1 row) +-- Test ancient oversight in when to apply zaptreesubs +select 'abcdef' ~ '^(.)\1|\1.' as f; + f +--- + f +(1 row) + +select 'abadef' ~ '^((.)\2|..)\2' as f; + f +--- + f +(1 row) + -- Add coverage for some cases in checkmatchall select regexp_match('xy', '.|...'); regexp_match diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql index b03a8d9ac2..c6974a43d1 100644 --- a/src/test/regress/sql/regex.sql +++ b/src/test/regress/sql/regex.sql @@ -135,6 +135,10 @@ select 'a' ~ '.. ()|\1'; select 'a' ~ '()*\1'; select 'a' ~ '()+\1'; +-- Test ancient oversight in when to apply zaptreesubs +select 'abcdef' ~ '^(.)\1|\1.' as f; +select 'abadef' ~ '^((.)\2|..)\2' as f; + -- Add coverage for some cases in checkmatchall select regexp_match('xy', '.|...'); select regexp_match('xyz', '.|...'); |