summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/regex/regexec.c48
-rw-r--r--src/test/modules/test_regex/expected/test_regex.out14
-rw-r--r--src/test/modules/test_regex/sql/test_regex.sql4
-rw-r--r--src/test/regress/expected/regex.out13
-rw-r--r--src/test/regress/sql/regex.sql4
5 files changed, 71 insertions, 12 deletions
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index aa5ba85514..2411e6561d 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -216,14 +216,14 @@ pg_regexec(regex_t *re,
if (backref && nmatch <= v->g->nsub)
{
/* need larger work area */
- if (v->g->nsub + 1 <= LOCALMAT)
+ v->nmatch = v->g->nsub + 1;
+ if (v->nmatch <= LOCALMAT)
v->pmatch = mat;
else
- v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
- sizeof(regmatch_t));
+ v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t));
if (v->pmatch == NULL)
return REG_ESPACE;
- v->nmatch = v->g->nsub + 1;
+ zapallsubs(v->pmatch, v->nmatch);
}
else
{
@@ -488,7 +488,6 @@ find(struct vars *v,
return REG_OKAY;
/* find submatches */
- zapallsubs(v->pmatch, v->nmatch);
return cdissect(v, v->g->tree, begin, end);
}
@@ -599,7 +598,6 @@ cfindloop(struct vars *v,
break; /* no match with this begin point, try next */
MDEBUG(("tentative end %ld\n", LOFF(end)));
/* Dissect the potential match to see if it really matches */
- zapallsubs(v->pmatch, v->nmatch);
er = cdissect(v, v->g->tree, begin, end);
if (er == REG_OKAY)
{
@@ -647,6 +645,8 @@ cfindloop(struct vars *v,
/*
* zapallsubs - initialize all subexpression matches to "no match"
+ *
+ * Note that p[0], the overall-match location, is not touched.
*/
static void
zapallsubs(regmatch_t *p,
@@ -716,8 +716,30 @@ subset(struct vars *v,
* DFA and found that the proposed substring satisfies the DFA. (We make
* the caller do that because in concatenation and iteration nodes, it's
* much faster to check all the substrings against the child DFAs before we
- * recurse.) Also, caller must have cleared subexpression match data via
- * zaptreesubs (or zapallsubs at the top level).
+ * recurse.)
+ *
+ * A side-effect of a successful match is to save match locations for
+ * capturing subexpressions in v->pmatch[]. This is a little bit tricky,
+ * so we make the following rules:
+ * 1. Before initial entry to cdissect, all match data must have been
+ * cleared (this is seen to by zapallsubs).
+ * 2. Before any recursive entry to cdissect, the match data for that
+ * subexpression tree must be guaranteed clear (see zaptreesubs).
+ * 3. When returning REG_OKAY, each level of cdissect will have saved
+ * any relevant match locations.
+ * 4. When returning REG_NOMATCH, each level of cdissect will guarantee
+ * that its subexpression match locations are again clear.
+ * 5. No guarantees are made for error cases (i.e., other result codes).
+ * 6. When a level of cdissect abandons a successful sub-match, it will
+ * clear that subtree's match locations with zaptreesubs before trying
+ * any new DFA match or cdissect call for that subtree or any subtree
+ * to its right (that is, any subtree that could have a backref into the
+ * abandoned match).
+ * This may seem overly complicated, but it's difficult to simplify it
+ * because of the provision that match locations must be reset before
+ * any fresh DFA match (a rule that is needed to make dfa_backref safe).
+ * That means it won't work to just reset relevant match locations at the
+ * start of each cdissect level.
*/
static int /* regexec return code */
cdissect(struct vars *v,
@@ -841,6 +863,8 @@ ccondissect(struct vars *v,
MDEBUG(("%d: successful\n", t->id));
return REG_OKAY;
}
+ /* Reset left's matches (right should have done so itself) */
+ zaptreesubs(v, left);
}
if (er != REG_NOMATCH)
return er;
@@ -863,8 +887,6 @@ ccondissect(struct vars *v,
return REG_NOMATCH;
}
MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
- zaptreesubs(v, left);
- zaptreesubs(v, right);
}
/* can't get here */
@@ -922,6 +944,8 @@ crevcondissect(struct vars *v,
MDEBUG(("%d: successful\n", t->id));
return REG_OKAY;
}
+ /* Reset left's matches (right should have done so itself) */
+ zaptreesubs(v, left);
}
if (er != REG_NOMATCH)
return er;
@@ -944,8 +968,6 @@ crevcondissect(struct vars *v,
return REG_NOMATCH;
}
MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
- zaptreesubs(v, left);
- zaptreesubs(v, right);
}
/* can't get here */
@@ -1214,6 +1236,7 @@ citerdissect(struct vars *v,
for (i = nverified + 1; i <= k; i++)
{
+ /* zap any match data from a non-last iteration */
zaptreesubs(v, t->child);
er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
if (er == REG_OKAY)
@@ -1426,6 +1449,7 @@ creviterdissect(struct vars *v,
for (i = nverified + 1; i <= k; i++)
{
+ /* zap any match data from a non-last iteration */
zaptreesubs(v, t->child);
er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
if (er == REG_OKAY)
diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out
index 6242d0baa9..4886858d66 100644
--- a/src/test/modules/test_regex/expected/test_regex.out
+++ b/src/test/modules/test_regex/expected/test_regex.out
@@ -2664,6 +2664,20 @@ select * from test_regex('^(.+)( \1)+$', 'abc abc abd', 'RP');
{2,REG_UBACKREF,REG_UNONPOSIX}
(1 row)
+-- expectNomatch 14.30 RP {^(.)\1|\1.} {abcdef}
+select * from test_regex('^(.)\1|\1.', 'abcdef', 'RP');
+ test_regex
+--------------------------------
+ {1,REG_UBACKREF,REG_UNONPOSIX}
+(1 row)
+
+-- expectNomatch 14.31 RP {^((.)\2|..)\2} {abadef}
+select * from test_regex('^((.)\2|..)\2', 'abadef', 'RP');
+ test_regex
+--------------------------------
+ {2,REG_UBACKREF,REG_UNONPOSIX}
+(1 row)
+
-- back reference only matches the string, not any constraints
select * from test_regex('(^\w+).*\1', 'abc abc abc', 'LRP');
test_regex
diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql
index 389b8b61b3..418527da3d 100644
--- a/src/test/modules/test_regex/sql/test_regex.sql
+++ b/src/test/modules/test_regex/sql/test_regex.sql
@@ -775,6 +775,10 @@ select * from test_regex('^(.+)( \1)+$', 'abc abc abc', 'RP');
select * from test_regex('^(.+)( \1)+$', 'abc abd abc', 'RP');
-- expectNomatch 14.29 RP {^(.+)( \1)+$} {abc abc abd}
select * from test_regex('^(.+)( \1)+$', 'abc abc abd', 'RP');
+-- expectNomatch 14.30 RP {^(.)\1|\1.} {abcdef}
+select * from test_regex('^(.)\1|\1.', 'abcdef', 'RP');
+-- expectNomatch 14.31 RP {^((.)\2|..)\2} {abadef}
+select * from test_regex('^((.)\2|..)\2', 'abadef', 'RP');
-- back reference only matches the string, not any constraints
select * from test_regex('(^\w+).*\1', 'abc abc abc', 'LRP');
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out
index 86477cc506..cbe2cfc3ea 100644
--- a/src/test/regress/expected/regex.out
+++ b/src/test/regress/expected/regex.out
@@ -567,6 +567,19 @@ select 'a' ~ '()+\1';
t
(1 row)
+-- Test ancient oversight in when to apply zaptreesubs
+select 'abcdef' ~ '^(.)\1|\1.' as f;
+ f
+---
+ f
+(1 row)
+
+select 'abadef' ~ '^((.)\2|..)\2' as f;
+ f
+---
+ f
+(1 row)
+
-- Add coverage for some cases in checkmatchall
select regexp_match('xy', '.|...');
regexp_match
diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql
index b03a8d9ac2..c6974a43d1 100644
--- a/src/test/regress/sql/regex.sql
+++ b/src/test/regress/sql/regex.sql
@@ -135,6 +135,10 @@ select 'a' ~ '.. ()|\1';
select 'a' ~ '()*\1';
select 'a' ~ '()+\1';
+-- Test ancient oversight in when to apply zaptreesubs
+select 'abcdef' ~ '^(.)\1|\1.' as f;
+select 'abadef' ~ '^((.)\2|..)\2' as f;
+
-- Add coverage for some cases in checkmatchall
select regexp_match('xy', '.|...');
select regexp_match('xyz', '.|...');