summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane2015-10-16 19:52:12 +0000
committerTom Lane2015-10-16 19:52:12 +0000
commit4083a52f4173b87a883537a1e2957413c2aebd9e (patch)
treebff7995b763c91c1fc599d754cc01c4c1648e4c1
parentb94c2b6a69a561906d387cec1de59ac3bb4bb185 (diff)
Miscellaneous cleanup of regular-expression compiler.
Revert our previous addition of "all" flags to copyins() and copyouts(); they're no longer needed, and were never anything but an unsightly hack. Improve a couple of infelicities in the REG_DEBUG code for dumping the NFA data structure, including adding code to count the total number of states and arcs. Add a couple of missed error checks. Add some more documentation in the README file, and some regression tests illustrating cases that exceeded the state-count limit and/or took unreasonable amounts of time before this set of patches. Back-patch to all supported branches.
-rw-r--r--src/backend/regex/regc_nfa.c55
-rw-r--r--src/backend/regex/regcomp.c11
2 files changed, 23 insertions, 43 deletions
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index fe8a4a12d17..6f04321cd35 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -823,14 +823,11 @@ moveins(struct nfa * nfa,
/*
* copyins - copy in arcs of a state to another state
- *
- * Either all arcs, or only non-empty ones as determined by all value.
*/
static void
copyins(struct nfa * nfa,
struct state * oldState,
- struct state * newState,
- int all)
+ struct state * newState)
{
assert(oldState != newState);
@@ -840,8 +837,7 @@ copyins(struct nfa * nfa,
struct arc *a;
for (a = oldState->ins; a != NULL; a = a->inchain)
- if (all || a->type != EMPTY)
- cparc(nfa, a, a->from, newState);
+ cparc(nfa, a, a->from, newState);
}
else
{
@@ -873,12 +869,6 @@ copyins(struct nfa * nfa,
{
struct arc *a = oa;
- if (!all && a->type == EMPTY)
- {
- oa = oa->inchain;
- continue;
- }
-
switch (sortins_cmp(&oa, &na))
{
case -1:
@@ -904,12 +894,6 @@ copyins(struct nfa * nfa,
/* newState does not have anything matching oa */
struct arc *a = oa;
- if (!all && a->type == EMPTY)
- {
- oa = oa->inchain;
- continue;
- }
-
oa = oa->inchain;
createarc(nfa, a->type, a->co, a->from, newState);
}
@@ -1107,14 +1091,11 @@ moveouts(struct nfa * nfa,
/*
* copyouts - copy out arcs of a state to another state
- *
- * Either all arcs, or only non-empty ones as determined by all value.
*/
static void
copyouts(struct nfa * nfa,
struct state * oldState,
- struct state * newState,
- int all)
+ struct state * newState)
{
assert(oldState != newState);
@@ -1124,8 +1105,7 @@ copyouts(struct nfa * nfa,
struct arc *a;
for (a = oldState->outs; a != NULL; a = a->outchain)
- if (all || a->type != EMPTY)
- cparc(nfa, a, newState, a->to);
+ cparc(nfa, a, newState, a->to);
}
else
{
@@ -1157,12 +1137,6 @@ copyouts(struct nfa * nfa,
{
struct arc *a = oa;
- if (!all && a->type == EMPTY)
- {
- oa = oa->outchain;
- continue;
- }
-
switch (sortouts_cmp(&oa, &na))
{
case -1:
@@ -1188,12 +1162,6 @@ copyouts(struct nfa * nfa,
/* newState does not have anything matching oa */
struct arc *a = oa;
- if (!all && a->type == EMPTY)
- {
- oa = oa->outchain;
- continue;
- }
-
oa = oa->outchain;
createarc(nfa, a->type, a->co, newState, a->to);
}
@@ -1452,6 +1420,10 @@ optimize(struct nfa * nfa,
fprintf(f, "\nfinal cleanup:\n");
#endif
cleanup(nfa); /* final tidying */
+#ifdef REG_DEBUG
+ if (verbose)
+ dumpnfa(nfa, f);
+#endif
return analyze(nfa); /* and analysis */
}
@@ -1568,7 +1540,7 @@ pull(struct nfa * nfa,
s = newstate(nfa);
if (NISERR())
return 0;
- copyins(nfa, from, s, 1); /* duplicate inarcs */
+ copyins(nfa, from, s); /* duplicate inarcs */
cparc(nfa, con, s, to); /* move constraint arc */
freearc(nfa, con);
if (NISERR())
@@ -1735,7 +1707,7 @@ push(struct nfa * nfa,
s = newstate(nfa);
if (NISERR())
return 0;
- copyouts(nfa, to, s, 1); /* duplicate outarcs */
+ copyouts(nfa, to, s); /* duplicate outarcs */
cparc(nfa, con, from, s); /* move constraint arc */
freearc(nfa, con);
if (NISERR())
@@ -2952,6 +2924,8 @@ dumpnfa(struct nfa * nfa,
{
#ifdef REG_DEBUG
struct state *s;
+ int nstates = 0;
+ int narcs = 0;
fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
if (nfa->bos[0] != COLORLESS)
@@ -2964,7 +2938,12 @@ dumpnfa(struct nfa * nfa,
fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
fprintf(f, "\n");
for (s = nfa->states; s != NULL; s = s->next)
+ {
dumpstate(s, f);
+ nstates++;
+ narcs += s->nouts;
+ }
+ fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
if (nfa->parent == NULL)
dumpcolors(nfa->cm, f);
fflush(f);
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 9e04a4f298a..f4eebf84d7b 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -136,10 +136,10 @@ static int sortins_cmp(const void *, const void *);
static void sortouts(struct nfa *, struct state *);
static int sortouts_cmp(const void *, const void *);
static void moveins(struct nfa *, struct state *, struct state *);
-static void copyins(struct nfa *, struct state *, struct state *, int);
+static void copyins(struct nfa *, struct state *, struct state *);
static void mergeins(struct nfa *, struct state *, struct arc **, int);
static void moveouts(struct nfa *, struct state *, struct state *);
-static void copyouts(struct nfa *, struct state *, struct state *, int);
+static void copyouts(struct nfa *, struct state *, struct state *);
static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);
static void delsub(struct nfa *, struct state *, struct state *);
static void deltraverse(struct nfa *, struct state *, struct state *);
@@ -181,7 +181,6 @@ static void dumpnfa(struct nfa *, FILE *);
#ifdef REG_DEBUG
static void dumpstate(struct state *, FILE *);
static void dumparcs(struct state *, FILE *);
-static int dumprarcs(struct arc *, struct state *, FILE *, int);
static void dumparc(struct arc *, struct state *, FILE *);
static void dumpcnfa(struct cnfa *, FILE *);
static void dumpcstate(int, struct cnfa *, FILE *);
@@ -613,7 +612,9 @@ makesearch(struct vars * v,
for (s = slist; s != NULL; s = s2)
{
s2 = newstate(nfa);
- copyouts(nfa, s, s2, 1);
+ NOERR();
+ copyouts(nfa, s, s2);
+ NOERR();
for (a = s->ins; a != NULL; a = b)
{
b = a->inchain;
@@ -1997,7 +1998,7 @@ dump(regex_t *re,
dumpcolors(&g->cmap, f);
if (!NULLCNFA(g->search))
{
- printf("\nsearch:\n");
+ fprintf(f, "\nsearch:\n");
dumpcnfa(&g->search, f);
}
for (i = 1; i < g->nlacons; i++)