summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tom Lane, 2008-02-14 17:33:37 +0000
committer: Tom Lane, 2008-02-14 17:33:37 +0000
commit: a9ff630eab29a76ac57e59db1b552cb01b4e3a84 (patch)
tree: 02c901ae640960cc629ece426b28b54d29889abc
parent: 818b1d5eccfd8b8559c619d7932a8a99960177e8 (diff)
Sync our regex code with upstream changes since last time we did this, which
was Tcl 8.4.8. The main changes are to remove the never-fully-implemented code for multi-character collating elements, and to const-ify some stuff a bit more fully. In combination with the recent security patch, this commit brings us into line with Tcl 8.5.0. Note that I didn't make any effort to duplicate a lot of cosmetic changes that they made to bring their copy into line with their own style guidelines, such as adding braces around single-line IF bodies. Most of those we either had done already (such as ANSI-fication of function headers) or there is no point because pgindent would undo the change anyway.
-rw-r--r-- src/backend/regex/regc_color.c 33
-rw-r--r-- src/backend/regex/regc_cvec.c 91
-rw-r--r-- src/backend/regex/regc_lex.c 30
-rw-r--r-- src/backend/regex/regc_locale.c 88
-rw-r--r-- src/backend/regex/regc_nfa.c 67
-rw-r--r-- src/backend/regex/regcomp.c 358
-rw-r--r-- src/backend/regex/regerror.c 6
-rw-r--r-- src/include/regex/regcustom.h 4
-rw-r--r-- src/include/regex/regguts.h 16
9 files changed, 156 insertions, 537 deletions
diff --git a/src/backend/regex/regc_color.c b/src/backend/regex/regc_color.c
index a5293c1a65..525c7158ea 100644
--- a/src/backend/regex/regc_color.c
+++ b/src/backend/regex/regc_color.c
@@ -222,7 +222,6 @@ static color /* COLORLESS for error */
newcolor(struct colormap * cm)
{
struct colordesc *cd;
- struct colordesc *new;
size_t n;
if (CISERR())
@@ -245,24 +244,25 @@ newcolor(struct colormap * cm)
else
{
/* oops, must allocate more */
+ struct colordesc *newCd;
+
n = cm->ncds * 2;
if (cm->cd == cm->cdspace)
{
- new = (struct colordesc *) MALLOC(n *
- sizeof(struct colordesc));
- if (new != NULL)
- memcpy(VS(new), VS(cm->cdspace), cm->ncds *
+ newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc));
+ if (newCd != NULL)
+ memcpy(VS(newCd), VS(cm->cdspace), cm->ncds *
sizeof(struct colordesc));
}
else
- new = (struct colordesc *) REALLOC(cm->cd,
- n * sizeof(struct colordesc));
- if (new == NULL)
+ newCd = (struct colordesc *)
+ REALLOC(cm->cd, n * sizeof(struct colordesc));
+ if (newCd == NULL)
{
CERR(REG_ESPACE);
return COLORLESS;
}
- cm->cd = new;
+ cm->cd = newCd;
cm->ncds = n;
assert(cm->max < cm->ncds - 1);
cm->max++;
@@ -635,21 +635,6 @@ uncolorchain(struct colormap * cm,
}
/*
- * singleton - is this character in its own color?
- */
-static int /* predicate */
-singleton(struct colormap * cm,
- chr c)
-{
- color co; /* color of c */
-
- co = GETCOLOR(cm, c);
- if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB)
- return 1;
- return 0;
-}
-
-/*
* rainbow - add arcs of all full colors (but one) between specified states
*/
static void
diff --git a/src/backend/regex/regc_cvec.c b/src/backend/regex/regc_cvec.c
index 6f2c53c5ac..0bb0554079 100644
--- a/src/backend/regex/regc_cvec.c
+++ b/src/backend/regex/regc_cvec.c
@@ -33,28 +33,26 @@
*/
/*
+ * Notes:
+ * Only (selected) functions in _this_ file should treat chr* as non-constant.
+ */
+
+/*
* newcvec - allocate a new cvec
*/
static struct cvec *
newcvec(int nchrs, /* to hold this many chrs... */
- int nranges, /* ... and this many ranges... */
- int nmcces) /* ... and this many MCCEs */
+ int nranges) /* ... and this many ranges */
{
- size_t n;
- size_t nc;
- struct cvec *cv;
+ size_t nc = (size_t) nchrs + (size_t) nranges * 2;
+ size_t n = sizeof(struct cvec) + nc * sizeof(chr);
+ struct cvec *cv = (struct cvec *) MALLOC(n);
- nc = (size_t) nchrs + (size_t) nmcces *(MAXMCCE + 1) + (size_t) nranges *2;
-
- n = sizeof(struct cvec) + (size_t) (nmcces - 1) * sizeof(chr *)
- + nc * sizeof(chr);
- cv = (struct cvec *) MALLOC(n);
if (cv == NULL)
return NULL;
cv->chrspace = nchrs;
- cv->chrs = (chr *) &cv->mcces[nmcces]; /* chrs just after MCCE ptrs */
- cv->mccespace = nmcces;
- cv->ranges = cv->chrs + nchrs + nmcces * (MAXMCCE + 1);
+ cv->chrs = (chr *) (((char *) cv) + sizeof(struct cvec));
+ cv->ranges = cv->chrs + nchrs;
cv->rangespace = nranges;
return clearcvec(cv);
}
@@ -66,17 +64,9 @@ newcvec(int nchrs, /* to hold this many chrs... */
static struct cvec *
clearcvec(struct cvec * cv)
{
- int i;
-
assert(cv != NULL);
cv->nchrs = 0;
- assert(cv->chrs == (chr *) &cv->mcces[cv->mccespace]);
- cv->nmcces = 0;
- cv->nmccechrs = 0;
cv->nranges = 0;
- for (i = 0; i < cv->mccespace; i++)
- cv->mcces[i] = NULL;
-
return cv;
}
@@ -87,7 +77,6 @@ static void
addchr(struct cvec * cv, /* character vector */
chr c) /* character to add */
{
- assert(cv->nchrs < cv->chrspace - cv->nmccechrs);
cv->chrs[cv->nchrs++] = (chr) c;
}
@@ -106,72 +95,20 @@ addrange(struct cvec * cv, /* character vector */
}
/*
- * addmcce - add an MCCE to a cvec
- */
-static void
-addmcce(struct cvec * cv, /* character vector */
- chr *startp, /* beginning of text */
- chr *endp) /* just past end of text */
-{
- int len;
- int i;
- chr *s;
- chr *d;
-
- if (startp == NULL && endp == NULL)
- return;
- len = endp - startp;
- assert(len > 0);
- assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs);
- assert(cv->nmcces < cv->mccespace);
- d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1];
- cv->mcces[cv->nmcces++] = d;
- for (s = startp, i = len; i > 0; s++, i--)
- *d++ = *s;
- *d++ = 0; /* endmarker */
- assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
- cv->nmccechrs += len + 1;
-}
-
-/*
- * haschr - does a cvec contain this chr?
- */
-static int /* predicate */
-haschr(struct cvec * cv, /* character vector */
- chr c) /* character to test for */
-{
- int i;
- chr *p;
-
- for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
- {
- if (*p == c)
- return 1;
- }
- for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
- {
- if ((*p <= c) && (c <= *(p + 1)))
- return 1;
- }
- return 0;
-}
-
-/*
* getcvec - get a cvec, remembering it as v->cv
*/
static struct cvec *
getcvec(struct vars * v, /* context */
int nchrs, /* to hold this many chrs... */
- int nranges, /* ... and this many ranges... */
- int nmcces) /* ... and this many MCCEs */
+ int nranges) /* ... and this many ranges */
{
if (v->cv != NULL && nchrs <= v->cv->chrspace &&
- nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace)
+ nranges <= v->cv->rangespace)
return clearcvec(v->cv);
if (v->cv != NULL)
freecvec(v->cv);
- v->cv = newcvec(nchrs, nranges, nmcces);
+ v->cv = newcvec(nchrs, nranges);
if (v->cv == NULL)
ERR(REG_ESPACE);
diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c
index fed8301cc3..64eca253a2 100644
--- a/src/backend/regex/regc_lex.c
+++ b/src/backend/regex/regc_lex.c
@@ -201,8 +201,8 @@ prefixes(struct vars * v)
*/
static void
lexnest(struct vars * v,
- chr *beginp, /* start of interpolation */
- chr *endp) /* one past end of interpolation */
+ const chr *beginp, /* start of interpolation */
+ const chr *endp) /* one past end of interpolation */
{
assert(v->savenow == NULL); /* only one level of nesting */
v->savenow = v->now;
@@ -214,47 +214,47 @@ lexnest(struct vars * v,
/*
* string constants to interpolate as expansions of things like \d
*/
-static chr backd[] = { /* \d */
+static const chr backd[] = { /* \d */
CHR('['), CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
CHR(':'), CHR(']'), CHR(']')
};
-static chr backD[] = { /* \D */
+static const chr backD[] = { /* \D */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
CHR(':'), CHR(']'), CHR(']')
};
-static chr brbackd[] = { /* \d within brackets */
+static const chr brbackd[] = { /* \d within brackets */
CHR('['), CHR(':'),
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
CHR(':'), CHR(']')
};
-static chr backs[] = { /* \s */
+static const chr backs[] = { /* \s */
CHR('['), CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']'), CHR(']')
};
-static chr backS[] = { /* \S */
+static const chr backS[] = { /* \S */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']'), CHR(']')
};
-static chr brbacks[] = { /* \s within brackets */
+static const chr brbacks[] = { /* \s within brackets */
CHR('['), CHR(':'),
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
CHR(':'), CHR(']')
};
-static chr backw[] = { /* \w */
+static const chr backw[] = { /* \w */
CHR('['), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
CHR(':'), CHR(']'), CHR('_'), CHR(']')
};
-static chr backW[] = { /* \W */
+static const chr backW[] = { /* \W */
CHR('['), CHR('^'), CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
CHR(':'), CHR(']'), CHR('_'), CHR(']')
};
-static chr brbackw[] = { /* \w within brackets */
+static const chr brbackw[] = { /* \w within brackets */
CHR('['), CHR(':'),
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
CHR(':'), CHR(']'), CHR('_')
@@ -722,7 +722,7 @@ lexescape(struct vars * v)
static chr esc[] = {
CHR('E'), CHR('S'), CHR('C')
};
- chr *save;
+ const chr *save;
assert(v->cflags & REG_ADVF);
@@ -1080,7 +1080,7 @@ brenext(struct vars * v,
static void
skip(struct vars * v)
{
- chr *start = v->now;
+ const chr *start = v->now;
assert(v->cflags & REG_EXPANDED);
@@ -1119,8 +1119,8 @@ newline(void)
*/
static chr
chrnamed(struct vars * v,
- chr *startp, /* start of name */
- chr *endp, /* just past end of name */
+ const chr *startp, /* start of name */
+ const chr *endp, /* just past end of name */
chr lastresort) /* what to return if name lookup fails */
{
celt c;
diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c
index c18236d3c6..d832a53700 100644
--- a/src/backend/regex/regc_locale.c
+++ b/src/backend/regex/regc_locale.c
@@ -52,10 +52,10 @@
/* ASCII character-name table */
-static struct cname
+static const struct cname
{
- char *name;
- char code;
+ const char *name;
+ const char code;
} cnames[] =
{
@@ -424,45 +424,14 @@ pg_wc_tolower(pg_wchar c)
/*
- * nmcces - how many distinct MCCEs are there?
- */
-static int
-nmcces(struct vars * v)
-{
- /*
- * No multi-character collating elements defined at the moment.
- */
- return 0;
-}
-
-/*
- * nleaders - how many chrs can be first chrs of MCCEs?
- */
-static int
-nleaders(struct vars * v)
-{
- return 0;
-}
-
-/*
- * allmcces - return a cvec with all the MCCEs of the locale
- */
-static struct cvec *
-allmcces(struct vars * v, /* context */
- struct cvec * cv) /* this is supposed to have enough room */
-{
- return clearcvec(cv);
-}
-
-/*
* element - map collating-element name to celt
*/
static celt
element(struct vars * v, /* context */
- chr *startp, /* points to start of name */
- chr *endp) /* points just past end of name */
+ const chr *startp, /* points to start of name */
+ const chr *endp) /* points just past end of name */
{
- struct cname *cn;
+ const struct cname *cn;
size_t len;
/* generic: one-chr names stand for themselves */
@@ -513,7 +482,7 @@ range(struct vars * v, /* context */
if (!cases)
{ /* easy version */
- cv = getcvec(v, 0, 1, 0);
+ cv = getcvec(v, 0, 1);
NOERRN();
addrange(cv, a, b);
return cv;
@@ -527,7 +496,7 @@ range(struct vars * v, /* context */
nchrs = (b - a + 1) * 2 + 4;
- cv = getcvec(v, nchrs, 0, 0);
+ cv = getcvec(v, nchrs, 0);
NOERRN();
for (c = a; c <= b; c++)
@@ -550,7 +519,6 @@ range(struct vars * v, /* context */
static int /* predicate */
before(celt x, celt y)
{
- /* trivial because no MCCEs */
if (x < y)
return 1;
return 0;
@@ -571,7 +539,7 @@ eclass(struct vars * v, /* context */
/* crude fake equivalence class for testing */
if ((v->cflags & REG_FAKE) && c == 'x')
{
- cv = getcvec(v, 4, 0, 0);
+ cv = getcvec(v, 4, 0);
addchr(cv, (chr) 'x');
addchr(cv, (chr) 'y');
if (cases)
@@ -585,7 +553,7 @@ eclass(struct vars * v, /* context */
/* otherwise, none */
if (cases)
return allcases(v, c);
- cv = getcvec(v, 1, 0, 0);
+ cv = getcvec(v, 1, 0);
assert(cv != NULL);
addchr(cv, (chr) c);
return cv;
@@ -598,13 +566,13 @@ eclass(struct vars * v, /* context */
*/
static struct cvec *
cclass(struct vars * v, /* context */
- chr *startp, /* where the name starts */
- chr *endp, /* just past the end of the name */
+ const chr *startp, /* where the name starts */
+ const chr *endp, /* just past the end of the name */
int cases) /* case-independent? */
{
size_t len;
struct cvec *cv = NULL;
- char **namePtr;
+ const char **namePtr;
int i,
index;
@@ -612,7 +580,7 @@ cclass(struct vars * v, /* context */
* The following arrays define the valid character class names.
*/
- static char *classNames[] = {
+ static const char *classNames[] = {
"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
"lower", "print", "punct", "space", "upper", "xdigit", NULL
};
@@ -662,7 +630,7 @@ cclass(struct vars * v, /* context */
switch ((enum classes) index)
{
case CC_PRINT:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
+ cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@@ -673,7 +641,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_ALNUM:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
+ cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@@ -684,7 +652,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_ALPHA:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
+ cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@@ -695,27 +663,27 @@ cclass(struct vars * v, /* context */
}
break;
case CC_ASCII:
- cv = getcvec(v, 0, 1, 0);
+ cv = getcvec(v, 0, 1);
if (cv)
addrange(cv, 0, 0x7f);
break;
case CC_BLANK:
- cv = getcvec(v, 2, 0, 0);
+ cv = getcvec(v, 2, 0);
addchr(cv, '\t');
addchr(cv, ' ');
break;
case CC_CNTRL:
- cv = getcvec(v, 0, 2, 0);
+ cv = getcvec(v, 0, 2);
addrange(cv, 0x0, 0x1f);
addrange(cv, 0x7f, 0x9f);
break;
case CC_DIGIT:
- cv = getcvec(v, 0, 1, 0);
+ cv = getcvec(v, 0, 1);
if (cv)
addrange(cv, (chr) '0', (chr) '9');
break;
case CC_PUNCT:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
+ cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@@ -726,7 +694,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_XDIGIT:
- cv = getcvec(v, 0, 3, 0);
+ cv = getcvec(v, 0, 3);
if (cv)
{
addrange(cv, '0', '9');
@@ -735,7 +703,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_SPACE:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
+ cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@@ -746,7 +714,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_LOWER:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
+ cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@@ -757,7 +725,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_UPPER:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
+ cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@@ -768,7 +736,7 @@ cclass(struct vars * v, /* context */
}
break;
case CC_GRAPH:
- cv = getcvec(v, UCHAR_MAX, 0, 0);
+ cv = getcvec(v, UCHAR_MAX, 0);
if (cv)
{
for (i = 0; i <= UCHAR_MAX; i++)
@@ -802,7 +770,7 @@ allcases(struct vars * v, /* context */
lc = pg_wc_tolower((chr) c);
uc = pg_wc_toupper((chr) c);
- cv = getcvec(v, 2, 0, 0);
+ cv = getcvec(v, 2, 0);
addchr(cv, lc);
if (lc != uc)
addchr(cv, uc);
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index 21798c0642..e76c567988 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -349,8 +349,6 @@ newarc(struct nfa * nfa,
if (COLORED(a) && nfa->parent == NULL)
colorchain(nfa->cm, a);
-
- return;
}
/*
@@ -361,8 +359,6 @@ allocarc(struct nfa * nfa,
struct state * s)
{
struct arc *a;
- struct arcbatch *new;
- int i;
/* shortcut */
if (s->free == NULL && s->noas < ABSIZE)
@@ -375,22 +371,25 @@ allocarc(struct nfa * nfa,
/* if none at hand, get more */
if (s->free == NULL)
{
- new = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
- if (new == NULL)
+ struct arcbatch *newAb;
+ int i;
+
+ newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
+ if (newAb == NULL)
{
NERR(REG_ESPACE);
return NULL;
}
- new->next = s->oas.next;
- s->oas.next = new;
+ newAb->next = s->oas.next;
+ s->oas.next = newAb;
for (i = 0; i < ABSIZE; i++)
{
- new->a[i].type = 0;
- new->a[i].freechain = &new->a[i + 1];
+ newAb->a[i].type = 0;
+ newAb->a[i].freechain = &newAb->a[i + 1];
}
- new->a[ABSIZE - 1].freechain = NULL;
- s->free = &new->a[0];
+ newAb->a[ABSIZE - 1].freechain = NULL;
+ s->free = &newAb->a[0];
}
assert(s->free != NULL);
@@ -495,20 +494,20 @@ cparc(struct nfa * nfa,
*/
static void
moveins(struct nfa * nfa,
- struct state * old,
- struct state * new)
+ struct state * oldState,
+ struct state * newState)
{
struct arc *a;
- assert(old != new);
+ assert(oldState != newState);
- while ((a = old->ins) != NULL)
+ while ((a = oldState->ins) != NULL)
{
- cparc(nfa, a, a->from, new);
+ cparc(nfa, a, a->from, newState);
freearc(nfa, a);
}
- assert(old->nins == 0);
- assert(old->ins == NULL);
+ assert(oldState->nins == 0);
+ assert(oldState->ins == NULL);
}
/*
@@ -516,15 +515,15 @@ moveins(struct nfa * nfa,
*/
static void
copyins(struct nfa * nfa,
- struct state * old,
- struct state * new)
+ struct state * oldState,
+ struct state * newState)
{
struct arc *a;
- assert(old != new);
+ assert(oldState != newState);
- for (a = old->ins; a != NULL; a = a->inchain)
- cparc(nfa, a, a->from, new);
+ for (a = oldState->ins; a != NULL; a = a->inchain)
+ cparc(nfa, a, a->from, newState);
}
/*
@@ -532,16 +531,16 @@ copyins(struct nfa * nfa,
*/
static void
moveouts(struct nfa * nfa,
- struct state * old,
- struct state * new)
+ struct state * oldState,
+ struct state * newState)
{
struct arc *a;
- assert(old != new);
+ assert(oldState != newState);
- while ((a = old->outs) != NULL)
+ while ((a = oldState->outs) != NULL)
{
- cparc(nfa, a, new, a->to);
+ cparc(nfa, a, newState, a->to);
freearc(nfa, a);
}
}
@@ -551,15 +550,15 @@ moveouts(struct nfa * nfa,
*/
static void
copyouts(struct nfa * nfa,
- struct state * old,
- struct state * new)
+ struct state * oldState,
+ struct state * newState)
{
struct arc *a;
- assert(old != new);
+ assert(oldState != newState);
- for (a = old->outs; a != NULL; a = a->outchain)
- cparc(nfa, a, new, a->to);
+ for (a = oldState->outs; a != NULL; a = a->outchain)
+ cparc(nfa, a, newState, a->to);
}
/*
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 5004121f4e..0da4c9e070 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -51,11 +51,9 @@ static void repeat(struct vars *, struct state *, struct state *, int, int);
static void bracket(struct vars *, struct state *, struct state *);
static void cbracket(struct vars *, struct state *, struct state *);
static void brackpart(struct vars *, struct state *, struct state *);
-static chr *scanplain(struct vars *);
-static void leaders(struct vars *, struct cvec *);
+static const chr *scanplain(struct vars *);
static void onechr(struct vars *, chr, struct state *, struct state *);
static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
-static celt nextleader(struct vars *, chr, chr);
static void wordchrs(struct vars *);
static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
static void freesubre(struct vars *, struct subre *);
@@ -74,12 +72,12 @@ static void rfree(regex_t *);
static void dump(regex_t *, FILE *);
static void dumpst(struct subre *, FILE *, int);
static void stdump(struct subre *, FILE *, int);
-static char *stid(struct subre *, char *, size_t);
+static const char *stid(struct subre *, char *, size_t);
#endif
/* === regc_lex.c === */
static void lexstart(struct vars *);
static void prefixes(struct vars *);
-static void lexnest(struct vars *, chr *, chr *);
+static void lexnest(struct vars *, const chr *, const chr *);
static void lexword(struct vars *);
static int next(struct vars *);
static int lexescape(struct vars *);
@@ -87,7 +85,7 @@ static chr lexdigits(struct vars *, int, int, int);
static int brenext(struct vars *, chr);
static void skip(struct vars *);
static chr newline(void);
-static chr chrnamed(struct vars *, chr *, chr *, chr);
+static chr chrnamed(struct vars *, const chr *, const chr *, chr);
/* === regc_color.c === */
static void initcm(struct vars *, struct colormap *);
@@ -105,7 +103,6 @@ static void subblock(struct vars *, chr, struct state *, struct state *);
static void okcolors(struct nfa *, struct colormap *);
static void colorchain(struct colormap *, struct arc *);
static void uncolorchain(struct colormap *, struct arc *);
-static int singleton(struct colormap *, chr c);
static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);
static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);
@@ -168,13 +165,11 @@ static void dumpcnfa(struct cnfa *, FILE *);
static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
#endif
/* === regc_cvec.c === */
-static struct cvec *newcvec(int, int, int);
+static struct cvec *newcvec(int, int);
static struct cvec *clearcvec(struct cvec *);
static void addchr(struct cvec *, chr);
static void addrange(struct cvec *, chr, chr);
-static void addmcce(struct cvec *, chr *, chr *);
-static int haschr(struct cvec *, chr);
-static struct cvec *getcvec(struct vars *, int, int, int);
+static struct cvec *getcvec(struct vars *, int, int);
static void freecvec(struct cvec *);
/* === regc_locale.c === */
@@ -189,14 +184,11 @@ static int pg_wc_ispunct(pg_wchar c);
static int pg_wc_isspace(pg_wchar c);
static pg_wchar pg_wc_toupper(pg_wchar c);
static pg_wchar pg_wc_tolower(pg_wchar c);
-static int nmcces(struct vars *);
-static int nleaders(struct vars *);
-static struct cvec *allmcces(struct vars *, struct cvec *);
-static celt element(struct vars *, chr *, chr *);
+static celt element(struct vars *, const chr *, const chr *);
static struct cvec *range(struct vars *, celt, celt, int);
static int before(celt, celt);
static struct cvec *eclass(struct vars *, celt, int);
-static struct cvec *cclass(struct vars *, chr *, chr *, int);
+static struct cvec *cclass(struct vars *, const chr *, const chr *, int);
static struct cvec *allcases(struct vars *, chr);
static int cmp(const chr *, const chr *, size_t);
static int casecmp(const chr *, const chr *, size_t);
@@ -206,10 +198,10 @@ static int casecmp(const chr *, const chr *, size_t);
struct vars
{
regex_t *re;
- chr *now; /* scan pointer into string */
- chr *stop; /* end of string */
- chr *savenow; /* saved now and stop for "subroutine call" */
- chr *savestop;
+ const chr *now; /* scan pointer into string */
+ const chr *stop; /* end of string */
+ const chr *savenow; /* saved now and stop for "subroutine call" */
+ const chr *savestop;
int err; /* error code (0 if none) */
int cflags; /* copy of compile flags */
int lasttype; /* type of previous token */
@@ -230,10 +222,6 @@ struct vars
int ntree; /* number of tree nodes */
struct cvec *cv; /* interface cvec */
struct cvec *cv2; /* utility cvec */
- struct cvec *mcces; /* collating-element information */
-#define ISCELEADER(v,c) ((v)->mcces != NULL && haschr((v)->mcces, (c)))
- struct state *mccepbegin; /* in nfa, start of MCCE prototypes */
- struct state *mccepend; /* in nfa, end of MCCE prototypes */
struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */
};
@@ -275,9 +263,8 @@ struct vars
#define PREFER 'P' /* length preference */
/* is an arc colored, and hence on a color chain? */
-#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \
- (a)->type == BEHIND)
-
+#define COLORED(a) \
+ ((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND)
/* static function list */
@@ -322,7 +309,7 @@ pg_regcomp(regex_t *re,
/* initial setup (after which freev() is callable) */
v->re = re;
- v->now = (chr *) string;
+ v->now = string;
v->stop = v->now + len;
v->savenow = v->savestop = NULL;
v->err = 0;
@@ -341,7 +328,6 @@ pg_regcomp(regex_t *re,
v->treefree = NULL;
v->cv = NULL;
v->cv2 = NULL;
- v->mcces = NULL;
v->lacons = NULL;
v->nlacons = 0;
re->re_magic = REMAGIC;
@@ -363,19 +349,9 @@ pg_regcomp(regex_t *re,
ZAPCNFA(g->search);
v->nfa = newnfa(v, v->cm, (struct nfa *) NULL);
CNOERR();
- v->cv = newcvec(100, 20, 10);
+ v->cv = newcvec(100, 20);
if (v->cv == NULL)
return freev(v, REG_ESPACE);
- i = nmcces(v);
- if (i > 0)
- {
- v->mcces = newcvec(nleaders(v), 0, i);
- CNOERR();
- v->mcces = allmcces(v, v->mcces);
- leaders(v, v->mcces);
- addmcce(v->mcces, (chr *) NULL, (chr *) NULL); /* dummy */
- }
- CNOERR();
/* parsing */
lexstart(v); /* also handles prefixes */
@@ -525,8 +501,6 @@ freev(struct vars * v,
freecvec(v->cv);
if (v->cv2 != NULL)
freecvec(v->cv2);
- if (v->mcces != NULL)
- freecvec(v->mcces);
if (v->lacons != NULL)
freelacons(v->lacons, v->nlacons);
ERR(err); /* nop if err==0 */
@@ -583,15 +557,14 @@ makesearch(struct vars * v,
for (b = s->ins; b != NULL; b = b->inchain)
if (b->from != pre)
break;
- if (b != NULL)
- { /* must be split */
- if (s->tmp == NULL)
- { /* if not already in the list */
- /* (fixes bugs 505048, 230589, */
- /* 840258, 504785) */
- s->tmp = slist;
- slist = s;
- }
+ if (b != NULL && s->tmp == NULL)
+ {
+ /*
+ * Must be split if not already in the list (fixes bugs 505048,
+ * 230589, 840258, 504785).
+ */
+ s->tmp = slist;
+ slist = s;
}
}
@@ -1338,13 +1311,6 @@ cbracket(struct vars * v,
{
struct state *left = newstate(v->nfa);
struct state *right = newstate(v->nfa);
- struct state *s;
- struct arc *a; /* arc from lp */
- struct arc *ba; /* arc from left, from bracket() */
- struct arc *pa; /* MCCE-prototype arc */
- color co;
- chr *p;
- int i;
NOERR();
bracket(v, left, right);
@@ -1354,65 +1320,13 @@ cbracket(struct vars * v,
assert(lp->nouts == 0); /* all outarcs will be ours */
- /* easy part of complementing */
+ /*
+ * Easy part of complementing, and all there is to do since the MCCE code
+ * was removed.
+ */
colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
NOERR();
- if (v->mcces == NULL)
- { /* no MCCEs -- we're done */
- dropstate(v->nfa, left);
- assert(right->nins == 0);
- freestate(v->nfa, right);
- return;
- }
-
- /* but complementing gets messy in the presence of MCCEs... */
- NOTE(REG_ULOCALE);
- for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--)
- {
- co = GETCOLOR(v->cm, *p);
- a = findarc(lp, PLAIN, co);
- ba = findarc(left, PLAIN, co);
- if (ba == NULL)
- {
- assert(a != NULL);
- freearc(v->nfa, a);
- }
- else
- assert(a == NULL);
- s = newstate(v->nfa);
- NOERR();
- newarc(v->nfa, PLAIN, co, lp, s);
- NOERR();
- pa = findarc(v->mccepbegin, PLAIN, co);
- assert(pa != NULL);
- if (ba == NULL)
- { /* easy case, need all of them */
- cloneouts(v->nfa, pa->to, s, rp, PLAIN);
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
- }
- else
- { /* must be selective */
- if (findarc(ba->to, '$', 1) == NULL)
- {
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD, pa->to,
- s, rp);
- }
- for (pa = pa->to->outs; pa != NULL; pa = pa->outchain)
- if (findarc(ba->to, PLAIN, pa->co) == NULL)
- newarc(v->nfa, PLAIN, pa->co, s, rp);
- if (s->nouts == 0) /* limit of selectivity: none */
- dropstate(v->nfa, s); /* frees arc too */
- }
- NOERR();
- }
-
- delsub(v->nfa, left, right);
- assert(left->nouts == 0);
- freestate(v->nfa, left);
+ dropstate(v->nfa, left);
assert(right->nins == 0);
freestate(v->nfa, right);
}
@@ -1428,8 +1342,8 @@ brackpart(struct vars * v,
celt startc;
celt endc;
struct cvec *cv;
- chr *startp;
- chr *endp;
+ const chr *startp;
+ const chr *endp;
chr c[1];
/* parse something, get rid of special cases, take shortcuts */
@@ -1442,8 +1356,8 @@ brackpart(struct vars * v,
case PLAIN:
c[0] = v->nextvalue;
NEXT();
- /* shortcut for ordinary chr (not range, not MCCE leader) */
- if (!SEE(RANGE) && !ISCELEADER(v, c[0]))
+ /* shortcut for ordinary chr (not range) */
+ if (!SEE(RANGE))
{
onechr(v, c[0], lp, rp);
return;
@@ -1533,10 +1447,10 @@ brackpart(struct vars * v,
* Certain bits of trickery in lex.c know that this code does not try
* to look past the final bracket of the [. etc.
*/
-static chr * /* just after end of sequence */
+static const chr * /* just after end of sequence */
scanplain(struct vars * v)
{
- chr *endp;
+ const chr *endp;
assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));
NEXT();
@@ -1555,52 +1469,6 @@ scanplain(struct vars * v)
}
/*
- * leaders - process a cvec of collating elements to also include leaders
- * Also gives all characters involved their own colors, which is almost
- * certainly necessary, and sets up little disconnected subNFA.
- */
-static void
-leaders(struct vars * v,
- struct cvec * cv)
-{
- int mcce;
- chr *p;
- chr leader;
- struct state *s;
- struct arc *a;
-
- v->mccepbegin = newstate(v->nfa);
- v->mccepend = newstate(v->nfa);
- NOERR();
-
- for (mcce = 0; mcce < cv->nmcces; mcce++)
- {
- p = cv->mcces[mcce];
- leader = *p;
- if (!haschr(cv, leader))
- {
- addchr(cv, leader);
- s = newstate(v->nfa);
- newarc(v->nfa, PLAIN, subcolor(v->cm, leader),
- v->mccepbegin, s);
- okcolors(v->nfa, v->cm);
- }
- else
- {
- a = findarc(v->mccepbegin, PLAIN,
- GETCOLOR(v->cm, leader));
- assert(a != NULL);
- s = a->to;
- assert(s != v->mccepend);
- }
- p++;
- assert(*p != 0 && *(p + 1) == 0); /* only 2-char MCCEs for now */
- newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);
- okcolors(v->nfa, v->cm);
- }
-}
-
-/*
* onechr - fill in arcs for a plain character, and possible case complements
* This is mostly a shortcut for efficient handling of the common case.
*/
@@ -1622,7 +1490,6 @@ onechr(struct vars * v,
/*
* dovec - fill in arcs for each element of a cvec
- * This one has to handle the messy cases, like MCCEs and MCCE leaders.
*/
static void
dovec(struct vars * v,
@@ -1633,47 +1500,14 @@ dovec(struct vars * v,
chr ch,
from,
to;
- celt ce;
- chr *p;
+ const chr *p;
int i;
- color co;
- struct cvec *leads;
- struct arc *a;
- struct arc *pa; /* arc in prototype */
- struct state *s;
- struct state *ps; /* state in prototype */
-
- /* need a place to store leaders, if any */
- if (nmcces(v) > 0)
- {
- assert(v->mcces != NULL);
- if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs)
- {
- if (v->cv2 != NULL)
- free(v->cv2);
- v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces);
- NOERR();
- leads = v->cv2;
- }
- else
- leads = clearcvec(v->cv2);
- }
- else
- leads = NULL;
- /* first, get the ordinary characters out of the way */
+ /* ordinary characters */
for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
{
ch = *p;
- if (!ISCELEADER(v, ch))
- newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
- else
- {
- assert(singleton(v->cm, ch));
- assert(leads != NULL);
- if (!haschr(leads, ch))
- addchr(leads, ch);
- }
+ newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
}
/* and the ranges */
@@ -1681,103 +1515,9 @@ dovec(struct vars * v,
{
from = *p;
to = *(p + 1);
- while (from <= to && (ce = nextleader(v, from, to)) != NOCELT)
- {
- if (from < ce)
- subrange(v, from, ce - 1, lp, rp);
- assert(singleton(v->cm, ce));
- assert(leads != NULL);
- if (!haschr(leads, ce))
- addchr(leads, ce);
- from = ce + 1;
- }
if (from <= to)
subrange(v, from, to, lp, rp);
}
-
- if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)
- return;
-
- /* deal with the MCCE leaders */
- NOTE(REG_ULOCALE);
- for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--)
- {
- co = GETCOLOR(v->cm, *p);
- a = findarc(lp, PLAIN, co);
- if (a != NULL)
- s = a->to;
- else
- {
- s = newstate(v->nfa);
- NOERR();
- newarc(v->nfa, PLAIN, co, lp, s);
- NOERR();
- }
- pa = findarc(v->mccepbegin, PLAIN, co);
- assert(pa != NULL);
- ps = pa->to;
- newarc(v->nfa, '$', 1, s, rp);
- newarc(v->nfa, '$', 0, s, rp);
- colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp);
- NOERR();
- }
-
- /* and the MCCEs */
- for (i = 0; i < cv->nmcces; i++)
- {
- p = cv->mcces[i];
- assert(singleton(v->cm, *p));
- if (!singleton(v->cm, *p))
- {
- ERR(REG_ASSERT);
- return;
- }
- ch = *p++;
- co = GETCOLOR(v->cm, ch);
- a = findarc(lp, PLAIN, co);
- if (a != NULL)
- s = a->to;
- else
- {
- s = newstate(v->nfa);
- NOERR();
- newarc(v->nfa, PLAIN, co, lp, s);
- NOERR();
- }
- assert(*p != 0); /* at least two chars */
- assert(singleton(v->cm, *p));
- ch = *p++;
- co = GETCOLOR(v->cm, ch);
- assert(*p == 0); /* and only two, for now */
- newarc(v->nfa, PLAIN, co, s, rp);
- NOERR();
- }
-}
-
-/*
- * nextleader - find next MCCE leader within range
- */
-static celt /* NOCELT means none */
-nextleader(struct vars * v,
- chr from,
- chr to)
-{
- int i;
- chr *p;
- chr ch;
- celt it = NOCELT;
-
- if (v->mcces == NULL)
- return it;
-
- for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++)
- {
- ch = *p;
- if (from <= ch && ch <= to)
- if (it == NOCELT || ch < it)
- it = ch;
- }
- return it;
}
/*
@@ -1825,9 +1565,8 @@ subre(struct vars * v,
struct state * begin,
struct state * end)
{
- struct subre *ret;
+ struct subre *ret = v->treefree;
- ret = v->treefree;
if (ret != NULL)
v->treefree = ret->left;
else
@@ -1906,14 +1645,13 @@ static void
optst(struct vars * v,
struct subre * t)
{
- if (t == NULL)
- return;
-
- /* recurse through children */
- if (t->left != NULL)
- optst(v, t->left);
- if (t->right != NULL)
- optst(v, t->right);
+ /*
+ * DGP (2007-11-13): I assume it was the programmer's intent to eventually
+ * come back and add code to optimize subRE trees, but the routine coded
+ * just spends effort traversing the tree and doing nothing. We can do
+ * nothing with less effort.
+ */
+ return;
}
/*
@@ -2207,8 +1945,8 @@ stdump(struct subre * t,
{
fprintf(f, "\n");
dumpcnfa(&t->cnfa, f);
- fprintf(f, "\n");
}
+ fprintf(f, "\n");
if (t->left != NULL)
stdump(t->left, f, nfapresent);
if (t->right != NULL)
@@ -2218,7 +1956,7 @@ stdump(struct subre * t,
/*
* stid - identify a subtree node for dumping
*/
-static char * /* points to buf or constant string */
+static const char * /* points to buf or constant string */
stid(struct subre * t,
char *buf,
size_t bufsize)
diff --git a/src/backend/regex/regerror.c b/src/backend/regex/regerror.c
index 2e4dc6619b..0ad0e90352 100644
--- a/src/backend/regex/regerror.c
+++ b/src/backend/regex/regerror.c
@@ -40,8 +40,8 @@ static char unk[] = "*** unknown regex error code 0x%x ***";
static struct rerr
{
int code;
- char *name;
- char *explain;
+ const char *name;
+ const char *explain;
} rerrs[] =
{
@@ -63,7 +63,7 @@ pg_regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */
size_t errbuf_size) /* available space in errbuf, can be 0 */
{
struct rerr *r;
- char *msg;
+ const char *msg;
char convbuf[sizeof(unk) + 50]; /* 50 = plenty for int */
size_t len;
int icode;
diff --git a/src/include/regex/regcustom.h b/src/include/regex/regcustom.h
index 2e3ac302aa..7362c57d9b 100644
--- a/src/include/regex/regcustom.h
+++ b/src/include/regex/regcustom.h
@@ -47,9 +47,9 @@
/* internal character type and related */
typedef pg_wchar chr; /* the type itself */
typedef unsigned uchr; /* unsigned type that will hold a chr */
-typedef int celt; /* type to hold chr, MCCE number, or NOCELT */
+typedef int celt; /* type to hold chr, or NOCELT */
-#define NOCELT (-1) /* celt value which is not valid chr or MCCE */
+#define NOCELT (-1) /* celt value which is not valid chr */
#define CHR(c) ((unsigned char) (c)) /* turn char literal into chr literal */
#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */
#define CHRBITS 32 /* bits in a chr; must not use sizeof */
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
index d71f942e4a..f78d913265 100644
--- a/src/include/regex/regguts.h
+++ b/src/include/regex/regguts.h
@@ -181,7 +181,7 @@ union tree
#define tcolor colors.ccolor
#define tptr ptrs.pptr
-/* internal per-color structure for the color machinery */
+/* internal per-color descriptor structure for the color machinery */
struct colordesc
{
uchr nchrs; /* number of chars of this color */
@@ -228,11 +228,11 @@ struct colormap
#endif
-
/*
* Interface definitions for locale-interface functions in locale.c.
- * Multi-character collating elements (MCCEs) cause most of the trouble.
*/
+
+/* Representation of a set of characters. */
struct cvec
{
int nchrs; /* number of chrs */
@@ -241,17 +241,9 @@ struct cvec
int nranges; /* number of ranges (chr pairs) */
int rangespace; /* number of chrs possible */
chr *ranges; /* pointer to vector of chr pairs */
- int nmcces; /* number of MCCEs */
- int mccespace; /* number of MCCEs possible */
- int nmccechrs; /* number of chrs used for MCCEs */
- chr *mcces[1]; /* pointers to 0-terminated MCCEs */
- /* and both batches of chrs are on the end */
+ /* both batches of chrs are on the end */
};
-/* caution: this value cannot be changed easily */
-#define MAXMCCE 2 /* length of longest MCCE */
-
-
/*
* definitions for NFA internal representation