summary refs log tree commit diff
path: root/src/include/regex
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/regex')
-rw-r--r-- src/include/regex/cclass.h 99
-rw-r--r-- src/include/regex/cname.h 336
-rw-r--r-- src/include/regex/regcustom.h 64
-rw-r--r-- src/include/regex/regerrs.h 22
-rw-r--r-- src/include/regex/regex.h 251
-rw-r--r-- src/include/regex/regex2.h 174
-rw-r--r-- src/include/regex/regguts.h 393
-rw-r--r-- src/include/regex/utils.h 60
8 files changed, 632 insertions, 767 deletions
diff --git a/src/include/regex/cclass.h b/src/include/regex/cclass.h
deleted file mode 100644
index 8b13c12583..0000000000
--- a/src/include/regex/cclass.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)cclass.h 8.3 (Berkeley) 3/20/94
- */
-
-/* character-class table */
-static struct cclass
-{
- char *name;
- char *chars;
- char *multis;
-} cclasses[] =
-
-{
- {
- "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789", ""
- },
- {
- "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
- ""
- },
- {
- "blank", " \t", ""
- },
- {
- "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
-\25\26\27\30\31\32\33\34\35\36\37\177", ""
- },
- {
- "digit", "0123456789", ""
- },
- {
- "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
- ""
- },
- {
- "lower", "abcdefghijklmnopqrstuvwxyz",
- ""
- },
- {
- "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
- ""
- },
- {
- "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
- ""
- },
- {
- "space", "\t\n\v\f\r ", ""
- },
- {
- "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
- ""
- },
- {
- "xdigit", "0123456789ABCDEFabcdef",
- ""
- },
- {
- NULL, NULL, ""
- }
-};
diff --git a/src/include/regex/cname.h b/src/include/regex/cname.h
deleted file mode 100644
index bff408e4f0..0000000000
--- a/src/include/regex/cname.h
+++ /dev/null
@@ -1,336 +0,0 @@
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)cname.h 8.3 (Berkeley) 3/20/94
- */
-
-/* character-name table */
-static struct cname
-{
- char *name;
- char code;
-} cnames[] =
-
-{
- {
- "NUL", '\0'
- },
- {
- "SOH", '\001'
- },
- {
- "STX", '\002'
- },
- {
- "ETX", '\003'
- },
- {
- "EOT", '\004'
- },
- {
- "ENQ", '\005'
- },
- {
- "ACK", '\006'
- },
- {
- "BEL", '\007'
- },
- {
- "alert", '\007'
- },
- {
- "BS", '\010'
- },
- {
- "backspace", '\b'
- },
- {
- "HT", '\011'
- },
- {
- "tab", '\t'
- },
- {
- "LF", '\012'
- },
- {
- "newline", '\n'
- },
- {
- "VT", '\013'
- },
- {
- "vertical-tab", '\v'
- },
- {
- "FF", '\014'
- },
- {
- "form-feed", '\f'
- },
- {
- "CR", '\015'
- },
- {
- "carriage-return", '\r'
- },
- {
- "SO", '\016'
- },
- {
- "SI", '\017'
- },
- {
- "DLE", '\020'
- },
- {
- "DC1", '\021'
- },
- {
- "DC2", '\022'
- },
- {
- "DC3", '\023'
- },
- {
- "DC4", '\024'
- },
- {
- "NAK", '\025'
- },
- {
- "SYN", '\026'
- },
- {
- "ETB", '\027'
- },
- {
- "CAN", '\030'
- },
- {
- "EM", '\031'
- },
- {
- "SUB", '\032'
- },
- {
- "ESC", '\033'
- },
- {
- "IS4", '\034'
- },
- {
- "FS", '\034'
- },
- {
- "IS3", '\035'
- },
- {
- "GS", '\035'
- },
- {
- "IS2", '\036'
- },
- {
- "RS", '\036'
- },
- {
- "IS1", '\037'
- },
- {
- "US", '\037'
- },
- {
- "space", ' '
- },
- {
- "exclamation-mark", '!'
- },
- {
- "quotation-mark", '"'
- },
- {
- "number-sign", '#'
- },
- {
- "dollar-sign", '$'
- },
- {
- "percent-sign", '%'
- },
- {
- "ampersand", '&'
- },
- {
- "apostrophe", '\''
- },
- {
- "left-parenthesis", '('
- },
- {
- "right-parenthesis", ')'
- },
- {
- "asterisk", '*'
- },
- {
- "plus-sign", '+'
- },
- {
- "comma", ','
- },
- {
- "hyphen", '-'
- },
- {
- "hyphen-minus", '-'
- },
- {
- "period", '.'
- },
- {
- "full-stop", '.'
- },
- {
- "slash", '/'
- },
- {
- "solidus", '/'
- },
- {
- "zero", '0'
- },
- {
- "one", '1'
- },
- {
- "two", '2'
- },
- {
- "three", '3'
- },
- {
- "four", '4'
- },
- {
- "five", '5'
- },
- {
- "six", '6'
- },
- {
- "seven", '7'
- },
- {
- "eight", '8'
- },
- {
- "nine", '9'
- },
- {
- "colon", ':'
- },
- {
- "semicolon", ';'
- },
- {
- "less-than-sign", '<'
- },
- {
- "equals-sign", '='
- },
- {
- "greater-than-sign", '>'
- },
- {
- "question-mark", '?'
- },
- {
- "commercial-at", '@'
- },
- {
- "left-square-bracket", '['
- },
- {
- "backslash", '\\'
- },
- {
- "reverse-solidus", '\\'
- },
- {
- "right-square-bracket", ']'
- },
- {
- "circumflex", '^'
- },
- {
- "circumflex-accent", '^'
- },
- {
- "underscore", '_'
- },
- {
- "low-line", '_'
- },
- {
- "grave-accent", '`'
- },
- {
- "left-brace", '{'
- },
- {
- "left-curly-bracket", '{'
- },
- {
- "vertical-line", '|'
- },
- {
- "right-brace", '}'
- },
- {
- "right-curly-bracket", '}'
- },
- {
- "tilde", '~'
- },
- {
- "DEL", '\177'
- },
- {
- NULL, 0
- }
-};
diff --git a/src/include/regex/regcustom.h b/src/include/regex/regcustom.h
new file mode 100644
index 0000000000..ad6e86dcce
--- /dev/null
+++ b/src/include/regex/regcustom.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+/* headers if any */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <limits.h>
+
+#include "mb/pg_wchar.h"
+
+
+/* overrides for regguts.h definitions, if any */
+#define FUNCPTR(name, args) (*name) args /* expands to the declarator (*name) args */
+#define MALLOC(n) malloc(n) /* allocation goes straight to malloc() */
+#define FREE(p) free(VS(p)) /* p is passed through VS() first; VS is defined outside this chunk */
+#define REALLOC(p,n) realloc(VS(p),n) /* likewise wraps p in VS() before calling realloc() */
+
+/* internal character type and related; chr is an alias for pg_wchar */
+typedef pg_wchar chr; /* the type itself */
+typedef unsigned uchr; /* unsigned type that will hold a chr */
+typedef int celt; /* type to hold chr, MCCE number, or NOCELT */
+#define NOCELT (-1) /* celt value which is not valid chr or MCCE */
+#define CHR(c) ((unsigned char) (c)) /* turn char literal into chr literal; the unsigned char cast keeps it non-negative */
+#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */
+#define CHRBITS 32 /* bits in a chr; must not use sizeof */
+#define CHR_MIN 0x00000000 /* smallest and largest chr; the value */
+#define CHR_MAX 0xfffffffe /* CHR_MAX-CHR_MIN+1 (0xffffffff here) should fit in uchr */
+
+/* functions operating on chr: each isc*() test forwards to the same-named pg_is*() (declared outside this chunk) */
+#define iscalnum(x) pg_isalnum(x)
+#define iscalpha(x) pg_isalpha(x)
+#define iscdigit(x) pg_isdigit(x)
+#define iscspace(x) pg_isspace(x)
+
+/* and pick up the standard header */
+#include "regex.h"
diff --git a/src/include/regex/regerrs.h b/src/include/regex/regerrs.h
new file mode 100644
index 0000000000..3515ee6ad5
--- /dev/null
+++ b/src/include/regex/regerrs.h
@@ -0,0 +1,22 @@
+/*
+ * $Id$
+ */
+
+{ REG_OKAY, "REG_OKAY", "no errors detected" }, /* each entry: error code, its name as a string, explanatory message */
+{ REG_NOMATCH, "REG_NOMATCH", "failed to match" },
+{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" },
+{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
+{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
+{ REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" },
+{ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
+{ REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" },
+{ REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" },
+{ REG_EBRACE, "REG_EBRACE", "braces {} not balanced" },
+{ REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
+{ REG_ERANGE, "REG_ERANGE", "invalid character range" },
+{ REG_ESPACE, "REG_ESPACE", "out of memory" },
+{ REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" },
+{ REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
+{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" },
+{ REG_MIXED, "REG_MIXED", "character widths of regex and string differ" },
+{ REG_BADOPT, "REG_BADOPT", "invalid embedded option" },
diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h
index e0889a1108..9c0d3b9703 100644
--- a/src/include/regex/regex.h
+++ b/src/include/regex/regex.h
@@ -1,110 +1,165 @@
-/*-
- * Copyright (c) 1992 Henry Spencer.
- * Copyright (c) 1992, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer of the University of Toronto.
+#ifndef _REGEX_H_
+#define _REGEX_H_ /* never again */
+/*
+ * regular expressions
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
*
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * @(#)regex.h 8.2 (Berkeley) 1/3/94
+ * $Id$
*/
-#ifndef _REGEX_H_
-#define _REGEX_H_
-
-#include <sys/types.h>
+/*
+ * Add your own defines, if needed, here.
+ */
#include "mb/pg_wchar.h"
-/* types */
-typedef off_t regoff_t;
+/*
+ * interface types etc.
+ */
-typedef struct
-{
- int re_magic;
- size_t re_nsub; /* number of parenthesized subexpressions */
- const pg_wchar *re_endp; /* end pointer for REG_PEND */
- struct re_guts *re_g; /* none of your business :-) */
- pg_wchar *patsave; /* me too :-) */
+/*
+ * regoff_t has to be large enough to hold either off_t or ssize_t,
+ * and must be signed; it's only a guess that long is suitable.
+ */
+typedef long regoff_t;
+
+/*
+ * other interface types
+ */
+
+/* the biggie, a compiled RE (or rather, a front end to same) */
+typedef struct {
+ int re_magic; /* magic number */
+ size_t re_nsub; /* number of subexpressions */
+ long re_info; /* information about RE */
+# define REG_UBACKREF 000001 /* REG_U*: distinct octal bit values; presumably the bits reported in re_info -- confirm */
+# define REG_ULOOKAHEAD 000002
+# define REG_UBOUNDS 000004
+# define REG_UBRACES 000010
+# define REG_UBSALNUM 000020
+# define REG_UPBOTCH 000040
+# define REG_UBBS 000100
+# define REG_UNONPOSIX 000200
+# define REG_UUNSPEC 000400
+# define REG_UUNPORT 001000
+# define REG_ULOCALE 002000
+# define REG_UEMPTYMATCH 004000
+# define REG_UIMPOSSIBLE 010000
+# define REG_USHORTEST 020000
+ int re_csize; /* sizeof(character) */
+ char *re_endp; /* backward compatibility kludge */
+ /* the rest is opaque pointers to hidden innards */
+ char *re_guts; /* `char *' is more portable than `void *' */
+ char *re_fns; /* second opaque pointer; what it points to is not visible in this header */
} regex_t;
-typedef struct
-{
- regoff_t rm_so; /* start of match */
- regoff_t rm_eo; /* end of match */
+/* result reporting (may acquire more fields later) */
+typedef struct {
+ regoff_t rm_so; /* start of substring */
+ regoff_t rm_eo; /* end of substring */
} regmatch_t;
-/* regcomp() flags */
-#define REG_BASIC 0000
-#define REG_EXTENDED 0001
-#define REG_ICASE 0002
-#define REG_NOSUB 0004
-#define REG_NEWLINE 0010
-#define REG_NOSPEC 0020
-#define REG_PEND 0040
-#define REG_DUMP 0200
-
-/* regerror() flags */
-#define REG_NOMATCH 1
-#define REG_BADPAT 2
-#define REG_ECOLLATE 3
-#define REG_ECTYPE 4
-#define REG_EESCAPE 5
-#define REG_ESUBREG 6
-#define REG_EBRACK 7
-#define REG_EPAREN 8
-#define REG_EBRACE 9
-#define REG_BADBR 10
-#define REG_ERANGE 11
-#define REG_ESPACE 12
-#define REG_BADRPT 13
-#define REG_EMPTY 14
-#define REG_ASSERT 15
-#define REG_INVARG 16
-#define REG_ATOI 255 /* convert name to number (!) */
-#define REG_ITOA 0400 /* convert number to name (!) */
-
-/* regexec() flags */
-#define REG_NOTBOL 00001
-#define REG_NOTEOL 00002
-#define REG_STARTEND 00004
-#define REG_TRACE 00400 /* tracing of execution */
-#define REG_LARGE 01000 /* force large representation */
-#define REG_BACKR 02000 /* force use of backref code */
-
-extern int pg_regcomp(regex_t *preg, const char *pattern, int cflags);
-extern size_t pg_regerror(int errcode, const regex_t *preg,
- char *errbuf, size_t errbuf_size);
-extern int pg_regexec(const regex_t *preg, const char *string,
- size_t nmatch,
- regmatch_t *pmatch, int eflags);
-extern void pg_regfree(regex_t *preg);
-
-#endif /* !_REGEX_H_ */
+/* supplementary control and reporting */
+typedef struct {
+ regmatch_t rm_extend; /* see REG_EXPECT */
+} rm_detail_t;
+
+
+
+/*
+ * regex compilation flags (octal bit values)
+ */
+#define REG_BASIC 000000 /* BREs (convenience) */
+#define REG_EXTENDED 000001 /* EREs */
+#define REG_ADVF 000002 /* advanced features in EREs */
+#define REG_ADVANCED 000003 /* AREs (which are also EREs); equals REG_EXTENDED|REG_ADVF */
+#define REG_QUOTE 000004 /* no special characters, none */
+#define REG_NOSPEC REG_QUOTE /* historical synonym */
+#define REG_ICASE 000010 /* ignore case */
+#define REG_NOSUB 000020 /* don't care about subexpressions */
+#define REG_EXPANDED 000040 /* expanded format, white space & comments */
+#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */
+#define REG_NLANCH 000200 /* ^ matches after \n, $ before */
+#define REG_NEWLINE 000300 /* newlines are line terminators; equals REG_NLSTOP|REG_NLANCH */
+#define REG_PEND 000400 /* ugh -- backward-compatibility hack */
+#define REG_EXPECT 001000 /* report details on partial/limited matches */
+#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */
+#define REG_DUMP 004000 /* none of your business :-) */
+#define REG_FAKE 010000 /* none of your business :-) */
+#define REG_PROGRESS 020000 /* none of your business :-) */
+
+
+
+/*
+ * regex execution flags
+ */
+#define REG_NOTBOL 0001 /* BOS is not BOL */
+#define REG_NOTEOL 0002 /* EOS is not EOL */
+#define REG_STARTEND 0004 /* backward compatibility kludge */
+#define REG_FTRACE 0010 /* none of your business */
+#define REG_MTRACE 0020 /* none of your business */
+#define REG_SMALL 0040 /* none of your business */
+
+
+/*
+ * error reporting
+ * Be careful if modifying the list of error codes -- the table used by
+ * regerror() is generated automatically from this file!
+ */
+#define REG_OKAY 0 /* no errors detected */
+#define REG_NOMATCH 1 /* failed to match */
+#define REG_BADPAT 2 /* invalid regexp */
+#define REG_ECOLLATE 3 /* invalid collating element */
+#define REG_ECTYPE 4 /* invalid character class */
+#define REG_EESCAPE 5 /* invalid escape \ sequence */
+#define REG_ESUBREG 6 /* invalid backreference number */
+#define REG_EBRACK 7 /* brackets [] not balanced */
+#define REG_EPAREN 8 /* parentheses () not balanced */
+#define REG_EBRACE 9 /* braces {} not balanced */
+#define REG_BADBR 10 /* invalid repetition count(s) */
+#define REG_ERANGE 11 /* invalid character range */
+#define REG_ESPACE 12 /* out of memory */
+#define REG_BADRPT 13 /* quantifier operand invalid */
+#define REG_ASSERT 15 /* "can't happen" -- you found a bug */
+#define REG_INVARG 16 /* invalid argument to regex function */
+#define REG_MIXED 17 /* character widths of regex and string differ */
+#define REG_BADOPT 18 /* invalid embedded option */
+/* two specials for debugging and testing */
+#define REG_ATOI 101 /* convert error-code name to number */
+#define REG_ITOA 102 /* convert error-code number to name */
+
+
+
+/*
+ * the prototypes for exported functions (parameters are declared without names)
+ */
+extern int pg_regcomp(regex_t *, const pg_wchar *, size_t, int); /* NOTE(review): presumably (re, pattern, pattern length, compile flags) -- confirm against the definition */
+extern int pg_regexec(regex_t *, const pg_wchar *, size_t, rm_detail_t *, size_t, regmatch_t [], int); /* NOTE(review): presumably (re, string, string length, details, nmatch, pmatch, exec flags) -- confirm */
+extern void pg_regfree(regex_t *);
+extern size_t pg_regerror(int, const regex_t *, char *, size_t); /* same parameter types and order as the prototype this patch removes (errcode, preg, errbuf, errbuf_size) */
+
+#endif /* _REGEX_H_ */
diff --git a/src/include/regex/regex2.h b/src/include/regex/regex2.h
deleted file mode 100644
index 5ceed7fe9c..0000000000
--- a/src/include/regex/regex2.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)regex2.h 8.4 (Berkeley) 3/20/94
- */
-
-#include <limits.h>
-
-/*
- * internals of regex_t
- */
-#define MAGIC1 ((('r'^0200)<<8) | 'e')
-
-/*
- * The internal representation is a *strip*, a sequence of
- * operators ending with an endmarker. (Some terminology etc. is a
- * historical relic of earlier versions which used multiple strips.)
- * Certain oddities in the representation are there to permit running
- * the machinery backwards; in particular, any deviation from sequential
- * flow must be marked at both its source and its destination. Some
- * fine points:
- *
- * - OPLUS_ and O_PLUS are *inside* the loop they create.
- * - OQUEST_ and O_QUEST are *outside* the bypass they create.
- * - OCH_ and O_CH are *outside* the multi-way branch they create, while
- * OOR1 and OOR2 are respectively the end and the beginning of one of
- * the branches. Note that there is an implicit OOR2 following OCH_
- * and an implicit OOR1 preceding O_CH.
- *
- * In state representations, an operator's bit is on to signify a state
- * immediately *preceding* "execution" of that operator.
- */
-typedef unsigned long sop; /* strip operator */
-typedef long sopno;
-
-#define OPRMASK ((sop) 0xf8000000)
-#define OPDMASK ((sop) 0x07ffffff)
-#define OPSHIFT ((unsigned)27)
-#define OP(n) ((n)&OPRMASK)
-#define OPND(n) ((n)&OPDMASK)
-#define SOP(op, opnd) ((op)|(opnd))
-/* operators meaning operand */
-/* (back, fwd are offsets) */
-#define OEND ((size_t)1<<OPSHIFT) /* endmarker - */
-#define OCHAR ((size_t)2<<OPSHIFT) /* character unsigned char */
-#define OBOL ((size_t)3<<OPSHIFT) /* left anchor - */
-#define OEOL ((size_t)4<<OPSHIFT) /* right anchor - */
-#define OANY ((size_t)5<<OPSHIFT) /* . - */
-#define OANYOF ((size_t)6<<OPSHIFT) /* [...] set number */
-#define OBACK_ ((size_t)7<<OPSHIFT) /* begin \d paren number */
-#define O_BACK ((size_t)8<<OPSHIFT) /* end \d paren number */
-#define OPLUS_ ((size_t)9<<OPSHIFT) /* + prefix fwd to suffix */
-#define O_PLUS ((size_t)10<<OPSHIFT) /* + suffix back to prefix */
-#define OQUEST_ ((size_t)11<<OPSHIFT) /* ? prefix fwd to suffix */
-#define O_QUEST ((size_t)12<<OPSHIFT) /* ? suffix back to prefix */
-#define OLPAREN ((size_t)13<<OPSHIFT) /* ( fwd to ) */
-#define ORPAREN ((size_t)14<<OPSHIFT) /* ) back to ( */
-#define OCH_ ((size_t)15<<OPSHIFT) /* begin choice fwd to OOR2 */
-#define OOR1 ((size_t)16<<OPSHIFT) /* | pt. 1 back to OOR1 or
- * OCH_ */
-#define OOR2 ((size_t)17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or
- * O_CH */
-#define O_CH ((size_t)18<<OPSHIFT) /* end choice back to OOR1 */
-#define OBOW ((size_t)19<<OPSHIFT) /* begin word - */
-#define OEOW ((size_t)20<<OPSHIFT) /* end word - */
-
-/*
- * Structure for [] character-set representation. Character sets are
- * done as bit vectors, grouped 8 to a byte vector for compactness.
- * The individual set therefore has both a pointer to the byte vector
- * and a mask to pick out the relevant bit of each byte. A hash code
- * simplifies testing whether two sets could be identical.
- *
- * This will get trickier for multicharacter collating elements. As
- * preliminary hooks for dealing with such things, we also carry along
- * a string of multi-character elements, and decide the size of the
- * vectors at run time.
- */
-typedef struct
-{
- uch *ptr; /* -> uch [csetsize] */
- uch mask; /* bit within array */
- pg_wchar hash; /* hash code */
- unsigned int lc; /* leading character (character-set) */
- size_t smultis;
- char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
-} cset;
-
-/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
-#define CHlc(c) (((unsigned)(c)&0xff0000)>>16)
-#define CHadd(cs, c) ((cs)->ptr[(unsigned)(c)&0xffff] |= (cs)->mask, (cs)->hash += (unsigned)(c)&0xffff,\
- (cs)->lc = CHlc(c))
-#define CHsub(cs, c) ((cs)->ptr[(unsigned)(c)&0xffff] &= ~(cs)->mask, (cs)->hash -= (unsigned)(c)&0xffff)
-#define CHIN(cs, c) ((cs)->ptr[(unsigned)(c)&0xffff] & (cs)->mask && \
- ((cs)->lc == CHlc(c)))
-#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal
- * fns */
-#define MCsub(p, cs, cp) mcsub(p, cs, cp)
-#define MCin(p, cs, cp) mcin(p, cs, cp)
-
-/* stuff for character categories */
-typedef unsigned char cat_t;
-
-/*
- * main compiled-expression structure
- */
-struct re_guts
-{
- int magic;
-#define MAGIC2 ((('R'^0200)<<8)|'E')
- sop *strip; /* malloced area for strip */
- int csetsize; /* number of bits in a cset vector */
- int ncsets; /* number of csets in use */
- cset *sets; /* -> cset [ncsets] */
- uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
- int cflags; /* copy of regcomp() cflags argument */
- sopno nstates; /* = number of sops */
- sopno firststate; /* the initial OEND (normally 0) */
- sopno laststate; /* the final OEND */
- int iflags; /* internal flags */
-#define USEBOL 01 /* used ^ */
-#define USEEOL 02 /* used $ */
-#define BAD 04 /* something wrong */
- int nbol; /* number of ^ used */
- int neol; /* number of $ used */
- int ncategories; /* how many character categories */
- cat_t *categories; /* ->catspace[-CHAR_MIN] */
- pg_wchar *must; /* match must contain this string */
- int mlen; /* length of must */
- size_t nsub; /* copy of re_nsub */
- int backrefs; /* does it use back references? */
- sopno nplus; /* how deep does it nest +s? */
- /* catspace must be last */
- cat_t catspace[1]; /* actually [NC] */
-};
-
-/* misc utilities */
-#define OUT (16777216+1) /* 16777216 == 2^24 == 3 bytes */
-
-#define ISWORD(c) (((c) >= 0 && (c) <= UCHAR_MAX) && \
- (isalnum((unsigned char) (c)) || (c) == '_'))
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
new file mode 100644
index 0000000000..8f36d98a1f
--- /dev/null
+++ b/src/include/regex/regguts.h
@@ -0,0 +1,393 @@
+/*
+ * Internal interface definitions, etc., for the reg package
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $Id$
+ */
+
+
+
+/*
+ * Environmental customization. It should not (I hope) be necessary to
+ * alter the file you are now reading -- regcustom.h should handle it all,
+ * given care here and elsewhere.
+ */
+#include "regcustom.h"
+
+
+
+/*
+ * Things that regcustom.h might override.
+ */
+
+/* assertions */
+#ifndef assert
+# ifndef REG_DEBUG
+# define NDEBUG /* no assertions */
+# endif
+#include <assert.h>
+#endif
+
+/* voids */
+#ifndef DISCARD
+#define DISCARD void /* for throwing values away */
+#endif
+#ifndef VS
+#define VS(x) ((void *)(x)) /* cast something to generic ptr */
+#endif
+
+/* function-pointer declarator */
+#ifndef FUNCPTR
+#define FUNCPTR(name, args) (*name) args
+#endif
+
+/* memory allocation */
+#ifndef MALLOC
+#define MALLOC(n) malloc(n)
+#endif
+#ifndef REALLOC
+#define REALLOC(p, n) realloc(VS(p), n)
+#endif
+#ifndef FREE
+#define FREE(p) free(VS(p))
+#endif
+
+/* want size of a char in bits, and max value in bounded quantifiers */
+#ifndef CHAR_BIT
+#include <limits.h>
+#endif
+#ifndef _POSIX2_RE_DUP_MAX
+#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */
+#endif
+
+
+
+/*
+ * misc
+ */
+
+#define NOTREACHED 0
+#define xxx 1
+
+#define DUPMAX _POSIX2_RE_DUP_MAX /* max value in a bounded quantifier */
+#define INFINITY (DUPMAX+1) /* one more than DUPMAX; NOTE(review): C99 <math.h> also defines a macro named INFINITY -- beware if both are in scope */
+
+#define REMAGIC 0xfed7 /* magic number for main struct */
+
+
+
+/*
+ * debugging facilities
+ */
+#ifdef REG_DEBUG
+/* FDEBUG does finite-state tracing */
+#define FDEBUG(arglist) { if (v->eflags&REG_FTRACE) printf arglist; }
+/* MDEBUG does higher-level tracing */
+#define MDEBUG(arglist) { if (v->eflags&REG_MTRACE) printf arglist; }
+#else
+#define FDEBUG(arglist) {}
+#define MDEBUG(arglist) {}
+#endif
+
+
+
+/*
+ * bitmap manipulation
+ */
+#define UBITS (CHAR_BIT * sizeof(unsigned)) /* number of bits in an unsigned */
+#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) /* set bit sn of bitmap uv, which is indexed in UBITS-bit words */
+#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) /* nonzero iff bit sn of bitmap uv is set */
+
+
+
+/*
+ * We dissect a chr into byts for colormap table indexing. Here we define
+ * a byt, which will be the same as a byte on most machines... The exact
+ * size of a byt is not critical, but about 8 bits is good, and extraction
+ * of 8-bit chunks is sometimes especially fast.
+ */
+#ifndef BYTBITS
+#define BYTBITS 8 /* bits in a byt */
+#endif
+#define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt value */
+#define BYTMASK (BYTTAB-1) /* bit mask for byt */
+#define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS)
+/* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */
+
+
+
+/*
+ * As soon as possible, we map chrs into equivalence classes -- "colors" --
+ * which are of much more manageable number.
+ */
+typedef short color; /* colors of characters */
+typedef int pcolor; /* what color promotes to */
+#define COLORLESS (-1) /* impossible color */
+#define WHITE 0 /* default color, parent of all others */
+
+
+
+/*
+ * A colormap is a tree -- more precisely, a DAG -- indexed at each level
+ * by a byt of the chr, to map the chr to a color efficiently. Because
+ * lower sections of the tree can be shared, it can exploit the usual
+ * sparseness of such a mapping table. The tree is always NBYTS levels
+ * deep (in the past it was shallower during construction but was "filled"
+ * to full depth at the end of that); areas that are unaltered as yet point
+ * to "fill blocks" which are entirely WHITE in color.
+ */
+
+/* the tree itself: a node is viewed either as a table of colors or as a table of pointers to other nodes */
+struct colors {
+ color ccolor[BYTTAB]; /* one color per byt value */
+};
+struct ptrs {
+ union tree *pptr[BYTTAB]; /* one node pointer per byt value */
+};
+union tree {
+ struct colors colors; /* color-table view (used for the B0 level in GETCOLOR) */
+ struct ptrs ptrs; /* pointer-table view (used for the levels above B0) */
+};
+#define tcolor colors.ccolor /* shorthand so that node->tcolor[i] works */
+#define tptr ptrs.pptr /* shorthand so that node->tptr[i] works */
+
+/* internal per-color structure for the color machinery */
+struct colordesc {
+ uchr nchrs; /* number of chars of this color */
+ color sub; /* open subcolor (if any); free chain ptr */
+# define NOSUB COLORLESS /* presumably the value of sub when there is no open subcolor -- confirm */
+ struct arc *arcs; /* color chain */
+ int flags; /* bitwise OR of FREECOL and PSEUDO */
+# define FREECOL 01 /* currently free */
+# define PSEUDO 02 /* pseudocolor, no real chars */
+# define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) /* nonzero iff FREECOL is set in cd->flags */
+ union tree *block; /* block of solid color, if any */
+};
+
+/* the color map itself */
+struct colormap {
+ int magic; /* validity marker; presumably set to CMMAGIC -- confirm */
+# define CMMAGIC 0x876
+ struct vars *v; /* for compile error reporting */
+ size_t ncds; /* number of colordescs */
+ size_t max; /* highest in use (used as an index into cd[], see CDEND) */
+ color free; /* beginning of free chain (if non-0) */
+ struct colordesc *cd; /* the colordesc vector; presumably points at cdspace initially -- confirm */
+# define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) /* address just past cd[max] */
+# define NINLINECDS ((size_t)10) /* number of colordescs embedded in the struct */
+ struct colordesc cdspace[NINLINECDS]; /* in-struct colordesc storage */
+ union tree tree[NBYTS]; /* tree top, plus fill blocks */
+};
+
+/* optimization magic to do fast chr->color mapping: Bn(c) extracts byt n of c, GETCOLOR() walks the tree with them */
+#define B0(c) ((c) & BYTMASK) /* lowest-order byt of c */
+#define B1(c) (((c)>>BYTBITS) & BYTMASK) /* second byt of c */
+#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) /* third byt of c */
+#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) /* fourth byt of c */
+#if NBYTS == 1
+#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)])
+#endif
+/* beware: for NBYTS>1, GETCOLOR() evaluates its 2nd argument more than once, so do not pass an expression with side effects */
+#if NBYTS == 2
+#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
+#endif
+#if NBYTS == 4
+#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)])
+#endif /* NOTE(review): no GETCOLOR() is defined when NBYTS == 3 */
+
+
+
+/*
+ * Interface definitions for locale-interface functions in locale.c.
+ * Multi-character collating elements (MCCEs) cause most of the trouble.
+ */
+struct cvec {
+ int nchrs; /* number of chrs */
+ int chrspace; /* number of chrs possible */
+ chr *chrs; /* pointer to vector of chrs */
+ int nranges; /* number of ranges (chr pairs) */
+ int rangespace; /* number of ranges possible -- NOTE(review): originally said "chrs", which looks copied from chrspace; confirm the unit */
+ chr *ranges; /* pointer to vector of chr pairs */
+ int nmcces; /* number of MCCEs */
+ int mccespace; /* number of MCCEs possible */
+ int nmccechrs; /* number of chrs used for MCCEs */
+ chr *mcces[1]; /* pointers to 0-terminated MCCEs; presumably over-allocated past [1] -- confirm */
+ /* and both batches of chrs are on the end */
+};
+
+/* caution: this value cannot be changed easily */
+#define MAXMCCE 2 /* length of longest MCCE */
+
+
+
+/*
+ * definitions for NFA internal representation
+ *
+ * Having a "from" pointer within each arc may seem redundant, but it
+ * saves a lot of hassle.
+ */
+struct state;
+
+struct arc {
+ int type; /* arc type; ARCFREE is the only value defined in this header */
+# define ARCFREE '\0' /* presumably the type of an arc that is not in use -- confirm */
+ color co; /* the arc's color */
+ struct state *from; /* where it's from (and contained within) */
+ struct state *to; /* where it's to */
+ struct arc *outchain; /* *from's outs chain or free chain */
+# define freechain outchain /* alias: the same member serves as the free-chain link */
+ struct arc *inchain; /* *to's ins chain */
+ struct arc *colorchain; /* color's arc chain */
+};
+
+struct arcbatch { /* for bulk allocation of arcs */
+ struct arcbatch *next; /* link to another arcbatch */
+# define ABSIZE 10 /* number of arcs in one batch */
+ struct arc a[ABSIZE]; /* the arcs of this batch */
+};
+
+struct state {
+ int no; /* state number */
+# define FREESTATE (-1) /* presumably the value of no for a state that is not in use -- confirm */
+ char flag; /* marks special states */
+ int nins; /* number of inarcs */
+ struct arc *ins; /* chain of inarcs (see arc.inchain) */
+ int nouts; /* number of outarcs */
+ struct arc *outs; /* chain of outarcs (see arc.outchain) */
+ struct arc *free; /* chain of free arcs (see arc.freechain) */
+ struct state *tmp; /* temporary for traversal algorithms */
+ struct state *next; /* chain for traversing all */
+ struct state *prev; /* back chain */
+ struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */
+ int noas; /* number of arcs used in first arcbatch */
+};
+
+struct nfa {
+ struct state *pre; /* pre-initial state */
+ struct state *init; /* initial state */
+ struct state *final; /* final state */
+ struct state *post; /* post-final state */
+ int nstates; /* for numbering states */
+ struct state *states; /* state-chain header */
+ struct state *slast; /* tail of the chain */
+ struct state *free; /* free list */
+ struct colormap *cm; /* the color map */
+ color bos[2]; /* colors, if any, assigned to BOS and BOL */
+ color eos[2]; /* colors, if any, assigned to EOS and EOL */
+ struct vars *v; /* simplifies compile error reporting */
+ struct nfa *parent; /* parent NFA, if any */
+};
+
+
+
+/*
+ * definitions for compacted NFA
+ */
+struct carc {
+ color co; /* COLORLESS is list terminator */
+ int to; /* state number */
+};
+
+struct cnfa {
+ int nstates; /* number of states; 0 is what NULLCNFA() tests for */
+ int ncolors; /* number of colors */
+ int flags; /* bit flags; HASLACONS is the only one defined here */
+# define HASLACONS 01 /* uses lookahead constraints */
+ int pre; /* setup state number */
+ int post; /* teardown state number */
+ color bos[2]; /* colors, if any, assigned to BOS and BOL */
+ color eos[2]; /* colors, if any, assigned to EOS and EOL */
+ struct carc **states; /* vector of pointers to outarc lists */
+ struct carc *arcs; /* the area for the lists */
+};
+#define ZAPCNFA(cnfa) ((cnfa).nstates = 0) /* mark a cnfa (the struct, not a pointer) as null; only nstates is touched */
+#define NULLCNFA(cnfa) ((cnfa).nstates == 0) /* true iff the cnfa (the struct, not a pointer) has nstates == 0 */
+
+
+
+/*
+ * subexpression tree
+ */
+struct subre {
+ char op; /* '|', '.' (concat), 'b' (backref), '(', '=' */
+ char flags; /* bitwise OR of the flag bits defined below */
+# define LONGER 01 /* prefers longer match */
+# define SHORTER 02 /* prefers shorter match */
+# define MIXED 04 /* mixed preference below */
+# define CAP 010 /* capturing parens below */
+# define BACKR 020 /* back reference below */
+# define INUSE 0100 /* in use in final tree */
+# define LOCAL 03 /* bits which may not propagate up (LONGER|SHORTER) */
+# define LMIX(f) ((f)<<2) /* LONGER -> MIXED */
+# define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */
+# define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) /* f without the LOCAL bits, plus MIXED if f has both LONGER and SHORTER */
+# define MESSY(f) ((f)&(MIXED|CAP|BACKR)) /* nonzero iff any of MIXED, CAP, BACKR is set */
+# define PREF(f) ((f)&LOCAL) /* just the LONGER/SHORTER bits of f */
+# define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) /* f1's preference bits if it has any, else f2's */
+# define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) /* UP() of both flag sets ORed together, plus PREF2(f1, f2) */
+ short retry; /* index into retry memory */
+ int subno; /* subexpression number (for 'b' and '(') */
+ short min; /* min repetitions, for backref only */
+ short max; /* max repetitions, for backref only */
+ struct subre *left; /* left child, if any (also freelist chain) */
+ struct subre *right; /* right child, if any */
+ struct state *begin; /* outarcs from here... */
+ struct state *end; /* ...ending in inarcs here */
+ struct cnfa cnfa; /* compacted NFA, if any */
+ struct subre *chain; /* for bookkeeping and error cleanup */
+};
+
+
+
+/*
+ * table of function pointers for generic manipulation functions
+ * A regex_t's re_fns points to one of these.
+ */
+struct fns { /* currently holds a single function pointer */
+ void FUNCPTR(free, (regex_t *)); /* with the default FUNCPTR this declares: void (*free)(regex_t *) */
+};
+
+
+
+/*
+ * the insides of a regex_t, hidden behind a void *
+ */
+struct guts {
+ int magic; /* validity marker; presumably set to GUTSMAGIC -- confirm */
+# define GUTSMAGIC 0xfed9
+ int cflags; /* copy of compile flags */
+ long info; /* copy of re_info */
+ size_t nsub; /* copy of re_nsub */
+ struct subre *tree; /* presumably the root of the subexpression tree -- confirm */
+ struct cnfa search; /* for fast preliminary search */
+ int ntree; /* NOTE(review): meaning not evident from this header -- confirm against the compiler code */
+ struct colormap cmap; /* the color map, embedded by value */
+ int FUNCPTR(compare, (const chr *, const chr *, size_t)); /* function pointer taking two chr strings and a size_t; presumably a length-limited comparison -- confirm */
+ struct subre *lacons; /* lookahead-constraint vector */
+ int nlacons; /* size of lacons */
+};
diff --git a/src/include/regex/utils.h b/src/include/regex/utils.h
deleted file mode 100644
index 5831122f7c..0000000000
--- a/src/include/regex/utils.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)utils.h 8.3 (Berkeley) 3/20/94
- */
-
-#ifndef _REGEX_UTILS_H
-#define _REGEX_UTILS_H
-
-#include <limits.h>
-
-/* utility definitions */
-#define DUPMAX 100000000 /* xxx is this right? */
-#define INFINITY (DUPMAX + 1)
-
-#define NC (SHRT_MAX - SHRT_MIN + 1)
-
-typedef unsigned char uch;
-
-/* switch off assertions (if not already off) if no REDEBUG */
-#ifndef REDEBUG
-#ifndef NDEBUG
-#define NDEBUG /* no assertions please */
-#endif
-#endif
-
-#endif /* _REGEX_UTILS_H */