diff options
Diffstat (limited to 'src/include/regex')
-rw-r--r-- | src/include/regex/cclass.h | 99 | ||||
-rw-r--r-- | src/include/regex/cname.h | 336 | ||||
-rw-r--r-- | src/include/regex/regcustom.h | 64 | ||||
-rw-r--r-- | src/include/regex/regerrs.h | 22 | ||||
-rw-r--r-- | src/include/regex/regex.h | 251 | ||||
-rw-r--r-- | src/include/regex/regex2.h | 174 | ||||
-rw-r--r-- | src/include/regex/regguts.h | 393 | ||||
-rw-r--r-- | src/include/regex/utils.h | 60 |
8 files changed, 632 insertions, 767 deletions
diff --git a/src/include/regex/cclass.h b/src/include/regex/cclass.h deleted file mode 100644 index 8b13c12583..0000000000 --- a/src/include/regex/cclass.h +++ /dev/null @@ -1,99 +0,0 @@ -/*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer. - * Copyright (c) 1992, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Henry Spencer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)cclass.h 8.3 (Berkeley) 3/20/94 - */ - -/* character-class table */ -static struct cclass -{ - char *name; - char *chars; - char *multis; -} cclasses[] = - -{ - { - "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789", "" - }, - { - "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", - "" - }, - { - "blank", " \t", "" - }, - { - "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ -\25\26\27\30\31\32\33\34\35\36\37\177", "" - }, - { - "digit", "0123456789", "" - }, - { - "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - "" - }, - { - "lower", "abcdefghijklmnopqrstuvwxyz", - "" - }, - { - "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", - "" - }, - { - "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - "" - }, - { - "space", "\t\n\v\f\r ", "" - }, - { - "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - "" - }, - { - "xdigit", "0123456789ABCDEFabcdef", - "" - }, - { - NULL, NULL, "" - } -}; diff --git a/src/include/regex/cname.h b/src/include/regex/cname.h deleted file mode 100644 index bff408e4f0..0000000000 --- a/src/include/regex/cname.h +++ /dev/null @@ -1,336 +0,0 @@ -/*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer. - * Copyright (c) 1992, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Henry Spencer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)cname.h 8.3 (Berkeley) 3/20/94 - */ - -/* character-name table */ -static struct cname -{ - char *name; - char code; -} cnames[] = - -{ - { - "NUL", '\0' - }, - { - "SOH", '\001' - }, - { - "STX", '\002' - }, - { - "ETX", '\003' - }, - { - "EOT", '\004' - }, - { - "ENQ", '\005' - }, - { - "ACK", '\006' - }, - { - "BEL", '\007' - }, - { - "alert", '\007' - }, - { - "BS", '\010' - }, - { - "backspace", '\b' - }, - { - "HT", '\011' - }, - { - "tab", '\t' - }, - { - "LF", '\012' - }, - { - "newline", '\n' - }, - { - "VT", '\013' - }, - { - "vertical-tab", '\v' - }, - { - "FF", '\014' - }, - { - "form-feed", '\f' - }, - { - "CR", '\015' - }, - { - "carriage-return", '\r' - }, - { - "SO", '\016' - }, - { - "SI", '\017' - }, - { - "DLE", '\020' - }, - { - "DC1", '\021' - }, - { - "DC2", '\022' - }, - { - "DC3", '\023' - }, - { - "DC4", '\024' - }, - { - "NAK", '\025' - }, - { - "SYN", '\026' - }, - { - "ETB", '\027' - }, - { - "CAN", '\030' - }, - { - "EM", '\031' - }, - { - "SUB", '\032' - }, - { - "ESC", '\033' - }, - { - "IS4", '\034' - }, - { - "FS", '\034' - }, - { - "IS3", '\035' - }, - { - "GS", '\035' - }, - { - "IS2", '\036' - }, - { - "RS", '\036' - }, - { - "IS1", '\037' - }, - { - "US", '\037' - }, - { - "space", ' ' - }, - { - "exclamation-mark", '!' - }, - { - "quotation-mark", '"' - }, - { - "number-sign", '#' - }, - { - "dollar-sign", '$' - }, - { - "percent-sign", '%' - }, - { - "ampersand", '&' - }, - { - "apostrophe", '\'' - }, - { - "left-parenthesis", '(' - }, - { - "right-parenthesis", ')' - }, - { - "asterisk", '*' - }, - { - "plus-sign", '+' - }, - { - "comma", ',' - }, - { - "hyphen", '-' - }, - { - "hyphen-minus", '-' - }, - { - "period", '.' - }, - { - "full-stop", '.' - }, - { - "slash", '/' - }, - { - "solidus", '/' - }, - { - "zero", '0' - }, - { - "one", '1' - }, - { - "two", '2' - }, - { - "three", '3' - }, - { - "four", '4' - }, - { - "five", '5' - }, - { - "six", '6' - }, - { - "seven", '7' - }, - { - "eight", '8' - }, - { - "nine", '9' - }, - { - "colon", ':' - }, - { - "semicolon", ';' - }, - { - "less-than-sign", '<' - }, - { - "equals-sign", '=' - }, - { - "greater-than-sign", '>' - }, - { - "question-mark", '?' - }, - { - "commercial-at", '@' - }, - { - "left-square-bracket", '[' - }, - { - "backslash", '\\' - }, - { - "reverse-solidus", '\\' - }, - { - "right-square-bracket", ']' - }, - { - "circumflex", '^' - }, - { - "circumflex-accent", '^' - }, - { - "underscore", '_' - }, - { - "low-line", '_' - }, - { - "grave-accent", '`' - }, - { - "left-brace", '{' - }, - { - "left-curly-bracket", '{' - }, - { - "vertical-line", '|' - }, - { - "right-brace", '}' - }, - { - "right-curly-bracket", '}' - }, - { - "tilde", '~' - }, - { - "DEL", '\177' - }, - { - NULL, 0 - } -}; diff --git a/src/include/regex/regcustom.h b/src/include/regex/regcustom.h new file mode 100644 index 0000000000..ad6e86dcce --- /dev/null +++ b/src/include/regex/regcustom.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $Id$ + */ + +/* headers if any */ +#include "postgres.h" + +#include <ctype.h> +#include <limits.h> + +#include "mb/pg_wchar.h" + + +/* overrides for regguts.h definitions, if any */ +#define FUNCPTR(name, args) (*name) args +#define MALLOC(n) malloc(n) +#define FREE(p) free(VS(p)) +#define REALLOC(p,n) realloc(VS(p),n) + +/* internal character type and related */ +typedef pg_wchar chr; /* the type itself */ +typedef unsigned uchr; /* unsigned type that will hold a chr */ +typedef int celt; /* type to hold chr, MCCE number, or NOCELT */ +#define NOCELT (-1) /* celt value which is not valid chr or MCCE */ +#define CHR(c) ((unsigned char) (c)) /* turn char literal into chr literal */ +#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */ +#define CHRBITS 32 /* bits in a chr; must not use sizeof */ +#define CHR_MIN 0x00000000 /* smallest and largest chr; the value */ +#define CHR_MAX 0xfffffffe /* CHR_MAX-CHR_MIN+1 should fit in uchr */ + +/* functions operating on chr */ +#define iscalnum(x) pg_isalnum(x) +#define iscalpha(x) pg_isalpha(x) +#define iscdigit(x) pg_isdigit(x) +#define iscspace(x) pg_isspace(x) + +/* and pick up the standard header */ +#include "regex.h" diff --git a/src/include/regex/regerrs.h b/src/include/regex/regerrs.h new file mode 100644 index 0000000000..3515ee6ad5 --- /dev/null +++ b/src/include/regex/regerrs.h @@ -0,0 +1,22 @@ +/* + * $Id$ + */ + +{ REG_OKAY, "REG_OKAY", "no errors detected" }, +{ REG_NOMATCH, "REG_NOMATCH", "failed to match" }, +{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" }, +{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, +{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, +{ REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" }, +{ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, +{ REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" }, +{ REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" }, +{ REG_EBRACE, "REG_EBRACE", "braces {} not balanced" }, +{ REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, +{ REG_ERANGE, "REG_ERANGE", "invalid character range" }, +{ REG_ESPACE, "REG_ESPACE", "out of memory" }, +{ REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" }, +{ REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, +{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" }, +{ REG_MIXED, "REG_MIXED", "character widths of regex and string differ" }, +{ REG_BADOPT, "REG_BADOPT", "invalid embedded option" }, diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h index e0889a1108..9c0d3b9703 100644 --- a/src/include/regex/regex.h +++ b/src/include/regex/regex.h @@ -1,110 +1,165 @@ -/*- - * Copyright (c) 1992 Henry Spencer. - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Henry Spencer of the University of Toronto. +#ifndef _REGEX_H_ +#define _REGEX_H_ /* never again */ +/* + * regular expressions * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * @(#)regex.h 8.2 (Berkeley) 1/3/94 + * $Id$ */ -#ifndef _REGEX_H_ -#define _REGEX_H_ - -#include <sys/types.h> +/* + * Add your own defines, if needed, here. + */ #include "mb/pg_wchar.h" -/* types */ -typedef off_t regoff_t; +/* + * interface types etc. + */ -typedef struct -{ - int re_magic; - size_t re_nsub; /* number of parenthesized subexpressions */ - const pg_wchar *re_endp; /* end pointer for REG_PEND */ - struct re_guts *re_g; /* none of your business :-) */ - pg_wchar *patsave; /* me too :-) */ +/* + * regoff_t has to be large enough to hold either off_t or ssize_t, + * and must be signed; it's only a guess that long is suitable. + */ +typedef long regoff_t; + +/* + * other interface types + */ + +/* the biggie, a compiled RE (or rather, a front end to same) */ +typedef struct { + int re_magic; /* magic number */ + size_t re_nsub; /* number of subexpressions */ + long re_info; /* information about RE */ +# define REG_UBACKREF 000001 +# define REG_ULOOKAHEAD 000002 +# define REG_UBOUNDS 000004 +# define REG_UBRACES 000010 +# define REG_UBSALNUM 000020 +# define REG_UPBOTCH 000040 +# define REG_UBBS 000100 +# define REG_UNONPOSIX 000200 +# define REG_UUNSPEC 000400 +# define REG_UUNPORT 001000 +# define REG_ULOCALE 002000 +# define REG_UEMPTYMATCH 004000 +# define REG_UIMPOSSIBLE 010000 +# define REG_USHORTEST 020000 + int re_csize; /* sizeof(character) */ + char *re_endp; /* backward compatibility kludge */ + /* the rest is opaque pointers to hidden innards */ + char *re_guts; /* `char *' is more portable than `void *' */ + char *re_fns; } regex_t; -typedef struct -{ - regoff_t rm_so; /* start of match */ - regoff_t rm_eo; /* end of match */ +/* result reporting (may acquire more fields later) */ +typedef struct { + regoff_t rm_so; /* start of substring */ + regoff_t rm_eo; /* end of substring */ } regmatch_t; -/* regcomp() flags */ -#define REG_BASIC 0000 -#define REG_EXTENDED 0001 -#define REG_ICASE 0002 -#define REG_NOSUB 0004 -#define REG_NEWLINE 0010 -#define REG_NOSPEC 0020 -#define REG_PEND 0040 -#define REG_DUMP 0200 - -/* regerror() flags */ -#define REG_NOMATCH 1 -#define REG_BADPAT 2 -#define REG_ECOLLATE 3 -#define REG_ECTYPE 4 -#define REG_EESCAPE 5 -#define REG_ESUBREG 6 -#define REG_EBRACK 7 -#define REG_EPAREN 8 -#define REG_EBRACE 9 -#define REG_BADBR 10 -#define REG_ERANGE 11 -#define REG_ESPACE 12 -#define REG_BADRPT 13 -#define REG_EMPTY 14 -#define REG_ASSERT 15 -#define REG_INVARG 16 -#define REG_ATOI 255 /* convert name to number (!) */ -#define REG_ITOA 0400 /* convert number to name (!) */ - -/* regexec() flags */ -#define REG_NOTBOL 00001 -#define REG_NOTEOL 00002 -#define REG_STARTEND 00004 -#define REG_TRACE 00400 /* tracing of execution */ -#define REG_LARGE 01000 /* force large representation */ -#define REG_BACKR 02000 /* force use of backref code */ - -extern int pg_regcomp(regex_t *preg, const char *pattern, int cflags); -extern size_t pg_regerror(int errcode, const regex_t *preg, - char *errbuf, size_t errbuf_size); -extern int pg_regexec(const regex_t *preg, const char *string, - size_t nmatch, - regmatch_t *pmatch, int eflags); -extern void pg_regfree(regex_t *preg); - -#endif /* !_REGEX_H_ */ +/* supplementary control and reporting */ +typedef struct { + regmatch_t rm_extend; /* see REG_EXPECT */ +} rm_detail_t; + + + +/* + * regex compilation flags + */ +#define REG_BASIC 000000 /* BREs (convenience) */ +#define REG_EXTENDED 000001 /* EREs */ +#define REG_ADVF 000002 /* advanced features in EREs */ +#define REG_ADVANCED 000003 /* AREs (which are also EREs) */ +#define REG_QUOTE 000004 /* no special characters, none */ +#define REG_NOSPEC REG_QUOTE /* historical synonym */ +#define REG_ICASE 000010 /* ignore case */ +#define REG_NOSUB 000020 /* don't care about subexpressions */ +#define REG_EXPANDED 000040 /* expanded format, white space & comments */ +#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */ +#define REG_NLANCH 000200 /* ^ matches after \n, $ before */ +#define REG_NEWLINE 000300 /* newlines are line terminators */ +#define REG_PEND 000400 /* ugh -- backward-compatibility hack */ +#define REG_EXPECT 001000 /* report details on partial/limited matches */ +#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */ +#define REG_DUMP 004000 /* none of your business :-) */ +#define REG_FAKE 010000 /* none of your business :-) */ +#define REG_PROGRESS 020000 /* none of your business :-) */ + + + +/* + * regex execution flags + */ +#define REG_NOTBOL 0001 /* BOS is not BOL */ +#define REG_NOTEOL 0002 /* EOS is not EOL */ +#define REG_STARTEND 0004 /* backward compatibility kludge */ +#define REG_FTRACE 0010 /* none of your business */ +#define REG_MTRACE 0020 /* none of your business */ +#define REG_SMALL 0040 /* none of your business */ + + +/* + * error reporting + * Be careful if modifying the list of error codes -- the table used by + * regerror() is generated automatically from this file! + */ +#define REG_OKAY 0 /* no errors detected */ +#define REG_NOMATCH 1 /* failed to match */ +#define REG_BADPAT 2 /* invalid regexp */ +#define REG_ECOLLATE 3 /* invalid collating element */ +#define REG_ECTYPE 4 /* invalid character class */ +#define REG_EESCAPE 5 /* invalid escape \ sequence */ +#define REG_ESUBREG 6 /* invalid backreference number */ +#define REG_EBRACK 7 /* brackets [] not balanced */ +#define REG_EPAREN 8 /* parentheses () not balanced */ +#define REG_EBRACE 9 /* braces {} not balanced */ +#define REG_BADBR 10 /* invalid repetition count(s) */ +#define REG_ERANGE 11 /* invalid character range */ +#define REG_ESPACE 12 /* out of memory */ +#define REG_BADRPT 13 /* quantifier operand invalid */ +#define REG_ASSERT 15 /* "can't happen" -- you found a bug */ +#define REG_INVARG 16 /* invalid argument to regex function */ +#define REG_MIXED 17 /* character widths of regex and string differ */ +#define REG_BADOPT 18 /* invalid embedded option */ +/* two specials for debugging and testing */ +#define REG_ATOI 101 /* convert error-code name to number */ +#define REG_ITOA 102 /* convert error-code number to name */ + + + +/* + * the prototypes for exported functions + */ +extern int pg_regcomp(regex_t *, const pg_wchar *, size_t, int); +extern int pg_regexec(regex_t *, const pg_wchar *, size_t, rm_detail_t *, size_t, regmatch_t [], int); +extern void pg_regfree(regex_t *); +extern size_t pg_regerror(int, const regex_t *, char *, size_t); + +#endif /* _REGEX_H_ */ diff --git a/src/include/regex/regex2.h b/src/include/regex/regex2.h deleted file mode 100644 index 5ceed7fe9c..0000000000 --- a/src/include/regex/regex2.h +++ /dev/null @@ -1,174 +0,0 @@ -/*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer. - * Copyright (c) 1992, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Henry Spencer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)regex2.h 8.4 (Berkeley) 3/20/94 - */ - -#include <limits.h> - -/* - * internals of regex_t - */ -#define MAGIC1 ((('r'^0200)<<8) | 'e') - -/* - * The internal representation is a *strip*, a sequence of - * operators ending with an endmarker. (Some terminology etc. is a - * historical relic of earlier versions which used multiple strips.) - * Certain oddities in the representation are there to permit running - * the machinery backwards; in particular, any deviation from sequential - * flow must be marked at both its source and its destination. Some - * fine points: - * - * - OPLUS_ and O_PLUS are *inside* the loop they create. - * - OQUEST_ and O_QUEST are *outside* the bypass they create. - * - OCH_ and O_CH are *outside* the multi-way branch they create, while - * OOR1 and OOR2 are respectively the end and the beginning of one of - * the branches. Note that there is an implicit OOR2 following OCH_ - * and an implicit OOR1 preceding O_CH. - * - * In state representations, an operator's bit is on to signify a state - * immediately *preceding* "execution" of that operator. - */ -typedef unsigned long sop; /* strip operator */ -typedef long sopno; - -#define OPRMASK ((sop) 0xf8000000) -#define OPDMASK ((sop) 0x07ffffff) -#define OPSHIFT ((unsigned)27) -#define OP(n) ((n)&OPRMASK) -#define OPND(n) ((n)&OPDMASK) -#define SOP(op, opnd) ((op)|(opnd)) -/* operators meaning operand */ -/* (back, fwd are offsets) */ -#define OEND ((size_t)1<<OPSHIFT) /* endmarker - */ -#define OCHAR ((size_t)2<<OPSHIFT) /* character unsigned char */ -#define OBOL ((size_t)3<<OPSHIFT) /* left anchor - */ -#define OEOL ((size_t)4<<OPSHIFT) /* right anchor - */ -#define OANY ((size_t)5<<OPSHIFT) /* . - */ -#define OANYOF ((size_t)6<<OPSHIFT) /* [...] set number */ -#define OBACK_ ((size_t)7<<OPSHIFT) /* begin \d paren number */ -#define O_BACK ((size_t)8<<OPSHIFT) /* end \d paren number */ -#define OPLUS_ ((size_t)9<<OPSHIFT) /* + prefix fwd to suffix */ -#define O_PLUS ((size_t)10<<OPSHIFT) /* + suffix back to prefix */ -#define OQUEST_ ((size_t)11<<OPSHIFT) /* ? prefix fwd to suffix */ -#define O_QUEST ((size_t)12<<OPSHIFT) /* ? suffix back to prefix */ -#define OLPAREN ((size_t)13<<OPSHIFT) /* ( fwd to ) */ -#define ORPAREN ((size_t)14<<OPSHIFT) /* ) back to ( */ -#define OCH_ ((size_t)15<<OPSHIFT) /* begin choice fwd to OOR2 */ -#define OOR1 ((size_t)16<<OPSHIFT) /* | pt. 1 back to OOR1 or - * OCH_ */ -#define OOR2 ((size_t)17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or - * O_CH */ -#define O_CH ((size_t)18<<OPSHIFT) /* end choice back to OOR1 */ -#define OBOW ((size_t)19<<OPSHIFT) /* begin word - */ -#define OEOW ((size_t)20<<OPSHIFT) /* end word - */ - -/* - * Structure for [] character-set representation. Character sets are - * done as bit vectors, grouped 8 to a byte vector for compactness. - * The individual set therefore has both a pointer to the byte vector - * and a mask to pick out the relevant bit of each byte. A hash code - * simplifies testing whether two sets could be identical. - * - * This will get trickier for multicharacter collating elements. As - * preliminary hooks for dealing with such things, we also carry along - * a string of multi-character elements, and decide the size of the - * vectors at run time. - */ -typedef struct -{ - uch *ptr; /* -> uch [csetsize] */ - uch mask; /* bit within array */ - pg_wchar hash; /* hash code */ - unsigned int lc; /* leading character (character-set) */ - size_t smultis; - char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ -} cset; - -/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ -#define CHlc(c) (((unsigned)(c)&0xff0000)>>16) -#define CHadd(cs, c) ((cs)->ptr[(unsigned)(c)&0xffff] |= (cs)->mask, (cs)->hash += (unsigned)(c)&0xffff,\ - (cs)->lc = CHlc(c)) -#define CHsub(cs, c) ((cs)->ptr[(unsigned)(c)&0xffff] &= ~(cs)->mask, (cs)->hash -= (unsigned)(c)&0xffff) -#define CHIN(cs, c) ((cs)->ptr[(unsigned)(c)&0xffff] & (cs)->mask && \ - ((cs)->lc == CHlc(c))) -#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal - * fns */ -#define MCsub(p, cs, cp) mcsub(p, cs, cp) -#define MCin(p, cs, cp) mcin(p, cs, cp) - -/* stuff for character categories */ -typedef unsigned char cat_t; - -/* - * main compiled-expression structure - */ -struct re_guts -{ - int magic; -#define MAGIC2 ((('R'^0200)<<8)|'E') - sop *strip; /* malloced area for strip */ - int csetsize; /* number of bits in a cset vector */ - int ncsets; /* number of csets in use */ - cset *sets; /* -> cset [ncsets] */ - uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ - int cflags; /* copy of regcomp() cflags argument */ - sopno nstates; /* = number of sops */ - sopno firststate; /* the initial OEND (normally 0) */ - sopno laststate; /* the final OEND */ - int iflags; /* internal flags */ -#define USEBOL 01 /* used ^ */ -#define USEEOL 02 /* used $ */ -#define BAD 04 /* something wrong */ - int nbol; /* number of ^ used */ - int neol; /* number of $ used */ - int ncategories; /* how many character categories */ - cat_t *categories; /* ->catspace[-CHAR_MIN] */ - pg_wchar *must; /* match must contain this string */ - int mlen; /* length of must */ - size_t nsub; /* copy of re_nsub */ - int backrefs; /* does it use back references? */ - sopno nplus; /* how deep does it nest +s? */ - /* catspace must be last */ - cat_t catspace[1]; /* actually [NC] */ -}; - -/* misc utilities */ -#define OUT (16777216+1) /* 16777216 == 2^24 == 3 bytes */ - -#define ISWORD(c) (((c) >= 0 && (c) <= UCHAR_MAX) && \ - (isalnum((unsigned char) (c)) || (c) == '_')) diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h new file mode 100644 index 0000000000..8f36d98a1f --- /dev/null +++ b/src/include/regex/regguts.h @@ -0,0 +1,393 @@ +/* + * Internal interface definitions, etc., for the reg package + * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $Id$ + */ + + + +/* + * Environmental customization. It should not (I hope) be necessary to + * alter the file you are now reading -- regcustom.h should handle it all, + * given care here and elsewhere. + */ +#include "regcustom.h" + + + +/* + * Things that regcustom.h might override. + */ + +/* assertions */ +#ifndef assert +# ifndef REG_DEBUG +# define NDEBUG /* no assertions */ +# endif +#include <assert.h> +#endif + +/* voids */ +#ifndef DISCARD +#define DISCARD void /* for throwing values away */ +#endif +#ifndef VS +#define VS(x) ((void *)(x)) /* cast something to generic ptr */ +#endif + +/* function-pointer declarator */ +#ifndef FUNCPTR +#define FUNCPTR(name, args) (*name) args +#endif + +/* memory allocation */ +#ifndef MALLOC +#define MALLOC(n) malloc(n) +#endif +#ifndef REALLOC +#define REALLOC(p, n) realloc(VS(p), n) +#endif +#ifndef FREE +#define FREE(p) free(VS(p)) +#endif + +/* want size of a char in bits, and max value in bounded quantifiers */ +#ifndef CHAR_BIT +#include <limits.h> +#endif +#ifndef _POSIX2_RE_DUP_MAX +#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */ +#endif + + + +/* + * misc + */ + +#define NOTREACHED 0 +#define xxx 1 + +#define DUPMAX _POSIX2_RE_DUP_MAX +#define INFINITY (DUPMAX+1) + +#define REMAGIC 0xfed7 /* magic number for main struct */ + + + +/* + * debugging facilities + */ +#ifdef REG_DEBUG +/* FDEBUG does finite-state tracing */ +#define FDEBUG(arglist) { if (v->eflags®_FTRACE) printf arglist; } +/* MDEBUG does higher-level tracing */ +#define MDEBUG(arglist) { if (v->eflags®_MTRACE) printf arglist; } +#else +#define FDEBUG(arglist) {} +#define MDEBUG(arglist) {} +#endif + + + +/* + * bitmap manipulation + */ +#define UBITS (CHAR_BIT * sizeof(unsigned)) +#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) +#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) + + + +/* + * We dissect a chr into byts for colormap table indexing. Here we define + * a byt, which will be the same as a byte on most machines... The exact + * size of a byt is not critical, but about 8 bits is good, and extraction + * of 8-bit chunks is sometimes especially fast. + */ +#ifndef BYTBITS +#define BYTBITS 8 /* bits in a byt */ +#endif +#define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt value */ +#define BYTMASK (BYTTAB-1) /* bit mask for byt */ +#define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS) +/* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */ + + + +/* + * As soon as possible, we map chrs into equivalence classes -- "colors" -- + * which are of much more manageable number. + */ +typedef short color; /* colors of characters */ +typedef int pcolor; /* what color promotes to */ +#define COLORLESS (-1) /* impossible color */ +#define WHITE 0 /* default color, parent of all others */ + + + +/* + * A colormap is a tree -- more precisely, a DAG -- indexed at each level + * by a byt of the chr, to map the chr to a color efficiently. Because + * lower sections of the tree can be shared, it can exploit the usual + * sparseness of such a mapping table. The tree is always NBYTS levels + * deep (in the past it was shallower during construction but was "filled" + * to full depth at the end of that); areas that are unaltered as yet point + * to "fill blocks" which are entirely WHITE in color. + */ + +/* the tree itself */ +struct colors { + color ccolor[BYTTAB]; +}; +struct ptrs { + union tree *pptr[BYTTAB]; +}; +union tree { + struct colors colors; + struct ptrs ptrs; +}; +#define tcolor colors.ccolor +#define tptr ptrs.pptr + +/* internal per-color structure for the color machinery */ +struct colordesc { + uchr nchrs; /* number of chars of this color */ + color sub; /* open subcolor (if any); free chain ptr */ +# define NOSUB COLORLESS + struct arc *arcs; /* color chain */ + int flags; +# define FREECOL 01 /* currently free */ +# define PSEUDO 02 /* pseudocolor, no real chars */ +# define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) + union tree *block; /* block of solid color, if any */ +}; + +/* the color map itself */ +struct colormap { + int magic; +# define CMMAGIC 0x876 + struct vars *v; /* for compile error reporting */ + size_t ncds; /* number of colordescs */ + size_t max; /* highest in use */ + color free; /* beginning of free chain (if non-0) */ + struct colordesc *cd; +# define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) +# define NINLINECDS ((size_t)10) + struct colordesc cdspace[NINLINECDS]; + union tree tree[NBYTS]; /* tree top, plus fill blocks */ +}; + +/* optimization magic to do fast chr->color mapping */ +#define B0(c) ((c) & BYTMASK) +#define B1(c) (((c)>>BYTBITS) & BYTMASK) +#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) +#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) +#if NBYTS == 1 +#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) +#endif +/* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */ +#if NBYTS == 2 +#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) +#endif +#if NBYTS == 4 +#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) +#endif + + + +/* + * Interface definitions for locale-interface functions in locale.c. + * Multi-character collating elements (MCCEs) cause most of the trouble. + */ +struct cvec { + int nchrs; /* number of chrs */ + int chrspace; /* number of chrs possible */ + chr *chrs; /* pointer to vector of chrs */ + int nranges; /* number of ranges (chr pairs) */ + int rangespace; /* number of chrs possible */ + chr *ranges; /* pointer to vector of chr pairs */ + int nmcces; /* number of MCCEs */ + int mccespace; /* number of MCCEs possible */ + int nmccechrs; /* number of chrs used for MCCEs */ + chr *mcces[1]; /* pointers to 0-terminated MCCEs */ + /* and both batches of chrs are on the end */ +}; + +/* caution: this value cannot be changed easily */ +#define MAXMCCE 2 /* length of longest MCCE */ + + + +/* + * definitions for NFA internal representation + * + * Having a "from" pointer within each arc may seem redundant, but it + * saves a lot of hassle. + */ +struct state; + +struct arc { + int type; +# define ARCFREE '\0' + color co; + struct state *from; /* where it's from (and contained within) */ + struct state *to; /* where it's to */ + struct arc *outchain; /* *from's outs chain or free chain */ +# define freechain outchain + struct arc *inchain; /* *to's ins chain */ + struct arc *colorchain; /* color's arc chain */ +}; + +struct arcbatch { /* for bulk allocation of arcs */ + struct arcbatch *next; +# define ABSIZE 10 + struct arc a[ABSIZE]; +}; + +struct state { + int no; +# define FREESTATE (-1) + char flag; /* marks special states */ + int nins; /* number of inarcs */ + struct arc *ins; /* chain of inarcs */ + int nouts; /* number of outarcs */ + struct arc *outs; /* chain of outarcs */ + struct arc *free; /* chain of free arcs */ + struct state *tmp; /* temporary for traversal algorithms */ + struct state *next; /* chain for traversing all */ + struct state *prev; /* back chain */ + struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */ + int noas; /* number of arcs used in first arcbatch */ +}; + +struct nfa { + struct state *pre; /* pre-initial state */ + struct state *init; /* initial state */ + struct state *final; /* final state */ + struct state *post; /* post-final state */ + int nstates; /* for numbering states */ + struct state *states; /* state-chain header */ + struct state *slast; /* tail of the chain */ + struct state *free; /* free list */ + struct colormap *cm; /* the color map */ + color bos[2]; /* colors, if any, assigned to BOS and BOL */ + color eos[2]; /* colors, if any, assigned to EOS and EOL */ + struct vars *v; /* simplifies compile error reporting */ + struct nfa *parent; /* parent NFA, if any */ +}; + + + +/* + * definitions for compacted NFA + */ +struct carc { + color co; /* COLORLESS is list terminator */ + int to; /* state number */ +}; + +struct cnfa { + int nstates; /* number of states */ + int ncolors; /* number of colors */ + int flags; +# define HASLACONS 01 /* uses lookahead constraints */ + int pre; /* setup state number */ + int post; /* teardown state number */ + color bos[2]; /* colors, if any, assigned to BOS and BOL */ + color eos[2]; /* colors, if any, assigned to EOS and EOL */ + struct carc **states; /* vector of pointers to outarc lists */ + struct carc *arcs; /* the area for the lists */ +}; +#define ZAPCNFA(cnfa) ((cnfa).nstates = 0) +#define NULLCNFA(cnfa) ((cnfa).nstates == 0) + + + +/* + * subexpression tree + */ +struct subre { + char op; /* '|', '.' (concat), 'b' (backref), '(', '=' */ + char flags; +# define LONGER 01 /* prefers longer match */ +# define SHORTER 02 /* prefers shorter match */ +# define MIXED 04 /* mixed preference below */ +# define CAP 010 /* capturing parens below */ +# define BACKR 020 /* back reference below */ +# define INUSE 0100 /* in use in final tree */ +# define LOCAL 03 /* bits which may not propagate up */ +# define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ +# define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ +# define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) +# define MESSY(f) ((f)&(MIXED|CAP|BACKR)) +# define PREF(f) ((f)&LOCAL) +# define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) +# define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) + short retry; /* index into retry memory */ + int subno; /* subexpression number (for 'b' and '(') */ + short min; /* min repetitions, for backref only */ + short max; /* max repetitions, for backref only */ + struct subre *left; /* left child, if any (also freelist chain) */ + struct subre *right; /* right child, if any */ + struct state *begin; /* outarcs from here... */ + struct state *end; /* ...ending in inarcs here */ + struct cnfa cnfa; /* compacted NFA, if any */ + struct subre *chain; /* for bookkeeping and error cleanup */ +}; + + + +/* + * table of function pointers for generic manipulation functions + * A regex_t's re_fns points to one of these. + */ +struct fns { + void FUNCPTR(free, (regex_t *)); +}; + + + +/* + * the insides of a regex_t, hidden behind a void * + */ +struct guts { + int magic; +# define GUTSMAGIC 0xfed9 + int cflags; /* copy of compile flags */ + long info; /* copy of re_info */ + size_t nsub; /* copy of re_nsub */ + struct subre *tree; + struct cnfa search; /* for fast preliminary search */ + int ntree; + struct colormap cmap; + int FUNCPTR(compare, (const chr *, const chr *, size_t)); + struct subre *lacons; /* lookahead-constraint vector */ + int nlacons; /* size of lacons */ +}; diff --git a/src/include/regex/utils.h b/src/include/regex/utils.h deleted file mode 100644 index 5831122f7c..0000000000 --- a/src/include/regex/utils.h +++ /dev/null @@ -1,60 +0,0 @@ -/*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer. - * Copyright (c) 1992, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Henry Spencer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)utils.h 8.3 (Berkeley) 3/20/94 - */ - -#ifndef _REGEX_UTILS_H -#define _REGEX_UTILS_H - -#include <limits.h> - -/* utility definitions */ -#define DUPMAX 100000000 /* xxx is this right? */ -#define INFINITY (DUPMAX + 1) - -#define NC (SHRT_MAX - SHRT_MIN + 1) - -typedef unsigned char uch; - -/* switch off assertions (if not already off) if no REDEBUG */ -#ifndef REDEBUG -#ifndef NDEBUG -#define NDEBUG /* no assertions please */ -#endif -#endif - -#endif /* _REGEX_UTILS_H */ |