Menu

[b84aab]: / src / common / match.c  Maximize  Restore  History

Download this file

365 lines (314 with data), 9.3 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
// This file is part of SmallBASIC
//
// The regular expression routines are based on match.c by J. Kercheval:
//
// This program is distributed under the terms of the GPL v2.0 or later
// Download the GNU Public License (GPL) from www.gnu.org
//
// Copyright(C) 2000 Nicholas Christopoulos
/*
Author: J. Kercheval
Created: Sat, 01/05/1991 22:21:49
J. Kercheval Wed, 02/20/1991 22:29:01 Released to Public Domain
J. Kercheval Fri, 02/22/1991 15:29:01 fix '\' bugs (two :( of them)
J. Kercheval Sun, 03/10/1991 19:31:29 add error return to RegMatche()
J. Kercheval Sun, 03/10/1991 20:11:11 add IsValidRegPattern code
J. Kercheval Sun, 03/10/1991 20:37:11 beef up main()
J. Kercheval Tue, 03/12/1991 22:25:10 Released as V1.1 to Public Domain
The file match.c coexists in the same directory with the string class.
*/
/**
* In the pattern string:
* `*' matches any sequence of characters (zero or more)
* `?' matches any single character
* [SET] matches any character in the specified set,
* [!SET] or [^SET] matches any character not in the specified set.
*
* A set is composed of characters or ranges; a range looks like
* character hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the
* minimal set of characters allowed in the [..] pattern construct.
* Other characters are allowed (i.e. 8-bit characters) if your system
* supports them.
*
*
* To suppress the special syntactic significance of any of `[]*?!^-\',
* and match the character exactly, precede it with a `\'.
*/
#include "common/match.h"
#include "common/smbas.h"
#include "common/sberr.h"
#ifdef USE_PCRE
#include <pcre.h>
#define OVECCOUNT 30 /* should be a multiple of 3 */
#endif
/*
 * Forward declarations for the two halves of the wildcard matcher; both are
 * defined later in this file and call each other.
 * SEC(BIO) is a project macro declared elsewhere -- presumably a code-section
 * attribute; confirm in the common headers.
 */
int reg_match_after_star(const char *p, char *t) SEC(BIO);
int reg_match_jk(const char *p, char *t) SEC(BIO);
/**
 * Match the text `t` against the wildcard pattern `p`.
 *
 * Pattern syntax: `*` (any run of characters), `?` (any one character),
 * `[SET]`, `[!SET]` / `[^SET]` (character sets with optional ranges) and
 * `\` to quote the following character.
 *
 * @param p  NUL-terminated pattern
 * @param t  NUL-terminated text; it is only read, never modified
 * @return reg_match_valid when the whole text matches the whole pattern,
 *         otherwise reg_match_abort (text ended before the pattern),
 *         reg_match_premature_end (pattern ended before the text),
 *         reg_match_range_failure, reg_match_literal_failure or
 *         reg_match_bad_pattern; a `*` delegates to reg_match_after_star()
 *         and returns its result.
 */
int reg_match_jk(const char *p, char *t) {
  char range_start, range_end;  /* start and end in range */
  int invert;                   /* is this [..] or [!..] */
  int member_match;             /* have I matched the [..] construct? */
  int loop;                     /* should I terminate? */

  for (; *p; p++, t++) {
    /*
     * End of text: the match can only succeed if nothing but `*` is left in
     * the pattern.  Every trailing `*` is skipped (not just one) so that
     * "a**" accepts "a", in line with reg_match_after_star() which already
     * collapses runs of `*`.
     */
    if (*t == '\0') {
      while (*p == '*')
        p++;
      return (*p == '\0') ? reg_match_valid : reg_match_abort;
    }

    /*
     * determine and react to pattern type
     */
    switch (*p) {
    case '?':                  /* matches any single character */
      break;

    case '*':                  /* matches any run of characters */
      return reg_match_after_star(p, t);

    case '[':                  /* [..] construct: member or exclusion set */
      {
        /*
         * move to beginning of range
         */
        p++;

        /*
         * check if this is a member match or an exclusion match
         */
        invert = 0;             // false
        if (*p == '!' || *p == '^') {
          invert = -1;          // true
          p++;
        }

        /*
         * a closing bracket right at the start of the set is a malformed
         * pattern
         */
        if (*p == ']')
          return reg_match_bad_pattern;

        member_match = 0;       // false
        loop = -1;              // true
        while (loop) {
          /* end of construct: the scan is done */
          if (*p == ']') {
            loop = 0;           // false
            continue;
          }

          /*
           * a quoted '!', '^', '-', '\' or ']' is taken literally
           */
          if (*p == '\\')
            range_start = range_end = *++p;
          else
            range_start = range_end = *p;

          /*
           * if end of pattern then bad pattern (missing ']')
           */
          if (*p == '\0')
            return reg_match_bad_pattern;

          /*
           * check for range bar
           */
          if (*++p == '-') {
            /*
             * get the range end
             */
            range_end = *++p;

            /*
             * if end of pattern or construct then bad pattern
             */
            if (range_end == '\0' || range_end == ']')
              return reg_match_bad_pattern;

            /*
             * quoted range end
             */
            if (range_end == '\\') {
              range_end = *++p;
              /*
               * pattern ended right after the quote: bad pattern
               */
              if (!range_end)
                return reg_match_bad_pattern;
            }

            /*
             * move just beyond this range
             */
            p++;
          }

          /*
           * the text character matches when it lies inside the range; the
           * bounds are accepted in either order (a-z or z-a)
           */
          if (range_start < range_end) {
            if (*t >= range_start && *t <= range_end) {
              member_match = -1;        // true
              loop = 0;                 // false
            }
          } else {
            if (*t >= range_end && *t <= range_start) {
              member_match = -1;        // true
              loop = 0;                 // false
            }
          }
        }

        /*
         * a hit inside an exclusion set, or no hit inside a member set,
         * both mean the construct failed
         */
        if ((invert && member_match) || !(invert || member_match))
          return reg_match_range_failure;

        /*
         * a member set that matched stopped scanning early: skip the rest
         * of the [...] construct so p ends up on the closing ']'
         */
        if (member_match) {
          while (*p != ']') {
            /*
             * bad pattern (missing ']')
             */
            if (*p == '\0')
              return reg_match_bad_pattern;

            /*
             * skip a quoted character
             */
            if (*p == '\\') {
              p++;
              /*
               * pattern ended right after the quote: bad pattern
               */
              if (*p == '\0')
                return reg_match_bad_pattern;
            }

            /*
             * move to next pattern char
             */
            p++;
          }
        }
        break;
      }

    case '\\':                 /* next character is quoted, must match exactly */
      /*
       * move pattern pointer to the quoted char
       */
      p++;

      /*
       * pattern ended right after the quote: bad pattern
       */
      if (*p == '\0')
        return reg_match_bad_pattern;
      /* fallthrough: compare the quoted character literally */

    default:                   /* literal character */
      if (*p != *t)
        return reg_match_literal_failure;
    }
  }

  /*
   * pattern exhausted: any text left over means the match fails
   */
  if (*t)
    return reg_match_premature_end;
  return reg_match_valid;
}
#ifdef USE_PCRE
/**
 * Match the text `t` against the PCRE regular expression `p`.
 *
 * The pattern is compiled on every call; matching is case-insensitive when
 * opt_usepcre == 2.
 *
 * @param p  NUL-terminated regular expression
 * @param t  NUL-terminated subject text
 * @return reg_match_valid when pcre_exec() reports a match,
 *         reg_match_bad_pattern (after calling rt_raise()) when the pattern
 *         does not compile, reg_match_literal_failure otherwise.
 */
int reg_match_pcre(const char *p, char *t)
{
  const char *error;
  int errofs;
  int ovector[OVECCOUNT];
  int rc;
  pcre *re;

  re = pcre_compile(p, (opt_usepcre == 2) ? PCRE_CASELESS : 0, &error, &errofs, NULL);
  if (!re) {
    /* arguments follow the format: offset first (%d), then the message (%s) */
    rt_raise("REGULAR EXPRESSION SYNTAX ERROR (offset %d) -> %s", errofs, error);
    return reg_match_bad_pattern;
  }

  rc = pcre_exec(re, NULL, t, (int)strlen(t), 0, 0, ovector, OVECCOUNT);

  /* the compiled pattern is not kept, so release it before returning */
  pcre_free(re);

  return (rc >= 0) ? reg_match_valid : reg_match_literal_failure;
}
#endif
/**
 * Public matching entry point.
 *
 * When the build defines USE_PCRE and opt_usepcre is non-zero the request is
 * handled by reg_match_pcre(); in every other case the wildcard matcher
 * reg_match_jk() is used.
 *
 * @param p  NUL-terminated pattern
 * @param t  NUL-terminated text
 * @return the result code of the selected matcher
 */
int reg_match(const char *p, char *t) {
#ifdef USE_PCRE
  return opt_usepcre ? reg_match_pcre(p, t) : reg_match_jk(p, t);
#else
  return reg_match_jk(p, t);
#endif
}
/**
 * Resolve a `*` wildcard: try the remainder of the pattern against each
 * successive position of the text until one attempt gives a definite result.
 *
 * @param p  pattern, positioned on the `*` being expanded
 * @param t  text position at which the `*` starts
 * @return reg_match_valid when the rest of the pattern matches some suffix
 *         of the text (or nothing but wildcards remain),
 *         reg_match_abort when the text runs out first,
 *         reg_match_bad_pattern when the pattern ends right after a `\`
 *         or a nested attempt reports a bad pattern.
 */
int reg_match_after_star(const char *p, char *t) {
  int result;
  int nextp;

  /*
   * collapse the run of wildcards: every `?` consumes one text character,
   * additional `*` consume nothing
   */
  while (*p == '?' || *p == '*') {
    if (*p == '?') {
      /*
       * if end of text then no match
       */
      if (!*t++)
        return reg_match_abort;
    }
    p++;
  }

  /*
   * the pattern ended on wildcards: it matches whatever text is left
   */
  if (!*p)
    return reg_match_valid;

  /*
   * first thing the remaining pattern has to match: a literal (possibly
   * quoted with `\`) or the start of a [..] construct
   */
  nextp = *p;
  if (nextp == '\\') {
    nextp = p[1];
    /*
     * pattern ended right after the quote: bad pattern
     */
    if (!nextp)
      return reg_match_bad_pattern;
  }

  /*
   * Slide over the text.  The loop leaves only through an explicit return,
   * so the outcome never depends on a placeholder initial result value.
   */
  for (;;) {
    /*
     * text exhausted while pattern characters remain: no match
     */
    if (*t == '\0')
      return reg_match_abort;

    /*
     * only attempt a full match where the next pattern character can match
     * here; a '[' always has to be evaluated.  The attempt goes straight to
     * the wildcard matcher so the recursion never leaves this syntax.
     */
    if (nextp == *t || nextp == '[') {
      result = reg_match_jk(p, t);
      if (result == reg_match_valid
          || result == reg_match_abort
          || result == reg_match_bad_pattern)
        return result;
    }
    t++;
  }
}
Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.