[MKISOFS]
[reactos.git] / reactos / sdk / tools / mkisofs / schilytools / libschily / match.c
1 /* @(#)match.c 1.24 10/08/21 Copyright 1985, 1995-2010 J. Schilling */
2 #include <schily/standard.h>
3 #include <schily/patmatch.h>
4 /*
5 * Pattern matching functions
6 *
7 * Copyright (c) 1985, 1995-2010 J. Schilling
8 */
9 /*
10 * The contents of this file are subject to the terms of the
11 * Common Development and Distribution License, Version 1.0 only
12 * (the "License"). You may not use this file except in compliance
13 * with the License.
14 *
15 * See the file CDDL.Schily.txt in this distribution for details.
16 *
17 * When distributing Covered Code, include this CDDL HEADER in each
18 * file and include the License file CDDL.Schily.txt from this distribution.
19 */
20 /*
21 * The pattern matching functions below are based on the algorithm
22 * presented by Martin Richards in:
23 *
24 * "A Compact Function for Regular Expression Pattern Matching",
25 * Software-Practice and Experience, Vol. 9, 527-534 (1979)
26 *
27 * Several changes have been made to the original source which has been
28 * written in BCPL:
29 *
30 * '/' is replaced by '!' (to allow UNIX filenames)
31 * '(',')' are replaced by '{', '}'
32 * '\'' is replaced by '\\' (UNIX compatible quote)
33 *
34 * Character classes have been added to allow "[<character list>]"
35 * to be used.
36 * Start of line '^' and end of line '$' have been added.
37 */
38
39 #undef CHAR
40
41 #ifdef __LINE_MATCH
42 #ifdef __WIDE_CHAR
43 #define patmatch patwlmatch
44 #else
45 #define opatmatch opatlmatch
46 #define patmatch patlmatch
47 #endif
48 #endif
49
50 #ifdef __WIDE_CHAR
51 #ifndef __LINE_MATCH
52 #define patcompile patwcompile
53 #define patmatch patwmatch
54 #endif
55 #define CHAR wchar_t
56 #endif
57
58 #ifndef CHAR
59 typedef unsigned char Uchar;
60 #define CHAR Uchar
61 #endif
62
63 #define ENDSTATE (-1)
64
65 /*
66 * The Interpreter
67 */
68
69
70 /*
71 * put adds a new state to the active list
72 */
73 #define put(ret, state, sp, n) { \
74 register int *lstate = state; \
75 register int *lsp = sp; \
76 register int ln = n; \
77 \
78 while (lstate < lsp) { \
79 if (*lstate++ == ln) { \
80 ret = lsp; \
81 lsp = 0; \
82 break; \
83 } \
84 } \
85 if (lsp) { \
86 *lstate++ = ln; \
87 ret = lstate; \
88 } \
89 }
90
91
92 /*
93 * match a character in class
94 */
95 #define in_class(found, pat, c) { \
96 register const CHAR *lpat = pat; \
97 register int lc = c; \
98 int lo_bound; \
99 int hi_bound; \
100 \
101 found = FALSE; \
102 \
103 if (*lpat == NOT) { \
104 lpat++; \
105 found = TRUE; \
106 } \
107 while (*lpat != RCLASS) { \
108 if (*lpat == QUOTE) \
109 lpat++; \
110 lo_bound = *lpat++; \
111 if (*lpat == RANGE) { \
112 lpat++; \
113 if (*lpat == QUOTE) \
114 lpat++; \
115 hi_bound = *lpat++; \
116 } else { \
117 hi_bound = lo_bound; \
118 } \
119 if (lo_bound <= lc && lc <= hi_bound) { \
120 found = !found; \
121 break; \
122 } \
123 } \
124 }
125
126 /*
127 * opatmatch - the old external interpreter interface.
128 *
129 * Trys to match a string beginning at offset
130 * against the compiled pattern.
131 */
132 #ifndef __WIDE_CHAR
133 EXPORT CHAR
134 *opatmatch(pat, aux, str, soff, slen, alt)
135 const CHAR *pat;
136 const int *aux;
137 const CHAR *str;
138 int soff;
139 int slen;
140 int alt;
141 {
142 int state[MAXPAT];
143
144 return (patmatch(pat, aux, str, soff, slen, alt, state));
145 }
146 #endif
147
148 /*
149 * patmatch - the external interpreter interface.
150 *
151 * Trys to match a string beginning at offset
152 * against the compiled pattern.
153 */
154 EXPORT CHAR *
155 patmatch(pat, aux, str, soff, slen, alt, state)
156 const CHAR *pat;
157 const int *aux;
158 const CHAR *str;
159 int soff;
160 int slen;
161 int alt;
162 int state[];
163 {
164 register int *sp;
165 register int *n;
166 register int *i;
167 register int p;
168 register int q, s, k;
169 int c;
170 const CHAR *lastp = (CHAR *)NULL;
171
172 #ifdef __LINE_MATCH
173 for (; soff <= slen; soff++) {
174 #endif
175
176 sp = state;
177 put(sp, state, state, 0);
178 if (alt != ENDSTATE)
179 put(sp, state, sp, alt);
180
181 for (s = soff; ; s++) {
182 /*
183 * next char from input string
184 */
185 if (s >= slen)
186 c = 0;
187 else
188 c = str[s];
189 /*
190 * first complete the closure
191 */
192 for (n = state; n < sp; ) {
193 p = *n++; /* next state number */
194 if (p == ENDSTATE)
195 continue;
196 q = aux[p]; /* get next state for pat */
197 k = pat[p]; /* get next char from pat */
198 switch (k) {
199
200 case REP:
201 put(sp, state, sp, p+1);
202 /* FALLTHRU */
203 case NIL: /* NIL matches always */
204 case STAR:
205 put(sp, state, sp, q);
206 break;
207 case LBRACK: /* alternations */
208 case ALT:
209 put(sp, state, sp, p+1);
210 if (q != ENDSTATE)
211 put(sp, state, sp, q);
212 break;
213 case START:
214 if (s == 0)
215 put(sp, state, sp, q);
216 break;
217 case END:
218 if (c == '\0')
219 put(sp, state, sp, q);
220 break;
221 }
222 }
223
224 for (i = state; i < sp; ) {
225 if (*i++ == ENDSTATE) {
226 lastp = &str[s];
227 break;
228 }
229 }
230 if (c == 0)
231 return ((CHAR *)lastp);
232
233 /*
234 * now try to match next character
235 */
236 n = sp;
237 sp = state;
238 for (i = sp; i < n; ) {
239 p = *i++; /* next active state number */
240 if (p == ENDSTATE)
241 continue;
242 k = pat[p];
243 switch (k) {
244
245 case ALT:
246 case REP:
247 case NIL:
248 case LBRACK:
249 case START:
250 case END:
251 continue;
252 case LCLASS:
253 in_class(q, &pat[p+1], c);
254 if (!q)
255 continue;
256 break;
257 case STAR:
258 put(sp, state, sp, p);
259 continue;
260 case QUOTE:
261 k = pat[p+1];
262 default:
263 if (k != c)
264 continue;
265 /* FALLTHRU */
266 case ANY:
267 break;
268 }
269 put(sp, state, sp, aux[p]);
270 }
271 if (sp == state) { /* if no new states return */
272 #ifdef __LINE_MATCH
273
274 if (lastp || (soff == slen - 1))
275 return ((CHAR *)lastp);
276 else
277 break;
278 #else
279 return ((CHAR *)lastp);
280 #endif
281 }
282 }
283 #ifdef __LINE_MATCH
284 }
285 return ((CHAR *)lastp);
286 #endif
287 }
288
289
290 #ifndef __LINE_MATCH
291 /*
292 * The Compiler
293 */
294
295 typedef struct args {
296 const CHAR *pattern;
297 int *aux;
298 int patp;
299 int length;
300 CHAR Ch;
301 } arg_t;
302
303 LOCAL void nextitem __PR((arg_t *));
304 LOCAL int prim __PR((arg_t *));
305 LOCAL int expr __PR((arg_t *, int *));
306 LOCAL void setexits __PR((int *, int, int));
307 LOCAL int join __PR((int *, int, int));
308
309 /*
310 * 'read' the next character from pattern
311 */
312 #define rch(ap) \
313 { \
314 if (++(ap)->patp >= (ap)->length) \
315 (ap)->Ch = 0; \
316 else \
317 (ap)->Ch = (ap)->pattern[(ap)->patp]; \
318 }
319
320 /*
321 * get the next item from pattern
322 */
323 LOCAL void
324 nextitem(ap)
325 arg_t *ap;
326 {
327 if (ap->Ch == QUOTE)
328 rch(ap);
329 rch(ap);
330 }
331
332 /*
333 * parse a primary
334 */
335 LOCAL int
336 prim(ap)
337 arg_t *ap;
338 {
339 int a = ap->patp;
340 int op = ap->Ch;
341 int t;
342
343 nextitem(ap);
344 switch (op) {
345
346 case '\0':
347 case ALT:
348 case RBRACK:
349 return (ENDSTATE);
350 case LCLASS:
351 while (ap->Ch != RCLASS && ap->Ch != '\0')
352 nextitem(ap);
353 if (ap->Ch == '\0')
354 return (ENDSTATE);
355 nextitem(ap);
356 break;
357 case REP:
358 t = prim(ap);
359 if (t == ENDSTATE)
360 return (ENDSTATE);
361 setexits(ap->aux, t, a);
362 break;
363 case LBRACK:
364 a = expr(ap, &ap->aux[a]);
365 if (a == ENDSTATE || ap->Ch != RBRACK)
366 return (ENDSTATE);
367 nextitem(ap);
368 break;
369 }
370 return (a);
371 }
372
373 /*
374 * parse an expression (a sequence of primaries)
375 */
376 LOCAL int
377 expr(ap, altp)
378 arg_t *ap;
379 int *altp;
380 {
381 int exits = ENDSTATE;
382 int a;
383 int *aux = ap->aux;
384 CHAR Ch;
385
386 for (;;) {
387 a = prim(ap);
388 Ch = ap->Ch;
389 if (Ch == ALT || Ch == RBRACK || Ch == '\0') {
390 exits = join(aux, exits, a);
391 if (Ch != ALT)
392 return (exits);
393 *altp = ap->patp;
394 altp = &aux[ap->patp];
395 nextitem(ap);
396 } else
397 setexits(aux, a, ap->patp);
398 }
399 }
400
401 /*
402 * set all exits in a list to a specified value
403 */
404 LOCAL void
405 setexits(aux, list, val)
406 int *aux;
407 int list;
408 int val;
409 {
410 int a;
411
412 while (list != ENDSTATE) {
413 a = aux[list];
414 aux[list] = val;
415 list = a;
416 }
417 }
418
419 /*
420 * concatenate two lists
421 */
422 LOCAL int
423 join(aux, a, b)
424 int *aux;
425 int a;
426 int b;
427 {
428 int t;
429
430 if (a == ENDSTATE)
431 return (b);
432 t = a;
433 while (aux[t] != ENDSTATE)
434 t = aux[t];
435 aux[t] = b;
436 return (a);
437 }
438
439 /*
440 * patcompile - the external compiler interface.
441 *
442 * The pattern is compiled into the aux array.
443 * Return value on success, is the outermost alternate which is != 0.
444 * Error is indicated by return of 0.
445 */
446 EXPORT int
447 patcompile(pat, len, aux)
448 const CHAR *pat;
449 int len;
450 int *aux;
451 {
452 arg_t a;
453 int alt = ENDSTATE;
454 int i;
455
456 a.pattern = pat;
457 a.length = len;
458 a.aux = aux;
459 a.patp = -1;
460
461 for (i = 0; i < len; i++)
462 aux[i] = ENDSTATE;
463 rch(&a);
464 i = expr(&a, &alt);
465 if (i == ENDSTATE)
466 return (0);
467 setexits(aux, i, ENDSTATE);
468 return (alt);
469 }
470 #endif /* LMATCH */