[RTL]
[reactos.git] / reactos / tools / wmc / mcl.c
1 /*
2 * Wine Message Compiler lexical scanner
3 *
4 * Copyright 2000 Bertho A. Stultiens (BS)
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #include "config.h"
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <ctype.h>
26 #include <assert.h>
27 #include <string.h>
28
29 #include "utils.h"
30 #include "wmc.h"
31 #include "lang.h"
32
33 #include "mcy.tab.h"
34
35 /*
36 * Keywords are case insensitive. All normal input is treated as
37 * being in codepage iso-8859-1 for ascii input files (unicode
38 * page 0) and as equivalent unicode if unicode input is selected.
39 * All normal input, which is not part of a message text, is
40 * enforced to be unicode page 0. Otherwise an error will be
41 * generated. The normal file data should only be ASCII because
42 * that is the basic definition of the grammar.
43 *
44 * Byteorder or unicode input is determined automatically by
45 * reading the first 8 bytes and checking them against unicode
46 * page 0 byteorder (hibyte must be 0).
47 * -- FIXME --
48 * Alternatively, the input is checked against a special byte
49 * sequence to identify the file.
50 * -- FIXME --
51 *
52 *
53 * Keywords:
54 * Codepages
55 * Facility
56 * FacilityNames
57 * LanguageNames
58 * MessageId
59 * MessageIdTypedef
60 * Severity
61 * SeverityNames
62 * SymbolicName
63 *
64 * Default added identifiers for classes:
65 * SeverityNames:
66 * Success = 0x0
67 * Informational = 0x1
68 * Warning = 0x2
69 * Error = 0x3
70 * FacilityNames:
71 * System = 0x0FF
72 * Application = 0xFFF
73 *
74 * The 'Codepages' keyword is a wmc extension.
75 */
76
77 static const WCHAR ustr_application[] = { 'A', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', 0 };
78 static const WCHAR ustr_codepages[] = { 'C', 'o', 'd', 'e', 'p', 'a', 'g', 'e', 's', 0 };
79 static const WCHAR ustr_english[] = { 'E', 'n', 'g', 'l', 'i', 's', 'h', 0 };
80 static const WCHAR ustr_error[] = { 'E', 'r', 'r', 'o', 'r', 0 };
81 static const WCHAR ustr_facility[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 0 };
82 static const WCHAR ustr_facilitynames[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
83 static const WCHAR ustr_informational[] = { 'I', 'n', 'f', 'o', 'r', 'm', 'a', 't', 'i', 'o', 'n', 'a', 'l', 0 };
84 static const WCHAR ustr_language[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 0};
85 static const WCHAR ustr_languagenames[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 'N', 'a', 'm', 'e', 's', 0};
86 static const WCHAR ustr_messageid[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 0 };
87 static const WCHAR ustr_messageidtypedef[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 'T', 'y', 'p', 'e', 'd', 'e', 'f', 0 };
88 static const WCHAR ustr_outputbase[] = { 'O', 'u', 't', 'p', 'u', 't', 'B', 'a', 's', 'e', 0 };
89 static const WCHAR ustr_severity[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 0 };
90 static const WCHAR ustr_severitynames[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
91 static const WCHAR ustr_success[] = { 'S', 'u', 'c', 'c', 'e', 's', 's', 0 };
92 static const WCHAR ustr_symbolicname[] = { 'S', 'y', 'm', 'b', 'o', 'l', 'i', 'c', 'N', 'a', 'm', 'e', 0 };
93 static const WCHAR ustr_system[] = { 'S', 'y', 's', 't', 'e', 'm', 0 };
94 static const WCHAR ustr_warning[] = { 'W', 'a', 'r', 'n', 'i', 'n', 'g', 0 };
95 static const WCHAR ustr_msg00001[] = { 'm', 's', 'g', '0', '0', '0', '0', '1', 0 };
/*
 * Character classification table, indexed by an iso-8859-1 character
 * value (0..255). It replaces any form of "expression building" when
 * checking for valid filename characters and is also used for
 * identifier checks.
 * FIXME: use it more consistently.
 *
 * Each entry is a bitwise OR of the CH_* flags below. The element type
 * is unsigned char because several entries have bit 7 set (0x80, 0xc0);
 * with a plain (possibly signed) char those initializers would not be
 * representable and the stored values would read back negative.
 */

#define CH_SHORTNAME 0x01
#define CH_LONGNAME 0x02
#define CH_IDENT 0x04
#define CH_NUMBER 0x08
/*#define CH_WILDCARD 0x10*/
/*#define CH_DOT 0x20*/
#define CH_PUNCT 0x40
#define CH_INVALID 0x80

static const unsigned char char_table[256] = {
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
    0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
    0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
    0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
    0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
    0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
    0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
    0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
    0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
};
145
/*
 * Return 1 when ch lies in unicode page 0 (0x00..0xff), i.e. no bit
 * above the low byte is set, and 0 otherwise. Negative values such as
 * EOF therefore yield 0.
 */
static int isisochar(int ch)
{
    const int high_bits = ch & ~0xff;

    return high_bits == 0;
}
150
151 static int codepage;
152 static const union cptable *codepage_def;
153
154 void set_codepage(int cp)
155 {
156 codepage = cp;
157 codepage_def = find_codepage(codepage);
158 if(!codepage_def)
159 xyyerror("Codepage %d not found; cannot process\n", codepage);
160 }
161
162 /*
163 * Input functions
164 */
165 static int nungetstack = 0;
166 static int allocungetstack = 0;
167 static char *ungetstack = NULL;
168 static int ninputbuffer = 0;
169 static WCHAR *inputbuffer = NULL;
170 static char *xlatebuffer = NULL;
171
172 #define INPUTBUFFER_SIZE 2048 /* Must be larger than 4 and approx. large enough to hold a line */
173
174 /*
175 * Fill the input buffer with *one* line of input.
176 * The line is '\n' terminated so that scanning
177 * messages with translation works as expected
178 * (otherwise we cannot pre-translate because the
179 * language is first known one line before the
180 * actual message).
181 */
182 static int fill_inputbuffer(void)
183 {
184 int n;
185 static const char err_fatalread[] = "Fatal: reading input failed";
186 static int endian = -1;
187
188 if(!inputbuffer)
189 {
190 inputbuffer = xmalloc(INPUTBUFFER_SIZE*sizeof(WCHAR));
191 xlatebuffer = xmalloc(INPUTBUFFER_SIZE);
192 }
193
194 try_again:
195 if(!unicodein)
196 {
197 char *cptr;
198 cptr = fgets(xlatebuffer, INPUTBUFFER_SIZE, yyin);
199 if(!cptr && ferror(yyin))
200 xyyerror(err_fatalread);
201 else if(!cptr)
202 return 0;
203 assert(codepage_def != NULL);
204 n = wine_cp_mbstowcs(codepage_def, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
205 if(n < 0)
206 internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)\n", n);
207 if(n <= 1)
208 goto try_again; /* Should not happen */
209 n--; /* Strip added conversion '\0' from input length */
210 /*
211 * FIXME:
212 * Detect UTF-8 in the first time we read some bytes by
213 * checking the special sequence "FE..." or something like
214 * that. I need to check www.unicode.org for details.
215 */
216 }
217 else
218 {
219 if(endian == -1)
220 {
221 n = fread(inputbuffer, 1, 8, yyin);
222 if(n != 8)
223 {
224 if(!n && ferror(yyin))
225 xyyerror(err_fatalread);
226 else
227 xyyerror("Fatal: file to short to determine byteorder (should never happen)\n");
228 }
229 if(isisochar(inputbuffer[0]) &&
230 isisochar(inputbuffer[1]) &&
231 isisochar(inputbuffer[2]) &&
232 isisochar(inputbuffer[3]))
233 {
234 #ifdef WORDS_BIGENDIAN
235 endian = WMC_BO_BIG;
236 #else
237 endian = WMC_BO_LITTLE;
238 #endif
239 }
240 else if(isisochar(BYTESWAP_WORD(inputbuffer[0])) &&
241 isisochar(BYTESWAP_WORD(inputbuffer[1])) &&
242 isisochar(BYTESWAP_WORD(inputbuffer[2])) &&
243 isisochar(BYTESWAP_WORD(inputbuffer[3])))
244 {
245 #ifdef WORDS_BIGENDIAN
246 endian = WMC_BO_LITTLE;
247 #else
248 endian = WMC_BO_BIG;
249 #endif
250 }
251 else
252 xyyerror("Fatal: cannot determine file's byteorder\n");
253 /* FIXME:
254 * Determine the file-endian with the leader-bytes
255 * "FF FE..."; can't remember the exact sequence.
256 */
257 n /= 2;
258 #ifdef WORDS_BIGENDIAN
259 if(endian == WMC_BO_LITTLE)
260 #else
261 if(endian == WMC_BO_BIG)
262 #endif
263 {
264 inputbuffer[0] = BYTESWAP_WORD(inputbuffer[0]);
265 inputbuffer[1] = BYTESWAP_WORD(inputbuffer[1]);
266 inputbuffer[2] = BYTESWAP_WORD(inputbuffer[2]);
267 inputbuffer[3] = BYTESWAP_WORD(inputbuffer[3]);
268 }
269
270 }
271 else
272 {
273 int i;
274 n = 0;
275 for(i = 0; i < INPUTBUFFER_SIZE; i++)
276 {
277 int t;
278 t = fread(&inputbuffer[i], 2, 1, yyin);
279 if(!t && ferror(yyin))
280 xyyerror(err_fatalread);
281 else if(!t && n)
282 break;
283 n++;
284 #ifdef WORDS_BIGENDIAN
285 if(endian == WMC_BO_LITTLE)
286 #else
287 if(endian == WMC_BO_BIG)
288 #endif
289 {
290 if((inputbuffer[i] = BYTESWAP_WORD(inputbuffer[i])) == '\n')
291 break;
292 }
293 else
294 {
295 if(inputbuffer[i] == '\n')
296 break;
297 }
298 }
299 }
300
301 }
302
303 if(!n)
304 {
305 mcy_warning("Re-read line (input was or converted to zilch)\n");
306 goto try_again; /* Should not happen, but could be due to stdin reading and a signal */
307 }
308
309 ninputbuffer += n;
310 return 1;
311 }
312
313 static int get_unichar(void)
314 {
315 static WCHAR *b = NULL;
316 char_number++;
317
318 if(nungetstack)
319 return ungetstack[--nungetstack];
320
321 if(!ninputbuffer)
322 {
323 if(!fill_inputbuffer())
324 return EOF;
325 b = inputbuffer;
326 }
327
328 ninputbuffer--;
329 return (int)(*b++ & 0xffff);
330 }
331
332 static void unget_unichar(int ch)
333 {
334 if(ch == EOF)
335 return;
336
337 char_number--;
338
339 if(nungetstack == allocungetstack)
340 {
341 allocungetstack += 32;
342 ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
343 }
344
345 ungetstack[nungetstack++] = (WCHAR)ch;
346 }
347
348
349 /*
350 * Normal character stack.
351 * Used for number scanning.
352 */
353 static int ncharstack = 0;
354 static int alloccharstack = 0;
355 static char *charstack = NULL;
356
357 static void empty_char_stack(void)
358 {
359 ncharstack = 0;
360 }
361
362 static void push_char(int ch)
363 {
364 if(ncharstack == alloccharstack)
365 {
366 alloccharstack += 32;
367 charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
368 }
369 charstack[ncharstack++] = (char)ch;
370 }
371
372 static int tos_char_stack(void)
373 {
374 if(!ncharstack)
375 return 0;
376 else
377 return (int)(charstack[ncharstack-1] & 0xff);
378 }
379
380 static char *get_char_stack(void)
381 {
382 return charstack;
383 }
384
385 /*
386 * Unicode character stack.
387 * Used for general scanner.
388 */
389 static int nunicharstack = 0;
390 static int allocunicharstack = 0;
391 static WCHAR *unicharstack = NULL;
392
393 static void empty_unichar_stack(void)
394 {
395 nunicharstack = 0;
396 }
397
398 static void push_unichar(int ch)
399 {
400 if(nunicharstack == allocunicharstack)
401 {
402 allocunicharstack += 128;
403 unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(*unicharstack));
404 }
405 unicharstack[nunicharstack++] = (WCHAR)ch;
406 }
407
408 #if 0
409 static int tos_unichar_stack(void)
410 {
411 if(!nunicharstack)
412 return 0;
413 else
414 return (int)(unicharstack[nunicharstack-1] & 0xffff);
415 }
416 #endif
417
418 static WCHAR *get_unichar_stack(void)
419 {
420 return unicharstack;
421 }
422
423 /*
424 * Number scanner
425 *
426 * state | ch | next state
427 * ------+-----------------+--------------------------
428 * 0 | [0] | 1
429 * 0 | [1-9] | 4
430 * 0 | . | error (should never occur)
431 * 1 | [xX] | 2
432 * 1 | [0-7] | 3
433 * 1 | [89a-wyzA-WYZ_] | error invalid digit
434 * 1 | . | return 0
435 * 2 | [0-9a-fA-F] | 2
436 * 2 | [g-zG-Z_] | error invalid hex digit
437 * 2 | . | return (hex-number) if TOS != [xX] else error
438 * 3 | [0-7] | 3
439 * 3 | [89a-zA-Z_] | error invalid octal digit
440 * 3 | . | return (octal-number)
441 * 4 | [0-9] | 4
442 * 4 | [a-zA-Z_] | error invalid decimal digit
443 * 4 | . | return (decimal-number)
444 *
445 * All non-identifier characters [^a-zA-Z_0-9] terminate the scan
446 * and return the value. This is not entirely correct, but close
447 * enough (should check punctuators as trailing context, but the
448 * char_table is not adapted to that and it is questionable whether
449 * it is worth the trouble).
450 * All non-iso-8859-1 characters are an error.
451 */
452 static int scan_number(int ch)
453 {
454 int state = 0;
455 int base = 10;
456 empty_char_stack();
457
458 while(1)
459 {
460 if(!isisochar(ch))
461 xyyerror("Invalid digit\n");
462
463 switch(state)
464 {
465 case 0:
466 if(isdigit(ch))
467 {
468 push_char(ch);
469 if(ch == '0')
470 state = 1;
471 else
472 state = 4;
473 }
474 else
475 internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state\n");
476 break;
477 case 1:
478 if(ch == 'x' || ch == 'X')
479 {
480 push_char(ch);
481 state = 2;
482 }
483 else if(ch >= '0' && ch <= '7')
484 {
485 push_char(ch);
486 state = 3;
487 }
488 else if(isalpha(ch) || ch == '_')
489 xyyerror("Invalid number digit\n");
490 else
491 {
492 unget_unichar(ch);
493 mcy_lval.num = 0;
494 return tNUMBER;
495 }
496 break;
497 case 2:
498 if(isxdigit(ch))
499 push_char(ch);
500 else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
501 xyyerror("Invalid hex digit\n");
502 else
503 {
504 base = 16;
505 goto finish;
506 }
507 break;
508 case 3:
509 if(ch >= '0' && ch <= '7')
510 push_char(ch);
511 else if(isalnum(ch) || ch == '_')
512 xyyerror("Invalid octal digit\n");
513 else
514 {
515 base = 8;
516 goto finish;
517 }
518 break;
519 case 4:
520 if(isdigit(ch))
521 push_char(ch);
522 else if(isalnum(ch) || ch == '_')
523 xyyerror("Invalid decimal digit\n");
524 else
525 {
526 base = 10;
527 goto finish;
528 }
529 break;
530 default:
531 internal_error(__FILE__, __LINE__, "Invalid state in number-scanner\n");
532 }
533 ch = get_unichar();
534 }
535 finish:
536 unget_unichar(ch);
537 push_char(0);
538 mcy_lval.num = strtoul(get_char_stack(), NULL, base);
539 return tNUMBER;
540 }
541
542 static void newline(void)
543 {
544 line_number++;
545 char_number = 1;
546 }
547
548 static int unisort(const void *p1, const void *p2)
549 {
550 return unistricmp(((const token_t *)p1)->name, ((const token_t *)p2)->name);
551 }
552
553 static token_t *tokentable = NULL;
554 static int ntokentable = 0;
555
556 token_t *lookup_token(const WCHAR *s)
557 {
558 token_t tok;
559
560 tok.name = s;
561 return (token_t *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
562 }
563
564 void add_token(tok_e type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
565 {
566 ntokentable++;
567 tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));
568 tokentable[ntokentable-1].type = type;
569 tokentable[ntokentable-1].name = name;
570 tokentable[ntokentable-1].token = tok;
571 tokentable[ntokentable-1].codepage = cp;
572 tokentable[ntokentable-1].alias = alias;
573 tokentable[ntokentable-1].fixed = fix;
574 qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
575 }
576
577 void get_tokentable(token_t **tab, int *len)
578 {
579 assert(tab != NULL);
580 assert(len != NULL);
581 *tab = tokentable;
582 *len = ntokentable;
583 }
584
585 /*
586 * The scanner
587 *
588 */
589 int mcy_lex(void)
590 {
591 static const WCHAR ustr_dot1[] = { '.', '\n', 0 };
592 static const WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
593 static int isinit = 0;
594 int ch;
595
596 if(!isinit)
597 {
598 isinit++;
599 set_codepage(WMC_DEFAULT_CODEPAGE);
600 add_token(tok_keyword, ustr_codepages, tCODEPAGE, 0, NULL, 0);
601 add_token(tok_keyword, ustr_facility, tFACILITY, 0, NULL, 1);
602 add_token(tok_keyword, ustr_facilitynames, tFACNAMES, 0, NULL, 1);
603 add_token(tok_keyword, ustr_language, tLANGUAGE, 0, NULL, 1);
604 add_token(tok_keyword, ustr_languagenames, tLANNAMES, 0, NULL, 1);
605 add_token(tok_keyword, ustr_messageid, tMSGID, 0, NULL, 1);
606 add_token(tok_keyword, ustr_messageidtypedef, tTYPEDEF, 0, NULL, 1);
607 add_token(tok_keyword, ustr_outputbase, tBASE, 0, NULL, 1);
608 add_token(tok_keyword, ustr_severity, tSEVERITY, 0, NULL, 1);
609 add_token(tok_keyword, ustr_severitynames, tSEVNAMES, 0, NULL, 1);
610 add_token(tok_keyword, ustr_symbolicname, tSYMNAME, 0, NULL, 1);
611 add_token(tok_severity, ustr_error, 0x03, 0, NULL, 0);
612 add_token(tok_severity, ustr_warning, 0x02, 0, NULL, 0);
613 add_token(tok_severity, ustr_informational, 0x01, 0, NULL, 0);
614 add_token(tok_severity, ustr_success, 0x00, 0, NULL, 0);
615 add_token(tok_facility, ustr_application, 0xFFF, 0, NULL, 0);
616 add_token(tok_facility, ustr_system, 0x0FF, 0, NULL, 0);
617 add_token(tok_language, ustr_english, 0x409, 437, ustr_msg00001, 0);
618 }
619
620 empty_unichar_stack();
621
622 while(1)
623 {
624 if(want_line)
625 {
626 while((ch = get_unichar()) != '\n')
627 {
628 if(ch == EOF)
629 xyyerror("Unexpected EOF\n");
630 push_unichar(ch);
631 }
632 newline();
633 push_unichar(ch);
634 push_unichar(0);
635 if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
636 {
637 want_line = 0;
638 /* Reset the codepage to our default after each message */
639 set_codepage(WMC_DEFAULT_CODEPAGE);
640 return tMSGEND;
641 }
642 mcy_lval.str = xunistrdup(get_unichar_stack());
643 return tLINE;
644 }
645
646 ch = get_unichar();
647
648 if(ch == EOF)
649 return EOF;
650
651 if(ch == '\n')
652 {
653 newline();
654 if(want_nl)
655 {
656 want_nl = 0;
657 return tNL;
658 }
659 continue;
660 }
661
662 if(isisochar(ch))
663 {
664 if(want_file)
665 {
666 int n = 0;
667 while(n < 8 && isisochar(ch))
668 {
669 int t = char_table[ch];
670 if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
671 break;
672
673 push_unichar(ch);
674 n++;
675 ch = get_unichar();
676 }
677 unget_unichar(ch);
678 push_unichar(0);
679 want_file = 0;
680 mcy_lval.str = xunistrdup(get_unichar_stack());
681 return tFILE;
682 }
683
684 if(char_table[ch] & CH_IDENT)
685 {
686 token_t *tok;
687 while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
688 {
689 push_unichar(ch);
690 ch = get_unichar();
691 }
692 unget_unichar(ch);
693 push_unichar(0);
694 if(!(tok = lookup_token(get_unichar_stack())))
695 {
696 mcy_lval.str = xunistrdup(get_unichar_stack());
697 return tIDENT;
698 }
699 switch(tok->type)
700 {
701 case tok_keyword:
702 return tok->token;
703
704 case tok_language:
705 codepage = tok->codepage;
706 /* Fall through */
707 case tok_severity:
708 case tok_facility:
709 mcy_lval.tok = tok;
710 return tTOKEN;
711
712 default:
713 internal_error(__FILE__, __LINE__, "Invalid token type encountered\n");
714 }
715 }
716
717 if(isspace(ch)) /* Ignore space */
718 continue;
719
720 if(isdigit(ch))
721 return scan_number(ch);
722 }
723
724 switch(ch)
725 {
726 case ':':
727 case '=':
728 case '+':
729 case '(':
730 case ')':
731 return ch;
732 case ';':
733 while(ch != '\n' && ch != EOF)
734 {
735 push_unichar(ch);
736 ch = get_unichar();
737 }
738 newline();
739 push_unichar(ch); /* Include the newline */
740 push_unichar(0);
741 mcy_lval.str = xunistrdup(get_unichar_stack());
742 return tCOMMENT;
743 default:
744 xyyerror("Invalid character '%c' (0x%04x)\n", isisochar(ch) && isprint(ch) ? ch : '.', ch);
745 }
746 }
747 }