d0da3769ab45e1cdcfabd89fc224566277fb0c4a
[reactos.git] / reactos / tools / wmc / mcl.c
1 /*
2 * Wine Message Compiler lexical scanner
3 *
4 * Copyright 2000 Bertho A. Stultiens (BS)
5 *
6 */
7 #include <windows.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <ctype.h>
11 #include <assert.h>
12
13 #include "config.h"
14
15 #include "utils.h"
16 #include "wmc.h"
17 #include "lang.h"
18
19 #include "y_tab.h"
20
21 /*
 * Keywords are case insensitive. All normal input is treated as
23 * being in codepage iso-8859-1 for ascii input files (unicode
24 * page 0) and as equivalent unicode if unicode input is selected.
25 * All normal input, which is not part of a message text, is
26 * enforced to be unicode page 0. Otherwise an error will be
27 * generated. The normal file data should only be ASCII because
28 * that is the basic definition of the grammar.
29 *
30 * Byteorder or unicode input is determined automatically by
31 * reading the first 8 bytes and checking them against unicode
32 * page 0 byteorder (hibyte must be 0).
33 * -- FIXME --
34 * Alternatively, the input is checked against a special byte
35 * sequence to identify the file.
36 * -- FIXME --
37 *
38 *
 * Keywords:
 *	Codepages
 *	Facility
 *	FacilityNames
 *	Language
 *	LanguageNames
 *	MessageId
 *	MessageIdTypedef
 *	OutputBase
 *	Severity
 *	SeverityNames
 *	SymbolicName
49 *
50 * Default added identifiers for classes:
51 * SeverityNames:
52 * Success = 0x0
53 * Informational = 0x1
54 * Warning = 0x2
55 * Error = 0x3
56 * FacilityNames:
57 * System = 0x0FF
58 * Application = 0xFFF
59 *
60 * The 'Codepages' keyword is a wmc extension.
61 */
62
63 static WCHAR ustr_application[] = { 'A', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', 0 };
64 static WCHAR ustr_codepages[] = { 'C', 'o', 'd', 'e', 'p', 'a', 'g', 'e', 's', 0 };
65 static WCHAR ustr_english[] = { 'E', 'n', 'g', 'l', 'i', 's', 'h', 0 };
66 static WCHAR ustr_error[] = { 'E', 'r', 'r', 'o', 'r', 0 };
67 static WCHAR ustr_facility[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 0 };
68 static WCHAR ustr_facilitynames[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
69 static WCHAR ustr_informational[] = { 'I', 'n', 'f', 'o', 'r', 'm', 'a', 't', 'i', 'o', 'n', 'a', 'l', 0 };
70 static WCHAR ustr_language[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 0};
71 static WCHAR ustr_languagenames[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 'N', 'a', 'm', 'e', 's', 0};
72 static WCHAR ustr_messageid[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 0 };
73 static WCHAR ustr_messageidtypedef[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 'T', 'y', 'p', 'e', 'd', 'e', 'f', 0 };
74 static WCHAR ustr_outputbase[] = { 'O', 'u', 't', 'p', 'u', 't', 'B', 'a', 's', 'e', 0 };
75 static WCHAR ustr_severity[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 0 };
76 static WCHAR ustr_severitynames[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
77 static WCHAR ustr_success[] = { 'S', 'u', 'c', 'c', 'e', 's', 's', 0 };
78 static WCHAR ustr_symbolicname[] = { 'S', 'y', 'm', 'b', 'o', 'l', 'i', 'c', 'N', 'a', 'm', 'e', 0 };
79 static WCHAR ustr_system[] = { 'S', 'y', 's', 't', 'e', 'm', 0 };
80 static WCHAR ustr_warning[] = { 'W', 'a', 'r', 'n', 'i', 'n', 'g', 0 };
81 static WCHAR ustr_msg00001[] = { 'm', 's', 'g', '0', '0', '0', '0', '1', 0 };
/*
 * This table is to beat any form of "expression building" to check for
 * correct filename characters. It is also used for ident checks.
 * FIXME: use it more consistently.
 *
 * Each entry is a set of CH_* flags for the character with that code.
 * The values 0x10 and 0x20 found in the entries for '*', '?' and '.'
 * belong to the two flags that are commented out below.
 *
 * The element type is unsigned char on purpose: entries such as 0x80
 * and 0xc0 do not fit in a plain char where char is signed.
 */

#define CH_SHORTNAME	0x01
#define CH_LONGNAME	0x02
#define CH_IDENT	0x04
#define CH_NUMBER	0x08
/*#define CH_WILDCARD	0x10*/
/*#define CH_DOT	0x20*/
#define CH_PUNCT	0x40
#define CH_INVALID	0x80

static const unsigned char char_table[256] = {
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
	0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
	0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
	0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
	0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
	0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
	0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
	0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
	0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
};
131
/*
 * Tell whether ch lies in unicode page 0 (iso-8859-1), i.e. whether no
 * bit above the low eight is set. The result is 1 for 0x00..0xff and 0
 * for any other value, negative ones included.
 */
static int isisochar(int ch)
{
	const int high_bits = ch & ~0xff;

	return high_bits == 0;
}
136
/* Codepage handed to the multibyte-to-unicode conversion of input lines */
static int codepage;
//static const union cptable *codepage_def;

/*
 * Make cp the codepage used for subsequently read non-unicode input.
 * The lookup of a codepage definition is compiled out, so no check is
 * made here that the codepage actually exists.
 */
void set_codepage(int cp)
{
	codepage = cp;
#if 0
	codepage_def = find_codepage(codepage);
	if(!codepage_def)
		xyyerror("Codepage %d not found; cannot process", codepage);
#endif
}
149
150 /*
151 * Input functions
152 */
153 static int nungetstack = 0;
154 static int allocungetstack = 0;
155 static char *ungetstack = NULL;
156 static int ninputbuffer = 0;
157 static WCHAR *inputbuffer = NULL;
158 static char *xlatebuffer = NULL;
159
160 #define INPUTBUFFER_SIZE 2048 /* Must be larger than 4 and approx. large enough to hold a line */
161
162 /*
163 * Fill the input buffer with *one* line of input.
164 * The line is '\n' terminated so that scanning
165 * messages with translation works as expected
166 * (otherwise we cannot pre-translate because the
167 * language is first known one line before the
168 * actual message).
169 */
170 static int fill_inputbuffer(void)
171 {
172 int n;
173 static char err_fatalread[] = "Fatal: reading input failed";
174 static int endian = -1;
175
176 if(!inputbuffer)
177 {
178 inputbuffer = xmalloc(INPUTBUFFER_SIZE);
179 xlatebuffer = xmalloc(INPUTBUFFER_SIZE);
180 }
181
182 try_again:
183 if(!unicodein)
184 {
185 char *cptr;
186 cptr = fgets(xlatebuffer, INPUTBUFFER_SIZE, yyin);
187 if(!cptr && ferror(yyin))
188 xyyerror(err_fatalread);
189 else if(!cptr)
190 return 0;
191 // assert(codepage_def != NULL);
192 // n = cp_mbstowcs(codepage_def, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
193 n = MultiByteToWideChar(codepage, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE); if(n < 0)
194 internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)", n);
195 if(n <= 1)
196 goto try_again; /* Should not hapen */
197 n--; /* Strip added conversion '\0' from input length */
198 /*
199 * FIXME:
200 * Detect UTF-8 in the first time we read some bytes by
201 * checking the special sequence "FE..." or something like
202 * that. I need to check www.unicode.org for details.
203 */
204 }
205 else
206 {
207 if(endian == -1)
208 {
209 n = fread(inputbuffer, 1, 8, yyin);
210 if(n != 8)
211 {
212 if(!n && ferror(yyin))
213 xyyerror(err_fatalread);
214 else
215 xyyerror("Fatal: file to short to determine byteorder (should never happen)");
216 }
217 if(isisochar(inputbuffer[0]) &&
218 isisochar(inputbuffer[1]) &&
219 isisochar(inputbuffer[2]) &&
220 isisochar(inputbuffer[3]))
221 {
222 #ifdef WORDS_BIGENDIAN
223 endian = WMC_BO_BIG;
224 #else
225 endian = WMC_BO_LITTLE;
226 #endif
227 }
228 else if(isisochar(BYTESWAP_WORD(inputbuffer[0])) &&
229 isisochar(BYTESWAP_WORD(inputbuffer[1])) &&
230 isisochar(BYTESWAP_WORD(inputbuffer[2])) &&
231 isisochar(BYTESWAP_WORD(inputbuffer[3])))
232 {
233 #ifdef WORDS_BIGENDIAN
234 endian = WMC_BO_LITTLE;
235 #else
236 endian = WMC_BO_BIG;
237 #endif
238 }
239 else
240 xyyerror("Fatal: cannot determine file's byteorder");
241 /* FIXME:
242 * Determine the file-endian with the leader-bytes
243 * "FF FE..."; can't remember the exact sequence.
244 */
245 n /= 2;
246 #ifdef WORDS_BIGENDIAN
247 if(endian == WMC_BO_LITTLE)
248 #else
249 if(endian == WMC_BO_BIG)
250 #endif
251 {
252 inputbuffer[0] = BYTESWAP_WORD(inputbuffer[0]);
253 inputbuffer[1] = BYTESWAP_WORD(inputbuffer[1]);
254 inputbuffer[2] = BYTESWAP_WORD(inputbuffer[2]);
255 inputbuffer[3] = BYTESWAP_WORD(inputbuffer[3]);
256 }
257
258 }
259 else
260 {
261 int i;
262 n = 0;
263 for(i = 0; i < INPUTBUFFER_SIZE; i++)
264 {
265 int t;
266 t = fread(&inputbuffer[i], 2, 1, yyin);
267 if(!t && ferror(yyin))
268 xyyerror(err_fatalread);
269 else if(!t && n)
270 break;
271 n++;
272 #ifdef WORDS_BIGENDIAN
273 if(endian == WMC_BO_LITTLE)
274 #else
275 if(endian == WMC_BO_BIG)
276 #endif
277 {
278 if((inputbuffer[i] = BYTESWAP_WORD(inputbuffer[i])) == '\n')
279 break;
280 }
281 else
282 {
283 if(inputbuffer[i] == '\n')
284 break;
285 }
286 }
287 }
288
289 }
290
291 if(!n)
292 {
293 yywarning("Re-read line (input was or converted to zilch)");
294 goto try_again; /* Should not happen, but could be due to stdin reading and a signal */
295 }
296
297 ninputbuffer += n;
298 return 1;
299 }
300
301 static int get_unichar(void)
302 {
303 static WCHAR *b = NULL;
304 char_number++;
305
306 if(nungetstack)
307 return ungetstack[--nungetstack];
308
309 if(!ninputbuffer)
310 {
311 if(!fill_inputbuffer())
312 return EOF;
313 b = inputbuffer;
314 }
315
316 ninputbuffer--;
317 return (int)(*b++ & 0xffff);
318 }
319
320 static void unget_unichar(int ch)
321 {
322 if(ch == EOF)
323 return;
324
325 char_number--;
326
327 if(nungetstack == allocungetstack)
328 {
329 allocungetstack += 32;
330 ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
331 }
332
333 ungetstack[nungetstack++] = (WCHAR)ch;
334 }
335
336
337 /*
338 * Normal character stack.
339 * Used for number scanning.
340 */
/* Characters in use, characters allocated and the storage itself */
static int ncharstack = 0;
static int alloccharstack = 0;
static char *charstack = NULL;

/* Forget the stacked characters; the allocated storage is kept for reuse */
static void empty_char_stack(void)
{
	ncharstack = 0;
}
349
350 static void push_char(int ch)
351 {
352 if(ncharstack == alloccharstack)
353 {
354 alloccharstack += 32;
355 charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
356 }
357 charstack[ncharstack++] = (char)ch;
358 }
359
360 static int tos_char_stack(void)
361 {
362 if(!ncharstack)
363 return 0;
364 else
365 return (int)(charstack[ncharstack-1] & 0xff);
366 }
367
368 static char *get_char_stack(void)
369 {
370 return charstack;
371 }
372
373 /*
374 * Unicode character stack.
375 * Used for general scanner.
376 */
377 static int nunicharstack = 0;
378 static int allocunicharstack = 0;
379 static WCHAR *unicharstack = NULL;
380
381 static void empty_unichar_stack(void)
382 {
383 nunicharstack = 0;
384 }
385
386 static void push_unichar(int ch)
387 {
388 if(nunicharstack == allocunicharstack)
389 {
390 allocunicharstack += 128;
391 unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(*unicharstack));
392 }
393 unicharstack[nunicharstack++] = (WCHAR)ch;
394 }
395
396 #if 0
397 static int tos_unichar_stack(void)
398 {
399 if(!nunicharstack)
400 return 0;
401 else
402 return (int)(unicharstack[nunicharstack-1] & 0xffff);
403 }
404 #endif
405
406 static WCHAR *get_unichar_stack(void)
407 {
408 return unicharstack;
409 }
410
411 /*
412 * Number scanner
413 *
414 * state | ch | next state
415 * ------+-----------------+--------------------------
416 * 0 | [0] | 1
417 * 0 | [1-9] | 4
418 * 0 | . | error (should never occur)
419 * 1 | [xX] | 2
420 * 1 | [0-7] | 3
421 * 1 | [89a-wyzA-WYZ_] | error invalid digit
422 * 1 | . | return 0
423 * 2 | [0-9a-fA-F] | 2
424 * 2 | [g-zG-Z_] | error invalid hex digit
425 * 2 | . | return (hex-number) if TOS != [xX] else error
426 * 3 | [0-7] | 3
427 * 3 | [89a-zA-Z_] | error invalid octal digit
428 * 3 | . | return (octal-number)
429 * 4 | [0-9] | 4
430 * 4 | [a-zA-Z_] | error invalid decimal digit
431 * 4 | . | return (decimal-number)
432 *
433 * All non-identifier characters [^a-zA-Z_0-9] terminate the scan
434 * and return the value. This is not entirely correct, but close
435 * enough (should check punctuators as trailing context, but the
436 * char_table is not adapted to that and it is questionable whether
437 * it is worth the trouble).
438 * All non-iso-8859-1 characters are an error.
439 */
440 static int scan_number(int ch)
441 {
442 int state = 0;
443 int base = 10;
444 empty_char_stack();
445
446 while(1)
447 {
448 if(!isisochar(ch))
449 xyyerror("Invalid digit");
450
451 switch(state)
452 {
453 case 0:
454 if(isdigit(ch))
455 {
456 push_char(ch);
457 if(ch == '0')
458 state = 1;
459 else
460 state = 4;
461 }
462 else
463 internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state");
464 break;
465 case 1:
466 if(ch == 'x' || ch == 'X')
467 {
468 push_char(ch);
469 state = 2;
470 }
471 else if(ch >= '0' && ch <= '7')
472 {
473 push_char(ch);
474 state = 3;
475 }
476 else if(isalpha(ch) || ch == '_')
477 xyyerror("Invalid number digit");
478 else
479 {
480 unget_unichar(ch);
481 yylval.num = 0;
482 return tNUMBER;
483 }
484 break;
485 case 2:
486 if(isxdigit(ch))
487 push_char(ch);
488 else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
489 xyyerror("Invalid hex digit");
490 else
491 {
492 base = 16;
493 goto finish;
494 }
495 break;
496 case 3:
497 if(ch >= '0' && ch <= '7')
498 push_char(ch);
499 else if(isalnum(ch) || ch == '_')
500 xyyerror("Invalid octal digit");
501 else
502 {
503 base = 8;
504 goto finish;
505 }
506 break;
507 case 4:
508 if(isdigit(ch))
509 push_char(ch);
510 else if(isalnum(ch) || ch == '_')
511 xyyerror("Invalid decimal digit");
512 else
513 {
514 base = 10;
515 goto finish;
516 }
517 break;
518 default:
519 internal_error(__FILE__, __LINE__, "Invalid state in number-scanner");
520 }
521 ch = get_unichar();
522 }
523 finish:
524 unget_unichar(ch);
525 push_char(0);
526 yylval.num = strtoul(get_char_stack(), NULL, base);
527 return tNUMBER;
528 }
529
530 static void newline(void)
531 {
532 line_number++;
533 char_number = 1;
534 }
535
536 static int unisort(const void *p1, const void *p2)
537 {
538 return unistricmp(((token_t *)p1)->name, ((token_t *)p2)->name);
539 }
540
541 static token_t *tokentable = NULL;
542 static int ntokentable = 0;
543
544 token_t *lookup_token(const WCHAR *s)
545 {
546 token_t tok;
547
548 tok.name = s;
549 return (token_t *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
550 }
551
552 void add_token(tok_e type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
553 {
554 ntokentable++;
555 tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));
556 tokentable[ntokentable-1].type = type;
557 tokentable[ntokentable-1].name = name;
558 tokentable[ntokentable-1].token = tok;
559 tokentable[ntokentable-1].codepage = cp;
560 tokentable[ntokentable-1].alias = alias;
561 tokentable[ntokentable-1].fixed = fix;
562 qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
563 }
564
565 void get_tokentable(token_t **tab, int *len)
566 {
567 assert(tab != NULL);
568 assert(len != NULL);
569 *tab = tokentable;
570 *len = ntokentable;
571 }
572
573 /*
574 * The scanner
575 *
576 */
577 int yylex(void)
578 {
579 static WCHAR ustr_dot1[] = { '.', '\n', 0 };
580 static WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
581 static int isinit = 0;
582 int ch;
583
584 if(!isinit)
585 {
586 isinit++;
587 set_codepage(WMC_DEFAULT_CODEPAGE);
588 add_token(tok_keyword, ustr_codepages, tCODEPAGE, 0, NULL, 0);
589 add_token(tok_keyword, ustr_facility, tFACILITY, 0, NULL, 1);
590 add_token(tok_keyword, ustr_facilitynames, tFACNAMES, 0, NULL, 1);
591 add_token(tok_keyword, ustr_language, tLANGUAGE, 0, NULL, 1);
592 add_token(tok_keyword, ustr_languagenames, tLANNAMES, 0, NULL, 1);
593 add_token(tok_keyword, ustr_messageid, tMSGID, 0, NULL, 1);
594 add_token(tok_keyword, ustr_messageidtypedef, tTYPEDEF, 0, NULL, 1);
595 add_token(tok_keyword, ustr_outputbase, tBASE, 0, NULL, 1);
596 add_token(tok_keyword, ustr_severity, tSEVERITY, 0, NULL, 1);
597 add_token(tok_keyword, ustr_severitynames, tSEVNAMES, 0, NULL, 1);
598 add_token(tok_keyword, ustr_symbolicname, tSYMNAME, 0, NULL, 1);
599 add_token(tok_severity, ustr_error, 0x03, 0, NULL, 0);
600 add_token(tok_severity, ustr_warning, 0x02, 0, NULL, 0);
601 add_token(tok_severity, ustr_informational, 0x01, 0, NULL, 0);
602 add_token(tok_severity, ustr_success, 0x00, 0, NULL, 0);
603 add_token(tok_facility, ustr_application, 0xFFF, 0, NULL, 0);
604 add_token(tok_facility, ustr_system, 0x0FF, 0, NULL, 0);
605 add_token(tok_language, ustr_english, 0x409, 437, ustr_msg00001, 0);
606 }
607
608 empty_unichar_stack();
609
610 while(1)
611 {
612 if(want_line)
613 {
614 while((ch = get_unichar()) != '\n')
615 {
616 if(ch == EOF)
617 xyyerror("Unexpected EOF");
618 push_unichar(ch);
619 }
620 newline();
621 push_unichar(ch);
622 push_unichar(0);
623 if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
624 {
625 want_line = 0;
626 /* Reset the codepage to our default after each message */
627 set_codepage(WMC_DEFAULT_CODEPAGE);
628 return tMSGEND;
629 }
630 yylval.str = xunistrdup(get_unichar_stack());
631 return tLINE;
632 }
633
634 ch = get_unichar();
635
636 if(ch == EOF)
637 return EOF;
638
639 if(ch == '\n')
640 {
641 newline();
642 if(want_nl)
643 {
644 want_nl = 0;
645 return tNL;
646 }
647 continue;
648 }
649
650 if(isisochar(ch))
651 {
652 if(want_file)
653 {
654 int n = 0;
655 while(n < 8 && isisochar(ch))
656 {
657 int t = char_table[ch];
658 if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
659 break;
660
661 push_unichar(ch);
662 n++;
663 ch = get_unichar();
664 }
665 unget_unichar(ch);
666 push_unichar(0);
667 want_file = 0;
668 yylval.str = xunistrdup(get_unichar_stack());
669 return tFILE;
670 }
671
672 if(char_table[ch] & CH_IDENT)
673 {
674 token_t *tok;
675 while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
676 {
677 push_unichar(ch);
678 ch = get_unichar();
679 }
680 unget_unichar(ch);
681 push_unichar(0);
682 if(!(tok = lookup_token(get_unichar_stack())))
683 {
684 yylval.str = xunistrdup(get_unichar_stack());
685 return tIDENT;
686 }
687 switch(tok->type)
688 {
689 case tok_keyword:
690 return tok->token;
691
692 case tok_language:
693 codepage = tok->codepage;
694 /* Fall through */
695 case tok_severity:
696 case tok_facility:
697 yylval.tok = tok;
698 return tTOKEN;
699
700 default:
701 internal_error(__FILE__, __LINE__, "Invalid token type encountered");
702 }
703 }
704
705 if(isspace(ch)) /* Ignore space */
706 continue;
707
708 if(isdigit(ch))
709 return scan_number(ch);
710 }
711
712 switch(ch)
713 {
714 case ':':
715 case '=':
716 case '+':
717 case '(':
718 case ')':
719 return ch;
720 case ';':
721 while(ch != '\n' && ch != EOF)
722 {
723 push_unichar(ch);
724 ch = get_unichar();
725 }
726 newline();
727 push_unichar(ch); /* Include the newline */
728 push_unichar(0);
729 yylval.str = xunistrdup(get_unichar_stack());
730 return tCOMMENT;
731 default:
732 xyyerror("Invalid character '%c' (0x%04x)", isisochar(ch) && isprint(ch) ? ch : '.', ch);
733 }
734 }
735 }
736