minor format modifications to reduce diffs with latest wine sources to simplify updating.
[reactos.git] / reactos / tools / wmc / mcl.c
1 /*
2 * Wine Message Compiler lexical scanner
3 *
4 * Copyright 2000 Bertho A. Stultiens (BS)
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20 #include <windows.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <ctype.h>
24 #include <assert.h>
25
26 #include "config.h"
27
28 #include "utils.h"
29 #include "wmc.h"
30 #include "lang.h"
31
32 #include "y_tab.h"
33
34 /*
35 * Keywords are case insensitive. All normal input is treated as
36 * being in codepage iso-8859-1 for ascii input files (unicode
37 * page 0) and as equivalent unicode if unicode input is selected.
38 * All normal input, which is not part of a message text, is
39 * enforced to be unicode page 0. Otherwise an error will be
40 * generated. The normal file data should only be ASCII because
41 * that is the basic definition of the grammar.
42 *
43 * Byteorder or unicode input is determined automatically by
44 * reading the first 8 bytes and checking them against unicode
45 * page 0 byteorder (hibyte must be 0).
46 * -- FIXME --
47 * Alternatively, the input is checked against a special byte
48 * sequence to identify the file.
49 * -- FIXME --
50 *
51 *
52 * Keywords:
53 * Codepages
54 * Facility
55 * FacilityNames
56 * LanguageNames
57 * MessageId
58 * MessageIdTypedef
59 * Severity
60 * SeverityNames
61 * SymbolicName
62 *
63 * Default added identifiers for classes:
64 * SeverityNames:
65 * Success = 0x0
66 * Informational = 0x1
67 * Warning = 0x2
68 * Error = 0x3
69 * FacilityNames:
70 * System = 0x0FF
71 * Application = 0xFFF
72 *
73 * The 'Codepages' keyword is a wmc extension.
74 */
75
/*
 * NUL-terminated wide-character spellings of the keywords and of the
 * default identifiers. yylex() registers all of them in the token table
 * on its first call; ustr_msg00001 is passed there as the alias of the
 * default "English" language entry.
 */
static WCHAR ustr_application[] = { 'A', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', 0 };
static WCHAR ustr_codepages[] = { 'C', 'o', 'd', 'e', 'p', 'a', 'g', 'e', 's', 0 };
static WCHAR ustr_english[] = { 'E', 'n', 'g', 'l', 'i', 's', 'h', 0 };
static WCHAR ustr_error[] = { 'E', 'r', 'r', 'o', 'r', 0 };
static WCHAR ustr_facility[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 0 };
static WCHAR ustr_facilitynames[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
static WCHAR ustr_informational[] = { 'I', 'n', 'f', 'o', 'r', 'm', 'a', 't', 'i', 'o', 'n', 'a', 'l', 0 };
static WCHAR ustr_language[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 0};
static WCHAR ustr_languagenames[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 'N', 'a', 'm', 'e', 's', 0};
static WCHAR ustr_messageid[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 0 };
static WCHAR ustr_messageidtypedef[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 'T', 'y', 'p', 'e', 'd', 'e', 'f', 0 };
static WCHAR ustr_outputbase[] = { 'O', 'u', 't', 'p', 'u', 't', 'B', 'a', 's', 'e', 0 };
static WCHAR ustr_severity[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 0 };
static WCHAR ustr_severitynames[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
static WCHAR ustr_success[] = { 'S', 'u', 'c', 'c', 'e', 's', 's', 0 };
static WCHAR ustr_symbolicname[] = { 'S', 'y', 'm', 'b', 'o', 'l', 'i', 'c', 'N', 'a', 'm', 'e', 0 };
static WCHAR ustr_system[] = { 'S', 'y', 's', 't', 'e', 'm', 0 };
static WCHAR ustr_warning[] = { 'W', 'a', 'r', 'n', 'i', 'n', 'g', 0 };
static WCHAR ustr_msg00001[] = { 'm', 's', 'g', '0', '0', '0', '0', '1', 0 };
95 /*
96 * This table is to beat any form of "expression building" to check for
97 * correct filename characters. It is also used for ident checks.
98 * FIXME: use it more consistently.
99 */
100
/*
 * Per-character class bits stored in char_table. The scanner uses
 * CH_SHORTNAME and CH_PUNCT while collecting a file name and
 * CH_IDENT/CH_NUMBER while collecting an identifier. The values 0x10
 * and 0x20 still occur in the table ('*', '?' and '.'), although their
 * names are compiled out below.
 */
#define CH_SHORTNAME    0x01
#define CH_LONGNAME     0x02
#define CH_IDENT        0x04
#define CH_NUMBER       0x08
/*#define CH_WILDCARD   0x10*/
/*#define CH_DOT        0x20*/
#define CH_PUNCT        0x40
#define CH_INVALID      0x80

/*
 * One entry per unicode page 0 character, indexed by character code.
 * The element type is unsigned char because entries use bit 0x80
 * (CH_INVALID): stored in a plain char they would not be representable
 * where char is signed and would read back as negative values.
 */
static const unsigned char char_table[256] = {
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
    0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
    0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
    0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
    0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
    0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
    0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
    0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
    0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
};
144
/*
 * Tell whether ch lies in unicode page 0 (0x00..0xff).
 * Returns 1 when no bit above the low byte is set, 0 otherwise; negative
 * values such as EOF therefore yield 0.
 */
static int isisochar(int ch)
{
    return (ch & ~0xff) == 0;
}
149
/* Codepage handed to MultiByteToWideChar() when a non-unicode line is read */
static int codepage;
/* static const union cptable *codepage_def; */

/*
 * Select the codepage used to translate subsequent non-unicode input.
 * The lookup of a codepage definition is compiled out; the number is
 * only recorded here.
 */
void set_codepage(int cp)
{
    codepage = cp;
#if 0
    codepage_def = find_codepage(codepage);
    if(!codepage_def)
        xyyerror("Codepage %d not found; cannot process", codepage);
#endif
}
162
163 /*
164 * Input functions
165 */
166 static int nungetstack = 0;
167 static int allocungetstack = 0;
168 static char *ungetstack = NULL;
169 static int ninputbuffer = 0;
170 static WCHAR *inputbuffer = NULL;
171 static char *xlatebuffer = NULL;
172
173 #define INPUTBUFFER_SIZE 2048 /* Must be larger than 4 and approx. large enough to hold a line */
174
175 /*
176 * Fill the input buffer with *one* line of input.
177 * The line is '\n' terminated so that scanning
178 * messages with translation works as expected
179 * (otherwise we cannot pre-translate because the
180 * language is first known one line before the
181 * actual message).
182 */
183 static int fill_inputbuffer(void)
184 {
185 int n;
186 static char err_fatalread[] = "Fatal: reading input failed";
187 static int endian = -1;
188
189 if(!inputbuffer)
190 {
191 inputbuffer = xmalloc(INPUTBUFFER_SIZE);
192 xlatebuffer = xmalloc(INPUTBUFFER_SIZE);
193 }
194
195 try_again:
196 if(!unicodein)
197 {
198 char *cptr;
199 cptr = fgets(xlatebuffer, INPUTBUFFER_SIZE, yyin);
200 if(!cptr && ferror(yyin))
201 xyyerror(err_fatalread);
202 else if(!cptr)
203 return 0;
204 // assert(codepage_def != NULL);
205 // n = cp_mbstowcs(codepage_def, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
206 n = MultiByteToWideChar(codepage, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE); if(n < 0)
207 internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)", n);
208 if(n <= 1)
209 goto try_again; /* Should not hapen */
210 n--; /* Strip added conversion '\0' from input length */
211 /*
212 * FIXME:
213 * Detect UTF-8 in the first time we read some bytes by
214 * checking the special sequence "FE..." or something like
215 * that. I need to check www.unicode.org for details.
216 */
217 }
218 else
219 {
220 if(endian == -1)
221 {
222 n = fread(inputbuffer, 1, 8, yyin);
223 if(n != 8)
224 {
225 if(!n && ferror(yyin))
226 xyyerror(err_fatalread);
227 else
228 xyyerror("Fatal: file to short to determine byteorder (should never happen)");
229 }
230 if(isisochar(inputbuffer[0]) &&
231 isisochar(inputbuffer[1]) &&
232 isisochar(inputbuffer[2]) &&
233 isisochar(inputbuffer[3]))
234 {
235 #ifdef WORDS_BIGENDIAN
236 endian = WMC_BO_BIG;
237 #else
238 endian = WMC_BO_LITTLE;
239 #endif
240 }
241 else if(isisochar(BYTESWAP_WORD(inputbuffer[0])) &&
242 isisochar(BYTESWAP_WORD(inputbuffer[1])) &&
243 isisochar(BYTESWAP_WORD(inputbuffer[2])) &&
244 isisochar(BYTESWAP_WORD(inputbuffer[3])))
245 {
246 #ifdef WORDS_BIGENDIAN
247 endian = WMC_BO_LITTLE;
248 #else
249 endian = WMC_BO_BIG;
250 #endif
251 }
252 else
253 xyyerror("Fatal: cannot determine file's byteorder");
254 /* FIXME:
255 * Determine the file-endian with the leader-bytes
256 * "FF FE..."; can't remember the exact sequence.
257 */
258 n /= 2;
259 #ifdef WORDS_BIGENDIAN
260 if(endian == WMC_BO_LITTLE)
261 #else
262 if(endian == WMC_BO_BIG)
263 #endif
264 {
265 inputbuffer[0] = BYTESWAP_WORD(inputbuffer[0]);
266 inputbuffer[1] = BYTESWAP_WORD(inputbuffer[1]);
267 inputbuffer[2] = BYTESWAP_WORD(inputbuffer[2]);
268 inputbuffer[3] = BYTESWAP_WORD(inputbuffer[3]);
269 }
270
271 }
272 else
273 {
274 int i;
275 n = 0;
276 for(i = 0; i < INPUTBUFFER_SIZE; i++)
277 {
278 int t;
279 t = fread(&inputbuffer[i], 2, 1, yyin);
280 if(!t && ferror(yyin))
281 xyyerror(err_fatalread);
282 else if(!t && n)
283 break;
284 n++;
285 #ifdef WORDS_BIGENDIAN
286 if(endian == WMC_BO_LITTLE)
287 #else
288 if(endian == WMC_BO_BIG)
289 #endif
290 {
291 if((inputbuffer[i] = BYTESWAP_WORD(inputbuffer[i])) == '\n')
292 break;
293 }
294 else
295 {
296 if(inputbuffer[i] == '\n')
297 break;
298 }
299 }
300 }
301
302 }
303
304 if(!n)
305 {
306 yywarning("Re-read line (input was or converted to zilch)");
307 goto try_again; /* Should not happen, but could be due to stdin reading and a signal */
308 }
309
310 ninputbuffer += n;
311 return 1;
312 }
313
314 static int get_unichar(void)
315 {
316 static WCHAR *b = NULL;
317 char_number++;
318
319 if(nungetstack)
320 return ungetstack[--nungetstack];
321
322 if(!ninputbuffer)
323 {
324 if(!fill_inputbuffer())
325 return EOF;
326 b = inputbuffer;
327 }
328
329 ninputbuffer--;
330 return (int)(*b++ & 0xffff);
331 }
332
333 static void unget_unichar(int ch)
334 {
335 if(ch == EOF)
336 return;
337
338 char_number--;
339
340 if(nungetstack == allocungetstack)
341 {
342 allocungetstack += 32;
343 ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
344 }
345
346 ungetstack[nungetstack++] = (WCHAR)ch;
347 }
348
349
350 /*
351 * Normal character stack.
352 * Used for number scanning.
353 */
static char *charstack = NULL;      /* Storage, grown on demand by push_char() */
static int alloccharstack = 0;      /* Capacity of charstack, in characters */
static int ncharstack = 0;          /* Characters currently on the stack */

/* Forget the stacked characters; the storage is kept for reuse */
static void empty_char_stack(void)
{
    ncharstack = 0;
}
362
363 static void push_char(int ch)
364 {
365 if(ncharstack == alloccharstack)
366 {
367 alloccharstack += 32;
368 charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
369 }
370 charstack[ncharstack++] = (char)ch;
371 }
372
373 static int tos_char_stack(void)
374 {
375 if(!ncharstack)
376 return 0;
377 else
378 return (int)(charstack[ncharstack-1] & 0xff);
379 }
380
381 static char *get_char_stack(void)
382 {
383 return charstack;
384 }
385
386 /*
387 * Unicode character stack.
388 * Used for general scanner.
389 */
390 static int nunicharstack = 0;
391 static int allocunicharstack = 0;
392 static WCHAR *unicharstack = NULL;
393
394 static void empty_unichar_stack(void)
395 {
396 nunicharstack = 0;
397 }
398
399 static void push_unichar(int ch)
400 {
401 if(nunicharstack == allocunicharstack)
402 {
403 allocunicharstack += 128;
404 unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(*unicharstack));
405 }
406 unicharstack[nunicharstack++] = (WCHAR)ch;
407 }
408
/*
 * Compiled out. Unicode counterpart of tos_char_stack(): returns the
 * most recently pushed character masked to 16 bits, or 0 when the
 * stack is empty.
 */
#if 0
static int tos_unichar_stack(void)
{
    if(!nunicharstack)
        return 0;
    else
        return (int)(unicharstack[nunicharstack-1] & 0xffff);
}
#endif
418
419 static WCHAR *get_unichar_stack(void)
420 {
421 return unicharstack;
422 }
423
424 /*
425 * Number scanner
426 *
427 * state | ch | next state
428 * ------+-----------------+--------------------------
429 * 0 | [0] | 1
430 * 0 | [1-9] | 4
431 * 0 | . | error (should never occur)
432 * 1 | [xX] | 2
433 * 1 | [0-7] | 3
434 * 1 | [89a-wyzA-WYZ_] | error invalid digit
435 * 1 | . | return 0
436 * 2 | [0-9a-fA-F] | 2
437 * 2 | [g-zG-Z_] | error invalid hex digit
438 * 2 | . | return (hex-number) if TOS != [xX] else error
439 * 3 | [0-7] | 3
440 * 3 | [89a-zA-Z_] | error invalid octal digit
441 * 3 | . | return (octal-number)
442 * 4 | [0-9] | 4
443 * 4 | [a-zA-Z_] | error invalid decimal digit
444 * 4 | . | return (decimal-number)
445 *
446 * All non-identifier characters [^a-zA-Z_0-9] terminate the scan
447 * and return the value. This is not entirely correct, but close
448 * enough (should check punctuators as trailing context, but the
449 * char_table is not adapted to that and it is questionable whether
450 * it is worth the trouble).
451 * All non-iso-8859-1 characters are an error.
452 */
453 static int scan_number(int ch)
454 {
455 int state = 0;
456 int base = 10;
457 empty_char_stack();
458
459 while(1)
460 {
461 if(!isisochar(ch))
462 xyyerror("Invalid digit");
463
464 switch(state)
465 {
466 case 0:
467 if(isdigit(ch))
468 {
469 push_char(ch);
470 if(ch == '0')
471 state = 1;
472 else
473 state = 4;
474 }
475 else
476 internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state");
477 break;
478 case 1:
479 if(ch == 'x' || ch == 'X')
480 {
481 push_char(ch);
482 state = 2;
483 }
484 else if(ch >= '0' && ch <= '7')
485 {
486 push_char(ch);
487 state = 3;
488 }
489 else if(isalpha(ch) || ch == '_')
490 xyyerror("Invalid number digit");
491 else
492 {
493 unget_unichar(ch);
494 yylval.num = 0;
495 return tNUMBER;
496 }
497 break;
498 case 2:
499 if(isxdigit(ch))
500 push_char(ch);
501 else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
502 xyyerror("Invalid hex digit");
503 else
504 {
505 base = 16;
506 goto finish;
507 }
508 break;
509 case 3:
510 if(ch >= '0' && ch <= '7')
511 push_char(ch);
512 else if(isalnum(ch) || ch == '_')
513 xyyerror("Invalid octal digit");
514 else
515 {
516 base = 8;
517 goto finish;
518 }
519 break;
520 case 4:
521 if(isdigit(ch))
522 push_char(ch);
523 else if(isalnum(ch) || ch == '_')
524 xyyerror("Invalid decimal digit");
525 else
526 {
527 base = 10;
528 goto finish;
529 }
530 break;
531 default:
532 internal_error(__FILE__, __LINE__, "Invalid state in number-scanner");
533 }
534 ch = get_unichar();
535 }
536 finish:
537 unget_unichar(ch);
538 push_char(0);
539 yylval.num = strtoul(get_char_stack(), NULL, base);
540 return tNUMBER;
541 }
542
543 static void newline(void)
544 {
545 line_number++;
546 char_number = 1;
547 }
548
549 static int unisort(const void *p1, const void *p2)
550 {
551 return unistricmp(((token_t *)p1)->name, ((token_t *)p2)->name);
552 }
553
554 static token_t *tokentable = NULL;
555 static int ntokentable = 0;
556
557 token_t *lookup_token(const WCHAR *s)
558 {
559 token_t tok;
560
561 tok.name = s;
562 return (token_t *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
563 }
564
565 void add_token(tok_e type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
566 {
567 ntokentable++;
568 tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));
569 tokentable[ntokentable-1].type = type;
570 tokentable[ntokentable-1].name = name;
571 tokentable[ntokentable-1].token = tok;
572 tokentable[ntokentable-1].codepage = cp;
573 tokentable[ntokentable-1].alias = alias;
574 tokentable[ntokentable-1].fixed = fix;
575 qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
576 }
577
578 void get_tokentable(token_t **tab, int *len)
579 {
580 assert(tab != NULL);
581 assert(len != NULL);
582 *tab = tokentable;
583 *len = ntokentable;
584 }
585
586 /*
587 * The scanner
588 *
589 */
590 int yylex(void)
591 {
592 static WCHAR ustr_dot1[] = { '.', '\n', 0 };
593 static WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
594 static int isinit = 0;
595 int ch;
596
597 if(!isinit)
598 {
599 isinit++;
600 set_codepage(WMC_DEFAULT_CODEPAGE);
601 add_token(tok_keyword, ustr_codepages, tCODEPAGE, 0, NULL, 0);
602 add_token(tok_keyword, ustr_facility, tFACILITY, 0, NULL, 1);
603 add_token(tok_keyword, ustr_facilitynames, tFACNAMES, 0, NULL, 1);
604 add_token(tok_keyword, ustr_language, tLANGUAGE, 0, NULL, 1);
605 add_token(tok_keyword, ustr_languagenames, tLANNAMES, 0, NULL, 1);
606 add_token(tok_keyword, ustr_messageid, tMSGID, 0, NULL, 1);
607 add_token(tok_keyword, ustr_messageidtypedef, tTYPEDEF, 0, NULL, 1);
608 add_token(tok_keyword, ustr_outputbase, tBASE, 0, NULL, 1);
609 add_token(tok_keyword, ustr_severity, tSEVERITY, 0, NULL, 1);
610 add_token(tok_keyword, ustr_severitynames, tSEVNAMES, 0, NULL, 1);
611 add_token(tok_keyword, ustr_symbolicname, tSYMNAME, 0, NULL, 1);
612 add_token(tok_severity, ustr_error, 0x03, 0, NULL, 0);
613 add_token(tok_severity, ustr_warning, 0x02, 0, NULL, 0);
614 add_token(tok_severity, ustr_informational, 0x01, 0, NULL, 0);
615 add_token(tok_severity, ustr_success, 0x00, 0, NULL, 0);
616 add_token(tok_facility, ustr_application, 0xFFF, 0, NULL, 0);
617 add_token(tok_facility, ustr_system, 0x0FF, 0, NULL, 0);
618 add_token(tok_language, ustr_english, 0x409, 437, ustr_msg00001, 0);
619 }
620
621 empty_unichar_stack();
622
623 while(1)
624 {
625 if(want_line)
626 {
627 while((ch = get_unichar()) != '\n')
628 {
629 if(ch == EOF)
630 xyyerror("Unexpected EOF");
631 push_unichar(ch);
632 }
633 newline();
634 push_unichar(ch);
635 push_unichar(0);
636 if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
637 {
638 want_line = 0;
639 /* Reset the codepage to our default after each message */
640 set_codepage(WMC_DEFAULT_CODEPAGE);
641 return tMSGEND;
642 }
643 yylval.str = xunistrdup(get_unichar_stack());
644 return tLINE;
645 }
646
647 ch = get_unichar();
648
649 if(ch == EOF)
650 return EOF;
651
652 if(ch == '\n')
653 {
654 newline();
655 if(want_nl)
656 {
657 want_nl = 0;
658 return tNL;
659 }
660 continue;
661 }
662
663 if(isisochar(ch))
664 {
665 if(want_file)
666 {
667 int n = 0;
668 while(n < 8 && isisochar(ch))
669 {
670 int t = char_table[ch];
671 if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
672 break;
673
674 push_unichar(ch);
675 n++;
676 ch = get_unichar();
677 }
678 unget_unichar(ch);
679 push_unichar(0);
680 want_file = 0;
681 yylval.str = xunistrdup(get_unichar_stack());
682 return tFILE;
683 }
684
685 if(char_table[ch] & CH_IDENT)
686 {
687 token_t *tok;
688 while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
689 {
690 push_unichar(ch);
691 ch = get_unichar();
692 }
693 unget_unichar(ch);
694 push_unichar(0);
695 if(!(tok = lookup_token(get_unichar_stack())))
696 {
697 yylval.str = xunistrdup(get_unichar_stack());
698 return tIDENT;
699 }
700 switch(tok->type)
701 {
702 case tok_keyword:
703 return tok->token;
704
705 case tok_language:
706 codepage = tok->codepage;
707 /* Fall through */
708 case tok_severity:
709 case tok_facility:
710 yylval.tok = tok;
711 return tTOKEN;
712
713 default:
714 internal_error(__FILE__, __LINE__, "Invalid token type encountered");
715 }
716 }
717
718 if(isspace(ch)) /* Ignore space */
719 continue;
720
721 if(isdigit(ch))
722 return scan_number(ch);
723 }
724
725 switch(ch)
726 {
727 case ':':
728 case '=':
729 case '+':
730 case '(':
731 case ')':
732 return ch;
733 case ';':
734 while(ch != '\n' && ch != EOF)
735 {
736 push_unichar(ch);
737 ch = get_unichar();
738 }
739 newline();
740 push_unichar(ch); /* Include the newline */
741 push_unichar(0);
742 yylval.str = xunistrdup(get_unichar_stack());
743 return tCOMMENT;
744 default:
745 xyyerror("Invalid character '%c' (0x%04x)", isisochar(ch) && isprint(ch) ? ch : '.', ch);
746 }
747 }
748 }
749