b9dd71db1523d72dc02b808f836b158d30eff704
[reactos.git] / reactos / tools / wmc / mcy.y
1 /*
2 * Wine Message Compiler parser
3 *
4 * Copyright 2000 Bertho A. Stultiens (BS)
5 *
6 * The basic grammar of the file is yet another example of, humpf,
7 * design. There is a mix of context-insensitive and -sensitive
8 * stuff, which makes it rather complicated.
9 * The header definitions are all context-insensitive because they have
10 * delimited arguments, whereas the message headers are (semi-) context-
11 * sensitive and the messages themselves are, well, RFC82[12] delimited.
12 * This mixture seems to originate from the time that ms and ibm were
13 * good friends and developing os/2 according to the "compatibility"
14 * switch and reading some comments here and there.
15 *
16 * I'll ignore most of the complications and concentrate on the concept
17 * which allows me to use yacc. Basically, everything is context-
18 * insensitive now, with the exception of the message-text itself and
19 * the preceding language declaration.
20 *
21 */
22
23 %{
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <assert.h>
28
29 #include "config.h"
30
31 #include "utils.h"
32 #include "wmc.h"
33 #include "lang.h"
34
35 static const char err_syntax[] = "Syntax error";
36 static const char err_number[] = "Number expected";
37 static const char err_ident[] = "Identifier expected";
38 static const char err_assign[] = "'=' expected";
39 static const char err_popen[] = "'(' expected";
40 static const char err_pclose[] = "')' expected";
41 static const char err_colon[] = "':' expected";
42 static const char err_msg[] = "Message expected";
43
44 /* Scanner switches */
45 int want_nl = 0; /* Request next newlinw */
46 int want_line = 0; /* Request next complete line */
47 int want_file = 0; /* Request next ident as filename */
48
49 node_t *nodehead = NULL; /* The list of all parsed elements */
50 static node_t *nodetail = NULL;
51 lan_blk_t *lanblockhead; /* List of parsed elements transposed */
52
53 static int base = 16; /* Current printout base to use (8, 10 or 16) */
54 static WCHAR *cast = NULL; /* Current typecast to use */
55
56 static int last_id = 0; /* The last message ID parsed */
57 static int last_sev = 0; /* Last severity code parsed */
58 static int last_fac = 0; /* Last facility code parsed */
59 static WCHAR *last_sym = NULL;/* Last alias symbol parsed */
60 static int have_sev; /* Set if severity parsed for current message */
61 static int have_fac; /* Set if facility parsed for current message */
62 static int have_sym; /* Set is symbol parsed for current message */
63
64 static cp_xlat_t *cpxlattab = NULL; /* Codepage translation table */
65 static int ncpxlattab = 0;
66
67 /* Prototypes */
68 static WCHAR *merge(WCHAR *s1, WCHAR *s2);
69 static lanmsg_t *new_lanmsg(lan_cp_t *lcp, WCHAR *msg);
70 static msg_t *add_lanmsg(msg_t *msg, lanmsg_t *lanmsg);
71 static msg_t *complete_msg(msg_t *msg, int id);
72 static void add_node(node_e type, void *p);
73 static void do_add_token(tok_e type, token_t *tok, const char *code);
74 static void test_id(int id);
75 static int check_languages(node_t *head);
76 static lan_blk_t *block_messages(node_t *head);
77 static void add_cpxlat(int lan, int cpin, int cpout);
78 static cp_xlat_t *find_cpxlat(int lan);
79
80 %}
81
82
/* Semantic value carried by tokens and non-terminals */
%union {
    WCHAR       *str;   /* tIDENT, tLINE, tFILE, tCOMMENT; alias and lines */
    unsigned    num;    /* tNUMBER; optcp, id, msgid and clan */
    token_t     *tok;   /* tTOKEN and the token helper rule */
    lanmsg_t    *lmp;   /* One message body */
    msg_t       *msg;   /* A message and its list of bodies */
    lan_cp_t    lcp;    /* Language/codepage pair produced by lang */
}


/* Keywords and markers without a semantic value */
%token tSEVNAMES tFACNAMES tLANNAMES tBASE tCODEPAGE
%token tTYPEDEF tNL tSYMNAME tMSGEND
%token tSEVERITY tFACILITY tLANGUAGE tMSGID
/* Tokens with a semantic value */
%token <str> tIDENT tLINE tFILE tCOMMENT
%token <num> tNUMBER
%token <tok> tTOKEN

/* Value types of the non-terminals */
%type <str> alias lines
%type <num> optcp id msgid clan
%type <tok> token
%type <lmp> body
%type <msg> bodies msg
%type <lcp> lang
106
107 %%
/* Start symbol: after all items are parsed, validate and transpose them */
file    : items
            {
                /* check_languages() returns the number of message nodes */
                if(check_languages(nodehead) == 0)
                    xyyerror("No messages defined");
                lanblockhead = block_messages(nodehead);
            }
        ;
114
/* One or more declarations */
items   : decl
        | items decl
        ;

/* Messages and comments are appended to the node list in file order */
decl    : global
        | msg
            {
                add_node(nd_msg, $1);
            }
        | tCOMMENT
            {
                add_node(nd_comment, $1);
            }
        | error
            {
                /* `Catch all' error */
                xyyerror(err_syntax);
            }
        ;
124
/*
 * Header definitions: the four name maps, the output typecast and the
 * number base. Every keyword has error productions that report which
 * delimiter or operand was expected.
 */
global  : tSEVNAMES '=' '(' smaps ')'
        | tSEVNAMES '=' '(' smaps error     { xyyerror(err_pclose); }
        | tSEVNAMES '=' error               { xyyerror(err_popen); }
        | tSEVNAMES error                   { xyyerror(err_assign); }
        | tFACNAMES '=' '(' fmaps ')'
        | tFACNAMES '=' '(' fmaps error     { xyyerror(err_pclose); }
        | tFACNAMES '=' error               { xyyerror(err_popen); }
        | tFACNAMES error                   { xyyerror(err_assign); }
        | tLANNAMES '=' '(' lmaps ')'
        | tLANNAMES '=' '(' lmaps error     { xyyerror(err_pclose); }
        | tLANNAMES '=' error               { xyyerror(err_popen); }
        | tLANNAMES error                   { xyyerror(err_assign); }
        | tCODEPAGE '=' '(' cmaps ')'
        | tCODEPAGE '=' '(' cmaps error     { xyyerror(err_pclose); }
        | tCODEPAGE '=' error               { xyyerror(err_popen); }
        | tCODEPAGE error                   { xyyerror(err_assign); }
        | tTYPEDEF '=' tIDENT               { cast = $3; }
        /* The operand of the typedef is an identifier, not a number */
        | tTYPEDEF '=' error                { xyyerror(err_ident); }
        | tTYPEDEF error                    { xyyerror(err_assign); }
        | tBASE '=' tNUMBER
            {
                /*
                 * Validate the requested value ($3). Testing the current
                 * 'base' would always match and let any number through.
                 */
                switch($3)
                {
                case 8:
                case 10:
                case 16:
                    base = $3;
                    break;
                default:
                    xyyerror("Numberbase must be 8, 10 or 16");
                }
            }
        | tBASE '=' error                   { xyyerror(err_number); }
        | tBASE error                       { xyyerror(err_assign); }
        ;
159
160 /*----------------------------------------------------------------------
161 * SeverityNames mapping
162 */
/* One or more severity definitions */
smaps   : smap
        | smaps smap
        | error
            {
                xyyerror(err_ident);
            }
        ;

/* name '=' value [':' alias]; only bits 0 and 1 may be set in the value */
smap    : token '=' tNUMBER alias
            {
                token_t *sev = $1;

                sev->token = $3;
                sev->alias = $4;
                if(($3 & ~0x3) != 0)
                    xyyerror("Severity value out of range (0x%08x > 0x3)", $3);
                do_add_token(tok_severity, sev, "severity");
            }
        | token '=' error
            {
                xyyerror(err_number);
            }
        | token error
            {
                xyyerror(err_assign);
            }
        ;
178
179 /*----------------------------------------------------------------------
180 * FacilityNames mapping
181 */
/* One or more facility definitions */
fmaps   : fmap
        | fmaps fmap
        | error
            {
                xyyerror(err_ident);
            }
        ;

/* name '=' value [':' alias]; the value must fit in the low 12 bits */
fmap    : token '=' tNUMBER alias
            {
                token_t *fac = $1;

                fac->token = $3;
                fac->alias = $4;
                if(($3 & ~0xfff) != 0)
                    xyyerror("Facility value out of range (0x%08x > 0xfff)", $3);
                do_add_token(tok_facility, fac, "facility");
            }
        | token '=' error
            {
                xyyerror(err_number);
            }
        | token error
            {
                xyyerror(err_assign);
            }
        ;
197
/* Optional ':' identifier suffix of a severity or facility; NULL if absent */
alias   : /* Empty */
            {
                $$ = NULL;
            }
        | ':' tIDENT
            {
                $$ = $2;
            }
        | ':' error
            {
                xyyerror(err_ident);
            }
        ;
202
203 /*----------------------------------------------------------------------
204 * LanguageNames mapping
205 */
/* One or more language definitions */
lmaps   : lmap
        | lmaps lmap
        | error     { xyyerror(err_ident); }
        ;

/*
 * name '=' number ':' filename [':' codepage]
 * setfile asks the scanner to return the next identifier as a filename.
 */
lmap    : token '=' tNUMBER setfile ':' tFILE optcp
            {
                $1->token = $3;
                $1->alias = $6;
                $1->codepage = $7;
                do_add_token(tok_language, $1, "language");
                /*
                 * do_add_token() frees $1 when the name was not yet known,
                 * so the language code must be taken from $3 from here on.
                 */
                if(!find_language($3) && !find_cpxlat($3))
                    yywarning("Language 0x%x not built-in, using codepage %d; use explicit codepage to override", $3, WMC_DEFAULT_CODEPAGE);
            }
        | token '=' tNUMBER setfile ':' error   { xyyerror("Filename expected"); }
        | token '=' tNUMBER error               { xyyerror(err_colon); }
        | token '=' error                       { xyyerror(err_number); }
        | token error                           { xyyerror(err_assign); }
        ;
224
/* Optional ':' codepage suffix of a language definition; 0 if absent */
optcp   : /* Empty */
            {
                $$ = 0;
            }
        | ':' tNUMBER
            {
                $$ = $2;
            }
        | ':' error
            {
                xyyerror("Codepage-number expected");
            }
        ;
229
230 /*----------------------------------------------------------------------
231 * Codepages mapping
232 */
/* One or more codepage translation definitions */
cmaps   : cmap
        | cmaps cmap
        | error
            {
                xyyerror(err_ident);
            }
        ;

/*
 * language '=' input-codepage ':' output-codepage
 * A codepage of 0 is accepted without a find_codepage() lookup.
 */
cmap    : clan '=' tNUMBER ':' tNUMBER
            {
                static const char err_nocp[] = "Codepage %d not builtin; cannot convert";

                if(find_cpxlat($1) != NULL)
                    xyyerror("Codepage translation already defined for language 0x%x", $1);
                if($3 != 0 && !find_codepage($3))
                    xyyerror(err_nocp, $3);
                if($5 != 0 && !find_codepage($5))
                    xyyerror(err_nocp, $5);
                add_cpxlat($1, $3, $5);
            }
        | clan '=' tNUMBER ':' error
            {
                xyyerror(err_number);
            }
        | clan '=' tNUMBER error
            {
                xyyerror(err_colon);
            }
        | clan '=' error
            {
                xyyerror(err_number);
            }
        | clan error
            {
                xyyerror(err_assign);
            }
        ;
253
/* A language given either as a number or as an already defined language name */
clan    : tNUMBER
            {
                $$ = $1;
            }
        | tTOKEN
            {
                token_t *named = $1;

                if(named->type != tok_language)
                    xyyerror("Language name or code expected");
                $$ = named->token;
            }
        ;
261
262 /*----------------------------------------------------------------------
263 * Message-definition parsing
264 */
/* A complete message: header fields followed by one body per language */
msg     : msgid sevfacsym
            {
                /* Mid-rule action; it occupies slot $3, so bodies is $4 */
                test_id($1);
            }
          bodies
            {
                $$ = complete_msg($4, $1);
            }
        ;
267
/* MessageId header; the resulting ID must fit in 16 bits */
msgid   : tMSGID '=' id
            {
                if(($3 & ~0xffff) != 0)
                    xyyerror("Message ID value out of range (0x%08x > 0xffff)", $3);
                $$ = $3;
            }
        | tMSGID error
            {
                xyyerror(err_assign);
            }
        ;
275
/*
 * Message ID value: empty means previous ID plus one, a number sets it
 * absolutely and '+' number advances it relative to the previous ID.
 */
id      : /* Empty */
            {
                last_id++;
                $$ = last_id;
            }
        | tNUMBER
            {
                last_id = $1;
                $$ = last_id;
            }
        | '+' tNUMBER
            {
                last_id += $2;
                $$ = last_id;
            }
        | '+' error
            {
                xyyerror(err_number);
            }
        ;
281
/*
 * Any sequence of severity, facility and symbolic name headers.
 * The empty alternative resets the "seen" flags for a new message;
 * each header may then appear at most once.
 */
sevfacsym: /* Empty */
            {
                have_sev = 0;
                have_fac = 0;
                have_sym = 0;
            }
        | sevfacsym sev
            {
                if(have_sev)
                    xyyerror("Severity already defined");
                have_sev = 1;
            }
        | sevfacsym fac
            {
                if(have_fac)
                    xyyerror("Facility already defined");
                have_fac = 1;
            }
        | sevfacsym sym
            {
                if(have_sym)
                    xyyerror("Symbolname already defined");
                have_sym = 1;
            }
        ;
287
/* SymbolicName header; remembers the name in last_sym */
sym     : tSYMNAME '=' tIDENT
            {
                last_sym = $3;
            }
        | tSYMNAME '=' error
            {
                xyyerror(err_ident);
            }
        | tSYMNAME error
            {
                xyyerror(err_assign);
            }
        ;
292
/* Severity header; the name must be a known token of class severity */
sev     : tSEVERITY '=' token
            {
                token_t *known = lookup_token($3->name);

                /* NOTE(review): the dereference below presumes xyyerror() does not return - confirm */
                if(known == NULL)
                    xyyerror("Undefined severityname");
                if(known->type != tok_severity)
                    xyyerror("Identifier is not of class 'severity'");
                last_sev = known->token;
            }
        | tSEVERITY '=' error
            {
                xyyerror(err_ident);
            }
        | tSEVERITY error
            {
                xyyerror(err_assign);
            }
        ;
304
/* Facility header; the name must be a known token of class facility */
fac     : tFACILITY '=' token
            {
                token_t *known = lookup_token($3->name);

                /* NOTE(review): the dereference below presumes xyyerror() does not return - confirm */
                if(known == NULL)
                    xyyerror("Undefined facilityname");
                if(known->type != tok_facility)
                    xyyerror("Identifier is not of class 'facility'");
                last_fac = known->token;
            }
        | tFACILITY '=' error
            {
                xyyerror(err_ident);
            }
        | tFACILITY error
            {
                xyyerror(err_assign);
            }
        ;
316
317 /*----------------------------------------------------------------------
318 * Message-text parsing
319 */
/* One or more language bodies; the first one creates the message record */
bodies  : body
            {
                $$ = add_lanmsg(NULL, $1);
            }
        | bodies body
            {
                $$ = add_lanmsg($1, $2);
            }
        | error
            {
                xyyerror("'Language=...' (start of message text-definition) expected");
            }
        ;

/* Language header, text lines and the terminating marker; setline sets want_line */
body    : lang setline lines tMSGEND
            {
                $$ = new_lanmsg(&$1, $3);
            }
        ;
327
/*
 * The trailing tNL makes it possible to switch the codepage
 * to the one belonging to the language before the next
 * message is parsed.
 *
 * Codepage selection order: an explicit translation entry, then the
 * codepage given with the language name, then the builtin language's
 * DOS codepage, and finally WMC_DEFAULT_CODEPAGE.
 */
lang    : tLANGUAGE setnl '=' token tNL
            {
                token_t *tok = lookup_token($4->name);
                cp_xlat_t *cpx;

                /* NOTE(review): the dereference below presumes xyyerror() does not return - confirm */
                if(!tok)
                    xyyerror("Undefined language");
                if(tok->type != tok_language)
                    xyyerror("Identifier is not of class 'language'");

                cpx = find_cpxlat(tok->token);
                if(cpx)
                {
                    $$.codepage = cpx->cpin;
                }
                else if(tok->codepage)
                {
                    $$.codepage = tok->codepage;
                }
                else
                {
                    const language_t *lan = find_language(tok->token);
                    if(lan)
                    {
                        /* The default seems to be to use the DOS codepage... */
                        $$.codepage = lan->doscp;
                    }
                    else
                    {
                        /* Just set default; warning was given while parsing languagenames */
                        $$.codepage = WMC_DEFAULT_CODEPAGE;
                    }
                }
                set_codepage($$.codepage);
                $$.language = tok->token;
            }
        | tLANGUAGE setnl '=' token error
            {
                xyyerror("Missing newline");
            }
        | tLANGUAGE setnl '=' error
            {
                xyyerror(err_ident);
            }
        | tLANGUAGE error
            {
                xyyerror(err_assign);
            }
        ;
366
/* Message text: consecutive tLINE tokens are concatenated by merge() */
lines   : tLINE
            {
                $$ = $1;
            }
        | lines tLINE
            {
                $$ = merge($1, $2);
            }
        | error
            {
                xyyerror(err_msg);
            }
        | lines error
            {
                xyyerror(err_msg);
            }
        ;
372
373 /*----------------------------------------------------------------------
374 * Helper rules
375 */
/*
 * A name: either a fresh identifier, wrapped in a newly allocated
 * token_t with only its name set here, or a token the scanner
 * already returned as tTOKEN.
 */
token   : tIDENT
            {
                token_t *fresh = xmalloc(sizeof(token_t));

                fresh->name = $1;
                $$ = fresh;
            }
        | tTOKEN
            {
                $$ = $1;
            }
        ;
379
/* Empty rules that only raise one of the scanner switches */
setnl   : /* Empty */
            {
                want_nl = 1;
            }
        ;

setline : /* Empty */
            {
                want_line = 1;
            }
        ;

setfile : /* Empty */
            {
                want_file = 1;
            }
        ;
388
389 %%
390
391 static WCHAR *merge(WCHAR *s1, WCHAR *s2)
392 {
393 int l1 = unistrlen(s1);
394 int l2 = unistrlen(s2);
395 s1 = xrealloc(s1, (l1 + l2 + 1) * sizeof(*s1));
396 unistrcpy(s1+l1, s2);
397 free(s2);
398 return s1;
399 }
400
401 static void do_add_token(tok_e type, token_t *tok, const char *code)
402 {
403 token_t *tp = lookup_token(tok->name);
404 if(tp)
405 {
406 if(tok->type != type)
407 yywarning("Type change in token");
408 if(tp != tok)
409 xyyerror("Overlapping token not the same");
410 /* else its already defined and changed */
411 if(tok->fixed)
412 xyyerror("Redefinition of %s", code);
413 tok->fixed = 1;
414 }
415 else
416 {
417 add_token(type, tok->name, tok->token, tok->codepage, tok->alias, 1);
418 free(tok);
419 }
420 }
421
422 static lanmsg_t *new_lanmsg(lan_cp_t *lcp, WCHAR *msg)
423 {
424 lanmsg_t *lmp = (lanmsg_t *)xmalloc(sizeof(lanmsg_t));
425 lmp->lan = lcp->language;
426 lmp->cp = lcp->codepage;
427 lmp->msg = msg;
428 lmp->len = unistrlen(msg) + 1; /* Include termination */
429 if(lmp->len > 4096)
430 yywarning("Message exceptionally long; might be a missing termination");
431 return lmp;
432 }
433
434 static msg_t *add_lanmsg(msg_t *msg, lanmsg_t *lanmsg)
435 {
436 int i;
437 if(!msg)
438 msg = xmalloc(sizeof(msg_t));
439 msg->msgs = xrealloc(msg->msgs, (msg->nmsgs+1) * sizeof(*(msg->msgs)));
440 msg->msgs[msg->nmsgs] = lanmsg;
441 msg->nmsgs++;
442 for(i = 0; i < msg->nmsgs-1; i++)
443 {
444 if(msg->msgs[i]->lan == lanmsg->lan)
445 xyyerror("Message for language 0x%x already defined", lanmsg->lan);
446 }
447 return msg;
448 }
449
450 static int sort_lanmsg(const void *p1, const void *p2)
451 {
452 return (*(lanmsg_t **)p1)->lan - (*(lanmsg_t **)p2)->lan;
453 }
454
455 static msg_t *complete_msg(msg_t *mp, int id)
456 {
457 assert(mp != NULL);
458 mp->id = id;
459 if(have_sym)
460 mp->sym = last_sym;
461 else
462 xyyerror("No symbolic name defined for message id %d", id);
463 mp->sev = last_sev;
464 mp->fac = last_fac;
465 qsort(mp->msgs, mp->nmsgs, sizeof(*(mp->msgs)), sort_lanmsg);
466 mp->realid = id | (last_sev << 30) | (last_fac << 16);
467 if(custombit)
468 mp->realid |= 1 << 29;
469 mp->base = base;
470 mp->cast = cast;
471 return mp;
472 }
473
474 static void add_node(node_e type, void *p)
475 {
476 node_t *ndp = (node_t *)xmalloc(sizeof(node_t));
477 ndp->type = type;
478 ndp->u.all = p;
479
480 if(nodetail)
481 {
482 ndp->prev = nodetail;
483 nodetail->next = ndp;
484 nodetail = ndp;
485 }
486 else
487 {
488 nodehead = nodetail = ndp;
489 }
490 }
491
492 static void test_id(int id)
493 {
494 node_t *ndp;
495 for(ndp = nodehead; ndp; ndp = ndp->next)
496 {
497 if(ndp->type != nd_msg)
498 continue;
499 if(ndp->u.msg->id == id && ndp->u.msg->sev == last_sev && ndp->u.msg->fac == last_fac)
500 xyyerror("MessageId %d with facility 0x%x and severity 0x%x already defined", id, last_fac, last_sev);
501 }
502 }
503
504 static int check_languages(node_t *head)
505 {
506 static char err_missing[] = "Missing definition for language 0x%x; MessageID %d, facility 0x%x, severity 0x%x";
507 node_t *ndp;
508 int nm = 0;
509 msg_t *msg = NULL;
510
511 for(ndp = head; ndp; ndp = ndp->next)
512 {
513 if(ndp->type != nd_msg)
514 continue;
515 if(!nm)
516 {
517 msg = ndp->u.msg;
518 }
519 else
520 {
521 int i;
522 msg_t *m1;
523 msg_t *m2;
524 if(ndp->u.msg->nmsgs > msg->nmsgs)
525 {
526 m1 = ndp->u.msg;
527 m2 = msg;
528 }
529 else
530 {
531 m1 = msg;
532 m2 = ndp->u.msg;
533 }
534
535 for(i = 0; i < m1->nmsgs; i++)
536 {
537 if(i > m2->nmsgs)
538 error(err_missing, m1->msgs[i]->lan, m2->id, m2->fac, m2->sev);
539 else if(m1->msgs[i]->lan < m2->msgs[i]->lan)
540 error(err_missing, m1->msgs[i]->lan, m2->id, m2->fac, m2->sev);
541 else if(m1->msgs[i]->lan > m2->msgs[i]->lan)
542 error(err_missing, m2->msgs[i]->lan, m1->id, m1->fac, m1->sev);
543 }
544 }
545 nm++;
546 }
547 return nm;
548 }
549
550 #define MSGRID(x) ((*(msg_t **)(x))->realid)
551 static int sort_msg(const void *p1, const void *p2)
552 {
553 return MSGRID(p1) > MSGRID(p2) ? 1 : (MSGRID(p1) == MSGRID(p2) ? 0 : -1);
554 /* return (*(msg_t **)p1)->realid - (*(msg_t **)p1)->realid; */
555 }
556
557 /*
558 * block_messages() basically transposes the messages
559 * from ID/language based list to a language/ID
560 * based list.
561 */
562 static lan_blk_t *block_messages(node_t *head)
563 {
564 lan_blk_t *lbp;
565 lan_blk_t *lblktail = NULL;
566 lan_blk_t *lblkhead = NULL;
567 msg_t **msgtab = NULL;
568 node_t *ndp;
569 int nmsg = 0;
570 int i;
571 int nl;
572 int factor = unicodeout ? 2 : 1;
573
574 for(ndp = head; ndp; ndp = ndp->next)
575 {
576 if(ndp->type != nd_msg)
577 continue;
578 msgtab = xrealloc(msgtab, (nmsg+1) * sizeof(*msgtab));
579 msgtab[nmsg++] = ndp->u.msg;
580 }
581
582 assert(nmsg != 0);
583 qsort(msgtab, nmsg, sizeof(*msgtab), sort_msg);
584
585 for(nl = 0; nl < msgtab[0]->nmsgs; nl++) /* This should be equal for all after check_languages() */
586 {
587 lbp = xmalloc(sizeof(lan_blk_t));
588
589 if(!lblktail)
590 {
591 lblkhead = lblktail = lbp;
592 }
593 else
594 {
595 lblktail->next = lbp;
596 lbp->prev = lblktail;
597 lblktail = lbp;
598 }
599 lbp->nblk = 1;
600 lbp->blks = xmalloc(sizeof(*lbp->blks));
601 lbp->blks[0].idlo = msgtab[0]->realid;
602 lbp->blks[0].idhi = msgtab[0]->realid;
603 /* The plus 4 is the entry header; (+3)&~3 is DWORD alignment */
604 lbp->blks[0].size = ((factor * msgtab[0]->msgs[nl]->len + 3) & ~3) + 4;
605 lbp->blks[0].msgs = xmalloc(sizeof(*lbp->blks[0].msgs));
606 lbp->blks[0].nmsg = 1;
607 lbp->blks[0].msgs[0] = msgtab[0]->msgs[nl];
608 lbp->lan = msgtab[0]->msgs[nl]->lan;
609
610 for(i = 1; i < nmsg; i++)
611 {
612 block_t *blk = &(lbp->blks[lbp->nblk-1]);
613 if(msgtab[i]->realid == blk->idhi+1)
614 {
615 blk->size += ((factor * msgtab[i]->msgs[nl]->len + 3) & ~3) + 4;
616 blk->idhi++;
617 blk->msgs = xrealloc(blk->msgs, (blk->nmsg+1) * sizeof(*blk->msgs));
618 blk->msgs[blk->nmsg++] = msgtab[i]->msgs[nl];
619 }
620 else
621 {
622 lbp->nblk++;
623 lbp->blks = xrealloc(lbp->blks, lbp->nblk * sizeof(*lbp->blks));
624 blk = &(lbp->blks[lbp->nblk-1]);
625 blk->idlo = msgtab[i]->realid;
626 blk->idhi = msgtab[i]->realid;
627 blk->size = ((factor * msgtab[i]->msgs[nl]->len + 3) & ~3) + 4;
628 blk->msgs = xmalloc(sizeof(*blk->msgs));
629 blk->nmsg = 1;
630 blk->msgs[0] = msgtab[i]->msgs[nl];
631 }
632 }
633 }
634 free(msgtab);
635 return lblkhead;
636 }
637
638 static int sc_xlat(const void *p1, const void *p2)
639 {
640 return ((cp_xlat_t *)p1)->lan - ((cp_xlat_t *)p2)->lan;
641 }
642
643 static void add_cpxlat(int lan, int cpin, int cpout)
644 {
645 cpxlattab = xrealloc(cpxlattab, (ncpxlattab+1) * sizeof(*cpxlattab));
646 cpxlattab[ncpxlattab].lan = lan;
647 cpxlattab[ncpxlattab].cpin = cpin;
648 cpxlattab[ncpxlattab].cpout = cpout;
649 ncpxlattab++;
650 qsort(cpxlattab, ncpxlattab, sizeof(*cpxlattab), sc_xlat);
651 }
652
653 static cp_xlat_t *find_cpxlat(int lan)
654 {
655 cp_xlat_t t;
656
657 if(!cpxlattab) return NULL;
658
659 t.lan = lan;
660 return (cp_xlat_t *)bsearch(&t, cpxlattab, ncpxlattab, sizeof(*cpxlattab), sc_xlat);
661 }
662