98c5e40b97d3d00ff43f289f695703d07bbf8f3c
[reactos.git] / reactos / dll / win32 / jscript / lex.c
1 /*
2 * Copyright 2008 Jacek Caban for CodeWeavers
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17 */
18
19 #include "jscript.h"
20
21 #include "parser.tab.h"
22
/* Evaluates to 0x7fffffffffffffff as a LONGLONG. */
#define LONGLONG_MAX (((LONGLONG)0x7fffffff<<32)|0xffffffff)

/* NUL-terminated wide-character spellings of the words listed in keywords[]. */
static const WCHAR breakW[] = {'b','r','e','a','k',0};
static const WCHAR caseW[] = {'c','a','s','e',0};
static const WCHAR catchW[] = {'c','a','t','c','h',0};
static const WCHAR continueW[] = {'c','o','n','t','i','n','u','e',0};
static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0};
static const WCHAR deleteW[] = {'d','e','l','e','t','e',0};
static const WCHAR doW[] = {'d','o',0};
static const WCHAR elseW[] = {'e','l','s','e',0};
static const WCHAR falseW[] = {'f','a','l','s','e',0};
static const WCHAR finallyW[] = {'f','i','n','a','l','l','y',0};
static const WCHAR forW[] = {'f','o','r',0};
static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0};
static const WCHAR ifW[] = {'i','f',0};
static const WCHAR inW[] = {'i','n',0};
static const WCHAR instanceofW[] = {'i','n','s','t','a','n','c','e','o','f',0};
static const WCHAR newW[] = {'n','e','w',0};
static const WCHAR nullW[] = {'n','u','l','l',0};
static const WCHAR returnW[] = {'r','e','t','u','r','n',0};
static const WCHAR switchW[] = {'s','w','i','t','c','h',0};
static const WCHAR thisW[] = {'t','h','i','s',0};
static const WCHAR throwW[] = {'t','h','r','o','w',0};
static const WCHAR trueW[] = {'t','r','u','e',0};
static const WCHAR tryW[] = {'t','r','y',0};
static const WCHAR typeofW[] = {'t','y','p','e','o','f',0};
static const WCHAR varW[] = {'v','a','r',0};
static const WCHAR voidW[] = {'v','o','i','d',0};
static const WCHAR whileW[] = {'w','h','i','l','e',0};
static const WCHAR withW[] = {'w','i','t','h',0};

/* Words that are matched only after an '@' by the conditional compilation
 * code in this file; they do not appear in keywords[]. */
static const WCHAR elifW[] = {'e','l','i','f',0};
static const WCHAR endW[] = {'e','n','d',0};
56
/*
 * Keyword table used by check_keywords().
 *
 * check_keywords() binary-searches this array using the ordering returned by
 * check_keyword() (difference of character codes), so the entries must stay
 * sorted by `word` in ascending character-code order.
 *
 *   word   - keyword spelling
 *   token  - token value returned when the keyword is matched
 *   no_nl  - copied into ctx->implicit_nl_semicolon on a match; next_token()
 *            then returns ';' if a line break precedes the following token.
 *            Entries that omit it are zero-initialized (FALSE).
 */
static const struct {
    const WCHAR *word;
    int token;
    BOOL no_nl;
} keywords[] = {
    {breakW, kBREAK, TRUE},
    {caseW, kCASE},
    {catchW, kCATCH},
    {continueW, kCONTINUE, TRUE},
    {defaultW, kDEFAULT},
    {deleteW, kDELETE},
    {doW, kDO},
    {elseW, kELSE},
    {falseW, kFALSE},
    {finallyW, kFINALLY},
    {forW, kFOR},
    {functionW, kFUNCTION},
    {ifW, kIF},
    {inW, kIN},
    {instanceofW, kINSTANCEOF},
    {newW, kNEW},
    {nullW, kNULL},
    {returnW, kRETURN, TRUE},
    {switchW, kSWITCH},
    {thisW, kTHIS},
    {throwW, kTHROW},
    {trueW, kTRUE},
    {tryW, kTRY},
    {typeofW, kTYPEOF},
    {varW, kVAR},
    {voidW, kVOID},
    {whileW, kWHILE},
    {withW, kWITH}
};
91
92 static int lex_error(parser_ctx_t *ctx, HRESULT hres)
93 {
94 ctx->hres = hres;
95 ctx->lexer_error = TRUE;
96 return -1;
97 }
98
99 /* ECMA-262 3rd Edition 7.6 */
100 static BOOL is_identifier_char(WCHAR c)
101 {
102 return isalnumW(c) || c == '$' || c == '_' || c == '\\';
103 }
104
105 static BOOL is_identifier_first_char(WCHAR c)
106 {
107 return isalphaW(c) || c == '$' || c == '_' || c == '\\';
108 }
109
110 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word, const WCHAR **lval)
111 {
112 const WCHAR *p1 = ctx->ptr;
113 const WCHAR *p2 = word;
114
115 while(p1 < ctx->end && *p2) {
116 if(*p1 != *p2)
117 return *p1 - *p2;
118 p1++;
119 p2++;
120 }
121
122 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
123 return 1;
124
125 if(lval)
126 *lval = ctx->ptr;
127 ctx->ptr = p1;
128 return 0;
129 }
130
131 /* ECMA-262 3rd Edition 7.3 */
132 static BOOL is_endline(WCHAR c)
133 {
134 return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029;
135 }
136
137 static int hex_to_int(WCHAR c)
138 {
139 if('0' <= c && c <= '9')
140 return c-'0';
141
142 if('a' <= c && c <= 'f')
143 return c-'a'+10;
144
145 if('A' <= c && c <= 'F')
146 return c-'A'+10;
147
148 return -1;
149 }
150
151 static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval)
152 {
153 int min = 0, max = sizeof(keywords)/sizeof(keywords[0])-1, r, i;
154
155 while(min <= max) {
156 i = (min+max)/2;
157
158 r = check_keyword(ctx, keywords[i].word, lval);
159 if(!r) {
160 ctx->implicit_nl_semicolon = keywords[i].no_nl;
161 return keywords[i].token;
162 }
163
164 if(r > 0)
165 min = i+1;
166 else
167 max = i-1;
168 }
169
170 return 0;
171 }
172
173 static BOOL skip_html_comment(parser_ctx_t *ctx)
174 {
175 const WCHAR html_commentW[] = {'<','!','-','-',0};
176
177 if(!ctx->is_html || ctx->ptr+3 >= ctx->end ||
178 memcmp(ctx->ptr, html_commentW, sizeof(WCHAR)*4))
179 return FALSE;
180
181 ctx->nl = TRUE;
182 while(ctx->ptr < ctx->end && !is_endline(*ctx->ptr++));
183
184 return TRUE;
185 }
186
187 static BOOL skip_comment(parser_ctx_t *ctx)
188 {
189 if(ctx->ptr+1 >= ctx->end)
190 return FALSE;
191
192 if(*ctx->ptr != '/') {
193 if(*ctx->ptr == '@' && ctx->ptr+2 < ctx->end && ctx->ptr[1] == '*' && ctx->ptr[2] == '/') {
194 ctx->ptr += 3;
195 return TRUE;
196 }
197
198 return FALSE;
199 }
200
201 switch(ctx->ptr[1]) {
202 case '*':
203 ctx->ptr += 2;
204 if(ctx->ptr+2 < ctx->end && *ctx->ptr == '@' && is_identifier_char(ctx->ptr[1]))
205 return FALSE;
206 while(ctx->ptr+1 < ctx->end && (ctx->ptr[0] != '*' || ctx->ptr[1] != '/'))
207 ctx->ptr++;
208
209 if(ctx->ptr[0] == '*' && ctx->ptr[1] == '/') {
210 ctx->ptr += 2;
211 }else {
212 WARN("unexpected end of file (missing end of comment)\n");
213 ctx->ptr = ctx->end;
214 }
215 break;
216 case '/':
217 ctx->ptr += 2;
218 if(ctx->ptr+2 < ctx->end && *ctx->ptr == '@' && is_identifier_char(ctx->ptr[1]))
219 return FALSE;
220 while(ctx->ptr < ctx->end && !is_endline(*ctx->ptr))
221 ctx->ptr++;
222 break;
223 default:
224 return FALSE;
225 }
226
227 return TRUE;
228 }
229
230 static BOOL skip_spaces(parser_ctx_t *ctx)
231 {
232 while(ctx->ptr < ctx->end && (isspaceW(*ctx->ptr) || *ctx->ptr == 0xFEFF /* UTF16 BOM */)) {
233 if(is_endline(*ctx->ptr++))
234 ctx->nl = TRUE;
235 }
236
237 return ctx->ptr != ctx->end;
238 }
239
240 static BOOL unescape(WCHAR *str)
241 {
242 WCHAR *pd, *p, c;
243 int i;
244
245 pd = p = str;
246 while(*p) {
247 if(*p != '\\') {
248 *pd++ = *p++;
249 continue;
250 }
251
252 p++;
253
254 switch(*p) {
255 case '\'':
256 case '\"':
257 case '\\':
258 c = *p;
259 break;
260 case 'b':
261 c = '\b';
262 break;
263 case 't':
264 c = '\t';
265 break;
266 case 'n':
267 c = '\n';
268 break;
269 case 'f':
270 c = '\f';
271 break;
272 case 'r':
273 c = '\r';
274 break;
275 case 'x':
276 i = hex_to_int(*++p);
277 if(i == -1)
278 return FALSE;
279 c = i << 4;
280
281 i = hex_to_int(*++p);
282 if(i == -1)
283 return FALSE;
284 c += i;
285 break;
286 case 'u':
287 i = hex_to_int(*++p);
288 if(i == -1)
289 return FALSE;
290 c = i << 12;
291
292 i = hex_to_int(*++p);
293 if(i == -1)
294 return FALSE;
295 c += i << 8;
296
297 i = hex_to_int(*++p);
298 if(i == -1)
299 return FALSE;
300 c += i << 4;
301
302 i = hex_to_int(*++p);
303 if(i == -1)
304 return FALSE;
305 c += i;
306 break;
307 default:
308 if(isdigitW(*p)) {
309 c = *p++ - '0';
310 if(isdigitW(*p)) {
311 c = c*8 + (*p++ - '0');
312 if(isdigitW(*p))
313 c = c*8 + (*p++ - '0');
314 }
315 p--;
316 }
317 else
318 c = *p;
319 }
320
321 *pd++ = c;
322 p++;
323 }
324
325 *pd = 0;
326 return TRUE;
327 }
328
329 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
330 {
331 const WCHAR *ptr = ctx->ptr++;
332 WCHAR *wstr;
333 int len;
334
335 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
336 ctx->ptr++;
337
338 len = ctx->ptr-ptr;
339
340 *ret = wstr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
341 memcpy(wstr, ptr, len*sizeof(WCHAR));
342 wstr[len] = 0;
343
344 /* FIXME: unescape */
345 return tIdentifier;
346 }
347
348 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret, WCHAR endch)
349 {
350 const WCHAR *ptr = ++ctx->ptr;
351 WCHAR *wstr;
352 int len;
353
354 while(ctx->ptr < ctx->end && *ctx->ptr != endch) {
355 if(*ctx->ptr++ == '\\')
356 ctx->ptr++;
357 }
358
359 if(ctx->ptr == ctx->end)
360 return lex_error(ctx, JS_E_UNTERMINATED_STRING);
361
362 len = ctx->ptr-ptr;
363
364 *ret = wstr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
365 memcpy(wstr, ptr, len*sizeof(WCHAR));
366 wstr[len] = 0;
367
368 ctx->ptr++;
369
370 if(!unescape(wstr)) {
371 WARN("unescape failed\n");
372 return lex_error(ctx, E_FAIL);
373 }
374
375 return tStringLiteral;
376 }
377
378 static literal_t *new_double_literal(parser_ctx_t *ctx, DOUBLE d)
379 {
380 literal_t *ret = parser_alloc(ctx, sizeof(literal_t));
381
382 ret->type = LT_DOUBLE;
383 ret->u.dval = d;
384 return ret;
385 }
386
387 literal_t *new_boolean_literal(parser_ctx_t *ctx, BOOL bval)
388 {
389 literal_t *ret = parser_alloc(ctx, sizeof(literal_t));
390
391 ret->type = LT_BOOL;
392 ret->u.bval = bval;
393
394 return ret;
395 }
396
397 static BOOL parse_double_literal(parser_ctx_t *ctx, LONG int_part, double *ret)
398 {
399 LONGLONG d, hlp;
400 int exp = 0;
401
402 d = int_part;
403 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) {
404 hlp = d*10 + *(ctx->ptr++) - '0';
405 if(d>MAXLONGLONG/10 || hlp<0) {
406 exp++;
407 break;
408 }
409 else
410 d = hlp;
411 }
412 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) {
413 exp++;
414 ctx->ptr++;
415 }
416
417 if(*ctx->ptr == '.') {
418 ctx->ptr++;
419
420 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) {
421 hlp = d*10 + *(ctx->ptr++) - '0';
422 if(d>MAXLONGLONG/10 || hlp<0)
423 break;
424
425 d = hlp;
426 exp--;
427 }
428 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr))
429 ctx->ptr++;
430 }
431
432 if(ctx->ptr < ctx->end && (*ctx->ptr == 'e' || *ctx->ptr == 'E')) {
433 int sign = 1, e = 0;
434
435 ctx->ptr++;
436 if(ctx->ptr < ctx->end) {
437 if(*ctx->ptr == '+') {
438 ctx->ptr++;
439 }else if(*ctx->ptr == '-') {
440 sign = -1;
441 ctx->ptr++;
442 }else if(!isdigitW(*ctx->ptr)) {
443 WARN("Expected exponent part\n");
444 lex_error(ctx, E_FAIL);
445 return FALSE;
446 }
447 }
448
449 if(ctx->ptr == ctx->end) {
450 WARN("unexpected end of file\n");
451 lex_error(ctx, E_FAIL);
452 return FALSE;
453 }
454
455 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) {
456 if(e > INT_MAX/10 || (e = e*10 + *ctx->ptr++ - '0')<0)
457 e = INT_MAX;
458 }
459 e *= sign;
460
461 if(exp<0 && e<0 && e+exp>0) exp = INT_MIN;
462 else if(exp>0 && e>0 && e+exp<0) exp = INT_MAX;
463 else exp += e;
464 }
465
466 if(is_identifier_char(*ctx->ptr)) {
467 WARN("wrong char after zero\n");
468 lex_error(ctx, JS_E_MISSING_SEMICOLON);
469 return FALSE;
470 }
471
472 *ret = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp);
473 return TRUE;
474 }
475
476 static BOOL parse_numeric_literal(parser_ctx_t *ctx, double *ret)
477 {
478 LONG l, d;
479
480 l = *ctx->ptr++ - '0';
481 if(!l) {
482 if(*ctx->ptr == 'x' || *ctx->ptr == 'X') {
483 if(++ctx->ptr == ctx->end) {
484 ERR("unexpected end of file\n");
485 return FALSE;
486 }
487
488 while(ctx->ptr < ctx->end && (d = hex_to_int(*ctx->ptr)) != -1) {
489 l = l*16 + d;
490 ctx->ptr++;
491 }
492
493 if(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr)) {
494 WARN("unexpected identifier char\n");
495 lex_error(ctx, JS_E_MISSING_SEMICOLON);
496 return FALSE;
497 }
498
499 *ret = l;
500 return TRUE;
501 }
502
503 if(isdigitW(*ctx->ptr)) {
504 unsigned base = 8;
505 const WCHAR *ptr;
506 double val = 0;
507
508 for(ptr = ctx->ptr; ptr < ctx->end && isdigitW(*ptr); ptr++) {
509 if(*ptr > '7') {
510 base = 10;
511 break;
512 }
513 }
514
515 do {
516 val = val*base + *ctx->ptr-'0';
517 }while(++ctx->ptr < ctx->end && isdigitW(*ctx->ptr));
518
519 /* FIXME: Do we need it here? */
520 if(ctx->ptr < ctx->end && (is_identifier_char(*ctx->ptr) || *ctx->ptr == '.')) {
521 WARN("wrong char after octal literal: '%c'\n", *ctx->ptr);
522 lex_error(ctx, JS_E_MISSING_SEMICOLON);
523 return FALSE;
524 }
525
526 *ret = val;
527 return TRUE;
528 }
529
530 if(is_identifier_char(*ctx->ptr)) {
531 WARN("wrong char after zero\n");
532 lex_error(ctx, JS_E_MISSING_SEMICOLON);
533 return FALSE;
534 }
535 }
536
537 return parse_double_literal(ctx, l, ret);
538 }
539
540 static int next_token(parser_ctx_t *ctx, void *lval)
541 {
542 do {
543 if(!skip_spaces(ctx))
544 return tEOF;
545 }while(skip_comment(ctx) || skip_html_comment(ctx));
546
547 if(ctx->implicit_nl_semicolon) {
548 if(ctx->nl)
549 return ';';
550 ctx->implicit_nl_semicolon = FALSE;
551 }
552
553 if(isalphaW(*ctx->ptr)) {
554 int ret = check_keywords(ctx, lval);
555 if(ret)
556 return ret;
557
558 return parse_identifier(ctx, lval);
559 }
560
561 if(isdigitW(*ctx->ptr)) {
562 double n;
563
564 if(!parse_numeric_literal(ctx, &n))
565 return -1;
566
567 *(literal_t**)lval = new_double_literal(ctx, n);
568 return tNumericLiteral;
569 }
570
571 switch(*ctx->ptr) {
572 case '{':
573 case '(':
574 case ')':
575 case '[':
576 case ']':
577 case ';':
578 case ',':
579 case '~':
580 case '?':
581 return *ctx->ptr++;
582
583 case '}':
584 *(const WCHAR**)lval = ctx->ptr++;
585 return '}';
586
587 case '.':
588 if(++ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) {
589 double n;
590 if(!parse_double_literal(ctx, 0, &n))
591 return -1;
592 *(literal_t**)lval = new_double_literal(ctx, n);
593 return tNumericLiteral;
594 }
595 return '.';
596
597 case '<':
598 if(++ctx->ptr == ctx->end) {
599 *(int*)lval = EXPR_LESS;
600 return tRelOper;
601 }
602
603 switch(*ctx->ptr) {
604 case '=': /* <= */
605 ctx->ptr++;
606 *(int*)lval = EXPR_LESSEQ;
607 return tRelOper;
608 case '<': /* << */
609 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* <<= */
610 ctx->ptr++;
611 *(int*)lval = EXPR_ASSIGNLSHIFT;
612 return tAssignOper;
613 }
614 *(int*)lval = EXPR_LSHIFT;
615 return tShiftOper;
616 default: /* < */
617 *(int*)lval = EXPR_LESS;
618 return tRelOper;
619 }
620
621 case '>':
622 if(++ctx->ptr == ctx->end) { /* > */
623 *(int*)lval = EXPR_GREATER;
624 return tRelOper;
625 }
626
627 switch(*ctx->ptr) {
628 case '=': /* >= */
629 ctx->ptr++;
630 *(int*)lval = EXPR_GREATEREQ;
631 return tRelOper;
632 case '>': /* >> */
633 if(++ctx->ptr < ctx->end) {
634 if(*ctx->ptr == '=') { /* >>= */
635 ctx->ptr++;
636 *(int*)lval = EXPR_ASSIGNRSHIFT;
637 return tAssignOper;
638 }
639 if(*ctx->ptr == '>') { /* >>> */
640 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* >>>= */
641 ctx->ptr++;
642 *(int*)lval = EXPR_ASSIGNRRSHIFT;
643 return tAssignOper;
644 }
645 *(int*)lval = EXPR_RRSHIFT;
646 return tRelOper;
647 }
648 }
649 *(int*)lval = EXPR_RSHIFT;
650 return tShiftOper;
651 default:
652 *(int*)lval = EXPR_GREATER;
653 return tRelOper;
654 }
655
656 case '+':
657 ctx->ptr++;
658 if(ctx->ptr < ctx->end) {
659 switch(*ctx->ptr) {
660 case '+': /* ++ */
661 ctx->ptr++;
662 return tINC;
663 case '=': /* += */
664 ctx->ptr++;
665 *(int*)lval = EXPR_ASSIGNADD;
666 return tAssignOper;
667 }
668 }
669 return '+';
670
671 case '-':
672 ctx->ptr++;
673 if(ctx->ptr < ctx->end) {
674 switch(*ctx->ptr) {
675 case '-': /* -- or --> */
676 ctx->ptr++;
677 if(ctx->is_html && ctx->nl && ctx->ptr < ctx->end && *ctx->ptr == '>') {
678 ctx->ptr++;
679 return tHTMLCOMMENT;
680 }
681 return tDEC;
682 case '=': /* -= */
683 ctx->ptr++;
684 *(int*)lval = EXPR_ASSIGNSUB;
685 return tAssignOper;
686 }
687 }
688 return '-';
689
690 case '*':
691 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* *= */
692 ctx->ptr++;
693 *(int*)lval = EXPR_ASSIGNMUL;
694 return tAssignOper;
695 }
696 return '*';
697
698 case '%':
699 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* %= */
700 ctx->ptr++;
701 *(int*)lval = EXPR_ASSIGNMOD;
702 return tAssignOper;
703 }
704 return '%';
705
706 case '&':
707 if(++ctx->ptr < ctx->end) {
708 switch(*ctx->ptr) {
709 case '=': /* &= */
710 ctx->ptr++;
711 *(int*)lval = EXPR_ASSIGNAND;
712 return tAssignOper;
713 case '&': /* && */
714 ctx->ptr++;
715 return tANDAND;
716 }
717 }
718 return '&';
719
720 case '|':
721 if(++ctx->ptr < ctx->end) {
722 switch(*ctx->ptr) {
723 case '=': /* |= */
724 ctx->ptr++;
725 *(int*)lval = EXPR_ASSIGNOR;
726 return tAssignOper;
727 case '|': /* || */
728 ctx->ptr++;
729 return tOROR;
730 }
731 }
732 return '|';
733
734 case '^':
735 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* ^= */
736 ctx->ptr++;
737 *(int*)lval = EXPR_ASSIGNXOR;
738 return tAssignOper;
739 }
740 return '^';
741
742 case '!':
743 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* != */
744 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* !== */
745 ctx->ptr++;
746 *(int*)lval = EXPR_NOTEQEQ;
747 return tEqOper;
748 }
749 *(int*)lval = EXPR_NOTEQ;
750 return tEqOper;
751 }
752 return '!';
753
754 case '=':
755 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* == */
756 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* === */
757 ctx->ptr++;
758 *(int*)lval = EXPR_EQEQ;
759 return tEqOper;
760 }
761 *(int*)lval = EXPR_EQ;
762 return tEqOper;
763 }
764 return '=';
765
766 case '/':
767 if(++ctx->ptr < ctx->end) {
768 if(*ctx->ptr == '=') { /* /= */
769 ctx->ptr++;
770 *(int*)lval = EXPR_ASSIGNDIV;
771 return kDIVEQ;
772 }
773 }
774 return '/';
775
776 case ':':
777 if(++ctx->ptr < ctx->end && *ctx->ptr == ':') {
778 ctx->ptr++;
779 return kDCOL;
780 }
781 return ':';
782
783 case '\"':
784 case '\'':
785 return parse_string_literal(ctx, lval, *ctx->ptr);
786
787 case '_':
788 case '$':
789 return parse_identifier(ctx, lval);
790
791 case '@':
792 return '@';
793 }
794
795 WARN("unexpected char '%c' %d\n", *ctx->ptr, *ctx->ptr);
796 return 0;
797 }
798
/*
 * One conditional compilation variable, kept in a singly linked list whose
 * head is cc_ctx_t::vars.
 */
struct _cc_var_t {
    ccval_t val;                /* current value of the variable */
    struct _cc_var_t *next;     /* next variable in the list, or NULL */
    unsigned name_len;          /* number of WCHARs in name, as used for lookups */
    WCHAR name[0];              /* variable name; storage is allocated together
                                 * with the structure by new_cc_var() */
};
805
806 void release_cc(cc_ctx_t *cc)
807 {
808 cc_var_t *iter, *next;
809
810 for(iter = cc->vars; iter; iter = next) {
811 next = iter->next;
812 heap_free(iter);
813 }
814
815 heap_free(cc);
816 }
817
818 static BOOL new_cc_var(cc_ctx_t *cc, const WCHAR *name, int len, ccval_t v)
819 {
820 cc_var_t *new_v;
821
822 if(len == -1)
823 len = strlenW(name);
824
825 new_v = heap_alloc(sizeof(cc_var_t) + (len+1)*sizeof(WCHAR));
826 if(!new_v)
827 return FALSE;
828
829 new_v->val = v;
830 memcpy(new_v->name, name, (len+1)*sizeof(WCHAR));
831 new_v->name_len = len;
832 new_v->next = cc->vars;
833 cc->vars = new_v;
834 return TRUE;
835 }
836
837 static cc_var_t *find_cc_var(cc_ctx_t *cc, const WCHAR *name, unsigned name_len)
838 {
839 cc_var_t *iter;
840
841 for(iter = cc->vars; iter; iter = iter->next) {
842 if(iter->name_len == name_len && !memcmp(iter->name, name, name_len*sizeof(WCHAR)))
843 return iter;
844 }
845
846 return NULL;
847 }
848
849 static BOOL init_cc(parser_ctx_t *ctx)
850 {
851 cc_ctx_t *cc;
852
853 static const WCHAR _win32W[] = {'_','w','i','n','3','2',0};
854 static const WCHAR _win64W[] = {'_','w','i','n','6','4',0};
855 static const WCHAR _x86W[] = {'_','x','8','6',0};
856 static const WCHAR _amd64W[] = {'_','a','m','d','6','4',0};
857 static const WCHAR _jscriptW[] = {'_','j','s','c','r','i','p','t',0};
858 static const WCHAR _jscript_buildW[] = {'_','j','s','c','r','i','p','t','_','b','u','i','l','d',0};
859 static const WCHAR _jscript_versionW[] = {'_','j','s','c','r','i','p','t','_','v','e','r','s','i','o','n',0};
860
861 if(ctx->script->cc)
862 return TRUE;
863
864 cc = heap_alloc(sizeof(cc_ctx_t));
865 if(!cc) {
866 lex_error(ctx, E_OUTOFMEMORY);
867 return FALSE;
868 }
869
870 cc->vars = NULL;
871
872 if(!new_cc_var(cc, _jscriptW, -1, ccval_bool(TRUE))
873 || !new_cc_var(cc, sizeof(void*) == 8 ? _win64W : _win32W, -1, ccval_bool(TRUE))
874 || !new_cc_var(cc, sizeof(void*) == 8 ? _amd64W : _x86W, -1, ccval_bool(TRUE))
875 || !new_cc_var(cc, _jscript_versionW, -1, ccval_num(JSCRIPT_MAJOR_VERSION + (DOUBLE)JSCRIPT_MINOR_VERSION/10.0))
876 || !new_cc_var(cc, _jscript_buildW, -1, ccval_num(JSCRIPT_BUILD_VERSION))) {
877 release_cc(cc);
878 lex_error(ctx, E_OUTOFMEMORY);
879 return FALSE;
880 }
881
882 ctx->script->cc = cc;
883 return TRUE;
884 }
885
886 static BOOL parse_cc_identifier(parser_ctx_t *ctx, const WCHAR **ret, unsigned *ret_len)
887 {
888 if(*ctx->ptr != '@') {
889 lex_error(ctx, JS_E_EXPECTED_AT);
890 return FALSE;
891 }
892
893 if(!is_identifier_first_char(*++ctx->ptr)) {
894 lex_error(ctx, JS_E_EXPECTED_IDENTIFIER);
895 return FALSE;
896 }
897
898 *ret = ctx->ptr;
899 while(++ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr));
900 *ret_len = ctx->ptr - *ret;
901 return TRUE;
902 }
903
904 int try_parse_ccval(parser_ctx_t *ctx, ccval_t *r)
905 {
906 if(!skip_spaces(ctx))
907 return -1;
908
909 if(isdigitW(*ctx->ptr)) {
910 double n;
911
912 if(!parse_numeric_literal(ctx, &n))
913 return -1;
914
915 *r = ccval_num(n);
916 return 1;
917 }
918
919 if(*ctx->ptr == '@') {
920 const WCHAR *ident;
921 unsigned ident_len;
922 cc_var_t *cc_var;
923
924 if(!parse_cc_identifier(ctx, &ident, &ident_len))
925 return -1;
926
927 cc_var = find_cc_var(ctx->script->cc, ident, ident_len);
928 *r = cc_var ? cc_var->val : ccval_num(NAN);
929 return 1;
930 }
931
932 if(!check_keyword(ctx, trueW, NULL)) {
933 *r = ccval_bool(TRUE);
934 return 1;
935 }
936
937 if(!check_keyword(ctx, falseW, NULL)) {
938 *r = ccval_bool(FALSE);
939 return 1;
940 }
941
942 return 0;
943 }
944
945 static int skip_code(parser_ctx_t *ctx, BOOL exec_else)
946 {
947 int if_depth = 1;
948 const WCHAR *ptr;
949
950 while(1) {
951 ptr = strchrW(ctx->ptr, '@');
952 if(!ptr) {
953 WARN("No @end\n");
954 return lex_error(ctx, JS_E_EXPECTED_CCEND);
955 }
956 ctx->ptr = ptr+1;
957
958 if(!check_keyword(ctx, endW, NULL)) {
959 if(--if_depth)
960 continue;
961 return 0;
962 }
963
964 if(exec_else && !check_keyword(ctx, elifW, NULL)) {
965 if(if_depth > 1)
966 continue;
967
968 if(!skip_spaces(ctx) || *ctx->ptr != '(')
969 return lex_error(ctx, JS_E_MISSING_LBRACKET);
970
971 if(!parse_cc_expr(ctx))
972 return -1;
973
974 if(!get_ccbool(ctx->ccval))
975 continue; /* skip block of code */
976
977 /* continue parsing */
978 ctx->cc_if_depth++;
979 return 0;
980 }
981
982 if(exec_else && !check_keyword(ctx, elseW, NULL)) {
983 if(if_depth > 1)
984 continue;
985
986 /* parse else block */
987 ctx->cc_if_depth++;
988 return 0;
989 }
990
991 if(!check_keyword(ctx, ifW, NULL)) {
992 if_depth++;
993 continue;
994 }
995
996 ctx->ptr++;
997 }
998 }
999
1000 static int cc_token(parser_ctx_t *ctx, void *lval)
1001 {
1002 unsigned id_len = 0;
1003 cc_var_t *var;
1004
1005 static const WCHAR cc_onW[] = {'c','c','_','o','n',0};
1006 static const WCHAR setW[] = {'s','e','t',0};
1007
1008 ctx->ptr++;
1009
1010 if(!check_keyword(ctx, cc_onW, NULL))
1011 return init_cc(ctx) ? 0 : -1;
1012
1013 if(!check_keyword(ctx, setW, NULL)) {
1014 const WCHAR *ident;
1015 unsigned ident_len;
1016 cc_var_t *var;
1017
1018 if(!init_cc(ctx))
1019 return -1;
1020
1021 if(!skip_spaces(ctx))
1022 return lex_error(ctx, JS_E_EXPECTED_AT);
1023
1024 if(!parse_cc_identifier(ctx, &ident, &ident_len))
1025 return -1;
1026
1027 if(!skip_spaces(ctx) || *ctx->ptr != '=')
1028 return lex_error(ctx, JS_E_EXPECTED_ASSIGN);
1029 ctx->ptr++;
1030
1031 if(!parse_cc_expr(ctx)) {
1032 WARN("parsing CC expression failed\n");
1033 return -1;
1034 }
1035
1036 var = find_cc_var(ctx->script->cc, ident, ident_len);
1037 if(var) {
1038 var->val = ctx->ccval;
1039 }else {
1040 if(!new_cc_var(ctx->script->cc, ident, ident_len, ctx->ccval))
1041 return lex_error(ctx, E_OUTOFMEMORY);
1042 }
1043
1044 return 0;
1045 }
1046
1047 if(!check_keyword(ctx, ifW, NULL)) {
1048 if(!init_cc(ctx))
1049 return -1;
1050
1051 if(!skip_spaces(ctx) || *ctx->ptr != '(')
1052 return lex_error(ctx, JS_E_MISSING_LBRACKET);
1053
1054 if(!parse_cc_expr(ctx))
1055 return -1;
1056
1057 if(get_ccbool(ctx->ccval)) {
1058 /* continue parsing block inside if */
1059 ctx->cc_if_depth++;
1060 return 0;
1061 }
1062
1063 return skip_code(ctx, TRUE);
1064 }
1065
1066 if(!check_keyword(ctx, elifW, NULL) || !check_keyword(ctx, elseW, NULL)) {
1067 if(!ctx->cc_if_depth)
1068 return lex_error(ctx, JS_E_SYNTAX);
1069
1070 return skip_code(ctx, FALSE);
1071 }
1072
1073 if(!check_keyword(ctx, endW, NULL)) {
1074 if(!ctx->cc_if_depth)
1075 return lex_error(ctx, JS_E_SYNTAX);
1076
1077 ctx->cc_if_depth--;
1078 return 0;
1079 }
1080
1081 if(!ctx->script->cc)
1082 return lex_error(ctx, JS_E_DISABLED_CC);
1083
1084 while(ctx->ptr+id_len < ctx->end && is_identifier_char(ctx->ptr[id_len]))
1085 id_len++;
1086 if(!id_len)
1087 return '@';
1088
1089 TRACE("var %s\n", debugstr_wn(ctx->ptr, id_len));
1090
1091 var = find_cc_var(ctx->script->cc, ctx->ptr, id_len);
1092 ctx->ptr += id_len;
1093 if(!var || var->val.is_num) {
1094 *(literal_t**)lval = new_double_literal(ctx, var ? var->val.u.n : NAN);
1095 return tNumericLiteral;
1096 }
1097
1098 *(literal_t**)lval = new_boolean_literal(ctx, var->val.u.b);
1099 return tBooleanLiteral;
1100 }
1101
1102 int parser_lex(void *lval, parser_ctx_t *ctx)
1103 {
1104 int ret;
1105
1106 ctx->nl = ctx->ptr == ctx->begin;
1107
1108 do {
1109 ret = next_token(ctx, lval);
1110 } while(ret == '@' && !(ret = cc_token(ctx, lval)));
1111
1112 return ret;
1113 }
1114
1115 literal_t *parse_regexp(parser_ctx_t *ctx)
1116 {
1117 const WCHAR *re, *flags_ptr;
1118 BOOL in_class = FALSE;
1119 DWORD re_len, flags;
1120 literal_t *ret;
1121 HRESULT hres;
1122
1123 TRACE("\n");
1124
1125 while(*--ctx->ptr != '/');
1126
1127 /* Simple regexp pre-parser; '/' if used in char class does not terminate regexp literal */
1128 re = ++ctx->ptr;
1129 while(ctx->ptr < ctx->end) {
1130 if(*ctx->ptr == '\\') {
1131 if(++ctx->ptr == ctx->end)
1132 break;
1133 }else if(in_class) {
1134 if(*ctx->ptr == '\n')
1135 break;
1136 if(*ctx->ptr == ']')
1137 in_class = FALSE;
1138 }else {
1139 if(*ctx->ptr == '/')
1140 break;
1141
1142 if(*ctx->ptr == '[')
1143 in_class = TRUE;
1144 }
1145 ctx->ptr++;
1146 }
1147
1148 if(ctx->ptr == ctx->end || *ctx->ptr != '/') {
1149 WARN("pre-parsing failed\n");
1150 return NULL;
1151 }
1152
1153 re_len = ctx->ptr-re;
1154
1155 flags_ptr = ++ctx->ptr;
1156 while(ctx->ptr < ctx->end && isalnumW(*ctx->ptr))
1157 ctx->ptr++;
1158
1159 hres = parse_regexp_flags(flags_ptr, ctx->ptr-flags_ptr, &flags);
1160 if(FAILED(hres))
1161 return NULL;
1162
1163 ret = parser_alloc(ctx, sizeof(literal_t));
1164 ret->type = LT_REGEXP;
1165 ret->u.regexp.str = re;
1166 ret->u.regexp.str_len = re_len;
1167 ret->u.regexp.flags = flags;
1168 return ret;
1169 }