[JSCRIPT] Sync with Wine Staging 3.9. CORE-14656
[reactos.git] / dll / win32 / jscript / lex.c
1 /*
2 * Copyright 2008 Jacek Caban for CodeWeavers
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17 */
18
19 #include "config.h"
20 #include "wine/port.h"
21
22 #include <limits.h>
23
24 #include "jscript.h"
25 #include "activscp.h"
26 #include "objsafe.h"
27 #include "engine.h"
28 #include "parser.h"
29
30 #include "parser.tab.h"
31
32 #include "wine/debug.h"
33 #include "wine/unicode.h"
34
35 WINE_DEFAULT_DEBUG_CHANNEL(jscript);
36
37 static const WCHAR breakW[] = {'b','r','e','a','k',0};
38 static const WCHAR caseW[] = {'c','a','s','e',0};
39 static const WCHAR catchW[] = {'c','a','t','c','h',0};
40 static const WCHAR continueW[] = {'c','o','n','t','i','n','u','e',0};
41 static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0};
42 static const WCHAR deleteW[] = {'d','e','l','e','t','e',0};
43 static const WCHAR doW[] = {'d','o',0};
44 static const WCHAR elseW[] = {'e','l','s','e',0};
45 static const WCHAR falseW[] = {'f','a','l','s','e',0};
46 static const WCHAR finallyW[] = {'f','i','n','a','l','l','y',0};
47 static const WCHAR forW[] = {'f','o','r',0};
48 static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0};
49 static const WCHAR ifW[] = {'i','f',0};
50 static const WCHAR inW[] = {'i','n',0};
51 static const WCHAR instanceofW[] = {'i','n','s','t','a','n','c','e','o','f',0};
52 static const WCHAR newW[] = {'n','e','w',0};
53 static const WCHAR nullW[] = {'n','u','l','l',0};
54 static const WCHAR returnW[] = {'r','e','t','u','r','n',0};
55 static const WCHAR switchW[] = {'s','w','i','t','c','h',0};
56 static const WCHAR thisW[] = {'t','h','i','s',0};
57 static const WCHAR throwW[] = {'t','h','r','o','w',0};
58 static const WCHAR trueW[] = {'t','r','u','e',0};
59 static const WCHAR tryW[] = {'t','r','y',0};
60 static const WCHAR typeofW[] = {'t','y','p','e','o','f',0};
61 static const WCHAR varW[] = {'v','a','r',0};
62 static const WCHAR voidW[] = {'v','o','i','d',0};
63 static const WCHAR whileW[] = {'w','h','i','l','e',0};
64 static const WCHAR withW[] = {'w','i','t','h',0};
65
66 static const WCHAR elifW[] = {'e','l','i','f',0};
67 static const WCHAR endW[] = {'e','n','d',0};
68
69 static const struct {
70 const WCHAR *word;
71 int token;
72 BOOL no_nl;
73 } keywords[] = {
74 {breakW, kBREAK, TRUE},
75 {caseW, kCASE},
76 {catchW, kCATCH},
77 {continueW, kCONTINUE, TRUE},
78 {defaultW, kDEFAULT},
79 {deleteW, kDELETE},
80 {doW, kDO},
81 {elseW, kELSE},
82 {falseW, kFALSE},
83 {finallyW, kFINALLY},
84 {forW, kFOR},
85 {functionW, kFUNCTION},
86 {ifW, kIF},
87 {inW, kIN},
88 {instanceofW, kINSTANCEOF},
89 {newW, kNEW},
90 {nullW, kNULL},
91 {returnW, kRETURN, TRUE},
92 {switchW, kSWITCH},
93 {thisW, kTHIS},
94 {throwW, kTHROW},
95 {trueW, kTRUE},
96 {tryW, kTRY},
97 {typeofW, kTYPEOF},
98 {varW, kVAR},
99 {voidW, kVOID},
100 {whileW, kWHILE},
101 {withW, kWITH}
102 };
103
104 static int lex_error(parser_ctx_t *ctx, HRESULT hres)
105 {
106 ctx->hres = hres;
107 ctx->lexer_error = TRUE;
108 return -1;
109 }
110
111 /* ECMA-262 3rd Edition 7.6 */
112 BOOL is_identifier_char(WCHAR c)
113 {
114 return isalnumW(c) || c == '$' || c == '_' || c == '\\';
115 }
116
117 static BOOL is_identifier_first_char(WCHAR c)
118 {
119 return isalphaW(c) || c == '$' || c == '_' || c == '\\';
120 }
121
122 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word, const WCHAR **lval)
123 {
124 const WCHAR *p1 = ctx->ptr;
125 const WCHAR *p2 = word;
126
127 while(p1 < ctx->end && *p2) {
128 if(*p1 != *p2)
129 return *p1 - *p2;
130 p1++;
131 p2++;
132 }
133
134 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
135 return 1;
136
137 if(lval)
138 *lval = word;
139 ctx->ptr = p1;
140 return 0;
141 }
142
143 /* ECMA-262 3rd Edition 7.3 */
144 static BOOL is_endline(WCHAR c)
145 {
146 return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029;
147 }
148
149 static int hex_to_int(WCHAR c)
150 {
151 if('0' <= c && c <= '9')
152 return c-'0';
153
154 if('a' <= c && c <= 'f')
155 return c-'a'+10;
156
157 if('A' <= c && c <= 'F')
158 return c-'A'+10;
159
160 return -1;
161 }
162
163 static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval)
164 {
165 int min = 0, max = sizeof(keywords)/sizeof(keywords[0])-1, r, i;
166
167 while(min <= max) {
168 i = (min+max)/2;
169
170 r = check_keyword(ctx, keywords[i].word, lval);
171 if(!r) {
172 ctx->implicit_nl_semicolon = keywords[i].no_nl;
173 return keywords[i].token;
174 }
175
176 if(r > 0)
177 min = i+1;
178 else
179 max = i-1;
180 }
181
182 return 0;
183 }
184
185 static BOOL skip_html_comment(parser_ctx_t *ctx)
186 {
187 const WCHAR html_commentW[] = {'<','!','-','-',0};
188
189 if(!ctx->is_html || ctx->ptr+3 >= ctx->end ||
190 memcmp(ctx->ptr, html_commentW, sizeof(WCHAR)*4))
191 return FALSE;
192
193 ctx->nl = TRUE;
194 while(ctx->ptr < ctx->end && !is_endline(*ctx->ptr++));
195
196 return TRUE;
197 }
198
199 static BOOL skip_comment(parser_ctx_t *ctx)
200 {
201 if(ctx->ptr+1 >= ctx->end)
202 return FALSE;
203
204 if(*ctx->ptr != '/') {
205 if(*ctx->ptr == '@' && ctx->ptr+2 < ctx->end && ctx->ptr[1] == '*' && ctx->ptr[2] == '/') {
206 ctx->ptr += 3;
207 return TRUE;
208 }
209
210 return FALSE;
211 }
212
213 switch(ctx->ptr[1]) {
214 case '*':
215 ctx->ptr += 2;
216 if(ctx->ptr+2 < ctx->end && *ctx->ptr == '@' && is_identifier_char(ctx->ptr[1]))
217 return FALSE;
218 while(ctx->ptr+1 < ctx->end && (ctx->ptr[0] != '*' || ctx->ptr[1] != '/'))
219 ctx->ptr++;
220
221 if(ctx->ptr[0] == '*' && ctx->ptr[1] == '/') {
222 ctx->ptr += 2;
223 }else {
224 WARN("unexpected end of file (missing end of comment)\n");
225 ctx->ptr = ctx->end;
226 }
227 break;
228 case '/':
229 ctx->ptr += 2;
230 if(ctx->ptr+2 < ctx->end && *ctx->ptr == '@' && is_identifier_char(ctx->ptr[1]))
231 return FALSE;
232 while(ctx->ptr < ctx->end && !is_endline(*ctx->ptr))
233 ctx->ptr++;
234 break;
235 default:
236 return FALSE;
237 }
238
239 return TRUE;
240 }
241
242 static BOOL skip_spaces(parser_ctx_t *ctx)
243 {
244 while(ctx->ptr < ctx->end && (isspaceW(*ctx->ptr) || *ctx->ptr == 0xFEFF /* UTF16 BOM */)) {
245 if(is_endline(*ctx->ptr++))
246 ctx->nl = TRUE;
247 }
248
249 return ctx->ptr != ctx->end;
250 }
251
252 BOOL unescape(WCHAR *str)
253 {
254 WCHAR *pd, *p, c;
255 int i;
256
257 pd = p = str;
258 while(*p) {
259 if(*p != '\\') {
260 *pd++ = *p++;
261 continue;
262 }
263
264 p++;
265
266 switch(*p) {
267 case '\'':
268 case '\"':
269 case '\\':
270 c = *p;
271 break;
272 case 'b':
273 c = '\b';
274 break;
275 case 't':
276 c = '\t';
277 break;
278 case 'n':
279 c = '\n';
280 break;
281 case 'f':
282 c = '\f';
283 break;
284 case 'r':
285 c = '\r';
286 break;
287 case 'x':
288 i = hex_to_int(*++p);
289 if(i == -1)
290 return FALSE;
291 c = i << 4;
292
293 i = hex_to_int(*++p);
294 if(i == -1)
295 return FALSE;
296 c += i;
297 break;
298 case 'u':
299 i = hex_to_int(*++p);
300 if(i == -1)
301 return FALSE;
302 c = i << 12;
303
304 i = hex_to_int(*++p);
305 if(i == -1)
306 return FALSE;
307 c += i << 8;
308
309 i = hex_to_int(*++p);
310 if(i == -1)
311 return FALSE;
312 c += i << 4;
313
314 i = hex_to_int(*++p);
315 if(i == -1)
316 return FALSE;
317 c += i;
318 break;
319 default:
320 if(isdigitW(*p)) {
321 c = *p++ - '0';
322 if(isdigitW(*p)) {
323 c = c*8 + (*p++ - '0');
324 if(isdigitW(*p))
325 c = c*8 + (*p++ - '0');
326 }
327 p--;
328 }
329 else
330 c = *p;
331 }
332
333 *pd++ = c;
334 p++;
335 }
336
337 *pd = 0;
338 return TRUE;
339 }
340
341 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
342 {
343 const WCHAR *ptr = ctx->ptr++;
344 WCHAR *wstr;
345 int len;
346
347 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
348 ctx->ptr++;
349
350 len = ctx->ptr-ptr;
351
352 *ret = wstr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
353 memcpy(wstr, ptr, len*sizeof(WCHAR));
354 wstr[len] = 0;
355
356 /* FIXME: unescape */
357 return tIdentifier;
358 }
359
360 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret, WCHAR endch)
361 {
362 const WCHAR *ptr = ++ctx->ptr;
363 WCHAR *wstr;
364 int len;
365
366 while(ctx->ptr < ctx->end && *ctx->ptr != endch) {
367 if(*ctx->ptr++ == '\\')
368 ctx->ptr++;
369 }
370
371 if(ctx->ptr == ctx->end)
372 return lex_error(ctx, JS_E_UNTERMINATED_STRING);
373
374 len = ctx->ptr-ptr;
375
376 *ret = wstr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
377 memcpy(wstr, ptr, len*sizeof(WCHAR));
378 wstr[len] = 0;
379
380 ctx->ptr++;
381
382 if(!unescape(wstr)) {
383 WARN("unescape failed\n");
384 return lex_error(ctx, E_FAIL);
385 }
386
387 return tStringLiteral;
388 }
389
390 static literal_t *new_double_literal(parser_ctx_t *ctx, DOUBLE d)
391 {
392 literal_t *ret = parser_alloc(ctx, sizeof(literal_t));
393
394 ret->type = LT_DOUBLE;
395 ret->u.dval = d;
396 return ret;
397 }
398
399 literal_t *new_boolean_literal(parser_ctx_t *ctx, BOOL bval)
400 {
401 literal_t *ret = parser_alloc(ctx, sizeof(literal_t));
402
403 ret->type = LT_BOOL;
404 ret->u.bval = bval;
405
406 return ret;
407 }
408
409 HRESULT parse_decimal(const WCHAR **iter, const WCHAR *end, double *ret)
410 {
411 const WCHAR *ptr = *iter;
412 LONGLONG d = 0, hlp;
413 int exp = 0;
414
415 while(ptr < end && isdigitW(*ptr)) {
416 hlp = d*10 + *(ptr++) - '0';
417 if(d>MAXLONGLONG/10 || hlp<0) {
418 exp++;
419 break;
420 }
421 else
422 d = hlp;
423 }
424 while(ptr < end && isdigitW(*ptr)) {
425 exp++;
426 ptr++;
427 }
428
429 if(*ptr == '.') {
430 ptr++;
431
432 while(ptr < end && isdigitW(*ptr)) {
433 hlp = d*10 + *(ptr++) - '0';
434 if(d>MAXLONGLONG/10 || hlp<0)
435 break;
436
437 d = hlp;
438 exp--;
439 }
440 while(ptr < end && isdigitW(*ptr))
441 ptr++;
442 }
443
444 if(ptr < end && (*ptr == 'e' || *ptr == 'E')) {
445 int sign = 1, e = 0;
446
447 if(++ptr < end) {
448 if(*ptr == '+') {
449 ptr++;
450 }else if(*ptr == '-') {
451 sign = -1;
452 ptr++;
453 }else if(!isdigitW(*ptr)) {
454 WARN("Expected exponent part\n");
455 return E_FAIL;
456 }
457 }
458
459 if(ptr == end) {
460 WARN("unexpected end of file\n");
461 return E_FAIL;
462 }
463
464 while(ptr < end && isdigitW(*ptr)) {
465 if(e > INT_MAX/10 || (e = e*10 + *ptr++ - '0')<0)
466 e = INT_MAX;
467 }
468 e *= sign;
469
470 if(exp<0 && e<0 && e+exp>0) exp = INT_MIN;
471 else if(exp>0 && e>0 && e+exp<0) exp = INT_MAX;
472 else exp += e;
473 }
474
475 if(is_identifier_char(*ptr)) {
476 WARN("wrong char after zero\n");
477 return JS_E_MISSING_SEMICOLON;
478 }
479
480 *ret = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp);
481 *iter = ptr;
482 return S_OK;
483 }
484
485 static BOOL parse_numeric_literal(parser_ctx_t *ctx, double *ret)
486 {
487 HRESULT hres;
488
489 if(*ctx->ptr == '0') {
490 ctx->ptr++;
491
492 if(*ctx->ptr == 'x' || *ctx->ptr == 'X') {
493 double r = 0;
494 int d;
495 if(++ctx->ptr == ctx->end) {
496 ERR("unexpected end of file\n");
497 return FALSE;
498 }
499
500 while(ctx->ptr < ctx->end && (d = hex_to_int(*ctx->ptr)) != -1) {
501 r = r*16 + d;
502 ctx->ptr++;
503 }
504
505 if(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr)) {
506 WARN("unexpected identifier char\n");
507 lex_error(ctx, JS_E_MISSING_SEMICOLON);
508 return FALSE;
509 }
510
511 *ret = r;
512 return TRUE;
513 }
514
515 if(isdigitW(*ctx->ptr)) {
516 unsigned base = 8;
517 const WCHAR *ptr;
518 double val = 0;
519
520 for(ptr = ctx->ptr; ptr < ctx->end && isdigitW(*ptr); ptr++) {
521 if(*ptr > '7') {
522 base = 10;
523 break;
524 }
525 }
526
527 do {
528 val = val*base + *ctx->ptr-'0';
529 }while(++ctx->ptr < ctx->end && isdigitW(*ctx->ptr));
530
531 /* FIXME: Do we need it here? */
532 if(ctx->ptr < ctx->end && (is_identifier_char(*ctx->ptr) || *ctx->ptr == '.')) {
533 WARN("wrong char after octal literal: '%c'\n", *ctx->ptr);
534 lex_error(ctx, JS_E_MISSING_SEMICOLON);
535 return FALSE;
536 }
537
538 *ret = val;
539 return TRUE;
540 }
541
542 if(is_identifier_char(*ctx->ptr)) {
543 WARN("wrong char after zero\n");
544 lex_error(ctx, JS_E_MISSING_SEMICOLON);
545 return FALSE;
546 }
547 }
548
549 hres = parse_decimal(&ctx->ptr, ctx->end, ret);
550 if(FAILED(hres)) {
551 lex_error(ctx, hres);
552 return FALSE;
553 }
554
555 return TRUE;
556 }
557
558 static int next_token(parser_ctx_t *ctx, void *lval)
559 {
560 do {
561 if(!skip_spaces(ctx))
562 return tEOF;
563 }while(skip_comment(ctx) || skip_html_comment(ctx));
564
565 if(ctx->implicit_nl_semicolon) {
566 if(ctx->nl)
567 return ';';
568 ctx->implicit_nl_semicolon = FALSE;
569 }
570
571 if(isalphaW(*ctx->ptr)) {
572 int ret = check_keywords(ctx, lval);
573 if(ret)
574 return ret;
575
576 return parse_identifier(ctx, lval);
577 }
578
579 if(isdigitW(*ctx->ptr)) {
580 double n;
581
582 if(!parse_numeric_literal(ctx, &n))
583 return -1;
584
585 *(literal_t**)lval = new_double_literal(ctx, n);
586 return tNumericLiteral;
587 }
588
589 switch(*ctx->ptr) {
590 case '{':
591 case '(':
592 case ')':
593 case '[':
594 case ']':
595 case ';':
596 case ',':
597 case '~':
598 case '?':
599 return *ctx->ptr++;
600
601 case '}':
602 *(const WCHAR**)lval = ctx->ptr++;
603 return '}';
604
605 case '.':
606 if(ctx->ptr+1 < ctx->end && isdigitW(ctx->ptr[1])) {
607 double n;
608 HRESULT hres;
609 hres = parse_decimal(&ctx->ptr, ctx->end, &n);
610 if(FAILED(hres)) {
611 lex_error(ctx, hres);
612 return -1;
613 }
614 *(literal_t**)lval = new_double_literal(ctx, n);
615 return tNumericLiteral;
616 }
617 ctx->ptr++;
618 return '.';
619
620 case '<':
621 if(++ctx->ptr == ctx->end) {
622 *(int*)lval = EXPR_LESS;
623 return tRelOper;
624 }
625
626 switch(*ctx->ptr) {
627 case '=': /* <= */
628 ctx->ptr++;
629 *(int*)lval = EXPR_LESSEQ;
630 return tRelOper;
631 case '<': /* << */
632 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* <<= */
633 ctx->ptr++;
634 *(int*)lval = EXPR_ASSIGNLSHIFT;
635 return tAssignOper;
636 }
637 *(int*)lval = EXPR_LSHIFT;
638 return tShiftOper;
639 default: /* < */
640 *(int*)lval = EXPR_LESS;
641 return tRelOper;
642 }
643
644 case '>':
645 if(++ctx->ptr == ctx->end) { /* > */
646 *(int*)lval = EXPR_GREATER;
647 return tRelOper;
648 }
649
650 switch(*ctx->ptr) {
651 case '=': /* >= */
652 ctx->ptr++;
653 *(int*)lval = EXPR_GREATEREQ;
654 return tRelOper;
655 case '>': /* >> */
656 if(++ctx->ptr < ctx->end) {
657 if(*ctx->ptr == '=') { /* >>= */
658 ctx->ptr++;
659 *(int*)lval = EXPR_ASSIGNRSHIFT;
660 return tAssignOper;
661 }
662 if(*ctx->ptr == '>') { /* >>> */
663 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* >>>= */
664 ctx->ptr++;
665 *(int*)lval = EXPR_ASSIGNRRSHIFT;
666 return tAssignOper;
667 }
668 *(int*)lval = EXPR_RRSHIFT;
669 return tRelOper;
670 }
671 }
672 *(int*)lval = EXPR_RSHIFT;
673 return tShiftOper;
674 default:
675 *(int*)lval = EXPR_GREATER;
676 return tRelOper;
677 }
678
679 case '+':
680 ctx->ptr++;
681 if(ctx->ptr < ctx->end) {
682 switch(*ctx->ptr) {
683 case '+': /* ++ */
684 ctx->ptr++;
685 return tINC;
686 case '=': /* += */
687 ctx->ptr++;
688 *(int*)lval = EXPR_ASSIGNADD;
689 return tAssignOper;
690 }
691 }
692 return '+';
693
694 case '-':
695 ctx->ptr++;
696 if(ctx->ptr < ctx->end) {
697 switch(*ctx->ptr) {
698 case '-': /* -- or --> */
699 ctx->ptr++;
700 if(ctx->is_html && ctx->nl && ctx->ptr < ctx->end && *ctx->ptr == '>') {
701 ctx->ptr++;
702 return tHTMLCOMMENT;
703 }
704 return tDEC;
705 case '=': /* -= */
706 ctx->ptr++;
707 *(int*)lval = EXPR_ASSIGNSUB;
708 return tAssignOper;
709 }
710 }
711 return '-';
712
713 case '*':
714 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* *= */
715 ctx->ptr++;
716 *(int*)lval = EXPR_ASSIGNMUL;
717 return tAssignOper;
718 }
719 return '*';
720
721 case '%':
722 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* %= */
723 ctx->ptr++;
724 *(int*)lval = EXPR_ASSIGNMOD;
725 return tAssignOper;
726 }
727 return '%';
728
729 case '&':
730 if(++ctx->ptr < ctx->end) {
731 switch(*ctx->ptr) {
732 case '=': /* &= */
733 ctx->ptr++;
734 *(int*)lval = EXPR_ASSIGNAND;
735 return tAssignOper;
736 case '&': /* && */
737 ctx->ptr++;
738 return tANDAND;
739 }
740 }
741 return '&';
742
743 case '|':
744 if(++ctx->ptr < ctx->end) {
745 switch(*ctx->ptr) {
746 case '=': /* |= */
747 ctx->ptr++;
748 *(int*)lval = EXPR_ASSIGNOR;
749 return tAssignOper;
750 case '|': /* || */
751 ctx->ptr++;
752 return tOROR;
753 }
754 }
755 return '|';
756
757 case '^':
758 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* ^= */
759 ctx->ptr++;
760 *(int*)lval = EXPR_ASSIGNXOR;
761 return tAssignOper;
762 }
763 return '^';
764
765 case '!':
766 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* != */
767 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* !== */
768 ctx->ptr++;
769 *(int*)lval = EXPR_NOTEQEQ;
770 return tEqOper;
771 }
772 *(int*)lval = EXPR_NOTEQ;
773 return tEqOper;
774 }
775 return '!';
776
777 case '=':
778 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* == */
779 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* === */
780 ctx->ptr++;
781 *(int*)lval = EXPR_EQEQ;
782 return tEqOper;
783 }
784 *(int*)lval = EXPR_EQ;
785 return tEqOper;
786 }
787 return '=';
788
789 case '/':
790 if(++ctx->ptr < ctx->end) {
791 if(*ctx->ptr == '=') { /* /= */
792 ctx->ptr++;
793 *(int*)lval = EXPR_ASSIGNDIV;
794 return kDIVEQ;
795 }
796 }
797 return '/';
798
799 case ':':
800 if(++ctx->ptr < ctx->end && *ctx->ptr == ':') {
801 ctx->ptr++;
802 return kDCOL;
803 }
804 return ':';
805
806 case '\"':
807 case '\'':
808 return parse_string_literal(ctx, lval, *ctx->ptr);
809
810 case '_':
811 case '$':
812 return parse_identifier(ctx, lval);
813
814 case '@':
815 return '@';
816 }
817
818 WARN("unexpected char '%c' %d\n", *ctx->ptr, *ctx->ptr);
819 return 0;
820 }
821
822 struct _cc_var_t {
823 ccval_t val;
824 struct _cc_var_t *next;
825 unsigned name_len;
826 WCHAR name[0];
827 };
828
829 void release_cc(cc_ctx_t *cc)
830 {
831 cc_var_t *iter, *next;
832
833 for(iter = cc->vars; iter; iter = next) {
834 next = iter->next;
835 heap_free(iter);
836 }
837
838 heap_free(cc);
839 }
840
841 static BOOL new_cc_var(cc_ctx_t *cc, const WCHAR *name, int len, ccval_t v)
842 {
843 cc_var_t *new_v;
844
845 if(len == -1)
846 len = strlenW(name);
847
848 new_v = heap_alloc(sizeof(cc_var_t) + (len+1)*sizeof(WCHAR));
849 if(!new_v)
850 return FALSE;
851
852 new_v->val = v;
853 memcpy(new_v->name, name, (len+1)*sizeof(WCHAR));
854 new_v->name_len = len;
855 new_v->next = cc->vars;
856 cc->vars = new_v;
857 return TRUE;
858 }
859
860 static cc_var_t *find_cc_var(cc_ctx_t *cc, const WCHAR *name, unsigned name_len)
861 {
862 cc_var_t *iter;
863
864 for(iter = cc->vars; iter; iter = iter->next) {
865 if(iter->name_len == name_len && !memcmp(iter->name, name, name_len*sizeof(WCHAR)))
866 return iter;
867 }
868
869 return NULL;
870 }
871
872 static BOOL init_cc(parser_ctx_t *ctx)
873 {
874 cc_ctx_t *cc;
875
876 static const WCHAR _win32W[] = {'_','w','i','n','3','2',0};
877 static const WCHAR _win64W[] = {'_','w','i','n','6','4',0};
878 static const WCHAR _x86W[] = {'_','x','8','6',0};
879 static const WCHAR _amd64W[] = {'_','a','m','d','6','4',0};
880 static const WCHAR _jscriptW[] = {'_','j','s','c','r','i','p','t',0};
881 static const WCHAR _jscript_buildW[] = {'_','j','s','c','r','i','p','t','_','b','u','i','l','d',0};
882 static const WCHAR _jscript_versionW[] = {'_','j','s','c','r','i','p','t','_','v','e','r','s','i','o','n',0};
883
884 if(ctx->script->cc)
885 return TRUE;
886
887 cc = heap_alloc(sizeof(cc_ctx_t));
888 if(!cc) {
889 lex_error(ctx, E_OUTOFMEMORY);
890 return FALSE;
891 }
892
893 cc->vars = NULL;
894
895 if(!new_cc_var(cc, _jscriptW, -1, ccval_bool(TRUE))
896 || !new_cc_var(cc, sizeof(void*) == 8 ? _win64W : _win32W, -1, ccval_bool(TRUE))
897 || !new_cc_var(cc, sizeof(void*) == 8 ? _amd64W : _x86W, -1, ccval_bool(TRUE))
898 || !new_cc_var(cc, _jscript_versionW, -1, ccval_num(JSCRIPT_MAJOR_VERSION + (DOUBLE)JSCRIPT_MINOR_VERSION/10.0))
899 || !new_cc_var(cc, _jscript_buildW, -1, ccval_num(JSCRIPT_BUILD_VERSION))) {
900 release_cc(cc);
901 lex_error(ctx, E_OUTOFMEMORY);
902 return FALSE;
903 }
904
905 ctx->script->cc = cc;
906 return TRUE;
907 }
908
909 static BOOL parse_cc_identifier(parser_ctx_t *ctx, const WCHAR **ret, unsigned *ret_len)
910 {
911 if(*ctx->ptr != '@') {
912 lex_error(ctx, JS_E_EXPECTED_AT);
913 return FALSE;
914 }
915
916 if(!is_identifier_first_char(*++ctx->ptr)) {
917 lex_error(ctx, JS_E_EXPECTED_IDENTIFIER);
918 return FALSE;
919 }
920
921 *ret = ctx->ptr;
922 while(++ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr));
923 *ret_len = ctx->ptr - *ret;
924 return TRUE;
925 }
926
927 int try_parse_ccval(parser_ctx_t *ctx, ccval_t *r)
928 {
929 if(!skip_spaces(ctx))
930 return -1;
931
932 if(isdigitW(*ctx->ptr)) {
933 double n;
934
935 if(!parse_numeric_literal(ctx, &n))
936 return -1;
937
938 *r = ccval_num(n);
939 return 1;
940 }
941
942 if(*ctx->ptr == '@') {
943 const WCHAR *ident;
944 unsigned ident_len;
945 cc_var_t *cc_var;
946
947 if(!parse_cc_identifier(ctx, &ident, &ident_len))
948 return -1;
949
950 cc_var = find_cc_var(ctx->script->cc, ident, ident_len);
951 *r = cc_var ? cc_var->val : ccval_num(NAN);
952 return 1;
953 }
954
955 if(!check_keyword(ctx, trueW, NULL)) {
956 *r = ccval_bool(TRUE);
957 return 1;
958 }
959
960 if(!check_keyword(ctx, falseW, NULL)) {
961 *r = ccval_bool(FALSE);
962 return 1;
963 }
964
965 return 0;
966 }
967
968 static int skip_code(parser_ctx_t *ctx, BOOL exec_else)
969 {
970 int if_depth = 1;
971 const WCHAR *ptr;
972
973 while(1) {
974 ptr = strchrW(ctx->ptr, '@');
975 if(!ptr) {
976 WARN("No @end\n");
977 return lex_error(ctx, JS_E_EXPECTED_CCEND);
978 }
979 ctx->ptr = ptr+1;
980
981 if(!check_keyword(ctx, endW, NULL)) {
982 if(--if_depth)
983 continue;
984 return 0;
985 }
986
987 if(exec_else && !check_keyword(ctx, elifW, NULL)) {
988 if(if_depth > 1)
989 continue;
990
991 if(!skip_spaces(ctx) || *ctx->ptr != '(')
992 return lex_error(ctx, JS_E_MISSING_LBRACKET);
993
994 if(!parse_cc_expr(ctx))
995 return -1;
996
997 if(!get_ccbool(ctx->ccval))
998 continue; /* skip block of code */
999
1000 /* continue parsing */
1001 ctx->cc_if_depth++;
1002 return 0;
1003 }
1004
1005 if(exec_else && !check_keyword(ctx, elseW, NULL)) {
1006 if(if_depth > 1)
1007 continue;
1008
1009 /* parse else block */
1010 ctx->cc_if_depth++;
1011 return 0;
1012 }
1013
1014 if(!check_keyword(ctx, ifW, NULL)) {
1015 if_depth++;
1016 continue;
1017 }
1018
1019 ctx->ptr++;
1020 }
1021 }
1022
1023 static int cc_token(parser_ctx_t *ctx, void *lval)
1024 {
1025 unsigned id_len = 0;
1026 cc_var_t *var;
1027
1028 static const WCHAR cc_onW[] = {'c','c','_','o','n',0};
1029 static const WCHAR setW[] = {'s','e','t',0};
1030
1031 ctx->ptr++;
1032
1033 if(!check_keyword(ctx, cc_onW, NULL))
1034 return init_cc(ctx) ? 0 : -1;
1035
1036 if(!check_keyword(ctx, setW, NULL)) {
1037 const WCHAR *ident;
1038 unsigned ident_len;
1039 cc_var_t *var;
1040
1041 if(!init_cc(ctx))
1042 return -1;
1043
1044 if(!skip_spaces(ctx))
1045 return lex_error(ctx, JS_E_EXPECTED_AT);
1046
1047 if(!parse_cc_identifier(ctx, &ident, &ident_len))
1048 return -1;
1049
1050 if(!skip_spaces(ctx) || *ctx->ptr != '=')
1051 return lex_error(ctx, JS_E_EXPECTED_ASSIGN);
1052 ctx->ptr++;
1053
1054 if(!parse_cc_expr(ctx)) {
1055 WARN("parsing CC expression failed\n");
1056 return -1;
1057 }
1058
1059 var = find_cc_var(ctx->script->cc, ident, ident_len);
1060 if(var) {
1061 var->val = ctx->ccval;
1062 }else {
1063 if(!new_cc_var(ctx->script->cc, ident, ident_len, ctx->ccval))
1064 return lex_error(ctx, E_OUTOFMEMORY);
1065 }
1066
1067 return 0;
1068 }
1069
1070 if(!check_keyword(ctx, ifW, NULL)) {
1071 if(!init_cc(ctx))
1072 return -1;
1073
1074 if(!skip_spaces(ctx) || *ctx->ptr != '(')
1075 return lex_error(ctx, JS_E_MISSING_LBRACKET);
1076
1077 if(!parse_cc_expr(ctx))
1078 return -1;
1079
1080 if(get_ccbool(ctx->ccval)) {
1081 /* continue parsing block inside if */
1082 ctx->cc_if_depth++;
1083 return 0;
1084 }
1085
1086 return skip_code(ctx, TRUE);
1087 }
1088
1089 if(!check_keyword(ctx, elifW, NULL) || !check_keyword(ctx, elseW, NULL)) {
1090 if(!ctx->cc_if_depth)
1091 return lex_error(ctx, JS_E_SYNTAX);
1092
1093 return skip_code(ctx, FALSE);
1094 }
1095
1096 if(!check_keyword(ctx, endW, NULL)) {
1097 if(!ctx->cc_if_depth)
1098 return lex_error(ctx, JS_E_SYNTAX);
1099
1100 ctx->cc_if_depth--;
1101 return 0;
1102 }
1103
1104 if(!ctx->script->cc)
1105 return lex_error(ctx, JS_E_DISABLED_CC);
1106
1107 while(ctx->ptr+id_len < ctx->end && is_identifier_char(ctx->ptr[id_len]))
1108 id_len++;
1109 if(!id_len)
1110 return '@';
1111
1112 TRACE("var %s\n", debugstr_wn(ctx->ptr, id_len));
1113
1114 var = find_cc_var(ctx->script->cc, ctx->ptr, id_len);
1115 ctx->ptr += id_len;
1116 if(!var || var->val.is_num) {
1117 *(literal_t**)lval = new_double_literal(ctx, var ? var->val.u.n : NAN);
1118 return tNumericLiteral;
1119 }
1120
1121 *(literal_t**)lval = new_boolean_literal(ctx, var->val.u.b);
1122 return tBooleanLiteral;
1123 }
1124
1125 int parser_lex(void *lval, parser_ctx_t *ctx)
1126 {
1127 int ret;
1128
1129 ctx->nl = ctx->ptr == ctx->begin;
1130
1131 do {
1132 ret = next_token(ctx, lval);
1133 } while(ret == '@' && !(ret = cc_token(ctx, lval)));
1134
1135 return ret;
1136 }
1137
1138 literal_t *parse_regexp(parser_ctx_t *ctx)
1139 {
1140 const WCHAR *re, *flags_ptr;
1141 BOOL in_class = FALSE;
1142 DWORD re_len, flags;
1143 literal_t *ret;
1144 HRESULT hres;
1145
1146 TRACE("\n");
1147
1148 while(*--ctx->ptr != '/');
1149
1150 /* Simple regexp pre-parser; '/' if used in char class does not terminate regexp literal */
1151 re = ++ctx->ptr;
1152 while(ctx->ptr < ctx->end) {
1153 if(*ctx->ptr == '\\') {
1154 if(++ctx->ptr == ctx->end)
1155 break;
1156 }else if(in_class) {
1157 if(*ctx->ptr == '\n')
1158 break;
1159 if(*ctx->ptr == ']')
1160 in_class = FALSE;
1161 }else {
1162 if(*ctx->ptr == '/')
1163 break;
1164
1165 if(*ctx->ptr == '[')
1166 in_class = TRUE;
1167 }
1168 ctx->ptr++;
1169 }
1170
1171 if(ctx->ptr == ctx->end || *ctx->ptr != '/') {
1172 WARN("pre-parsing failed\n");
1173 return NULL;
1174 }
1175
1176 re_len = ctx->ptr-re;
1177
1178 flags_ptr = ++ctx->ptr;
1179 while(ctx->ptr < ctx->end && isalnumW(*ctx->ptr))
1180 ctx->ptr++;
1181
1182 hres = parse_regexp_flags(flags_ptr, ctx->ptr-flags_ptr, &flags);
1183 if(FAILED(hres))
1184 return NULL;
1185
1186 ret = parser_alloc(ctx, sizeof(literal_t));
1187 ret->type = LT_REGEXP;
1188 ret->u.regexp.str = re;
1189 ret->u.regexp.str_len = re_len;
1190 ret->u.regexp.flags = flags;
1191 return ret;
1192 }