[JSCRIPT] Sync with Wine Staging 1.9.4. CORE-10912
[reactos.git] / reactos / dll / win32 / jscript / lex.c
1 /*
2 * Copyright 2008 Jacek Caban for CodeWeavers
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17 */
18
19 #include "jscript.h"
20
21 #include "parser.tab.h"
22
23 static const WCHAR breakW[] = {'b','r','e','a','k',0};
24 static const WCHAR caseW[] = {'c','a','s','e',0};
25 static const WCHAR catchW[] = {'c','a','t','c','h',0};
26 static const WCHAR continueW[] = {'c','o','n','t','i','n','u','e',0};
27 static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0};
28 static const WCHAR deleteW[] = {'d','e','l','e','t','e',0};
29 static const WCHAR doW[] = {'d','o',0};
30 static const WCHAR elseW[] = {'e','l','s','e',0};
31 static const WCHAR falseW[] = {'f','a','l','s','e',0};
32 static const WCHAR finallyW[] = {'f','i','n','a','l','l','y',0};
33 static const WCHAR forW[] = {'f','o','r',0};
34 static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0};
35 static const WCHAR ifW[] = {'i','f',0};
36 static const WCHAR inW[] = {'i','n',0};
37 static const WCHAR instanceofW[] = {'i','n','s','t','a','n','c','e','o','f',0};
38 static const WCHAR newW[] = {'n','e','w',0};
39 static const WCHAR nullW[] = {'n','u','l','l',0};
40 static const WCHAR returnW[] = {'r','e','t','u','r','n',0};
41 static const WCHAR switchW[] = {'s','w','i','t','c','h',0};
42 static const WCHAR thisW[] = {'t','h','i','s',0};
43 static const WCHAR throwW[] = {'t','h','r','o','w',0};
44 static const WCHAR trueW[] = {'t','r','u','e',0};
45 static const WCHAR tryW[] = {'t','r','y',0};
46 static const WCHAR typeofW[] = {'t','y','p','e','o','f',0};
47 static const WCHAR varW[] = {'v','a','r',0};
48 static const WCHAR voidW[] = {'v','o','i','d',0};
49 static const WCHAR whileW[] = {'w','h','i','l','e',0};
50 static const WCHAR withW[] = {'w','i','t','h',0};
51
52 static const WCHAR elifW[] = {'e','l','i','f',0};
53 static const WCHAR endW[] = {'e','n','d',0};
54
55 static const struct {
56 const WCHAR *word;
57 int token;
58 BOOL no_nl;
59 } keywords[] = {
60 {breakW, kBREAK, TRUE},
61 {caseW, kCASE},
62 {catchW, kCATCH},
63 {continueW, kCONTINUE, TRUE},
64 {defaultW, kDEFAULT},
65 {deleteW, kDELETE},
66 {doW, kDO},
67 {elseW, kELSE},
68 {falseW, kFALSE},
69 {finallyW, kFINALLY},
70 {forW, kFOR},
71 {functionW, kFUNCTION},
72 {ifW, kIF},
73 {inW, kIN},
74 {instanceofW, kINSTANCEOF},
75 {newW, kNEW},
76 {nullW, kNULL},
77 {returnW, kRETURN, TRUE},
78 {switchW, kSWITCH},
79 {thisW, kTHIS},
80 {throwW, kTHROW},
81 {trueW, kTRUE},
82 {tryW, kTRY},
83 {typeofW, kTYPEOF},
84 {varW, kVAR},
85 {voidW, kVOID},
86 {whileW, kWHILE},
87 {withW, kWITH}
88 };
89
90 static int lex_error(parser_ctx_t *ctx, HRESULT hres)
91 {
92 ctx->hres = hres;
93 ctx->lexer_error = TRUE;
94 return -1;
95 }
96
97 /* ECMA-262 3rd Edition 7.6 */
98 BOOL is_identifier_char(WCHAR c)
99 {
100 return isalnumW(c) || c == '$' || c == '_' || c == '\\';
101 }
102
103 static BOOL is_identifier_first_char(WCHAR c)
104 {
105 return isalphaW(c) || c == '$' || c == '_' || c == '\\';
106 }
107
108 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word, const WCHAR **lval)
109 {
110 const WCHAR *p1 = ctx->ptr;
111 const WCHAR *p2 = word;
112
113 while(p1 < ctx->end && *p2) {
114 if(*p1 != *p2)
115 return *p1 - *p2;
116 p1++;
117 p2++;
118 }
119
120 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
121 return 1;
122
123 if(lval)
124 *lval = ctx->ptr;
125 ctx->ptr = p1;
126 return 0;
127 }
128
129 /* ECMA-262 3rd Edition 7.3 */
130 static BOOL is_endline(WCHAR c)
131 {
132 return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029;
133 }
134
135 static int hex_to_int(WCHAR c)
136 {
137 if('0' <= c && c <= '9')
138 return c-'0';
139
140 if('a' <= c && c <= 'f')
141 return c-'a'+10;
142
143 if('A' <= c && c <= 'F')
144 return c-'A'+10;
145
146 return -1;
147 }
148
149 static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval)
150 {
151 int min = 0, max = sizeof(keywords)/sizeof(keywords[0])-1, r, i;
152
153 while(min <= max) {
154 i = (min+max)/2;
155
156 r = check_keyword(ctx, keywords[i].word, lval);
157 if(!r) {
158 ctx->implicit_nl_semicolon = keywords[i].no_nl;
159 return keywords[i].token;
160 }
161
162 if(r > 0)
163 min = i+1;
164 else
165 max = i-1;
166 }
167
168 return 0;
169 }
170
171 static BOOL skip_html_comment(parser_ctx_t *ctx)
172 {
173 const WCHAR html_commentW[] = {'<','!','-','-',0};
174
175 if(!ctx->is_html || ctx->ptr+3 >= ctx->end ||
176 memcmp(ctx->ptr, html_commentW, sizeof(WCHAR)*4))
177 return FALSE;
178
179 ctx->nl = TRUE;
180 while(ctx->ptr < ctx->end && !is_endline(*ctx->ptr++));
181
182 return TRUE;
183 }
184
185 static BOOL skip_comment(parser_ctx_t *ctx)
186 {
187 if(ctx->ptr+1 >= ctx->end)
188 return FALSE;
189
190 if(*ctx->ptr != '/') {
191 if(*ctx->ptr == '@' && ctx->ptr+2 < ctx->end && ctx->ptr[1] == '*' && ctx->ptr[2] == '/') {
192 ctx->ptr += 3;
193 return TRUE;
194 }
195
196 return FALSE;
197 }
198
199 switch(ctx->ptr[1]) {
200 case '*':
201 ctx->ptr += 2;
202 if(ctx->ptr+2 < ctx->end && *ctx->ptr == '@' && is_identifier_char(ctx->ptr[1]))
203 return FALSE;
204 while(ctx->ptr+1 < ctx->end && (ctx->ptr[0] != '*' || ctx->ptr[1] != '/'))
205 ctx->ptr++;
206
207 if(ctx->ptr[0] == '*' && ctx->ptr[1] == '/') {
208 ctx->ptr += 2;
209 }else {
210 WARN("unexpected end of file (missing end of comment)\n");
211 ctx->ptr = ctx->end;
212 }
213 break;
214 case '/':
215 ctx->ptr += 2;
216 if(ctx->ptr+2 < ctx->end && *ctx->ptr == '@' && is_identifier_char(ctx->ptr[1]))
217 return FALSE;
218 while(ctx->ptr < ctx->end && !is_endline(*ctx->ptr))
219 ctx->ptr++;
220 break;
221 default:
222 return FALSE;
223 }
224
225 return TRUE;
226 }
227
228 static BOOL skip_spaces(parser_ctx_t *ctx)
229 {
230 while(ctx->ptr < ctx->end && (isspaceW(*ctx->ptr) || *ctx->ptr == 0xFEFF /* UTF16 BOM */)) {
231 if(is_endline(*ctx->ptr++))
232 ctx->nl = TRUE;
233 }
234
235 return ctx->ptr != ctx->end;
236 }
237
238 BOOL unescape(WCHAR *str)
239 {
240 WCHAR *pd, *p, c;
241 int i;
242
243 pd = p = str;
244 while(*p) {
245 if(*p != '\\') {
246 *pd++ = *p++;
247 continue;
248 }
249
250 p++;
251
252 switch(*p) {
253 case '\'':
254 case '\"':
255 case '\\':
256 c = *p;
257 break;
258 case 'b':
259 c = '\b';
260 break;
261 case 't':
262 c = '\t';
263 break;
264 case 'n':
265 c = '\n';
266 break;
267 case 'f':
268 c = '\f';
269 break;
270 case 'r':
271 c = '\r';
272 break;
273 case 'x':
274 i = hex_to_int(*++p);
275 if(i == -1)
276 return FALSE;
277 c = i << 4;
278
279 i = hex_to_int(*++p);
280 if(i == -1)
281 return FALSE;
282 c += i;
283 break;
284 case 'u':
285 i = hex_to_int(*++p);
286 if(i == -1)
287 return FALSE;
288 c = i << 12;
289
290 i = hex_to_int(*++p);
291 if(i == -1)
292 return FALSE;
293 c += i << 8;
294
295 i = hex_to_int(*++p);
296 if(i == -1)
297 return FALSE;
298 c += i << 4;
299
300 i = hex_to_int(*++p);
301 if(i == -1)
302 return FALSE;
303 c += i;
304 break;
305 default:
306 if(isdigitW(*p)) {
307 c = *p++ - '0';
308 if(isdigitW(*p)) {
309 c = c*8 + (*p++ - '0');
310 if(isdigitW(*p))
311 c = c*8 + (*p++ - '0');
312 }
313 p--;
314 }
315 else
316 c = *p;
317 }
318
319 *pd++ = c;
320 p++;
321 }
322
323 *pd = 0;
324 return TRUE;
325 }
326
327 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
328 {
329 const WCHAR *ptr = ctx->ptr++;
330 WCHAR *wstr;
331 int len;
332
333 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
334 ctx->ptr++;
335
336 len = ctx->ptr-ptr;
337
338 *ret = wstr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
339 memcpy(wstr, ptr, len*sizeof(WCHAR));
340 wstr[len] = 0;
341
342 /* FIXME: unescape */
343 return tIdentifier;
344 }
345
346 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret, WCHAR endch)
347 {
348 const WCHAR *ptr = ++ctx->ptr;
349 WCHAR *wstr;
350 int len;
351
352 while(ctx->ptr < ctx->end && *ctx->ptr != endch) {
353 if(*ctx->ptr++ == '\\')
354 ctx->ptr++;
355 }
356
357 if(ctx->ptr == ctx->end)
358 return lex_error(ctx, JS_E_UNTERMINATED_STRING);
359
360 len = ctx->ptr-ptr;
361
362 *ret = wstr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
363 memcpy(wstr, ptr, len*sizeof(WCHAR));
364 wstr[len] = 0;
365
366 ctx->ptr++;
367
368 if(!unescape(wstr)) {
369 WARN("unescape failed\n");
370 return lex_error(ctx, E_FAIL);
371 }
372
373 return tStringLiteral;
374 }
375
376 static literal_t *new_double_literal(parser_ctx_t *ctx, DOUBLE d)
377 {
378 literal_t *ret = parser_alloc(ctx, sizeof(literal_t));
379
380 ret->type = LT_DOUBLE;
381 ret->u.dval = d;
382 return ret;
383 }
384
385 literal_t *new_boolean_literal(parser_ctx_t *ctx, BOOL bval)
386 {
387 literal_t *ret = parser_alloc(ctx, sizeof(literal_t));
388
389 ret->type = LT_BOOL;
390 ret->u.bval = bval;
391
392 return ret;
393 }
394
395 HRESULT parse_decimal(const WCHAR **iter, const WCHAR *end, double *ret)
396 {
397 const WCHAR *ptr = *iter;
398 LONGLONG d = 0, hlp;
399 int exp = 0;
400
401 while(ptr < end && isdigitW(*ptr)) {
402 hlp = d*10 + *(ptr++) - '0';
403 if(d>MAXLONGLONG/10 || hlp<0) {
404 exp++;
405 break;
406 }
407 else
408 d = hlp;
409 }
410 while(ptr < end && isdigitW(*ptr)) {
411 exp++;
412 ptr++;
413 }
414
415 if(*ptr == '.') {
416 ptr++;
417
418 while(ptr < end && isdigitW(*ptr)) {
419 hlp = d*10 + *(ptr++) - '0';
420 if(d>MAXLONGLONG/10 || hlp<0)
421 break;
422
423 d = hlp;
424 exp--;
425 }
426 while(ptr < end && isdigitW(*ptr))
427 ptr++;
428 }
429
430 if(ptr < end && (*ptr == 'e' || *ptr == 'E')) {
431 int sign = 1, e = 0;
432
433 if(++ptr < end) {
434 if(*ptr == '+') {
435 ptr++;
436 }else if(*ptr == '-') {
437 sign = -1;
438 ptr++;
439 }else if(!isdigitW(*ptr)) {
440 WARN("Expected exponent part\n");
441 return E_FAIL;
442 }
443 }
444
445 if(ptr == end) {
446 WARN("unexpected end of file\n");
447 return E_FAIL;
448 }
449
450 while(ptr < end && isdigitW(*ptr)) {
451 if(e > INT_MAX/10 || (e = e*10 + *ptr++ - '0')<0)
452 e = INT_MAX;
453 }
454 e *= sign;
455
456 if(exp<0 && e<0 && e+exp>0) exp = INT_MIN;
457 else if(exp>0 && e>0 && e+exp<0) exp = INT_MAX;
458 else exp += e;
459 }
460
461 if(is_identifier_char(*ptr)) {
462 WARN("wrong char after zero\n");
463 return JS_E_MISSING_SEMICOLON;
464 }
465
466 *ret = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp);
467 *iter = ptr;
468 return S_OK;
469 }
470
471 static BOOL parse_numeric_literal(parser_ctx_t *ctx, double *ret)
472 {
473 HRESULT hres;
474
475 if(*ctx->ptr == '0') {
476 LONG d, l = 0;
477
478 ctx->ptr++;
479
480 if(*ctx->ptr == 'x' || *ctx->ptr == 'X') {
481 if(++ctx->ptr == ctx->end) {
482 ERR("unexpected end of file\n");
483 return FALSE;
484 }
485
486 while(ctx->ptr < ctx->end && (d = hex_to_int(*ctx->ptr)) != -1) {
487 l = l*16 + d;
488 ctx->ptr++;
489 }
490
491 if(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr)) {
492 WARN("unexpected identifier char\n");
493 lex_error(ctx, JS_E_MISSING_SEMICOLON);
494 return FALSE;
495 }
496
497 *ret = l;
498 return TRUE;
499 }
500
501 if(isdigitW(*ctx->ptr)) {
502 unsigned base = 8;
503 const WCHAR *ptr;
504 double val = 0;
505
506 for(ptr = ctx->ptr; ptr < ctx->end && isdigitW(*ptr); ptr++) {
507 if(*ptr > '7') {
508 base = 10;
509 break;
510 }
511 }
512
513 do {
514 val = val*base + *ctx->ptr-'0';
515 }while(++ctx->ptr < ctx->end && isdigitW(*ctx->ptr));
516
517 /* FIXME: Do we need it here? */
518 if(ctx->ptr < ctx->end && (is_identifier_char(*ctx->ptr) || *ctx->ptr == '.')) {
519 WARN("wrong char after octal literal: '%c'\n", *ctx->ptr);
520 lex_error(ctx, JS_E_MISSING_SEMICOLON);
521 return FALSE;
522 }
523
524 *ret = val;
525 return TRUE;
526 }
527
528 if(is_identifier_char(*ctx->ptr)) {
529 WARN("wrong char after zero\n");
530 lex_error(ctx, JS_E_MISSING_SEMICOLON);
531 return FALSE;
532 }
533 }
534
535 hres = parse_decimal(&ctx->ptr, ctx->end, ret);
536 if(FAILED(hres)) {
537 lex_error(ctx, hres);
538 return FALSE;
539 }
540
541 return TRUE;
542 }
543
544 static int next_token(parser_ctx_t *ctx, void *lval)
545 {
546 do {
547 if(!skip_spaces(ctx))
548 return tEOF;
549 }while(skip_comment(ctx) || skip_html_comment(ctx));
550
551 if(ctx->implicit_nl_semicolon) {
552 if(ctx->nl)
553 return ';';
554 ctx->implicit_nl_semicolon = FALSE;
555 }
556
557 if(isalphaW(*ctx->ptr)) {
558 int ret = check_keywords(ctx, lval);
559 if(ret)
560 return ret;
561
562 return parse_identifier(ctx, lval);
563 }
564
565 if(isdigitW(*ctx->ptr)) {
566 double n;
567
568 if(!parse_numeric_literal(ctx, &n))
569 return -1;
570
571 *(literal_t**)lval = new_double_literal(ctx, n);
572 return tNumericLiteral;
573 }
574
575 switch(*ctx->ptr) {
576 case '{':
577 case '(':
578 case ')':
579 case '[':
580 case ']':
581 case ';':
582 case ',':
583 case '~':
584 case '?':
585 return *ctx->ptr++;
586
587 case '}':
588 *(const WCHAR**)lval = ctx->ptr++;
589 return '}';
590
591 case '.':
592 if(++ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) {
593 double n;
594 HRESULT hres;
595 hres = parse_decimal(&ctx->ptr, ctx->end, &n);
596 if(FAILED(hres)) {
597 lex_error(ctx, hres);
598 return -1;
599 }
600 *(literal_t**)lval = new_double_literal(ctx, n);
601 return tNumericLiteral;
602 }
603 return '.';
604
605 case '<':
606 if(++ctx->ptr == ctx->end) {
607 *(int*)lval = EXPR_LESS;
608 return tRelOper;
609 }
610
611 switch(*ctx->ptr) {
612 case '=': /* <= */
613 ctx->ptr++;
614 *(int*)lval = EXPR_LESSEQ;
615 return tRelOper;
616 case '<': /* << */
617 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* <<= */
618 ctx->ptr++;
619 *(int*)lval = EXPR_ASSIGNLSHIFT;
620 return tAssignOper;
621 }
622 *(int*)lval = EXPR_LSHIFT;
623 return tShiftOper;
624 default: /* < */
625 *(int*)lval = EXPR_LESS;
626 return tRelOper;
627 }
628
629 case '>':
630 if(++ctx->ptr == ctx->end) { /* > */
631 *(int*)lval = EXPR_GREATER;
632 return tRelOper;
633 }
634
635 switch(*ctx->ptr) {
636 case '=': /* >= */
637 ctx->ptr++;
638 *(int*)lval = EXPR_GREATEREQ;
639 return tRelOper;
640 case '>': /* >> */
641 if(++ctx->ptr < ctx->end) {
642 if(*ctx->ptr == '=') { /* >>= */
643 ctx->ptr++;
644 *(int*)lval = EXPR_ASSIGNRSHIFT;
645 return tAssignOper;
646 }
647 if(*ctx->ptr == '>') { /* >>> */
648 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* >>>= */
649 ctx->ptr++;
650 *(int*)lval = EXPR_ASSIGNRRSHIFT;
651 return tAssignOper;
652 }
653 *(int*)lval = EXPR_RRSHIFT;
654 return tRelOper;
655 }
656 }
657 *(int*)lval = EXPR_RSHIFT;
658 return tShiftOper;
659 default:
660 *(int*)lval = EXPR_GREATER;
661 return tRelOper;
662 }
663
664 case '+':
665 ctx->ptr++;
666 if(ctx->ptr < ctx->end) {
667 switch(*ctx->ptr) {
668 case '+': /* ++ */
669 ctx->ptr++;
670 return tINC;
671 case '=': /* += */
672 ctx->ptr++;
673 *(int*)lval = EXPR_ASSIGNADD;
674 return tAssignOper;
675 }
676 }
677 return '+';
678
679 case '-':
680 ctx->ptr++;
681 if(ctx->ptr < ctx->end) {
682 switch(*ctx->ptr) {
683 case '-': /* -- or --> */
684 ctx->ptr++;
685 if(ctx->is_html && ctx->nl && ctx->ptr < ctx->end && *ctx->ptr == '>') {
686 ctx->ptr++;
687 return tHTMLCOMMENT;
688 }
689 return tDEC;
690 case '=': /* -= */
691 ctx->ptr++;
692 *(int*)lval = EXPR_ASSIGNSUB;
693 return tAssignOper;
694 }
695 }
696 return '-';
697
698 case '*':
699 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* *= */
700 ctx->ptr++;
701 *(int*)lval = EXPR_ASSIGNMUL;
702 return tAssignOper;
703 }
704 return '*';
705
706 case '%':
707 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* %= */
708 ctx->ptr++;
709 *(int*)lval = EXPR_ASSIGNMOD;
710 return tAssignOper;
711 }
712 return '%';
713
714 case '&':
715 if(++ctx->ptr < ctx->end) {
716 switch(*ctx->ptr) {
717 case '=': /* &= */
718 ctx->ptr++;
719 *(int*)lval = EXPR_ASSIGNAND;
720 return tAssignOper;
721 case '&': /* && */
722 ctx->ptr++;
723 return tANDAND;
724 }
725 }
726 return '&';
727
728 case '|':
729 if(++ctx->ptr < ctx->end) {
730 switch(*ctx->ptr) {
731 case '=': /* |= */
732 ctx->ptr++;
733 *(int*)lval = EXPR_ASSIGNOR;
734 return tAssignOper;
735 case '|': /* || */
736 ctx->ptr++;
737 return tOROR;
738 }
739 }
740 return '|';
741
742 case '^':
743 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* ^= */
744 ctx->ptr++;
745 *(int*)lval = EXPR_ASSIGNXOR;
746 return tAssignOper;
747 }
748 return '^';
749
750 case '!':
751 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* != */
752 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* !== */
753 ctx->ptr++;
754 *(int*)lval = EXPR_NOTEQEQ;
755 return tEqOper;
756 }
757 *(int*)lval = EXPR_NOTEQ;
758 return tEqOper;
759 }
760 return '!';
761
762 case '=':
763 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* == */
764 if(++ctx->ptr < ctx->end && *ctx->ptr == '=') { /* === */
765 ctx->ptr++;
766 *(int*)lval = EXPR_EQEQ;
767 return tEqOper;
768 }
769 *(int*)lval = EXPR_EQ;
770 return tEqOper;
771 }
772 return '=';
773
774 case '/':
775 if(++ctx->ptr < ctx->end) {
776 if(*ctx->ptr == '=') { /* /= */
777 ctx->ptr++;
778 *(int*)lval = EXPR_ASSIGNDIV;
779 return kDIVEQ;
780 }
781 }
782 return '/';
783
784 case ':':
785 if(++ctx->ptr < ctx->end && *ctx->ptr == ':') {
786 ctx->ptr++;
787 return kDCOL;
788 }
789 return ':';
790
791 case '\"':
792 case '\'':
793 return parse_string_literal(ctx, lval, *ctx->ptr);
794
795 case '_':
796 case '$':
797 return parse_identifier(ctx, lval);
798
799 case '@':
800 return '@';
801 }
802
803 WARN("unexpected char '%c' %d\n", *ctx->ptr, *ctx->ptr);
804 return 0;
805 }
806
807 struct _cc_var_t {
808 ccval_t val;
809 struct _cc_var_t *next;
810 unsigned name_len;
811 WCHAR name[0];
812 };
813
814 void release_cc(cc_ctx_t *cc)
815 {
816 cc_var_t *iter, *next;
817
818 for(iter = cc->vars; iter; iter = next) {
819 next = iter->next;
820 heap_free(iter);
821 }
822
823 heap_free(cc);
824 }
825
826 static BOOL new_cc_var(cc_ctx_t *cc, const WCHAR *name, int len, ccval_t v)
827 {
828 cc_var_t *new_v;
829
830 if(len == -1)
831 len = strlenW(name);
832
833 new_v = heap_alloc(sizeof(cc_var_t) + (len+1)*sizeof(WCHAR));
834 if(!new_v)
835 return FALSE;
836
837 new_v->val = v;
838 memcpy(new_v->name, name, (len+1)*sizeof(WCHAR));
839 new_v->name_len = len;
840 new_v->next = cc->vars;
841 cc->vars = new_v;
842 return TRUE;
843 }
844
845 static cc_var_t *find_cc_var(cc_ctx_t *cc, const WCHAR *name, unsigned name_len)
846 {
847 cc_var_t *iter;
848
849 for(iter = cc->vars; iter; iter = iter->next) {
850 if(iter->name_len == name_len && !memcmp(iter->name, name, name_len*sizeof(WCHAR)))
851 return iter;
852 }
853
854 return NULL;
855 }
856
857 static BOOL init_cc(parser_ctx_t *ctx)
858 {
859 cc_ctx_t *cc;
860
861 static const WCHAR _win32W[] = {'_','w','i','n','3','2',0};
862 static const WCHAR _win64W[] = {'_','w','i','n','6','4',0};
863 static const WCHAR _x86W[] = {'_','x','8','6',0};
864 static const WCHAR _amd64W[] = {'_','a','m','d','6','4',0};
865 static const WCHAR _jscriptW[] = {'_','j','s','c','r','i','p','t',0};
866 static const WCHAR _jscript_buildW[] = {'_','j','s','c','r','i','p','t','_','b','u','i','l','d',0};
867 static const WCHAR _jscript_versionW[] = {'_','j','s','c','r','i','p','t','_','v','e','r','s','i','o','n',0};
868
869 if(ctx->script->cc)
870 return TRUE;
871
872 cc = heap_alloc(sizeof(cc_ctx_t));
873 if(!cc) {
874 lex_error(ctx, E_OUTOFMEMORY);
875 return FALSE;
876 }
877
878 cc->vars = NULL;
879
880 if(!new_cc_var(cc, _jscriptW, -1, ccval_bool(TRUE))
881 || !new_cc_var(cc, sizeof(void*) == 8 ? _win64W : _win32W, -1, ccval_bool(TRUE))
882 || !new_cc_var(cc, sizeof(void*) == 8 ? _amd64W : _x86W, -1, ccval_bool(TRUE))
883 || !new_cc_var(cc, _jscript_versionW, -1, ccval_num(JSCRIPT_MAJOR_VERSION + (DOUBLE)JSCRIPT_MINOR_VERSION/10.0))
884 || !new_cc_var(cc, _jscript_buildW, -1, ccval_num(JSCRIPT_BUILD_VERSION))) {
885 release_cc(cc);
886 lex_error(ctx, E_OUTOFMEMORY);
887 return FALSE;
888 }
889
890 ctx->script->cc = cc;
891 return TRUE;
892 }
893
894 static BOOL parse_cc_identifier(parser_ctx_t *ctx, const WCHAR **ret, unsigned *ret_len)
895 {
896 if(*ctx->ptr != '@') {
897 lex_error(ctx, JS_E_EXPECTED_AT);
898 return FALSE;
899 }
900
901 if(!is_identifier_first_char(*++ctx->ptr)) {
902 lex_error(ctx, JS_E_EXPECTED_IDENTIFIER);
903 return FALSE;
904 }
905
906 *ret = ctx->ptr;
907 while(++ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr));
908 *ret_len = ctx->ptr - *ret;
909 return TRUE;
910 }
911
912 int try_parse_ccval(parser_ctx_t *ctx, ccval_t *r)
913 {
914 if(!skip_spaces(ctx))
915 return -1;
916
917 if(isdigitW(*ctx->ptr)) {
918 double n;
919
920 if(!parse_numeric_literal(ctx, &n))
921 return -1;
922
923 *r = ccval_num(n);
924 return 1;
925 }
926
927 if(*ctx->ptr == '@') {
928 const WCHAR *ident;
929 unsigned ident_len;
930 cc_var_t *cc_var;
931
932 if(!parse_cc_identifier(ctx, &ident, &ident_len))
933 return -1;
934
935 cc_var = find_cc_var(ctx->script->cc, ident, ident_len);
936 *r = cc_var ? cc_var->val : ccval_num(NAN);
937 return 1;
938 }
939
940 if(!check_keyword(ctx, trueW, NULL)) {
941 *r = ccval_bool(TRUE);
942 return 1;
943 }
944
945 if(!check_keyword(ctx, falseW, NULL)) {
946 *r = ccval_bool(FALSE);
947 return 1;
948 }
949
950 return 0;
951 }
952
953 static int skip_code(parser_ctx_t *ctx, BOOL exec_else)
954 {
955 int if_depth = 1;
956 const WCHAR *ptr;
957
958 while(1) {
959 ptr = strchrW(ctx->ptr, '@');
960 if(!ptr) {
961 WARN("No @end\n");
962 return lex_error(ctx, JS_E_EXPECTED_CCEND);
963 }
964 ctx->ptr = ptr+1;
965
966 if(!check_keyword(ctx, endW, NULL)) {
967 if(--if_depth)
968 continue;
969 return 0;
970 }
971
972 if(exec_else && !check_keyword(ctx, elifW, NULL)) {
973 if(if_depth > 1)
974 continue;
975
976 if(!skip_spaces(ctx) || *ctx->ptr != '(')
977 return lex_error(ctx, JS_E_MISSING_LBRACKET);
978
979 if(!parse_cc_expr(ctx))
980 return -1;
981
982 if(!get_ccbool(ctx->ccval))
983 continue; /* skip block of code */
984
985 /* continue parsing */
986 ctx->cc_if_depth++;
987 return 0;
988 }
989
990 if(exec_else && !check_keyword(ctx, elseW, NULL)) {
991 if(if_depth > 1)
992 continue;
993
994 /* parse else block */
995 ctx->cc_if_depth++;
996 return 0;
997 }
998
999 if(!check_keyword(ctx, ifW, NULL)) {
1000 if_depth++;
1001 continue;
1002 }
1003
1004 ctx->ptr++;
1005 }
1006 }
1007
1008 static int cc_token(parser_ctx_t *ctx, void *lval)
1009 {
1010 unsigned id_len = 0;
1011 cc_var_t *var;
1012
1013 static const WCHAR cc_onW[] = {'c','c','_','o','n',0};
1014 static const WCHAR setW[] = {'s','e','t',0};
1015
1016 ctx->ptr++;
1017
1018 if(!check_keyword(ctx, cc_onW, NULL))
1019 return init_cc(ctx) ? 0 : -1;
1020
1021 if(!check_keyword(ctx, setW, NULL)) {
1022 const WCHAR *ident;
1023 unsigned ident_len;
1024 cc_var_t *var;
1025
1026 if(!init_cc(ctx))
1027 return -1;
1028
1029 if(!skip_spaces(ctx))
1030 return lex_error(ctx, JS_E_EXPECTED_AT);
1031
1032 if(!parse_cc_identifier(ctx, &ident, &ident_len))
1033 return -1;
1034
1035 if(!skip_spaces(ctx) || *ctx->ptr != '=')
1036 return lex_error(ctx, JS_E_EXPECTED_ASSIGN);
1037 ctx->ptr++;
1038
1039 if(!parse_cc_expr(ctx)) {
1040 WARN("parsing CC expression failed\n");
1041 return -1;
1042 }
1043
1044 var = find_cc_var(ctx->script->cc, ident, ident_len);
1045 if(var) {
1046 var->val = ctx->ccval;
1047 }else {
1048 if(!new_cc_var(ctx->script->cc, ident, ident_len, ctx->ccval))
1049 return lex_error(ctx, E_OUTOFMEMORY);
1050 }
1051
1052 return 0;
1053 }
1054
1055 if(!check_keyword(ctx, ifW, NULL)) {
1056 if(!init_cc(ctx))
1057 return -1;
1058
1059 if(!skip_spaces(ctx) || *ctx->ptr != '(')
1060 return lex_error(ctx, JS_E_MISSING_LBRACKET);
1061
1062 if(!parse_cc_expr(ctx))
1063 return -1;
1064
1065 if(get_ccbool(ctx->ccval)) {
1066 /* continue parsing block inside if */
1067 ctx->cc_if_depth++;
1068 return 0;
1069 }
1070
1071 return skip_code(ctx, TRUE);
1072 }
1073
1074 if(!check_keyword(ctx, elifW, NULL) || !check_keyword(ctx, elseW, NULL)) {
1075 if(!ctx->cc_if_depth)
1076 return lex_error(ctx, JS_E_SYNTAX);
1077
1078 return skip_code(ctx, FALSE);
1079 }
1080
1081 if(!check_keyword(ctx, endW, NULL)) {
1082 if(!ctx->cc_if_depth)
1083 return lex_error(ctx, JS_E_SYNTAX);
1084
1085 ctx->cc_if_depth--;
1086 return 0;
1087 }
1088
1089 if(!ctx->script->cc)
1090 return lex_error(ctx, JS_E_DISABLED_CC);
1091
1092 while(ctx->ptr+id_len < ctx->end && is_identifier_char(ctx->ptr[id_len]))
1093 id_len++;
1094 if(!id_len)
1095 return '@';
1096
1097 TRACE("var %s\n", debugstr_wn(ctx->ptr, id_len));
1098
1099 var = find_cc_var(ctx->script->cc, ctx->ptr, id_len);
1100 ctx->ptr += id_len;
1101 if(!var || var->val.is_num) {
1102 *(literal_t**)lval = new_double_literal(ctx, var ? var->val.u.n : NAN);
1103 return tNumericLiteral;
1104 }
1105
1106 *(literal_t**)lval = new_boolean_literal(ctx, var->val.u.b);
1107 return tBooleanLiteral;
1108 }
1109
1110 int parser_lex(void *lval, parser_ctx_t *ctx)
1111 {
1112 int ret;
1113
1114 ctx->nl = ctx->ptr == ctx->begin;
1115
1116 do {
1117 ret = next_token(ctx, lval);
1118 } while(ret == '@' && !(ret = cc_token(ctx, lval)));
1119
1120 return ret;
1121 }
1122
1123 literal_t *parse_regexp(parser_ctx_t *ctx)
1124 {
1125 const WCHAR *re, *flags_ptr;
1126 BOOL in_class = FALSE;
1127 DWORD re_len, flags;
1128 literal_t *ret;
1129 HRESULT hres;
1130
1131 TRACE("\n");
1132
1133 while(*--ctx->ptr != '/');
1134
1135 /* Simple regexp pre-parser; '/' if used in char class does not terminate regexp literal */
1136 re = ++ctx->ptr;
1137 while(ctx->ptr < ctx->end) {
1138 if(*ctx->ptr == '\\') {
1139 if(++ctx->ptr == ctx->end)
1140 break;
1141 }else if(in_class) {
1142 if(*ctx->ptr == '\n')
1143 break;
1144 if(*ctx->ptr == ']')
1145 in_class = FALSE;
1146 }else {
1147 if(*ctx->ptr == '/')
1148 break;
1149
1150 if(*ctx->ptr == '[')
1151 in_class = TRUE;
1152 }
1153 ctx->ptr++;
1154 }
1155
1156 if(ctx->ptr == ctx->end || *ctx->ptr != '/') {
1157 WARN("pre-parsing failed\n");
1158 return NULL;
1159 }
1160
1161 re_len = ctx->ptr-re;
1162
1163 flags_ptr = ++ctx->ptr;
1164 while(ctx->ptr < ctx->end && isalnumW(*ctx->ptr))
1165 ctx->ptr++;
1166
1167 hres = parse_regexp_flags(flags_ptr, ctx->ptr-flags_ptr, &flags);
1168 if(FAILED(hres))
1169 return NULL;
1170
1171 ret = parser_alloc(ctx, sizeof(literal_t));
1172 ret->type = LT_REGEXP;
1173 ret->u.regexp.str = re;
1174 ret->u.regexp.str_len = re_len;
1175 ret->u.regexp.flags = flags;
1176 return ret;
1177 }