[VBSCRIPT] Sync with Wine Staging 4.18. CORE-16441
[reactos.git] / dll / win32 / vbscript / lex.c
1 /*
2 * Copyright 2011 Jacek Caban for CodeWeavers
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17 */
18
19 #ifdef __REACTOS__
20 #include <wine/config.h>
21 #include <wine/port.h>
22 #endif
23 #include <assert.h>
24 #include <limits.h>
25 #include <math.h>
26
27 #include "vbscript.h"
28 #include "parse.h"
29 #include "parser.tab.h"
30
31 #include "wine/debug.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(vbscript);
34
35 static const WCHAR andW[] = {'a','n','d',0};
36 static const WCHAR byrefW[] = {'b','y','r','e','f',0};
37 static const WCHAR byvalW[] = {'b','y','v','a','l',0};
38 static const WCHAR callW[] = {'c','a','l','l',0};
39 static const WCHAR caseW[] = {'c','a','s','e',0};
40 static const WCHAR classW[] = {'c','l','a','s','s',0};
41 static const WCHAR constW[] = {'c','o','n','s','t',0};
42 static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0};
43 static const WCHAR dimW[] = {'d','i','m',0};
44 static const WCHAR doW[] = {'d','o',0};
45 static const WCHAR eachW[] = {'e','a','c','h',0};
46 static const WCHAR elseW[] = {'e','l','s','e',0};
47 static const WCHAR elseifW[] = {'e','l','s','e','i','f',0};
48 static const WCHAR emptyW[] = {'e','m','p','t','y',0};
49 static const WCHAR endW[] = {'e','n','d',0};
50 static const WCHAR eqvW[] = {'e','q','v',0};
51 static const WCHAR errorW[] = {'e','r','r','o','r',0};
52 static const WCHAR exitW[] = {'e','x','i','t',0};
53 static const WCHAR explicitW[] = {'e','x','p','l','i','c','i','t',0};
54 static const WCHAR falseW[] = {'f','a','l','s','e',0};
55 static const WCHAR forW[] = {'f','o','r',0};
56 static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0};
57 static const WCHAR getW[] = {'g','e','t',0};
58 static const WCHAR gotoW[] = {'g','o','t','o',0};
59 static const WCHAR ifW[] = {'i','f',0};
60 static const WCHAR impW[] = {'i','m','p',0};
61 static const WCHAR inW[] = {'i','n',0};
62 static const WCHAR isW[] = {'i','s',0};
63 static const WCHAR letW[] = {'l','e','t',0};
64 static const WCHAR loopW[] = {'l','o','o','p',0};
65 static const WCHAR meW[] = {'m','e',0};
66 static const WCHAR modW[] = {'m','o','d',0};
67 static const WCHAR newW[] = {'n','e','w',0};
68 static const WCHAR nextW[] = {'n','e','x','t',0};
69 static const WCHAR notW[] = {'n','o','t',0};
70 static const WCHAR nothingW[] = {'n','o','t','h','i','n','g',0};
71 static const WCHAR nullW[] = {'n','u','l','l',0};
72 static const WCHAR onW[] = {'o','n',0};
73 static const WCHAR optionW[] = {'o','p','t','i','o','n',0};
74 static const WCHAR orW[] = {'o','r',0};
75 static const WCHAR privateW[] = {'p','r','i','v','a','t','e',0};
76 static const WCHAR propertyW[] = {'p','r','o','p','e','r','t','y',0};
77 static const WCHAR publicW[] = {'p','u','b','l','i','c',0};
78 static const WCHAR remW[] = {'r','e','m',0};
79 static const WCHAR resumeW[] = {'r','e','s','u','m','e',0};
80 static const WCHAR selectW[] = {'s','e','l','e','c','t',0};
81 static const WCHAR setW[] = {'s','e','t',0};
82 static const WCHAR stepW[] = {'s','t','e','p',0};
83 static const WCHAR stopW[] = {'s','t','o','p',0};
84 static const WCHAR subW[] = {'s','u','b',0};
85 static const WCHAR thenW[] = {'t','h','e','n',0};
86 static const WCHAR toW[] = {'t','o',0};
87 static const WCHAR trueW[] = {'t','r','u','e',0};
88 static const WCHAR untilW[] = {'u','n','t','i','l',0};
89 static const WCHAR wendW[] = {'w','e','n','d',0};
90 static const WCHAR whileW[] = {'w','h','i','l','e',0};
91 static const WCHAR xorW[] = {'x','o','r',0};
92
93 static const struct {
94 const WCHAR *word;
95 int token;
96 } keywords[] = {
97 {andW, tAND},
98 {byrefW, tBYREF},
99 {byvalW, tBYVAL},
100 {callW, tCALL},
101 {caseW, tCASE},
102 {classW, tCLASS},
103 {constW, tCONST},
104 {defaultW, tDEFAULT},
105 {dimW, tDIM},
106 {doW, tDO},
107 {eachW, tEACH},
108 {elseW, tELSE},
109 {elseifW, tELSEIF},
110 {emptyW, tEMPTY},
111 {endW, tEND},
112 {eqvW, tEQV},
113 {errorW, tERROR},
114 {exitW, tEXIT},
115 {explicitW, tEXPLICIT},
116 {falseW, tFALSE},
117 {forW, tFOR},
118 {functionW, tFUNCTION},
119 {getW, tGET},
120 {gotoW, tGOTO},
121 {ifW, tIF},
122 {impW, tIMP},
123 {inW, tIN},
124 {isW, tIS},
125 {letW, tLET},
126 {loopW, tLOOP},
127 {meW, tME},
128 {modW, tMOD},
129 {newW, tNEW},
130 {nextW, tNEXT},
131 {notW, tNOT},
132 {nothingW, tNOTHING},
133 {nullW, tNULL},
134 {onW, tON},
135 {optionW, tOPTION},
136 {orW, tOR},
137 {privateW, tPRIVATE},
138 {propertyW, tPROPERTY},
139 {publicW, tPUBLIC},
140 {remW, tREM},
141 {resumeW, tRESUME},
142 {selectW, tSELECT},
143 {setW, tSET},
144 {stepW, tSTEP},
145 {stopW, tSTOP},
146 {subW, tSUB},
147 {thenW, tTHEN},
148 {toW, tTO},
149 {trueW, tTRUE},
150 {untilW, tUNTIL},
151 {wendW, tWEND},
152 {whileW, tWHILE},
153 {xorW, tXOR}
154 };
155
156 static inline BOOL is_identifier_char(WCHAR c)
157 {
158 return iswalnum(c) || c == '_';
159 }
160
161 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word, const WCHAR **lval)
162 {
163 const WCHAR *p1 = ctx->ptr;
164 const WCHAR *p2 = word;
165 WCHAR c;
166
167 while(p1 < ctx->end && *p2) {
168 c = towlower(*p1);
169 if(c != *p2)
170 return c - *p2;
171 p1++;
172 p2++;
173 }
174
175 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
176 return 1;
177
178 ctx->ptr = p1;
179 *lval = word;
180 return 0;
181 }
182
183 static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval)
184 {
185 int min = 0, max = ARRAY_SIZE(keywords)-1, r, i;
186
187 while(min <= max) {
188 i = (min+max)/2;
189
190 r = check_keyword(ctx, keywords[i].word, lval);
191 if(!r)
192 return keywords[i].token;
193
194 if(r > 0)
195 min = i+1;
196 else
197 max = i-1;
198 }
199
200 return 0;
201 }
202
203 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
204 {
205 const WCHAR *ptr = ctx->ptr++;
206 WCHAR *str;
207 int len;
208
209 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
210 ctx->ptr++;
211 len = ctx->ptr-ptr;
212
213 str = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
214 if(!str)
215 return 0;
216
217 memcpy(str, ptr, (len+1)*sizeof(WCHAR));
218 str[len] = 0;
219 *ret = str;
220 return tIdentifier;
221 }
222
223 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret)
224 {
225 const WCHAR *ptr = ++ctx->ptr;
226 WCHAR *rptr;
227 int len = 0;
228
229 while(ctx->ptr < ctx->end) {
230 if(*ctx->ptr == '\n' || *ctx->ptr == '\r') {
231 FIXME("newline inside string literal\n");
232 return 0;
233 }
234
235 if(*ctx->ptr == '"') {
236 if(ctx->ptr[1] != '"')
237 break;
238 len--;
239 ctx->ptr++;
240 }
241 ctx->ptr++;
242 }
243
244 if(ctx->ptr == ctx->end) {
245 FIXME("unterminated string literal\n");
246 return 0;
247 }
248
249 len += ctx->ptr-ptr;
250
251 *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
252 if(!rptr)
253 return 0;
254
255 while(ptr < ctx->ptr) {
256 if(*ptr == '"')
257 ptr++;
258 *rptr++ = *ptr++;
259 }
260
261 *rptr = 0;
262 ctx->ptr++;
263 return tString;
264 }
265
266 static int parse_numeric_literal(parser_ctx_t *ctx, void **ret)
267 {
268 BOOL use_int = TRUE;
269 LONGLONG d = 0, hlp;
270 int exp = 0;
271 double r;
272
273 if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.')
274 return *ctx->ptr++;
275
276 while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) {
277 hlp = d*10 + *(ctx->ptr++) - '0';
278 if(d>MAXLONGLONG/10 || hlp<0) {
279 exp++;
280 break;
281 }
282 else
283 d = hlp;
284 }
285 while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) {
286 exp++;
287 ctx->ptr++;
288 }
289
290 if(*ctx->ptr == '.') {
291 use_int = FALSE;
292 ctx->ptr++;
293
294 while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) {
295 hlp = d*10 + *(ctx->ptr++) - '0';
296 if(d>MAXLONGLONG/10 || hlp<0)
297 break;
298
299 d = hlp;
300 exp--;
301 }
302 while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr))
303 ctx->ptr++;
304 }
305
306 if(*ctx->ptr == 'e' || *ctx->ptr == 'E') {
307 int e = 0, sign = 1;
308
309 ctx->ptr++;
310 if(*ctx->ptr == '-') {
311 ctx->ptr++;
312 sign = -1;
313 }else if(*ctx->ptr == '+') {
314 ctx->ptr++;
315 }
316
317 if(!iswdigit(*ctx->ptr)) {
318 FIXME("Invalid numeric literal\n");
319 return 0;
320 }
321
322 use_int = FALSE;
323
324 do {
325 e = e*10 + *(ctx->ptr++) - '0';
326 if(sign == -1 && -e+exp < -(INT_MAX/100)) {
327 /* The literal will be rounded to 0 anyway. */
328 while(iswdigit(*ctx->ptr))
329 ctx->ptr++;
330 *(double*)ret = 0;
331 return tDouble;
332 }
333
334 if(sign*e + exp > INT_MAX/100) {
335 FIXME("Invalid numeric literal\n");
336 return 0;
337 }
338 } while(iswdigit(*ctx->ptr));
339
340 exp += sign*e;
341 }
342
343 if(use_int && (LONG)d == d) {
344 *(LONG*)ret = d;
345 return tInt;
346 }
347
348 r = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp);
349 if(isinf(r)) {
350 FIXME("Invalid numeric literal\n");
351 return 0;
352 }
353
354 *(double*)ret = r;
355 return tDouble;
356 }
357
358 static int hex_to_int(WCHAR c)
359 {
360 if('0' <= c && c <= '9')
361 return c-'0';
362 if('a' <= c && c <= 'f')
363 return c+10-'a';
364 if('A' <= c && c <= 'F')
365 return c+10-'A';
366 return -1;
367 }
368
369 static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret)
370 {
371 const WCHAR *begin = ctx->ptr;
372 LONG l = 0, d;
373
374 while((d = hex_to_int(*++ctx->ptr)) != -1)
375 l = l*16 + d;
376
377 if(begin + 9 /* max digits+1 */ < ctx->ptr || (*ctx->ptr != '&' && is_identifier_char(*ctx->ptr))) {
378 FIXME("invalid literal\n");
379 return 0;
380 }
381
382 if(*ctx->ptr == '&')
383 ctx->ptr++;
384
385 *ret = l;
386 return tInt;
387 }
388
389 static void skip_spaces(parser_ctx_t *ctx)
390 {
391 while(*ctx->ptr == ' ' || *ctx->ptr == '\t')
392 ctx->ptr++;
393 }
394
395 static int comment_line(parser_ctx_t *ctx)
396 {
397 static const WCHAR newlineW[] = {'\n','\r',0};
398 ctx->ptr = wcspbrk(ctx->ptr, newlineW);
399 if(ctx->ptr)
400 ctx->ptr++;
401 else
402 ctx->ptr = ctx->end;
403 return tNL;
404 }
405
406 static int parse_next_token(void *lval, parser_ctx_t *ctx)
407 {
408 WCHAR c;
409
410 skip_spaces(ctx);
411 if(ctx->ptr == ctx->end)
412 return ctx->last_token == tNL ? tEOF : tNL;
413
414 c = *ctx->ptr;
415
416 if('0' <= c && c <= '9')
417 return parse_numeric_literal(ctx, lval);
418
419 if(iswalpha(c)) {
420 int ret = check_keywords(ctx, lval);
421 if(!ret)
422 return parse_identifier(ctx, lval);
423 if(ret != tREM)
424 return ret;
425 c = '\'';
426 }
427
428 switch(c) {
429 case '\n':
430 case '\r':
431 ctx->ptr++;
432 return tNL;
433 case '\'':
434 return comment_line(ctx);
435 case ':':
436 case ')':
437 case ',':
438 case '=':
439 case '+':
440 case '*':
441 case '/':
442 case '^':
443 case '\\':
444 case '.':
445 case '_':
446 return *ctx->ptr++;
447 case '-':
448 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '>')
449 return comment_line(ctx);
450 ctx->ptr++;
451 return '-';
452 case '(':
453 /* NOTE:
454 * We resolve empty brackets in lexer instead of parser to avoid complex conflicts
455 * in call statement special case |f()| without 'call' keyword
456 */
457 ctx->ptr++;
458 skip_spaces(ctx);
459 if(*ctx->ptr == ')') {
460 ctx->ptr++;
461 return tEMPTYBRACKETS;
462 }
463 return '(';
464 case '"':
465 return parse_string_literal(ctx, lval);
466 case '&':
467 if(*++ctx->ptr == 'h' || *ctx->ptr == 'H')
468 return parse_hex_literal(ctx, lval);
469 return '&';
470 case '<':
471 switch(*++ctx->ptr) {
472 case '>':
473 ctx->ptr++;
474 return tNEQ;
475 case '=':
476 ctx->ptr++;
477 return tLTEQ;
478 case '!':
479 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '-')
480 return comment_line(ctx);
481 }
482 return '<';
483 case '>':
484 if(*++ctx->ptr == '=') {
485 ctx->ptr++;
486 return tGTEQ;
487 }
488 return '>';
489 default:
490 FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr));
491 }
492
493 return 0;
494 }
495
496 int parser_lex(void *lval, parser_ctx_t *ctx)
497 {
498 int ret;
499
500 if (ctx->last_token == tEXPRESSION)
501 {
502 ctx->last_token = tNL;
503 return tEXPRESSION;
504 }
505
506 while(1) {
507 ret = parse_next_token(lval, ctx);
508 if(ret == '_') {
509 skip_spaces(ctx);
510 if(*ctx->ptr != '\n' && *ctx->ptr != '\r') {
511 FIXME("'_' not followed by newline\n");
512 return 0;
513 }
514 if(*ctx->ptr == '\r')
515 ctx->ptr++;
516 if(*ctx->ptr == '\n')
517 ctx->ptr++;
518 continue;
519 }
520 if(ret != tNL || ctx->last_token != tNL)
521 break;
522
523 ctx->last_nl = ctx->ptr-ctx->code;
524 }
525
526 return (ctx->last_token = ret);
527 }