dll/win32/vbscript/lex.c

   1 /*
   2  * Copyright 2011 Jacek Caban for CodeWeavers
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  17  */
  18
  19 #ifdef __REACTOS__
  20 #include <wine/config.h>
  21 #include <wine/port.h>
  22 #endif
  23 #include <assert.h>
  24 #include <limits.h>
  25 #include <math.h>
  26
  27 #include "vbscript.h"
  28 #include "parse.h"
  29 #include "parser.tab.h"
  30
  31 #include "wine/debug.h"
  32
  33 WINE_DEFAULT_DEBUG_CHANNEL(vbscript);
  34
/*
 * Lower-case spellings of the words the lexer recognizes as keywords.
 * check_keyword() lower-cases the input with towlower() before comparing it
 * against these strings, so they must stay in lower case.
 */
static const WCHAR andW[] = {'a','n','d',0};
static const WCHAR byrefW[] = {'b','y','r','e','f',0};
static const WCHAR byvalW[] = {'b','y','v','a','l',0};
static const WCHAR callW[] = {'c','a','l','l',0};
static const WCHAR caseW[] = {'c','a','s','e',0};
static const WCHAR classW[] = {'c','l','a','s','s',0};
static const WCHAR constW[] = {'c','o','n','s','t',0};
static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0};
static const WCHAR dimW[] = {'d','i','m',0};
static const WCHAR doW[] = {'d','o',0};
static const WCHAR eachW[] = {'e','a','c','h',0};
static const WCHAR elseW[] = {'e','l','s','e',0};
static const WCHAR elseifW[] = {'e','l','s','e','i','f',0};
static const WCHAR emptyW[] = {'e','m','p','t','y',0};
static const WCHAR endW[] = {'e','n','d',0};
static const WCHAR eqvW[] = {'e','q','v',0};
static const WCHAR errorW[] = {'e','r','r','o','r',0};
static const WCHAR exitW[] = {'e','x','i','t',0};
static const WCHAR explicitW[] = {'e','x','p','l','i','c','i','t',0};
static const WCHAR falseW[] = {'f','a','l','s','e',0};
static const WCHAR forW[] = {'f','o','r',0};
static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0};
static const WCHAR getW[] = {'g','e','t',0};
static const WCHAR gotoW[] = {'g','o','t','o',0};
static const WCHAR ifW[] = {'i','f',0};
static const WCHAR impW[] = {'i','m','p',0};
static const WCHAR inW[] = {'i','n',0};
static const WCHAR isW[] = {'i','s',0};
static const WCHAR letW[] = {'l','e','t',0};
static const WCHAR loopW[] = {'l','o','o','p',0};
static const WCHAR meW[] = {'m','e',0};
static const WCHAR modW[] = {'m','o','d',0};
static const WCHAR newW[] = {'n','e','w',0};
static const WCHAR nextW[] = {'n','e','x','t',0};
static const WCHAR notW[] = {'n','o','t',0};
static const WCHAR nothingW[] = {'n','o','t','h','i','n','g',0};
static const WCHAR nullW[] = {'n','u','l','l',0};
static const WCHAR onW[] = {'o','n',0};
static const WCHAR optionW[] = {'o','p','t','i','o','n',0};
static const WCHAR orW[] = {'o','r',0};
static const WCHAR privateW[] = {'p','r','i','v','a','t','e',0};
static const WCHAR propertyW[] = {'p','r','o','p','e','r','t','y',0};
static const WCHAR publicW[] = {'p','u','b','l','i','c',0};
static const WCHAR remW[] = {'r','e','m',0};
static const WCHAR resumeW[] = {'r','e','s','u','m','e',0};
static const WCHAR selectW[] = {'s','e','l','e','c','t',0};
static const WCHAR setW[] = {'s','e','t',0};
static const WCHAR stepW[] = {'s','t','e','p',0};
static const WCHAR stopW[] = {'s','t','o','p',0};
static const WCHAR subW[] = {'s','u','b',0};
static const WCHAR thenW[] = {'t','h','e','n',0};
static const WCHAR toW[] = {'t','o',0};
static const WCHAR trueW[] = {'t','r','u','e',0};
static const WCHAR untilW[] = {'u','n','t','i','l',0};
static const WCHAR wendW[] = {'w','e','n','d',0};
static const WCHAR whileW[] = {'w','h','i','l','e',0};
static const WCHAR xorW[] = {'x','o','r',0};
  92
/*
 * Maps each keyword spelling to the token returned for it.
 *
 * check_keywords() runs a binary search over this table, using the sign of
 * check_keyword()'s character-by-character comparison to pick a half, so the
 * entries MUST stay sorted by word. A new keyword has to be inserted at its
 * sorted position, not appended.
 */
static const struct {
    const WCHAR *word;
    int token;
} keywords[] = {
    {andW,       tAND},
    {byrefW,     tBYREF},
    {byvalW,     tBYVAL},
    {callW,      tCALL},
    {caseW,      tCASE},
    {classW,     tCLASS},
    {constW,     tCONST},
    {defaultW,   tDEFAULT},
    {dimW,       tDIM},
    {doW,        tDO},
    {eachW,      tEACH},
    {elseW,      tELSE},
    {elseifW,    tELSEIF},
    {emptyW,     tEMPTY},
    {endW,       tEND},
    {eqvW,       tEQV},
    {errorW,     tERROR},
    {exitW,      tEXIT},
    {explicitW,  tEXPLICIT},
    {falseW,     tFALSE},
    {forW,       tFOR},
    {functionW,  tFUNCTION},
    {getW,       tGET},
    {gotoW,      tGOTO},
    {ifW,        tIF},
    {impW,       tIMP},
    {inW,        tIN},
    {isW,        tIS},
    {letW,       tLET},
    {loopW,      tLOOP},
    {meW,        tME},
    {modW,       tMOD},
    {newW,       tNEW},
    {nextW,      tNEXT},
    {notW,       tNOT},
    {nothingW,   tNOTHING},
    {nullW,      tNULL},
    {onW,        tON},
    {optionW,    tOPTION},
    {orW,        tOR},
    {privateW,   tPRIVATE},
    {propertyW,  tPROPERTY},
    {publicW,    tPUBLIC},
    {remW,       tREM},
    {resumeW,    tRESUME},
    {selectW,    tSELECT},
    {setW,       tSET},
    {stepW,      tSTEP},
    {stopW,      tSTOP},
    {subW,       tSUB},
    {thenW,      tTHEN},
    {toW,        tTO},
    {trueW,      tTRUE},
    {untilW,     tUNTIL},
    {wendW,      tWEND},
    {whileW,     tWHILE},
    {xorW,       tXOR}
};
 155
 156 static inline BOOL is_identifier_char(WCHAR c)
 157 {
 158     return iswalnum(c) || c == '_';
 159 }
 160
 161 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word, const WCHAR **lval)
 162 {
 163     const WCHAR *p1 = ctx->ptr;
 164     const WCHAR *p2 = word;
 165     WCHAR c;
 166
 167     while(p1 < ctx->end && *p2) {
 168         c = towlower(*p1);
 169         if(c != *p2)
 170             return c - *p2;
 171         p1++;
 172         p2++;
 173     }
 174
 175     if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
 176         return 1;
 177
 178     ctx->ptr = p1;
 179     *lval = word;
 180     return 0;
 181 }
 182
 183 static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval)
 184 {
 185     int min = 0, max = ARRAY_SIZE(keywords)-1, r, i;
 186
 187     while(min <= max) {
 188         i = (min+max)/2;
 189
 190         r = check_keyword(ctx, keywords[i].word, lval);
 191         if(!r)
 192             return keywords[i].token;
 193
 194         if(r > 0)
 195             min = i+1;
 196         else
 197             max = i-1;
 198     }
 199
 200     return 0;
 201 }
 202
 203 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
 204 {
 205     const WCHAR *ptr = ctx->ptr++;
 206     WCHAR *str;
 207     int len;
 208
 209     while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
 210         ctx->ptr++;
 211     len = ctx->ptr-ptr;
 212
 213     str = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
 214     if(!str)
 215         return 0;
 216
 217     memcpy(str, ptr, (len+1)*sizeof(WCHAR));
 218     str[len] = 0;
 219     *ret = str;
 220     return tIdentifier;
 221 }
 222
 223 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret)
 224 {
 225     const WCHAR *ptr = ++ctx->ptr;
 226     WCHAR *rptr;
 227     int len = 0;
 228
 229     while(ctx->ptr < ctx->end) {
 230         if(*ctx->ptr == '\n' || *ctx->ptr == '\r') {
 231             FIXME("newline inside string literal\n");
 232             return 0;
 233         }
 234
 235        if(*ctx->ptr == '"') {
 236             if(ctx->ptr[1] != '"')
 237                 break;
 238             len--;
 239             ctx->ptr++;
 240         }
 241         ctx->ptr++;
 242     }
 243
 244     if(ctx->ptr == ctx->end) {
 245         FIXME("unterminated string literal\n");
 246         return 0;
 247     }
 248
 249     len += ctx->ptr-ptr;
 250
 251     *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
 252     if(!rptr)
 253         return 0;
 254
 255     while(ptr < ctx->ptr) {
 256         if(*ptr == '"')
 257             ptr++;
 258         *rptr++ = *ptr++;
 259     }
 260
 261     *rptr = 0;
 262     ctx->ptr++;
 263     return tString;
 264 }
 265
 266 static int parse_numeric_literal(parser_ctx_t *ctx, void **ret)
 267 {
 268     BOOL use_int = TRUE;
 269     LONGLONG d = 0, hlp;
 270     int exp = 0;
 271     double r;
 272
 273     if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.')
 274         return *ctx->ptr++;
 275
 276     while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) {
 277         hlp = d*10 + *(ctx->ptr++) - '0';
 278         if(d>MAXLONGLONG/10 || hlp<0) {
 279             exp++;
 280             break;
 281         }
 282         else
 283             d = hlp;
 284     }
 285     while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) {
 286         exp++;
 287         ctx->ptr++;
 288     }
 289
 290     if(*ctx->ptr == '.') {
 291         use_int = FALSE;
 292         ctx->ptr++;
 293
 294         while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) {
 295             hlp = d*10 + *(ctx->ptr++) - '0';
 296             if(d>MAXLONGLONG/10 || hlp<0)
 297                 break;
 298
 299             d = hlp;
 300             exp--;
 301         }
 302         while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr))
 303             ctx->ptr++;
 304     }
 305
 306     if(*ctx->ptr == 'e' || *ctx->ptr == 'E') {
 307         int e = 0, sign = 1;
 308
 309         ctx->ptr++;
 310         if(*ctx->ptr == '-') {
 311             ctx->ptr++;
 312             sign = -1;
 313         }else if(*ctx->ptr == '+') {
 314             ctx->ptr++;
 315         }
 316
 317         if(!iswdigit(*ctx->ptr)) {
 318             FIXME("Invalid numeric literal\n");
 319             return 0;
 320         }
 321
 322         use_int = FALSE;
 323
 324         do {
 325             e = e*10 + *(ctx->ptr++) - '0';
 326             if(sign == -1 && -e+exp < -(INT_MAX/100)) {
 327                 /* The literal will be rounded to 0 anyway. */
 328                 while(iswdigit(*ctx->ptr))
 329                     ctx->ptr++;
 330                 *(double*)ret = 0;
 331                 return tDouble;
 332             }
 333
 334             if(sign*e + exp > INT_MAX/100) {
 335                 FIXME("Invalid numeric literal\n");
 336                 return 0;
 337             }
 338         } while(iswdigit(*ctx->ptr));
 339
 340         exp += sign*e;
 341     }
 342
 343     if(use_int && (LONG)d == d) {
 344         *(LONG*)ret = d;
 345         return tInt;
 346     }
 347
 348     r = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp);
 349     if(isinf(r)) {
 350         FIXME("Invalid numeric literal\n");
 351         return 0;
 352     }
 353
 354     *(double*)ret = r;
 355     return tDouble;
 356 }
 357
 358 static int hex_to_int(WCHAR c)
 359 {
 360     if('0' <= c && c <= '9')
 361         return c-'0';
 362     if('a' <= c && c <= 'f')
 363         return c+10-'a';
 364     if('A' <= c && c <= 'F')
 365         return c+10-'A';
 366     return -1;
 367 }
 368
 369 static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret)
 370 {
 371     const WCHAR *begin = ctx->ptr;
 372     LONG l = 0, d;
 373
 374     while((d = hex_to_int(*++ctx->ptr)) != -1)
 375         l = l*16 + d;
 376
 377     if(begin + 9 /* max digits+1 */ < ctx->ptr || (*ctx->ptr != '&' && is_identifier_char(*ctx->ptr))) {
 378         FIXME("invalid literal\n");
 379         return 0;
 380     }
 381
 382     if(*ctx->ptr == '&')
 383         ctx->ptr++;
 384
 385     *ret = l;
 386     return tInt;
 387 }
 388
 389 static void skip_spaces(parser_ctx_t *ctx)
 390 {
 391     while(*ctx->ptr == ' ' || *ctx->ptr == '\t')
 392         ctx->ptr++;
 393 }
 394
 395 static int comment_line(parser_ctx_t *ctx)
 396 {
 397     static const WCHAR newlineW[] = {'\n','\r',0};
 398     ctx->ptr = wcspbrk(ctx->ptr, newlineW);
 399     if(ctx->ptr)
 400         ctx->ptr++;
 401     else
 402         ctx->ptr = ctx->end;
 403     return tNL;
 404 }
 405
 406 static int parse_next_token(void *lval, parser_ctx_t *ctx)
 407 {
 408     WCHAR c;
 409
 410     skip_spaces(ctx);
 411     if(ctx->ptr == ctx->end)
 412         return ctx->last_token == tNL ? tEOF : tNL;
 413
 414     c = *ctx->ptr;
 415
 416     if('0' <= c && c <= '9')
 417         return parse_numeric_literal(ctx, lval);
 418
 419     if(iswalpha(c)) {
 420         int ret = check_keywords(ctx, lval);
 421         if(!ret)
 422             return parse_identifier(ctx, lval);
 423         if(ret != tREM)
 424             return ret;
 425         c = '\'';
 426     }
 427
 428     switch(c) {
 429     case '\n':
 430     case '\r':
 431         ctx->ptr++;
 432         return tNL;
 433     case '\'':
 434         return comment_line(ctx);
 435     case ':':
 436     case ')':
 437     case ',':
 438     case '=':
 439     case '+':
 440     case '*':
 441     case '/':
 442     case '^':
 443     case '\\':
 444     case '.':
 445     case '_':
 446         return *ctx->ptr++;
 447     case '-':
 448         if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '>')
 449             return comment_line(ctx);
 450         ctx->ptr++;
 451         return '-';
 452     case '(':
 453         /* NOTE:
 454          * We resolve empty brackets in lexer instead of parser to avoid complex conflicts
 455          * in call statement special case |f()| without 'call' keyword
 456          */
 457         ctx->ptr++;
 458         skip_spaces(ctx);
 459         if(*ctx->ptr == ')') {
 460             ctx->ptr++;
 461             return tEMPTYBRACKETS;
 462         }
 463         return '(';
 464     case '"':
 465         return parse_string_literal(ctx, lval);
 466     case '&':
 467         if(*++ctx->ptr == 'h' || *ctx->ptr == 'H')
 468             return parse_hex_literal(ctx, lval);
 469         return '&';
 470     case '<':
 471         switch(*++ctx->ptr) {
 472         case '>':
 473             ctx->ptr++;
 474             return tNEQ;
 475         case '=':
 476             ctx->ptr++;
 477             return tLTEQ;
 478         case '!':
 479             if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '-')
 480                 return comment_line(ctx);
 481         }
 482         return '<';
 483     case '>':
 484         if(*++ctx->ptr == '=') {
 485             ctx->ptr++;
 486             return tGTEQ;
 487         }
 488         return '>';
 489     default:
 490         FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr));
 491     }
 492
 493     return 0;
 494 }
 495
 496 int parser_lex(void *lval, parser_ctx_t *ctx)
 497 {
 498     int ret;
 499
 500     if (ctx->last_token == tEXPRESSION)
 501     {
 502         ctx->last_token = tNL;
 503         return tEXPRESSION;
 504     }
 505
 506     while(1) {
 507         ret = parse_next_token(lval, ctx);
 508         if(ret == '_') {
 509             skip_spaces(ctx);
 510             if(*ctx->ptr != '\n' && *ctx->ptr != '\r') {
 511                 FIXME("'_' not followed by newline\n");
 512                 return 0;
 513             }
 514             if(*ctx->ptr == '\r')
 515                 ctx->ptr++;
 516             if(*ctx->ptr == '\n')
 517                 ctx->ptr++;
 518             continue;
 519         }
 520         if(ret != tNL || ctx->last_token != tNL)
 521             break;
 522
 523         ctx->last_nl = ctx->ptr-ctx->code;
 524     }
 525
 526     return (ctx->last_token = ret);
 527 }