287110a2dfa8ad6ca1cebc816167099cbd1b0451
[reactos.git] / reactos / base / shell / cmd / parser.c
1 #include <precomp.h>
2
3 #define C_OP_LOWEST C_MULTI
4 #define C_OP_HIGHEST C_PIPE
5 static const TCHAR OpString[][3] = { _T("&"), _T("||"), _T("&&"), _T("|") };
6
7 static const TCHAR RedirString[][3] = { _T("<"), _T(">"), _T(">>") };
8
9 static BOOL IsSeparator(TCHAR Char)
10 {
11 /* These three characters act like spaces to the parser */
12 return _istspace(Char) || (Char && _tcschr(_T(",;="), Char));
13 }
14
15 enum { TOK_END, TOK_NORMAL, TOK_OPERATOR, TOK_REDIRECTION,
16 TOK_BEGIN_BLOCK, TOK_END_BLOCK };
17
18 static BOOL bParseError;
19 static BOOL bLineContinuations;
20 static TCHAR ParseLine[CMDLINE_LENGTH];
21 static TCHAR *ParsePos;
22 static TCHAR CurChar;
23
24 static TCHAR CurrentToken[CMDLINE_LENGTH];
25 static int CurrentTokenType;
26 static int InsideBlock;
27
28 static TCHAR ParseChar()
29 {
30 TCHAR Char;
31
32 if (bParseError)
33 return CurChar = 0;
34
35 restart:
36 /* Although CRs can be injected into a line via an environment
37 * variable substitution, the parser ignores them - they won't
38 * even separate tokens. */
39 do
40 Char = *ParsePos++;
41 while (Char == _T('\r'));
42
43 if (!Char)
44 {
45 ParsePos--;
46 if (bLineContinuations)
47 {
48 if (!ReadLine(ParseLine, TRUE))
49 {
50 /* ^C pressed, or line was too long */
51 bParseError = TRUE;
52 }
53 else if (*(ParsePos = ParseLine))
54 {
55 goto restart;
56 }
57 }
58 }
59 return CurChar = Char;
60 }
61
62 static void ParseError()
63 {
64 if (CurrentTokenType == TOK_END)
65 ConOutResPuts(STRING_SYNTAX_COMMAND_INCORRECT);
66 else
67 ConOutPrintf(_T("%s was unexpected at this time.\n"), CurrentToken);
68 bParseError = TRUE;
69 }
70
71 /* Yes, cmd has a Lexical Analyzer. Whenever the parser gives an "xxx was
72 * unexpected at this time." message, it shows what the last token read was */
73 static int ParseToken(TCHAR ExtraEnd, BOOL PreserveSpace)
74 {
75 TCHAR *Out = CurrentToken;
76 TCHAR Char = CurChar;
77 int Type;
78 BOOL bInQuote = FALSE;
79
80 if (!PreserveSpace)
81 {
82 while (Char != _T('\n') && IsSeparator(Char))
83 Char = ParseChar();
84 }
85
86 while (Char && Char != _T('\n'))
87 {
88 bInQuote ^= (Char == _T('"'));
89 if (!bInQuote)
90 {
91 /* Check for all the myriad ways in which this token
92 * may be brought to an untimely end. */
93 if ((Char >= _T('0') && Char <= _T('9') &&
94 (ParsePos == &ParseLine[1] || IsSeparator(ParsePos[-2]))
95 && (*ParsePos == _T('<') || *ParsePos == _T('>')))
96 || _tcschr(_T(")&|<>") + (InsideBlock ? 0 : 1), Char)
97 || (!PreserveSpace && IsSeparator(Char))
98 || (Char == ExtraEnd))
99 {
100 break;
101 }
102
103 if (Char == _T('^'))
104 {
105 Char = ParseChar();
106 /* Eat up a \n, allowing line continuation */
107 if (Char == _T('\n'))
108 Char = ParseChar();
109 /* Next character is a forced literal */
110 }
111 }
112 if (Out == &CurrentToken[CMDLINE_LENGTH - 1])
113 break;
114 *Out++ = Char;
115 Char = ParseChar();
116 }
117
118 /* Check if we got at least one character before reaching a special one.
119 * If so, return them and leave the special for the next call. */
120 if (Out != CurrentToken)
121 {
122 Type = TOK_NORMAL;
123 }
124 else if (Char == _T('('))
125 {
126 Type = TOK_BEGIN_BLOCK;
127 *Out++ = Char;
128 ParseChar();
129 }
130 else if (Char == _T(')'))
131 {
132 Type = TOK_END_BLOCK;
133 *Out++ = Char;
134 ParseChar();
135 }
136 else if (Char == _T('&') || Char == _T('|'))
137 {
138 Type = TOK_OPERATOR;
139 *Out++ = Char;
140 Char = ParseChar();
141 /* check for && or || */
142 if (Char == Out[-1])
143 {
144 *Out++ = Char;
145 ParseChar();
146 }
147 }
148 else if ((Char >= _T('0') && Char <= _T('9'))
149 || (Char == _T('<') || Char == _T('>')))
150 {
151 Type = TOK_REDIRECTION;
152 if (Char >= _T('0') && Char <= _T('9'))
153 {
154 *Out++ = Char;
155 Char = ParseChar();
156 }
157 *Out++ = Char;
158 Char = ParseChar();
159 if (Char == Out[-1])
160 {
161 /* Strangely, the tokenizer allows << as well as >>... (it
162 * will cause an error when trying to parse it though) */
163 *Out++ = Char;
164 Char = ParseChar();
165 }
166 if (Char == _T('&'))
167 {
168 *Out++ = Char;
169 while (IsSeparator(Char = ParseChar()))
170 ;
171 if (Char >= _T('0') && Char <= _T('9'))
172 {
173 *Out++ = Char;
174 ParseChar();
175 }
176 }
177 }
178 else
179 {
180 Type = TOK_END;
181 }
182 *Out = _T('\0');
183 return CurrentTokenType = Type;
184 }
185
186 static BOOL ParseRedirection(REDIRECTION **List)
187 {
188 TCHAR *Tok = CurrentToken;
189 BYTE Number;
190 BYTE RedirType;
191 REDIRECTION *Redir;
192
193 if (*Tok >= _T('0') && *Tok <= _T('9'))
194 Number = *Tok++ - _T('0');
195 else
196 Number = *Tok == _T('<') ? 0 : 1;
197
198 if (*Tok++ == _T('<'))
199 {
200 RedirType = REDIR_READ;
201 if (*Tok == _T('<'))
202 goto fail;
203 }
204 else
205 {
206 RedirType = REDIR_WRITE;
207 if (*Tok == _T('>'))
208 {
209 RedirType = REDIR_APPEND;
210 Tok++;
211 }
212 }
213
214 if (!*Tok)
215 {
216 /* The file name was not part of this token, so it'll be the next one */
217 if (ParseToken(0, FALSE) != TOK_NORMAL)
218 goto fail;
219 Tok = CurrentToken;
220 }
221
222 /* If a redirection for this handle number already exists, delete it */
223 while ((Redir = *List))
224 {
225 if (Redir->Number == Number)
226 {
227 *List = Redir->Next;
228 cmd_free(Redir);
229 continue;
230 }
231 List = &Redir->Next;
232 }
233
234 Redir = cmd_alloc(FIELD_OFFSET(REDIRECTION, Filename[_tcslen(Tok) + 1]));
235 Redir->Next = NULL;
236 Redir->OldHandle = INVALID_HANDLE_VALUE;
237 Redir->Number = Number;
238 Redir->Type = RedirType;
239 _tcscpy(Redir->Filename, Tok);
240 *List = Redir;
241 return TRUE;
242 fail:
243 ParseError();
244 FreeRedirection(*List);
245 *List = NULL;
246 return FALSE;
247 }
248
249 static PARSED_COMMAND *ParseCommandOp(int OpType);
250
251 /* Parse a parenthesized block */
252 static PARSED_COMMAND *ParseBlock(REDIRECTION *RedirList)
253 {
254 PARSED_COMMAND *Cmd, *Sub, **NextPtr;
255 Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
256 Cmd->Type = C_BLOCK;
257 Cmd->Next = NULL;
258 Cmd->Subcommands = NULL;
259 Cmd->Redirections = RedirList;
260
261 /* Read the block contents */
262 NextPtr = &Cmd->Subcommands;
263 InsideBlock++;
264 while (1)
265 {
266 Sub = ParseCommandOp(C_OP_LOWEST);
267 if (Sub)
268 {
269 *NextPtr = Sub;
270 NextPtr = &Sub->Next;
271 }
272 else if (bParseError)
273 {
274 InsideBlock--;
275 FreeCommand(Cmd);
276 return NULL;
277 }
278
279 if (CurrentTokenType == TOK_END_BLOCK)
280 break;
281 /* Skip past the \n */
282 ParseChar();
283 }
284 InsideBlock--;
285
286 /* Process any trailing redirections */
287 while (ParseToken(0, FALSE) == TOK_REDIRECTION)
288 {
289 if (!ParseRedirection(&Cmd->Redirections))
290 {
291 FreeCommand(Cmd);
292 return NULL;
293 }
294 }
295 return Cmd;
296 }
297
298 static PARSED_COMMAND *ParseCommandPart(void)
299 {
300 TCHAR ParsedLine[CMDLINE_LENGTH];
301 TCHAR *Pos;
302 DWORD TailOffset;
303 PARSED_COMMAND *Cmd;
304 REDIRECTION *RedirList = NULL;
305 int Type;
306
307 while (IsSeparator(CurChar))
308 {
309 if (CurChar == _T('\n'))
310 return NULL;
311 ParseChar();
312 }
313
314 if (!CurChar)
315 return NULL;
316
317 if (CurChar == _T(':'))
318 {
319 /* "Ignore" the rest of the line.
320 * (Line continuations will still be parsed, though.) */
321 while (ParseToken(0, TRUE) != TOK_END)
322 ;
323 return NULL;
324 }
325
326 if (CurChar == _T('@'))
327 {
328 ParseChar();
329 Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
330 Cmd->Type = C_QUIET;
331 Cmd->Next = NULL;
332 /* @ acts like a unary operator with low precedence,
333 * so call the top-level parser */
334 Cmd->Subcommands = ParseCommandOp(C_OP_LOWEST);
335 Cmd->Redirections = NULL;
336 return Cmd;
337 }
338
339 /* Get the head of the command */
340 while (1)
341 {
342 Type = ParseToken(_T('('), FALSE);
343 if (Type == TOK_NORMAL)
344 {
345 Pos = _stpcpy(ParsedLine, CurrentToken);
346 break;
347 }
348 else if (Type == TOK_REDIRECTION)
349 {
350 if (!ParseRedirection(&RedirList))
351 return NULL;
352 }
353 else if (Type == TOK_BEGIN_BLOCK)
354 {
355 return ParseBlock(RedirList);
356 }
357 else if (Type == TOK_END_BLOCK && !RedirList)
358 {
359 return NULL;
360 }
361 else
362 {
363 ParseError();
364 FreeRedirection(RedirList);
365 return NULL;
366 }
367 }
368 TailOffset = Pos - ParsedLine;
369
370 /* FIXME: FOR, IF, and REM need special processing by the parser. */
371
372 /* Now get the tail */
373 while (1)
374 {
375 Type = ParseToken(0, TRUE);
376 if (Type == TOK_NORMAL)
377 {
378 if (Pos + _tcslen(CurrentToken) >= &ParsedLine[CMDLINE_LENGTH])
379 {
380 ParseError();
381 FreeRedirection(RedirList);
382 return NULL;
383 }
384 Pos = _stpcpy(Pos, CurrentToken);
385 }
386 else if (Type == TOK_REDIRECTION)
387 {
388 if (!ParseRedirection(&RedirList))
389 return NULL;
390 }
391 else
392 {
393 break;
394 }
395 }
396
397 Cmd = cmd_alloc(FIELD_OFFSET(PARSED_COMMAND, CommandLine[Pos + 1 - ParsedLine]));
398 Cmd->Type = C_COMMAND;
399 Cmd->Next = NULL;
400 Cmd->Subcommands = NULL;
401 Cmd->Redirections = RedirList;
402 _tcscpy(Cmd->CommandLine, ParsedLine);
403 Cmd->Tail = Cmd->CommandLine + TailOffset;
404 return Cmd;
405 }
406
407 static PARSED_COMMAND *ParseCommandOp(int OpType)
408 {
409 PARSED_COMMAND *Cmd, *Left, *Right;
410
411 if (OpType == C_OP_HIGHEST)
412 Cmd = ParseCommandPart();
413 else
414 Cmd = ParseCommandOp(OpType + 1);
415
416 if (Cmd && !_tcscmp(CurrentToken, OpString[OpType - C_OP_LOWEST]))
417 {
418 Left = Cmd;
419 Right = ParseCommandOp(OpType);
420 if (!Right)
421 {
422 if (!bParseError)
423 {
424 /* & is allowed to have an empty RHS */
425 if (OpType == C_MULTI)
426 return Left;
427 ParseError();
428 }
429 FreeCommand(Left);
430 return NULL;
431 }
432
433 Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
434 Cmd->Type = OpType;
435 Cmd->Next = NULL;
436 Cmd->Redirections = NULL;
437 Cmd->Subcommands = Left;
438 Left->Next = Right;
439 Right->Next = NULL;
440 }
441
442 return Cmd;
443 }
444
445 PARSED_COMMAND *
446 ParseCommand(LPTSTR Line)
447 {
448 PARSED_COMMAND *Cmd;
449
450 if (Line)
451 {
452 _tcscpy(ParseLine, Line);
453 bLineContinuations = FALSE;
454 }
455 else
456 {
457 if (!ReadLine(ParseLine, FALSE))
458 return NULL;
459 bLineContinuations = TRUE;
460 }
461 bParseError = FALSE;
462 ParsePos = ParseLine;
463 CurChar = _T(' ');
464
465 Cmd = ParseCommandOp(C_OP_LOWEST);
466 if (Cmd)
467 {
468 if (CurrentTokenType != TOK_END)
469 ParseError();
470 if (bParseError)
471 {
472 FreeCommand(Cmd);
473 Cmd = NULL;
474 }
475 }
476 return Cmd;
477 }
478
479 /* Reconstruct a parse tree into text form;
480 * used for echoing batch file commands */
481 VOID
482 EchoCommand(PARSED_COMMAND *Cmd)
483 {
484 PARSED_COMMAND *Sub;
485 REDIRECTION *Redir;
486
487 switch (Cmd->Type)
488 {
489 case C_COMMAND:
490 ConOutPrintf(_T("%s"), Cmd->CommandLine);
491 break;
492 case C_QUIET:
493 return;
494 case C_BLOCK:
495 ConOutChar(_T('('));
496 for (Sub = Cmd->Subcommands; Sub; Sub = Sub->Next)
497 {
498 EchoCommand(Sub);
499 ConOutChar(_T('\n'));
500 }
501 ConOutChar(_T(')'));
502 break;
503 case C_MULTI:
504 case C_IFFAILURE:
505 case C_IFSUCCESS:
506 case C_PIPE:
507 Sub = Cmd->Subcommands;
508 EchoCommand(Sub);
509 ConOutPrintf(_T(" %s "), OpString[Cmd->Type - C_OP_LOWEST]);
510 EchoCommand(Sub->Next);
511 break;
512 }
513
514 for (Redir = Cmd->Redirections; Redir; Redir = Redir->Next)
515 {
516 ConOutPrintf(_T(" %c%s%s"), _T('0') + Redir->Number,
517 RedirString[Redir->Type], Redir->Filename);
518 }
519 }
520
521 VOID
522 FreeCommand(PARSED_COMMAND *Cmd)
523 {
524 if (Cmd->Subcommands)
525 FreeCommand(Cmd->Subcommands);
526 if (Cmd->Next)
527 FreeCommand(Cmd->Next);
528 FreeRedirection(Cmd->Redirections);
529 cmd_free(Cmd);
530 }