2 * COPYRIGHT: See COPYING in the top level directory
3 * PROJECT: ReactOS More Command
4 * FILE: base/applications/cmdutils/more/more.c
5 * PURPOSE: Displays text stream from STDIN or from an arbitrary number
6 * of files to STDOUT, with screen capabilities (more than CAT,
7 * but less than LESS ^^).
8 * PROGRAMMERS: Paolo Pantaleo
10 * Hermes Belusca-Maito (hermes.belusca@sfr.fr)
13 * MORE.C - external command.
15 * clone from 4nt more command
17 * 26 Sep 1999 - Paolo Pantaleo <paolopan@freemail.it>
20 * Oct 2003 - Timothy Schepens <tischepe at fastmail dot fm>
21 * use window size instead of buffer size.
35 /* PagePrompt statistics for the current file */
36 DWORD dwFileSize
; // In bytes
37 DWORD dwSumReadBytes
, dwSumReadChars
;
38 // The average number of bytes per character is equal to
39 // dwSumReadBytes / dwSumReadChars. Note that dwSumReadChars
40 // will never be == 0 when ConWritePaging (and possibly PagePrompt)
43 /* Handles for file and console */
44 HANDLE hFile
= INVALID_HANDLE_VALUE
;
45 HANDLE hStdIn
, hStdOut
;
51 PagePrompt(PCON_PAGER Pager
, DWORD Done
, DWORD Total
)
53 HANDLE hInput
= ConStreamGetOSHandle(StdIn
);
55 KEY_EVENT_RECORD KeyEvent
;
58 * Just use the simple prompt if the file being displayed is the STDIN,
59 * otherwise use the prompt with progress percentage.
61 * The progress percentage is evaluated as follows.
62 * So far we have read a total of 'dwSumReadBytes' bytes from the file.
63 * Amongst those is the latest read chunk of 'dwReadBytes' bytes, which
64 * corresponds to 'dwReadChars' characters that we passed to ConWritePaging,
65 * which in turn called PagePrompt. We then have: Total == dwReadChars.
66 * During this ConWritePaging call the PagePrompt was called after 'Done'
67 * number of characters over 'Total'.
68 * It should be noted that the 'dwSumReadBytes' bytes read correspond
69 * only *roughly* to 'dwSumReadChars' characters. This is because some
70 * failures may happen during the conversion of the bytes read to the
71 * character string for a given encoding.
72 * Therefore the number of characters displayed on screen is equal to:
73 * dwSumReadChars - Total + Done ,
74 * but the best corresponding approximate number of bytes would be:
75 * dwSumReadBytes - (Total - Done) * (dwSumReadBytes / dwSumReadChars) ,
76 * where the ratio is the average number of bytes per character.
77 * The percentage is then computed relative to the total file size.
81 ConResPuts(Pager
->Screen
->Stream
, IDS_CONTINUE
);
85 ConResPrintf(Pager
->Screen
->Stream
, IDS_CONTINUE_PROGRESS
,
86 // (dwSumReadChars - Total + Done) * 100 / dwFileSize
87 (dwSumReadBytes
- (Total
- Done
) *
88 (dwSumReadBytes
/ dwSumReadChars
)) * 100 / dwFileSize
92 // TODO: Implement prompt read line!
94 // FIXME: Does not support TTY yet!
96 /* RemoveBreakHandler */
97 SetConsoleCtrlHandler(NULL
, TRUE
);
99 GetConsoleMode(hInput
, &dwMode
);
100 dwMode
&= ~ENABLE_PROCESSED_INPUT
;
101 SetConsoleMode(hInput
, dwMode
);
105 // FIXME: Does not support TTY yet!
107 // ConInKey(&KeyEvent);
112 ReadConsoleInput(hInput
, &ir
, 1, &dwRead
);
114 while ((ir
.EventType
!= KEY_EVENT
) || (!ir
.Event
.KeyEvent
.bKeyDown
));
116 /* Got our key, return to caller */
117 KeyEvent
= ir
.Event
.KeyEvent
;
119 while ((KeyEvent
.wVirtualKeyCode
== VK_SHIFT
) ||
120 (KeyEvent
.wVirtualKeyCode
== VK_MENU
) ||
121 (KeyEvent
.wVirtualKeyCode
== VK_CONTROL
));
123 /* AddBreakHandler */
124 SetConsoleCtrlHandler(NULL
, FALSE
);
126 GetConsoleMode(hInput
, &dwMode
);
127 dwMode
|= ENABLE_PROCESSED_INPUT
;
128 SetConsoleMode(hInput
, dwMode
);
131 * Erase the full line where the cursor is, and move
132 * the cursor back to the beginning of the line.
134 ConClearLine(Pager
->Screen
->Stream
);
136 if ((KeyEvent
.wVirtualKeyCode
== VK_ESCAPE
) ||
137 ((KeyEvent
.wVirtualKeyCode
== L
'C') &&
138 (KeyEvent
.dwControlKeyState
& (LEFT_CTRL_PRESSED
| RIGHT_CTRL_PRESSED
))))
140 /* We break, output a newline */
142 ConStreamWrite(Pager
->Screen
->Stream
, &ch
, 1);
150 * See base/applications/cmdutils/clip/clip.c!IsDataUnicode()
151 * and base/applications/notepad/text.c!ReadText() for more details.
152 * Also some good code example can be found at:
153 * https://github.com/AutoIt/text-encoding-detect
158 ENCODING_UTF16LE
= 1,
159 ENCODING_UTF16BE
= 2,
167 OUT ENCODING
* Encoding OPTIONAL
,
168 OUT PDWORD SkipBytes OPTIONAL
)
170 PBYTE pBytes
= Buffer
;
171 ENCODING encFile
= ENCODING_ANSI
;
175 * See http://archives.miloush.net/michkap/archive/2007/04/22/2239345.html
176 * for more details about the algorithm and the pitfalls behind it.
177 * Of course it would be actually great to make a nice function that
178 * would work, once and for all, and put it into a library.
181 /* Look for Byte Order Marks */
182 if ((BufferSize
>= 2) && (pBytes
[0] == 0xFF) && (pBytes
[1] == 0xFE))
184 encFile
= ENCODING_UTF16LE
;
187 else if ((BufferSize
>= 2) && (pBytes
[0] == 0xFE) && (pBytes
[1] == 0xFF))
189 encFile
= ENCODING_UTF16BE
;
192 else if ((BufferSize
>= 3) && (pBytes
[0] == 0xEF) && (pBytes
[1] == 0xBB) && (pBytes
[2] == 0xBF))
194 encFile
= ENCODING_UTF8
;
200 * Try using statistical analysis. Do not rely on the return value of
201 * IsTextUnicode as we can get FALSE even if the text is in UTF-16 BE
202 * (i.e. we have some of the IS_TEXT_UNICODE_REVERSE_MASK bits set).
203 * Instead, set all the tests we want to perform, then just check
204 * the passed tests and try to deduce the string properties.
208 * This mask contains the 3 highest bits from IS_TEXT_UNICODE_NOT_ASCII_MASK
209 * and the 1st highest bit from IS_TEXT_UNICODE_NOT_UNICODE_MASK.
211 #define IS_TEXT_UNKNOWN_FLAGS_MASK ((7 << 13) | (1 << 11))
213 /* Flag out the unknown flags here, the passed tests will not have them either */
214 INT Tests
= (IS_TEXT_UNICODE_NOT_ASCII_MASK
|
215 IS_TEXT_UNICODE_NOT_UNICODE_MASK
|
216 IS_TEXT_UNICODE_REVERSE_MASK
| IS_TEXT_UNICODE_UNICODE_MASK
)
217 & ~IS_TEXT_UNKNOWN_FLAGS_MASK
;
220 IsTextUnicode(Buffer
, BufferSize
, &Tests
);
224 * As the IS_TEXT_UNICODE_NULL_BYTES or IS_TEXT_UNICODE_ILLEGAL_CHARS
225 * flags are expected to be potentially present in the result without
226 * modifying our expectations, filter them out now.
228 Results
&= ~(IS_TEXT_UNICODE_NULL_BYTES
| IS_TEXT_UNICODE_ILLEGAL_CHARS
);
231 * NOTE: The flags IS_TEXT_UNICODE_ASCII16 and
232 * IS_TEXT_UNICODE_REVERSE_ASCII16 are not reliable.
234 * NOTE2: Check for potential "bush hid the facts" effect by also
235 * checking the original results (in 'Tests') for the absence of
236 * the IS_TEXT_UNICODE_NULL_BYTES flag, as we may presumably expect
237 * that in UTF-16 text there will be at some point some NULL bytes.
238 * If not, fall back to ANSI. This shows the limitations of using the
239 * IsTextUnicode API to perform such tests; a better encoding
240 * detection algorithm would be really welcome.
242 if (!(Results
& IS_TEXT_UNICODE_NOT_UNICODE_MASK
) &&
243 !(Results
& IS_TEXT_UNICODE_REVERSE_MASK
) &&
244 (Results
& IS_TEXT_UNICODE_UNICODE_MASK
) &&
245 (Tests
& IS_TEXT_UNICODE_NULL_BYTES
))
247 encFile
= ENCODING_UTF16LE
;
248 dwPos
= (Results
& IS_TEXT_UNICODE_SIGNATURE
) ? 2 : 0;
251 if (!(Results
& IS_TEXT_UNICODE_NOT_UNICODE_MASK
) &&
252 !(Results
& IS_TEXT_UNICODE_UNICODE_MASK
) &&
253 (Results
& IS_TEXT_UNICODE_REVERSE_MASK
) &&
254 (Tests
& IS_TEXT_UNICODE_NULL_BYTES
))
256 encFile
= ENCODING_UTF16BE
;
257 dwPos
= (Results
& IS_TEXT_UNICODE_REVERSE_SIGNATURE
) ? 2 : 0;
262 * Either 'Results' has neither of those masks set, as it can be
263 * the case for UTF-8 text (or ANSI), or it has both, as can be the
264 * case when analysing a pure binary data chunk. This is therefore
265 * invalid and we fall back to ANSI encoding.
266 * FIXME: In case of failure, assume ANSI (as long as we do not have
267 * correct tests for UTF8, otherwise we should do them, and at the
268 * very end, assume ANSI).
270 encFile
= ENCODING_ANSI
; // ENCODING_UTF8;
280 return (encFile
!= ENCODING_ANSI
);
284 * Adapted from base/shell/cmd/misc.c!FileGetString(), but with correct
285 * text encoding support. Also please note that similar code should be
286 * also used in the CMD.EXE 'TYPE' command.
287 * Contrary to CMD's FileGetString() we do not stop at new-lines.
289 * Read text data from a file and convert it from a given encoding to UTF-16.
291 * IN OUT PVOID pCacheBuffer and IN DWORD CacheBufferLength :
292 * Implementation detail so that the function uses an external user-provided
293 * buffer to store the data temporarily read from the file. The function
294 * could have used an internal buffer instead. The length is in number of bytes.
296 * IN OUT PWSTR* pBuffer and IN OUT PDWORD pnBufferLength :
297 * Reallocated buffer containing the string data converted to UTF-16.
298 * On input, contains a pointer to the original buffer and its length.
299 * On output, contains a pointer to the reallocated buffer and its length.
300 * The length is in number of characters.
302 * At first call to this function, pBuffer can be set to NULL, in which case
303 * when the function returns the pointer will point to a valid buffer.
304 * After the last call to this function, free the pBuffer pointer with:
305 * HeapFree(GetProcessHeap(), 0, *pBuffer);
307 * If Encoding is set to ENCODING_UTF16LE or ENCODING_UTF16BE, since we are
308 * compiled in UNICODE, no extra conversion is performed and therefore
309 * pBuffer is unused (remains unallocated) and one can directly use the
310 * contents of pCacheBuffer as it is expected to contain valid UTF-16 text.
312 * OUT PDWORD pdwReadBytes : Number of bytes read from the file (optional).
313 * OUT PDWORD pdwReadChars : Corresponding number of characters read (optional).
318 IN ENCODING Encoding
,
319 IN OUT PVOID pCacheBuffer
,
320 IN DWORD CacheBufferLength
,
321 IN OUT PWCHAR
* pBuffer
,
322 IN OUT PDWORD pnBufferLength
,
323 OUT PDWORD pdwReadBytes OPTIONAL
,
324 OUT PDWORD pdwReadChars OPTIONAL
)
327 UINT CodePage
= (UINT
)-1;
331 // ASSERT(pCacheBuffer && (CacheBufferLength > 0));
332 // ASSERT(CacheBufferLength % 2 == 0); // Cache buffer length MUST BE even!
333 // ASSERT(pBuffer && pnBufferLength);
335 /* Always reset the retrieved number of bytes/characters */
336 if (pdwReadBytes
) *pdwReadBytes
= 0;
337 if (pdwReadChars
) *pdwReadChars
= 0;
339 Success
= ReadFile(hFile
, pCacheBuffer
, CacheBufferLength
, &dwReadBytes
, NULL
);
340 if (!Success
|| dwReadBytes
== 0)
343 if (pdwReadBytes
) *pdwReadBytes
= dwReadBytes
;
345 if ((Encoding
== ENCODING_ANSI
) || (Encoding
== ENCODING_UTF8
))
347 /* Conversion is needed */
349 if (Encoding
== ENCODING_ANSI
)
350 CodePage
= GetConsoleCP(); // CP_ACP; // FIXME: Cache GetConsoleCP() value.
351 else // if (Encoding == ENCODING_UTF8)
354 /* Retrieve the needed buffer size */
355 len
= MultiByteToWideChar(CodePage
, 0, pCacheBuffer
, dwReadBytes
,
359 /* Failure, bail out */
363 /* Initialize the conversion buffer if needed... */
364 if (*pBuffer
== NULL
)
366 *pnBufferLength
= len
;
367 *pBuffer
= HeapAlloc(GetProcessHeap(), 0, *pnBufferLength
* sizeof(WCHAR
));
368 if (*pBuffer
== NULL
)
372 // WARN("DEBUG: Cannot allocate memory for *pBuffer!\n");
373 // ConErrFormatMessage(GetLastError());
377 /* ... or reallocate only if the new length is greater than the old one */
378 else if (len
> *pnBufferLength
)
380 PWSTR OldBuffer
= *pBuffer
;
382 *pnBufferLength
= len
;
383 *pBuffer
= HeapReAlloc(GetProcessHeap(), 0, *pBuffer
, *pnBufferLength
* sizeof(WCHAR
));
384 if (*pBuffer
== NULL
)
386 /* Do not leak old buffer */
387 HeapFree(GetProcessHeap(), 0, OldBuffer
);
390 // WARN("DEBUG: Cannot reallocate memory for *pBuffer!\n");
391 // ConErrFormatMessage(GetLastError());
396 /* Now perform the conversion proper */
397 len
= MultiByteToWideChar(CodePage
, 0, pCacheBuffer
, dwReadBytes
,
404 * No conversion needed, just convert from big to little endian if needed.
405 * pBuffer and pnBufferLength are left untouched and pCacheBuffer can be
408 PWCHAR pWChars
= pCacheBuffer
;
411 dwReadBytes
/= sizeof(WCHAR
);
413 if (Encoding
== ENCODING_UTF16BE
)
415 for (i
= 0; i
< dwReadBytes
; i
++)
417 /* Equivalent to RtlUshortByteSwap: reverse high/low bytes */
418 pWChars
[i
] = MAKEWORD(HIBYTE(pWChars
[i
]), LOBYTE(pWChars
[i
]));
421 // else if (Encoding == ENCODING_UTF16LE), we are good, nothing to do.
424 /* Return the number of characters (dwReadBytes is converted) */
425 if (pdwReadChars
) *pdwReadChars
= dwReadBytes
;
430 // INT CommandMore(LPTSTR cmd, LPTSTR param)
431 int wmain(int argc
, WCHAR
* argv
[])
434 CON_SCREEN Screen
= {StdOut
};
435 CON_PAGER Pager
= {&Screen
, 0};
439 BOOL bRet
, bContinue
;
444 #define FileCacheBufferSize 4096
445 PVOID FileCacheBuffer
= NULL
;
446 PWCHAR StringBuffer
= NULL
;
447 DWORD StringBufferLength
= 0;
448 DWORD dwReadBytes
, dwReadChars
;
450 TCHAR szFullPath
[MAX_PATH
];
452 hStdIn
= GetStdHandle(STD_INPUT_HANDLE
);
453 hStdOut
= GetStdHandle(STD_OUTPUT_HANDLE
);
455 /* Initialize the Console Standard Streams */
456 ConStreamInit(StdIn
, GetStdHandle(STD_INPUT_HANDLE
) , UTF8Text
, INVALID_CP
);
457 ConStreamInit(StdOut
, GetStdHandle(STD_OUTPUT_HANDLE
), UTF8Text
, INVALID_CP
);
458 ConStreamInit(StdErr
, GetStdHandle(STD_ERROR_HANDLE
) , UTF8Text
, INVALID_CP
);
461 * Bad usage (too many options) or we use the /? switch.
462 * Display help for the MORE command.
464 if (argc
> 1 && wcscmp(argv
[1], L
"/?") == 0)
466 ConResPuts(StdOut
, IDS_USAGE
);
470 // FIXME: Parse all the remaining parameters.
471 // Then the file list can be found at the very end.
472 // FIXME2: Use the PARSER api that can be found in EVENTCREATE.
474 // NOTE: We might try to duplicate the ConOut for read access... ?
475 hKeyboard
= CreateFileW(L
"CONIN$", GENERIC_READ
|GENERIC_WRITE
,
476 FILE_SHARE_READ
|FILE_SHARE_WRITE
, NULL
,
477 OPEN_EXISTING
, 0, NULL
);
478 FlushConsoleInputBuffer(hKeyboard
);
479 ConStreamSetOSHandle(StdIn
, hKeyboard
);
482 FileCacheBuffer
= HeapAlloc(GetProcessHeap(), 0, FileCacheBufferSize
);
483 if (!FileCacheBuffer
)
485 ConPuts(StdErr
, L
"Error: no memory\n");
486 CloseHandle(hKeyboard
);
490 /* Special case where we run 'MORE' without any argument: we use STDIN */
494 * Assign STDIN handle to hFile so that the page prompt function will
495 * know the data comes from STDIN, and will take different actions.
499 /* Update the statistics for PagePrompt */
501 dwSumReadBytes
= dwSumReadChars
= 0;
503 /* We suppose we read text from the file */
505 /* For STDIN we always suppose we are in ANSI mode */
506 // SetFilePointer(hFile, 0, NULL, FILE_BEGIN);
507 Encoding
= ENCODING_ANSI
; // ENCODING_UTF8;
509 bContinue
= ConPutsPaging(&Pager
, PagePrompt
, TRUE
, L
"");
515 bRet
= FileGetString(hFile
, Encoding
,
516 FileCacheBuffer
, FileCacheBufferSize
,
517 &StringBuffer
, &StringBufferLength
,
518 &dwReadBytes
, &dwReadChars
);
519 if (!bRet
|| dwReadBytes
== 0 || dwReadChars
== 0)
521 /* We failed at reading the file, bail out */
525 /* Update the statistics for PagePrompt */
526 dwSumReadBytes
+= dwReadBytes
;
527 dwSumReadChars
+= dwReadChars
;
529 bContinue
= ConWritePaging(&Pager
, PagePrompt
, FALSE
,
530 StringBuffer
, dwReadChars
);
531 /* If we Ctrl-C/Ctrl-Break, stop everything */
535 while (bRet
&& dwReadBytes
> 0);
539 /* We have files: read them and output them to STDOUT */
540 for (i
= 1; i
< argc
; i
++)
542 GetFullPathNameW(argv
[i
], ARRAYSIZE(szFullPath
), szFullPath
, NULL
);
543 hFile
= CreateFileW(szFullPath
,
548 0, // FILE_ATTRIBUTE_NORMAL,
550 if (hFile
== INVALID_HANDLE_VALUE
)
552 ConResPrintf(StdErr
, IDS_FILE_ACCESS
, szFullPath
);
556 /* We currently do not support files that are too big */
557 dwFileSize
= GetFileSize(hFile
, NULL
);
558 if (dwFileSize
== INVALID_FILE_SIZE
)
560 ConPuts(StdErr
, L
"ERROR: Invalid file size!\n");
565 /* We suppose we read text from the file */
567 /* Check whether the file is UNICODE and retrieve its encoding */
568 SetFilePointer(hFile
, 0, NULL
, FILE_BEGIN
);
569 bRet
= ReadFile(hFile
, FileCacheBuffer
, FileCacheBufferSize
, &dwReadBytes
, NULL
);
570 IsDataUnicode(FileCacheBuffer
, dwReadBytes
, &Encoding
, &SkipBytes
);
571 SetFilePointer(hFile
, SkipBytes
, NULL
, FILE_BEGIN
);
573 /* Update the statistics for PagePrompt */
574 dwSumReadBytes
= dwSumReadChars
= 0;
576 bContinue
= ConPutsPaging(&Pager
, PagePrompt
, TRUE
, L
"");
585 bRet
= FileGetString(hFile
, Encoding
,
586 FileCacheBuffer
, FileCacheBufferSize
,
587 &StringBuffer
, &StringBufferLength
,
588 &dwReadBytes
, &dwReadChars
);
589 if (!bRet
|| dwReadBytes
== 0 || dwReadChars
== 0)
592 * We failed at reading the file, bail out and
593 * continue with the other files.
598 /* Update the statistics for PagePrompt */
599 dwSumReadBytes
+= dwReadBytes
;
600 dwSumReadChars
+= dwReadChars
;
602 if ((Encoding
== ENCODING_UTF16LE
) || (Encoding
== ENCODING_UTF16BE
))
604 bContinue
= ConWritePaging(&Pager
, PagePrompt
, FALSE
,
605 FileCacheBuffer
, dwReadChars
);
609 bContinue
= ConWritePaging(&Pager
, PagePrompt
, FALSE
,
610 StringBuffer
, dwReadChars
);
612 /* If we Ctrl-C/Ctrl-Break, stop everything */
619 while (bRet
&& dwReadBytes
> 0);
625 if (StringBuffer
) HeapFree(GetProcessHeap(), 0, StringBuffer
);
626 HeapFree(GetProcessHeap(), 0, FileCacheBuffer
);
627 CloseHandle(hKeyboard
);