summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
f052817)
CORE-15548
In Notepad, if the input file has no BOM, detect the text encoding heuristically.
* Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
* Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
* Copyright 2002 Andriy Palamarchuk
* Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
* Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
* Copyright 2002 Andriy Palamarchuk
+ * Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
+ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
+{
+ INT flags = IS_TEXT_UNICODE_STATISTICS;
+
+ if (dwSize <= 1)
+ return ENCODING_ANSI;
+
+ if (IsTextUnicode(pBytes, dwSize, &flags))
+ {
+ return ENCODING_UTF16LE;
+ }
+
+ if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) && !(flags & IS_TEXT_UNICODE_ILLEGAL_CHARS))
+ {
+ return ENCODING_UTF16BE;
+ }
+
+ /* is it UTF-8? */
+ if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, NULL, 0))
+ {
+ return ENCODING_UTF8;
+ }
+
+ return ENCODING_ANSI;
+}
+
BOOL
ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
{
BOOL
ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
{
encFile = ENCODING_UTF8;
dwPos += 3;
}
encFile = ENCODING_UTF8;
dwPos += 3;
}
+ else
+ {
+ encFile = AnalyzeEncoding((const char *)pBytes, dwSize);
+ }