From e85664a3d8dfb955b56b2f5d27a8ccd85db9454b Mon Sep 17 00:00:00 2001
From: Katayama Hirofumi MZ
Date: Sun, 18 Aug 2019 22:46:56 +0900
Subject: [PATCH] [NOTEPAD] Encoding detection (#1852)

CORE-15548

In notepad, if there is no BOM in the input file, then judge the text encoding.
---
 base/applications/notepad/text.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/base/applications/notepad/text.c b/base/applications/notepad/text.c
index 6e26a7ab474..d22cf4817ee 100644
--- a/base/applications/notepad/text.c
+++ b/base/applications/notepad/text.c
@@ -4,6 +4,7 @@
  * Copyright 1998,99 Marcel Baur
  * Copyright 2002 Sylvain Petreolle
  * Copyright 2002 Andriy Palamarchuk
+ * Copyright 2019 Katayama Hirofumi MZ
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -47,6 +48,47 @@ static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, D
     return TRUE;
 }
 
+/*
+ * Guess the encoding of a buffer that carries no byte-order mark.
+ *
+ * pBytes - raw bytes to inspect
+ * dwSize - number of bytes available at pBytes
+ *
+ * Returns ENCODING_UTF16LE or ENCODING_UTF16BE when the IsTextUnicode
+ * statistics say so, ENCODING_UTF8 when the whole buffer converts as UTF-8,
+ * and ENCODING_ANSI otherwise (including buffers of fewer than two bytes).
+ */
+ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
+{
+    /*
+     * IsTextUnicode only reports the tests that were requested on input, so
+     * the byte-reversed statistics test has to be requested as well;
+     * otherwise the UTF-16BE check below can never succeed.
+     */
+    INT flags = IS_TEXT_UNICODE_STATISTICS | IS_TEXT_UNICODE_REVERSE_STATISTICS;
+
+    if (dwSize <= 1)
+        return ENCODING_ANSI;
+
+    if (IsTextUnicode(pBytes, dwSize, &flags))
+    {
+        return ENCODING_UTF16LE;
+    }
+
+    if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) && !(flags & IS_TEXT_UNICODE_ILLEGAL_CHARS))
+    {
+        return ENCODING_UTF16BE;
+    }
+
+    /* is it UTF-8? */
+    if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, NULL, 0))
+    {
+        return ENCODING_UTF8;
+    }
+
+    return ENCODING_ANSI;
+}
+
 BOOL
 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
 {
@@ -98,6 +140,10 @@ ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile,
         encFile = ENCODING_UTF8;
         dwPos += 3;
     }
+    else
+    {
+        encFile = AnalyzeEncoding((const char *)pBytes, dwSize);
+    }
 
     switch(encFile)
     {
-- 
2.17.1