Sync with trunk (r48545)
[reactos.git] / base / applications / notepad / text.c
1 /*
2 * Notepad (text.c)
3 *
4 * Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
5 * Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
6 * Copyright 2002 Andriy Palamarchuk
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <notepad.h>
24
25 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
26 {
27 LPWSTR pszNewText;
28
29 if (dwAppendLen > 0)
30 {
31 if (*ppszText)
32 {
33 pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR));
34 }
35 else
36 {
37 pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR));
38 }
39
40 if (!pszNewText)
41 return FALSE;
42
43 memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR));
44 *ppszText = pszNewText;
45 *pdwTextLen += dwAppendLen;
46 }
47 return TRUE;
48 }
49
50 BOOL ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, int *piEncoding, int *piEoln)
51 {
52 DWORD dwSize;
53 LPBYTE pBytes = NULL;
54 LPWSTR pszText;
55 LPWSTR pszAllocText = NULL;
56 DWORD dwPos, i;
57 DWORD dwCharCount;
58 BOOL bSuccess = FALSE;
59 BYTE b = 0;
60 int iEncoding = ENCODING_ANSI;
61 int iCodePage = 0;
62 WCHAR szCrlf[2] = { '\r', '\n' };
63 DWORD adwEolnCount[3] = { 0, 0, 0 };
64
65 *ppszText = NULL;
66 *pdwTextLen = 0;
67
68 dwSize = GetFileSize(hFile, NULL);
69 if (dwSize == INVALID_FILE_SIZE)
70 goto done;
71
72 pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2);
73 if (!pBytes)
74 goto done;
75
76 if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL))
77 goto done;
78 dwPos = 0;
79
80 /* Make sure that there is a NUL character at the end, in any encoding */
81 pBytes[dwSize + 0] = '\0';
82 pBytes[dwSize + 1] = '\0';
83
84 /* Look for Byte Order Marks */
85 if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
86 {
87 iEncoding = ENCODING_UNICODE;
88 dwPos += 2;
89 }
90 else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF))
91 {
92 iEncoding = ENCODING_UNICODE_BE;
93 dwPos += 2;
94 }
95 else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF))
96 {
97 iEncoding = ENCODING_UTF8;
98 dwPos += 3;
99 }
100
101 switch(iEncoding)
102 {
103 case ENCODING_UNICODE_BE:
104 for (i = dwPos; i < dwSize-1; i += 2)
105 {
106 b = pBytes[i+0];
107 pBytes[i+0] = pBytes[i+1];
108 pBytes[i+1] = b;
109 }
110 /* fall through */
111
112 case ENCODING_UNICODE:
113 pszText = (LPWSTR) &pBytes[dwPos];
114 dwCharCount = (dwSize - dwPos) / sizeof(WCHAR);
115 break;
116
117 case ENCODING_ANSI:
118 case ENCODING_UTF8:
119 if (iEncoding == ENCODING_ANSI)
120 iCodePage = CP_ACP;
121 else if (iEncoding == ENCODING_UTF8)
122 iCodePage = CP_UTF8;
123
124 if ((dwSize - dwPos) > 0)
125 {
126 dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0);
127 if (dwCharCount == 0)
128 goto done;
129 }
130 else
131 {
132 /* special case for files with no characters (other than BOMs) */
133 dwCharCount = 0;
134 }
135
136 pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR));
137 if (!pszAllocText)
138 goto done;
139
140 if ((dwSize - dwPos) > 0)
141 {
142 if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount))
143 goto done;
144 }
145
146 pszAllocText[dwCharCount] = '\0';
147 pszText = pszAllocText;
148 break;
149 }
150
151 dwPos = 0;
152 for (i = 0; i < dwCharCount; i++)
153 {
154 switch(pszText[i])
155 {
156 case '\r':
157 if ((i < dwCharCount-1) && (pszText[i+1] == '\n'))
158 {
159 i++;
160 adwEolnCount[EOLN_CRLF]++;
161 break;
162 }
163 /* fall through */
164
165 case '\n':
166 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos))
167 return FALSE;
168 if (!Append(ppszText, pdwTextLen, szCrlf, sizeof(szCrlf) / sizeof(szCrlf[0])))
169 return FALSE;
170 dwPos = i + 1;
171
172 if (pszText[i] == '\r')
173 adwEolnCount[EOLN_CR]++;
174 else
175 adwEolnCount[EOLN_LF]++;
176 break;
177
178 case '\0':
179 pszText[i] = ' ';
180 break;
181 }
182 }
183
184 if (!*ppszText && (pszText == pszAllocText))
185 {
186 /* special case; don't need to reallocate */
187 *ppszText = pszAllocText;
188 *pdwTextLen = dwCharCount;
189 pszAllocText = NULL;
190 }
191 else
192 {
193 /* append last remaining text */
194 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1))
195 return FALSE;
196 }
197
198 /* chose which eoln to use */
199 *piEoln = EOLN_CRLF;
200 if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln])
201 *piEoln = EOLN_LF;
202 if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln])
203 *piEoln = EOLN_CR;
204 *piEncoding = iEncoding;
205
206 bSuccess = TRUE;
207
208 done:
209 if (pBytes)
210 HeapFree(GetProcessHeap(), 0, pBytes);
211 if (pszAllocText)
212 HeapFree(GetProcessHeap(), 0, pszAllocText);
213
214 if (!bSuccess && *ppszText)
215 {
216 HeapFree(GetProcessHeap(), 0, *ppszText);
217 *ppszText = NULL;
218 *pdwTextLen = 0;
219 }
220 return bSuccess;
221 }
222
223 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, int iEncoding)
224 {
225 LPBYTE pBytes = NULL;
226 LPBYTE pAllocBuffer = NULL;
227 DWORD dwPos = 0;
228 DWORD dwByteCount;
229 BYTE buffer[1024];
230 UINT iCodePage = 0;
231 DWORD dwDummy, i;
232 BOOL bSuccess = FALSE;
233 int iBufferSize, iRequiredBytes;
234 BYTE b;
235
236 while(dwPos < dwTextLen)
237 {
238 switch(iEncoding)
239 {
240 case ENCODING_UNICODE:
241 pBytes = (LPBYTE) &pszText[dwPos];
242 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
243 dwPos = dwTextLen;
244 break;
245
246 case ENCODING_UNICODE_BE:
247 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
248 if (dwByteCount > sizeof(buffer))
249 dwByteCount = sizeof(buffer);
250
251 memcpy(buffer, &pszText[dwPos], dwByteCount);
252 for (i = 0; i < dwByteCount; i += 2)
253 {
254 b = buffer[i+0];
255 buffer[i+0] = buffer[i+1];
256 buffer[i+1] = b;
257 }
258 pBytes = (LPBYTE) &buffer[dwPos];
259 dwPos += dwByteCount / sizeof(WCHAR);
260 break;
261
262 case ENCODING_ANSI:
263 case ENCODING_UTF8:
264 if (iEncoding == ENCODING_ANSI)
265 iCodePage = CP_ACP;
266 else if (iEncoding == ENCODING_UTF8)
267 iCodePage = CP_UTF8;
268
269 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL);
270 if (iRequiredBytes <= 0)
271 {
272 goto done;
273 }
274 else if (iRequiredBytes < sizeof(buffer))
275 {
276 pBytes = buffer;
277 iBufferSize = sizeof(buffer);
278 }
279 else
280 {
281 pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes);
282 if (!pAllocBuffer)
283 return FALSE;
284 pBytes = pAllocBuffer;
285 iBufferSize = iRequiredBytes;
286 }
287
288 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL);
289 if (!dwByteCount)
290 goto done;
291
292 dwPos = dwTextLen;
293 break;
294
295 default:
296 goto done;
297 }
298
299 if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL))
300 goto done;
301
302 /* free the buffer, if we have allocated one */
303 if (pAllocBuffer)
304 {
305 HeapFree(GetProcessHeap(), 0, pAllocBuffer);
306 pAllocBuffer = NULL;
307 }
308 }
309 bSuccess = TRUE;
310
311 done:
312 if (pAllocBuffer)
313 HeapFree(GetProcessHeap(), 0, pAllocBuffer);
314 return bSuccess;
315 }
316
317 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, int iEncoding, int iEoln)
318 {
319 WCHAR wcBom;
320 LPCWSTR pszLF = L"\n";
321 DWORD dwPos, dwNext;
322
323 /* Write the proper byte order marks if not ANSI */
324 if (iEncoding != ENCODING_ANSI)
325 {
326 wcBom = 0xFEFF;
327 if (!WriteEncodedText(hFile, &wcBom, 1, iEncoding))
328 return FALSE;
329 }
330
331 dwPos = 0;
332
333 /* pszText eoln are always \r\n */
334
335 do
336 {
337 /* Find the next eoln */
338 dwNext = dwPos;
339 while(dwNext < dwTextLen)
340 {
341 if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n')
342 break;
343 dwNext++;
344 }
345
346 if (dwNext != dwTextLen)
347 {
348 switch (iEoln)
349 {
350 case EOLN_LF:
351 /* Write text (without eoln) */
352 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, iEncoding))
353 return FALSE;
354 /* Write eoln */
355 if (!WriteEncodedText(hFile, pszLF, 1, iEncoding))
356 return FALSE;
357 break;
358 case EOLN_CR:
359 /* Write text (including \r as eoln) */
360 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, iEncoding))
361 return FALSE;
362 break;
363 case EOLN_CRLF:
364 /* Write text (including \r\n as eoln) */
365 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, iEncoding))
366 return FALSE;
367 break;
368 default:
369 return FALSE;
370 }
371 }
372 else
373 {
374 /* Write text (without eoln, since this is the end of the file) */
375 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, iEncoding))
376 return FALSE;
377 }
378
379 /* Skip \r\n */
380 dwPos = dwNext + 2;
381 }
382 while (dwPos < dwTextLen);
383
384 return TRUE;
385 }
386