6e26a7ab474d7a29bb4bcf243e52dc9f23aee4f2
[reactos.git] / base / applications / notepad / text.c
1 /*
2 * Notepad (text.c)
3 *
4 * Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
5 * Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
6 * Copyright 2002 Andriy Palamarchuk
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "notepad.h"
24
25 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
26 {
27 LPWSTR pszNewText;
28
29 if (dwAppendLen > 0)
30 {
31 if (*ppszText)
32 {
33 pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR));
34 }
35 else
36 {
37 pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR));
38 }
39
40 if (!pszNewText)
41 return FALSE;
42
43 memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR));
44 *ppszText = pszNewText;
45 *pdwTextLen += dwAppendLen;
46 }
47 return TRUE;
48 }
49
50 BOOL
51 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
52 {
53 DWORD dwSize;
54 LPBYTE pBytes = NULL;
55 LPWSTR pszText;
56 LPWSTR pszAllocText = NULL;
57 DWORD dwPos, i;
58 DWORD dwCharCount;
59 BOOL bSuccess = FALSE;
60 BYTE b = 0;
61 ENCODING encFile = ENCODING_ANSI;
62 int iCodePage = 0;
63 WCHAR szCrlf[2] = {'\r', '\n'};
64 DWORD adwEolnCount[3] = {0, 0, 0};
65
66 *ppszText = NULL;
67 *pdwTextLen = 0;
68
69 dwSize = GetFileSize(hFile, NULL);
70 if (dwSize == INVALID_FILE_SIZE)
71 goto done;
72
73 pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2);
74 if (!pBytes)
75 goto done;
76
77 if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL))
78 goto done;
79 dwPos = 0;
80
81 /* Make sure that there is a NUL character at the end, in any encoding */
82 pBytes[dwSize + 0] = '\0';
83 pBytes[dwSize + 1] = '\0';
84
85 /* Look for Byte Order Marks */
86 if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
87 {
88 encFile = ENCODING_UTF16LE;
89 dwPos += 2;
90 }
91 else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF))
92 {
93 encFile = ENCODING_UTF16BE;
94 dwPos += 2;
95 }
96 else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF))
97 {
98 encFile = ENCODING_UTF8;
99 dwPos += 3;
100 }
101
102 switch(encFile)
103 {
104 case ENCODING_UTF16BE:
105 for (i = dwPos; i < dwSize-1; i += 2)
106 {
107 b = pBytes[i+0];
108 pBytes[i+0] = pBytes[i+1];
109 pBytes[i+1] = b;
110 }
111 /* fall through */
112
113 case ENCODING_UTF16LE:
114 pszText = (LPWSTR) &pBytes[dwPos];
115 dwCharCount = (dwSize - dwPos) / sizeof(WCHAR);
116 break;
117
118 case ENCODING_ANSI:
119 case ENCODING_UTF8:
120 if (encFile == ENCODING_ANSI)
121 iCodePage = CP_ACP;
122 else if (encFile == ENCODING_UTF8)
123 iCodePage = CP_UTF8;
124
125 if ((dwSize - dwPos) > 0)
126 {
127 dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0);
128 if (dwCharCount == 0)
129 goto done;
130 }
131 else
132 {
133 /* special case for files with no characters (other than BOMs) */
134 dwCharCount = 0;
135 }
136
137 pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR));
138 if (!pszAllocText)
139 goto done;
140
141 if ((dwSize - dwPos) > 0)
142 {
143 if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount))
144 goto done;
145 }
146
147 pszAllocText[dwCharCount] = '\0';
148 pszText = pszAllocText;
149 break;
150 DEFAULT_UNREACHABLE;
151 }
152
153 dwPos = 0;
154 for (i = 0; i < dwCharCount; i++)
155 {
156 switch(pszText[i])
157 {
158 case '\r':
159 if ((i < dwCharCount-1) && (pszText[i+1] == '\n'))
160 {
161 i++;
162 adwEolnCount[EOLN_CRLF]++;
163 break;
164 }
165 /* fall through */
166
167 case '\n':
168 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos))
169 return FALSE;
170 if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf)))
171 return FALSE;
172 dwPos = i + 1;
173
174 if (pszText[i] == '\r')
175 adwEolnCount[EOLN_CR]++;
176 else
177 adwEolnCount[EOLN_LF]++;
178 break;
179
180 case '\0':
181 pszText[i] = ' ';
182 break;
183 }
184 }
185
186 if (!*ppszText && (pszText == pszAllocText))
187 {
188 /* special case; don't need to reallocate */
189 *ppszText = pszAllocText;
190 *pdwTextLen = dwCharCount;
191 pszAllocText = NULL;
192 }
193 else
194 {
195 /* append last remaining text */
196 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1))
197 return FALSE;
198 }
199
200 /* chose which eoln to use */
201 *piEoln = EOLN_CRLF;
202 if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln])
203 *piEoln = EOLN_LF;
204 if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln])
205 *piEoln = EOLN_CR;
206 *pencFile = encFile;
207
208 bSuccess = TRUE;
209
210 done:
211 if (pBytes)
212 HeapFree(GetProcessHeap(), 0, pBytes);
213 if (pszAllocText)
214 HeapFree(GetProcessHeap(), 0, pszAllocText);
215
216 if (!bSuccess && *ppszText)
217 {
218 HeapFree(GetProcessHeap(), 0, *ppszText);
219 *ppszText = NULL;
220 *pdwTextLen = 0;
221 }
222 return bSuccess;
223 }
224
225 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile)
226 {
227 LPBYTE pBytes = NULL;
228 LPBYTE pAllocBuffer = NULL;
229 DWORD dwPos = 0;
230 DWORD dwByteCount;
231 BYTE buffer[1024];
232 UINT iCodePage = 0;
233 DWORD dwDummy, i;
234 BOOL bSuccess = FALSE;
235 int iBufferSize, iRequiredBytes;
236 BYTE b;
237
238 while(dwPos < dwTextLen)
239 {
240 switch(encFile)
241 {
242 case ENCODING_UTF16LE:
243 pBytes = (LPBYTE) &pszText[dwPos];
244 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
245 dwPos = dwTextLen;
246 break;
247
248 case ENCODING_UTF16BE:
249 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
250 if (dwByteCount > sizeof(buffer))
251 dwByteCount = sizeof(buffer);
252
253 memcpy(buffer, &pszText[dwPos], dwByteCount);
254 for (i = 0; i < dwByteCount; i += 2)
255 {
256 b = buffer[i+0];
257 buffer[i+0] = buffer[i+1];
258 buffer[i+1] = b;
259 }
260 pBytes = (LPBYTE) &buffer[dwPos];
261 dwPos += dwByteCount / sizeof(WCHAR);
262 break;
263
264 case ENCODING_ANSI:
265 case ENCODING_UTF8:
266 if (encFile == ENCODING_ANSI)
267 iCodePage = CP_ACP;
268 else if (encFile == ENCODING_UTF8)
269 iCodePage = CP_UTF8;
270
271 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL);
272 if (iRequiredBytes <= 0)
273 {
274 goto done;
275 }
276 else if (iRequiredBytes < sizeof(buffer))
277 {
278 pBytes = buffer;
279 iBufferSize = sizeof(buffer);
280 }
281 else
282 {
283 pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes);
284 if (!pAllocBuffer)
285 return FALSE;
286 pBytes = pAllocBuffer;
287 iBufferSize = iRequiredBytes;
288 }
289
290 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL);
291 if (!dwByteCount)
292 goto done;
293
294 dwPos = dwTextLen;
295 break;
296
297 default:
298 goto done;
299 }
300
301 if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL))
302 goto done;
303
304 /* free the buffer, if we have allocated one */
305 if (pAllocBuffer)
306 {
307 HeapFree(GetProcessHeap(), 0, pAllocBuffer);
308 pAllocBuffer = NULL;
309 }
310 }
311 bSuccess = TRUE;
312
313 done:
314 if (pAllocBuffer)
315 HeapFree(GetProcessHeap(), 0, pAllocBuffer);
316 return bSuccess;
317 }
318
319 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln)
320 {
321 WCHAR wcBom;
322 LPCWSTR pszLF = L"\n";
323 DWORD dwPos, dwNext;
324
325 /* Write the proper byte order marks if not ANSI */
326 if (encFile != ENCODING_ANSI)
327 {
328 wcBom = 0xFEFF;
329 if (!WriteEncodedText(hFile, &wcBom, 1, encFile))
330 return FALSE;
331 }
332
333 dwPos = 0;
334
335 /* pszText eoln are always \r\n */
336
337 do
338 {
339 /* Find the next eoln */
340 dwNext = dwPos;
341 while(dwNext < dwTextLen)
342 {
343 if (pszText[dwNext] == '\r' && pszText[dwNext + 1] == '\n')
344 break;
345 dwNext++;
346 }
347
348 if (dwNext != dwTextLen)
349 {
350 switch (iEoln)
351 {
352 case EOLN_LF:
353 /* Write text (without eoln) */
354 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
355 return FALSE;
356 /* Write eoln */
357 if (!WriteEncodedText(hFile, pszLF, 1, encFile))
358 return FALSE;
359 break;
360 case EOLN_CR:
361 /* Write text (including \r as eoln) */
362 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 1, encFile))
363 return FALSE;
364 break;
365 case EOLN_CRLF:
366 /* Write text (including \r\n as eoln) */
367 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos + 2, encFile))
368 return FALSE;
369 break;
370 default:
371 return FALSE;
372 }
373 }
374 else
375 {
376 /* Write text (without eoln, since this is the end of the file) */
377 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, encFile))
378 return FALSE;
379 }
380
381 /* Skip \r\n */
382 dwPos = dwNext + 2;
383 }
384 while (dwPos < dwTextLen);
385
386 return TRUE;
387 }