Lars Martin Hambro <lars_martin4 AT hotmail DOT com>
[reactos.git] / base / applications / notepad / text.c
1 /*
2 * Notepad (text.c)
3 *
4 * Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
5 * Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
6 * Copyright 2002 Andriy Palamarchuk
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23 #include <notepad.h>
24
25 static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
26 {
27 LPWSTR pszNewText;
28
29 if (dwAppendLen > 0)
30 {
31 if (*ppszText)
32 {
33 pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR));
34 }
35 else
36 {
37 pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR));
38 }
39
40 if (!pszNewText)
41 return FALSE;
42
43 memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR));
44 *ppszText = pszNewText;
45 *pdwTextLen += dwAppendLen;
46 }
47 return TRUE;
48 }
49
50 BOOL ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, int *piEncoding, int *piEoln)
51 {
52 DWORD dwSize;
53 LPBYTE pBytes = NULL;
54 LPCWSTR pszText;
55 LPWSTR pszAllocText = NULL;
56 DWORD dwPos, i;
57 DWORD dwCharCount;
58 BOOL bSuccess = FALSE;
59 BYTE b = 0;
60 int iEncoding = ENCODING_ANSI;
61 int iCodePage;
62 WCHAR szCrlf[2] = { '\r', '\n' };
63 DWORD adwEolnCount[3] = { 0, 0, 0 };
64
65 *ppszText = NULL;
66 *pdwTextLen = 0;
67
68 dwSize = GetFileSize(hFile, NULL);
69 if (dwSize == INVALID_FILE_SIZE)
70 goto done;
71
72 pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2);
73 if (!pBytes)
74 goto done;
75
76 if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL))
77 goto done;
78 dwPos = 0;
79
80 /* Make sure that there is a NUL character at the end, in any encoding */
81 pBytes[dwSize + 0] = '\0';
82 pBytes[dwSize + 1] = '\0';
83
84 /* Look for Byte Order Marks */
85 if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
86 {
87 iEncoding = ENCODING_UNICODE;
88 dwPos += 2;
89 }
90 else if ((dwSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF))
91 {
92 iEncoding = ENCODING_UNICODE_BE;
93 dwPos += 2;
94 }
95 else if ((dwSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF))
96 {
97 iEncoding = ENCODING_UTF8;
98 dwPos += 3;
99 }
100
101 switch(iEncoding)
102 {
103 case ENCODING_UNICODE_BE:
104 for (i = dwPos; i < dwSize-1; i += 2)
105 {
106 b = pBytes[i+0];
107 pBytes[i+0] = pBytes[i+1];
108 pBytes[i+1] = b;
109 }
110 /* fall through */
111
112 case ENCODING_UNICODE:
113 pszText = (LPCWSTR) &pBytes[dwPos];
114 dwCharCount = (dwSize - dwPos) / sizeof(WCHAR);
115 break;
116
117 case ENCODING_ANSI:
118 case ENCODING_UTF8:
119 if (iEncoding == ENCODING_ANSI)
120 iCodePage = CP_ACP;
121 else if (iEncoding == ENCODING_UTF8)
122 iCodePage = CP_UTF8;
123 else
124 goto done;
125
126 if ((dwSize - dwPos) > 0)
127 {
128 dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0);
129 if (dwCharCount == 0)
130 goto done;
131 }
132 else
133 {
134 /* special case for files with no characters (other than BOMs) */
135 dwCharCount = 0;
136 }
137
138 pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR));
139 if (!pszAllocText)
140 goto done;
141
142 if ((dwSize - dwPos) > 0)
143 {
144 if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount))
145 goto done;
146 }
147
148 pszAllocText[dwCharCount] = '\0';
149 pszText = pszAllocText;
150 break;
151 }
152
153 dwPos = 0;
154 for (i = 0; i < dwCharCount; i++)
155 {
156 switch(pszText[i])
157 {
158 case '\r':
159 if ((i < dwCharCount-1) && (pszText[i+1] == '\n'))
160 {
161 i++;
162 adwEolnCount[EOLN_CRLF]++;
163 break;
164 }
165 /* fall through */
166
167 case '\n':
168 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos))
169 return FALSE;
170 if (!Append(ppszText, pdwTextLen, szCrlf, sizeof(szCrlf) / sizeof(szCrlf[0])))
171 return FALSE;
172 dwPos = i + 1;
173
174 if (pszText[i] == '\r')
175 adwEolnCount[EOLN_CR]++;
176 else
177 adwEolnCount[EOLN_LF]++;
178 break;
179 }
180 }
181
182 if (!*ppszText && (pszText == pszAllocText))
183 {
184 /* special case; don't need to reallocate */
185 *ppszText = pszAllocText;
186 *pdwTextLen = dwCharCount;
187 pszAllocText = NULL;
188 }
189 else
190 {
191 /* append last remaining text */
192 if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1))
193 return FALSE;
194 }
195
196 /* chose which eoln to use */
197 *piEoln = EOLN_CRLF;
198 if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln])
199 *piEoln = EOLN_LF;
200 if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln])
201 *piEoln = EOLN_CR;
202 *piEncoding = iEncoding;
203
204 bSuccess = TRUE;
205
206 done:
207 if (pBytes)
208 HeapFree(GetProcessHeap(), 0, pBytes);
209 if (pszAllocText)
210 HeapFree(GetProcessHeap(), 0, pszAllocText);
211
212 if (!bSuccess && *ppszText)
213 {
214 HeapFree(GetProcessHeap(), 0, *ppszText);
215 *ppszText = NULL;
216 *pdwTextLen = 0;
217 }
218 return bSuccess;
219 }
220
221 static BOOL WriteEncodedText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, int iEncoding)
222 {
223 LPBYTE pBytes = NULL;
224 LPBYTE pAllocBuffer = NULL;
225 DWORD dwPos = 0;
226 DWORD dwByteCount;
227 BYTE buffer[1024];
228 UINT iCodePage;
229 DWORD dwDummy, i;
230 BOOL bSuccess = FALSE;
231 int iBufferSize, iRequiredBytes;
232 BYTE b;
233
234 while(dwPos < dwTextLen)
235 {
236 switch(iEncoding)
237 {
238 case ENCODING_UNICODE:
239 pBytes = (LPBYTE) &pszText[dwPos];
240 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
241 dwPos = dwTextLen;
242 break;
243
244 case ENCODING_UNICODE_BE:
245 dwByteCount = (dwTextLen - dwPos) * sizeof(WCHAR);
246 if (dwByteCount > sizeof(buffer))
247 dwByteCount = sizeof(buffer);
248
249 memcpy(buffer, &pszText[dwPos], dwByteCount);
250 for (i = 0; i < dwByteCount; i += 2)
251 {
252 b = buffer[i+0];
253 buffer[i+0] = buffer[i+1];
254 buffer[i+1] = b;
255 }
256 pBytes = (LPBYTE) &buffer[dwPos];
257 dwPos += dwByteCount / sizeof(WCHAR);
258 break;
259
260 case ENCODING_ANSI:
261 case ENCODING_UTF8:
262 if (iEncoding == ENCODING_ANSI)
263 iCodePage = CP_ACP;
264 else if (iEncoding == ENCODING_UTF8)
265 iCodePage = CP_UTF8;
266 else
267 goto done;
268
269 iRequiredBytes = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, NULL, 0, NULL, NULL);
270 if (iRequiredBytes <= 0)
271 {
272 goto done;
273 }
274 else if (iRequiredBytes < sizeof(buffer))
275 {
276 pBytes = buffer;
277 iBufferSize = sizeof(buffer);
278 }
279 else
280 {
281 pAllocBuffer = (LPBYTE) HeapAlloc(GetProcessHeap(), 0, iRequiredBytes);
282 if (!pAllocBuffer)
283 return FALSE;
284 pBytes = pAllocBuffer;
285 iBufferSize = iRequiredBytes;
286 }
287
288 dwByteCount = WideCharToMultiByte(iCodePage, 0, &pszText[dwPos], dwTextLen - dwPos, (LPSTR) pBytes, iBufferSize, NULL, NULL);
289 if (!dwByteCount)
290 goto done;
291
292 dwPos = dwTextLen;
293 break;
294
295 default:
296 goto done;
297 }
298
299 if (!WriteFile(hFile, pBytes, dwByteCount, &dwDummy, NULL))
300 goto done;
301
302 /* free the buffer, if we have allocated one */
303 if (pAllocBuffer)
304 {
305 HeapFree(GetProcessHeap(), 0, pAllocBuffer);
306 pAllocBuffer = NULL;
307 }
308 }
309 bSuccess = TRUE;
310
311 done:
312 if (pAllocBuffer)
313 HeapFree(GetProcessHeap(), 0, pAllocBuffer);
314 return bSuccess;
315 }
316
317 BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, int iEncoding, int iEoln)
318 {
319 WCHAR wcBom;
320 WCHAR wcEoln;
321 BYTE bEoln;
322 LPBYTE pbEoln = NULL;
323 DWORD dwDummy, dwPos, dwNext, dwEolnSize = 0;
324
325 /* Write the proper byte order marks if not ANSI */
326 if (iEncoding != ENCODING_ANSI)
327 {
328 wcBom = 0xFEFF;
329 if (!WriteEncodedText(hFile, &wcBom, 1, iEncoding))
330 return FALSE;
331 }
332
333 /* Identify the proper eoln to use */
334 switch(iEoln)
335 {
336 case EOLN_LF:
337 bEoln = '\n';
338 pbEoln = &bEoln;
339 dwEolnSize = sizeof(bEoln);
340 break;
341 case EOLN_CR:
342 bEoln = '\r';
343 pbEoln = &bEoln;
344 dwEolnSize = sizeof(bEoln);
345 break;
346 }
347
348 /* If we have an eoln, make sure it is of the proper encoding */
349 if (pbEoln && ((iEncoding == ENCODING_UNICODE) || (iEncoding == ENCODING_UNICODE_BE)))
350 {
351 wcEoln = bEoln;
352 pbEoln = (LPBYTE) &wcEoln;
353 dwEolnSize = sizeof(wcEoln);
354 }
355
356 dwPos = 0;
357
358 while(dwPos < dwTextLen)
359 {
360 if (pbEoln)
361 {
362 /* Find the next eoln */
363 dwNext = dwPos;
364 while(dwNext < dwTextLen-1)
365 {
366 if ((pszText[dwNext] == '\r') && (pszText[dwNext+1] == '\n'))
367 break;
368 dwNext++;
369 }
370 }
371 else
372 {
373 /* No eoln conversion is necessary */
374 dwNext = dwTextLen;
375 }
376
377 if (!WriteEncodedText(hFile, &pszText[dwPos], dwNext - dwPos, iEncoding))
378 return FALSE;
379 dwPos = dwNext;
380
381 /* are we at an eoln? */
382 while ((dwPos < dwTextLen-1) &&
383 ((pszText[dwPos] == '\r') && (pszText[dwPos+1] == '\n')))
384 {
385 if (!WriteFile(hFile, pbEoln, dwEolnSize, &dwDummy, NULL))
386 return FALSE;
387 dwPos += 2;
388 }
389 }
390 while(dwPos < dwTextLen);
391
392 return TRUE;
393 }
394