7a5d7382c59d2b96fb77f97c183f28206dc2c17a
[reactos.git] / base / applications / cmdutils / fc / text.h
1 /*
2 * PROJECT: ReactOS FC Command
3 * LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
4 * PURPOSE: Comparing text files
5 * COPYRIGHT: Copyright 2021 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com)
6 */
7 #include "fc.h"
8
9 #define IS_SPACE(ch) ((ch) == TEXT(' ') || (ch) == TEXT('\t'))
10
11 #ifdef UNICODE
12 #define NODE NODE_W
13 #define PrintLine PrintLineW
14 #define TextCompare TextCompareW
15 #else
16 #define NODE NODE_A
17 #define PrintLine PrintLineA
18 #define TextCompare TextCompareA
19 #endif
20
21 static LPTSTR AllocLine(LPCTSTR pch, DWORD cch)
22 {
23 LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR));
24 if (!pszNew)
25 return NULL;
26 memcpy(pszNew, pch, cch * sizeof(TCHAR));
27 pszNew[cch] = 0;
28 return pszNew;
29 }
30
31 static NODE *AllocNode(LPTSTR psz, DWORD lineno)
32 {
33 NODE *node;
34 if (!psz)
35 return NULL;
36 node = calloc(1, sizeof(NODE));
37 if (!node)
38 {
39 free(psz);
40 return NULL;
41 }
42 node->pszLine = psz;
43 node->lineno = lineno;
44 return node;
45 }
46
47 static __inline VOID DeleteNode(NODE *node)
48 {
49 if (node)
50 {
51 free(node->pszLine);
52 free(node->pszComp);
53 free(node);
54 }
55 }
56
57 static VOID DeleteList(struct list *list)
58 {
59 struct list *ptr;
60 NODE *node;
61 while ((ptr = list_head(list)) != NULL)
62 {
63 list_remove(ptr);
64 node = LIST_ENTRY(ptr, NODE, entry);
65 DeleteNode(node);
66 }
67 }
68
69 static __inline LPCTSTR SkipSpace(LPCTSTR pch)
70 {
71 while (IS_SPACE(*pch))
72 ++pch;
73 return pch;
74 }
75
76 static __inline LPCTSTR FindLastNonSpace(LPCTSTR pch)
77 {
78 LPCTSTR pchLast = NULL;
79 while (*pch)
80 {
81 if (!IS_SPACE(*pch))
82 pchLast = pch;
83 ++pch;
84 }
85 return pchLast;
86 }
87
88 static VOID DeleteDuplicateSpaces(LPTSTR psz)
89 {
90 LPTSTR pch0, pch1;
91 for (pch0 = pch1 = psz; *pch0; ++pch0)
92 {
93 *pch1++ = *pch0;
94 if (IS_SPACE(*pch0))
95 {
96 do
97 {
98 ++pch0;
99 } while (IS_SPACE(*pch0));
100 --pch0;
101 }
102 }
103 *pch1 = 0;
104 }
105
106 static LPTSTR CompressSpace(LPCTSTR line)
107 {
108 LPTSTR pszNew;
109 LPCTSTR pchLast;
110
111 line = SkipSpace(line);
112 pchLast = FindLastNonSpace(line);
113 if (pchLast == NULL)
114 return AllocLine(NULL, 0);
115
116 pszNew = AllocLine(line, (DWORD)(pchLast - line) + 1);
117 if (!pszNew)
118 return NULL;
119
120 DeleteDuplicateSpaces(pszNew);
121 return pszNew;
122 }
123
124 #define TAB_WIDTH 8
125
126 static INT ExpandTabLength(LPCTSTR line)
127 {
128 LPCTSTR pch;
129 INT cch = 0;
130 for (pch = line; *pch; ++pch)
131 {
132 if (*pch == TEXT('\t'))
133 cch += TAB_WIDTH - (cch % TAB_WIDTH);
134 else
135 ++cch;
136 }
137 return cch;
138 }
139
140 static LPTSTR ExpandTab(LPCTSTR line)
141 {
142 INT spaces, cch = ExpandTabLength(line), ich;
143 LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR));
144 LPCTSTR pch;
145 if (!pszNew)
146 return NULL;
147 ich = 0;
148 for (pch = line; *pch; ++pch)
149 {
150 if (*pch == TEXT('\t'))
151 {
152 spaces = TAB_WIDTH - (ich % TAB_WIDTH);
153 while (spaces-- > 0)
154 {
155 pszNew[ich++] = TEXT(' ');
156 }
157 }
158 else
159 {
160 pszNew[ich++] = *pch;
161 }
162 }
163 pszNew[ich] = 0;
164 return pszNew;
165 }
166
167 #define HASH_EOF 0xFFFFFFFF
168 #define HASH_MASK 0x7FFFFFFF
169
170 static DWORD GetHash(LPCTSTR psz, BOOL bIgnoreCase)
171 {
172 DWORD ret = 0xDEADFACE;
173 while (*psz)
174 {
175 ret += (bIgnoreCase ? towupper(*psz) : *psz);
176 ret <<= 2;
177 ++psz;
178 }
179 return (ret & HASH_MASK);
180 }
181
182 static NODE *AllocEOFNode(DWORD lineno)
183 {
184 NODE *node = AllocNode(AllocLine(NULL, 0), 0);
185 if (node == NULL)
186 return NULL;
187 node->pszComp = AllocLine(NULL, 0);
188 if (node->pszComp == NULL)
189 {
190 DeleteNode(node);
191 return NULL;
192 }
193 node->lineno = lineno;
194 node->hash = HASH_EOF;
195 return node;
196 }
197
198 static __inline BOOL IsEOFNode(NODE *node)
199 {
200 return !node || node->hash == HASH_EOF;
201 }
202
203 static BOOL ConvertNode(const FILECOMPARE *pFC, NODE *node)
204 {
205 if (!(pFC->dwFlags & FLAG_T))
206 {
207 LPTSTR tmp = ExpandTab(node->pszLine);
208 if (!tmp)
209 return FALSE;
210 free(node->pszLine);
211 node->pszLine = tmp;
212 if (!(pFC->dwFlags & FLAG_W))
213 node->hash = GetHash(node->pszLine, !!(pFC->dwFlags & FLAG_C));
214 }
215 if (pFC->dwFlags & FLAG_W)
216 {
217 node->pszComp = CompressSpace(node->pszLine);
218 if (!node->pszComp)
219 return FALSE;
220 node->hash = GetHash(node->pszComp, !!(pFC->dwFlags & FLAG_C));
221 }
222 return TRUE;
223 }
224
225 static FCRET CompareNode(const FILECOMPARE *pFC, const NODE *node0, const NODE *node1)
226 {
227 DWORD dwCmpFlags;
228 LPTSTR psz0, psz1;
229 INT ret;
230 if (node0->hash != node1->hash)
231 return FCRET_DIFFERENT;
232
233 psz0 = (pFC->dwFlags & FLAG_W) ? node0->pszComp : node0->pszLine;
234 psz1 = (pFC->dwFlags & FLAG_W) ? node1->pszComp : node1->pszLine;
235 dwCmpFlags = ((pFC->dwFlags & FLAG_C) ? NORM_IGNORECASE : 0);
236 ret = CompareString(LOCALE_USER_DEFAULT, dwCmpFlags, psz0, -1, psz1, -1);
237 return (ret == CSTR_EQUAL) ? FCRET_IDENTICAL : FCRET_DIFFERENT;
238 }
239
240 static BOOL FindNextLine(LPCTSTR pch, DWORD ich, DWORD cch, LPDWORD pich)
241 {
242 while (ich < cch)
243 {
244 if (pch[ich] == TEXT('\n') || pch[ich] == TEXT('\0'))
245 {
246 *pich = ich;
247 return TRUE;
248 }
249 ++ich;
250 }
251 *pich = cch;
252 return FALSE;
253 }
254
255 static FCRET
256 ParseLines(const FILECOMPARE *pFC, HANDLE *phMapping,
257 LARGE_INTEGER *pib, const LARGE_INTEGER *pcb, struct list *list)
258 {
259 DWORD lineno = 1, ich, cch, ichNext, cbView, cchNode;
260 LPTSTR psz, pszLine;
261 BOOL fLast, bCR;
262 NODE *node;
263
264 if (*phMapping == NULL)
265 return FCRET_NO_MORE_DATA;
266
267 if (pib->QuadPart >= pcb->QuadPart)
268 {
269 CloseHandle(*phMapping);
270 *phMapping = NULL;
271 return FCRET_NO_MORE_DATA;
272 }
273
274 cbView = (DWORD)min(pcb->QuadPart - pib->QuadPart, MAX_VIEW_SIZE);
275 psz = MapViewOfFile(*phMapping, FILE_MAP_READ, pib->HighPart, pib->LowPart, cbView);
276 if (!psz)
277 {
278 return OutOfMemory();
279 }
280
281 ich = 0;
282 cch = cbView / sizeof(TCHAR);
283 fLast = (pib->QuadPart + cbView >= pcb->QuadPart);
284 while (ich < cch &&
285 (FindNextLine(psz, ich, cch, &ichNext) ||
286 (ichNext == cch && (fLast || ich == 0))))
287 {
288 bCR = (ichNext > 0) && (psz[ichNext - 1] == TEXT('\r'));
289 cchNode = ichNext - ich - bCR;
290 pszLine = AllocLine(&psz[ich], cchNode);
291 node = AllocNode(pszLine, lineno++);
292 if (!node || !ConvertNode(pFC, node))
293 {
294 DeleteNode(node);
295 UnmapViewOfFile(psz);
296 return OutOfMemory();
297 }
298 list_add_tail(list, &node->entry);
299 ich = ichNext + 1;
300 }
301
302 UnmapViewOfFile(psz);
303
304 pib->QuadPart += ichNext * sizeof(WCHAR);
305
306 if (pib->QuadPart < pcb->QuadPart)
307 return FCRET_IDENTICAL;
308
309 // append EOF node
310 node = AllocEOFNode(lineno);
311 if (!node)
312 return OutOfMemory();
313 list_add_tail(list, &node->entry);
314
315 return FCRET_NO_MORE_DATA;
316 }
317
318 static VOID
319 ShowDiff(FILECOMPARE *pFC, INT i, struct list *begin, struct list *end)
320 {
321 NODE* node;
322 struct list *list = &pFC->list[i];
323 struct list *first = NULL, *last = NULL;
324 PrintCaption(pFC->file[i]);
325 if (begin && end && list_prev(list, begin))
326 begin = list_prev(list, begin);
327 while (begin != end)
328 {
329 node = LIST_ENTRY(begin, NODE, entry);
330 if (IsEOFNode(node))
331 break;
332 if (!first)
333 first = begin;
334 last = begin;
335 if (!(pFC->dwFlags & FLAG_A))
336 PrintLine(pFC, node->lineno, node->pszLine);
337 begin = list_next(list, begin);
338 }
339 if ((pFC->dwFlags & FLAG_A) && first)
340 {
341 node = LIST_ENTRY(first, NODE, entry);
342 PrintLine(pFC, node->lineno, node->pszLine);
343 first = list_next(list, first);
344 if (first != last)
345 {
346 if (list_next(list, first) == last)
347 {
348 node = LIST_ENTRY(first, NODE, entry);
349 PrintLine(pFC, node->lineno, node->pszLine);
350 }
351 else
352 {
353 PrintDots();
354 }
355 }
356 node = LIST_ENTRY(last, NODE, entry);
357 PrintLine(pFC, node->lineno, node->pszLine);
358 }
359 }
360
361 static VOID
362 SkipIdentical(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1)
363 {
364 struct list *ptr0 = *pptr0, *ptr1 = *pptr1;
365 while (ptr0 && ptr1)
366 {
367 NODE *node0 = LIST_ENTRY(ptr0, NODE, entry);
368 NODE *node1 = LIST_ENTRY(ptr1, NODE, entry);
369 if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL)
370 break;
371 ptr0 = list_next(&pFC->list[0], ptr0);
372 ptr1 = list_next(&pFC->list[1], ptr1);
373 }
374 *pptr0 = ptr0;
375 *pptr1 = ptr1;
376 }
377
378 static DWORD
379 SkipIdenticalN(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1,
380 DWORD nnnn, DWORD lineno0, DWORD lineno1)
381 {
382 struct list *ptr0 = *pptr0, *ptr1 = *pptr1;
383 DWORD count = 0;
384 while (ptr0 && ptr1)
385 {
386 NODE *node0 = LIST_ENTRY(ptr0, NODE, entry);
387 NODE *node1 = LIST_ENTRY(ptr1, NODE, entry);
388 if (node0->lineno >= lineno0)
389 break;
390 if (node1->lineno >= lineno1)
391 break;
392 if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL)
393 break;
394 ptr0 = list_next(&pFC->list[0], ptr0);
395 ptr1 = list_next(&pFC->list[1], ptr1);
396 ++count;
397 if (count >= nnnn)
398 break;
399 }
400 *pptr0 = ptr0;
401 *pptr1 = ptr1;
402 return count;
403 }
404
405 static FCRET
406 ScanDiff(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1,
407 DWORD lineno0, DWORD lineno1)
408 {
409 struct list *ptr0 = *pptr0, *ptr1 = *pptr1, *tmp0, *tmp1;
410 NODE *node0, *node1;
411 INT count;
412 while (ptr0 && ptr1)
413 {
414 node0 = LIST_ENTRY(ptr0, NODE, entry);
415 node1 = LIST_ENTRY(ptr1, NODE, entry);
416 if (node0->lineno >= lineno0)
417 return FCRET_DIFFERENT;
418 if (node1->lineno >= lineno1)
419 return FCRET_DIFFERENT;
420 tmp0 = ptr0;
421 tmp1 = ptr1;
422 count = SkipIdenticalN(pFC, &tmp0, &tmp1, pFC->nnnn, lineno0, lineno1);
423 if (count >= pFC->nnnn)
424 break;
425 if (count > 0)
426 {
427 ptr0 = tmp0;
428 ptr1 = tmp1;
429 }
430 else
431 {
432 ptr0 = list_next(&pFC->list[0], ptr0);
433 ptr1 = list_next(&pFC->list[1], ptr1);
434 }
435 }
436 *pptr0 = ptr0;
437 *pptr1 = ptr1;
438 return FCRET_IDENTICAL;
439 }
440
441 static FCRET
442 Resync(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1)
443 {
444 FCRET ret;
445 struct list *ptr0, *ptr1, *save0 = NULL, *save1 = NULL;
446 NODE *node0, *node1;
447 struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1];
448 DWORD lineno0, lineno1;
449 INT penalty, i0, i1, min_penalty = MAXLONG;
450
451 node0 = LIST_ENTRY(*pptr0, NODE, entry);
452 node1 = LIST_ENTRY(*pptr1, NODE, entry);
453 lineno0 = node0->lineno + pFC->n;
454 lineno1 = node1->lineno + pFC->n;
455
456 // ``If the files that you are comparing have more than pFC->n consecutive
457 // differing lines, FC cancels the comparison,,
458 // ``If the number of matching lines in the files is less than pFC->nnnn,
459 // FC displays the matching lines as differences,,
460 for (ptr1 = list_next(list1, *pptr1), i1 = 0; ptr1; ptr1 = list_next(list1, ptr1), ++i1)
461 {
462 node1 = LIST_ENTRY(ptr1, NODE, entry);
463 if (node1->lineno >= lineno1)
464 break;
465 for (ptr0 = list_next(list0, *pptr0), i0 = 0; ptr0; ptr0 = list_next(list0, ptr0), ++i0)
466 {
467 node0 = LIST_ENTRY(ptr0, NODE, entry);
468 if (node0->lineno >= lineno0)
469 break;
470 if (CompareNode(pFC, node0, node1) == FCRET_IDENTICAL)
471 {
472 penalty = min(i0, i1) + abs(i1 - i0);
473 if (min_penalty > penalty)
474 {
475 min_penalty = penalty;
476 save0 = ptr0;
477 save1 = ptr1;
478 }
479 }
480 }
481 }
482
483 if (save0 && save1)
484 {
485 *pptr0 = save0;
486 *pptr1 = save1;
487 ret = ScanDiff(pFC, &save0, &save1, lineno0, lineno1);
488 if (save0 && save1)
489 {
490 *pptr0 = save0;
491 *pptr1 = save1;
492 }
493 return ret;
494 }
495
496 for (ptr0 = *pptr0; ptr0; ptr0 = list_next(list0, ptr0))
497 {
498 node0 = LIST_ENTRY(ptr0, NODE, entry);
499 if (node0->lineno == lineno0)
500 break;
501 }
502 for (ptr1 = *pptr1; ptr1; ptr1 = list_next(list1, ptr1))
503 {
504 node1 = LIST_ENTRY(ptr1, NODE, entry);
505 if (node1->lineno == lineno1)
506 break;
507 }
508 *pptr0 = ptr0;
509 *pptr1 = ptr1;
510 return FCRET_DIFFERENT;
511 }
512
513 static FCRET
514 Finalize(FILECOMPARE* pFC, struct list *ptr0, struct list* ptr1, BOOL fDifferent)
515 {
516 if (!ptr0 && !ptr1)
517 {
518 if (fDifferent)
519 return Different(pFC->file[0], pFC->file[1]);
520 return NoDifference();
521 }
522 else
523 {
524 ShowDiff(pFC, 0, ptr0, NULL);
525 ShowDiff(pFC, 1, ptr1, NULL);
526 PrintEndOfDiff();
527 return FCRET_DIFFERENT;
528 }
529 }
530
531 // FIXME: "cmd_apitest fc" has some failures.
532 FCRET TextCompare(FILECOMPARE *pFC, HANDLE *phMapping0, const LARGE_INTEGER *pcb0,
533 HANDLE *phMapping1, const LARGE_INTEGER *pcb1)
534 {
535 FCRET ret, ret0, ret1;
536 struct list *ptr0, *ptr1, *save0, *save1, *next0, *next1;
537 NODE* node0, * node1;
538 BOOL fDifferent = FALSE;
539 LARGE_INTEGER ib0 = { .QuadPart = 0 }, ib1 = { .QuadPart = 0 };
540 struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1];
541 list_init(list0);
542 list_init(list1);
543
544 do
545 {
546 ret0 = ParseLines(pFC, phMapping0, &ib0, pcb0, list0);
547 if (ret0 == FCRET_INVALID)
548 {
549 ret = ret0;
550 goto cleanup;
551 }
552 ret1 = ParseLines(pFC, phMapping1, &ib1, pcb1, list1);
553 if (ret1 == FCRET_INVALID)
554 {
555 ret = ret1;
556 goto cleanup;
557 }
558
559 ptr0 = list_head(list0);
560 ptr1 = list_head(list1);
561 for (;;)
562 {
563 if (!ptr0 || !ptr1)
564 goto quit;
565
566 // skip identical (sync'ed)
567 SkipIdentical(pFC, &ptr0, &ptr1);
568 if (ptr0 || ptr1)
569 fDifferent = TRUE;
570 node0 = LIST_ENTRY(ptr0, NODE, entry);
571 node1 = LIST_ENTRY(ptr1, NODE, entry);
572 if (IsEOFNode(node0) || IsEOFNode(node1))
573 goto quit;
574
575 // try to resync
576 save0 = ptr0;
577 save1 = ptr1;
578 ret = Resync(pFC, &ptr0, &ptr1);
579 if (ret == FCRET_INVALID)
580 goto cleanup;
581 if (ret == FCRET_DIFFERENT)
582 {
583 // resync failed
584 ret = ResyncFailed();
585 // show the difference
586 ShowDiff(pFC, 0, save0, ptr0);
587 ShowDiff(pFC, 1, save1, ptr1);
588 PrintEndOfDiff();
589 goto cleanup;
590 }
591
592 // show the difference
593 fDifferent = TRUE;
594 next0 = ptr0 ? list_next(list0, ptr0) : ptr0;
595 next1 = ptr1 ? list_next(list1, ptr1) : ptr1;
596 ShowDiff(pFC, 0, save0, (next0 ? next0 : ptr0));
597 ShowDiff(pFC, 1, save1, (next1 ? next1 : ptr1));
598 PrintEndOfDiff();
599
600 // now resync'ed
601 }
602 } while (ret0 != FCRET_NO_MORE_DATA || ret1 != FCRET_NO_MORE_DATA);
603
604 quit:
605 ret = Finalize(pFC, ptr0, ptr1, fDifferent);
606 cleanup:
607 DeleteList(list0);
608 DeleteList(list1);
609 return ret;
610 }