[USP10] Sync with Wine Staging 2.9. CORE-13362
[reactos.git] / reactos / dll / win32 / usp10 / bidi.c
1 /*
2 * Uniscribe BiDirectional handling
3 *
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 *
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 *
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 *
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
42 */
43
44 #include <windef.h>
45
46 #include <wine/list.h>
47
48 #include "usp10_internal.h"
49
50 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
51
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
53
54 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
55 #define MAX_DEPTH 125
56
57 /* HELPER FUNCTIONS AND DECLARATIONS */
58
59 /*------------------------------------------------------------------------
60 Bidirectional Character Types
61
62 as defined by the Unicode Bidirectional Algorithm Table 3-7.
63
64 Note:
65
66 The list of bidirectional character types here is not grouped the
67 same way as the table 3-7, since the numberic values for the types
68 are chosen to keep the state and action tables compact.
69 ------------------------------------------------------------------------*/
70 enum directions
71 {
72 /* input types */
73 /* ON MUST be zero, code relies on ON = NI = 0 */
74 ON = 0, /* Other Neutral */
75 L, /* Left Letter */
76 R, /* Right Letter */
77 AN, /* Arabic Number */
78 EN, /* European Number */
79 AL, /* Arabic Letter (Right-to-left) */
80 NSM, /* Non-spacing Mark */
81 CS, /* Common Separator */
82 ES, /* European Separator */
83 ET, /* European Terminator (post/prefix e.g. $ and %) */
84
85 /* resolved types */
86 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
87
88 /* input types, */
89 S, /* Segment Separator (TAB) // used only in L1 */
90 WS, /* White space // used only in L1 */
91 B, /* Paragraph Separator (aka as PS) */
92
93 /* types for explicit controls */
94 RLO, /* these are used only in X1-X9 */
95 RLE,
96 LRO,
97 LRE,
98 PDF,
99
100 LRI, /* Isolate formatting characters new with 6.3 */
101 RLI,
102 FSI,
103 PDI,
104
105 /* resolved types, also resolved directions */
106 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
107 };
108
109 static const char debug_type[][4] =
110 {
111 "ON", /* Other Neutral */
112 "L", /* Left Letter */
113 "R", /* Right Letter */
114 "AN", /* Arabic Number */
115 "EN", /* European Number */
116 "AL", /* Arabic Letter (Right-to-left) */
117 "NSM", /* Non-spacing Mark */
118 "CS", /* Common Separator */
119 "ES", /* European Separator */
120 "ET", /* European Terminator (post/prefix e.g. $ and %) */
121 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
122 "S", /* Segment Separator (TAB) // used only in L1 */
123 "WS", /* White space // used only in L1 */
124 "B", /* Paragraph Separator (aka as PS) */
125 "RLO", /* these are used only in X1-X9 */
126 "RLE",
127 "LRO",
128 "LRE",
129 "PDF",
130 "LRI", /* Isolate formatting characters new with 6.3 */
131 "RLI",
132 "FSI",
133 "PDI",
134 };
135
136 /* HELPER FUNCTIONS */
137
138 static inline void dump_types(const char* header, WORD *types, int start, int end)
139 {
140 int i, len = 0;
141 TRACE("%s:",header);
142 for (i = start; i < end && len < 200; i++)
143 {
144 TRACE(" %s",debug_type[types[i]]);
145 len += strlen(debug_type[types[i]])+1;
146 }
147 if (i != end)
148 TRACE("...");
149 TRACE("\n");
150 }
151
152 /* Convert the libwine information to the direction enum */
153 static void classify(const WCHAR *string, WORD *chartype, DWORD count, const SCRIPT_CONTROL *c)
154 {
155 static const enum directions dir_map[16] =
156 {
157 L, /* unassigned defaults to L */
158 L,
159 R,
160 EN,
161 ES,
162 ET,
163 AN,
164 CS,
165 B,
166 S,
167 WS,
168 ON,
169 AL,
170 NSM,
171 BN,
172 PDF /* also LRE, LRO, RLE, RLO */
173 };
174
175 unsigned i;
176
177 for (i = 0; i < count; ++i)
178 {
179 chartype[i] = dir_map[get_char_typeW(string[i]) >> 12];
180 switch (chartype[i])
181 {
182 case ES:
183 if (!c->fLegacyBidiClass) break;
184 switch (string[i])
185 {
186 case '-':
187 case '+': chartype[i] = NI; break;
188 case '/': chartype[i] = CS; break;
189 }
190 break;
191 case PDF:
192 switch (string[i])
193 {
194 case 0x202A: chartype[i] = LRE; break;
195 case 0x202B: chartype[i] = RLE; break;
196 case 0x202C: chartype[i] = PDF; break;
197 case 0x202D: chartype[i] = LRO; break;
198 case 0x202E: chartype[i] = RLO; break;
199 case 0x2066: chartype[i] = LRI; break;
200 case 0x2067: chartype[i] = RLI; break;
201 case 0x2068: chartype[i] = FSI; break;
202 case 0x2069: chartype[i] = PDI; break;
203 }
204 break;
205 }
206 }
207 }
208
209 /* RESOLVE EXPLICIT */
210
211 static WORD GreaterEven(int i)
212 {
213 return odd(i) ? i + 1 : i + 2;
214 }
215
216 static WORD GreaterOdd(int i)
217 {
218 return odd(i) ? i + 2 : i + 1;
219 }
220
221 static WORD EmbeddingDirection(int level)
222 {
223 return odd(level) ? R : L;
224 }
225
226 /*------------------------------------------------------------------------
227 Function: resolveExplicit
228
229 Recursively resolves explicit embedding levels and overrides.
230 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
231
232 Input: Base embedding level and direction
233 Character count
234
235 Output: Array of embedding levels
236
237 In/Out: Array of direction classes
238
239
240 Note: The function uses two simple counters to keep track of
241 matching explicit codes and PDF. Use the default argument for
242 the outermost call. The nesting counter counts the recursion
243 depth and not the embedding level.
244 ------------------------------------------------------------------------*/
245 typedef struct tagStackItem {
246 int level;
247 int override;
248 BOOL isolate;
249 } StackItem;
250
251 #define push_stack(l,o,i) \
252 do { stack_top--; \
253 stack[stack_top].level = l; \
254 stack[stack_top].override = o; \
255 stack[stack_top].isolate = i;} while(0)
256
257 #define pop_stack() do { stack_top++; } while(0)
258
259 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
260
261 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride)
262 {
263 /* X1 */
264 int overflow_isolate_count = 0;
265 int overflow_embedding_count = 0;
266 int valid_isolate_count = 0;
267 int i;
268
269 StackItem stack[MAX_DEPTH+2];
270 int stack_top = MAX_DEPTH+1;
271
272 stack[stack_top].level = level;
273 stack[stack_top].override = NI;
274 stack[stack_top].isolate = FALSE;
275
276 if (initialOverride)
277 {
278 if (odd(level))
279 push_stack(level, R, FALSE);
280 else
281 push_stack(level, L, FALSE);
282 }
283
284 for (i = 0; i < count; i++)
285 {
286 poutOverrides[i] = stack[stack_top].override;
287
288 /* X2 */
289 if (pclass[i] == RLE)
290 {
291 int least_odd = GreaterOdd(stack[stack_top].level);
292 poutLevel[i] = stack[stack_top].level;
293 if (valid_level(least_odd))
294 push_stack(least_odd, NI, FALSE);
295 else if (overflow_isolate_count == 0)
296 overflow_embedding_count++;
297 }
298 /* X3 */
299 else if (pclass[i] == LRE)
300 {
301 int least_even = GreaterEven(stack[stack_top].level);
302 poutLevel[i] = stack[stack_top].level;
303 if (valid_level(least_even))
304 push_stack(least_even, NI, FALSE);
305 else if (overflow_isolate_count == 0)
306 overflow_embedding_count++;
307 }
308 /* X4 */
309 else if (pclass[i] == RLO)
310 {
311 int least_odd = GreaterOdd(stack[stack_top].level);
312 poutLevel[i] = stack[stack_top].level;
313 if (valid_level(least_odd))
314 push_stack(least_odd, R, FALSE);
315 else if (overflow_isolate_count == 0)
316 overflow_embedding_count++;
317 }
318 /* X5 */
319 else if (pclass[i] == LRO)
320 {
321 int least_even = GreaterEven(stack[stack_top].level);
322 poutLevel[i] = stack[stack_top].level;
323 if (valid_level(least_even))
324 push_stack(least_even, L, FALSE);
325 else if (overflow_isolate_count == 0)
326 overflow_embedding_count++;
327 }
328 /* X5a */
329 else if (pclass[i] == RLI)
330 {
331 int least_odd = GreaterOdd(stack[stack_top].level);
332 poutLevel[i] = stack[stack_top].level;
333 if (valid_level(least_odd))
334 {
335 valid_isolate_count++;
336 push_stack(least_odd, NI, TRUE);
337 }
338 else
339 overflow_isolate_count++;
340 }
341 /* X5b */
342 else if (pclass[i] == LRI)
343 {
344 int least_even = GreaterEven(stack[stack_top].level);
345 poutLevel[i] = stack[stack_top].level;
346 if (valid_level(least_even))
347 {
348 valid_isolate_count++;
349 push_stack(least_even, NI, TRUE);
350 }
351 else
352 overflow_isolate_count++;
353 }
354 /* X5c */
355 else if (pclass[i] == FSI)
356 {
357 int j;
358 int new_level = 0;
359 int skipping = 0;
360 poutLevel[i] = stack[stack_top].level;
361 for (j = i+1; j < count; j++)
362 {
363 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
364 {
365 skipping++;
366 continue;
367 }
368 else if (pclass[j] == PDI)
369 {
370 if (skipping)
371 skipping --;
372 else
373 break;
374 continue;
375 }
376
377 if (skipping) continue;
378
379 if (pclass[j] == L)
380 {
381 new_level = 0;
382 break;
383 }
384 else if (pclass[j] == R || pclass[j] == AL)
385 {
386 new_level = 1;
387 break;
388 }
389 }
390 if (odd(new_level))
391 {
392 int least_odd = GreaterOdd(stack[stack_top].level);
393 if (valid_level(least_odd))
394 {
395 valid_isolate_count++;
396 push_stack(least_odd, NI, TRUE);
397 }
398 else
399 overflow_isolate_count++;
400 }
401 else
402 {
403 int least_even = GreaterEven(stack[stack_top].level);
404 if (valid_level(least_even))
405 {
406 valid_isolate_count++;
407 push_stack(least_even, NI, TRUE);
408 }
409 else
410 overflow_isolate_count++;
411 }
412 }
413 /* X6 */
414 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
415 {
416 poutLevel[i] = stack[stack_top].level;
417 if (stack[stack_top].override != NI)
418 pclass[i] = stack[stack_top].override;
419 }
420 /* X6a */
421 else if (pclass[i] == PDI)
422 {
423 if (overflow_isolate_count) overflow_isolate_count--;
424 else if (!valid_isolate_count) {/* do nothing */}
425 else
426 {
427 overflow_embedding_count = 0;
428 while (!stack[stack_top].isolate) pop_stack();
429 pop_stack();
430 valid_isolate_count --;
431 }
432 poutLevel[i] = stack[stack_top].level;
433 }
434 /* X7 */
435 else if (pclass[i] == PDF)
436 {
437 poutLevel[i] = stack[stack_top].level;
438 if (overflow_isolate_count) {/* do nothing */}
439 else if (overflow_embedding_count) overflow_embedding_count--;
440 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
441 pop_stack();
442 }
443 /* X8: Nothing */
444 }
445 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
446 for (i = 0; i < count ; i++)
447 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
448 pclass[i] = BN;
449 }
450
451 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
452 {
453 if (index == -1 || index == back_fence) return index;
454 index --;
455 while (index > back_fence && pcls[index] == BN) index --;
456 return index;
457 }
458
459 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
460 {
461 if (index == front_fence) return index;
462 index ++;
463 while (index < front_fence && pcls[index] == BN) index ++;
464 return index;
465 }
466
467 typedef struct tagRun
468 {
469 int start;
470 int end;
471 WORD e;
472 } Run;
473
474 typedef struct tagRunChar
475 {
476 WCHAR ch;
477 WORD *pcls;
478 } RunChar;
479
480 typedef struct tagIsolatedRun
481 {
482 struct list entry;
483 int length;
484 WORD sos;
485 WORD eos;
486 WORD e;
487
488 RunChar item[1];
489 } IsolatedRun;
490
491 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
492 {
493 if (index >= (iso_run->length-1)) return -1;
494 index ++;
495 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
496 if (index == iso_run->length) return -1;
497 return index;
498 }
499
500 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
501 {
502
503 if (index <= 0) return -1;
504 index --;
505 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
506 return index;
507 }
508
509 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
510 {
511 int i, len = 0;
512 TRACE("%s:",header);
513 TRACE("[ ");
514 for (i = 0; i < iso_run->length && len < 200; i++)
515 {
516 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
517 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
518 }
519 if (i != iso_run->length)
520 TRACE("...");
521 TRACE(" ]\n");
522 }
523
524 /*------------------------------------------------------------------------
525 Function: resolveWeak
526
527 Resolves the directionality of numeric and other weak character types
528
529 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
530
531 Input: Array of embedding levels
532 Character count
533
534 In/Out: Array of directional classes
535
536 Note: On input only these directional classes are expected
537 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
538 ------------------------------------------------------------------------*/
539
540 static void resolveWeak(IsolatedRun * iso_run)
541 {
542 int i;
543
544 /* W1 */
545 for (i=0; i < iso_run->length; i++)
546 {
547 if (*iso_run->item[i].pcls == NSM)
548 {
549 int j = iso_previousValidChar(iso_run, i);
550 if (j == -1)
551 *iso_run->item[i].pcls = iso_run->sos;
552 else if (*iso_run->item[j].pcls >= LRI)
553 *iso_run->item[i].pcls = ON;
554 else
555 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
556 }
557 }
558
559 /* W2 */
560 for (i = 0; i < iso_run->length; i++)
561 {
562 if (*iso_run->item[i].pcls == EN)
563 {
564 int j = iso_previousValidChar(iso_run, i);
565 while (j > -1)
566 {
567 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
568 {
569 if (*iso_run->item[j].pcls == AL)
570 *iso_run->item[i].pcls = AN;
571 break;
572 }
573 j = iso_previousValidChar(iso_run, j);
574 }
575 }
576 }
577
578 /* W3 */
579 for (i = 0; i < iso_run->length; i++)
580 {
581 if (*iso_run->item[i].pcls == AL)
582 *iso_run->item[i].pcls = R;
583 }
584
585 /* W4 */
586 for (i = 0; i < iso_run->length; i++)
587 {
588 if (*iso_run->item[i].pcls == ES)
589 {
590 int b = iso_previousValidChar(iso_run, i);
591 int f = iso_nextValidChar(iso_run, i);
592
593 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
594 *iso_run->item[i].pcls = EN;
595 }
596 else if (*iso_run->item[i].pcls == CS)
597 {
598 int b = iso_previousValidChar(iso_run, i);
599 int f = iso_nextValidChar(iso_run, i);
600
601 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
602 *iso_run->item[i].pcls = EN;
603 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
604 *iso_run->item[i].pcls = AN;
605 }
606 }
607
608 /* W5 */
609 for (i = 0; i < iso_run->length; i++)
610 {
611 if (*iso_run->item[i].pcls == ET)
612 {
613 int j;
614 for (j = i-1 ; j > -1; j--)
615 {
616 if (*iso_run->item[j].pcls == BN) continue;
617 if (*iso_run->item[j].pcls == ET) continue;
618 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
619 else break;
620 }
621 if (*iso_run->item[i].pcls == ET)
622 {
623 for (j = i+1; j < iso_run->length; j++)
624 {
625 if (*iso_run->item[j].pcls == BN) continue;
626 if (*iso_run->item[j].pcls == ET) continue;
627 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
628 else break;
629 }
630 }
631 }
632 }
633
634 /* W6 */
635 for (i = 0; i < iso_run->length; i++)
636 {
637 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
638 {
639 int b = i-1;
640 int f = i+1;
641 if (b > -1 && *iso_run->item[b].pcls == BN)
642 *iso_run->item[b].pcls = ON;
643 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
644 *iso_run->item[f].pcls = ON;
645
646 *iso_run->item[i].pcls = ON;
647 }
648 }
649
650 /* W7 */
651 for (i = 0; i < iso_run->length; i++)
652 {
653 if (*iso_run->item[i].pcls == EN)
654 {
655 int j;
656 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
657 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
658 {
659 if (*iso_run->item[j].pcls == L)
660 *iso_run->item[i].pcls = L;
661 break;
662 }
663 if (iso_run->sos == L && j == -1)
664 *iso_run->item[i].pcls = L;
665 }
666 }
667 }
668
669 typedef struct tagBracketPair
670 {
671 int start;
672 int end;
673 } BracketPair;
674
675 static int compr(const void *a, const void* b)
676 {
677 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
678 }
679
680 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
681 {
682 WCHAR *open_stack;
683 int *stack_index;
684 int stack_top = iso_run->length;
685 BracketPair *out = NULL;
686 int pair_count = 0;
687 int i;
688
689 open_stack = heap_alloc(iso_run->length * sizeof(*open_stack));
690 stack_index = heap_alloc(iso_run->length * sizeof(*stack_index));
691
692 for (i = 0; i < iso_run->length; i++)
693 {
694 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
695 if (ubv)
696 {
697 if (!out)
698 {
699 out = heap_alloc(sizeof(*out));
700 out[0].start = -1;
701 }
702
703 if ((ubv >> 8) == 0)
704 {
705 stack_top --;
706 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
707 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
708 if (open_stack[stack_top] == 0x232A)
709 open_stack[stack_top] = 0x3009;
710 stack_index[stack_top] = i;
711 }
712 else if ((ubv >> 8) == 1)
713 {
714 int j;
715 if (stack_top == iso_run->length) continue;
716 for (j = stack_top; j < iso_run->length; j++)
717 {
718 WCHAR c = iso_run->item[i].ch;
719 if (c == 0x232A) c = 0x3009;
720 if (c == open_stack[j])
721 {
722 out[pair_count].start = stack_index[j];
723 out[pair_count].end = i;
724 pair_count++;
725 out = HeapReAlloc(GetProcessHeap(), 0, out, sizeof(BracketPair) * (pair_count+1));
726 out[pair_count].start = -1;
727 stack_top = j+1;
728 break;
729 }
730 }
731 }
732 }
733 }
734 if (pair_count == 0)
735 {
736 heap_free(out);
737 out = NULL;
738 }
739 else if (pair_count > 1)
740 qsort(out, pair_count, sizeof(BracketPair), compr);
741
742 heap_free(open_stack);
743 heap_free(stack_index);
744 return out;
745 }
746
747 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
748
749 /*------------------------------------------------------------------------
750 Function: resolveNeutrals
751
752 Resolves the directionality of neutral character types.
753
754 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
755
756 Input: Array of embedding levels
757 Character count
758 Baselevel
759
760 In/Out: Array of directional classes
761
762 Note: On input only these directional classes are expected
763 R, L, NI, AN, EN and BN
764
765 W8 resolves a number of ENs to L
766 ------------------------------------------------------------------------*/
767 static void resolveNeutrals(IsolatedRun *iso_run)
768 {
769 int i;
770 BracketPair *pairs = NULL;
771
772 /* Translate isolates into NI */
773 for (i = 0; i < iso_run->length; i++)
774 {
775 if (*iso_run->item[i].pcls >= LRI)
776 *iso_run->item[i].pcls = NI;
777
778 switch(*iso_run->item[i].pcls)
779 {
780 case B:
781 case S:
782 case WS: *iso_run->item[i].pcls = NI;
783 }
784
785 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
786 }
787
788 /* N0: Skipping bracketed pairs for now */
789 pairs = computeBracketPairs(iso_run);
790 if (pairs)
791 {
792 BracketPair *p = &pairs[0];
793 int i = 0;
794 while (p->start >= 0)
795 {
796 int j;
797 int e = EmbeddingDirection(iso_run->e);
798 int o = EmbeddingDirection(iso_run->e+1);
799 BOOL flag_o = FALSE;
800 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
801
802 /* N0.b */
803 for (j = p->start+1; j < p->end; j++)
804 {
805 if (N0_TYPE(*iso_run->item[j].pcls) == e)
806 {
807 *iso_run->item[p->start].pcls = e;
808 *iso_run->item[p->end].pcls = e;
809 break;
810 }
811 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
812 flag_o = TRUE;
813 }
814 /* N0.c */
815 if (j == p->end && flag_o)
816 {
817 for (j = p->start; j >= 0; j--)
818 {
819 if (N0_TYPE(*iso_run->item[j].pcls) == o)
820 {
821 *iso_run->item[p->start].pcls = o;
822 *iso_run->item[p->end].pcls = o;
823 break;
824 }
825 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
826 {
827 *iso_run->item[p->start].pcls = e;
828 *iso_run->item[p->end].pcls = e;
829 break;
830 }
831 }
832 if ( j < 0 )
833 {
834 *iso_run->item[p->start].pcls = iso_run->sos;
835 *iso_run->item[p->end].pcls = iso_run->sos;
836 }
837 }
838
839 i++;
840 p = &pairs[i];
841 }
842 heap_free(pairs);
843 }
844
845 /* N1 */
846 for (i = 0; i < iso_run->length; i++)
847 {
848 WORD l,r;
849
850 if (*iso_run->item[i].pcls == NI)
851 {
852 int j;
853 int b = iso_previousValidChar(iso_run, i);
854
855 if (b == -1)
856 {
857 l = iso_run->sos;
858 b = 0;
859 }
860 else
861 {
862 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
863 l = R;
864 else if (*iso_run->item[b].pcls == L)
865 l = L;
866 else /* No string type */
867 continue;
868 }
869 j = iso_nextValidChar(iso_run, i);
870 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
871
872 if (j == -1)
873 {
874 r = iso_run->eos;
875 j = iso_run->length;
876 }
877 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
878 r = R;
879 else if (*iso_run->item[j].pcls == L)
880 r = L;
881 else /* No string type */
882 continue;
883
884 if (r == l)
885 {
886 for (b = i; b < j && b < iso_run->length; b++)
887 *iso_run->item[b].pcls = r;
888 }
889 }
890 }
891
892 /* N2 */
893 for (i = 0; i < iso_run->length; i++)
894 {
895 if (*iso_run->item[i].pcls == NI)
896 {
897 int b = i-1;
898 int f = i+1;
899 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
900 if (b > -1 && *iso_run->item[b].pcls == BN)
901 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
902 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
903 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
904 }
905 }
906 }
907
908 /*------------------------------------------------------------------------
909 Function: resolveImplicit
910
911 Recursively resolves implicit embedding levels.
912 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
913
914 Input: Array of direction classes
915 Character count
916 Base level
917
918 In/Out: Array of embedding levels
919
920 Note: levels may exceed 15 on output.
921 Accepted subset of direction classes
922 R, L, AN, EN
923 ------------------------------------------------------------------------*/
924 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
925 {
926 int i;
927
928 /* I1/2 */
929 for (i = sos; i <= eos; i++)
930 {
931 if (pcls[i] == BN)
932 continue;
933
934 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
935 ASSERT(pcls[i] < 5); /* "Out of range." */
936
937 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
938 plevel[i]++;
939 else if (!odd(plevel[i]) && pcls[i] == R)
940 plevel[i]++;
941 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
942 plevel[i]+=2;
943 }
944 }
945
946 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
947 {
948 int i;
949
950 /* L1 */
951 for (i = sos; i <= eos; i++)
952 {
953 if (pcls[i] == B || pcls[i] == S)
954 {
955 int j = i -1;
956 while (i > sos && j >= sos &&
957 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
958 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
959 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
960 plevel[j--] = baselevel;
961 plevel[i] = baselevel;
962 }
963 else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO ||
964 pcls[i] == PDF || pcls[i] == BN)
965 {
966 plevel[i] = i ? plevel[i - 1] : baselevel;
967 }
968 if (i == eos &&
969 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
970 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
971 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
972 {
973 int j = i;
974 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
975 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
976 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
977 plevel[j--] = baselevel;
978 }
979 }
980 }
981
982 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, const WORD *pLevel,
983 const WCHAR *string, unsigned int uCount, struct list *set)
984 {
985 int run_start, run_end, i;
986 int run_count = 0;
987 Run *runs;
988 IsolatedRun *current_isolated;
989
990 if (!(runs = heap_alloc(uCount * sizeof(*runs))))
991 return;
992
993 list_init(set);
994
995 /* Build Runs */
996 run_start = 0;
997 while (run_start < uCount)
998 {
999 run_end = nextValidChar(pcls, run_start, uCount);
1000 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
1001 run_end --;
1002 runs[run_count].start = run_start;
1003 runs[run_count].end = run_end;
1004 runs[run_count].e = pLevel[run_start];
1005 run_start = nextValidChar(pcls, run_end, uCount);
1006 run_count++;
1007 }
1008
1009 /* Build Isolating Runs */
1010 i = 0;
1011 while (i < run_count)
1012 {
1013 int k = i;
1014 if (runs[k].start >= 0)
1015 {
1016 int type_fence, real_end;
1017 int j;
1018
1019 if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount]))))
1020 break;
1021
1022 run_start = runs[k].start;
1023 current_isolated->e = runs[k].e;
1024 current_isolated->length = (runs[k].end - runs[k].start)+1;
1025
1026 for (j = 0; j < current_isolated->length; j++)
1027 {
1028 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1029 current_isolated->item[j].ch = string[runs[k].start + j];
1030 }
1031
1032 run_end = runs[k].end;
1033
1034 TRACE("{ [%i -- %i]",run_start, run_end);
1035
1036 if (pcls[run_end] == BN)
1037 run_end = previousValidChar(pcls, run_end, runs[k].start);
1038
1039 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1040 {
1041 j = k+1;
1042 search:
1043 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1044 if (j < run_count && runs[i].e != runs[j].e)
1045 {
1046 j++;
1047 goto search;
1048 }
1049
1050 if (j != run_count)
1051 {
1052 int m;
1053 int l = current_isolated->length;
1054
1055 current_isolated->length += (runs[j].end - runs[j].start)+1;
1056 for (m = 0; l < current_isolated->length; l++, m++)
1057 {
1058 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1059 current_isolated->item[l].ch = string[runs[j].start + m];
1060 }
1061
1062 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1063
1064 run_end = runs[j].end;
1065 if (pcls[run_end] == BN)
1066 run_end = previousValidChar(pcls, run_end, runs[i].start);
1067 runs[j].start = -1;
1068 k = j;
1069 }
1070 else
1071 {
1072 run_end = uCount;
1073 break;
1074 }
1075 }
1076
1077 type_fence = previousValidChar(pcls, run_start, -1);
1078
1079 if (type_fence == -1)
1080 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1081 else
1082 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1083
1084 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1085
1086 if (run_end == uCount)
1087 current_isolated->eos = current_isolated->sos;
1088 else
1089 {
1090 /* eos could be an BN */
1091 if ( pcls[run_end] == BN )
1092 {
1093 real_end = previousValidChar(pcls, run_end, run_start-1);
1094 if (real_end < run_start)
1095 real_end = run_start;
1096 }
1097 else
1098 real_end = run_end;
1099
1100 type_fence = nextValidChar(pcls, run_end, uCount);
1101 if (type_fence == uCount)
1102 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1103 else
1104 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1105
1106 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1107 }
1108
1109 list_add_tail(set, &current_isolated->entry);
1110 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1111 }
1112 i++;
1113 }
1114
1115 heap_free(runs);
1116 }
1117
1118 /*************************************************************
1119 * BIDI_DeterminLevels
1120 */
1121 BOOL BIDI_DetermineLevels(
1122 const WCHAR *lpString, /* [in] The string for which information is to be returned */
1123 unsigned int uCount, /* [in] Number of WCHARs in string. */
1124 const SCRIPT_STATE *s,
1125 const SCRIPT_CONTROL *c,
1126 WORD *lpOutLevels, /* [out] final string levels */
1127 WORD *lpOutOverrides /* [out] final string overrides */
1128 )
1129 {
1130 WORD *chartype;
1131 unsigned baselevel = 0;
1132 struct list IsolatingRuns;
1133 IsolatedRun *iso_run, *next;
1134
1135 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1136
1137 if (!(chartype = heap_alloc(uCount * sizeof(*chartype))))
1138 {
1139 WARN("Out of memory\n");
1140 return FALSE;
1141 }
1142
1143 baselevel = s->uBidiLevel;
1144
1145 classify(lpString, chartype, uCount, c);
1146 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1147
1148 memset(lpOutOverrides, 0, sizeof(WORD) * uCount);
1149
1150 /* resolve explicit */
1151 resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection);
1152 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1153
1154 /* X10/BD13: Computer Isolating runs */
1155 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1156
1157 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1158 {
1159 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1160
1161 /* resolve weak */
1162 resolveWeak(iso_run);
1163 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1164
1165 /* resolve neutrals */
1166 resolveNeutrals(iso_run);
1167 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1168
1169 list_remove(&iso_run->entry);
1170 heap_free(iso_run);
1171 }
1172
1173 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1174 /* resolveImplicit */
1175 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1176
1177 /* resolveResolvedLevels*/
1178 classify(lpString, chartype, uCount, c);
1179 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1180
1181 heap_free(chartype);
1182 return TRUE;
1183 }
1184
1185 /* reverse cch indexes */
1186 static void reverse(int *pidx, int cch)
1187 {
1188 int temp;
1189 int ich = 0;
1190 for (; ich < --cch; ich++)
1191 {
1192 temp = pidx[ich];
1193 pidx[ich] = pidx[cch];
1194 pidx[cch] = temp;
1195 }
1196 }
1197
1198
1199 /*------------------------------------------------------------------------
1200 Functions: reorder/reorderLevel
1201
1202 Recursively reorders the display string
1203 "From the highest level down, reverse all characters at that level and
1204 higher, down to the lowest odd level"
1205
1206 Implements rule L2 of the Unicode bidi Algorithm.
1207
1208 Input: Array of embedding levels
1209 Character count
1210 Flag enabling reversal (set to false by initial caller)
1211
1212 In/Out: Text to reorder
1213
1214 Note: levels may exceed 15 resp. 61 on input.
1215
1216 Rule L3 - reorder combining marks is not implemented here
1217 Rule L4 - glyph mirroring is implemented as a display option below
1218
1219 Note: this should be applied a line at a time
1220 -------------------------------------------------------------------------*/
1221 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1222 {
1223 int ich = 0;
1224
1225 /* true as soon as first odd level encountered */
1226 fReverse = fReverse || odd(level);
1227
1228 for (; ich < cch; ich++)
1229 {
1230 if (plevel[ich] < level)
1231 {
1232 break;
1233 }
1234 else if (plevel[ich] > level)
1235 {
1236 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1237 cch - ich, fReverse) - 1;
1238 }
1239 }
1240 if (fReverse)
1241 {
1242 reverse(pIndexs, ich);
1243 }
1244 return ich;
1245 }
1246
1247 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1248 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1249 {
1250 int ich = 0;
1251 int newlevel = -1;
1252
1253 /* true as soon as first odd level encountered */
1254 fReverse = fReverse || odd(level);
1255
1256 for (; ich < cch; ich++)
1257 {
1258 if (plevel[ich] < level)
1259 break;
1260 else if (plevel[ich] > level)
1261 newlevel = ich;
1262 }
1263 if (fReverse)
1264 {
1265 reverse(pIndexs, ich);
1266 }
1267
1268 if (newlevel >= 0)
1269 {
1270 ich = 0;
1271 for (; ich < cch; ich++)
1272 if (plevel[ich] < level)
1273 break;
1274 else if (plevel[ich] > level)
1275 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1276 cch - ich, fReverse) - 1;
1277 }
1278
1279 return ich;
1280 }
1281
1282 BOOL BIDI_GetStrengths(const WCHAR *string, unsigned int count, const SCRIPT_CONTROL *c, WORD *strength)
1283 {
1284 unsigned int i;
1285
1286 classify(string, strength, count, c);
1287 for (i = 0; i < count; i++)
1288 {
1289 switch (strength[i])
1290 {
1291 case L:
1292 case LRE:
1293 case LRO:
1294 case R:
1295 case AL:
1296 case RLE:
1297 case RLO:
1298 strength[i] = BIDI_STRONG;
1299 break;
1300 case PDF:
1301 case EN:
1302 case ES:
1303 case ET:
1304 case AN:
1305 case CS:
1306 case BN:
1307 strength[i] = BIDI_WEAK;
1308 break;
1309 case B:
1310 case S:
1311 case WS:
1312 case ON:
1313 default: /* Neutrals and NSM */
1314 strength[i] = BIDI_NEUTRAL;
1315 }
1316 }
1317 return TRUE;
1318 }