37b09ba053ccc07642366d1196435829e510f624
[reactos.git] / reactos / dll / win32 / usp10 / bidi.c
1 /*
2 * Uniscribe BiDirectional handling
3 *
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 *
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 *
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 *
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
42 */
43
44 #include <windef.h>
45
46 #include <wine/list.h>
47
48 #include "usp10_internal.h"
49
50 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
51
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
53
54 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
55 #define MAX_DEPTH 125
56
57 /* HELPER FUNCTIONS AND DECLARATIONS */
58
59 /*------------------------------------------------------------------------
60 Bidirectional Character Types
61
62 as defined by the Unicode Bidirectional Algorithm Table 3-7.
63
64 Note:
65
66 The list of bidirectional character types here is not grouped the
67 same way as the table 3-7, since the numberic values for the types
68 are chosen to keep the state and action tables compact.
69 ------------------------------------------------------------------------*/
70 enum directions
71 {
72 /* input types */
73 /* ON MUST be zero, code relies on ON = NI = 0 */
74 ON = 0, /* Other Neutral */
75 L, /* Left Letter */
76 R, /* Right Letter */
77 AN, /* Arabic Number */
78 EN, /* European Number */
79 AL, /* Arabic Letter (Right-to-left) */
80 NSM, /* Non-spacing Mark */
81 CS, /* Common Separator */
82 ES, /* European Separator */
83 ET, /* European Terminator (post/prefix e.g. $ and %) */
84
85 /* resolved types */
86 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
87
88 /* input types, */
89 S, /* Segment Separator (TAB) // used only in L1 */
90 WS, /* White space // used only in L1 */
91 B, /* Paragraph Separator (aka as PS) */
92
93 /* types for explicit controls */
94 RLO, /* these are used only in X1-X9 */
95 RLE,
96 LRO,
97 LRE,
98 PDF,
99
100 LRI, /* Isolate formatting characters new with 6.3 */
101 RLI,
102 FSI,
103 PDI,
104
105 /* resolved types, also resolved directions */
106 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
107 };
108
109 static const char debug_type[][4] =
110 {
111 "ON", /* Other Neutral */
112 "L", /* Left Letter */
113 "R", /* Right Letter */
114 "AN", /* Arabic Number */
115 "EN", /* European Number */
116 "AL", /* Arabic Letter (Right-to-left) */
117 "NSM", /* Non-spacing Mark */
118 "CS", /* Common Separator */
119 "ES", /* European Separator */
120 "ET", /* European Terminator (post/prefix e.g. $ and %) */
121 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
122 "S", /* Segment Separator (TAB) // used only in L1 */
123 "WS", /* White space // used only in L1 */
124 "B", /* Paragraph Separator (aka as PS) */
125 "RLO", /* these are used only in X1-X9 */
126 "RLE",
127 "LRO",
128 "LRE",
129 "PDF",
130 "LRI", /* Isolate formatting characters new with 6.3 */
131 "RLI",
132 "FSI",
133 "PDI",
134 };
135
136 /* HELPER FUNCTIONS */
137
138 static inline void dump_types(const char* header, WORD *types, int start, int end)
139 {
140 int i, len = 0;
141 TRACE("%s:",header);
142 for (i = start; i < end && len < 200; i++)
143 {
144 TRACE(" %s",debug_type[types[i]]);
145 len += strlen(debug_type[types[i]])+1;
146 }
147 if (i != end)
148 TRACE("...");
149 TRACE("\n");
150 }
151
152 /* Convert the libwine information to the direction enum */
153 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
154 {
155 static const enum directions dir_map[16] =
156 {
157 L, /* unassigned defaults to L */
158 L,
159 R,
160 EN,
161 ES,
162 ET,
163 AN,
164 CS,
165 B,
166 S,
167 WS,
168 ON,
169 AL,
170 NSM,
171 BN,
172 PDF /* also LRE, LRO, RLE, RLO */
173 };
174
175 unsigned i;
176
177 for (i = 0; i < uCount; ++i)
178 {
179 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
180 switch (chartype[i])
181 {
182 case ES:
183 if (!c->fLegacyBidiClass) break;
184 switch (lpString[i])
185 {
186 case '-':
187 case '+': chartype[i] = NI; break;
188 case '/': chartype[i] = CS; break;
189 }
190 break;
191 case PDF:
192 switch (lpString[i])
193 {
194 case 0x202A: chartype[i] = LRE; break;
195 case 0x202B: chartype[i] = RLE; break;
196 case 0x202C: chartype[i] = PDF; break;
197 case 0x202D: chartype[i] = LRO; break;
198 case 0x202E: chartype[i] = RLO; break;
199 case 0x2066: chartype[i] = LRI; break;
200 case 0x2067: chartype[i] = RLI; break;
201 case 0x2068: chartype[i] = FSI; break;
202 case 0x2069: chartype[i] = PDI; break;
203 }
204 break;
205 }
206 }
207 }
208
209 /* RESOLVE EXPLICIT */
210
211 static WORD GreaterEven(int i)
212 {
213 return odd(i) ? i + 1 : i + 2;
214 }
215
216 static WORD GreaterOdd(int i)
217 {
218 return odd(i) ? i + 2 : i + 1;
219 }
220
221 static WORD EmbeddingDirection(int level)
222 {
223 return odd(level) ? R : L;
224 }
225
226 /*------------------------------------------------------------------------
227 Function: resolveExplicit
228
229 Recursively resolves explicit embedding levels and overrides.
230 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
231
232 Input: Base embedding level and direction
233 Character count
234
235 Output: Array of embedding levels
236
237 In/Out: Array of direction classes
238
239
240 Note: The function uses two simple counters to keep track of
241 matching explicit codes and PDF. Use the default argument for
242 the outermost call. The nesting counter counts the recursion
243 depth and not the embedding level.
244 ------------------------------------------------------------------------*/
245 typedef struct tagStackItem {
246 int level;
247 int override;
248 BOOL isolate;
249 } StackItem;
250
251 #define push_stack(l,o,i) \
252 do { stack_top--; \
253 stack[stack_top].level = l; \
254 stack[stack_top].override = o; \
255 stack[stack_top].isolate = i;} while(0)
256
257 #define pop_stack() do { stack_top++; } while(0)
258
259 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
260
261 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride)
262 {
263 /* X1 */
264 int overflow_isolate_count = 0;
265 int overflow_embedding_count = 0;
266 int valid_isolate_count = 0;
267 int i;
268
269 StackItem stack[MAX_DEPTH+2];
270 int stack_top = MAX_DEPTH+1;
271
272 stack[stack_top].level = level;
273 stack[stack_top].override = NI;
274 stack[stack_top].isolate = FALSE;
275
276 if (initialOverride)
277 {
278 if (odd(level))
279 push_stack(level, R, FALSE);
280 else
281 push_stack(level, L, FALSE);
282 }
283
284 for (i = 0; i < count; i++)
285 {
286 poutOverrides[i] = stack[stack_top].override;
287
288 /* X2 */
289 if (pclass[i] == RLE)
290 {
291 int least_odd = GreaterOdd(stack[stack_top].level);
292 poutLevel[i] = stack[stack_top].level;
293 if (valid_level(least_odd))
294 push_stack(least_odd, NI, FALSE);
295 else if (overflow_isolate_count == 0)
296 overflow_embedding_count++;
297 }
298 /* X3 */
299 else if (pclass[i] == LRE)
300 {
301 int least_even = GreaterEven(stack[stack_top].level);
302 poutLevel[i] = stack[stack_top].level;
303 if (valid_level(least_even))
304 push_stack(least_even, NI, FALSE);
305 else if (overflow_isolate_count == 0)
306 overflow_embedding_count++;
307 }
308 /* X4 */
309 else if (pclass[i] == RLO)
310 {
311 int least_odd = GreaterOdd(stack[stack_top].level);
312 poutLevel[i] = stack[stack_top].level;
313 if (valid_level(least_odd))
314 push_stack(least_odd, R, FALSE);
315 else if (overflow_isolate_count == 0)
316 overflow_embedding_count++;
317 }
318 /* X5 */
319 else if (pclass[i] == LRO)
320 {
321 int least_even = GreaterEven(stack[stack_top].level);
322 poutLevel[i] = stack[stack_top].level;
323 if (valid_level(least_even))
324 push_stack(least_even, L, FALSE);
325 else if (overflow_isolate_count == 0)
326 overflow_embedding_count++;
327 }
328 /* X5a */
329 else if (pclass[i] == RLI)
330 {
331 int least_odd = GreaterOdd(stack[stack_top].level);
332 poutLevel[i] = stack[stack_top].level;
333 if (valid_level(least_odd))
334 {
335 valid_isolate_count++;
336 push_stack(least_odd, NI, TRUE);
337 }
338 else
339 overflow_isolate_count++;
340 }
341 /* X5b */
342 else if (pclass[i] == LRI)
343 {
344 int least_even = GreaterEven(stack[stack_top].level);
345 poutLevel[i] = stack[stack_top].level;
346 if (valid_level(least_even))
347 {
348 valid_isolate_count++;
349 push_stack(least_even, NI, TRUE);
350 }
351 else
352 overflow_isolate_count++;
353 }
354 /* X5c */
355 else if (pclass[i] == FSI)
356 {
357 int j;
358 int new_level = 0;
359 int skipping = 0;
360 poutLevel[i] = stack[stack_top].level;
361 for (j = i+1; j < count; j++)
362 {
363 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
364 {
365 skipping++;
366 continue;
367 }
368 else if (pclass[j] == PDI)
369 {
370 if (skipping)
371 skipping --;
372 else
373 break;
374 continue;
375 }
376
377 if (skipping) continue;
378
379 if (pclass[j] == L)
380 {
381 new_level = 0;
382 break;
383 }
384 else if (pclass[j] == R || pclass[j] == AL)
385 {
386 new_level = 1;
387 break;
388 }
389 }
390 if (odd(new_level))
391 {
392 int least_odd = GreaterOdd(stack[stack_top].level);
393 if (valid_level(least_odd))
394 {
395 valid_isolate_count++;
396 push_stack(least_odd, NI, TRUE);
397 }
398 else
399 overflow_isolate_count++;
400 }
401 else
402 {
403 int least_even = GreaterEven(stack[stack_top].level);
404 if (valid_level(least_even))
405 {
406 valid_isolate_count++;
407 push_stack(least_even, NI, TRUE);
408 }
409 else
410 overflow_isolate_count++;
411 }
412 }
413 /* X6 */
414 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
415 {
416 poutLevel[i] = stack[stack_top].level;
417 if (stack[stack_top].override != NI)
418 pclass[i] = stack[stack_top].override;
419 }
420 /* X6a */
421 else if (pclass[i] == PDI)
422 {
423 if (overflow_isolate_count) overflow_isolate_count--;
424 else if (!valid_isolate_count) {/* do nothing */}
425 else
426 {
427 overflow_embedding_count = 0;
428 while (!stack[stack_top].isolate) pop_stack();
429 pop_stack();
430 valid_isolate_count --;
431 }
432 poutLevel[i] = stack[stack_top].level;
433 }
434 /* X7 */
435 else if (pclass[i] == PDF)
436 {
437 poutLevel[i] = stack[stack_top].level;
438 if (overflow_isolate_count) {/* do nothing */}
439 else if (overflow_embedding_count) overflow_embedding_count--;
440 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
441 pop_stack();
442 }
443 /* X8: Nothing */
444 }
445 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
446 for (i = 0; i < count ; i++)
447 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
448 pclass[i] = BN;
449 }
450
451 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
452 {
453 if (index == -1 || index == back_fence) return index;
454 index --;
455 while (index > back_fence && pcls[index] == BN) index --;
456 return index;
457 }
458
459 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
460 {
461 if (index == front_fence) return index;
462 index ++;
463 while (index < front_fence && pcls[index] == BN) index ++;
464 return index;
465 }
466
467 typedef struct tagRun
468 {
469 int start;
470 int end;
471 WORD e;
472 } Run;
473
474 typedef struct tagRunChar
475 {
476 WCHAR ch;
477 WORD *pcls;
478 } RunChar;
479
480 typedef struct tagIsolatedRun
481 {
482 struct list entry;
483 int length;
484 WORD sos;
485 WORD eos;
486 WORD e;
487
488 RunChar item[1];
489 } IsolatedRun;
490
491 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
492 {
493 if (index >= (iso_run->length-1)) return -1;
494 index ++;
495 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
496 if (index == iso_run->length) return -1;
497 return index;
498 }
499
500 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
501 {
502
503 if (index <= 0) return -1;
504 index --;
505 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
506 return index;
507 }
508
509 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
510 {
511 int i, len = 0;
512 TRACE("%s:",header);
513 TRACE("[ ");
514 for (i = 0; i < iso_run->length && len < 200; i++)
515 {
516 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
517 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
518 }
519 if (i != iso_run->length)
520 TRACE("...");
521 TRACE(" ]\n");
522 }
523
524 /*------------------------------------------------------------------------
525 Function: resolveWeak
526
527 Resolves the directionality of numeric and other weak character types
528
529 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
530
531 Input: Array of embedding levels
532 Character count
533
534 In/Out: Array of directional classes
535
536 Note: On input only these directional classes are expected
537 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
538 ------------------------------------------------------------------------*/
539
540 static void resolveWeak(IsolatedRun * iso_run)
541 {
542 int i;
543
544 /* W1 */
545 for (i=0; i < iso_run->length; i++)
546 {
547 if (*iso_run->item[i].pcls == NSM)
548 {
549 int j = iso_previousValidChar(iso_run, i);
550 if (j == -1)
551 *iso_run->item[i].pcls = iso_run->sos;
552 else if (*iso_run->item[j].pcls >= LRI)
553 *iso_run->item[i].pcls = ON;
554 else
555 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
556 }
557 }
558
559 /* W2 */
560 for (i = 0; i < iso_run->length; i++)
561 {
562 if (*iso_run->item[i].pcls == EN)
563 {
564 int j = iso_previousValidChar(iso_run, i);
565 while (j > -1)
566 {
567 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
568 {
569 if (*iso_run->item[j].pcls == AL)
570 *iso_run->item[i].pcls = AN;
571 break;
572 }
573 j = iso_previousValidChar(iso_run, j);
574 }
575 }
576 }
577
578 /* W3 */
579 for (i = 0; i < iso_run->length; i++)
580 {
581 if (*iso_run->item[i].pcls == AL)
582 *iso_run->item[i].pcls = R;
583 }
584
585 /* W4 */
586 for (i = 0; i < iso_run->length; i++)
587 {
588 if (*iso_run->item[i].pcls == ES)
589 {
590 int b = iso_previousValidChar(iso_run, i);
591 int f = iso_nextValidChar(iso_run, i);
592
593 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
594 *iso_run->item[i].pcls = EN;
595 }
596 else if (*iso_run->item[i].pcls == CS)
597 {
598 int b = iso_previousValidChar(iso_run, i);
599 int f = iso_nextValidChar(iso_run, i);
600
601 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
602 *iso_run->item[i].pcls = EN;
603 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
604 *iso_run->item[i].pcls = AN;
605 }
606 }
607
608 /* W5 */
609 for (i = 0; i < iso_run->length; i++)
610 {
611 if (*iso_run->item[i].pcls == ET)
612 {
613 int j;
614 for (j = i-1 ; j > -1; j--)
615 {
616 if (*iso_run->item[j].pcls == BN) continue;
617 if (*iso_run->item[j].pcls == ET) continue;
618 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
619 else break;
620 }
621 if (*iso_run->item[i].pcls == ET)
622 {
623 for (j = i+1; j < iso_run->length; j++)
624 {
625 if (*iso_run->item[j].pcls == BN) continue;
626 if (*iso_run->item[j].pcls == ET) continue;
627 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
628 else break;
629 }
630 }
631 }
632 }
633
634 /* W6 */
635 for (i = 0; i < iso_run->length; i++)
636 {
637 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
638 {
639 int b = i-1;
640 int f = i+1;
641 if (b > -1 && *iso_run->item[b].pcls == BN)
642 *iso_run->item[b].pcls = ON;
643 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
644 *iso_run->item[f].pcls = ON;
645
646 *iso_run->item[i].pcls = ON;
647 }
648 }
649
650 /* W7 */
651 for (i = 0; i < iso_run->length; i++)
652 {
653 if (*iso_run->item[i].pcls == EN)
654 {
655 int j;
656 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
657 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
658 {
659 if (*iso_run->item[j].pcls == L)
660 *iso_run->item[i].pcls = L;
661 break;
662 }
663 if (iso_run->sos == L && j == -1)
664 *iso_run->item[i].pcls = L;
665 }
666 }
667 }
668
669 typedef struct tagBracketPair
670 {
671 int start;
672 int end;
673 } BracketPair;
674
675 static int compr(const void *a, const void* b)
676 {
677 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
678 }
679
680 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
681 {
682 WCHAR *open_stack;
683 int *stack_index;
684 int stack_top = iso_run->length;
685 BracketPair *out = NULL;
686 int pair_count = 0;
687 int i;
688
689 open_stack = HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR) * iso_run->length);
690 stack_index = HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run->length);
691
692 for (i = 0; i < iso_run->length; i++)
693 {
694 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
695 if (ubv)
696 {
697 if (!out)
698 {
699 out = HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair));
700 out[0].start = -1;
701 }
702
703 if ((ubv >> 8) == 0)
704 {
705 stack_top --;
706 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
707 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
708 if (open_stack[stack_top] == 0x232A)
709 open_stack[stack_top] = 0x3009;
710 stack_index[stack_top] = i;
711 }
712 else if ((ubv >> 8) == 1)
713 {
714 int j;
715 if (stack_top == iso_run->length) continue;
716 for (j = stack_top; j < iso_run->length; j++)
717 {
718 WCHAR c = iso_run->item[i].ch;
719 if (c == 0x232A) c = 0x3009;
720 if (c == open_stack[j])
721 {
722 out[pair_count].start = stack_index[j];
723 out[pair_count].end = i;
724 pair_count++;
725 out = HeapReAlloc(GetProcessHeap(), 0, out, sizeof(BracketPair) * (pair_count+1));
726 out[pair_count].start = -1;
727 stack_top = j+1;
728 break;
729 }
730 }
731 }
732 }
733 }
734 if (pair_count == 0)
735 {
736 HeapFree(GetProcessHeap(),0,out);
737 out = NULL;
738 }
739 else if (pair_count > 1)
740 qsort(out, pair_count, sizeof(BracketPair), compr);
741
742 HeapFree(GetProcessHeap(), 0, open_stack);
743 HeapFree(GetProcessHeap(), 0, stack_index);
744 return out;
745 }
746
747 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
748
749 /*------------------------------------------------------------------------
750 Function: resolveNeutrals
751
752 Resolves the directionality of neutral character types.
753
754 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
755
756 Input: Array of embedding levels
757 Character count
758 Baselevel
759
760 In/Out: Array of directional classes
761
762 Note: On input only these directional classes are expected
763 R, L, NI, AN, EN and BN
764
765 W8 resolves a number of ENs to L
766 ------------------------------------------------------------------------*/
767 static void resolveNeutrals(IsolatedRun *iso_run)
768 {
769 int i;
770 BracketPair *pairs = NULL;
771
772 /* Translate isolates into NI */
773 for (i = 0; i < iso_run->length; i++)
774 {
775 if (*iso_run->item[i].pcls >= LRI)
776 *iso_run->item[i].pcls = NI;
777
778 switch(*iso_run->item[i].pcls)
779 {
780 case B:
781 case S:
782 case WS: *iso_run->item[i].pcls = NI;
783 }
784
785 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
786 }
787
788 /* N0: Skipping bracketed pairs for now */
789 pairs = computeBracketPairs(iso_run);
790 if (pairs)
791 {
792 BracketPair *p = &pairs[0];
793 int i = 0;
794 while (p->start >= 0)
795 {
796 int j;
797 int e = EmbeddingDirection(iso_run->e);
798 int o = EmbeddingDirection(iso_run->e+1);
799 BOOL flag_o = FALSE;
800 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
801
802 /* N0.b */
803 for (j = p->start+1; j < p->end; j++)
804 {
805 if (N0_TYPE(*iso_run->item[j].pcls) == e)
806 {
807 *iso_run->item[p->start].pcls = e;
808 *iso_run->item[p->end].pcls = e;
809 break;
810 }
811 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
812 flag_o = TRUE;
813 }
814 /* N0.c */
815 if (j == p->end && flag_o)
816 {
817 for (j = p->start; j >= 0; j--)
818 {
819 if (N0_TYPE(*iso_run->item[j].pcls) == o)
820 {
821 *iso_run->item[p->start].pcls = o;
822 *iso_run->item[p->end].pcls = o;
823 break;
824 }
825 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
826 {
827 *iso_run->item[p->start].pcls = e;
828 *iso_run->item[p->end].pcls = e;
829 break;
830 }
831 }
832 if ( j < 0 )
833 {
834 *iso_run->item[p->start].pcls = iso_run->sos;
835 *iso_run->item[p->end].pcls = iso_run->sos;
836 }
837 }
838
839 i++;
840 p = &pairs[i];
841 }
842 HeapFree(GetProcessHeap(),0,pairs);
843 }
844
845 /* N1 */
846 for (i = 0; i < iso_run->length; i++)
847 {
848 WORD l,r;
849
850 if (*iso_run->item[i].pcls == NI)
851 {
852 int j;
853 int b = iso_previousValidChar(iso_run, i);
854
855 if (b == -1)
856 {
857 l = iso_run->sos;
858 b = 0;
859 }
860 else
861 {
862 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
863 l = R;
864 else if (*iso_run->item[b].pcls == L)
865 l = L;
866 else /* No string type */
867 continue;
868 }
869 j = iso_nextValidChar(iso_run, i);
870 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
871
872 if (j == -1)
873 {
874 r = iso_run->eos;
875 j = iso_run->length;
876 }
877 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
878 r = R;
879 else if (*iso_run->item[j].pcls == L)
880 r = L;
881 else /* No string type */
882 continue;
883
884 if (r == l)
885 {
886 for (b = i; b < j && b < iso_run->length; b++)
887 *iso_run->item[b].pcls = r;
888 }
889 }
890 }
891
892 /* N2 */
893 for (i = 0; i < iso_run->length; i++)
894 {
895 if (*iso_run->item[i].pcls == NI)
896 {
897 int b = i-1;
898 int f = i+1;
899 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
900 if (b > -1 && *iso_run->item[b].pcls == BN)
901 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
902 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
903 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
904 }
905 }
906 }
907
908 /*------------------------------------------------------------------------
909 Function: resolveImplicit
910
911 Recursively resolves implicit embedding levels.
912 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
913
914 Input: Array of direction classes
915 Character count
916 Base level
917
918 In/Out: Array of embedding levels
919
920 Note: levels may exceed 15 on output.
921 Accepted subset of direction classes
922 R, L, AN, EN
923 ------------------------------------------------------------------------*/
924 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
925 {
926 int i;
927
928 /* I1/2 */
929 for (i = sos; i <= eos; i++)
930 {
931 if (pcls[i] == BN)
932 continue;
933
934 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
935 ASSERT(pcls[i] < 5); /* "Out of range." */
936
937 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
938 plevel[i]++;
939 else if (!odd(plevel[i]) && pcls[i] == R)
940 plevel[i]++;
941 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
942 plevel[i]+=2;
943 }
944 }
945
946 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
947 {
948 int i;
949
950 /* L1 */
951 for (i = sos; i <= eos; i++)
952 {
953 if (pcls[i] == B || pcls[i] == S)
954 {
955 int j = i -1;
956 while (i > sos && j >= sos &&
957 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
958 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
959 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
960 plevel[j--] = baselevel;
961 plevel[i] = baselevel;
962 }
963 else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO ||
964 pcls[i] == PDF || pcls[i] == BN)
965 {
966 plevel[i] = i ? plevel[i - 1] : baselevel;
967 }
968 if (i == eos &&
969 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
970 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
971 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
972 {
973 int j = i;
974 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
975 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
976 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
977 plevel[j--] = baselevel;
978 }
979 }
980 }
981
982 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, WORD *pLevel, LPCWSTR lpString, int uCount, struct list *set)
983 {
984 int run_start, run_end, i;
985 int run_count = 0;
986 Run *runs;
987 IsolatedRun *current_isolated;
988
989 runs = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(Run));
990 if (!runs) return;
991
992 list_init(set);
993
994 /* Build Runs */
995 run_start = 0;
996 while (run_start < uCount)
997 {
998 run_end = nextValidChar(pcls, run_start, uCount);
999 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
1000 run_end --;
1001 runs[run_count].start = run_start;
1002 runs[run_count].end = run_end;
1003 runs[run_count].e = pLevel[run_start];
1004 run_start = nextValidChar(pcls, run_end, uCount);
1005 run_count++;
1006 }
1007
1008 /* Build Isolating Runs */
1009 i = 0;
1010 while (i < run_count)
1011 {
1012 int k = i;
1013 if (runs[k].start >= 0)
1014 {
1015 int type_fence, real_end;
1016 int j;
1017 current_isolated = HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun) + sizeof(RunChar)*uCount);
1018 if (!current_isolated) break;
1019
1020 run_start = runs[k].start;
1021 current_isolated->e = runs[k].e;
1022 current_isolated->length = (runs[k].end - runs[k].start)+1;
1023
1024 for (j = 0; j < current_isolated->length; j++)
1025 {
1026 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1027 current_isolated->item[j].ch = lpString[runs[k].start+j];
1028 }
1029
1030 run_end = runs[k].end;
1031
1032 TRACE("{ [%i -- %i]",run_start, run_end);
1033
1034 if (pcls[run_end] == BN)
1035 run_end = previousValidChar(pcls, run_end, runs[k].start);
1036
1037 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1038 {
1039 j = k+1;
1040 search:
1041 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1042 if (j < run_count && runs[i].e != runs[j].e)
1043 {
1044 j++;
1045 goto search;
1046 }
1047
1048 if (j != run_count)
1049 {
1050 int m;
1051 int l = current_isolated->length;
1052
1053 current_isolated->length += (runs[j].end - runs[j].start)+1;
1054 for (m = 0; l < current_isolated->length; l++, m++)
1055 {
1056 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1057 current_isolated->item[l].ch = lpString[runs[j].start+m];
1058 }
1059
1060 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1061
1062 run_end = runs[j].end;
1063 if (pcls[run_end] == BN)
1064 run_end = previousValidChar(pcls, run_end, runs[i].start);
1065 runs[j].start = -1;
1066 k = j;
1067 }
1068 else
1069 {
1070 run_end = uCount;
1071 break;
1072 }
1073 }
1074
1075 type_fence = previousValidChar(pcls, run_start, -1);
1076
1077 if (type_fence == -1)
1078 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1079 else
1080 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1081
1082 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1083
1084 if (run_end == uCount)
1085 current_isolated->eos = current_isolated->sos;
1086 else
1087 {
1088 /* eos could be an BN */
1089 if ( pcls[run_end] == BN )
1090 {
1091 real_end = previousValidChar(pcls, run_end, run_start-1);
1092 if (real_end < run_start)
1093 real_end = run_start;
1094 }
1095 else
1096 real_end = run_end;
1097
1098 type_fence = nextValidChar(pcls, run_end, uCount);
1099 if (type_fence == uCount)
1100 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1101 else
1102 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1103
1104 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1105 }
1106
1107 list_add_tail(set, &current_isolated->entry);
1108 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1109 }
1110 i++;
1111 }
1112
1113 HeapFree(GetProcessHeap(), 0, runs);
1114 }
1115
1116 /*************************************************************
1117 * BIDI_DeterminLevels
1118 */
1119 BOOL BIDI_DetermineLevels(
1120 LPCWSTR lpString, /* [in] The string for which information is to be returned */
1121 INT uCount, /* [in] Number of WCHARs in string. */
1122 const SCRIPT_STATE *s,
1123 const SCRIPT_CONTROL *c,
1124 WORD *lpOutLevels, /* [out] final string levels */
1125 WORD *lpOutOverrides /* [out] final string overrides */
1126 )
1127 {
1128 WORD *chartype;
1129 unsigned baselevel = 0;
1130 struct list IsolatingRuns;
1131 IsolatedRun *iso_run, *next;
1132
1133 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1134
1135 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
1136 if (!chartype)
1137 {
1138 WARN("Out of memory\n");
1139 return FALSE;
1140 }
1141
1142 baselevel = s->uBidiLevel;
1143
1144 classify(lpString, chartype, uCount, c);
1145 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1146
1147 memset(lpOutOverrides, 0, sizeof(WORD) * uCount);
1148
1149 /* resolve explicit */
1150 resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection);
1151 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1152
1153 /* X10/BD13: Computer Isolating runs */
1154 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1155
1156 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1157 {
1158 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1159
1160 /* resolve weak */
1161 resolveWeak(iso_run);
1162 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1163
1164 /* resolve neutrals */
1165 resolveNeutrals(iso_run);
1166 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1167
1168 list_remove(&iso_run->entry);
1169 HeapFree(GetProcessHeap(),0,iso_run);
1170 }
1171
1172 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1173 /* resolveImplicit */
1174 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1175
1176 /* resolveResolvedLevels*/
1177 classify(lpString, chartype, uCount, c);
1178 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1179
1180 HeapFree(GetProcessHeap(), 0, chartype);
1181 return TRUE;
1182 }
1183
1184 /* reverse cch indexes */
1185 static void reverse(int *pidx, int cch)
1186 {
1187 int temp;
1188 int ich = 0;
1189 for (; ich < --cch; ich++)
1190 {
1191 temp = pidx[ich];
1192 pidx[ich] = pidx[cch];
1193 pidx[cch] = temp;
1194 }
1195 }
1196
1197
1198 /*------------------------------------------------------------------------
1199 Functions: reorder/reorderLevel
1200
1201 Recursively reorders the display string
1202 "From the highest level down, reverse all characters at that level and
1203 higher, down to the lowest odd level"
1204
1205 Implements rule L2 of the Unicode bidi Algorithm.
1206
1207 Input: Array of embedding levels
1208 Character count
1209 Flag enabling reversal (set to false by initial caller)
1210
1211 In/Out: Text to reorder
1212
1213 Note: levels may exceed 15 resp. 61 on input.
1214
1215 Rule L3 - reorder combining marks is not implemented here
1216 Rule L4 - glyph mirroring is implemented as a display option below
1217
1218 Note: this should be applied a line at a time
1219 -------------------------------------------------------------------------*/
1220 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1221 {
1222 int ich = 0;
1223
1224 /* true as soon as first odd level encountered */
1225 fReverse = fReverse || odd(level);
1226
1227 for (; ich < cch; ich++)
1228 {
1229 if (plevel[ich] < level)
1230 {
1231 break;
1232 }
1233 else if (plevel[ich] > level)
1234 {
1235 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1236 cch - ich, fReverse) - 1;
1237 }
1238 }
1239 if (fReverse)
1240 {
1241 reverse(pIndexs, ich);
1242 }
1243 return ich;
1244 }
1245
1246 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1247 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1248 {
1249 int ich = 0;
1250 int newlevel = -1;
1251
1252 /* true as soon as first odd level encountered */
1253 fReverse = fReverse || odd(level);
1254
1255 for (; ich < cch; ich++)
1256 {
1257 if (plevel[ich] < level)
1258 break;
1259 else if (plevel[ich] > level)
1260 newlevel = ich;
1261 }
1262 if (fReverse)
1263 {
1264 reverse(pIndexs, ich);
1265 }
1266
1267 if (newlevel >= 0)
1268 {
1269 ich = 0;
1270 for (; ich < cch; ich++)
1271 if (plevel[ich] < level)
1272 break;
1273 else if (plevel[ich] > level)
1274 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1275 cch - ich, fReverse) - 1;
1276 }
1277
1278 return ich;
1279 }
1280
1281 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
1282 WORD* lpStrength)
1283 {
1284 int i;
1285 classify(lpString, lpStrength, uCount, c);
1286
1287 for ( i = 0; i < uCount; i++)
1288 {
1289 switch(lpStrength[i])
1290 {
1291 case L:
1292 case LRE:
1293 case LRO:
1294 case R:
1295 case AL:
1296 case RLE:
1297 case RLO:
1298 lpStrength[i] = BIDI_STRONG;
1299 break;
1300 case PDF:
1301 case EN:
1302 case ES:
1303 case ET:
1304 case AN:
1305 case CS:
1306 case BN:
1307 lpStrength[i] = BIDI_WEAK;
1308 break;
1309 case B:
1310 case S:
1311 case WS:
1312 case ON:
1313 default: /* Neutrals and NSM */
1314 lpStrength[i] = BIDI_NEUTRAL;
1315 }
1316 }
1317 return TRUE;
1318 }