2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
46 #include <wine/list.h>
48 #include "usp10_internal.h"
50 extern const unsigned short bidi_bracket_table
[];
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
54 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
57 /* HELPER FUNCTIONS AND DECLARATIONS */
59 /*------------------------------------------------------------------------
60 Bidirectional Character Types
62 as defined by the Unicode Bidirectional Algorithm Table 3-7.
66 The list of bidirectional character types here is not grouped the
67 same way as the table 3-7, since the numberic values for the types
68 are chosen to keep the state and action tables compact.
69 ------------------------------------------------------------------------*/
73 /* ON MUST be zero, code relies on ON = NI = 0 */
74 ON
= 0, /* Other Neutral */
77 AN
, /* Arabic Number */
78 EN
, /* European Number */
79 AL
, /* Arabic Letter (Right-to-left) */
80 NSM
, /* Non-spacing Mark */
81 CS
, /* Common Separator */
82 ES
, /* European Separator */
83 ET
, /* European Terminator (post/prefix e.g. $ and %) */
86 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
89 S
, /* Segment Separator (TAB) // used only in L1 */
90 WS
, /* White space // used only in L1 */
91 B
, /* Paragraph Separator (aka as PS) */
93 /* types for explicit controls */
94 RLO
, /* these are used only in X1-X9 */
100 LRI
, /* Isolate formatting characters new with 6.3 */
105 /* resolved types, also resolved directions */
106 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
109 static const char debug_type
[][4] =
111 "ON", /* Other Neutral */
112 "L", /* Left Letter */
113 "R", /* Right Letter */
114 "AN", /* Arabic Number */
115 "EN", /* European Number */
116 "AL", /* Arabic Letter (Right-to-left) */
117 "NSM", /* Non-spacing Mark */
118 "CS", /* Common Separator */
119 "ES", /* European Separator */
120 "ET", /* European Terminator (post/prefix e.g. $ and %) */
121 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
122 "S", /* Segment Separator (TAB) // used only in L1 */
123 "WS", /* White space // used only in L1 */
124 "B", /* Paragraph Separator (aka as PS) */
125 "RLO", /* these are used only in X1-X9 */
130 "LRI", /* Isolate formatting characters new with 6.3 */
136 /* HELPER FUNCTIONS */
138 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
142 for (i
= start
; i
< end
&& len
< 200; i
++)
144 TRACE(" %s",debug_type
[types
[i
]]);
145 len
+= strlen(debug_type
[types
[i
]])+1;
152 /* Convert the libwine information to the direction enum */
153 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
155 static const enum directions dir_map
[16] =
157 L
, /* unassigned defaults to L */
172 PDF
/* also LRE, LRO, RLE, RLO */
177 for (i
= 0; i
< uCount
; ++i
)
179 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
183 if (!c
->fLegacyBidiClass
) break;
187 case '+': chartype
[i
] = NI
; break;
188 case '/': chartype
[i
] = CS
; break;
194 case 0x202A: chartype
[i
] = LRE
; break;
195 case 0x202B: chartype
[i
] = RLE
; break;
196 case 0x202C: chartype
[i
] = PDF
; break;
197 case 0x202D: chartype
[i
] = LRO
; break;
198 case 0x202E: chartype
[i
] = RLO
; break;
199 case 0x2066: chartype
[i
] = LRI
; break;
200 case 0x2067: chartype
[i
] = RLI
; break;
201 case 0x2068: chartype
[i
] = FSI
; break;
202 case 0x2069: chartype
[i
] = PDI
; break;
209 /* RESOLVE EXPLICIT */
211 static WORD
GreaterEven(int i
)
213 return odd(i
) ? i
+ 1 : i
+ 2;
216 static WORD
GreaterOdd(int i
)
218 return odd(i
) ? i
+ 2 : i
+ 1;
221 static WORD
EmbeddingDirection(int level
)
223 return odd(level
) ? R
: L
;
226 /*------------------------------------------------------------------------
227 Function: resolveExplicit
229 Recursively resolves explicit embedding levels and overrides.
230 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
232 Input: Base embedding level and direction
235 Output: Array of embedding levels
237 In/Out: Array of direction classes
240 Note: The function uses two simple counters to keep track of
241 matching explicit codes and PDF. Use the default argument for
242 the outermost call. The nesting counter counts the recursion
243 depth and not the embedding level.
244 ------------------------------------------------------------------------*/
245 typedef struct tagStackItem
{
251 #define push_stack(l,o,i) \
253 stack[stack_top].level = l; \
254 stack[stack_top].override = o; \
255 stack[stack_top].isolate = i;} while(0)
257 #define pop_stack() do { stack_top++; } while(0)
259 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
261 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, int count
)
264 int overflow_isolate_count
= 0;
265 int overflow_embedding_count
= 0;
266 int valid_isolate_count
= 0;
269 StackItem stack
[MAX_DEPTH
+2];
270 int stack_top
= MAX_DEPTH
+1;
272 stack
[stack_top
].level
= level
;
273 stack
[stack_top
].override
= NI
;
274 stack
[stack_top
].isolate
= FALSE
;
276 for (i
= 0; i
< count
; i
++)
279 if (pclass
[i
] == RLE
)
281 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
282 poutLevel
[i
] = stack
[stack_top
].level
;
283 if (valid_level(least_odd
))
284 push_stack(least_odd
, NI
, FALSE
);
285 else if (overflow_isolate_count
== 0)
286 overflow_embedding_count
++;
289 else if (pclass
[i
] == LRE
)
291 int least_even
= GreaterEven(stack
[stack_top
].level
);
292 poutLevel
[i
] = stack
[stack_top
].level
;
293 if (valid_level(least_even
))
294 push_stack(least_even
, NI
, FALSE
);
295 else if (overflow_isolate_count
== 0)
296 overflow_embedding_count
++;
299 else if (pclass
[i
] == RLO
)
301 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
302 poutLevel
[i
] = stack
[stack_top
].level
;
303 if (valid_level(least_odd
))
304 push_stack(least_odd
, R
, FALSE
);
305 else if (overflow_isolate_count
== 0)
306 overflow_embedding_count
++;
309 else if (pclass
[i
] == LRO
)
311 int least_even
= GreaterEven(stack
[stack_top
].level
);
312 poutLevel
[i
] = stack
[stack_top
].level
;
313 if (valid_level(least_even
))
314 push_stack(least_even
, L
, FALSE
);
315 else if (overflow_isolate_count
== 0)
316 overflow_embedding_count
++;
319 else if (pclass
[i
] == RLI
)
321 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
322 poutLevel
[i
] = stack
[stack_top
].level
;
323 if (valid_level(least_odd
))
325 valid_isolate_count
++;
326 push_stack(least_odd
, NI
, TRUE
);
329 overflow_isolate_count
++;
332 else if (pclass
[i
] == LRI
)
334 int least_even
= GreaterEven(stack
[stack_top
].level
);
335 poutLevel
[i
] = stack
[stack_top
].level
;
336 if (valid_level(least_even
))
338 valid_isolate_count
++;
339 push_stack(least_even
, NI
, TRUE
);
342 overflow_isolate_count
++;
345 else if (pclass
[i
] == FSI
)
350 poutLevel
[i
] = stack
[stack_top
].level
;
351 for (j
= i
+1; j
< count
; j
++)
353 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
358 else if (pclass
[j
] == PDI
)
367 if (skipping
) continue;
374 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
382 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
383 if (valid_level(least_odd
))
385 valid_isolate_count
++;
386 push_stack(least_odd
, NI
, TRUE
);
389 overflow_isolate_count
++;
393 int least_even
= GreaterEven(stack
[stack_top
].level
);
394 if (valid_level(least_even
))
396 valid_isolate_count
++;
397 push_stack(least_even
, NI
, TRUE
);
400 overflow_isolate_count
++;
404 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
406 poutLevel
[i
] = stack
[stack_top
].level
;
407 if (stack
[stack_top
].override
!= NI
)
408 pclass
[i
] = stack
[stack_top
].override
;
411 else if (pclass
[i
] == PDI
)
413 if (overflow_isolate_count
) overflow_isolate_count
--;
414 else if (!valid_isolate_count
) {/* do nothing */}
417 overflow_embedding_count
= 0;
418 while (!stack
[stack_top
].isolate
) pop_stack();
420 valid_isolate_count
--;
422 poutLevel
[i
] = stack
[stack_top
].level
;
425 else if (pclass
[i
] == PDF
)
427 poutLevel
[i
] = stack
[stack_top
].level
;
428 if (overflow_isolate_count
) {/* do nothing */}
429 else if (overflow_embedding_count
) overflow_embedding_count
--;
430 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
435 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
436 for (i
= 0; i
< count
; i
++)
437 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
441 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
443 if (index
== -1 || index
== back_fence
) return index
;
445 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
449 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
451 if (index
== front_fence
) return index
;
453 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
457 typedef struct tagRun
464 typedef struct tagRunChar
470 typedef struct tagIsolatedRun
481 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
483 if (index
>= (iso_run
->length
-1)) return -1;
485 while (index
< iso_run
->length
&& *iso_run
->item
[index
].pcls
== BN
) index
++;
486 if (index
== iso_run
->length
) return -1;
490 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
493 if (index
<= 0) return -1;
495 while (index
> -1 && *iso_run
->item
[index
].pcls
== BN
) index
--;
499 static inline int iso_previousChar(IsolatedRun
*iso_run
, int index
)
501 if (index
<= 0) return -1;
505 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
510 for (i
= 0; i
< iso_run
->length
&& len
< 200; i
++)
512 TRACE(" %s",debug_type
[*iso_run
->item
[i
].pcls
]);
513 len
+= strlen(debug_type
[*iso_run
->item
[i
].pcls
])+1;
515 if (i
!= iso_run
->length
)
520 /*------------------------------------------------------------------------
521 Function: resolveWeak
523 Resolves the directionality of numeric and other weak character types
525 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
527 Input: Array of embedding levels
530 In/Out: Array of directional classes
532 Note: On input only these directional classes are expected
533 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
534 ------------------------------------------------------------------------*/
536 static void resolveWeak(IsolatedRun
* iso_run
)
541 for (i
=0; i
< iso_run
->length
; i
++)
543 if (*iso_run
->item
[i
].pcls
== NSM
)
545 int j
= iso_previousValidChar(iso_run
, i
);
547 *iso_run
->item
[i
].pcls
= iso_run
->sos
;
548 else if (*iso_run
->item
[j
].pcls
>= LRI
)
549 *iso_run
->item
[i
].pcls
= ON
;
551 *iso_run
->item
[i
].pcls
= *iso_run
->item
[j
].pcls
;
556 for (i
= 0; i
< iso_run
->length
; i
++)
558 if (*iso_run
->item
[i
].pcls
== EN
)
560 int j
= iso_previousValidChar(iso_run
, i
);
563 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
|| *iso_run
->item
[j
].pcls
== AL
)
565 if (*iso_run
->item
[j
].pcls
== AL
)
566 *iso_run
->item
[i
].pcls
= AN
;
569 j
= iso_previousValidChar(iso_run
, j
);
575 for (i
= 0; i
< iso_run
->length
; i
++)
577 if (*iso_run
->item
[i
].pcls
== AL
)
578 *iso_run
->item
[i
].pcls
= R
;
582 for (i
= 0; i
< iso_run
->length
; i
++)
584 if (*iso_run
->item
[i
].pcls
== ES
)
586 int b
= iso_previousValidChar(iso_run
, i
);
587 int f
= iso_nextValidChar(iso_run
, i
);
589 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
590 *iso_run
->item
[i
].pcls
= EN
;
592 else if (*iso_run
->item
[i
].pcls
== CS
)
594 int b
= iso_previousValidChar(iso_run
, i
);
595 int f
= iso_nextValidChar(iso_run
, i
);
597 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
598 *iso_run
->item
[i
].pcls
= EN
;
599 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== AN
&& *iso_run
->item
[f
].pcls
== AN
)
600 *iso_run
->item
[i
].pcls
= AN
;
605 for (i
= 0; i
< iso_run
->length
; i
++)
607 if (*iso_run
->item
[i
].pcls
== ET
)
610 for (j
= i
-1 ; j
> -1; j
--)
612 if (*iso_run
->item
[j
].pcls
== BN
) continue;
613 if (*iso_run
->item
[j
].pcls
== ET
) continue;
614 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
617 if (*iso_run
->item
[i
].pcls
== ET
)
619 for (j
= i
+1; j
< iso_run
->length
; j
++)
621 if (*iso_run
->item
[j
].pcls
== BN
) continue;
622 if (*iso_run
->item
[j
].pcls
== ET
) continue;
623 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
631 for (i
= 0; i
< iso_run
->length
; i
++)
633 if (*iso_run
->item
[i
].pcls
== ET
|| *iso_run
->item
[i
].pcls
== ES
|| *iso_run
->item
[i
].pcls
== CS
|| *iso_run
->item
[i
].pcls
== ON
)
637 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
638 *iso_run
->item
[b
].pcls
= ON
;
639 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
640 *iso_run
->item
[f
].pcls
= ON
;
642 *iso_run
->item
[i
].pcls
= ON
;
647 for (i
= 0; i
< iso_run
->length
; i
++)
649 if (*iso_run
->item
[i
].pcls
== EN
)
652 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
653 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
)
655 if (*iso_run
->item
[j
].pcls
== L
)
656 *iso_run
->item
[i
].pcls
= L
;
659 if (iso_run
->sos
== L
&& j
== -1)
660 *iso_run
->item
[i
].pcls
= L
;
665 typedef struct tagBracketPair
671 static int compr(const void *a
, const void* b
)
673 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
676 static BracketPair
*computeBracketPairs(IsolatedRun
*iso_run
)
680 int stack_top
= iso_run
->length
;
681 BracketPair
*out
= NULL
;
685 open_stack
= HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR
) * iso_run
->length
);
686 stack_index
= HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run
->length
);
688 for (i
= 0; i
< iso_run
->length
; i
++)
690 unsigned short ubv
= get_table_entry(bidi_bracket_table
, iso_run
->item
[i
].ch
);
695 out
= HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair
));
702 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
703 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
704 if (open_stack
[stack_top
] == 0x232A)
705 open_stack
[stack_top
] = 0x3009;
706 stack_index
[stack_top
] = i
;
708 else if ((ubv
>> 8) == 1)
711 if (stack_top
== iso_run
->length
) continue;
712 for (j
= stack_top
; j
< iso_run
->length
; j
++)
714 WCHAR c
= iso_run
->item
[i
].ch
;
715 if (c
== 0x232A) c
= 0x3009;
716 if (c
== open_stack
[j
])
718 out
[pair_count
].start
= stack_index
[j
];
719 out
[pair_count
].end
= i
;
721 out
= HeapReAlloc(GetProcessHeap(), 0, out
, sizeof(BracketPair
) * (pair_count
+1));
722 out
[pair_count
].start
= -1;
732 HeapFree(GetProcessHeap(),0,out
);
735 else if (pair_count
> 1)
736 qsort(out
, pair_count
, sizeof(BracketPair
), compr
);
738 HeapFree(GetProcessHeap(), 0, open_stack
);
739 HeapFree(GetProcessHeap(), 0, stack_index
);
743 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
745 /*------------------------------------------------------------------------
746 Function: resolveNeutrals
748 Resolves the directionality of neutral character types.
750 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
752 Input: Array of embedding levels
756 In/Out: Array of directional classes
758 Note: On input only these directional classes are expected
759 R, L, NI, AN, EN and BN
761 W8 resolves a number of ENs to L
762 ------------------------------------------------------------------------*/
763 static void resolveNeutrals(IsolatedRun
*iso_run
)
766 BracketPair
*pairs
= NULL
;
768 /* Translate isolates into NI */
769 for (i
= 0; i
< iso_run
->length
; i
++)
771 if (*iso_run
->item
[i
].pcls
>= LRI
)
772 *iso_run
->item
[i
].pcls
= NI
;
774 switch(*iso_run
->item
[i
].pcls
)
778 case WS
: *iso_run
->item
[i
].pcls
= NI
;
781 ASSERT(*iso_run
->item
[i
].pcls
< 5 || *iso_run
->item
[i
].pcls
== BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
784 /* N0: Skipping bracketed pairs for now */
785 pairs
= computeBracketPairs(iso_run
);
788 BracketPair
*p
= &pairs
[0];
790 while (p
->start
>= 0)
793 int e
= EmbeddingDirection(iso_run
->e
);
794 int o
= EmbeddingDirection(iso_run
->e
+1);
796 TRACE("Bracket Pair [%i - %i]\n",p
->start
, p
->end
);
799 for (j
= p
->start
+1; j
< p
->end
; j
++)
801 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
803 *iso_run
->item
[p
->start
].pcls
= e
;
804 *iso_run
->item
[p
->end
].pcls
= e
;
807 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
811 if (j
== p
->end
&& flag_o
)
813 for (j
= p
->start
; j
>= 0; j
--)
815 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
817 *iso_run
->item
[p
->start
].pcls
= o
;
818 *iso_run
->item
[p
->end
].pcls
= o
;
821 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
823 *iso_run
->item
[p
->start
].pcls
= e
;
824 *iso_run
->item
[p
->end
].pcls
= e
;
830 *iso_run
->item
[p
->start
].pcls
= iso_run
->sos
;
831 *iso_run
->item
[p
->end
].pcls
= iso_run
->sos
;
838 HeapFree(GetProcessHeap(),0,pairs
);
842 for (i
= 0; i
< iso_run
->length
; i
++)
846 if (*iso_run
->item
[i
].pcls
== NI
)
849 int b
= iso_previousValidChar(iso_run
, i
);
858 if (*iso_run
->item
[b
].pcls
== R
|| *iso_run
->item
[b
].pcls
== AN
|| *iso_run
->item
[b
].pcls
== EN
)
860 else if (*iso_run
->item
[b
].pcls
== L
)
862 else /* No string type */
865 j
= iso_nextValidChar(iso_run
, i
);
866 while (j
> -1 && *iso_run
->item
[j
].pcls
== NI
) j
= iso_nextValidChar(iso_run
, j
);
873 else if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== AN
|| *iso_run
->item
[j
].pcls
== EN
)
875 else if (*iso_run
->item
[j
].pcls
== L
)
877 else /* No string type */
882 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
883 *iso_run
->item
[b
].pcls
= r
;
889 for (i
= 0; i
< iso_run
->length
; i
++)
891 if (*iso_run
->item
[i
].pcls
== NI
)
895 *iso_run
->item
[i
].pcls
= EmbeddingDirection(iso_run
->e
);
896 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
897 *iso_run
->item
[b
].pcls
= EmbeddingDirection(iso_run
->e
);
898 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
899 *iso_run
->item
[f
].pcls
= EmbeddingDirection(iso_run
->e
);
904 /*------------------------------------------------------------------------
905 Function: resolveImplicit
907 Recursively resolves implicit embedding levels.
908 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
910 Input: Array of direction classes
914 In/Out: Array of embedding levels
916 Note: levels may exceed 15 on output.
917 Accepted subset of direction classes
919 ------------------------------------------------------------------------*/
920 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
925 for (i
= sos
; i
<= eos
; i
++)
930 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
931 ASSERT(pcls
[i
] < 5); /* "Out of range." */
933 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
935 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
937 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
942 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
947 for (i
= sos
; i
<= eos
; i
++)
949 if (pcls
[i
] == B
|| pcls
[i
] == S
)
952 while (i
> sos
&& j
>= sos
&&
953 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
954 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
955 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
956 plevel
[j
--] = baselevel
;
957 plevel
[i
] = baselevel
;
960 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
961 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
962 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
965 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
966 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
967 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
968 plevel
[j
--] = baselevel
;
973 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, WORD
*pLevel
, LPCWSTR lpString
, int uCount
, struct list
*set
)
975 int run_start
, run_end
, i
;
978 IsolatedRun
*current_isolated
;
980 runs
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(Run
));
987 while (run_start
< uCount
)
989 run_end
= nextValidChar(pcls
, run_start
, uCount
);
990 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
992 runs
[run_count
].start
= run_start
;
993 runs
[run_count
].end
= run_end
;
994 runs
[run_count
].e
= pLevel
[run_start
];
995 run_start
= nextValidChar(pcls
, run_end
, uCount
);
999 /* Build Isolating Runs */
1001 while (i
< run_count
)
1004 if (runs
[k
].start
>= 0)
1006 int type_fence
, real_end
;
1008 current_isolated
= HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun
) + sizeof(RunChar
)*uCount
);
1009 if (!current_isolated
) break;
1011 run_start
= runs
[k
].start
;
1012 current_isolated
->e
= runs
[k
].e
;
1013 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
1015 for (j
= 0; j
< current_isolated
->length
; j
++)
1017 current_isolated
->item
[j
].pcls
= &pcls
[runs
[k
].start
+j
];
1018 current_isolated
->item
[j
].ch
= lpString
[runs
[k
].start
+j
];
1021 run_end
= runs
[k
].end
;
1023 TRACE("{ [%i -- %i]",run_start
, run_end
);
1025 if (pcls
[run_end
] == BN
)
1026 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
1028 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
1032 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
1033 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
1042 int l
= current_isolated
->length
;
1044 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1045 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
1047 current_isolated
->item
[l
].pcls
= &pcls
[runs
[j
].start
+m
];
1048 current_isolated
->item
[l
].ch
= lpString
[runs
[j
].start
+m
];
1051 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
1053 run_end
= runs
[j
].end
;
1054 if (pcls
[run_end
] == BN
)
1055 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
1066 type_fence
= previousValidChar(pcls
, run_start
, -1);
1068 if (type_fence
== -1)
1069 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
1071 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
1073 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
1075 if (run_end
== uCount
)
1076 current_isolated
->eos
= current_isolated
->sos
;
1079 /* eos could be an BN */
1080 if ( pcls
[run_end
] == BN
)
1082 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
1083 if (real_end
< run_start
)
1084 real_end
= run_start
;
1089 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
1090 if (type_fence
== uCount
)
1091 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
1093 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
1095 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
1098 list_add_tail(set
, ¤t_isolated
->entry
);
1099 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1104 HeapFree(GetProcessHeap(), 0, runs
);
1107 /*************************************************************
1108 * BIDI_DeterminLevels
1110 BOOL
BIDI_DetermineLevels(
1111 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
1112 INT uCount
, /* [in] Number of WCHARs in string. */
1113 const SCRIPT_STATE
*s
,
1114 const SCRIPT_CONTROL
*c
,
1115 WORD
*lpOutLevels
/* [out] final string levels */
1119 unsigned baselevel
= 0;
1120 struct list IsolatingRuns
;
1121 IsolatedRun
*iso_run
, *next
;
1123 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
1125 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
1128 WARN("Out of memory\n");
1132 baselevel
= s
->uBidiLevel
;
1134 classify(lpString
, chartype
, uCount
, c
);
1135 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
1137 /* resolve explicit */
1138 resolveExplicit(baselevel
, chartype
, lpOutLevels
, uCount
);
1139 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
1141 /* X10/BD13: Computer Isolating runs */
1142 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, lpString
, uCount
, &IsolatingRuns
);
1144 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1146 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
1149 resolveWeak(iso_run
);
1150 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
1152 /* resolve neutrals */
1153 resolveNeutrals(iso_run
);
1154 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
1156 list_remove(&iso_run
->entry
);
1157 HeapFree(GetProcessHeap(),0,iso_run
);
1160 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1161 /* resolveImplicit */
1162 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1164 /* resolveResolvedLevels*/
1165 classify(lpString
, chartype
, uCount
, c
);
1166 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1168 HeapFree(GetProcessHeap(), 0, chartype
);
1172 /* reverse cch indexes */
1173 static void reverse(int *pidx
, int cch
)
1177 for (; ich
< --cch
; ich
++)
1180 pidx
[ich
] = pidx
[cch
];
1186 /*------------------------------------------------------------------------
1187 Functions: reorder/reorderLevel
1189 Recursively reorders the display string
1190 "From the highest level down, reverse all characters at that level and
1191 higher, down to the lowest odd level"
1193 Implements rule L2 of the Unicode bidi Algorithm.
1195 Input: Array of embedding levels
1197 Flag enabling reversal (set to false by initial caller)
1199 In/Out: Text to reorder
1201 Note: levels may exceed 15 resp. 61 on input.
1203 Rule L3 - reorder combining marks is not implemented here
1204 Rule L4 - glyph mirroring is implemented as a display option below
1206 Note: this should be applied a line at a time
1207 -------------------------------------------------------------------------*/
1208 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1212 /* true as soon as first odd level encountered */
1213 fReverse
= fReverse
|| odd(level
);
1215 for (; ich
< cch
; ich
++)
1217 if (plevel
[ich
] < level
)
1221 else if (plevel
[ich
] > level
)
1223 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1224 cch
- ich
, fReverse
) - 1;
1229 reverse(pIndexs
, ich
);
1234 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1235 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1240 /* true as soon as first odd level encountered */
1241 fReverse
= fReverse
|| odd(level
);
1243 for (; ich
< cch
; ich
++)
1245 if (plevel
[ich
] < level
)
1247 else if (plevel
[ich
] > level
)
1252 reverse(pIndexs
, ich
);
1258 for (; ich
< cch
; ich
++)
1259 if (plevel
[ich
] < level
)
1261 else if (plevel
[ich
] > level
)
1262 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1263 cch
- ich
, fReverse
) - 1;
1269 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
1273 classify(lpString
, lpStrength
, uCount
, c
);
1275 for ( i
= 0; i
< uCount
; i
++)
1277 switch(lpStrength
[i
])
1286 lpStrength
[i
] = BIDI_STRONG
;
1295 lpStrength
[i
] = BIDI_WEAK
;
1301 default: /* Neutrals and NSM */
1302 lpStrength
[i
] = BIDI_NEUTRAL
;