2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
46 #include <wine/list.h>
48 #include "usp10_internal.h"
50 extern const unsigned short bidi_bracket_table
[];
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
54 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
57 /* HELPER FUNCTIONS AND DECLARATIONS */
59 /*------------------------------------------------------------------------
60 Bidirectional Character Types
62 as defined by the Unicode Bidirectional Algorithm Table 3-7.
66 The list of bidirectional character types here is not grouped the
67 same way as the table 3-7, since the numberic values for the types
68 are chosen to keep the state and action tables compact.
69 ------------------------------------------------------------------------*/
73 /* ON MUST be zero, code relies on ON = NI = 0 */
74 ON
= 0, /* Other Neutral */
77 AN
, /* Arabic Number */
78 EN
, /* European Number */
79 AL
, /* Arabic Letter (Right-to-left) */
80 NSM
, /* Non-spacing Mark */
81 CS
, /* Common Separator */
82 ES
, /* European Separator */
83 ET
, /* European Terminator (post/prefix e.g. $ and %) */
86 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
89 S
, /* Segment Separator (TAB) // used only in L1 */
90 WS
, /* White space // used only in L1 */
91 B
, /* Paragraph Separator (aka as PS) */
93 /* types for explicit controls */
94 RLO
, /* these are used only in X1-X9 */
100 LRI
, /* Isolate formatting characters new with 6.3 */
105 /* resolved types, also resolved directions */
106 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
109 static const char debug_type
[][4] =
111 "ON", /* Other Neutral */
112 "L", /* Left Letter */
113 "R", /* Right Letter */
114 "AN", /* Arabic Number */
115 "EN", /* European Number */
116 "AL", /* Arabic Letter (Right-to-left) */
117 "NSM", /* Non-spacing Mark */
118 "CS", /* Common Separator */
119 "ES", /* European Separator */
120 "ET", /* European Terminator (post/prefix e.g. $ and %) */
121 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
122 "S", /* Segment Separator (TAB) // used only in L1 */
123 "WS", /* White space // used only in L1 */
124 "B", /* Paragraph Separator (aka as PS) */
125 "RLO", /* these are used only in X1-X9 */
130 "LRI", /* Isolate formatting characters new with 6.3 */
136 /* HELPER FUNCTIONS */
138 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
142 for (i
= start
; i
< end
&& len
< 200; i
++)
144 TRACE(" %s",debug_type
[types
[i
]]);
145 len
+= strlen(debug_type
[types
[i
]])+1;
152 /* Convert the libwine information to the direction enum */
153 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
155 static const enum directions dir_map
[16] =
157 L
, /* unassigned defaults to L */
172 PDF
/* also LRE, LRO, RLE, RLO */
177 for (i
= 0; i
< uCount
; ++i
)
179 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
183 if (!c
->fLegacyBidiClass
) break;
187 case '+': chartype
[i
] = NI
; break;
188 case '/': chartype
[i
] = CS
; break;
194 case 0x202A: chartype
[i
] = LRE
; break;
195 case 0x202B: chartype
[i
] = RLE
; break;
196 case 0x202C: chartype
[i
] = PDF
; break;
197 case 0x202D: chartype
[i
] = LRO
; break;
198 case 0x202E: chartype
[i
] = RLO
; break;
199 case 0x2066: chartype
[i
] = LRI
; break;
200 case 0x2067: chartype
[i
] = RLI
; break;
201 case 0x2068: chartype
[i
] = FSI
; break;
202 case 0x2069: chartype
[i
] = PDI
; break;
209 /* RESOLVE EXPLICIT */
211 static WORD
GreaterEven(int i
)
213 return odd(i
) ? i
+ 1 : i
+ 2;
216 static WORD
GreaterOdd(int i
)
218 return odd(i
) ? i
+ 2 : i
+ 1;
221 static WORD
EmbeddingDirection(int level
)
223 return odd(level
) ? R
: L
;
226 /*------------------------------------------------------------------------
227 Function: resolveExplicit
229 Recursively resolves explicit embedding levels and overrides.
230 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
232 Input: Base embedding level and direction
235 Output: Array of embedding levels
237 In/Out: Array of direction classes
240 Note: The function uses two simple counters to keep track of
241 matching explicit codes and PDF. Use the default argument for
242 the outermost call. The nesting counter counts the recursion
243 depth and not the embedding level.
244 ------------------------------------------------------------------------*/
245 typedef struct tagStackItem
{
251 #define push_stack(l,o,i) \
253 stack[stack_top].level = l; \
254 stack[stack_top].override = o; \
255 stack[stack_top].isolate = i;} while(0)
257 #define pop_stack() do { stack_top++; } while(0)
259 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
261 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, int count
)
264 int overflow_isolate_count
= 0;
265 int overflow_embedding_count
= 0;
266 int valid_isolate_count
= 0;
269 StackItem stack
[MAX_DEPTH
+2];
270 int stack_top
= MAX_DEPTH
+1;
272 stack
[stack_top
].level
= level
;
273 stack
[stack_top
].override
= NI
;
274 stack
[stack_top
].isolate
= FALSE
;
276 for (i
= 0; i
< count
; i
++)
279 if (pclass
[i
] == RLE
)
281 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
282 poutLevel
[i
] = stack
[stack_top
].level
;
283 if (valid_level(least_odd
))
284 push_stack(least_odd
, NI
, FALSE
);
285 else if (overflow_isolate_count
== 0)
286 overflow_embedding_count
++;
289 else if (pclass
[i
] == LRE
)
291 int least_even
= GreaterEven(stack
[stack_top
].level
);
292 poutLevel
[i
] = stack
[stack_top
].level
;
293 if (valid_level(least_even
))
294 push_stack(least_even
, NI
, FALSE
);
295 else if (overflow_isolate_count
== 0)
296 overflow_embedding_count
++;
299 else if (pclass
[i
] == RLO
)
301 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
302 poutLevel
[i
] = stack
[stack_top
].level
;
303 if (valid_level(least_odd
))
304 push_stack(least_odd
, R
, FALSE
);
305 else if (overflow_isolate_count
== 0)
306 overflow_embedding_count
++;
309 else if (pclass
[i
] == LRO
)
311 int least_even
= GreaterEven(stack
[stack_top
].level
);
312 poutLevel
[i
] = stack
[stack_top
].level
;
313 if (valid_level(least_even
))
314 push_stack(least_even
, L
, FALSE
);
315 else if (overflow_isolate_count
== 0)
316 overflow_embedding_count
++;
319 else if (pclass
[i
] == RLI
)
321 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
322 poutLevel
[i
] = stack
[stack_top
].level
;
323 if (valid_level(least_odd
))
325 valid_isolate_count
++;
326 push_stack(least_odd
, NI
, TRUE
);
329 overflow_isolate_count
++;
332 else if (pclass
[i
] == LRI
)
334 int least_even
= GreaterEven(stack
[stack_top
].level
);
335 poutLevel
[i
] = stack
[stack_top
].level
;
336 if (valid_level(least_even
))
338 valid_isolate_count
++;
339 push_stack(least_even
, NI
, TRUE
);
342 overflow_isolate_count
++;
345 else if (pclass
[i
] == FSI
)
350 poutLevel
[i
] = stack
[stack_top
].level
;
351 for (j
= i
+1; j
< count
; j
++)
353 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
358 else if (pclass
[j
] == PDI
)
367 if (skipping
) continue;
374 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
382 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
383 if (valid_level(least_odd
))
385 valid_isolate_count
++;
386 push_stack(least_odd
, NI
, TRUE
);
389 overflow_isolate_count
++;
393 int least_even
= GreaterEven(stack
[stack_top
].level
);
394 if (valid_level(least_even
))
396 valid_isolate_count
++;
397 push_stack(least_even
, NI
, TRUE
);
400 overflow_isolate_count
++;
404 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
406 poutLevel
[i
] = stack
[stack_top
].level
;
407 if (stack
[stack_top
].override
!= NI
)
408 pclass
[i
] = stack
[stack_top
].override
;
411 else if (pclass
[i
] == PDI
)
413 if (overflow_isolate_count
) overflow_isolate_count
--;
414 else if (!valid_isolate_count
) {/* do nothing */}
417 overflow_embedding_count
= 0;
418 while (!stack
[stack_top
].isolate
) pop_stack();
420 valid_isolate_count
--;
422 poutLevel
[i
] = stack
[stack_top
].level
;
425 else if (pclass
[i
] == PDF
)
427 poutLevel
[i
] = stack
[stack_top
].level
;
428 if (overflow_isolate_count
) {/* do nothing */}
429 else if (overflow_embedding_count
) overflow_embedding_count
--;
430 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
435 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
436 for (i
= 0; i
< count
; i
++)
437 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
441 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
443 if (index
== -1 || index
== back_fence
) return index
;
445 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
449 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
451 if (index
== front_fence
) return index
;
453 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
457 typedef struct tagRun
464 typedef struct tagRunChar
470 typedef struct tagIsolatedRun
481 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
483 if (index
>= (iso_run
->length
-1)) return -1;
485 while (index
< iso_run
->length
&& *iso_run
->item
[index
].pcls
== BN
) index
++;
486 if (index
== iso_run
->length
) return -1;
490 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
493 if (index
<= 0) return -1;
495 while (index
> -1 && *iso_run
->item
[index
].pcls
== BN
) index
--;
499 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
504 for (i
= 0; i
< iso_run
->length
&& len
< 200; i
++)
506 TRACE(" %s",debug_type
[*iso_run
->item
[i
].pcls
]);
507 len
+= strlen(debug_type
[*iso_run
->item
[i
].pcls
])+1;
509 if (i
!= iso_run
->length
)
514 /*------------------------------------------------------------------------
515 Function: resolveWeak
517 Resolves the directionality of numeric and other weak character types
519 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
521 Input: Array of embedding levels
524 In/Out: Array of directional classes
526 Note: On input only these directional classes are expected
527 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
528 ------------------------------------------------------------------------*/
530 static void resolveWeak(IsolatedRun
* iso_run
)
535 for (i
=0; i
< iso_run
->length
; i
++)
537 if (*iso_run
->item
[i
].pcls
== NSM
)
539 int j
= iso_previousValidChar(iso_run
, i
);
541 *iso_run
->item
[i
].pcls
= iso_run
->sos
;
542 else if (*iso_run
->item
[j
].pcls
>= LRI
)
543 *iso_run
->item
[i
].pcls
= ON
;
545 *iso_run
->item
[i
].pcls
= *iso_run
->item
[j
].pcls
;
550 for (i
= 0; i
< iso_run
->length
; i
++)
552 if (*iso_run
->item
[i
].pcls
== EN
)
554 int j
= iso_previousValidChar(iso_run
, i
);
557 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
|| *iso_run
->item
[j
].pcls
== AL
)
559 if (*iso_run
->item
[j
].pcls
== AL
)
560 *iso_run
->item
[i
].pcls
= AN
;
563 j
= iso_previousValidChar(iso_run
, j
);
569 for (i
= 0; i
< iso_run
->length
; i
++)
571 if (*iso_run
->item
[i
].pcls
== AL
)
572 *iso_run
->item
[i
].pcls
= R
;
576 for (i
= 0; i
< iso_run
->length
; i
++)
578 if (*iso_run
->item
[i
].pcls
== ES
)
580 int b
= iso_previousValidChar(iso_run
, i
);
581 int f
= iso_nextValidChar(iso_run
, i
);
583 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
584 *iso_run
->item
[i
].pcls
= EN
;
586 else if (*iso_run
->item
[i
].pcls
== CS
)
588 int b
= iso_previousValidChar(iso_run
, i
);
589 int f
= iso_nextValidChar(iso_run
, i
);
591 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
592 *iso_run
->item
[i
].pcls
= EN
;
593 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== AN
&& *iso_run
->item
[f
].pcls
== AN
)
594 *iso_run
->item
[i
].pcls
= AN
;
599 for (i
= 0; i
< iso_run
->length
; i
++)
601 if (*iso_run
->item
[i
].pcls
== ET
)
604 for (j
= i
-1 ; j
> -1; j
--)
606 if (*iso_run
->item
[j
].pcls
== BN
) continue;
607 if (*iso_run
->item
[j
].pcls
== ET
) continue;
608 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
611 if (*iso_run
->item
[i
].pcls
== ET
)
613 for (j
= i
+1; j
< iso_run
->length
; j
++)
615 if (*iso_run
->item
[j
].pcls
== BN
) continue;
616 if (*iso_run
->item
[j
].pcls
== ET
) continue;
617 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
625 for (i
= 0; i
< iso_run
->length
; i
++)
627 if (*iso_run
->item
[i
].pcls
== ET
|| *iso_run
->item
[i
].pcls
== ES
|| *iso_run
->item
[i
].pcls
== CS
|| *iso_run
->item
[i
].pcls
== ON
)
631 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
632 *iso_run
->item
[b
].pcls
= ON
;
633 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
634 *iso_run
->item
[f
].pcls
= ON
;
636 *iso_run
->item
[i
].pcls
= ON
;
641 for (i
= 0; i
< iso_run
->length
; i
++)
643 if (*iso_run
->item
[i
].pcls
== EN
)
646 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
647 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
)
649 if (*iso_run
->item
[j
].pcls
== L
)
650 *iso_run
->item
[i
].pcls
= L
;
653 if (iso_run
->sos
== L
&& j
== -1)
654 *iso_run
->item
[i
].pcls
= L
;
659 typedef struct tagBracketPair
665 static int compr(const void *a
, const void* b
)
667 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
670 static BracketPair
*computeBracketPairs(IsolatedRun
*iso_run
)
674 int stack_top
= iso_run
->length
;
675 BracketPair
*out
= NULL
;
679 open_stack
= HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR
) * iso_run
->length
);
680 stack_index
= HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run
->length
);
682 for (i
= 0; i
< iso_run
->length
; i
++)
684 unsigned short ubv
= get_table_entry(bidi_bracket_table
, iso_run
->item
[i
].ch
);
689 out
= HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair
));
696 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
697 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
698 if (open_stack
[stack_top
] == 0x232A)
699 open_stack
[stack_top
] = 0x3009;
700 stack_index
[stack_top
] = i
;
702 else if ((ubv
>> 8) == 1)
705 if (stack_top
== iso_run
->length
) continue;
706 for (j
= stack_top
; j
< iso_run
->length
; j
++)
708 WCHAR c
= iso_run
->item
[i
].ch
;
709 if (c
== 0x232A) c
= 0x3009;
710 if (c
== open_stack
[j
])
712 out
[pair_count
].start
= stack_index
[j
];
713 out
[pair_count
].end
= i
;
715 out
= HeapReAlloc(GetProcessHeap(), 0, out
, sizeof(BracketPair
) * (pair_count
+1));
716 out
[pair_count
].start
= -1;
726 HeapFree(GetProcessHeap(),0,out
);
729 else if (pair_count
> 1)
730 qsort(out
, pair_count
, sizeof(BracketPair
), compr
);
732 HeapFree(GetProcessHeap(), 0, open_stack
);
733 HeapFree(GetProcessHeap(), 0, stack_index
);
737 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
739 /*------------------------------------------------------------------------
740 Function: resolveNeutrals
742 Resolves the directionality of neutral character types.
744 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
746 Input: Array of embedding levels
750 In/Out: Array of directional classes
752 Note: On input only these directional classes are expected
753 R, L, NI, AN, EN and BN
755 W8 resolves a number of ENs to L
756 ------------------------------------------------------------------------*/
757 static void resolveNeutrals(IsolatedRun
*iso_run
)
760 BracketPair
*pairs
= NULL
;
762 /* Translate isolates into NI */
763 for (i
= 0; i
< iso_run
->length
; i
++)
765 if (*iso_run
->item
[i
].pcls
>= LRI
)
766 *iso_run
->item
[i
].pcls
= NI
;
768 switch(*iso_run
->item
[i
].pcls
)
772 case WS
: *iso_run
->item
[i
].pcls
= NI
;
775 ASSERT(*iso_run
->item
[i
].pcls
< 5 || *iso_run
->item
[i
].pcls
== BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
778 /* N0: Skipping bracketed pairs for now */
779 pairs
= computeBracketPairs(iso_run
);
782 BracketPair
*p
= &pairs
[0];
784 while (p
->start
>= 0)
787 int e
= EmbeddingDirection(iso_run
->e
);
788 int o
= EmbeddingDirection(iso_run
->e
+1);
790 TRACE("Bracket Pair [%i - %i]\n",p
->start
, p
->end
);
793 for (j
= p
->start
+1; j
< p
->end
; j
++)
795 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
797 *iso_run
->item
[p
->start
].pcls
= e
;
798 *iso_run
->item
[p
->end
].pcls
= e
;
801 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
805 if (j
== p
->end
&& flag_o
)
807 for (j
= p
->start
; j
>= 0; j
--)
809 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
811 *iso_run
->item
[p
->start
].pcls
= o
;
812 *iso_run
->item
[p
->end
].pcls
= o
;
815 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
817 *iso_run
->item
[p
->start
].pcls
= e
;
818 *iso_run
->item
[p
->end
].pcls
= e
;
824 *iso_run
->item
[p
->start
].pcls
= iso_run
->sos
;
825 *iso_run
->item
[p
->end
].pcls
= iso_run
->sos
;
832 HeapFree(GetProcessHeap(),0,pairs
);
836 for (i
= 0; i
< iso_run
->length
; i
++)
840 if (*iso_run
->item
[i
].pcls
== NI
)
843 int b
= iso_previousValidChar(iso_run
, i
);
852 if (*iso_run
->item
[b
].pcls
== R
|| *iso_run
->item
[b
].pcls
== AN
|| *iso_run
->item
[b
].pcls
== EN
)
854 else if (*iso_run
->item
[b
].pcls
== L
)
856 else /* No string type */
859 j
= iso_nextValidChar(iso_run
, i
);
860 while (j
> -1 && *iso_run
->item
[j
].pcls
== NI
) j
= iso_nextValidChar(iso_run
, j
);
867 else if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== AN
|| *iso_run
->item
[j
].pcls
== EN
)
869 else if (*iso_run
->item
[j
].pcls
== L
)
871 else /* No string type */
876 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
877 *iso_run
->item
[b
].pcls
= r
;
883 for (i
= 0; i
< iso_run
->length
; i
++)
885 if (*iso_run
->item
[i
].pcls
== NI
)
889 *iso_run
->item
[i
].pcls
= EmbeddingDirection(iso_run
->e
);
890 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
891 *iso_run
->item
[b
].pcls
= EmbeddingDirection(iso_run
->e
);
892 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
893 *iso_run
->item
[f
].pcls
= EmbeddingDirection(iso_run
->e
);
898 /*------------------------------------------------------------------------
899 Function: resolveImplicit
901 Recursively resolves implicit embedding levels.
902 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
904 Input: Array of direction classes
908 In/Out: Array of embedding levels
910 Note: levels may exceed 15 on output.
911 Accepted subset of direction classes
913 ------------------------------------------------------------------------*/
914 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
919 for (i
= sos
; i
<= eos
; i
++)
924 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
925 ASSERT(pcls
[i
] < 5); /* "Out of range." */
927 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
929 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
931 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
936 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
941 for (i
= sos
; i
<= eos
; i
++)
943 if (pcls
[i
] == B
|| pcls
[i
] == S
)
946 while (i
> sos
&& j
>= sos
&&
947 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
948 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
949 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
950 plevel
[j
--] = baselevel
;
951 plevel
[i
] = baselevel
;
954 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
955 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
956 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
959 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
960 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
961 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
962 plevel
[j
--] = baselevel
;
967 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, WORD
*pLevel
, LPCWSTR lpString
, int uCount
, struct list
*set
)
969 int run_start
, run_end
, i
;
972 IsolatedRun
*current_isolated
;
974 runs
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(Run
));
981 while (run_start
< uCount
)
983 run_end
= nextValidChar(pcls
, run_start
, uCount
);
984 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
986 runs
[run_count
].start
= run_start
;
987 runs
[run_count
].end
= run_end
;
988 runs
[run_count
].e
= pLevel
[run_start
];
989 run_start
= nextValidChar(pcls
, run_end
, uCount
);
993 /* Build Isolating Runs */
995 while (i
< run_count
)
998 if (runs
[k
].start
>= 0)
1000 int type_fence
, real_end
;
1002 current_isolated
= HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun
) + sizeof(RunChar
)*uCount
);
1003 if (!current_isolated
) break;
1005 run_start
= runs
[k
].start
;
1006 current_isolated
->e
= runs
[k
].e
;
1007 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
1009 for (j
= 0; j
< current_isolated
->length
; j
++)
1011 current_isolated
->item
[j
].pcls
= &pcls
[runs
[k
].start
+j
];
1012 current_isolated
->item
[j
].ch
= lpString
[runs
[k
].start
+j
];
1015 run_end
= runs
[k
].end
;
1017 TRACE("{ [%i -- %i]",run_start
, run_end
);
1019 if (pcls
[run_end
] == BN
)
1020 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
1022 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
1026 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
1027 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
1036 int l
= current_isolated
->length
;
1038 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1039 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
1041 current_isolated
->item
[l
].pcls
= &pcls
[runs
[j
].start
+m
];
1042 current_isolated
->item
[l
].ch
= lpString
[runs
[j
].start
+m
];
1045 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
1047 run_end
= runs
[j
].end
;
1048 if (pcls
[run_end
] == BN
)
1049 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
1060 type_fence
= previousValidChar(pcls
, run_start
, -1);
1062 if (type_fence
== -1)
1063 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
1065 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
1067 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
1069 if (run_end
== uCount
)
1070 current_isolated
->eos
= current_isolated
->sos
;
1073 /* eos could be an BN */
1074 if ( pcls
[run_end
] == BN
)
1076 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
1077 if (real_end
< run_start
)
1078 real_end
= run_start
;
1083 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
1084 if (type_fence
== uCount
)
1085 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
1087 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
1089 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
1092 list_add_tail(set
, ¤t_isolated
->entry
);
1093 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1098 HeapFree(GetProcessHeap(), 0, runs
);
1101 /*************************************************************
1102 * BIDI_DeterminLevels
1104 BOOL
BIDI_DetermineLevels(
1105 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
1106 INT uCount
, /* [in] Number of WCHARs in string. */
1107 const SCRIPT_STATE
*s
,
1108 const SCRIPT_CONTROL
*c
,
1109 WORD
*lpOutLevels
/* [out] final string levels */
1113 unsigned baselevel
= 0;
1114 struct list IsolatingRuns
;
1115 IsolatedRun
*iso_run
, *next
;
1117 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
1119 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
1122 WARN("Out of memory\n");
1126 baselevel
= s
->uBidiLevel
;
1128 classify(lpString
, chartype
, uCount
, c
);
1129 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
1131 /* resolve explicit */
1132 resolveExplicit(baselevel
, chartype
, lpOutLevels
, uCount
);
1133 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
1135 /* X10/BD13: Computer Isolating runs */
1136 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, lpString
, uCount
, &IsolatingRuns
);
1138 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1140 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
1143 resolveWeak(iso_run
);
1144 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
1146 /* resolve neutrals */
1147 resolveNeutrals(iso_run
);
1148 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
1150 list_remove(&iso_run
->entry
);
1151 HeapFree(GetProcessHeap(),0,iso_run
);
1154 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1155 /* resolveImplicit */
1156 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1158 /* resolveResolvedLevels*/
1159 classify(lpString
, chartype
, uCount
, c
);
1160 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1162 HeapFree(GetProcessHeap(), 0, chartype
);
1166 /* reverse cch indexes */
1167 static void reverse(int *pidx
, int cch
)
1171 for (; ich
< --cch
; ich
++)
1174 pidx
[ich
] = pidx
[cch
];
1180 /*------------------------------------------------------------------------
1181 Functions: reorder/reorderLevel
1183 Recursively reorders the display string
1184 "From the highest level down, reverse all characters at that level and
1185 higher, down to the lowest odd level"
1187 Implements rule L2 of the Unicode bidi Algorithm.
1189 Input: Array of embedding levels
1191 Flag enabling reversal (set to false by initial caller)
1193 In/Out: Text to reorder
1195 Note: levels may exceed 15 resp. 61 on input.
1197 Rule L3 - reorder combining marks is not implemented here
1198 Rule L4 - glyph mirroring is implemented as a display option below
1200 Note: this should be applied a line at a time
1201 -------------------------------------------------------------------------*/
1202 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1206 /* true as soon as first odd level encountered */
1207 fReverse
= fReverse
|| odd(level
);
1209 for (; ich
< cch
; ich
++)
1211 if (plevel
[ich
] < level
)
1215 else if (plevel
[ich
] > level
)
1217 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1218 cch
- ich
, fReverse
) - 1;
1223 reverse(pIndexs
, ich
);
1228 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1229 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1234 /* true as soon as first odd level encountered */
1235 fReverse
= fReverse
|| odd(level
);
1237 for (; ich
< cch
; ich
++)
1239 if (plevel
[ich
] < level
)
1241 else if (plevel
[ich
] > level
)
1246 reverse(pIndexs
, ich
);
1252 for (; ich
< cch
; ich
++)
1253 if (plevel
[ich
] < level
)
1255 else if (plevel
[ich
] > level
)
1256 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1257 cch
- ich
, fReverse
) - 1;
1263 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
1267 classify(lpString
, lpStrength
, uCount
, c
);
1269 for ( i
= 0; i
< uCount
; i
++)
1271 switch(lpStrength
[i
])
1280 lpStrength
[i
] = BIDI_STRONG
;
1289 lpStrength
[i
] = BIDI_WEAK
;
1295 default: /* Neutrals and NSM */
1296 lpStrength
[i
] = BIDI_NEUTRAL
;