2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
46 #include <wine/list.h>
48 #include "usp10_internal.h"
50 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
52 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
55 /* HELPER FUNCTIONS AND DECLARATIONS */
57 /*------------------------------------------------------------------------
58 Bidirectional Character Types
60 as defined by the Unicode Bidirectional Algorithm Table 3-7.
64 The list of bidirectional character types here is not grouped the
65 same way as the table 3-7, since the numberic values for the types
66 are chosen to keep the state and action tables compact.
67 ------------------------------------------------------------------------*/
71 /* ON MUST be zero, code relies on ON = NI = 0 */
72 ON
= 0, /* Other Neutral */
75 AN
, /* Arabic Number */
76 EN
, /* European Number */
77 AL
, /* Arabic Letter (Right-to-left) */
78 NSM
, /* Non-spacing Mark */
79 CS
, /* Common Separator */
80 ES
, /* European Separator */
81 ET
, /* European Terminator (post/prefix e.g. $ and %) */
84 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
87 S
, /* Segment Separator (TAB) // used only in L1 */
88 WS
, /* White space // used only in L1 */
89 B
, /* Paragraph Separator (aka as PS) */
91 /* types for explicit controls */
92 RLO
, /* these are used only in X1-X9 */
98 LRI
, /* Isolate formatting characters new with 6.3 */
103 /* resolved types, also resolved directions */
104 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
107 static const char debug_type
[][4] =
109 "ON", /* Other Neutral */
110 "L", /* Left Letter */
111 "R", /* Right Letter */
112 "AN", /* Arabic Number */
113 "EN", /* European Number */
114 "AL", /* Arabic Letter (Right-to-left) */
115 "NSM", /* Non-spacing Mark */
116 "CS", /* Common Separator */
117 "ES", /* European Separator */
118 "ET", /* European Terminator (post/prefix e.g. $ and %) */
119 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
120 "S", /* Segment Separator (TAB) // used only in L1 */
121 "WS", /* White space // used only in L1 */
122 "B", /* Paragraph Separator (aka as PS) */
123 "RLO", /* these are used only in X1-X9 */
128 "LRI", /* Isolate formatting characters new with 6.3 */
134 /* HELPER FUNCTIONS */
136 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
140 for (i
= start
; i
< end
; i
++)
141 TRACE(" %s",debug_type
[types
[i
]]);
145 /* Convert the libwine information to the direction enum */
146 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
148 static const enum directions dir_map
[16] =
150 L
, /* unassigned defaults to L */
165 PDF
/* also LRE, LRO, RLE, RLO */
170 for (i
= 0; i
< uCount
; ++i
)
172 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
176 if (!c
->fLegacyBidiClass
) break;
180 case '+': chartype
[i
] = NI
; break;
181 case '/': chartype
[i
] = CS
; break;
187 case 0x202A: chartype
[i
] = LRE
; break;
188 case 0x202B: chartype
[i
] = RLE
; break;
189 case 0x202C: chartype
[i
] = PDF
; break;
190 case 0x202D: chartype
[i
] = LRO
; break;
191 case 0x202E: chartype
[i
] = RLO
; break;
192 case 0x2066: chartype
[i
] = LRI
; break;
193 case 0x2067: chartype
[i
] = RLI
; break;
194 case 0x2068: chartype
[i
] = FSI
; break;
195 case 0x2069: chartype
[i
] = PDI
; break;
202 /* RESOLVE EXPLICIT */
204 static WORD
GreaterEven(int i
)
206 return odd(i
) ? i
+ 1 : i
+ 2;
209 static WORD
GreaterOdd(int i
)
211 return odd(i
) ? i
+ 2 : i
+ 1;
214 static WORD
EmbeddingDirection(int level
)
216 return odd(level
) ? R
: L
;
219 /*------------------------------------------------------------------------
220 Function: resolveExplicit
222 Recursively resolves explicit embedding levels and overrides.
223 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
225 Input: Base embedding level and direction
228 Output: Array of embedding levels
230 In/Out: Array of direction classes
233 Note: The function uses two simple counters to keep track of
234 matching explicit codes and PDF. Use the default argument for
235 the outermost call. The nesting counter counts the recursion
236 depth and not the embedding level.
237 ------------------------------------------------------------------------*/
238 typedef struct tagStackItem
{
244 #define push_stack(l,o,i) \
246 stack[stack_top].level = l; \
247 stack[stack_top].override = o; \
248 stack[stack_top].isolate = i;} while(0)
250 #define pop_stack() do { stack_top++; } while(0)
252 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
254 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, int count
)
257 int overflow_isolate_count
= 0;
258 int overflow_embedding_count
= 0;
259 int valid_isolate_count
= 0;
262 StackItem stack
[MAX_DEPTH
+2];
263 int stack_top
= MAX_DEPTH
+1;
265 stack
[stack_top
].level
= level
;
266 stack
[stack_top
].override
= NI
;
267 stack
[stack_top
].isolate
= FALSE
;
269 for (i
= 0; i
< count
; i
++)
272 if (pclass
[i
] == RLE
)
274 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
275 poutLevel
[i
] = stack
[stack_top
].level
;
276 if (valid_level(least_odd
))
277 push_stack(least_odd
, NI
, FALSE
);
278 else if (overflow_isolate_count
== 0)
279 overflow_embedding_count
++;
282 else if (pclass
[i
] == LRE
)
284 int least_even
= GreaterEven(stack
[stack_top
].level
);
285 poutLevel
[i
] = stack
[stack_top
].level
;
286 if (valid_level(least_even
))
287 push_stack(least_even
, NI
, FALSE
);
288 else if (overflow_isolate_count
== 0)
289 overflow_embedding_count
++;
292 else if (pclass
[i
] == RLO
)
294 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
295 poutLevel
[i
] = stack
[stack_top
].level
;
296 if (valid_level(least_odd
))
297 push_stack(least_odd
, R
, FALSE
);
298 else if (overflow_isolate_count
== 0)
299 overflow_embedding_count
++;
302 else if (pclass
[i
] == LRO
)
304 int least_even
= GreaterEven(stack
[stack_top
].level
);
305 poutLevel
[i
] = stack
[stack_top
].level
;
306 if (valid_level(least_even
))
307 push_stack(least_even
, L
, FALSE
);
308 else if (overflow_isolate_count
== 0)
309 overflow_embedding_count
++;
312 else if (pclass
[i
] == RLI
)
314 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
315 poutLevel
[i
] = stack
[stack_top
].level
;
316 if (valid_level(least_odd
))
318 valid_isolate_count
++;
319 push_stack(least_odd
, NI
, TRUE
);
322 overflow_isolate_count
++;
325 else if (pclass
[i
] == LRI
)
327 int least_even
= GreaterEven(stack
[stack_top
].level
);
328 poutLevel
[i
] = stack
[stack_top
].level
;
329 if (valid_level(least_even
))
331 valid_isolate_count
++;
332 push_stack(least_even
, NI
, TRUE
);
335 overflow_isolate_count
++;
338 else if (pclass
[i
] == FSI
)
343 poutLevel
[i
] = stack
[stack_top
].level
;
344 for (j
= i
+1; j
< count
; j
++)
346 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
351 else if (pclass
[j
] == PDI
)
360 if (skipping
) continue;
367 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
375 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
376 if (valid_level(least_odd
))
378 valid_isolate_count
++;
379 push_stack(least_odd
, NI
, TRUE
);
382 overflow_isolate_count
++;
386 int least_even
= GreaterEven(stack
[stack_top
].level
);
387 if (valid_level(least_even
))
389 valid_isolate_count
++;
390 push_stack(least_even
, NI
, TRUE
);
393 overflow_isolate_count
++;
397 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
399 poutLevel
[i
] = stack
[stack_top
].level
;
400 if (stack
[stack_top
].override
!= NI
)
401 pclass
[i
] = stack
[stack_top
].override
;
404 else if (pclass
[i
] == PDI
)
406 if (overflow_isolate_count
) overflow_isolate_count
--;
407 else if (!valid_isolate_count
) {/* do nothing */}
410 overflow_embedding_count
= 0;
411 while (!stack
[stack_top
].isolate
) pop_stack();
413 valid_isolate_count
--;
415 poutLevel
[i
] = stack
[stack_top
].level
;
418 else if (pclass
[i
] == PDF
)
420 poutLevel
[i
] = stack
[stack_top
].level
;
421 if (overflow_isolate_count
) {/* do nothing */}
422 else if (overflow_embedding_count
) overflow_embedding_count
--;
423 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
428 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
429 for (i
= 0; i
< count
; i
++)
430 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
434 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
436 if (index
== -1 || index
== back_fence
) return index
;
438 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
442 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
444 if (index
== front_fence
) return index
;
446 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
450 typedef struct tagRun
457 typedef struct tagIsolatedRun
468 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
470 if (index
>= (iso_run
->length
-1)) return -1;
472 while (index
< iso_run
->length
&& *iso_run
->ppcls
[index
] == BN
) index
++;
473 if (index
== iso_run
->length
) return -1;
477 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
480 if (index
<= 0) return -1;
482 while (index
> -1 && *iso_run
->ppcls
[index
] == BN
) index
--;
486 static inline int iso_previousChar(IsolatedRun
*iso_run
, int index
)
488 if (index
<= 0) return -1;
492 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
497 for (i
= 0; i
< iso_run
->length
; i
++)
498 TRACE(" %s",debug_type
[*iso_run
->ppcls
[i
]]);
502 /*------------------------------------------------------------------------
503 Function: resolveWeak
505 Resolves the directionality of numeric and other weak character types
507 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
509 Input: Array of embedding levels
512 In/Out: Array of directional classes
514 Note: On input only these directional classes are expected
515 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
516 ------------------------------------------------------------------------*/
518 static void resolveWeak(IsolatedRun
* iso_run
)
523 for (i
=0; i
< iso_run
->length
; i
++)
525 if (*iso_run
->ppcls
[i
] == NSM
)
527 int j
= iso_previousValidChar(iso_run
, i
);
529 *iso_run
->ppcls
[i
] = iso_run
->sos
;
530 else if (*iso_run
->ppcls
[j
] >= LRI
)
531 *iso_run
->ppcls
[i
] = ON
;
533 *iso_run
->ppcls
[i
] = *iso_run
->ppcls
[j
];
538 for (i
= 0; i
< iso_run
->length
; i
++)
540 if (*iso_run
->ppcls
[i
] == EN
)
542 int j
= iso_previousValidChar(iso_run
, i
);
545 if (*iso_run
->ppcls
[j
] == R
|| *iso_run
->ppcls
[j
] == L
|| *iso_run
->ppcls
[j
] == AL
)
547 if (*iso_run
->ppcls
[j
] == AL
)
548 *iso_run
->ppcls
[i
] = AN
;
551 j
= iso_previousValidChar(iso_run
, j
);
557 for (i
= 0; i
< iso_run
->length
; i
++)
559 if (*iso_run
->ppcls
[i
] == AL
)
560 *iso_run
->ppcls
[i
] = R
;
564 for (i
= 0; i
< iso_run
->length
; i
++)
566 if (*iso_run
->ppcls
[i
] == ES
)
568 int b
= iso_previousValidChar(iso_run
, i
);
569 int f
= iso_nextValidChar(iso_run
, i
);
571 if (b
> -1 && f
> -1 && *iso_run
->ppcls
[b
] == EN
&& *iso_run
->ppcls
[f
] == EN
)
572 *iso_run
->ppcls
[i
] = EN
;
574 else if (*iso_run
->ppcls
[i
] == CS
)
576 int b
= iso_previousValidChar(iso_run
, i
);
577 int f
= iso_nextValidChar(iso_run
, i
);
579 if (b
> -1 && f
> -1 && *iso_run
->ppcls
[b
] == EN
&& *iso_run
->ppcls
[f
] == EN
)
580 *iso_run
->ppcls
[i
] = EN
;
581 else if (b
> -1 && f
> -1 && *iso_run
->ppcls
[b
] == AN
&& *iso_run
->ppcls
[f
] == AN
)
582 *iso_run
->ppcls
[i
] = AN
;
587 for (i
= 0; i
< iso_run
->length
; i
++)
589 if (*iso_run
->ppcls
[i
] == ET
)
592 for (j
= i
-1 ; j
> -1; j
--)
594 if (*iso_run
->ppcls
[j
] == BN
) continue;
595 if (*iso_run
->ppcls
[j
] == ET
) continue;
596 else if (*iso_run
->ppcls
[j
] == EN
) *iso_run
->ppcls
[i
] = EN
;
599 if (*iso_run
->ppcls
[i
] == ET
)
601 for (j
= i
+1; j
< iso_run
->length
; j
++)
603 if (*iso_run
->ppcls
[j
] == BN
) continue;
604 if (*iso_run
->ppcls
[j
] == ET
) continue;
605 else if (*iso_run
->ppcls
[j
] == EN
) *iso_run
->ppcls
[i
] = EN
;
613 for (i
= 0; i
< iso_run
->length
; i
++)
615 if (*iso_run
->ppcls
[i
] == ET
|| *iso_run
->ppcls
[i
] == ES
|| *iso_run
->ppcls
[i
] == CS
|| *iso_run
->ppcls
[i
] == ON
)
619 if (b
> -1 && *iso_run
->ppcls
[b
] == BN
)
620 *iso_run
->ppcls
[b
] = ON
;
621 if (f
< iso_run
->length
&& *iso_run
->ppcls
[f
] == BN
)
622 *iso_run
->ppcls
[f
] = ON
;
624 *iso_run
->ppcls
[i
] = ON
;
629 for (i
= 0; i
< iso_run
->length
; i
++)
631 if (*iso_run
->ppcls
[i
] == EN
)
634 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
635 if (*iso_run
->ppcls
[j
] == R
|| *iso_run
->ppcls
[j
] == L
)
637 if (*iso_run
->ppcls
[j
] == L
)
638 *iso_run
->ppcls
[i
] = L
;
641 if (iso_run
->sos
== L
&& j
== -1)
642 *iso_run
->ppcls
[i
] = L
;
647 /*------------------------------------------------------------------------
648 Function: resolveNeutrals
650 Resolves the directionality of neutral character types.
652 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
654 Input: Array of embedding levels
658 In/Out: Array of directional classes
660 Note: On input only these directional classes are expected
661 R, L, NI, AN, EN and BN
663 W8 resolves a number of ENs to L
664 ------------------------------------------------------------------------*/
665 static void resolveNeutrals(IsolatedRun
*iso_run
)
669 /* Translate isolates into NI */
670 for (i
= 0; i
< iso_run
->length
; i
++)
672 if (*iso_run
->ppcls
[i
] >= LRI
)
673 *iso_run
->ppcls
[i
] = NI
;
675 switch(*iso_run
->ppcls
[i
])
679 case WS
: *iso_run
->ppcls
[i
] = NI
;
682 ASSERT(*iso_run
->ppcls
[i
] < 5 || *iso_run
->ppcls
[i
] == BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
685 /* N0: Skipping bracketed pairs for now */
688 for (i
= 0; i
< iso_run
->length
; i
++)
692 if (*iso_run
->ppcls
[i
] == NI
)
695 int b
= iso_previousValidChar(iso_run
, i
);
704 if (*iso_run
->ppcls
[b
] == R
|| *iso_run
->ppcls
[b
] == AN
|| *iso_run
->ppcls
[b
] == EN
)
706 else if (*iso_run
->ppcls
[b
] == L
)
708 else /* No string type */
711 j
= iso_nextValidChar(iso_run
, i
);
712 while (j
> -1 && *iso_run
->ppcls
[j
] == NI
) j
= iso_nextValidChar(iso_run
, j
);
719 else if (*iso_run
->ppcls
[j
] == R
|| *iso_run
->ppcls
[j
] == AN
|| *iso_run
->ppcls
[j
] == EN
)
721 else if (*iso_run
->ppcls
[j
] == L
)
723 else /* No string type */
728 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
729 *iso_run
->ppcls
[b
] = r
;
735 for (i
= 0; i
< iso_run
->length
; i
++)
737 if (*iso_run
->ppcls
[i
] == NI
)
741 *iso_run
->ppcls
[i
] = EmbeddingDirection(iso_run
->e
);
742 if (b
> -1 && *iso_run
->ppcls
[b
] == BN
)
743 *iso_run
->ppcls
[b
] = EmbeddingDirection(iso_run
->e
);
744 if (f
< iso_run
->length
&& *iso_run
->ppcls
[f
] == BN
)
745 *iso_run
->ppcls
[f
] = EmbeddingDirection(iso_run
->e
);
750 /*------------------------------------------------------------------------
751 Function: resolveImplicit
753 Recursively resolves implicit embedding levels.
754 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
756 Input: Array of direction classes
760 In/Out: Array of embedding levels
762 Note: levels may exceed 15 on output.
763 Accepted subset of direction classes
765 ------------------------------------------------------------------------*/
766 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
771 for (i
= sos
; i
<= eos
; i
++)
776 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
777 ASSERT(pcls
[i
] < 5); /* "Out of range." */
779 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
781 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
783 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
788 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
793 for (i
= sos
; i
<= eos
; i
++)
795 if (pcls
[i
] == B
|| pcls
[i
] == S
)
798 while (i
> sos
&& j
>= sos
&&
799 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
800 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
801 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
802 plevel
[j
--] = baselevel
;
803 plevel
[i
] = baselevel
;
806 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
807 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
808 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
811 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
812 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
813 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
814 plevel
[j
--] = baselevel
;
819 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, WORD
*pLevel
, int uCount
, struct list
*set
)
821 int run_start
, run_end
, i
;
824 IsolatedRun
*current_isolated
;
826 runs
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(Run
));
833 while (run_start
< uCount
)
835 run_end
= nextValidChar(pcls
, run_start
, uCount
);
836 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
838 runs
[run_count
].start
= run_start
;
839 runs
[run_count
].end
= run_end
;
840 runs
[run_count
].e
= pLevel
[run_start
];
841 run_start
= nextValidChar(pcls
, run_end
, uCount
);
845 /* Build Isolating Runs */
847 while (i
< run_count
)
850 if (runs
[k
].start
>= 0)
852 int type_fence
, real_end
;
854 current_isolated
= HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun
) + sizeof(WORD
*)*uCount
);
855 if (!current_isolated
) break;
857 run_start
= runs
[k
].start
;
858 current_isolated
->e
= runs
[k
].e
;
859 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
861 for (j
= 0; j
< current_isolated
->length
; j
++)
862 current_isolated
->ppcls
[j
] = &pcls
[runs
[k
].start
+j
];
864 run_end
= runs
[k
].end
;
866 TRACE("{ [%i -- %i]",run_start
, run_end
);
868 if (pcls
[run_end
] == BN
)
869 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
871 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
875 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
876 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
885 int l
= current_isolated
->length
;
887 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
888 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
889 current_isolated
->ppcls
[l
] = &pcls
[runs
[j
].start
+m
];
891 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
893 run_end
= runs
[j
].end
;
894 if (pcls
[run_end
] == BN
)
895 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
906 type_fence
= previousValidChar(pcls
, run_start
, -1);
908 if (type_fence
== -1)
909 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
911 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
913 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
915 if (run_end
== uCount
)
916 current_isolated
->eos
= current_isolated
->sos
;
919 /* eos could be an BN */
920 if ( pcls
[run_end
] == BN
)
922 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
923 if (real_end
< run_start
)
924 real_end
= run_start
;
929 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
930 if (type_fence
== uCount
)
931 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
933 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
935 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
938 list_add_tail(set
, ¤t_isolated
->entry
);
939 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
944 HeapFree(GetProcessHeap(), 0, runs
);
947 /*************************************************************
948 * BIDI_DeterminLevels
950 BOOL
BIDI_DetermineLevels(
951 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
952 INT uCount
, /* [in] Number of WCHARs in string. */
953 const SCRIPT_STATE
*s
,
954 const SCRIPT_CONTROL
*c
,
955 WORD
*lpOutLevels
/* [out] final string levels */
959 unsigned baselevel
= 0;
960 struct list IsolatingRuns
;
961 IsolatedRun
*iso_run
, *next
;
963 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
965 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
968 WARN("Out of memory\n");
972 baselevel
= s
->uBidiLevel
;
974 classify(lpString
, chartype
, uCount
, c
);
975 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
977 /* resolve explicit */
978 resolveExplicit(baselevel
, chartype
, lpOutLevels
, uCount
);
979 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
981 /* X10/BD13: Computer Isolating runs */
982 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, uCount
, &IsolatingRuns
);
984 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
986 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
989 resolveWeak(iso_run
);
990 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
992 /* resolve neutrals */
993 resolveNeutrals(iso_run
);
994 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
996 list_remove(&iso_run
->entry
);
997 HeapFree(GetProcessHeap(),0,iso_run
);
1000 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1001 /* resolveImplicit */
1002 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1004 /* resolveResolvedLevels*/
1005 classify(lpString
, chartype
, uCount
, c
);
1006 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1008 HeapFree(GetProcessHeap(), 0, chartype
);
1012 /* reverse cch indexes */
1013 static void reverse(int *pidx
, int cch
)
1017 for (; ich
< --cch
; ich
++)
1020 pidx
[ich
] = pidx
[cch
];
1026 /*------------------------------------------------------------------------
1027 Functions: reorder/reorderLevel
1029 Recursively reorders the display string
1030 "From the highest level down, reverse all characters at that level and
1031 higher, down to the lowest odd level"
1033 Implements rule L2 of the Unicode bidi Algorithm.
1035 Input: Array of embedding levels
1037 Flag enabling reversal (set to false by initial caller)
1039 In/Out: Text to reorder
1041 Note: levels may exceed 15 resp. 61 on input.
1043 Rule L3 - reorder combining marks is not implemented here
1044 Rule L4 - glyph mirroring is implemented as a display option below
1046 Note: this should be applied a line at a time
1047 -------------------------------------------------------------------------*/
1048 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1052 /* true as soon as first odd level encountered */
1053 fReverse
= fReverse
|| odd(level
);
1055 for (; ich
< cch
; ich
++)
1057 if (plevel
[ich
] < level
)
1061 else if (plevel
[ich
] > level
)
1063 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1064 cch
- ich
, fReverse
) - 1;
1069 reverse(pIndexs
, ich
);
1074 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1075 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1080 /* true as soon as first odd level encountered */
1081 fReverse
= fReverse
|| odd(level
);
1083 for (; ich
< cch
; ich
++)
1085 if (plevel
[ich
] < level
)
1087 else if (plevel
[ich
] > level
)
1092 reverse(pIndexs
, ich
);
1098 for (; ich
< cch
; ich
++)
1099 if (plevel
[ich
] < level
)
1101 else if (plevel
[ich
] > level
)
1102 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1103 cch
- ich
, fReverse
) - 1;
1109 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
1113 classify(lpString
, lpStrength
, uCount
, c
);
1115 for ( i
= 0; i
< uCount
; i
++)
1117 switch(lpStrength
[i
])
1126 lpStrength
[i
] = BIDI_STRONG
;
1135 lpStrength
[i
] = BIDI_WEAK
;
1141 default: /* Neutrals and NSM */
1142 lpStrength
[i
] = BIDI_NEUTRAL
;