2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
46 #include <wine/list.h>
48 #include "usp10_internal.h"
50 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
52 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
55 /* HELPER FUNCTIONS AND DECLARATIONS */
57 /*------------------------------------------------------------------------
58 Bidirectional Character Types
60 as defined by the Unicode Bidirectional Algorithm Table 3-7.
64 The list of bidirectional character types here is not grouped the
65 same way as the table 3-7, since the numberic values for the types
66 are chosen to keep the state and action tables compact.
67 ------------------------------------------------------------------------*/
71 /* ON MUST be zero, code relies on ON = NI = 0 */
72 ON
= 0, /* Other Neutral */
75 AN
, /* Arabic Number */
76 EN
, /* European Number */
77 AL
, /* Arabic Letter (Right-to-left) */
78 NSM
, /* Non-spacing Mark */
79 CS
, /* Common Separator */
80 ES
, /* European Separator */
81 ET
, /* European Terminator (post/prefix e.g. $ and %) */
84 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
87 S
, /* Segment Separator (TAB) // used only in L1 */
88 WS
, /* White space // used only in L1 */
89 B
, /* Paragraph Separator (aka as PS) */
91 /* types for explicit controls */
92 RLO
, /* these are used only in X1-X9 */
98 LRI
, /* Isolate formatting characters new with 6.3 */
103 /* resolved types, also resolved directions */
104 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
107 static const char debug_type
[][4] =
109 "ON", /* Other Neutral */
110 "L", /* Left Letter */
111 "R", /* Right Letter */
112 "AN", /* Arabic Number */
113 "EN", /* European Number */
114 "AL", /* Arabic Letter (Right-to-left) */
115 "NSM", /* Non-spacing Mark */
116 "CS", /* Common Separator */
117 "ES", /* European Separator */
118 "ET", /* European Terminator (post/prefix e.g. $ and %) */
119 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
120 "S", /* Segment Separator (TAB) // used only in L1 */
121 "WS", /* White space // used only in L1 */
122 "B", /* Paragraph Separator (aka as PS) */
123 "RLO", /* these are used only in X1-X9 */
128 "LRI", /* Isolate formatting characters new with 6.3 */
134 /* HELPER FUNCTIONS */
136 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
140 for (i
= start
; i
< end
; i
++)
141 TRACE(" %s",debug_type
[types
[i
]]);
145 /* Convert the libwine information to the direction enum */
146 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
148 static const enum directions dir_map
[16] =
150 L
, /* unassigned defaults to L */
165 PDF
/* also LRE, LRO, RLE, RLO */
170 for (i
= 0; i
< uCount
; ++i
)
172 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
176 if (!c
->fLegacyBidiClass
) break;
180 case '+': chartype
[i
] = NI
; break;
181 case '/': chartype
[i
] = CS
; break;
187 case 0x202A: chartype
[i
] = LRE
; break;
188 case 0x202B: chartype
[i
] = RLE
; break;
189 case 0x202C: chartype
[i
] = PDF
; break;
190 case 0x202D: chartype
[i
] = LRO
; break;
191 case 0x202E: chartype
[i
] = RLO
; break;
192 case 0x2066: chartype
[i
] = LRI
; break;
193 case 0x2067: chartype
[i
] = RLI
; break;
194 case 0x2068: chartype
[i
] = FSI
; break;
195 case 0x2069: chartype
[i
] = PDI
; break;
202 /* RESOLVE EXPLICIT */
204 static WORD
GreaterEven(int i
)
206 return odd(i
) ? i
+ 1 : i
+ 2;
209 static WORD
GreaterOdd(int i
)
211 return odd(i
) ? i
+ 2 : i
+ 1;
214 static WORD
EmbeddingDirection(int level
)
216 return odd(level
) ? R
: L
;
219 /*------------------------------------------------------------------------
220 Function: resolveExplicit
222 Recursively resolves explicit embedding levels and overrides.
223 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
225 Input: Base embedding level and direction
228 Output: Array of embedding levels
230 In/Out: Array of direction classes
233 Note: The function uses two simple counters to keep track of
234 matching explicit codes and PDF. Use the default argument for
235 the outermost call. The nesting counter counts the recursion
236 depth and not the embedding level.
237 ------------------------------------------------------------------------*/
238 typedef struct tagStackItem
{
244 #define push_stack(l,o,i) \
246 stack[stack_top].level = l; \
247 stack[stack_top].override = o; \
248 stack[stack_top].isolate = i;} while(0)
250 #define pop_stack() do { stack_top++; } while(0)
252 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
254 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, int count
)
257 int overflow_isolate_count
= 0;
258 int overflow_embedding_count
= 0;
259 int valid_isolate_count
= 0;
262 StackItem stack
[MAX_DEPTH
+2];
263 int stack_top
= MAX_DEPTH
+1;
265 stack
[stack_top
].level
= level
;
266 stack
[stack_top
].override
= NI
;
267 stack
[stack_top
].isolate
= FALSE
;
269 for (i
= 0; i
< count
; i
++)
272 if (pclass
[i
] == RLE
)
274 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
275 poutLevel
[i
] = stack
[stack_top
].level
;
276 if (valid_level(least_odd
))
277 push_stack(least_odd
, NI
, FALSE
);
278 else if (overflow_isolate_count
== 0)
279 overflow_embedding_count
++;
282 else if (pclass
[i
] == LRE
)
284 int least_even
= GreaterEven(stack
[stack_top
].level
);
285 poutLevel
[i
] = stack
[stack_top
].level
;
286 if (valid_level(least_even
))
287 push_stack(least_even
, NI
, FALSE
);
288 else if (overflow_isolate_count
== 0)
289 overflow_embedding_count
++;
292 else if (pclass
[i
] == RLO
)
294 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
295 poutLevel
[i
] = stack
[stack_top
].level
;
296 if (valid_level(least_odd
))
297 push_stack(least_odd
, R
, FALSE
);
298 else if (overflow_isolate_count
== 0)
299 overflow_embedding_count
++;
302 else if (pclass
[i
] == LRO
)
304 int least_even
= GreaterEven(stack
[stack_top
].level
);
305 poutLevel
[i
] = stack
[stack_top
].level
;
306 if (valid_level(least_even
))
307 push_stack(least_even
, L
, FALSE
);
308 else if (overflow_isolate_count
== 0)
309 overflow_embedding_count
++;
312 else if (pclass
[i
] == RLI
)
314 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
315 poutLevel
[i
] = stack
[stack_top
].level
;
316 if (valid_level(least_odd
))
318 valid_isolate_count
++;
319 push_stack(least_odd
, NI
, TRUE
);
322 overflow_isolate_count
++;
325 else if (pclass
[i
] == LRI
)
327 int least_even
= GreaterEven(stack
[stack_top
].level
);
328 poutLevel
[i
] = stack
[stack_top
].level
;
329 if (valid_level(least_even
))
331 valid_isolate_count
++;
332 push_stack(least_even
, NI
, TRUE
);
335 overflow_isolate_count
++;
338 else if (pclass
[i
] == FSI
)
343 poutLevel
[i
] = stack
[stack_top
].level
;
344 for (j
= i
+1; j
< count
; j
++)
346 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
351 else if (pclass
[j
] == PDI
)
360 if (skipping
) continue;
367 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
375 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
376 if (valid_level(least_odd
))
378 valid_isolate_count
++;
379 push_stack(least_odd
, NI
, TRUE
);
382 overflow_isolate_count
++;
386 int least_even
= GreaterEven(stack
[stack_top
].level
);
387 if (valid_level(least_even
))
389 valid_isolate_count
++;
390 push_stack(least_even
, NI
, TRUE
);
393 overflow_isolate_count
++;
397 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
399 poutLevel
[i
] = stack
[stack_top
].level
;
400 if (stack
[stack_top
].override
!= NI
)
401 pclass
[i
] = stack
[stack_top
].override
;
404 else if (pclass
[i
] == PDI
)
406 if (overflow_isolate_count
) overflow_isolate_count
--;
407 else if (!valid_isolate_count
) {/* do nothing */}
410 overflow_embedding_count
= 0;
411 while (!stack
[stack_top
].isolate
) pop_stack();
413 valid_isolate_count
--;
415 poutLevel
[i
] = stack
[stack_top
].level
;
418 else if (pclass
[i
] == PDF
)
420 poutLevel
[i
] = stack
[stack_top
].level
;
421 if (overflow_isolate_count
) {/* do nothing */}
422 else if (overflow_embedding_count
) overflow_embedding_count
--;
423 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
428 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
429 for (i
= 0; i
< count
; i
++)
430 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
434 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
436 if (index
== -1 || index
== back_fence
) return index
;
438 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
442 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
444 if (index
== front_fence
) return index
;
446 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
450 typedef struct tagRun
457 typedef struct tagIsolatedRun
468 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
470 if (index
>= (iso_run
->length
-1)) return -1;
472 while (index
< iso_run
->length
&& *iso_run
->ppcls
[index
] == BN
) index
++;
473 if (index
== iso_run
->length
) return -1;
477 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
480 if (index
<= 0) return -1;
482 while (index
> -1 && *iso_run
->ppcls
[index
] == BN
) index
--;
486 static inline int iso_previousChar(IsolatedRun
*iso_run
, int index
)
488 if (index
<= 0) return -1;
492 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
497 for (i
= 0; i
< iso_run
->length
; i
++)
498 TRACE(" %s",debug_type
[*iso_run
->ppcls
[i
]]);
502 /*------------------------------------------------------------------------
503 Function: resolveWeak
505 Resolves the directionality of numeric and other weak character types
507 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
509 Input: Array of embedding levels
512 In/Out: Array of directional classes
514 Note: On input only these directional classes are expected
515 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
516 ------------------------------------------------------------------------*/
518 static void resolveWeak(IsolatedRun
* iso_run
)
523 for (i
=0; i
< iso_run
->length
; i
++)
525 if (*iso_run
->ppcls
[i
] == NSM
)
527 int j
= iso_previousValidChar(iso_run
, i
);
529 *iso_run
->ppcls
[i
] = iso_run
->sos
;
530 else if (*iso_run
->ppcls
[j
] >= LRI
)
531 *iso_run
->ppcls
[i
] = ON
;
533 *iso_run
->ppcls
[i
] = *iso_run
->ppcls
[j
];
538 for (i
= 0; i
< iso_run
->length
; i
++)
540 if (*iso_run
->ppcls
[i
] == EN
)
542 int j
= iso_previousValidChar(iso_run
, i
);
545 if (*iso_run
->ppcls
[j
] == R
|| *iso_run
->ppcls
[j
] == L
|| *iso_run
->ppcls
[j
] == AL
)
547 if (*iso_run
->ppcls
[j
] == AL
)
548 *iso_run
->ppcls
[i
] = AN
;
551 j
= iso_previousValidChar(iso_run
, j
);
557 for (i
= 0; i
< iso_run
->length
; i
++)
559 if (*iso_run
->ppcls
[i
] == AL
)
560 *iso_run
->ppcls
[i
] = R
;
564 for (i
= 0; i
< iso_run
->length
; i
++)
566 if (*iso_run
->ppcls
[i
] == ES
)
568 int b
= iso_previousValidChar(iso_run
, i
);
569 int f
= iso_nextValidChar(iso_run
, i
);
571 if (b
> -1 && f
> -1 && *iso_run
->ppcls
[b
] == EN
&& *iso_run
->ppcls
[f
] == EN
)
572 *iso_run
->ppcls
[i
] = EN
;
574 else if (*iso_run
->ppcls
[i
] == CS
)
576 int b
= iso_previousValidChar(iso_run
, i
);
577 int f
= iso_nextValidChar(iso_run
, i
);
579 if (b
> -1 && f
> -1 && *iso_run
->ppcls
[b
] == EN
&& *iso_run
->ppcls
[f
] == EN
)
580 *iso_run
->ppcls
[i
] = EN
;
581 else if (b
> -1 && f
> -1 && *iso_run
->ppcls
[b
] == AN
&& *iso_run
->ppcls
[f
] == AN
)
582 *iso_run
->ppcls
[i
] = AN
;
587 for (i
= 0; i
< iso_run
->length
; i
++)
589 if (*iso_run
->ppcls
[i
] == ET
)
592 for (j
= i
-1 ; j
> -1; j
--)
594 if (*iso_run
->ppcls
[j
] == BN
) continue;
595 if (*iso_run
->ppcls
[j
] == ET
) continue;
596 else if (*iso_run
->ppcls
[j
] == EN
) *iso_run
->ppcls
[i
] = EN
;
599 if (*iso_run
->ppcls
[i
] == ET
)
601 for (j
= i
+1; j
< iso_run
->length
; j
++)
603 if (*iso_run
->ppcls
[j
] == BN
) continue;
604 if (*iso_run
->ppcls
[j
] == ET
) continue;
605 else if (*iso_run
->ppcls
[j
] == EN
) *iso_run
->ppcls
[i
] = EN
;
613 for (i
= 0; i
< iso_run
->length
; i
++)
615 if (*iso_run
->ppcls
[i
] == ET
|| *iso_run
->ppcls
[i
] == ES
|| *iso_run
->ppcls
[i
] == CS
|| *iso_run
->ppcls
[i
] == ON
)
619 if (b
> -1 && *iso_run
->ppcls
[b
] == BN
)
620 *iso_run
->ppcls
[b
] = ON
;
621 if (f
< iso_run
->length
&& *iso_run
->ppcls
[f
] == BN
)
622 *iso_run
->ppcls
[f
] = ON
;
624 *iso_run
->ppcls
[i
] = ON
;
629 for (i
= 0; i
< iso_run
->length
; i
++)
631 if (*iso_run
->ppcls
[i
] == EN
)
634 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
635 if (*iso_run
->ppcls
[j
] == R
|| *iso_run
->ppcls
[j
] == L
)
637 if (*iso_run
->ppcls
[j
] == L
)
638 *iso_run
->ppcls
[i
] = L
;
641 if (iso_run
->sos
== L
&& j
== -1)
642 *iso_run
->ppcls
[i
] = L
;
647 /*------------------------------------------------------------------------
648 Function: resolveNeutrals
650 Resolves the directionality of neutral character types.
652 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
654 Input: Array of embedding levels
658 In/Out: Array of directional classes
660 Note: On input only these directional classes are expected
661 R, L, NI, AN, EN and BN
663 W8 resolves a number of ENs to L
664 ------------------------------------------------------------------------*/
665 static void resolveNeutrals(IsolatedRun
*iso_run
)
669 /* Translate isolates into NI */
670 for (i
= 0; i
< iso_run
->length
; i
++)
672 if (*iso_run
->ppcls
[i
] >= LRI
)
673 *iso_run
->ppcls
[i
] = NI
;
675 switch(*iso_run
->ppcls
[i
])
679 case WS
: *iso_run
->ppcls
[i
] = NI
;
682 ASSERT(*iso_run
->ppcls
[i
] < 5 || *iso_run
->ppcls
[i
] == BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
685 /* N0: Skipping bracketed pairs for now */
688 for (i
= 0; i
< iso_run
->length
; i
++)
692 if (*iso_run
->ppcls
[i
] == NI
)
695 int b
= iso_previousValidChar(iso_run
, i
);
704 if (*iso_run
->ppcls
[b
] == R
|| *iso_run
->ppcls
[b
] == AN
|| *iso_run
->ppcls
[b
] == EN
)
706 else if (*iso_run
->ppcls
[b
] == L
)
708 else /* No string type */
711 j
= iso_nextValidChar(iso_run
, i
);
712 while (j
> -1 && *iso_run
->ppcls
[j
] == NI
) j
= iso_nextValidChar(iso_run
, j
);
719 else if (*iso_run
->ppcls
[j
] == R
|| *iso_run
->ppcls
[j
] == AN
|| *iso_run
->ppcls
[j
] == EN
)
721 else if (*iso_run
->ppcls
[j
] == L
)
723 else /* No string type */
728 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
729 *iso_run
->ppcls
[b
] = r
;
735 for (i
= 0; i
< iso_run
->length
; i
++)
737 if (*iso_run
->ppcls
[i
] == NI
)
741 *iso_run
->ppcls
[i
] = EmbeddingDirection(iso_run
->e
);
742 if (b
> -1 && *iso_run
->ppcls
[b
] == BN
)
743 *iso_run
->ppcls
[b
] = EmbeddingDirection(iso_run
->e
);
744 if (f
< iso_run
->length
&& *iso_run
->ppcls
[f
] == BN
)
745 *iso_run
->ppcls
[f
] = EmbeddingDirection(iso_run
->e
);
750 /*------------------------------------------------------------------------
751 Function: resolveImplicit
753 Recursively resolves implicit embedding levels.
754 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
756 Input: Array of direction classes
760 In/Out: Array of embedding levels
762 Note: levels may exceed 15 on output.
763 Accepted subset of direction classes
765 ------------------------------------------------------------------------*/
766 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
771 for (i
= sos
; i
<= eos
; i
++)
776 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
777 ASSERT(pcls
[i
] < 5); /* "Out of range." */
779 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
781 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
783 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
788 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
793 for (i
= sos
; i
<= eos
; i
++)
795 if (pcls
[i
] == B
|| pcls
[i
] == S
)
798 while (i
> sos
&& j
>= sos
&&
799 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
800 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
801 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
802 plevel
[j
--] = baselevel
;
803 plevel
[i
] = baselevel
;
806 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
807 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
808 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
811 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
812 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
813 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
814 plevel
[j
--] = baselevel
;
819 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, WORD
*pLevel
, int uCount
, struct list
*set
)
821 int run_start
, run_end
, i
;
824 IsolatedRun
*current_isolated
;
830 while (run_start
< uCount
)
832 run_end
= nextValidChar(pcls
, run_start
, uCount
);
833 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
835 runs
[run_count
].start
= run_start
;
836 runs
[run_count
].end
= run_end
;
837 runs
[run_count
].e
= pLevel
[run_start
];
838 run_start
= nextValidChar(pcls
, run_end
, uCount
);
842 /* Build Isolating Runs */
844 while (i
< run_count
)
847 if (runs
[k
].start
>= 0)
849 int type_fence
, real_end
;
851 current_isolated
= HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun
) + sizeof(WORD
*)*uCount
);
853 run_start
= runs
[k
].start
;
854 current_isolated
->e
= runs
[k
].e
;
855 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
857 for (j
= 0; j
< current_isolated
->length
; j
++)
858 current_isolated
->ppcls
[j
] = &pcls
[runs
[k
].start
+j
];
860 run_end
= runs
[k
].end
;
862 TRACE("{ [%i -- %i]",run_start
, run_end
);
864 if (pcls
[run_end
] == BN
)
865 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
867 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
871 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
872 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
881 int l
= current_isolated
->length
;
883 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
884 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
885 current_isolated
->ppcls
[l
] = &pcls
[runs
[j
].start
+m
];
887 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
889 run_end
= runs
[j
].end
;
890 if (pcls
[run_end
] == BN
)
891 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
902 type_fence
= previousValidChar(pcls
, run_start
, -1);
904 if (type_fence
== -1)
905 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
907 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
909 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
911 if (run_end
== uCount
)
912 current_isolated
->eos
= current_isolated
->sos
;
915 /* eos could be an BN */
916 if ( pcls
[run_end
] == BN
)
918 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
919 if (real_end
< run_start
)
920 real_end
= run_start
;
925 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
926 if (type_fence
== uCount
)
927 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
929 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
931 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
934 list_add_tail(set
, ¤t_isolated
->entry
);
935 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
941 /*************************************************************
942 * BIDI_DeterminLevels
944 BOOL
BIDI_DetermineLevels(
945 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
946 INT uCount
, /* [in] Number of WCHARs in string. */
947 const SCRIPT_STATE
*s
,
948 const SCRIPT_CONTROL
*c
,
949 WORD
*lpOutLevels
/* [out] final string levels */
953 unsigned baselevel
= 0;
954 struct list IsolatingRuns
;
955 IsolatedRun
*iso_run
, *next
;
957 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
959 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
962 WARN("Out of memory\n");
966 baselevel
= s
->uBidiLevel
;
968 classify(lpString
, chartype
, uCount
, c
);
969 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
971 /* resolve explicit */
972 resolveExplicit(baselevel
, chartype
, lpOutLevels
, uCount
);
973 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
975 /* X10/BD13: Computer Isolating runs */
976 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, uCount
, &IsolatingRuns
);
978 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
980 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
983 resolveWeak(iso_run
);
984 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
986 /* resolve neutrals */
987 resolveNeutrals(iso_run
);
988 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
990 list_remove(&iso_run
->entry
);
991 HeapFree(GetProcessHeap(),0,iso_run
);
994 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
995 /* resolveImplicit */
996 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
998 /* resolveResolvedLevels*/
999 classify(lpString
, chartype
, uCount
, c
);
1000 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1002 HeapFree(GetProcessHeap(), 0, chartype
);
1006 /* reverse cch indexes */
1007 static void reverse(int *pidx
, int cch
)
1011 for (; ich
< --cch
; ich
++)
1014 pidx
[ich
] = pidx
[cch
];
1020 /*------------------------------------------------------------------------
1021 Functions: reorder/reorderLevel
1023 Recursively reorders the display string
1024 "From the highest level down, reverse all characters at that level and
1025 higher, down to the lowest odd level"
1027 Implements rule L2 of the Unicode bidi Algorithm.
1029 Input: Array of embedding levels
1031 Flag enabling reversal (set to false by initial caller)
1033 In/Out: Text to reorder
1035 Note: levels may exceed 15 resp. 61 on input.
1037 Rule L3 - reorder combining marks is not implemented here
1038 Rule L4 - glyph mirroring is implemented as a display option below
1040 Note: this should be applied a line at a time
1041 -------------------------------------------------------------------------*/
1042 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1046 /* true as soon as first odd level encountered */
1047 fReverse
= fReverse
|| odd(level
);
1049 for (; ich
< cch
; ich
++)
1051 if (plevel
[ich
] < level
)
1055 else if (plevel
[ich
] > level
)
1057 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1058 cch
- ich
, fReverse
) - 1;
1063 reverse(pIndexs
, ich
);
1068 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1069 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1074 /* true as soon as first odd level encountered */
1075 fReverse
= fReverse
|| odd(level
);
1077 for (; ich
< cch
; ich
++)
1079 if (plevel
[ich
] < level
)
1081 else if (plevel
[ich
] > level
)
1086 reverse(pIndexs
, ich
);
1092 for (; ich
< cch
; ich
++)
1093 if (plevel
[ich
] < level
)
1095 else if (plevel
[ich
] > level
)
1096 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1097 cch
- ich
, fReverse
) - 1;
1103 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
1107 classify(lpString
, lpStrength
, uCount
, c
);
1109 for ( i
= 0; i
< uCount
; i
++)
1111 switch(lpStrength
[i
])
1120 lpStrength
[i
] = BIDI_STRONG
;
1129 lpStrength
[i
] = BIDI_WEAK
;
1135 default: /* Neutrals and NSM */
1136 lpStrength
[i
] = BIDI_NEUTRAL
;