2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
52 #include "wine/unicode.h"
53 #include "wine/debug.h"
55 #include "usp10_internal.h"
57 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
59 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
62 /* HELPER FUNCTIONS AND DECLARATIONS */
64 /*------------------------------------------------------------------------
65 Bidirectional Character Types
67 as defined by the Unicode Bidirectional Algorithm Table 3-7.
71 The list of bidirectional character types here is not grouped the
72 same way as the table 3-7, since the numberic values for the types
73 are chosen to keep the state and action tables compact.
74 ------------------------------------------------------------------------*/
78 /* ON MUST be zero, code relies on ON = N = 0 */
79 ON
= 0, /* Other Neutral */
82 AN
, /* Arabic Number */
83 EN
, /* European Number */
84 AL
, /* Arabic Letter (Right-to-left) */
85 NSM
, /* Non-spacing Mark */
86 CS
, /* Common Separator */
87 ES
, /* European Separator */
88 ET
, /* European Terminator (post/prefix e.g. $ and %) */
91 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
94 S
, /* Segment Separator (TAB) // used only in L1 */
95 WS
, /* White space // used only in L1 */
96 B
, /* Paragraph Separator (aka as PS) */
98 /* types for explicit controls */
99 RLO
, /* these are used only in X1-X9 */
105 /* resolved types, also resolved directions */
106 N
= ON
, /* alias, where ON, WS and S are treated the same */
109 /* HELPER FUNCTIONS */
110 /* the character type contains the C1_* flags in the low 12 bits */
111 /* and the C2_* type in the high 4 bits */
112 static __inline
unsigned short get_char_typeW( WCHAR ch
)
115 GetStringTypeW(CT_CTYPE1
, &ch
, 1, &CharType
);
119 /* Convert the libwine information to the direction enum */
120 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
122 static const enum directions dir_map
[16] =
124 L
, /* unassigned defaults to L */
139 PDF
/* also LRE, LRO, RLE, RLO */
144 for (i
= 0; i
< uCount
; ++i
)
146 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
150 if (!c
->fLegacyBidiClass
) break;
154 case '+': chartype
[i
] = N
; break;
155 case '/': chartype
[i
] = CS
; break;
161 case 0x202A: chartype
[i
] = LRE
; break;
162 case 0x202B: chartype
[i
] = RLE
; break;
163 case 0x202C: chartype
[i
] = PDF
; break;
164 case 0x202D: chartype
[i
] = LRO
; break;
165 case 0x202E: chartype
[i
] = RLO
; break;
172 /* Set a run of cval values at locations all prior to, but not including */
173 /* iStart, to the new value nval. */
174 static void SetDeferredRun(WORD
*pval
, int cval
, int iStart
, int nval
)
177 for (; i
>= iStart
- cval
; i
--)
183 /* RESOLVE EXPLICIT */
185 static WORD
GreaterEven(int i
)
187 return odd(i
) ? i
+ 1 : i
+ 2;
190 static WORD
GreaterOdd(int i
)
192 return odd(i
) ? i
+ 2 : i
+ 1;
195 static WORD
EmbeddingDirection(int level
)
197 return odd(level
) ? R
: L
;
200 /*------------------------------------------------------------------------
201 Function: resolveExplicit
203 Recursively resolves explicit embedding levels and overrides.
204 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
206 Input: Base embedding level and direction
209 Output: Array of embedding levels
211 In/Out: Array of direction classes
214 Note: The function uses two simple counters to keep track of
215 matching explicit codes and PDF. Use the default argument for
216 the outermost call. The nesting counter counts the recursion
217 depth and not the embedding level.
218 ------------------------------------------------------------------------*/
220 static int resolveExplicit(int level
, int dir
, WORD
*pcls
, WORD
*plevel
, int cch
, int nNest
)
222 /* always called with a valid nesting level
223 nesting levels are != embedding levels */
224 int nLastValid
= nNest
;
227 /* check input values */
228 ASSERT(nNest
>= 0 && level
>= 0 && level
<= MAX_LEVEL
);
230 /* process the text */
231 for (; ich
< cch
; ich
++)
233 WORD cls
= pcls
[ich
];
239 if (GreaterEven(level
) <= MAX_LEVEL
- (cls
== LRO
? 2 : 0))
241 plevel
[ich
] = GreaterEven(level
);
243 ich
+= resolveExplicit(plevel
[ich
], (cls
== LRE
? N
: L
),
244 &pcls
[ich
+1], &plevel
[ich
+1],
245 cch
- (ich
+1), nNest
);
249 cls
= pcls
[ich
] = BN
;
255 if (GreaterOdd(level
) <= MAX_LEVEL
- (cls
== RLO
? 2 : 0))
257 plevel
[ich
] = GreaterOdd(level
);
259 ich
+= resolveExplicit(plevel
[ich
], (cls
== RLE
? N
: R
),
260 &pcls
[ich
+1], &plevel
[ich
+1],
261 cch
- (ich
+1), nNest
);
265 cls
= pcls
[ich
] = BN
;
269 cls
= pcls
[ich
] = BN
;
272 if (nLastValid
< nNest
)
278 cch
= ich
; /* break the loop, but complete body */
283 /* Apply the override */
296 /* RESOLVE WEAK TYPES */
298 enum states
/* possible states */
300 xa
, /* arabic letter */
301 xr
, /* right letter */
302 xl
, /* left letter */
304 ao
, /* arabic lett. foll by ON */
305 ro
, /* right lett. foll by ON */
306 lo
, /* left lett. foll by ON */
308 rt
, /* ET following R */
309 lt
, /* ET following L */
311 cn
, /* EN, AN following AL */
312 ra
, /* arabic number foll R */
313 re
, /* european number foll R */
314 la
, /* arabic number foll L */
315 le
, /* european number foll L */
317 ac
, /* CS following cn */
318 rc
, /* CS following ra */
319 rs
, /* CS,ES following re */
320 lc
, /* CS following la */
321 ls
, /* CS,ES following le */
323 ret
, /* ET following re */
324 let
, /* ET following le */
327 static const int stateWeak
[][10] =
329 /* N, L, R, AN, EN, AL,NSM, CS, ES, ET */
330 /*xa*/ { ao
, xl
, xr
, cn
, cn
, xa
, xa
, ao
, ao
, ao
}, /* arabic letter */
331 /*xr*/ { ro
, xl
, xr
, ra
, re
, xa
, xr
, ro
, ro
, rt
}, /* right letter */
332 /*xl*/ { lo
, xl
, xr
, la
, le
, xa
, xl
, lo
, lo
, lt
}, /* left letter */
334 /*ao*/ { ao
, xl
, xr
, cn
, cn
, xa
, ao
, ao
, ao
, ao
}, /* arabic lett. foll by ON*/
335 /*ro*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* right lett. foll by ON */
336 /*lo*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* left lett. foll by ON */
338 /*rt*/ { ro
, xl
, xr
, ra
, re
, xa
, rt
, ro
, ro
, rt
}, /* ET following R */
339 /*lt*/ { lo
, xl
, xr
, la
, le
, xa
, lt
, lo
, lo
, lt
}, /* ET following L */
341 /*cn*/ { ao
, xl
, xr
, cn
, cn
, xa
, cn
, ac
, ao
, ao
}, /* EN, AN following AL */
342 /*ra*/ { ro
, xl
, xr
, ra
, re
, xa
, ra
, rc
, ro
, rt
}, /* arabic number foll R */
343 /*re*/ { ro
, xl
, xr
, ra
, re
, xa
, re
, rs
, rs
,ret
}, /* european number foll R */
344 /*la*/ { lo
, xl
, xr
, la
, le
, xa
, la
, lc
, lo
, lt
}, /* arabic number foll L */
345 /*le*/ { lo
, xl
, xr
, la
, le
, xa
, le
, ls
, ls
,let
}, /* european number foll L */
347 /*ac*/ { ao
, xl
, xr
, cn
, cn
, xa
, ao
, ao
, ao
, ao
}, /* CS following cn */
348 /*rc*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* CS following ra */
349 /*rs*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* CS,ES following re */
350 /*lc*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* CS following la */
351 /*ls*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* CS,ES following le */
353 /*ret*/{ ro
, xl
, xr
, ra
, re
, xa
,ret
, ro
, ro
,ret
}, /* ET following re */
354 /*let*/{ lo
, xl
, xr
, la
, le
, xa
,let
, lo
, lo
,let
}, /* ET following le */
357 enum actions
/* possible actions */
360 IX
= 0x100, /* increment */
361 XX
= 0xF, /* no-op */
364 xxx
= (XX
<< 4) + XX
, /* no-op */
365 xIx
= IX
+ xxx
, /* increment run */
366 xxN
= (XX
<< 4) + ON
, /* set current to N */
367 xxE
= (XX
<< 4) + EN
, /* set current to EN */
368 xxA
= (XX
<< 4) + AN
, /* set current to AN */
369 xxR
= (XX
<< 4) + R
, /* set current to R */
370 xxL
= (XX
<< 4) + L
, /* set current to L */
371 Nxx
= (ON
<< 4) + 0xF, /* set run to neutral */
372 Axx
= (AN
<< 4) + 0xF, /* set run to AN */
373 ExE
= (EN
<< 4) + EN
, /* set run to EN, set current to EN */
374 NIx
= (ON
<< 4) + 0xF + IX
, /* set run to N, increment */
375 NxN
= (ON
<< 4) + ON
, /* set run to N, set current to N */
376 NxR
= (ON
<< 4) + R
, /* set run to N, set current to R */
377 NxE
= (ON
<< 4) + EN
, /* set run to N, set current to EN */
379 AxA
= (AN
<< 4) + AN
, /* set run to AN, set current to AN */
380 NxL
= (ON
<< 4) + L
, /* set run to N, set current to L */
381 LxL
= (L
<< 4) + L
, /* set run to L, set current to L */
384 static const int actionWeak
[][10] =
386 /* N, L, R, AN, EN, AL, NSM, CS, ES, ET */
387 /*xa*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxR
, xxN
, xxN
, xxN
}, /* arabic letter */
388 /*xr*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxR
, xxN
, xxN
, xIx
}, /* right letter */
389 /*xl*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xxN
, xxN
, xIx
}, /* left letter */
391 /*ao*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxN
, xxN
, xxN
, xxN
}, /* arabic lett. foll by ON */
392 /*ro*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxN
, xxN
, xxN
, xIx
}, /* right lett. foll by ON */
393 /*lo*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxN
, xxN
, xxN
, xIx
}, /* left lett. foll by ON */
395 /*rt*/ { Nxx
, Nxx
, Nxx
, Nxx
, ExE
, NxR
, xIx
, NxN
, NxN
, xIx
}, /* ET following R */
396 /*lt*/ { Nxx
, Nxx
, Nxx
, Nxx
, LxL
, NxR
, xIx
, NxN
, NxN
, xIx
}, /* ET following L */
398 /*cn*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxA
, xIx
, xxN
, xxN
}, /* EN, AN following AL */
399 /*ra*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxA
, xIx
, xxN
, xIx
}, /* arabic number foll R */
400 /*re*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxE
, xIx
, xIx
, xxE
}, /* european number foll R */
401 /*la*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxA
, xIx
, xxN
, xIx
}, /* arabic number foll L */
402 /*le*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xIx
, xIx
, xxL
}, /* european number foll L */
404 /*ac*/ { Nxx
, Nxx
, Nxx
, Axx
, AxA
, NxR
, NxN
, NxN
, NxN
, NxN
}, /* CS following cn */
405 /*rc*/ { Nxx
, Nxx
, Nxx
, Axx
, NxE
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS following ra */
406 /*rs*/ { Nxx
, Nxx
, Nxx
, Nxx
, ExE
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS,ES following re */
407 /*lc*/ { Nxx
, Nxx
, Nxx
, Axx
, NxL
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS following la */
408 /*ls*/ { Nxx
, Nxx
, Nxx
, Nxx
, LxL
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS,ES following le */
410 /*ret*/{ xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxE
, xxN
, xxN
, xxE
}, /* ET following re */
411 /*let*/{ xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xxN
, xxN
, xxL
}, /* ET following le */
414 static int GetDeferredType(int action
)
416 return (action
>> 4) & 0xF;
419 static int GetResolvedType(int action
)
424 /* Note on action table:
426 States can be of two kinds:
427 - Immediate Resolution State, where each input token
428 is resolved as soon as it is seen. These states have
429 only single action codes (xxN) or the no-op (xxx)
430 for static input tokens.
431 - Deferred Resolution State, where input tokens either
432 either extend the run (xIx) or resolve its Type (e.g. Nxx).
434 Input classes are of three kinds
435 - Static Input Token, where the class of the token remains
436 unchanged on output (AN, L, N, R)
437 - Replaced Input Token, where the class of the token is
438 always replaced on output (AL, BN, NSM, CS, ES, ET)
439 - Conditional Input Token, where the class of the token is
440 changed on output in some, but not all, cases (EN)
442 Where tokens are subject to change, a double action
443 (e.g. NxA, or NxN) is _required_ after deferred states,
444 resolving both the deferred state and changing the current token.
447 /*------------------------------------------------------------------------
448 Function: resolveWeak
450 Resolves the directionality of numeric and other weak character types
452 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
454 Input: Array of embedding levels
457 In/Out: Array of directional classes
459 Note: On input only these directional classes are expected
460 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
461 ------------------------------------------------------------------------*/
462 static void resolveWeak(int baselevel
, WORD
*pcls
, WORD
*plevel
, int cch
)
464 int state
= odd(baselevel
) ? xr
: xl
;
467 int level
= baselevel
;
468 int action
, clsRun
, clsNew
;
472 for (; ich
< cch
; ich
++)
474 /* ignore boundary neutrals */
477 /* must flatten levels unless at a level change; */
480 /* lookahead for level changes */
481 if (ich
+ 1 == cch
&& level
!= baselevel
)
483 /* have to fixup last BN before end of the loop, since
484 * its fix-upped value will be needed below the assert */
485 pcls
[ich
] = EmbeddingDirection(level
);
487 else if (ich
+ 1 < cch
&& level
!= plevel
[ich
+1] && pcls
[ich
+1] != BN
)
489 /* fixup LAST BN in front / after a level run to make
490 * it act like the SOR/EOR in rule X10 */
491 int newlevel
= plevel
[ich
+1];
492 if (level
> newlevel
) {
495 plevel
[ich
] = newlevel
;
497 /* must match assigned level */
498 pcls
[ich
] = EmbeddingDirection(newlevel
);
499 level
= plevel
[ich
+1];
503 /* don't interrupt runs */
512 ASSERT(pcls
[ich
] <= BN
);
515 action
= actionWeak
[state
][cls
];
517 /* resolve the directionality for deferred runs */
518 clsRun
= GetDeferredType(action
);
521 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
525 /* resolve the directionality class at the current location */
526 clsNew
= GetResolvedType(action
);
530 /* increment a deferred run */
534 state
= stateWeak
[state
][cls
];
537 /* resolve any deferred runs
538 * use the direction of the current level to emulate PDF */
539 cls
= EmbeddingDirection(level
);
541 /* resolve the directionality for deferred runs */
542 clsRun
= GetDeferredType(actionWeak
[state
][cls
]);
544 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
547 /* RESOLVE NEUTRAL TYPES */
552 /* action to resolve previous input */
553 nL
= L
, /* resolve EN to L */
554 En
= 3 << 4, /* resolve neutrals run to embedding level direction */
555 Rn
= R
<< 4, /* resolve neutrals run to strong right */
556 Ln
= L
<< 4, /* resolved neutrals run to strong left */
557 In
= (1<<8), /* increment count of deferred neutrals */
558 LnL
= (1<<4)+L
, /* set run and EN to L */
561 static int GetDeferredNeutrals(int action
, int level
)
563 action
= (action
>> 4) & 0xF;
564 if (action
== (En
>> 4))
565 return EmbeddingDirection(level
);
570 static int GetResolvedNeutrals(int action
)
572 action
= action
& 0xF;
582 /* new temporary class */
583 r
, /* R and characters resolved to R */
584 l
, /* L and characters resolved to L */
585 rn
, /* N preceded by right */
586 ln
, /* N preceded by left */
587 a
, /* AN preceded by left (the abbreviation 'la' is used up above) */
588 na
, /* N preceded by a */
592 /*------------------------------------------------------------------------
595 By rule W7, whenever a EN is 'dominated' by an L (including start of
596 run with embedding direction = L) it is resolved to, and further treated
599 This leads to the need for 'a' and 'na' states.
600 ------------------------------------------------------------------------*/
602 static const int actionNeutrals
[][5] =
604 /* N, L, R, AN, EN = cls */
605 { In
, 0, 0, 0, 0 }, /* r right */
606 { In
, 0, 0, 0, L
}, /* l left */
608 { In
, En
, Rn
, Rn
, Rn
}, /* rn N preceded by right */
609 { In
, Ln
, En
, En
, LnL
}, /* ln N preceded by left */
611 { In
, 0, 0, 0, L
}, /* a AN preceded by left */
612 { In
, En
, Rn
, Rn
, En
}, /* na N preceded by a */
615 static const int stateNeutrals
[][5] =
617 /* N, L, R, AN, EN */
618 { rn
, l
, r
, r
, r
}, /* r right */
619 { ln
, l
, r
, a
, l
}, /* l left */
621 { rn
, l
, r
, r
, r
}, /* rn N preceded by right */
622 { ln
, l
, r
, a
, l
}, /* ln N preceded by left */
624 { na
, l
, r
, a
, l
}, /* a AN preceded by left */
625 { na
, l
, r
, a
, l
}, /* na N preceded by la */
628 /*------------------------------------------------------------------------
629 Function: resolveNeutrals
631 Resolves the directionality of neutral character types.
633 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
635 Input: Array of embedding levels
639 In/Out: Array of directional classes
641 Note: On input only these directional classes are expected
642 R, L, N, AN, EN and BN
644 W8 resolves a number of ENs to L
645 ------------------------------------------------------------------------*/
646 static void resolveNeutrals(int baselevel
, WORD
*pcls
, const WORD
*plevel
, int cch
)
648 /* the state at the start of text depends on the base level */
649 int state
= odd(baselevel
) ? r
: l
;
653 int level
= baselevel
;
655 int action
, clsRun
, clsNew
;
657 for (; ich
< cch
; ich
++)
659 /* ignore boundary neutrals */
662 /* include in the count for a deferred run */
666 /* skip any further processing */
670 ASSERT(pcls
[ich
] < 5); /* "Only N, L, R, AN, EN are allowed" */
673 action
= actionNeutrals
[state
][cls
];
675 /* resolve the directionality for deferred runs */
676 clsRun
= GetDeferredNeutrals(action
, level
);
679 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
683 /* resolve the directionality class at the current location */
684 clsNew
= GetResolvedNeutrals(action
);
691 state
= stateNeutrals
[state
][cls
];
695 /* resolve any deferred runs */
696 cls
= EmbeddingDirection(level
); /* eor has type of current level */
698 /* resolve the directionality for deferred runs */
699 clsRun
= GetDeferredNeutrals(actionNeutrals
[state
][cls
], level
);
701 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
704 /* RESOLVE IMPLICIT */
706 /*------------------------------------------------------------------------
707 Function: resolveImplicit
709 Recursively resolves implicit embedding levels.
710 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
712 Input: Array of direction classes
716 In/Out: Array of embedding levels
718 Note: levels may exceed 15 on output.
719 Accepted subset of direction classes
721 ------------------------------------------------------------------------*/
722 static const WORD addLevel
[][4] =
725 /* even */ { 0, 1, 2, 2, },
726 /* odd */ { 1, 0, 1, 1, }
730 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int cch
)
733 for (; ich
< cch
; ich
++)
735 /* cannot resolve bn here, since some bn were resolved to strong
736 * types in resolveWeak. To remove these we need the original
737 * types, which are available again in resolveWhiteSpace */
742 ASSERT(pcls
[ich
] > 0); /* "No Neutrals allowed to survive here." */
743 ASSERT(pcls
[ich
] < 5); /* "Out of range." */
744 plevel
[ich
] += addLevel
[odd(plevel
[ich
])][pcls
[ich
] - 1];
748 /*************************************************************
749 * BIDI_DeterminLevels
751 BOOL
BIDI_DetermineLevels(
752 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
753 INT uCount
, /* [in] Number of WCHARs in string. */
754 const SCRIPT_STATE
*s
,
755 const SCRIPT_CONTROL
*c
,
756 WORD
*lpOutLevels
/* [out] final string levels */
760 unsigned baselevel
= 0,j
;
761 TRACE("%s, %d", debugstr_wn(lpString
, uCount
), uCount
);
763 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
766 WARN("Out of memory\n");
770 baselevel
= s
->uBidiLevel
;
772 classify(lpString
, chartype
, uCount
, c
);
774 for (j
= 0; j
< uCount
; ++j
)
780 case ON
: chartype
[j
] = N
;
784 /* resolve explicit */
785 resolveExplicit(baselevel
, N
, chartype
, lpOutLevels
, uCount
, 0);
788 resolveWeak(baselevel
, chartype
, lpOutLevels
, uCount
);
790 /* resolve neutrals */
791 resolveNeutrals(baselevel
, chartype
, lpOutLevels
, uCount
);
793 /* resolveImplicit */
794 resolveImplicit(chartype
, lpOutLevels
, uCount
);
796 HeapFree(GetProcessHeap(), 0, chartype
);
800 /* reverse cch indexes */
801 static void reverse(int *pidx
, int cch
)
805 for (; ich
< --cch
; ich
++)
808 pidx
[ich
] = pidx
[cch
];
814 /*------------------------------------------------------------------------
815 Functions: reorder/reorderLevel
817 Recursively reorders the display string
818 "From the highest level down, reverse all characters at that level and
819 higher, down to the lowest odd level"
821 Implements rule L2 of the Unicode bidi Algorithm.
823 Input: Array of embedding levels
825 Flag enabling reversal (set to false by initial caller)
827 In/Out: Text to reorder
829 Note: levels may exceed 15 resp. 61 on input.
831 Rule L3 - reorder combining marks is not implemented here
832 Rule L4 - glyph mirroring is implemented as a display option below
834 Note: this should be applied a line at a time
835 -------------------------------------------------------------------------*/
836 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
840 /* true as soon as first odd level encountered */
841 fReverse
= fReverse
|| odd(level
);
843 for (; ich
< cch
; ich
++)
845 if (plevel
[ich
] < level
)
849 else if (plevel
[ich
] > level
)
851 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
852 cch
- ich
, fReverse
) - 1;
857 reverse(pIndexs
, ich
);
862 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
863 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
868 /* true as soon as first odd level encountered */
869 fReverse
= fReverse
|| odd(level
);
871 for (; ich
< cch
; ich
++)
873 if (plevel
[ich
] < level
)
875 else if (plevel
[ich
] > level
)
880 reverse(pIndexs
, ich
);
886 for (; ich
< cch
; ich
++)
887 if (plevel
[ich
] > level
)
888 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
889 cch
- ich
, fReverse
) - 1;