2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
44 #include "usp10_internal.h"
46 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
48 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
51 /* HELPER FUNCTIONS AND DECLARATIONS */
53 /*------------------------------------------------------------------------
54 Bidirectional Character Types
56 as defined by the Unicode Bidirectional Algorithm Table 3-7.
60 The list of bidirectional character types here is not grouped the
61 same way as the table 3-7, since the numberic values for the types
62 are chosen to keep the state and action tables compact.
63 ------------------------------------------------------------------------*/
67 /* ON MUST be zero, code relies on ON = N = 0 */
68 ON
= 0, /* Other Neutral */
71 AN
, /* Arabic Number */
72 EN
, /* European Number */
73 AL
, /* Arabic Letter (Right-to-left) */
74 NSM
, /* Non-spacing Mark */
75 CS
, /* Common Separator */
76 ES
, /* European Separator */
77 ET
, /* European Terminator (post/prefix e.g. $ and %) */
80 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
83 S
, /* Segment Separator (TAB) // used only in L1 */
84 WS
, /* White space // used only in L1 */
85 B
, /* Paragraph Separator (aka as PS) */
87 /* types for explicit controls */
88 RLO
, /* these are used only in X1-X9 */
94 /* resolved types, also resolved directions */
95 N
= ON
, /* alias, where ON, WS and S are treated the same */
98 /* HELPER FUNCTIONS */
100 /* Convert the libwine information to the direction enum */
101 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
103 static const enum directions dir_map
[16] =
105 L
, /* unassigned defaults to L */
120 PDF
/* also LRE, LRO, RLE, RLO */
125 for (i
= 0; i
< uCount
; ++i
)
127 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
131 if (!c
->fLegacyBidiClass
) break;
135 case '+': chartype
[i
] = N
; break;
136 case '/': chartype
[i
] = CS
; break;
142 case 0x202A: chartype
[i
] = LRE
; break;
143 case 0x202B: chartype
[i
] = RLE
; break;
144 case 0x202C: chartype
[i
] = PDF
; break;
145 case 0x202D: chartype
[i
] = LRO
; break;
146 case 0x202E: chartype
[i
] = RLO
; break;
153 /* Set a run of cval values at locations all prior to, but not including */
154 /* iStart, to the new value nval. */
155 static void SetDeferredRun(WORD
*pval
, int cval
, int iStart
, int nval
)
158 for (; i
>= iStart
- cval
; i
--)
164 /* RESOLVE EXPLICIT */
166 static WORD
GreaterEven(int i
)
168 return odd(i
) ? i
+ 1 : i
+ 2;
171 static WORD
GreaterOdd(int i
)
173 return odd(i
) ? i
+ 2 : i
+ 1;
176 static WORD
EmbeddingDirection(int level
)
178 return odd(level
) ? R
: L
;
181 /*------------------------------------------------------------------------
182 Function: resolveExplicit
184 Recursively resolves explicit embedding levels and overrides.
185 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
187 Input: Base embedding level and direction
190 Output: Array of embedding levels
192 In/Out: Array of direction classes
195 Note: The function uses two simple counters to keep track of
196 matching explicit codes and PDF. Use the default argument for
197 the outermost call. The nesting counter counts the recursion
198 depth and not the embedding level.
199 ------------------------------------------------------------------------*/
201 static int resolveExplicit(int level
, int dir
, WORD
*pcls
, WORD
*plevel
, int cch
, int nNest
)
203 /* always called with a valid nesting level
204 nesting levels are != embedding levels */
205 int nLastValid
= nNest
;
208 /* check input values */
209 ASSERT(nNest
>= 0 && level
>= 0 && level
<= MAX_LEVEL
);
211 /* process the text */
212 for (; ich
< cch
; ich
++)
214 WORD cls
= pcls
[ich
];
220 if (GreaterEven(level
) <= MAX_LEVEL
- (cls
== LRO
? 2 : 0))
222 plevel
[ich
] = GreaterEven(level
);
224 ich
+= resolveExplicit(plevel
[ich
], (cls
== LRE
? N
: L
),
225 &pcls
[ich
+1], &plevel
[ich
+1],
226 cch
- (ich
+1), nNest
);
230 cls
= pcls
[ich
] = BN
;
236 if (GreaterOdd(level
) <= MAX_LEVEL
- (cls
== RLO
? 2 : 0))
238 plevel
[ich
] = GreaterOdd(level
);
240 ich
+= resolveExplicit(plevel
[ich
], (cls
== RLE
? N
: R
),
241 &pcls
[ich
+1], &plevel
[ich
+1],
242 cch
- (ich
+1), nNest
);
246 cls
= pcls
[ich
] = BN
;
250 cls
= pcls
[ich
] = BN
;
253 if (nLastValid
< nNest
)
259 cch
= ich
; /* break the loop, but complete body */
264 /* Apply the override */
277 /* RESOLVE WEAK TYPES */
279 enum states
/* possible states */
281 xa
, /* Arabic letter */
282 xr
, /* right letter */
283 xl
, /* left letter */
285 ao
, /* Arabic lett. foll by ON */
286 ro
, /* right lett. foll by ON */
287 lo
, /* left lett. foll by ON */
289 rt
, /* ET following R */
290 lt
, /* ET following L */
292 cn
, /* EN, AN following AL */
293 ra
, /* Arabic number foll R */
294 re
, /* European number foll R */
295 la
, /* Arabic number foll L */
296 le
, /* European number foll L */
298 ac
, /* CS following cn */
299 rc
, /* CS following ra */
300 rs
, /* CS,ES following re */
301 lc
, /* CS following la */
302 ls
, /* CS,ES following le */
304 ret
, /* ET following re */
305 let
, /* ET following le */
308 static const int stateWeak
[][10] =
310 /* N, L, R, AN, EN, AL,NSM, CS, ES, ET */
311 /*xa*/ { ao
, xl
, xr
, cn
, cn
, xa
, xa
, ao
, ao
, ao
}, /* Arabic letter */
312 /*xr*/ { ro
, xl
, xr
, ra
, re
, xa
, xr
, ro
, ro
, rt
}, /* right letter */
313 /*xl*/ { lo
, xl
, xr
, la
, le
, xa
, xl
, lo
, lo
, lt
}, /* left letter */
315 /*ao*/ { ao
, xl
, xr
, cn
, cn
, xa
, ao
, ao
, ao
, ao
}, /* Arabic lett. foll by ON*/
316 /*ro*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* right lett. foll by ON */
317 /*lo*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* left lett. foll by ON */
319 /*rt*/ { ro
, xl
, xr
, ra
, re
, xa
, rt
, ro
, ro
, rt
}, /* ET following R */
320 /*lt*/ { lo
, xl
, xr
, la
, le
, xa
, lt
, lo
, lo
, lt
}, /* ET following L */
322 /*cn*/ { ao
, xl
, xr
, cn
, cn
, xa
, cn
, ac
, ao
, ao
}, /* EN, AN following AL */
323 /*ra*/ { ro
, xl
, xr
, ra
, re
, xa
, ra
, rc
, ro
, rt
}, /* Arabic number foll R */
324 /*re*/ { ro
, xl
, xr
, ra
, re
, xa
, re
, rs
, rs
,ret
}, /* European number foll R */
325 /*la*/ { lo
, xl
, xr
, la
, le
, xa
, la
, lc
, lo
, lt
}, /* Arabic number foll L */
326 /*le*/ { lo
, xl
, xr
, la
, le
, xa
, le
, ls
, ls
,let
}, /* European number foll L */
328 /*ac*/ { ao
, xl
, xr
, cn
, cn
, xa
, ao
, ao
, ao
, ao
}, /* CS following cn */
329 /*rc*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* CS following ra */
330 /*rs*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* CS,ES following re */
331 /*lc*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* CS following la */
332 /*ls*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* CS,ES following le */
334 /*ret*/{ ro
, xl
, xr
, ra
, re
, xa
,ret
, ro
, ro
,ret
}, /* ET following re */
335 /*let*/{ lo
, xl
, xr
, la
, le
, xa
,let
, lo
, lo
,let
}, /* ET following le */
338 enum actions
/* possible actions */
341 IX
= 0x100, /* increment */
342 XX
= 0xF, /* no-op */
345 xxx
= (XX
<< 4) + XX
, /* no-op */
346 xIx
= IX
+ xxx
, /* increment run */
347 xxN
= (XX
<< 4) + ON
, /* set current to N */
348 xxE
= (XX
<< 4) + EN
, /* set current to EN */
349 xxA
= (XX
<< 4) + AN
, /* set current to AN */
350 xxR
= (XX
<< 4) + R
, /* set current to R */
351 xxL
= (XX
<< 4) + L
, /* set current to L */
352 Nxx
= (ON
<< 4) + 0xF, /* set run to neutral */
353 Axx
= (AN
<< 4) + 0xF, /* set run to AN */
354 ExE
= (EN
<< 4) + EN
, /* set run to EN, set current to EN */
355 NIx
= (ON
<< 4) + 0xF + IX
, /* set run to N, increment */
356 NxN
= (ON
<< 4) + ON
, /* set run to N, set current to N */
357 NxR
= (ON
<< 4) + R
, /* set run to N, set current to R */
358 NxE
= (ON
<< 4) + EN
, /* set run to N, set current to EN */
360 AxA
= (AN
<< 4) + AN
, /* set run to AN, set current to AN */
361 NxL
= (ON
<< 4) + L
, /* set run to N, set current to L */
362 LxL
= (L
<< 4) + L
, /* set run to L, set current to L */
365 static const int actionWeak
[][10] =
367 /* N, L, R, AN, EN, AL, NSM, CS, ES, ET */
368 /*xa*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxR
, xxN
, xxN
, xxN
}, /* Arabic letter */
369 /*xr*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxR
, xxN
, xxN
, xIx
}, /* right letter */
370 /*xl*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xxN
, xxN
, xIx
}, /* left letter */
372 /*ao*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxN
, xxN
, xxN
, xxN
}, /* Arabic lett. foll by ON */
373 /*ro*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxN
, xxN
, xxN
, xIx
}, /* right lett. foll by ON */
374 /*lo*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxN
, xxN
, xxN
, xIx
}, /* left lett. foll by ON */
376 /*rt*/ { Nxx
, Nxx
, Nxx
, Nxx
, ExE
, NxR
, xIx
, NxN
, NxN
, xIx
}, /* ET following R */
377 /*lt*/ { Nxx
, Nxx
, Nxx
, Nxx
, LxL
, NxR
, xIx
, NxN
, NxN
, xIx
}, /* ET following L */
379 /*cn*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxA
, xIx
, xxN
, xxN
}, /* EN, AN following AL */
380 /*ra*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxA
, xIx
, xxN
, xIx
}, /* Arabic number foll R */
381 /*re*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxE
, xIx
, xIx
, xxE
}, /* European number foll R */
382 /*la*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxA
, xIx
, xxN
, xIx
}, /* Arabic number foll L */
383 /*le*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xIx
, xIx
, xxL
}, /* European number foll L */
385 /*ac*/ { Nxx
, Nxx
, Nxx
, Axx
, AxA
, NxR
, NxN
, NxN
, NxN
, NxN
}, /* CS following cn */
386 /*rc*/ { Nxx
, Nxx
, Nxx
, Axx
, NxE
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS following ra */
387 /*rs*/ { Nxx
, Nxx
, Nxx
, Nxx
, ExE
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS,ES following re */
388 /*lc*/ { Nxx
, Nxx
, Nxx
, Axx
, NxL
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS following la */
389 /*ls*/ { Nxx
, Nxx
, Nxx
, Nxx
, LxL
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS,ES following le */
391 /*ret*/{ xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxE
, xxN
, xxN
, xxE
}, /* ET following re */
392 /*let*/{ xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xxN
, xxN
, xxL
}, /* ET following le */
395 static int GetDeferredType(int action
)
397 return (action
>> 4) & 0xF;
400 static int GetResolvedType(int action
)
405 /* Note on action table:
407 States can be of two kinds:
408 - Immediate Resolution State, where each input token
409 is resolved as soon as it is seen. These states have
410 only single action codes (xxN) or the no-op (xxx)
411 for static input tokens.
412 - Deferred Resolution State, where input tokens either
413 either extend the run (xIx) or resolve its Type (e.g. Nxx).
415 Input classes are of three kinds
416 - Static Input Token, where the class of the token remains
417 unchanged on output (AN, L, N, R)
418 - Replaced Input Token, where the class of the token is
419 always replaced on output (AL, BN, NSM, CS, ES, ET)
420 - Conditional Input Token, where the class of the token is
421 changed on output in some, but not all, cases (EN)
423 Where tokens are subject to change, a double action
424 (e.g. NxA, or NxN) is _required_ after deferred states,
425 resolving both the deferred state and changing the current token.
428 /*------------------------------------------------------------------------
429 Function: resolveWeak
431 Resolves the directionality of numeric and other weak character types
433 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
435 Input: Array of embedding levels
438 In/Out: Array of directional classes
440 Note: On input only these directional classes are expected
441 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
442 ------------------------------------------------------------------------*/
443 static void resolveWeak(int baselevel
, WORD
*pcls
, WORD
*plevel
, int cch
)
445 int state
= odd(baselevel
) ? xr
: xl
;
448 int level
= baselevel
;
449 int action
, clsRun
, clsNew
;
453 for (; ich
< cch
; ich
++)
455 /* ignore boundary neutrals */
458 /* must flatten levels unless at a level change; */
461 /* lookahead for level changes */
462 if (ich
+ 1 == cch
&& level
!= baselevel
)
464 /* have to fixup last BN before end of the loop, since
465 * its fix-upped value will be needed below the assert */
466 pcls
[ich
] = EmbeddingDirection(level
);
468 else if (ich
+ 1 < cch
&& level
!= plevel
[ich
+1] && pcls
[ich
+1] != BN
)
470 /* fixup LAST BN in front / after a level run to make
471 * it act like the SOR/EOR in rule X10 */
472 int newlevel
= plevel
[ich
+1];
473 if (level
> newlevel
) {
476 plevel
[ich
] = newlevel
;
478 /* must match assigned level */
479 pcls
[ich
] = EmbeddingDirection(newlevel
);
480 level
= plevel
[ich
+1];
484 /* don't interrupt runs */
493 ASSERT(pcls
[ich
] <= BN
);
496 action
= actionWeak
[state
][cls
];
498 /* resolve the directionality for deferred runs */
499 clsRun
= GetDeferredType(action
);
502 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
506 /* resolve the directionality class at the current location */
507 clsNew
= GetResolvedType(action
);
511 /* increment a deferred run */
515 state
= stateWeak
[state
][cls
];
518 /* resolve any deferred runs
519 * use the direction of the current level to emulate PDF */
520 cls
= EmbeddingDirection(level
);
522 /* resolve the directionality for deferred runs */
523 clsRun
= GetDeferredType(actionWeak
[state
][cls
]);
525 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
528 /* RESOLVE NEUTRAL TYPES */
533 /* action to resolve previous input */
534 nL
= L
, /* resolve EN to L */
535 En
= 3 << 4, /* resolve neutrals run to embedding level direction */
536 Rn
= R
<< 4, /* resolve neutrals run to strong right */
537 Ln
= L
<< 4, /* resolved neutrals run to strong left */
538 In
= (1<<8), /* increment count of deferred neutrals */
539 LnL
= (1<<4)+L
, /* set run and EN to L */
542 static int GetDeferredNeutrals(int action
, int level
)
544 action
= (action
>> 4) & 0xF;
545 if (action
== (En
>> 4))
546 return EmbeddingDirection(level
);
551 static int GetResolvedNeutrals(int action
)
553 action
= action
& 0xF;
563 /* new temporary class */
564 r
, /* R and characters resolved to R */
565 l
, /* L and characters resolved to L */
566 rn
, /* N preceded by right */
567 ln
, /* N preceded by left */
568 a
, /* AN preceded by left (the abbreviation 'la' is used up above) */
569 na
, /* N preceded by a */
573 /*------------------------------------------------------------------------
576 By rule W7, whenever a EN is 'dominated' by an L (including start of
577 run with embedding direction = L) it is resolved to, and further treated
580 This leads to the need for 'a' and 'na' states.
581 ------------------------------------------------------------------------*/
583 static const int actionNeutrals
[][5] =
585 /* N, L, R, AN, EN = cls */
586 { In
, 0, 0, 0, 0 }, /* r right */
587 { In
, 0, 0, 0, L
}, /* l left */
589 { In
, En
, Rn
, Rn
, Rn
}, /* rn N preceded by right */
590 { In
, Ln
, En
, En
, LnL
}, /* ln N preceded by left */
592 { In
, 0, 0, 0, L
}, /* a AN preceded by left */
593 { In
, En
, Rn
, Rn
, En
}, /* na N preceded by a */
596 static const int stateNeutrals
[][5] =
598 /* N, L, R, AN, EN */
599 { rn
, l
, r
, r
, r
}, /* r right */
600 { ln
, l
, r
, a
, l
}, /* l left */
602 { rn
, l
, r
, r
, r
}, /* rn N preceded by right */
603 { ln
, l
, r
, a
, l
}, /* ln N preceded by left */
605 { na
, l
, r
, a
, l
}, /* a AN preceded by left */
606 { na
, l
, r
, a
, l
}, /* na N preceded by la */
609 /*------------------------------------------------------------------------
610 Function: resolveNeutrals
612 Resolves the directionality of neutral character types.
614 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
616 Input: Array of embedding levels
620 In/Out: Array of directional classes
622 Note: On input only these directional classes are expected
623 R, L, N, AN, EN and BN
625 W8 resolves a number of ENs to L
626 ------------------------------------------------------------------------*/
627 static void resolveNeutrals(int baselevel
, WORD
*pcls
, const WORD
*plevel
, int cch
)
629 /* the state at the start of text depends on the base level */
630 int state
= odd(baselevel
) ? r
: l
;
634 int level
= baselevel
;
636 int action
, clsRun
, clsNew
;
638 for (; ich
< cch
; ich
++)
640 /* ignore boundary neutrals */
643 /* include in the count for a deferred run */
647 /* skip any further processing */
651 ASSERT(pcls
[ich
] < 5); /* "Only N, L, R, AN, EN are allowed" */
654 action
= actionNeutrals
[state
][cls
];
656 /* resolve the directionality for deferred runs */
657 clsRun
= GetDeferredNeutrals(action
, level
);
660 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
664 /* resolve the directionality class at the current location */
665 clsNew
= GetResolvedNeutrals(action
);
672 state
= stateNeutrals
[state
][cls
];
676 /* resolve any deferred runs */
677 cls
= EmbeddingDirection(level
); /* eor has type of current level */
679 /* resolve the directionality for deferred runs */
680 clsRun
= GetDeferredNeutrals(actionNeutrals
[state
][cls
], level
);
682 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
685 /* RESOLVE IMPLICIT */
687 /*------------------------------------------------------------------------
688 Function: resolveImplicit
690 Recursively resolves implicit embedding levels.
691 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
693 Input: Array of direction classes
697 In/Out: Array of embedding levels
699 Note: levels may exceed 15 on output.
700 Accepted subset of direction classes
702 ------------------------------------------------------------------------*/
703 static const WORD addLevel
[][4] =
706 /* even */ { 0, 1, 2, 2, },
707 /* odd */ { 1, 0, 1, 1, }
711 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int cch
)
714 for (; ich
< cch
; ich
++)
716 /* cannot resolve bn here, since some bn were resolved to strong
717 * types in resolveWeak. To remove these we need the original
718 * types, which are available again in resolveWhiteSpace */
723 ASSERT(pcls
[ich
] > 0); /* "No Neutrals allowed to survive here." */
724 ASSERT(pcls
[ich
] < 5); /* "Out of range." */
725 plevel
[ich
] += addLevel
[odd(plevel
[ich
])][pcls
[ich
] - 1];
729 /*************************************************************
730 * BIDI_DeterminLevels
732 BOOL
BIDI_DetermineLevels(
733 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
734 INT uCount
, /* [in] Number of WCHARs in string. */
735 const SCRIPT_STATE
*s
,
736 const SCRIPT_CONTROL
*c
,
737 WORD
*lpOutLevels
/* [out] final string levels */
741 unsigned baselevel
= 0;
743 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
745 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
748 WARN("Out of memory\n");
752 baselevel
= s
->uBidiLevel
;
754 classify(lpString
, chartype
, uCount
, c
);
756 for (j
= 0; j
< uCount
; ++j
)
762 case ON
: chartype
[j
] = N
;
766 /* resolve explicit */
767 resolveExplicit(baselevel
, N
, chartype
, lpOutLevels
, uCount
, 0);
770 resolveWeak(baselevel
, chartype
, lpOutLevels
, uCount
);
772 /* resolve neutrals */
773 resolveNeutrals(baselevel
, chartype
, lpOutLevels
, uCount
);
775 /* resolveImplicit */
776 resolveImplicit(chartype
, lpOutLevels
, uCount
);
778 HeapFree(GetProcessHeap(), 0, chartype
);
782 /* reverse cch indexes */
783 static void reverse(int *pidx
, int cch
)
787 for (; ich
< --cch
; ich
++)
790 pidx
[ich
] = pidx
[cch
];
796 /*------------------------------------------------------------------------
797 Functions: reorder/reorderLevel
799 Recursively reorders the display string
800 "From the highest level down, reverse all characters at that level and
801 higher, down to the lowest odd level"
803 Implements rule L2 of the Unicode bidi Algorithm.
805 Input: Array of embedding levels
807 Flag enabling reversal (set to false by initial caller)
809 In/Out: Text to reorder
811 Note: levels may exceed 15 resp. 61 on input.
813 Rule L3 - reorder combining marks is not implemented here
814 Rule L4 - glyph mirroring is implemented as a display option below
816 Note: this should be applied a line at a time
817 -------------------------------------------------------------------------*/
818 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
822 /* true as soon as first odd level encountered */
823 fReverse
= fReverse
|| odd(level
);
825 for (; ich
< cch
; ich
++)
827 if (plevel
[ich
] < level
)
831 else if (plevel
[ich
] > level
)
833 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
834 cch
- ich
, fReverse
) - 1;
839 reverse(pIndexs
, ich
);
844 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
845 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
850 /* true as soon as first odd level encountered */
851 fReverse
= fReverse
|| odd(level
);
853 for (; ich
< cch
; ich
++)
855 if (plevel
[ich
] < level
)
857 else if (plevel
[ich
] > level
)
862 reverse(pIndexs
, ich
);
868 for (; ich
< cch
; ich
++)
869 if (plevel
[ich
] < level
)
871 else if (plevel
[ich
] > level
)
872 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
873 cch
- ich
, fReverse
) - 1;
879 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
883 classify(lpString
, lpStrength
, uCount
, c
);
885 for ( i
= 0; i
< uCount
; i
++)
887 switch(lpStrength
[i
])
896 lpStrength
[i
] = BIDI_STRONG
;
905 lpStrength
[i
] = BIDI_WEAK
;
911 default: /* Neutrals and NSM */
912 lpStrength
[i
] = BIDI_NEUTRAL
;