[uxtheme]
[reactos.git] / reactos / dll / win32 / usp10 / bidi.c
1 /*
2 * Uniscribe BiDirectional handling
3 *
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 *
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 *
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 *
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
42 */
43
44 #include "config.h"
45
46 #include <stdarg.h>
47 #include "windef.h"
48 #include "winbase.h"
49 #include "wingdi.h"
50 #include "winnls.h"
51 #include "usp10.h"
52 #include "wine/unicode.h"
53 #include "wine/debug.h"
54
55 #include "usp10_internal.h"
56
57 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
58
59 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
60 #define MAX_LEVEL 61
61
62 /* HELPER FUNCTIONS AND DECLARATIONS */
63
64 /*------------------------------------------------------------------------
65 Bidirectional Character Types
66
67 as defined by the Unicode Bidirectional Algorithm Table 3-7.
68
69 Note:
70
71 The list of bidirectional character types here is not grouped the
72 same way as the table 3-7, since the numberic values for the types
73 are chosen to keep the state and action tables compact.
74 ------------------------------------------------------------------------*/
75 enum directions
76 {
77 /* input types */
78 /* ON MUST be zero, code relies on ON = N = 0 */
79 ON = 0, /* Other Neutral */
80 L, /* Left Letter */
81 R, /* Right Letter */
82 AN, /* Arabic Number */
83 EN, /* European Number */
84 AL, /* Arabic Letter (Right-to-left) */
85 NSM, /* Non-spacing Mark */
86 CS, /* Common Separator */
87 ES, /* European Separator */
88 ET, /* European Terminator (post/prefix e.g. $ and %) */
89
90 /* resolved types */
91 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
92
93 /* input types, */
94 S, /* Segment Separator (TAB) // used only in L1 */
95 WS, /* White space // used only in L1 */
96 B, /* Paragraph Separator (aka as PS) */
97
98 /* types for explicit controls */
99 RLO, /* these are used only in X1-X9 */
100 RLE,
101 LRO,
102 LRE,
103 PDF,
104
105 /* resolved types, also resolved directions */
106 N = ON, /* alias, where ON, WS and S are treated the same */
107 };
108
109 /* HELPER FUNCTIONS */
110 /* the character type contains the C1_* flags in the low 12 bits */
111 /* and the C2_* type in the high 4 bits */
112 static __inline unsigned short get_char_typeW( WCHAR ch )
113 {
114 WORD CharType;
115 GetStringTypeW(CT_CTYPE1, &ch, 1, &CharType);
116 return CharType;
117 }
118
119 /* Convert the libwine information to the direction enum */
120 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
121 {
122 static const enum directions dir_map[16] =
123 {
124 L, /* unassigned defaults to L */
125 L,
126 R,
127 EN,
128 ES,
129 ET,
130 AN,
131 CS,
132 B,
133 S,
134 WS,
135 ON,
136 AL,
137 NSM,
138 BN,
139 PDF /* also LRE, LRO, RLE, RLO */
140 };
141
142 unsigned i;
143
144 for (i = 0; i < uCount; ++i)
145 {
146 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
147 switch (chartype[i])
148 {
149 case ES:
150 if (!c->fLegacyBidiClass) break;
151 switch (lpString[i])
152 {
153 case '-':
154 case '+': chartype[i] = N; break;
155 case '/': chartype[i] = CS; break;
156 }
157 break;
158 case PDF:
159 switch (lpString[i])
160 {
161 case 0x202A: chartype[i] = LRE; break;
162 case 0x202B: chartype[i] = RLE; break;
163 case 0x202C: chartype[i] = PDF; break;
164 case 0x202D: chartype[i] = LRO; break;
165 case 0x202E: chartype[i] = RLO; break;
166 }
167 break;
168 }
169 }
170 }
171
172 /* Set a run of cval values at locations all prior to, but not including */
173 /* iStart, to the new value nval. */
174 static void SetDeferredRun(WORD *pval, int cval, int iStart, int nval)
175 {
176 int i = iStart - 1;
177 for (; i >= iStart - cval; i--)
178 {
179 pval[i] = nval;
180 }
181 }
182
183 /* RESOLVE EXPLICIT */
184
185 static WORD GreaterEven(int i)
186 {
187 return odd(i) ? i + 1 : i + 2;
188 }
189
190 static WORD GreaterOdd(int i)
191 {
192 return odd(i) ? i + 2 : i + 1;
193 }
194
195 static WORD EmbeddingDirection(int level)
196 {
197 return odd(level) ? R : L;
198 }
199
200 /*------------------------------------------------------------------------
201 Function: resolveExplicit
202
203 Recursively resolves explicit embedding levels and overrides.
204 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
205
206 Input: Base embedding level and direction
207 Character count
208
209 Output: Array of embedding levels
210
211 In/Out: Array of direction classes
212
213
214 Note: The function uses two simple counters to keep track of
215 matching explicit codes and PDF. Use the default argument for
216 the outermost call. The nesting counter counts the recursion
217 depth and not the embedding level.
218 ------------------------------------------------------------------------*/
219
220 static int resolveExplicit(int level, int dir, WORD *pcls, WORD *plevel, int cch, int nNest)
221 {
222 /* always called with a valid nesting level
223 nesting levels are != embedding levels */
224 int nLastValid = nNest;
225 int ich = 0;
226
227 /* check input values */
228 ASSERT(nNest >= 0 && level >= 0 && level <= MAX_LEVEL);
229
230 /* process the text */
231 for (; ich < cch; ich++)
232 {
233 WORD cls = pcls[ich];
234 switch (cls)
235 {
236 case LRO:
237 case LRE:
238 nNest++;
239 if (GreaterEven(level) <= MAX_LEVEL - (cls == LRO ? 2 : 0))
240 {
241 plevel[ich] = GreaterEven(level);
242 pcls[ich] = BN;
243 ich += resolveExplicit(plevel[ich], (cls == LRE ? N : L),
244 &pcls[ich+1], &plevel[ich+1],
245 cch - (ich+1), nNest);
246 nNest--;
247 continue;
248 }
249 cls = pcls[ich] = BN;
250 break;
251
252 case RLO:
253 case RLE:
254 nNest++;
255 if (GreaterOdd(level) <= MAX_LEVEL - (cls == RLO ? 2 : 0))
256 {
257 plevel[ich] = GreaterOdd(level);
258 pcls[ich] = BN;
259 ich += resolveExplicit(plevel[ich], (cls == RLE ? N : R),
260 &pcls[ich+1], &plevel[ich+1],
261 cch - (ich+1), nNest);
262 nNest--;
263 continue;
264 }
265 cls = pcls[ich] = BN;
266 break;
267
268 case PDF:
269 cls = pcls[ich] = BN;
270 if (nNest)
271 {
272 if (nLastValid < nNest)
273 {
274 nNest--;
275 }
276 else
277 {
278 cch = ich; /* break the loop, but complete body */
279 }
280 }
281 }
282
283 /* Apply the override */
284 if (dir != N)
285 {
286 cls = dir;
287 }
288 plevel[ich] = level;
289 if (pcls[ich] != BN)
290 pcls[ich] = cls;
291 }
292
293 return ich;
294 }
295
296 /* RESOLVE WEAK TYPES */
297
298 enum states /* possible states */
299 {
300 xa, /* arabic letter */
301 xr, /* right letter */
302 xl, /* left letter */
303
304 ao, /* arabic lett. foll by ON */
305 ro, /* right lett. foll by ON */
306 lo, /* left lett. foll by ON */
307
308 rt, /* ET following R */
309 lt, /* ET following L */
310
311 cn, /* EN, AN following AL */
312 ra, /* arabic number foll R */
313 re, /* european number foll R */
314 la, /* arabic number foll L */
315 le, /* european number foll L */
316
317 ac, /* CS following cn */
318 rc, /* CS following ra */
319 rs, /* CS,ES following re */
320 lc, /* CS following la */
321 ls, /* CS,ES following le */
322
323 ret, /* ET following re */
324 let, /* ET following le */
325 } ;
326
327 static const int stateWeak[][10] =
328 {
329 /* N, L, R, AN, EN, AL,NSM, CS, ES, ET */
330 /*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* arabic letter */
331 /*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */
332 /*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */
333
334 /*ao*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* arabic lett. foll by ON*/
335 /*ro*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* right lett. foll by ON */
336 /*lo*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* left lett. foll by ON */
337
338 /*rt*/ { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R */
339 /*lt*/ { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L */
340
341 /*cn*/ { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL */
342 /*ra*/ { ro, xl, xr, ra, re, xa, ra, rc, ro, rt }, /* arabic number foll R */
343 /*re*/ { ro, xl, xr, ra, re, xa, re, rs, rs,ret }, /* european number foll R */
344 /*la*/ { lo, xl, xr, la, le, xa, la, lc, lo, lt }, /* arabic number foll L */
345 /*le*/ { lo, xl, xr, la, le, xa, le, ls, ls,let }, /* european number foll L */
346
347 /*ac*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn */
348 /*rc*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra */
349 /*rs*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re */
350 /*lc*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la */
351 /*ls*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le */
352
353 /*ret*/{ ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re */
354 /*let*/{ lo, xl, xr, la, le, xa,let, lo, lo,let }, /* ET following le */
355 };
356
357 enum actions /* possible actions */
358 {
359 /* primitives */
360 IX = 0x100, /* increment */
361 XX = 0xF, /* no-op */
362
363 /* actions */
364 xxx = (XX << 4) + XX, /* no-op */
365 xIx = IX + xxx, /* increment run */
366 xxN = (XX << 4) + ON, /* set current to N */
367 xxE = (XX << 4) + EN, /* set current to EN */
368 xxA = (XX << 4) + AN, /* set current to AN */
369 xxR = (XX << 4) + R, /* set current to R */
370 xxL = (XX << 4) + L, /* set current to L */
371 Nxx = (ON << 4) + 0xF, /* set run to neutral */
372 Axx = (AN << 4) + 0xF, /* set run to AN */
373 ExE = (EN << 4) + EN, /* set run to EN, set current to EN */
374 NIx = (ON << 4) + 0xF + IX, /* set run to N, increment */
375 NxN = (ON << 4) + ON, /* set run to N, set current to N */
376 NxR = (ON << 4) + R, /* set run to N, set current to R */
377 NxE = (ON << 4) + EN, /* set run to N, set current to EN */
378
379 AxA = (AN << 4) + AN, /* set run to AN, set current to AN */
380 NxL = (ON << 4) + L, /* set run to N, set current to L */
381 LxL = (L << 4) + L, /* set run to L, set current to L */
382 } ;
383
384 static const int actionWeak[][10] =
385 {
386 /* N, L, R, AN, EN, AL, NSM, CS, ES, ET */
387 /*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* arabic letter */
388 /*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter */
389 /*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */
390
391 /*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* arabic lett. foll by ON */
392 /*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON */
393 /*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON */
394
395 /*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R */
396 /*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L */
397
398 /*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following AL */
399 /*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll R */
400 /*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* european number foll R */
401 /*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll L */
402 /*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* european number foll L */
403
404 /*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn */
405 /*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra */
406 /*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re */
407 /*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS following la */
408 /*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le */
409
410 /*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re */
411 /*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL }, /* ET following le */
412 };
413
414 static int GetDeferredType(int action)
415 {
416 return (action >> 4) & 0xF;
417 }
418
419 static int GetResolvedType(int action)
420 {
421 return action & 0xF;
422 }
423
424 /* Note on action table:
425
426 States can be of two kinds:
427 - Immediate Resolution State, where each input token
428 is resolved as soon as it is seen. These states have
429 only single action codes (xxN) or the no-op (xxx)
430 for static input tokens.
431 - Deferred Resolution State, where input tokens either
432 either extend the run (xIx) or resolve its Type (e.g. Nxx).
433
434 Input classes are of three kinds
435 - Static Input Token, where the class of the token remains
436 unchanged on output (AN, L, N, R)
437 - Replaced Input Token, where the class of the token is
438 always replaced on output (AL, BN, NSM, CS, ES, ET)
439 - Conditional Input Token, where the class of the token is
440 changed on output in some, but not all, cases (EN)
441
442 Where tokens are subject to change, a double action
443 (e.g. NxA, or NxN) is _required_ after deferred states,
444 resolving both the deferred state and changing the current token.
445 */
446
447 /*------------------------------------------------------------------------
448 Function: resolveWeak
449
450 Resolves the directionality of numeric and other weak character types
451
452 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
453
454 Input: Array of embedding levels
455 Character count
456
457 In/Out: Array of directional classes
458
459 Note: On input only these directional classes are expected
460 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
461 ------------------------------------------------------------------------*/
462 static void resolveWeak(int baselevel, WORD *pcls, WORD *plevel, int cch)
463 {
464 int state = odd(baselevel) ? xr : xl;
465 int cls;
466
467 int level = baselevel;
468 int action, clsRun, clsNew;
469 int cchRun = 0;
470 int ich = 0;
471
472 for (; ich < cch; ich++)
473 {
474 /* ignore boundary neutrals */
475 if (pcls[ich] == BN)
476 {
477 /* must flatten levels unless at a level change; */
478 plevel[ich] = level;
479
480 /* lookahead for level changes */
481 if (ich + 1 == cch && level != baselevel)
482 {
483 /* have to fixup last BN before end of the loop, since
484 * its fix-upped value will be needed below the assert */
485 pcls[ich] = EmbeddingDirection(level);
486 }
487 else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BN)
488 {
489 /* fixup LAST BN in front / after a level run to make
490 * it act like the SOR/EOR in rule X10 */
491 int newlevel = plevel[ich+1];
492 if (level > newlevel) {
493 newlevel = level;
494 }
495 plevel[ich] = newlevel;
496
497 /* must match assigned level */
498 pcls[ich] = EmbeddingDirection(newlevel);
499 level = plevel[ich+1];
500 }
501 else
502 {
503 /* don't interrupt runs */
504 if (cchRun)
505 {
506 cchRun++;
507 }
508 continue;
509 }
510 }
511
512 ASSERT(pcls[ich] <= BN);
513 cls = pcls[ich];
514
515 action = actionWeak[state][cls];
516
517 /* resolve the directionality for deferred runs */
518 clsRun = GetDeferredType(action);
519 if (clsRun != XX)
520 {
521 SetDeferredRun(pcls, cchRun, ich, clsRun);
522 cchRun = 0;
523 }
524
525 /* resolve the directionality class at the current location */
526 clsNew = GetResolvedType(action);
527 if (clsNew != XX)
528 pcls[ich] = clsNew;
529
530 /* increment a deferred run */
531 if (IX & action)
532 cchRun++;
533
534 state = stateWeak[state][cls];
535 }
536
537 /* resolve any deferred runs
538 * use the direction of the current level to emulate PDF */
539 cls = EmbeddingDirection(level);
540
541 /* resolve the directionality for deferred runs */
542 clsRun = GetDeferredType(actionWeak[state][cls]);
543 if (clsRun != XX)
544 SetDeferredRun(pcls, cchRun, ich, clsRun);
545 }
546
547 /* RESOLVE NEUTRAL TYPES */
548
549 /* action values */
550 enum neutralactions
551 {
552 /* action to resolve previous input */
553 nL = L, /* resolve EN to L */
554 En = 3 << 4, /* resolve neutrals run to embedding level direction */
555 Rn = R << 4, /* resolve neutrals run to strong right */
556 Ln = L << 4, /* resolved neutrals run to strong left */
557 In = (1<<8), /* increment count of deferred neutrals */
558 LnL = (1<<4)+L, /* set run and EN to L */
559 };
560
561 static int GetDeferredNeutrals(int action, int level)
562 {
563 action = (action >> 4) & 0xF;
564 if (action == (En >> 4))
565 return EmbeddingDirection(level);
566 else
567 return action;
568 }
569
570 static int GetResolvedNeutrals(int action)
571 {
572 action = action & 0xF;
573 if (action == In)
574 return 0;
575 else
576 return action;
577 }
578
579 /* state values */
580 enum resolvestates
581 {
582 /* new temporary class */
583 r, /* R and characters resolved to R */
584 l, /* L and characters resolved to L */
585 rn, /* N preceded by right */
586 ln, /* N preceded by left */
587 a, /* AN preceded by left (the abbreviation 'la' is used up above) */
588 na, /* N preceded by a */
589 } ;
590
591
592 /*------------------------------------------------------------------------
593 Notes:
594
595 By rule W7, whenever a EN is 'dominated' by an L (including start of
596 run with embedding direction = L) it is resolved to, and further treated
597 as L.
598
599 This leads to the need for 'a' and 'na' states.
600 ------------------------------------------------------------------------*/
601
602 static const int actionNeutrals[][5] =
603 {
604 /* N, L, R, AN, EN = cls */
605 { In, 0, 0, 0, 0 }, /* r right */
606 { In, 0, 0, 0, L }, /* l left */
607
608 { In, En, Rn, Rn, Rn }, /* rn N preceded by right */
609 { In, Ln, En, En, LnL}, /* ln N preceded by left */
610
611 { In, 0, 0, 0, L }, /* a AN preceded by left */
612 { In, En, Rn, Rn, En }, /* na N preceded by a */
613 } ;
614
615 static const int stateNeutrals[][5] =
616 {
617 /* N, L, R, AN, EN */
618 { rn, l, r, r, r }, /* r right */
619 { ln, l, r, a, l }, /* l left */
620
621 { rn, l, r, r, r }, /* rn N preceded by right */
622 { ln, l, r, a, l }, /* ln N preceded by left */
623
624 { na, l, r, a, l }, /* a AN preceded by left */
625 { na, l, r, a, l }, /* na N preceded by la */
626 } ;
627
628 /*------------------------------------------------------------------------
629 Function: resolveNeutrals
630
631 Resolves the directionality of neutral character types.
632
633 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
634
635 Input: Array of embedding levels
636 Character count
637 Baselevel
638
639 In/Out: Array of directional classes
640
641 Note: On input only these directional classes are expected
642 R, L, N, AN, EN and BN
643
644 W8 resolves a number of ENs to L
645 ------------------------------------------------------------------------*/
646 static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int cch)
647 {
648 /* the state at the start of text depends on the base level */
649 int state = odd(baselevel) ? r : l;
650 int cls;
651
652 int cchRun = 0;
653 int level = baselevel;
654
655 int action, clsRun, clsNew;
656 int ich = 0;
657 for (; ich < cch; ich++)
658 {
659 /* ignore boundary neutrals */
660 if (pcls[ich] == BN)
661 {
662 /* include in the count for a deferred run */
663 if (cchRun)
664 cchRun++;
665
666 /* skip any further processing */
667 continue;
668 }
669
670 ASSERT(pcls[ich] < 5); /* "Only N, L, R, AN, EN are allowed" */
671 cls = pcls[ich];
672
673 action = actionNeutrals[state][cls];
674
675 /* resolve the directionality for deferred runs */
676 clsRun = GetDeferredNeutrals(action, level);
677 if (clsRun != N)
678 {
679 SetDeferredRun(pcls, cchRun, ich, clsRun);
680 cchRun = 0;
681 }
682
683 /* resolve the directionality class at the current location */
684 clsNew = GetResolvedNeutrals(action);
685 if (clsNew != N)
686 pcls[ich] = clsNew;
687
688 if (In & action)
689 cchRun++;
690
691 state = stateNeutrals[state][cls];
692 level = plevel[ich];
693 }
694
695 /* resolve any deferred runs */
696 cls = EmbeddingDirection(level); /* eor has type of current level */
697
698 /* resolve the directionality for deferred runs */
699 clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level);
700 if (clsRun != N)
701 SetDeferredRun(pcls, cchRun, ich, clsRun);
702 }
703
704 /* RESOLVE IMPLICIT */
705
706 /*------------------------------------------------------------------------
707 Function: resolveImplicit
708
709 Recursively resolves implicit embedding levels.
710 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
711
712 Input: Array of direction classes
713 Character count
714 Base level
715
716 In/Out: Array of embedding levels
717
718 Note: levels may exceed 15 on output.
719 Accepted subset of direction classes
720 R, L, AN, EN
721 ------------------------------------------------------------------------*/
722 static const WORD addLevel[][4] =
723 {
724 /* L, R, AN, EN */
725 /* even */ { 0, 1, 2, 2, },
726 /* odd */ { 1, 0, 1, 1, }
727
728 };
729
730 static void resolveImplicit(const WORD * pcls, WORD *plevel, int cch)
731 {
732 int ich = 0;
733 for (; ich < cch; ich++)
734 {
735 /* cannot resolve bn here, since some bn were resolved to strong
736 * types in resolveWeak. To remove these we need the original
737 * types, which are available again in resolveWhiteSpace */
738 if (pcls[ich] == BN)
739 {
740 continue;
741 }
742 ASSERT(pcls[ich] > 0); /* "No Neutrals allowed to survive here." */
743 ASSERT(pcls[ich] < 5); /* "Out of range." */
744 plevel[ich] += addLevel[odd(plevel[ich])][pcls[ich] - 1];
745 }
746 }
747
748 /*************************************************************
749 * BIDI_DeterminLevels
750 */
751 BOOL BIDI_DetermineLevels(
752 LPCWSTR lpString, /* [in] The string for which information is to be returned */
753 INT uCount, /* [in] Number of WCHARs in string. */
754 const SCRIPT_STATE *s,
755 const SCRIPT_CONTROL *c,
756 WORD *lpOutLevels /* [out] final string levels */
757 )
758 {
759 WORD *chartype;
760 unsigned baselevel = 0,j;
761 TRACE("%s, %d", debugstr_wn(lpString, uCount), uCount);
762
763 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
764 if (!chartype)
765 {
766 WARN("Out of memory\n");
767 return FALSE;
768 }
769
770 baselevel = s->uBidiLevel;
771
772 classify(lpString, chartype, uCount, c);
773
774 for (j = 0; j < uCount; ++j)
775 switch(chartype[j])
776 {
777 case B:
778 case S:
779 case WS:
780 case ON: chartype[j] = N;
781 default: continue;
782 }
783
784 /* resolve explicit */
785 resolveExplicit(baselevel, N, chartype, lpOutLevels, uCount, 0);
786
787 /* resolve weak */
788 resolveWeak(baselevel, chartype, lpOutLevels, uCount);
789
790 /* resolve neutrals */
791 resolveNeutrals(baselevel, chartype, lpOutLevels, uCount);
792
793 /* resolveImplicit */
794 resolveImplicit(chartype, lpOutLevels, uCount);
795
796 HeapFree(GetProcessHeap(), 0, chartype);
797 return TRUE;
798 }
799
800 /* reverse cch indexes */
801 static void reverse(int *pidx, int cch)
802 {
803 int temp;
804 int ich = 0;
805 for (; ich < --cch; ich++)
806 {
807 temp = pidx[ich];
808 pidx[ich] = pidx[cch];
809 pidx[cch] = temp;
810 }
811 }
812
813
814 /*------------------------------------------------------------------------
815 Functions: reorder/reorderLevel
816
817 Recursively reorders the display string
818 "From the highest level down, reverse all characters at that level and
819 higher, down to the lowest odd level"
820
821 Implements rule L2 of the Unicode bidi Algorithm.
822
823 Input: Array of embedding levels
824 Character count
825 Flag enabling reversal (set to false by initial caller)
826
827 In/Out: Text to reorder
828
829 Note: levels may exceed 15 resp. 61 on input.
830
831 Rule L3 - reorder combining marks is not implemented here
832 Rule L4 - glyph mirroring is implemented as a display option below
833
834 Note: this should be applied a line at a time
835 -------------------------------------------------------------------------*/
836 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
837 {
838 int ich = 0;
839
840 /* true as soon as first odd level encountered */
841 fReverse = fReverse || odd(level);
842
843 for (; ich < cch; ich++)
844 {
845 if (plevel[ich] < level)
846 {
847 break;
848 }
849 else if (plevel[ich] > level)
850 {
851 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
852 cch - ich, fReverse) - 1;
853 }
854 }
855 if (fReverse)
856 {
857 reverse(pIndexs, ich);
858 }
859 return ich;
860 }
861
862 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
863 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
864 {
865 int ich = 0;
866 int newlevel = -1;
867
868 /* true as soon as first odd level encountered */
869 fReverse = fReverse || odd(level);
870
871 for (; ich < cch; ich++)
872 {
873 if (plevel[ich] < level)
874 break;
875 else if (plevel[ich] > level)
876 newlevel = ich;
877 }
878 if (fReverse)
879 {
880 reverse(pIndexs, ich);
881 }
882
883 if (newlevel > 1)
884 {
885 ich = 0;
886 for (; ich < cch; ich++)
887 if (plevel[ich] > level)
888 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
889 cch - ich, fReverse) - 1;
890 }
891
892 return ich;
893 }