[SHELL/EXPERIMENTS]
[reactos.git] / dll / win32 / usp10 / bidi.c
1 /*
2 * Uniscribe BiDirectional handling
3 *
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 *
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 *
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 *
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
42 */
43
44 #include "usp10_internal.h"
45
46 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
47
48 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
49 #define MAX_LEVEL 61
50
51 /* HELPER FUNCTIONS AND DECLARATIONS */
52
53 /*------------------------------------------------------------------------
54 Bidirectional Character Types
55
56 as defined by the Unicode Bidirectional Algorithm Table 3-7.
57
58 Note:
59
60 The list of bidirectional character types here is not grouped the
61 same way as the table 3-7, since the numberic values for the types
62 are chosen to keep the state and action tables compact.
63 ------------------------------------------------------------------------*/
64 enum directions
65 {
66 /* input types */
67 /* ON MUST be zero, code relies on ON = N = 0 */
68 ON = 0, /* Other Neutral */
69 L, /* Left Letter */
70 R, /* Right Letter */
71 AN, /* Arabic Number */
72 EN, /* European Number */
73 AL, /* Arabic Letter (Right-to-left) */
74 NSM, /* Non-spacing Mark */
75 CS, /* Common Separator */
76 ES, /* European Separator */
77 ET, /* European Terminator (post/prefix e.g. $ and %) */
78
79 /* resolved types */
80 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
81
82 /* input types, */
83 S, /* Segment Separator (TAB) // used only in L1 */
84 WS, /* White space // used only in L1 */
85 B, /* Paragraph Separator (aka as PS) */
86
87 /* types for explicit controls */
88 RLO, /* these are used only in X1-X9 */
89 RLE,
90 LRO,
91 LRE,
92 PDF,
93
94 /* resolved types, also resolved directions */
95 N = ON, /* alias, where ON, WS and S are treated the same */
96 };
97
98 /* HELPER FUNCTIONS */
99
100 /* Convert the libwine information to the direction enum */
101 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
102 {
103 static const enum directions dir_map[16] =
104 {
105 L, /* unassigned defaults to L */
106 L,
107 R,
108 EN,
109 ES,
110 ET,
111 AN,
112 CS,
113 B,
114 S,
115 WS,
116 ON,
117 AL,
118 NSM,
119 BN,
120 PDF /* also LRE, LRO, RLE, RLO */
121 };
122
123 unsigned i;
124
125 for (i = 0; i < uCount; ++i)
126 {
127 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
128 switch (chartype[i])
129 {
130 case ES:
131 if (!c->fLegacyBidiClass) break;
132 switch (lpString[i])
133 {
134 case '-':
135 case '+': chartype[i] = N; break;
136 case '/': chartype[i] = CS; break;
137 }
138 break;
139 case PDF:
140 switch (lpString[i])
141 {
142 case 0x202A: chartype[i] = LRE; break;
143 case 0x202B: chartype[i] = RLE; break;
144 case 0x202C: chartype[i] = PDF; break;
145 case 0x202D: chartype[i] = LRO; break;
146 case 0x202E: chartype[i] = RLO; break;
147 }
148 break;
149 }
150 }
151 }
152
153 /* Set a run of cval values at locations all prior to, but not including */
154 /* iStart, to the new value nval. */
155 static void SetDeferredRun(WORD *pval, int cval, int iStart, int nval)
156 {
157 int i = iStart - 1;
158 for (; i >= iStart - cval; i--)
159 {
160 pval[i] = nval;
161 }
162 }
163
164 /* RESOLVE EXPLICIT */
165
166 static WORD GreaterEven(int i)
167 {
168 return odd(i) ? i + 1 : i + 2;
169 }
170
171 static WORD GreaterOdd(int i)
172 {
173 return odd(i) ? i + 2 : i + 1;
174 }
175
176 static WORD EmbeddingDirection(int level)
177 {
178 return odd(level) ? R : L;
179 }
180
181 /*------------------------------------------------------------------------
182 Function: resolveExplicit
183
184 Recursively resolves explicit embedding levels and overrides.
185 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
186
187 Input: Base embedding level and direction
188 Character count
189
190 Output: Array of embedding levels
191
192 In/Out: Array of direction classes
193
194
195 Note: The function uses two simple counters to keep track of
196 matching explicit codes and PDF. Use the default argument for
197 the outermost call. The nesting counter counts the recursion
198 depth and not the embedding level.
199 ------------------------------------------------------------------------*/
200
201 static int resolveExplicit(int level, int dir, WORD *pcls, WORD *plevel, int cch, int nNest)
202 {
203 /* always called with a valid nesting level
204 nesting levels are != embedding levels */
205 int nLastValid = nNest;
206 int ich = 0;
207
208 /* check input values */
209 ASSERT(nNest >= 0 && level >= 0 && level <= MAX_LEVEL);
210
211 /* process the text */
212 for (; ich < cch; ich++)
213 {
214 WORD cls = pcls[ich];
215 switch (cls)
216 {
217 case LRO:
218 case LRE:
219 nNest++;
220 if (GreaterEven(level) <= MAX_LEVEL - (cls == LRO ? 2 : 0))
221 {
222 plevel[ich] = GreaterEven(level);
223 pcls[ich] = BN;
224 ich += resolveExplicit(plevel[ich], (cls == LRE ? N : L),
225 &pcls[ich+1], &plevel[ich+1],
226 cch - (ich+1), nNest);
227 nNest--;
228 continue;
229 }
230 cls = pcls[ich] = BN;
231 break;
232
233 case RLO:
234 case RLE:
235 nNest++;
236 if (GreaterOdd(level) <= MAX_LEVEL - (cls == RLO ? 2 : 0))
237 {
238 plevel[ich] = GreaterOdd(level);
239 pcls[ich] = BN;
240 ich += resolveExplicit(plevel[ich], (cls == RLE ? N : R),
241 &pcls[ich+1], &plevel[ich+1],
242 cch - (ich+1), nNest);
243 nNest--;
244 continue;
245 }
246 cls = pcls[ich] = BN;
247 break;
248
249 case PDF:
250 cls = pcls[ich] = BN;
251 if (nNest)
252 {
253 if (nLastValid < nNest)
254 {
255 nNest--;
256 }
257 else
258 {
259 cch = ich; /* break the loop, but complete body */
260 }
261 }
262 }
263
264 /* Apply the override */
265 if (dir != N)
266 {
267 cls = dir;
268 }
269 plevel[ich] = level;
270 if (pcls[ich] != BN)
271 pcls[ich] = cls;
272 }
273
274 return ich;
275 }
276
277 /* RESOLVE WEAK TYPES */
278
279 enum states /* possible states */
280 {
281 xa, /* Arabic letter */
282 xr, /* right letter */
283 xl, /* left letter */
284
285 ao, /* Arabic lett. foll by ON */
286 ro, /* right lett. foll by ON */
287 lo, /* left lett. foll by ON */
288
289 rt, /* ET following R */
290 lt, /* ET following L */
291
292 cn, /* EN, AN following AL */
293 ra, /* Arabic number foll R */
294 re, /* European number foll R */
295 la, /* Arabic number foll L */
296 le, /* European number foll L */
297
298 ac, /* CS following cn */
299 rc, /* CS following ra */
300 rs, /* CS,ES following re */
301 lc, /* CS following la */
302 ls, /* CS,ES following le */
303
304 ret, /* ET following re */
305 let, /* ET following le */
306 } ;
307
308 static const int stateWeak[][10] =
309 {
310 /* N, L, R, AN, EN, AL,NSM, CS, ES, ET */
311 /*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* Arabic letter */
312 /*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */
313 /*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */
314
315 /*ao*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* Arabic lett. foll by ON*/
316 /*ro*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* right lett. foll by ON */
317 /*lo*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* left lett. foll by ON */
318
319 /*rt*/ { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R */
320 /*lt*/ { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L */
321
322 /*cn*/ { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL */
323 /*ra*/ { ro, xl, xr, ra, re, xa, ra, rc, ro, rt }, /* Arabic number foll R */
324 /*re*/ { ro, xl, xr, ra, re, xa, re, rs, rs,ret }, /* European number foll R */
325 /*la*/ { lo, xl, xr, la, le, xa, la, lc, lo, lt }, /* Arabic number foll L */
326 /*le*/ { lo, xl, xr, la, le, xa, le, ls, ls,let }, /* European number foll L */
327
328 /*ac*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn */
329 /*rc*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra */
330 /*rs*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re */
331 /*lc*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la */
332 /*ls*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le */
333
334 /*ret*/{ ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re */
335 /*let*/{ lo, xl, xr, la, le, xa,let, lo, lo,let }, /* ET following le */
336 };
337
338 enum actions /* possible actions */
339 {
340 /* primitives */
341 IX = 0x100, /* increment */
342 XX = 0xF, /* no-op */
343
344 /* actions */
345 xxx = (XX << 4) + XX, /* no-op */
346 xIx = IX + xxx, /* increment run */
347 xxN = (XX << 4) + ON, /* set current to N */
348 xxE = (XX << 4) + EN, /* set current to EN */
349 xxA = (XX << 4) + AN, /* set current to AN */
350 xxR = (XX << 4) + R, /* set current to R */
351 xxL = (XX << 4) + L, /* set current to L */
352 Nxx = (ON << 4) + 0xF, /* set run to neutral */
353 Axx = (AN << 4) + 0xF, /* set run to AN */
354 ExE = (EN << 4) + EN, /* set run to EN, set current to EN */
355 NIx = (ON << 4) + 0xF + IX, /* set run to N, increment */
356 NxN = (ON << 4) + ON, /* set run to N, set current to N */
357 NxR = (ON << 4) + R, /* set run to N, set current to R */
358 NxE = (ON << 4) + EN, /* set run to N, set current to EN */
359
360 AxA = (AN << 4) + AN, /* set run to AN, set current to AN */
361 NxL = (ON << 4) + L, /* set run to N, set current to L */
362 LxL = (L << 4) + L, /* set run to L, set current to L */
363 } ;
364
365 static const int actionWeak[][10] =
366 {
367 /* N, L, R, AN, EN, AL, NSM, CS, ES, ET */
368 /*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* Arabic letter */
369 /*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter */
370 /*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */
371
372 /*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* Arabic lett. foll by ON */
373 /*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON */
374 /*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON */
375
376 /*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R */
377 /*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L */
378
379 /*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following AL */
380 /*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* Arabic number foll R */
381 /*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* European number foll R */
382 /*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* Arabic number foll L */
383 /*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* European number foll L */
384
385 /*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn */
386 /*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra */
387 /*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re */
388 /*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS following la */
389 /*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le */
390
391 /*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re */
392 /*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL }, /* ET following le */
393 };
394
395 static int GetDeferredType(int action)
396 {
397 return (action >> 4) & 0xF;
398 }
399
400 static int GetResolvedType(int action)
401 {
402 return action & 0xF;
403 }
404
405 /* Note on action table:
406
407 States can be of two kinds:
408 - Immediate Resolution State, where each input token
409 is resolved as soon as it is seen. These states have
410 only single action codes (xxN) or the no-op (xxx)
411 for static input tokens.
412 - Deferred Resolution State, where input tokens either
413 either extend the run (xIx) or resolve its Type (e.g. Nxx).
414
415 Input classes are of three kinds
416 - Static Input Token, where the class of the token remains
417 unchanged on output (AN, L, N, R)
418 - Replaced Input Token, where the class of the token is
419 always replaced on output (AL, BN, NSM, CS, ES, ET)
420 - Conditional Input Token, where the class of the token is
421 changed on output in some, but not all, cases (EN)
422
423 Where tokens are subject to change, a double action
424 (e.g. NxA, or NxN) is _required_ after deferred states,
425 resolving both the deferred state and changing the current token.
426 */
427
428 /*------------------------------------------------------------------------
429 Function: resolveWeak
430
431 Resolves the directionality of numeric and other weak character types
432
433 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
434
435 Input: Array of embedding levels
436 Character count
437
438 In/Out: Array of directional classes
439
440 Note: On input only these directional classes are expected
441 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
442 ------------------------------------------------------------------------*/
443 static void resolveWeak(int baselevel, WORD *pcls, WORD *plevel, int cch)
444 {
445 int state = odd(baselevel) ? xr : xl;
446 int cls;
447
448 int level = baselevel;
449 int action, clsRun, clsNew;
450 int cchRun = 0;
451 int ich = 0;
452
453 for (; ich < cch; ich++)
454 {
455 /* ignore boundary neutrals */
456 if (pcls[ich] == BN)
457 {
458 /* must flatten levels unless at a level change; */
459 plevel[ich] = level;
460
461 /* lookahead for level changes */
462 if (ich + 1 == cch && level != baselevel)
463 {
464 /* have to fixup last BN before end of the loop, since
465 * its fix-upped value will be needed below the assert */
466 pcls[ich] = EmbeddingDirection(level);
467 }
468 else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BN)
469 {
470 /* fixup LAST BN in front / after a level run to make
471 * it act like the SOR/EOR in rule X10 */
472 int newlevel = plevel[ich+1];
473 if (level > newlevel) {
474 newlevel = level;
475 }
476 plevel[ich] = newlevel;
477
478 /* must match assigned level */
479 pcls[ich] = EmbeddingDirection(newlevel);
480 level = plevel[ich+1];
481 }
482 else
483 {
484 /* don't interrupt runs */
485 if (cchRun)
486 {
487 cchRun++;
488 }
489 continue;
490 }
491 }
492
493 ASSERT(pcls[ich] <= BN);
494 cls = pcls[ich];
495
496 action = actionWeak[state][cls];
497
498 /* resolve the directionality for deferred runs */
499 clsRun = GetDeferredType(action);
500 if (clsRun != XX)
501 {
502 SetDeferredRun(pcls, cchRun, ich, clsRun);
503 cchRun = 0;
504 }
505
506 /* resolve the directionality class at the current location */
507 clsNew = GetResolvedType(action);
508 if (clsNew != XX)
509 pcls[ich] = clsNew;
510
511 /* increment a deferred run */
512 if (IX & action)
513 cchRun++;
514
515 state = stateWeak[state][cls];
516 }
517
518 /* resolve any deferred runs
519 * use the direction of the current level to emulate PDF */
520 cls = EmbeddingDirection(level);
521
522 /* resolve the directionality for deferred runs */
523 clsRun = GetDeferredType(actionWeak[state][cls]);
524 if (clsRun != XX)
525 SetDeferredRun(pcls, cchRun, ich, clsRun);
526 }
527
528 /* RESOLVE NEUTRAL TYPES */
529
530 /* action values */
531 enum neutralactions
532 {
533 /* action to resolve previous input */
534 nL = L, /* resolve EN to L */
535 En = 3 << 4, /* resolve neutrals run to embedding level direction */
536 Rn = R << 4, /* resolve neutrals run to strong right */
537 Ln = L << 4, /* resolved neutrals run to strong left */
538 In = (1<<8), /* increment count of deferred neutrals */
539 LnL = (1<<4)+L, /* set run and EN to L */
540 };
541
542 static int GetDeferredNeutrals(int action, int level)
543 {
544 action = (action >> 4) & 0xF;
545 if (action == (En >> 4))
546 return EmbeddingDirection(level);
547 else
548 return action;
549 }
550
551 static int GetResolvedNeutrals(int action)
552 {
553 action = action & 0xF;
554 if (action == In)
555 return 0;
556 else
557 return action;
558 }
559
560 /* state values */
561 enum resolvestates
562 {
563 /* new temporary class */
564 r, /* R and characters resolved to R */
565 l, /* L and characters resolved to L */
566 rn, /* N preceded by right */
567 ln, /* N preceded by left */
568 a, /* AN preceded by left (the abbreviation 'la' is used up above) */
569 na, /* N preceded by a */
570 } ;
571
572
573 /*------------------------------------------------------------------------
574 Notes:
575
576 By rule W7, whenever a EN is 'dominated' by an L (including start of
577 run with embedding direction = L) it is resolved to, and further treated
578 as L.
579
580 This leads to the need for 'a' and 'na' states.
581 ------------------------------------------------------------------------*/
582
583 static const int actionNeutrals[][5] =
584 {
585 /* N, L, R, AN, EN = cls */
586 { In, 0, 0, 0, 0 }, /* r right */
587 { In, 0, 0, 0, L }, /* l left */
588
589 { In, En, Rn, Rn, Rn }, /* rn N preceded by right */
590 { In, Ln, En, En, LnL}, /* ln N preceded by left */
591
592 { In, 0, 0, 0, L }, /* a AN preceded by left */
593 { In, En, Rn, Rn, En }, /* na N preceded by a */
594 } ;
595
596 static const int stateNeutrals[][5] =
597 {
598 /* N, L, R, AN, EN */
599 { rn, l, r, r, r }, /* r right */
600 { ln, l, r, a, l }, /* l left */
601
602 { rn, l, r, r, r }, /* rn N preceded by right */
603 { ln, l, r, a, l }, /* ln N preceded by left */
604
605 { na, l, r, a, l }, /* a AN preceded by left */
606 { na, l, r, a, l }, /* na N preceded by la */
607 } ;
608
609 /*------------------------------------------------------------------------
610 Function: resolveNeutrals
611
612 Resolves the directionality of neutral character types.
613
614 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
615
616 Input: Array of embedding levels
617 Character count
618 Baselevel
619
620 In/Out: Array of directional classes
621
622 Note: On input only these directional classes are expected
623 R, L, N, AN, EN and BN
624
625 W8 resolves a number of ENs to L
626 ------------------------------------------------------------------------*/
627 static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int cch)
628 {
629 /* the state at the start of text depends on the base level */
630 int state = odd(baselevel) ? r : l;
631 int cls;
632
633 int cchRun = 0;
634 int level = baselevel;
635
636 int action, clsRun, clsNew;
637 int ich = 0;
638 for (; ich < cch; ich++)
639 {
640 /* ignore boundary neutrals */
641 if (pcls[ich] == BN)
642 {
643 /* include in the count for a deferred run */
644 if (cchRun)
645 cchRun++;
646
647 /* skip any further processing */
648 continue;
649 }
650
651 ASSERT(pcls[ich] < 5); /* "Only N, L, R, AN, EN are allowed" */
652 cls = pcls[ich];
653
654 action = actionNeutrals[state][cls];
655
656 /* resolve the directionality for deferred runs */
657 clsRun = GetDeferredNeutrals(action, level);
658 if (clsRun != N)
659 {
660 SetDeferredRun(pcls, cchRun, ich, clsRun);
661 cchRun = 0;
662 }
663
664 /* resolve the directionality class at the current location */
665 clsNew = GetResolvedNeutrals(action);
666 if (clsNew != N)
667 pcls[ich] = clsNew;
668
669 if (In & action)
670 cchRun++;
671
672 state = stateNeutrals[state][cls];
673 level = plevel[ich];
674 }
675
676 /* resolve any deferred runs */
677 cls = EmbeddingDirection(level); /* eor has type of current level */
678
679 /* resolve the directionality for deferred runs */
680 clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level);
681 if (clsRun != N)
682 SetDeferredRun(pcls, cchRun, ich, clsRun);
683 }
684
685 /* RESOLVE IMPLICIT */
686
687 /*------------------------------------------------------------------------
688 Function: resolveImplicit
689
690 Recursively resolves implicit embedding levels.
691 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
692
693 Input: Array of direction classes
694 Character count
695 Base level
696
697 In/Out: Array of embedding levels
698
699 Note: levels may exceed 15 on output.
700 Accepted subset of direction classes
701 R, L, AN, EN
702 ------------------------------------------------------------------------*/
703 static const WORD addLevel[][4] =
704 {
705 /* L, R, AN, EN */
706 /* even */ { 0, 1, 2, 2, },
707 /* odd */ { 1, 0, 1, 1, }
708
709 };
710
711 static void resolveImplicit(const WORD * pcls, WORD *plevel, int cch)
712 {
713 int ich = 0;
714 for (; ich < cch; ich++)
715 {
716 /* cannot resolve bn here, since some bn were resolved to strong
717 * types in resolveWeak. To remove these we need the original
718 * types, which are available again in resolveWhiteSpace */
719 if (pcls[ich] == BN)
720 {
721 continue;
722 }
723 ASSERT(pcls[ich] > 0); /* "No Neutrals allowed to survive here." */
724 ASSERT(pcls[ich] < 5); /* "Out of range." */
725 plevel[ich] += addLevel[odd(plevel[ich])][pcls[ich] - 1];
726 }
727 }
728
729 /*************************************************************
730 * BIDI_DeterminLevels
731 */
732 BOOL BIDI_DetermineLevels(
733 LPCWSTR lpString, /* [in] The string for which information is to be returned */
734 INT uCount, /* [in] Number of WCHARs in string. */
735 const SCRIPT_STATE *s,
736 const SCRIPT_CONTROL *c,
737 WORD *lpOutLevels /* [out] final string levels */
738 )
739 {
740 WORD *chartype;
741 unsigned baselevel = 0;
742 INT j;
743 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
744
745 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
746 if (!chartype)
747 {
748 WARN("Out of memory\n");
749 return FALSE;
750 }
751
752 baselevel = s->uBidiLevel;
753
754 classify(lpString, chartype, uCount, c);
755
756 for (j = 0; j < uCount; ++j)
757 switch(chartype[j])
758 {
759 case B:
760 case S:
761 case WS:
762 case ON: chartype[j] = N;
763 default: continue;
764 }
765
766 /* resolve explicit */
767 resolveExplicit(baselevel, N, chartype, lpOutLevels, uCount, 0);
768
769 /* resolve weak */
770 resolveWeak(baselevel, chartype, lpOutLevels, uCount);
771
772 /* resolve neutrals */
773 resolveNeutrals(baselevel, chartype, lpOutLevels, uCount);
774
775 /* resolveImplicit */
776 resolveImplicit(chartype, lpOutLevels, uCount);
777
778 HeapFree(GetProcessHeap(), 0, chartype);
779 return TRUE;
780 }
781
782 /* reverse cch indexes */
783 static void reverse(int *pidx, int cch)
784 {
785 int temp;
786 int ich = 0;
787 for (; ich < --cch; ich++)
788 {
789 temp = pidx[ich];
790 pidx[ich] = pidx[cch];
791 pidx[cch] = temp;
792 }
793 }
794
795
796 /*------------------------------------------------------------------------
797 Functions: reorder/reorderLevel
798
799 Recursively reorders the display string
800 "From the highest level down, reverse all characters at that level and
801 higher, down to the lowest odd level"
802
803 Implements rule L2 of the Unicode bidi Algorithm.
804
805 Input: Array of embedding levels
806 Character count
807 Flag enabling reversal (set to false by initial caller)
808
809 In/Out: Text to reorder
810
811 Note: levels may exceed 15 resp. 61 on input.
812
813 Rule L3 - reorder combining marks is not implemented here
814 Rule L4 - glyph mirroring is implemented as a display option below
815
816 Note: this should be applied a line at a time
817 -------------------------------------------------------------------------*/
818 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
819 {
820 int ich = 0;
821
822 /* true as soon as first odd level encountered */
823 fReverse = fReverse || odd(level);
824
825 for (; ich < cch; ich++)
826 {
827 if (plevel[ich] < level)
828 {
829 break;
830 }
831 else if (plevel[ich] > level)
832 {
833 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
834 cch - ich, fReverse) - 1;
835 }
836 }
837 if (fReverse)
838 {
839 reverse(pIndexs, ich);
840 }
841 return ich;
842 }
843
844 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
845 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
846 {
847 int ich = 0;
848 int newlevel = -1;
849
850 /* true as soon as first odd level encountered */
851 fReverse = fReverse || odd(level);
852
853 for (; ich < cch; ich++)
854 {
855 if (plevel[ich] < level)
856 break;
857 else if (plevel[ich] > level)
858 newlevel = ich;
859 }
860 if (fReverse)
861 {
862 reverse(pIndexs, ich);
863 }
864
865 if (newlevel >= 0)
866 {
867 ich = 0;
868 for (; ich < cch; ich++)
869 if (plevel[ich] < level)
870 break;
871 else if (plevel[ich] > level)
872 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
873 cch - ich, fReverse) - 1;
874 }
875
876 return ich;
877 }
878
879 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
880 WORD* lpStrength)
881 {
882 int i;
883 classify(lpString, lpStrength, uCount, c);
884
885 for ( i = 0; i < uCount; i++)
886 {
887 switch(lpStrength[i])
888 {
889 case L:
890 case LRE:
891 case LRO:
892 case R:
893 case AL:
894 case RLE:
895 case RLO:
896 lpStrength[i] = BIDI_STRONG;
897 break;
898 case PDF:
899 case EN:
900 case ES:
901 case ET:
902 case AN:
903 case CS:
904 case BN:
905 lpStrength[i] = BIDI_WEAK;
906 break;
907 case B:
908 case S:
909 case WS:
910 case ON:
911 default: /* Neutrals and NSM */
912 lpStrength[i] = BIDI_NEUTRAL;
913 }
914 }
915 return TRUE;
916 }