Sync with trunk r58687.
[reactos.git] / dll / win32 / usp10 / bidi.c
1 /*
2 * Uniscribe BiDirectional handling
3 *
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 *
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 *
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 *
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
42 */
43
44 #include <config.h>
45
46 //#include <stdarg.h>
47 #include <windef.h>
48 //#include "winbase.h"
49 #include <wingdi.h>
50 //#include "winnls.h"
51 #include <usp10.h>
52 #include <wine/unicode.h>
53 #include <wine/debug.h>
54
55 #include "usp10_internal.h"
56
57 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
58
59 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
60 #define MAX_LEVEL 61
61
62 /* HELPER FUNCTIONS AND DECLARATIONS */
63
64 /*------------------------------------------------------------------------
65 Bidirectional Character Types
66
67 as defined by the Unicode Bidirectional Algorithm Table 3-7.
68
69 Note:
70
71 The list of bidirectional character types here is not grouped the
72 same way as the table 3-7, since the numberic values for the types
73 are chosen to keep the state and action tables compact.
74 ------------------------------------------------------------------------*/
75 enum directions
76 {
77 /* input types */
78 /* ON MUST be zero, code relies on ON = N = 0 */
79 ON = 0, /* Other Neutral */
80 L, /* Left Letter */
81 R, /* Right Letter */
82 AN, /* Arabic Number */
83 EN, /* European Number */
84 AL, /* Arabic Letter (Right-to-left) */
85 NSM, /* Non-spacing Mark */
86 CS, /* Common Separator */
87 ES, /* European Separator */
88 ET, /* European Terminator (post/prefix e.g. $ and %) */
89
90 /* resolved types */
91 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
92
93 /* input types, */
94 S, /* Segment Separator (TAB) // used only in L1 */
95 WS, /* White space // used only in L1 */
96 B, /* Paragraph Separator (aka as PS) */
97
98 /* types for explicit controls */
99 RLO, /* these are used only in X1-X9 */
100 RLE,
101 LRO,
102 LRE,
103 PDF,
104
105 /* resolved types, also resolved directions */
106 N = ON, /* alias, where ON, WS and S are treated the same */
107 };
108
109 /* HELPER FUNCTIONS */
110
111 /* Convert the libwine information to the direction enum */
112 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
113 {
114 static const enum directions dir_map[16] =
115 {
116 L, /* unassigned defaults to L */
117 L,
118 R,
119 EN,
120 ES,
121 ET,
122 AN,
123 CS,
124 B,
125 S,
126 WS,
127 ON,
128 AL,
129 NSM,
130 BN,
131 PDF /* also LRE, LRO, RLE, RLO */
132 };
133
134 unsigned i;
135
136 for (i = 0; i < uCount; ++i)
137 {
138 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
139 switch (chartype[i])
140 {
141 case ES:
142 if (!c->fLegacyBidiClass) break;
143 switch (lpString[i])
144 {
145 case '-':
146 case '+': chartype[i] = N; break;
147 case '/': chartype[i] = CS; break;
148 }
149 break;
150 case PDF:
151 switch (lpString[i])
152 {
153 case 0x202A: chartype[i] = LRE; break;
154 case 0x202B: chartype[i] = RLE; break;
155 case 0x202C: chartype[i] = PDF; break;
156 case 0x202D: chartype[i] = LRO; break;
157 case 0x202E: chartype[i] = RLO; break;
158 }
159 break;
160 }
161 }
162 }
163
164 /* Set a run of cval values at locations all prior to, but not including */
165 /* iStart, to the new value nval. */
166 static void SetDeferredRun(WORD *pval, int cval, int iStart, int nval)
167 {
168 int i = iStart - 1;
169 for (; i >= iStart - cval; i--)
170 {
171 pval[i] = nval;
172 }
173 }
174
175 /* RESOLVE EXPLICIT */
176
177 static WORD GreaterEven(int i)
178 {
179 return odd(i) ? i + 1 : i + 2;
180 }
181
182 static WORD GreaterOdd(int i)
183 {
184 return odd(i) ? i + 2 : i + 1;
185 }
186
187 static WORD EmbeddingDirection(int level)
188 {
189 return odd(level) ? R : L;
190 }
191
192 /*------------------------------------------------------------------------
193 Function: resolveExplicit
194
195 Recursively resolves explicit embedding levels and overrides.
196 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
197
198 Input: Base embedding level and direction
199 Character count
200
201 Output: Array of embedding levels
202
203 In/Out: Array of direction classes
204
205
206 Note: The function uses two simple counters to keep track of
207 matching explicit codes and PDF. Use the default argument for
208 the outermost call. The nesting counter counts the recursion
209 depth and not the embedding level.
210 ------------------------------------------------------------------------*/
211
212 static int resolveExplicit(int level, int dir, WORD *pcls, WORD *plevel, int cch, int nNest)
213 {
214 /* always called with a valid nesting level
215 nesting levels are != embedding levels */
216 int nLastValid = nNest;
217 int ich = 0;
218
219 /* check input values */
220 ASSERT(nNest >= 0 && level >= 0 && level <= MAX_LEVEL);
221
222 /* process the text */
223 for (; ich < cch; ich++)
224 {
225 WORD cls = pcls[ich];
226 switch (cls)
227 {
228 case LRO:
229 case LRE:
230 nNest++;
231 if (GreaterEven(level) <= MAX_LEVEL - (cls == LRO ? 2 : 0))
232 {
233 plevel[ich] = GreaterEven(level);
234 pcls[ich] = BN;
235 ich += resolveExplicit(plevel[ich], (cls == LRE ? N : L),
236 &pcls[ich+1], &plevel[ich+1],
237 cch - (ich+1), nNest);
238 nNest--;
239 continue;
240 }
241 cls = pcls[ich] = BN;
242 break;
243
244 case RLO:
245 case RLE:
246 nNest++;
247 if (GreaterOdd(level) <= MAX_LEVEL - (cls == RLO ? 2 : 0))
248 {
249 plevel[ich] = GreaterOdd(level);
250 pcls[ich] = BN;
251 ich += resolveExplicit(plevel[ich], (cls == RLE ? N : R),
252 &pcls[ich+1], &plevel[ich+1],
253 cch - (ich+1), nNest);
254 nNest--;
255 continue;
256 }
257 cls = pcls[ich] = BN;
258 break;
259
260 case PDF:
261 cls = pcls[ich] = BN;
262 if (nNest)
263 {
264 if (nLastValid < nNest)
265 {
266 nNest--;
267 }
268 else
269 {
270 cch = ich; /* break the loop, but complete body */
271 }
272 }
273 }
274
275 /* Apply the override */
276 if (dir != N)
277 {
278 cls = dir;
279 }
280 plevel[ich] = level;
281 if (pcls[ich] != BN)
282 pcls[ich] = cls;
283 }
284
285 return ich;
286 }
287
288 /* RESOLVE WEAK TYPES */
289
290 enum states /* possible states */
291 {
292 xa, /* Arabic letter */
293 xr, /* right letter */
294 xl, /* left letter */
295
296 ao, /* Arabic lett. foll by ON */
297 ro, /* right lett. foll by ON */
298 lo, /* left lett. foll by ON */
299
300 rt, /* ET following R */
301 lt, /* ET following L */
302
303 cn, /* EN, AN following AL */
304 ra, /* Arabic number foll R */
305 re, /* European number foll R */
306 la, /* Arabic number foll L */
307 le, /* European number foll L */
308
309 ac, /* CS following cn */
310 rc, /* CS following ra */
311 rs, /* CS,ES following re */
312 lc, /* CS following la */
313 ls, /* CS,ES following le */
314
315 ret, /* ET following re */
316 let, /* ET following le */
317 } ;
318
319 static const int stateWeak[][10] =
320 {
321 /* N, L, R, AN, EN, AL,NSM, CS, ES, ET */
322 /*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* Arabic letter */
323 /*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */
324 /*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */
325
326 /*ao*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* Arabic lett. foll by ON*/
327 /*ro*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* right lett. foll by ON */
328 /*lo*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* left lett. foll by ON */
329
330 /*rt*/ { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R */
331 /*lt*/ { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L */
332
333 /*cn*/ { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL */
334 /*ra*/ { ro, xl, xr, ra, re, xa, ra, rc, ro, rt }, /* Arabic number foll R */
335 /*re*/ { ro, xl, xr, ra, re, xa, re, rs, rs,ret }, /* European number foll R */
336 /*la*/ { lo, xl, xr, la, le, xa, la, lc, lo, lt }, /* Arabic number foll L */
337 /*le*/ { lo, xl, xr, la, le, xa, le, ls, ls,let }, /* European number foll L */
338
339 /*ac*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn */
340 /*rc*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra */
341 /*rs*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re */
342 /*lc*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la */
343 /*ls*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le */
344
345 /*ret*/{ ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re */
346 /*let*/{ lo, xl, xr, la, le, xa,let, lo, lo,let }, /* ET following le */
347 };
348
349 enum actions /* possible actions */
350 {
351 /* primitives */
352 IX = 0x100, /* increment */
353 XX = 0xF, /* no-op */
354
355 /* actions */
356 xxx = (XX << 4) + XX, /* no-op */
357 xIx = IX + xxx, /* increment run */
358 xxN = (XX << 4) + ON, /* set current to N */
359 xxE = (XX << 4) + EN, /* set current to EN */
360 xxA = (XX << 4) + AN, /* set current to AN */
361 xxR = (XX << 4) + R, /* set current to R */
362 xxL = (XX << 4) + L, /* set current to L */
363 Nxx = (ON << 4) + 0xF, /* set run to neutral */
364 Axx = (AN << 4) + 0xF, /* set run to AN */
365 ExE = (EN << 4) + EN, /* set run to EN, set current to EN */
366 NIx = (ON << 4) + 0xF + IX, /* set run to N, increment */
367 NxN = (ON << 4) + ON, /* set run to N, set current to N */
368 NxR = (ON << 4) + R, /* set run to N, set current to R */
369 NxE = (ON << 4) + EN, /* set run to N, set current to EN */
370
371 AxA = (AN << 4) + AN, /* set run to AN, set current to AN */
372 NxL = (ON << 4) + L, /* set run to N, set current to L */
373 LxL = (L << 4) + L, /* set run to L, set current to L */
374 } ;
375
376 static const int actionWeak[][10] =
377 {
378 /* N, L, R, AN, EN, AL, NSM, CS, ES, ET */
379 /*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* Arabic letter */
380 /*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter */
381 /*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */
382
383 /*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* Arabic lett. foll by ON */
384 /*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON */
385 /*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON */
386
387 /*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R */
388 /*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L */
389
390 /*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following AL */
391 /*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* Arabic number foll R */
392 /*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* European number foll R */
393 /*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* Arabic number foll L */
394 /*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* European number foll L */
395
396 /*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn */
397 /*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra */
398 /*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re */
399 /*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS following la */
400 /*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le */
401
402 /*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re */
403 /*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL }, /* ET following le */
404 };
405
406 static int GetDeferredType(int action)
407 {
408 return (action >> 4) & 0xF;
409 }
410
411 static int GetResolvedType(int action)
412 {
413 return action & 0xF;
414 }
415
416 /* Note on action table:
417
418 States can be of two kinds:
419 - Immediate Resolution State, where each input token
420 is resolved as soon as it is seen. These states have
421 only single action codes (xxN) or the no-op (xxx)
422 for static input tokens.
423 - Deferred Resolution State, where input tokens either
424 either extend the run (xIx) or resolve its Type (e.g. Nxx).
425
426 Input classes are of three kinds
427 - Static Input Token, where the class of the token remains
428 unchanged on output (AN, L, N, R)
429 - Replaced Input Token, where the class of the token is
430 always replaced on output (AL, BN, NSM, CS, ES, ET)
431 - Conditional Input Token, where the class of the token is
432 changed on output in some, but not all, cases (EN)
433
434 Where tokens are subject to change, a double action
435 (e.g. NxA, or NxN) is _required_ after deferred states,
436 resolving both the deferred state and changing the current token.
437 */
438
439 /*------------------------------------------------------------------------
440 Function: resolveWeak
441
442 Resolves the directionality of numeric and other weak character types
443
444 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
445
446 Input: Array of embedding levels
447 Character count
448
449 In/Out: Array of directional classes
450
451 Note: On input only these directional classes are expected
452 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
453 ------------------------------------------------------------------------*/
454 static void resolveWeak(int baselevel, WORD *pcls, WORD *plevel, int cch)
455 {
456 int state = odd(baselevel) ? xr : xl;
457 int cls;
458
459 int level = baselevel;
460 int action, clsRun, clsNew;
461 int cchRun = 0;
462 int ich = 0;
463
464 for (; ich < cch; ich++)
465 {
466 /* ignore boundary neutrals */
467 if (pcls[ich] == BN)
468 {
469 /* must flatten levels unless at a level change; */
470 plevel[ich] = level;
471
472 /* lookahead for level changes */
473 if (ich + 1 == cch && level != baselevel)
474 {
475 /* have to fixup last BN before end of the loop, since
476 * its fix-upped value will be needed below the assert */
477 pcls[ich] = EmbeddingDirection(level);
478 }
479 else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BN)
480 {
481 /* fixup LAST BN in front / after a level run to make
482 * it act like the SOR/EOR in rule X10 */
483 int newlevel = plevel[ich+1];
484 if (level > newlevel) {
485 newlevel = level;
486 }
487 plevel[ich] = newlevel;
488
489 /* must match assigned level */
490 pcls[ich] = EmbeddingDirection(newlevel);
491 level = plevel[ich+1];
492 }
493 else
494 {
495 /* don't interrupt runs */
496 if (cchRun)
497 {
498 cchRun++;
499 }
500 continue;
501 }
502 }
503
504 ASSERT(pcls[ich] <= BN);
505 cls = pcls[ich];
506
507 action = actionWeak[state][cls];
508
509 /* resolve the directionality for deferred runs */
510 clsRun = GetDeferredType(action);
511 if (clsRun != XX)
512 {
513 SetDeferredRun(pcls, cchRun, ich, clsRun);
514 cchRun = 0;
515 }
516
517 /* resolve the directionality class at the current location */
518 clsNew = GetResolvedType(action);
519 if (clsNew != XX)
520 pcls[ich] = clsNew;
521
522 /* increment a deferred run */
523 if (IX & action)
524 cchRun++;
525
526 state = stateWeak[state][cls];
527 }
528
529 /* resolve any deferred runs
530 * use the direction of the current level to emulate PDF */
531 cls = EmbeddingDirection(level);
532
533 /* resolve the directionality for deferred runs */
534 clsRun = GetDeferredType(actionWeak[state][cls]);
535 if (clsRun != XX)
536 SetDeferredRun(pcls, cchRun, ich, clsRun);
537 }
538
539 /* RESOLVE NEUTRAL TYPES */
540
541 /* action values */
542 enum neutralactions
543 {
544 /* action to resolve previous input */
545 nL = L, /* resolve EN to L */
546 En = 3 << 4, /* resolve neutrals run to embedding level direction */
547 Rn = R << 4, /* resolve neutrals run to strong right */
548 Ln = L << 4, /* resolved neutrals run to strong left */
549 In = (1<<8), /* increment count of deferred neutrals */
550 LnL = (1<<4)+L, /* set run and EN to L */
551 };
552
553 static int GetDeferredNeutrals(int action, int level)
554 {
555 action = (action >> 4) & 0xF;
556 if (action == (En >> 4))
557 return EmbeddingDirection(level);
558 else
559 return action;
560 }
561
562 static int GetResolvedNeutrals(int action)
563 {
564 action = action & 0xF;
565 if (action == In)
566 return 0;
567 else
568 return action;
569 }
570
571 /* state values */
572 enum resolvestates
573 {
574 /* new temporary class */
575 r, /* R and characters resolved to R */
576 l, /* L and characters resolved to L */
577 rn, /* N preceded by right */
578 ln, /* N preceded by left */
579 a, /* AN preceded by left (the abbreviation 'la' is used up above) */
580 na, /* N preceded by a */
581 } ;
582
583
584 /*------------------------------------------------------------------------
585 Notes:
586
587 By rule W7, whenever a EN is 'dominated' by an L (including start of
588 run with embedding direction = L) it is resolved to, and further treated
589 as L.
590
591 This leads to the need for 'a' and 'na' states.
592 ------------------------------------------------------------------------*/
593
594 static const int actionNeutrals[][5] =
595 {
596 /* N, L, R, AN, EN = cls */
597 { In, 0, 0, 0, 0 }, /* r right */
598 { In, 0, 0, 0, L }, /* l left */
599
600 { In, En, Rn, Rn, Rn }, /* rn N preceded by right */
601 { In, Ln, En, En, LnL}, /* ln N preceded by left */
602
603 { In, 0, 0, 0, L }, /* a AN preceded by left */
604 { In, En, Rn, Rn, En }, /* na N preceded by a */
605 } ;
606
607 static const int stateNeutrals[][5] =
608 {
609 /* N, L, R, AN, EN */
610 { rn, l, r, r, r }, /* r right */
611 { ln, l, r, a, l }, /* l left */
612
613 { rn, l, r, r, r }, /* rn N preceded by right */
614 { ln, l, r, a, l }, /* ln N preceded by left */
615
616 { na, l, r, a, l }, /* a AN preceded by left */
617 { na, l, r, a, l }, /* na N preceded by la */
618 } ;
619
620 /*------------------------------------------------------------------------
621 Function: resolveNeutrals
622
623 Resolves the directionality of neutral character types.
624
625 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
626
627 Input: Array of embedding levels
628 Character count
629 Baselevel
630
631 In/Out: Array of directional classes
632
633 Note: On input only these directional classes are expected
634 R, L, N, AN, EN and BN
635
636 W8 resolves a number of ENs to L
637 ------------------------------------------------------------------------*/
638 static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int cch)
639 {
640 /* the state at the start of text depends on the base level */
641 int state = odd(baselevel) ? r : l;
642 int cls;
643
644 int cchRun = 0;
645 int level = baselevel;
646
647 int action, clsRun, clsNew;
648 int ich = 0;
649 for (; ich < cch; ich++)
650 {
651 /* ignore boundary neutrals */
652 if (pcls[ich] == BN)
653 {
654 /* include in the count for a deferred run */
655 if (cchRun)
656 cchRun++;
657
658 /* skip any further processing */
659 continue;
660 }
661
662 ASSERT(pcls[ich] < 5); /* "Only N, L, R, AN, EN are allowed" */
663 cls = pcls[ich];
664
665 action = actionNeutrals[state][cls];
666
667 /* resolve the directionality for deferred runs */
668 clsRun = GetDeferredNeutrals(action, level);
669 if (clsRun != N)
670 {
671 SetDeferredRun(pcls, cchRun, ich, clsRun);
672 cchRun = 0;
673 }
674
675 /* resolve the directionality class at the current location */
676 clsNew = GetResolvedNeutrals(action);
677 if (clsNew != N)
678 pcls[ich] = clsNew;
679
680 if (In & action)
681 cchRun++;
682
683 state = stateNeutrals[state][cls];
684 level = plevel[ich];
685 }
686
687 /* resolve any deferred runs */
688 cls = EmbeddingDirection(level); /* eor has type of current level */
689
690 /* resolve the directionality for deferred runs */
691 clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level);
692 if (clsRun != N)
693 SetDeferredRun(pcls, cchRun, ich, clsRun);
694 }
695
696 /* RESOLVE IMPLICIT */
697
698 /*------------------------------------------------------------------------
699 Function: resolveImplicit
700
701 Recursively resolves implicit embedding levels.
702 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
703
704 Input: Array of direction classes
705 Character count
706 Base level
707
708 In/Out: Array of embedding levels
709
710 Note: levels may exceed 15 on output.
711 Accepted subset of direction classes
712 R, L, AN, EN
713 ------------------------------------------------------------------------*/
714 static const WORD addLevel[][4] =
715 {
716 /* L, R, AN, EN */
717 /* even */ { 0, 1, 2, 2, },
718 /* odd */ { 1, 0, 1, 1, }
719
720 };
721
722 static void resolveImplicit(const WORD * pcls, WORD *plevel, int cch)
723 {
724 int ich = 0;
725 for (; ich < cch; ich++)
726 {
727 /* cannot resolve bn here, since some bn were resolved to strong
728 * types in resolveWeak. To remove these we need the original
729 * types, which are available again in resolveWhiteSpace */
730 if (pcls[ich] == BN)
731 {
732 continue;
733 }
734 ASSERT(pcls[ich] > 0); /* "No Neutrals allowed to survive here." */
735 ASSERT(pcls[ich] < 5); /* "Out of range." */
736 plevel[ich] += addLevel[odd(plevel[ich])][pcls[ich] - 1];
737 }
738 }
739
740 /*************************************************************
741 * BIDI_DeterminLevels
742 */
743 BOOL BIDI_DetermineLevels(
744 LPCWSTR lpString, /* [in] The string for which information is to be returned */
745 INT uCount, /* [in] Number of WCHARs in string. */
746 const SCRIPT_STATE *s,
747 const SCRIPT_CONTROL *c,
748 WORD *lpOutLevels /* [out] final string levels */
749 )
750 {
751 WORD *chartype;
752 unsigned baselevel = 0;
753 INT j;
754 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
755
756 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
757 if (!chartype)
758 {
759 WARN("Out of memory\n");
760 return FALSE;
761 }
762
763 baselevel = s->uBidiLevel;
764
765 classify(lpString, chartype, uCount, c);
766
767 for (j = 0; j < uCount; ++j)
768 switch(chartype[j])
769 {
770 case B:
771 case S:
772 case WS:
773 case ON: chartype[j] = N;
774 default: continue;
775 }
776
777 /* resolve explicit */
778 resolveExplicit(baselevel, N, chartype, lpOutLevels, uCount, 0);
779
780 /* resolve weak */
781 resolveWeak(baselevel, chartype, lpOutLevels, uCount);
782
783 /* resolve neutrals */
784 resolveNeutrals(baselevel, chartype, lpOutLevels, uCount);
785
786 /* resolveImplicit */
787 resolveImplicit(chartype, lpOutLevels, uCount);
788
789 HeapFree(GetProcessHeap(), 0, chartype);
790 return TRUE;
791 }
792
793 /* reverse cch indexes */
794 static void reverse(int *pidx, int cch)
795 {
796 int temp;
797 int ich = 0;
798 for (; ich < --cch; ich++)
799 {
800 temp = pidx[ich];
801 pidx[ich] = pidx[cch];
802 pidx[cch] = temp;
803 }
804 }
805
806
807 /*------------------------------------------------------------------------
808 Functions: reorder/reorderLevel
809
810 Recursively reorders the display string
811 "From the highest level down, reverse all characters at that level and
812 higher, down to the lowest odd level"
813
814 Implements rule L2 of the Unicode bidi Algorithm.
815
816 Input: Array of embedding levels
817 Character count
818 Flag enabling reversal (set to false by initial caller)
819
820 In/Out: Text to reorder
821
822 Note: levels may exceed 15 resp. 61 on input.
823
824 Rule L3 - reorder combining marks is not implemented here
825 Rule L4 - glyph mirroring is implemented as a display option below
826
827 Note: this should be applied a line at a time
828 -------------------------------------------------------------------------*/
829 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
830 {
831 int ich = 0;
832
833 /* true as soon as first odd level encountered */
834 fReverse = fReverse || odd(level);
835
836 for (; ich < cch; ich++)
837 {
838 if (plevel[ich] < level)
839 {
840 break;
841 }
842 else if (plevel[ich] > level)
843 {
844 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
845 cch - ich, fReverse) - 1;
846 }
847 }
848 if (fReverse)
849 {
850 reverse(pIndexs, ich);
851 }
852 return ich;
853 }
854
855 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
856 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
857 {
858 int ich = 0;
859 int newlevel = -1;
860
861 /* true as soon as first odd level encountered */
862 fReverse = fReverse || odd(level);
863
864 for (; ich < cch; ich++)
865 {
866 if (plevel[ich] < level)
867 break;
868 else if (plevel[ich] > level)
869 newlevel = ich;
870 }
871 if (fReverse)
872 {
873 reverse(pIndexs, ich);
874 }
875
876 if (newlevel >= 0)
877 {
878 ich = 0;
879 for (; ich < cch; ich++)
880 if (plevel[ich] < level)
881 break;
882 else if (plevel[ich] > level)
883 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
884 cch - ich, fReverse) - 1;
885 }
886
887 return ich;
888 }
889
890 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
891 WORD* lpStrength)
892 {
893 int i;
894 classify(lpString, lpStrength, uCount, c);
895
896 for ( i = 0; i < uCount; i++)
897 {
898 switch(lpStrength[i])
899 {
900 case L:
901 case LRE:
902 case LRO:
903 case R:
904 case AL:
905 case RLE:
906 case RLO:
907 lpStrength[i] = BIDI_STRONG;
908 break;
909 case PDF:
910 case EN:
911 case ES:
912 case ET:
913 case AN:
914 case CS:
915 case BN:
916 lpStrength[i] = BIDI_WEAK;
917 break;
918 case B:
919 case S:
920 case WS:
921 case ON:
922 default: /* Neutrals and NSM */
923 lpStrength[i] = BIDI_NEUTRAL;
924 }
925 }
926 return TRUE;
927 }