Sync with trunk revision 64099.
[reactos.git] / dll / win32 / usp10 / bidi.c
1 /*
2 * Uniscribe BiDirectional handling
3 *
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 *
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 *
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 *
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
42 */
43
44 #include <windef.h>
45
46 #include <wine/list.h>
47
48 #include "usp10_internal.h"
49
50 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
51
52 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
53 #define MAX_DEPTH 125
54
55 /* HELPER FUNCTIONS AND DECLARATIONS */
56
57 /*------------------------------------------------------------------------
58 Bidirectional Character Types
59
60 as defined by the Unicode Bidirectional Algorithm Table 3-7.
61
62 Note:
63
64 The list of bidirectional character types here is not grouped the
65 same way as the table 3-7, since the numberic values for the types
66 are chosen to keep the state and action tables compact.
67 ------------------------------------------------------------------------*/
68 enum directions
69 {
70 /* input types */
71 /* ON MUST be zero, code relies on ON = NI = 0 */
72 ON = 0, /* Other Neutral */
73 L, /* Left Letter */
74 R, /* Right Letter */
75 AN, /* Arabic Number */
76 EN, /* European Number */
77 AL, /* Arabic Letter (Right-to-left) */
78 NSM, /* Non-spacing Mark */
79 CS, /* Common Separator */
80 ES, /* European Separator */
81 ET, /* European Terminator (post/prefix e.g. $ and %) */
82
83 /* resolved types */
84 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
85
86 /* input types, */
87 S, /* Segment Separator (TAB) // used only in L1 */
88 WS, /* White space // used only in L1 */
89 B, /* Paragraph Separator (aka as PS) */
90
91 /* types for explicit controls */
92 RLO, /* these are used only in X1-X9 */
93 RLE,
94 LRO,
95 LRE,
96 PDF,
97
98 LRI, /* Isolate formatting characters new with 6.3 */
99 RLI,
100 FSI,
101 PDI,
102
103 /* resolved types, also resolved directions */
104 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
105 };
106
107 static const char debug_type[][4] =
108 {
109 "ON", /* Other Neutral */
110 "L", /* Left Letter */
111 "R", /* Right Letter */
112 "AN", /* Arabic Number */
113 "EN", /* European Number */
114 "AL", /* Arabic Letter (Right-to-left) */
115 "NSM", /* Non-spacing Mark */
116 "CS", /* Common Separator */
117 "ES", /* European Separator */
118 "ET", /* European Terminator (post/prefix e.g. $ and %) */
119 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
120 "S", /* Segment Separator (TAB) // used only in L1 */
121 "WS", /* White space // used only in L1 */
122 "B", /* Paragraph Separator (aka as PS) */
123 "RLO", /* these are used only in X1-X9 */
124 "RLE",
125 "LRO",
126 "LRE",
127 "PDF",
128 "LRI", /* Isolate formatting characters new with 6.3 */
129 "RLI",
130 "FSI",
131 "PDI",
132 };
133
134 /* HELPER FUNCTIONS */
135
136 static inline void dump_types(const char* header, WORD *types, int start, int end)
137 {
138 int i;
139 TRACE("%s:",header);
140 for (i = start; i< end; i++)
141 TRACE(" %s",debug_type[types[i]]);
142 TRACE("\n");
143 }
144
145 /* Convert the libwine information to the direction enum */
146 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
147 {
148 static const enum directions dir_map[16] =
149 {
150 L, /* unassigned defaults to L */
151 L,
152 R,
153 EN,
154 ES,
155 ET,
156 AN,
157 CS,
158 B,
159 S,
160 WS,
161 ON,
162 AL,
163 NSM,
164 BN,
165 PDF /* also LRE, LRO, RLE, RLO */
166 };
167
168 unsigned i;
169
170 for (i = 0; i < uCount; ++i)
171 {
172 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
173 switch (chartype[i])
174 {
175 case ES:
176 if (!c->fLegacyBidiClass) break;
177 switch (lpString[i])
178 {
179 case '-':
180 case '+': chartype[i] = NI; break;
181 case '/': chartype[i] = CS; break;
182 }
183 break;
184 case PDF:
185 switch (lpString[i])
186 {
187 case 0x202A: chartype[i] = LRE; break;
188 case 0x202B: chartype[i] = RLE; break;
189 case 0x202C: chartype[i] = PDF; break;
190 case 0x202D: chartype[i] = LRO; break;
191 case 0x202E: chartype[i] = RLO; break;
192 case 0x2066: chartype[i] = LRI; break;
193 case 0x2067: chartype[i] = RLI; break;
194 case 0x2068: chartype[i] = FSI; break;
195 case 0x2069: chartype[i] = PDI; break;
196 }
197 break;
198 }
199 }
200 }
201
202 /* RESOLVE EXPLICIT */
203
204 static WORD GreaterEven(int i)
205 {
206 return odd(i) ? i + 1 : i + 2;
207 }
208
209 static WORD GreaterOdd(int i)
210 {
211 return odd(i) ? i + 2 : i + 1;
212 }
213
214 static WORD EmbeddingDirection(int level)
215 {
216 return odd(level) ? R : L;
217 }
218
219 /*------------------------------------------------------------------------
220 Function: resolveExplicit
221
222 Recursively resolves explicit embedding levels and overrides.
223 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
224
225 Input: Base embedding level and direction
226 Character count
227
228 Output: Array of embedding levels
229
230 In/Out: Array of direction classes
231
232
233 Note: The function uses two simple counters to keep track of
234 matching explicit codes and PDF. Use the default argument for
235 the outermost call. The nesting counter counts the recursion
236 depth and not the embedding level.
237 ------------------------------------------------------------------------*/
238 typedef struct tagStackItem {
239 int level;
240 int override;
241 BOOL isolate;
242 } StackItem;
243
244 #define push_stack(l,o,i) \
245 do { stack_top--; \
246 stack[stack_top].level = l; \
247 stack[stack_top].override = o; \
248 stack[stack_top].isolate = i;} while(0)
249
250 #define pop_stack() do { stack_top++; } while(0)
251
252 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
253
254 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, int count)
255 {
256 /* X1 */
257 int overflow_isolate_count = 0;
258 int overflow_embedding_count = 0;
259 int valid_isolate_count = 0;
260 int i;
261
262 StackItem stack[MAX_DEPTH+2];
263 int stack_top = MAX_DEPTH+1;
264
265 stack[stack_top].level = level;
266 stack[stack_top].override = NI;
267 stack[stack_top].isolate = FALSE;
268
269 for (i = 0; i < count; i++)
270 {
271 /* X2 */
272 if (pclass[i] == RLE)
273 {
274 int least_odd = GreaterOdd(stack[stack_top].level);
275 poutLevel[i] = stack[stack_top].level;
276 if (valid_level(least_odd))
277 push_stack(least_odd, NI, FALSE);
278 else if (overflow_isolate_count == 0)
279 overflow_embedding_count++;
280 }
281 /* X3 */
282 else if (pclass[i] == LRE)
283 {
284 int least_even = GreaterEven(stack[stack_top].level);
285 poutLevel[i] = stack[stack_top].level;
286 if (valid_level(least_even))
287 push_stack(least_even, NI, FALSE);
288 else if (overflow_isolate_count == 0)
289 overflow_embedding_count++;
290 }
291 /* X4 */
292 else if (pclass[i] == RLO)
293 {
294 int least_odd = GreaterOdd(stack[stack_top].level);
295 poutLevel[i] = stack[stack_top].level;
296 if (valid_level(least_odd))
297 push_stack(least_odd, R, FALSE);
298 else if (overflow_isolate_count == 0)
299 overflow_embedding_count++;
300 }
301 /* X5 */
302 else if (pclass[i] == LRO)
303 {
304 int least_even = GreaterEven(stack[stack_top].level);
305 poutLevel[i] = stack[stack_top].level;
306 if (valid_level(least_even))
307 push_stack(least_even, L, FALSE);
308 else if (overflow_isolate_count == 0)
309 overflow_embedding_count++;
310 }
311 /* X5a */
312 else if (pclass[i] == RLI)
313 {
314 int least_odd = GreaterOdd(stack[stack_top].level);
315 poutLevel[i] = stack[stack_top].level;
316 if (valid_level(least_odd))
317 {
318 valid_isolate_count++;
319 push_stack(least_odd, NI, TRUE);
320 }
321 else
322 overflow_isolate_count++;
323 }
324 /* X5b */
325 else if (pclass[i] == LRI)
326 {
327 int least_even = GreaterEven(stack[stack_top].level);
328 poutLevel[i] = stack[stack_top].level;
329 if (valid_level(least_even))
330 {
331 valid_isolate_count++;
332 push_stack(least_even, NI, TRUE);
333 }
334 else
335 overflow_isolate_count++;
336 }
337 /* X5c */
338 else if (pclass[i] == FSI)
339 {
340 int j;
341 int new_level = 0;
342 int skipping = 0;
343 poutLevel[i] = stack[stack_top].level;
344 for (j = i+1; j < count; j++)
345 {
346 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
347 {
348 skipping++;
349 continue;
350 }
351 else if (pclass[j] == PDI)
352 {
353 if (skipping)
354 skipping --;
355 else
356 break;
357 continue;
358 }
359
360 if (skipping) continue;
361
362 if (pclass[j] == L)
363 {
364 new_level = 0;
365 break;
366 }
367 else if (pclass[j] == R || pclass[j] == AL)
368 {
369 new_level = 1;
370 break;
371 }
372 }
373 if (odd(new_level))
374 {
375 int least_odd = GreaterOdd(stack[stack_top].level);
376 if (valid_level(least_odd))
377 {
378 valid_isolate_count++;
379 push_stack(least_odd, NI, TRUE);
380 }
381 else
382 overflow_isolate_count++;
383 }
384 else
385 {
386 int least_even = GreaterEven(stack[stack_top].level);
387 if (valid_level(least_even))
388 {
389 valid_isolate_count++;
390 push_stack(least_even, NI, TRUE);
391 }
392 else
393 overflow_isolate_count++;
394 }
395 }
396 /* X6 */
397 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
398 {
399 poutLevel[i] = stack[stack_top].level;
400 if (stack[stack_top].override != NI)
401 pclass[i] = stack[stack_top].override;
402 }
403 /* X6a */
404 else if (pclass[i] == PDI)
405 {
406 if (overflow_isolate_count) overflow_isolate_count--;
407 else if (!valid_isolate_count) {/* do nothing */}
408 else
409 {
410 overflow_embedding_count = 0;
411 while (!stack[stack_top].isolate) pop_stack();
412 pop_stack();
413 valid_isolate_count --;
414 }
415 poutLevel[i] = stack[stack_top].level;
416 }
417 /* X7 */
418 else if (pclass[i] == PDF)
419 {
420 poutLevel[i] = stack[stack_top].level;
421 if (overflow_isolate_count) {/* do nothing */}
422 else if (overflow_embedding_count) overflow_embedding_count--;
423 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
424 pop_stack();
425 }
426 /* X8: Nothing */
427 }
428 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
429 for (i = 0; i < count ; i++)
430 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
431 pclass[i] = BN;
432 }
433
434 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
435 {
436 if (index == -1 || index == back_fence) return index;
437 index --;
438 while (index > back_fence && pcls[index] == BN) index --;
439 return index;
440 }
441
442 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
443 {
444 if (index == front_fence) return index;
445 index ++;
446 while (index < front_fence && pcls[index] == BN) index ++;
447 return index;
448 }
449
450 typedef struct tagRun
451 {
452 int start;
453 int end;
454 WORD e;
455 } Run;
456
457 typedef struct tagIsolatedRun
458 {
459 struct list entry;
460 int length;
461 WORD sos;
462 WORD eos;
463 WORD e;
464
465 WORD *ppcls[1];
466 } IsolatedRun;
467
468 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
469 {
470 if (index >= (iso_run->length-1)) return -1;
471 index ++;
472 while (index < iso_run->length && *iso_run->ppcls[index] == BN) index++;
473 if (index == iso_run->length) return -1;
474 return index;
475 }
476
477 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
478 {
479
480 if (index <= 0) return -1;
481 index --;
482 while (index > -1 && *iso_run->ppcls[index] == BN) index--;
483 return index;
484 }
485
486 static inline int iso_previousChar(IsolatedRun *iso_run, int index)
487 {
488 if (index <= 0) return -1;
489 return index --;
490 }
491
492 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
493 {
494 int i;
495 TRACE("%s:",header);
496 TRACE("[ ");
497 for (i = 0; i < iso_run->length; i++)
498 TRACE(" %s",debug_type[*iso_run->ppcls[i]]);
499 TRACE(" ]\n");
500 }
501
502 /*------------------------------------------------------------------------
503 Function: resolveWeak
504
505 Resolves the directionality of numeric and other weak character types
506
507 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
508
509 Input: Array of embedding levels
510 Character count
511
512 In/Out: Array of directional classes
513
514 Note: On input only these directional classes are expected
515 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
516 ------------------------------------------------------------------------*/
517
518 static void resolveWeak(IsolatedRun * iso_run)
519 {
520 int i;
521
522 /* W1 */
523 for (i=0; i < iso_run->length; i++)
524 {
525 if (*iso_run->ppcls[i] == NSM)
526 {
527 int j = iso_previousValidChar(iso_run, i);
528 if (j == -1)
529 *iso_run->ppcls[i] = iso_run->sos;
530 else if (*iso_run->ppcls[j] >= LRI)
531 *iso_run->ppcls[i] = ON;
532 else
533 *iso_run->ppcls[i] = *iso_run->ppcls[j];
534 }
535 }
536
537 /* W2 */
538 for (i = 0; i < iso_run->length; i++)
539 {
540 if (*iso_run->ppcls[i] == EN)
541 {
542 int j = iso_previousValidChar(iso_run, i);
543 while (j > -1)
544 {
545 if (*iso_run->ppcls[j] == R || *iso_run->ppcls[j] == L || *iso_run->ppcls[j] == AL)
546 {
547 if (*iso_run->ppcls[j] == AL)
548 *iso_run->ppcls[i] = AN;
549 break;
550 }
551 j = iso_previousValidChar(iso_run, j);
552 }
553 }
554 }
555
556 /* W3 */
557 for (i = 0; i < iso_run->length; i++)
558 {
559 if (*iso_run->ppcls[i] == AL)
560 *iso_run->ppcls[i] = R;
561 }
562
563 /* W4 */
564 for (i = 0; i < iso_run->length; i++)
565 {
566 if (*iso_run->ppcls[i] == ES)
567 {
568 int b = iso_previousValidChar(iso_run, i);
569 int f = iso_nextValidChar(iso_run, i);
570
571 if (b > -1 && f > -1 && *iso_run->ppcls[b] == EN && *iso_run->ppcls[f] == EN)
572 *iso_run->ppcls[i] = EN;
573 }
574 else if (*iso_run->ppcls[i] == CS)
575 {
576 int b = iso_previousValidChar(iso_run, i);
577 int f = iso_nextValidChar(iso_run, i);
578
579 if (b > -1 && f > -1 && *iso_run->ppcls[b] == EN && *iso_run->ppcls[f] == EN)
580 *iso_run->ppcls[i] = EN;
581 else if (b > -1 && f > -1 && *iso_run->ppcls[b] == AN && *iso_run->ppcls[f] == AN)
582 *iso_run->ppcls[i] = AN;
583 }
584 }
585
586 /* W5 */
587 for (i = 0; i < iso_run->length; i++)
588 {
589 if (*iso_run->ppcls[i] == ET)
590 {
591 int j;
592 for (j = i-1 ; j > -1; j--)
593 {
594 if (*iso_run->ppcls[j] == BN) continue;
595 if (*iso_run->ppcls[j] == ET) continue;
596 else if (*iso_run->ppcls[j] == EN) *iso_run->ppcls[i] = EN;
597 else break;
598 }
599 if (*iso_run->ppcls[i] == ET)
600 {
601 for (j = i+1; j < iso_run->length; j++)
602 {
603 if (*iso_run->ppcls[j] == BN) continue;
604 if (*iso_run->ppcls[j] == ET) continue;
605 else if (*iso_run->ppcls[j] == EN) *iso_run->ppcls[i] = EN;
606 else break;
607 }
608 }
609 }
610 }
611
612 /* W6 */
613 for (i = 0; i < iso_run->length; i++)
614 {
615 if (*iso_run->ppcls[i] == ET || *iso_run->ppcls[i] == ES || *iso_run->ppcls[i] == CS || *iso_run->ppcls[i] == ON)
616 {
617 int b = i-1;
618 int f = i+1;
619 if (b > -1 && *iso_run->ppcls[b] == BN)
620 *iso_run->ppcls[b] = ON;
621 if (f < iso_run->length && *iso_run->ppcls[f] == BN)
622 *iso_run->ppcls[f] = ON;
623
624 *iso_run->ppcls[i] = ON;
625 }
626 }
627
628 /* W7 */
629 for (i = 0; i < iso_run->length; i++)
630 {
631 if (*iso_run->ppcls[i] == EN)
632 {
633 int j;
634 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
635 if (*iso_run->ppcls[j] == R || *iso_run->ppcls[j] == L)
636 {
637 if (*iso_run->ppcls[j] == L)
638 *iso_run->ppcls[i] = L;
639 break;
640 }
641 if (iso_run->sos == L && j == -1)
642 *iso_run->ppcls[i] = L;
643 }
644 }
645 }
646
647 /*------------------------------------------------------------------------
648 Function: resolveNeutrals
649
650 Resolves the directionality of neutral character types.
651
652 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
653
654 Input: Array of embedding levels
655 Character count
656 Baselevel
657
658 In/Out: Array of directional classes
659
660 Note: On input only these directional classes are expected
661 R, L, NI, AN, EN and BN
662
663 W8 resolves a number of ENs to L
664 ------------------------------------------------------------------------*/
665 static void resolveNeutrals(IsolatedRun *iso_run)
666 {
667 int i;
668
669 /* Translate isolates into NI */
670 for (i = 0; i < iso_run->length; i++)
671 {
672 if (*iso_run->ppcls[i] >= LRI)
673 *iso_run->ppcls[i] = NI;
674
675 switch(*iso_run->ppcls[i])
676 {
677 case B:
678 case S:
679 case WS: *iso_run->ppcls[i] = NI;
680 }
681
682 ASSERT(*iso_run->ppcls[i] < 5 || *iso_run->ppcls[i] == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
683 }
684
685 /* N0: Skipping bracketed pairs for now */
686
687 /* N1 */
688 for (i = 0; i < iso_run->length; i++)
689 {
690 WORD l,r;
691
692 if (*iso_run->ppcls[i] == NI)
693 {
694 int j;
695 int b = iso_previousValidChar(iso_run, i);
696
697 if (b == -1)
698 {
699 l = iso_run->sos;
700 b = 0;
701 }
702 else
703 {
704 if (*iso_run->ppcls[b] == R || *iso_run->ppcls[b] == AN || *iso_run->ppcls[b] == EN)
705 l = R;
706 else if (*iso_run->ppcls[b] == L)
707 l = L;
708 else /* No string type */
709 continue;
710 }
711 j = iso_nextValidChar(iso_run, i);
712 while (j > -1 && *iso_run->ppcls[j] == NI) j = iso_nextValidChar(iso_run, j);
713
714 if (j == -1)
715 {
716 r = iso_run->eos;
717 j = iso_run->length;
718 }
719 else if (*iso_run->ppcls[j] == R || *iso_run->ppcls[j] == AN || *iso_run->ppcls[j] == EN)
720 r = R;
721 else if (*iso_run->ppcls[j] == L)
722 r = L;
723 else /* No string type */
724 continue;
725
726 if (r == l)
727 {
728 for (b = i; b < j && b < iso_run->length; b++)
729 *iso_run->ppcls[b] = r;
730 }
731 }
732 }
733
734 /* N2 */
735 for (i = 0; i < iso_run->length; i++)
736 {
737 if (*iso_run->ppcls[i] == NI)
738 {
739 int b = i-1;
740 int f = i+1;
741 *iso_run->ppcls[i] = EmbeddingDirection(iso_run->e);
742 if (b > -1 && *iso_run->ppcls[b] == BN)
743 *iso_run->ppcls[b] = EmbeddingDirection(iso_run->e);
744 if (f < iso_run->length && *iso_run->ppcls[f] == BN)
745 *iso_run->ppcls[f] = EmbeddingDirection(iso_run->e);
746 }
747 }
748 }
749
750 /*------------------------------------------------------------------------
751 Function: resolveImplicit
752
753 Recursively resolves implicit embedding levels.
754 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
755
756 Input: Array of direction classes
757 Character count
758 Base level
759
760 In/Out: Array of embedding levels
761
762 Note: levels may exceed 15 on output.
763 Accepted subset of direction classes
764 R, L, AN, EN
765 ------------------------------------------------------------------------*/
766 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
767 {
768 int i;
769
770 /* I1/2 */
771 for (i = sos; i <= eos; i++)
772 {
773 if (pcls[i] == BN)
774 continue;
775
776 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
777 ASSERT(pcls[i] < 5); /* "Out of range." */
778
779 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
780 plevel[i]++;
781 else if (!odd(plevel[i]) && pcls[i] == R)
782 plevel[i]++;
783 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
784 plevel[i]+=2;
785 }
786 }
787
788 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
789 {
790 int i;
791
792 /* L1 */
793 for (i = sos; i <= eos; i++)
794 {
795 if (pcls[i] == B || pcls[i] == S)
796 {
797 int j = i -1;
798 while (i > sos && j >= sos &&
799 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
800 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
801 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
802 plevel[j--] = baselevel;
803 plevel[i] = baselevel;
804 }
805 if (i == eos &&
806 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
807 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
808 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
809 {
810 int j = i;
811 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
812 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
813 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
814 plevel[j--] = baselevel;
815 }
816 }
817 }
818
819 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, WORD *pLevel, int uCount, struct list *set)
820 {
821 int run_start, run_end, i;
822 int run_count = 0;
823 Run *runs;
824 IsolatedRun *current_isolated;
825
826 runs = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(Run));
827 if (!runs) return;
828
829 list_init(set);
830
831 /* Build Runs */
832 run_start = 0;
833 while (run_start < uCount)
834 {
835 run_end = nextValidChar(pcls, run_start, uCount);
836 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
837 run_end --;
838 runs[run_count].start = run_start;
839 runs[run_count].end = run_end;
840 runs[run_count].e = pLevel[run_start];
841 run_start = nextValidChar(pcls, run_end, uCount);
842 run_count++;
843 }
844
845 /* Build Isolating Runs */
846 i = 0;
847 while (i < run_count)
848 {
849 int k = i;
850 if (runs[k].start >= 0)
851 {
852 int type_fence, real_end;
853 int j;
854 current_isolated = HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun) + sizeof(WORD*)*uCount);
855 if (!current_isolated) break;
856
857 run_start = runs[k].start;
858 current_isolated->e = runs[k].e;
859 current_isolated->length = (runs[k].end - runs[k].start)+1;
860
861 for (j = 0; j < current_isolated->length; j++)
862 current_isolated->ppcls[j] = &pcls[runs[k].start+j];
863
864 run_end = runs[k].end;
865
866 TRACE("{ [%i -- %i]",run_start, run_end);
867
868 if (pcls[run_end] == BN)
869 run_end = previousValidChar(pcls, run_end, runs[k].start);
870
871 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
872 {
873 j = k+1;
874 search:
875 while (j < run_count && pcls[runs[j].start] != PDI) j++;
876 if (j < run_count && runs[i].e != runs[j].e)
877 {
878 j++;
879 goto search;
880 }
881
882 if (j != run_count)
883 {
884 int m;
885 int l = current_isolated->length;
886
887 current_isolated->length += (runs[j].end - runs[j].start)+1;
888 for (m = 0; l < current_isolated->length; l++, m++)
889 current_isolated->ppcls[l] = &pcls[runs[j].start+m];
890
891 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
892
893 run_end = runs[j].end;
894 if (pcls[run_end] == BN)
895 run_end = previousValidChar(pcls, run_end, runs[i].start);
896 runs[j].start = -1;
897 k = j;
898 }
899 else
900 {
901 run_end = uCount;
902 break;
903 }
904 }
905
906 type_fence = previousValidChar(pcls, run_start, -1);
907
908 if (type_fence == -1)
909 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
910 else
911 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
912
913 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
914
915 if (run_end == uCount)
916 current_isolated->eos = current_isolated->sos;
917 else
918 {
919 /* eos could be an BN */
920 if ( pcls[run_end] == BN )
921 {
922 real_end = previousValidChar(pcls, run_end, run_start-1);
923 if (real_end < run_start)
924 real_end = run_start;
925 }
926 else
927 real_end = run_end;
928
929 type_fence = nextValidChar(pcls, run_end, uCount);
930 if (type_fence == uCount)
931 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
932 else
933 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
934
935 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
936 }
937
938 list_add_tail(set, &current_isolated->entry);
939 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
940 }
941 i++;
942 }
943
944 HeapFree(GetProcessHeap(), 0, runs);
945 }
946
947 /*************************************************************
948 * BIDI_DeterminLevels
949 */
950 BOOL BIDI_DetermineLevels(
951 LPCWSTR lpString, /* [in] The string for which information is to be returned */
952 INT uCount, /* [in] Number of WCHARs in string. */
953 const SCRIPT_STATE *s,
954 const SCRIPT_CONTROL *c,
955 WORD *lpOutLevels /* [out] final string levels */
956 )
957 {
958 WORD *chartype;
959 unsigned baselevel = 0;
960 struct list IsolatingRuns;
961 IsolatedRun *iso_run, *next;
962
963 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
964
965 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
966 if (!chartype)
967 {
968 WARN("Out of memory\n");
969 return FALSE;
970 }
971
972 baselevel = s->uBidiLevel;
973
974 classify(lpString, chartype, uCount, c);
975 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
976
977 /* resolve explicit */
978 resolveExplicit(baselevel, chartype, lpOutLevels, uCount);
979 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
980
981 /* X10/BD13: Computer Isolating runs */
982 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, uCount, &IsolatingRuns);
983
984 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
985 {
986 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
987
988 /* resolve weak */
989 resolveWeak(iso_run);
990 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
991
992 /* resolve neutrals */
993 resolveNeutrals(iso_run);
994 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
995
996 list_remove(&iso_run->entry);
997 HeapFree(GetProcessHeap(),0,iso_run);
998 }
999
1000 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1001 /* resolveImplicit */
1002 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1003
1004 /* resolveResolvedLevels*/
1005 classify(lpString, chartype, uCount, c);
1006 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1007
1008 HeapFree(GetProcessHeap(), 0, chartype);
1009 return TRUE;
1010 }
1011
1012 /* reverse cch indexes */
1013 static void reverse(int *pidx, int cch)
1014 {
1015 int temp;
1016 int ich = 0;
1017 for (; ich < --cch; ich++)
1018 {
1019 temp = pidx[ich];
1020 pidx[ich] = pidx[cch];
1021 pidx[cch] = temp;
1022 }
1023 }
1024
1025
1026 /*------------------------------------------------------------------------
1027 Functions: reorder/reorderLevel
1028
1029 Recursively reorders the display string
1030 "From the highest level down, reverse all characters at that level and
1031 higher, down to the lowest odd level"
1032
1033 Implements rule L2 of the Unicode bidi Algorithm.
1034
1035 Input: Array of embedding levels
1036 Character count
1037 Flag enabling reversal (set to false by initial caller)
1038
1039 In/Out: Text to reorder
1040
1041 Note: levels may exceed 15 resp. 61 on input.
1042
1043 Rule L3 - reorder combining marks is not implemented here
1044 Rule L4 - glyph mirroring is implemented as a display option below
1045
1046 Note: this should be applied a line at a time
1047 -------------------------------------------------------------------------*/
1048 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1049 {
1050 int ich = 0;
1051
1052 /* true as soon as first odd level encountered */
1053 fReverse = fReverse || odd(level);
1054
1055 for (; ich < cch; ich++)
1056 {
1057 if (plevel[ich] < level)
1058 {
1059 break;
1060 }
1061 else if (plevel[ich] > level)
1062 {
1063 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1064 cch - ich, fReverse) - 1;
1065 }
1066 }
1067 if (fReverse)
1068 {
1069 reverse(pIndexs, ich);
1070 }
1071 return ich;
1072 }
1073
1074 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1075 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1076 {
1077 int ich = 0;
1078 int newlevel = -1;
1079
1080 /* true as soon as first odd level encountered */
1081 fReverse = fReverse || odd(level);
1082
1083 for (; ich < cch; ich++)
1084 {
1085 if (plevel[ich] < level)
1086 break;
1087 else if (plevel[ich] > level)
1088 newlevel = ich;
1089 }
1090 if (fReverse)
1091 {
1092 reverse(pIndexs, ich);
1093 }
1094
1095 if (newlevel >= 0)
1096 {
1097 ich = 0;
1098 for (; ich < cch; ich++)
1099 if (plevel[ich] < level)
1100 break;
1101 else if (plevel[ich] > level)
1102 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1103 cch - ich, fReverse) - 1;
1104 }
1105
1106 return ich;
1107 }
1108
1109 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
1110 WORD* lpStrength)
1111 {
1112 int i;
1113 classify(lpString, lpStrength, uCount, c);
1114
1115 for ( i = 0; i < uCount; i++)
1116 {
1117 switch(lpStrength[i])
1118 {
1119 case L:
1120 case LRE:
1121 case LRO:
1122 case R:
1123 case AL:
1124 case RLE:
1125 case RLO:
1126 lpStrength[i] = BIDI_STRONG;
1127 break;
1128 case PDF:
1129 case EN:
1130 case ES:
1131 case ET:
1132 case AN:
1133 case CS:
1134 case BN:
1135 lpStrength[i] = BIDI_WEAK;
1136 break;
1137 case B:
1138 case S:
1139 case WS:
1140 case ON:
1141 default: /* Neutrals and NSM */
1142 lpStrength[i] = BIDI_NEUTRAL;
1143 }
1144 }
1145 return TRUE;
1146 }