[Usp10] - Sync to Wine 1.7.14.
[reactos.git] / reactos / dll / win32 / usp10 / bidi.c
1 /*
2 * Uniscribe BiDirectional handling
3 *
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 *
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 *
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 *
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
42 */
43
44 #include <windef.h>
45
46 #include <wine/list.h>
47
48 #include "usp10_internal.h"
49
50 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
51
52 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
53 #define MAX_DEPTH 125
54
55 /* HELPER FUNCTIONS AND DECLARATIONS */
56
57 /*------------------------------------------------------------------------
58 Bidirectional Character Types
59
60 as defined by the Unicode Bidirectional Algorithm Table 3-7.
61
62 Note:
63
64 The list of bidirectional character types here is not grouped the
65 same way as the table 3-7, since the numberic values for the types
66 are chosen to keep the state and action tables compact.
67 ------------------------------------------------------------------------*/
68 enum directions
69 {
70 /* input types */
71 /* ON MUST be zero, code relies on ON = NI = 0 */
72 ON = 0, /* Other Neutral */
73 L, /* Left Letter */
74 R, /* Right Letter */
75 AN, /* Arabic Number */
76 EN, /* European Number */
77 AL, /* Arabic Letter (Right-to-left) */
78 NSM, /* Non-spacing Mark */
79 CS, /* Common Separator */
80 ES, /* European Separator */
81 ET, /* European Terminator (post/prefix e.g. $ and %) */
82
83 /* resolved types */
84 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
85
86 /* input types, */
87 S, /* Segment Separator (TAB) // used only in L1 */
88 WS, /* White space // used only in L1 */
89 B, /* Paragraph Separator (aka as PS) */
90
91 /* types for explicit controls */
92 RLO, /* these are used only in X1-X9 */
93 RLE,
94 LRO,
95 LRE,
96 PDF,
97
98 LRI, /* Isolate formatting characters new with 6.3 */
99 RLI,
100 FSI,
101 PDI,
102
103 /* resolved types, also resolved directions */
104 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
105 };
106
107 static const char debug_type[][4] =
108 {
109 "ON", /* Other Neutral */
110 "L", /* Left Letter */
111 "R", /* Right Letter */
112 "AN", /* Arabic Number */
113 "EN", /* European Number */
114 "AL", /* Arabic Letter (Right-to-left) */
115 "NSM", /* Non-spacing Mark */
116 "CS", /* Common Separator */
117 "ES", /* European Separator */
118 "ET", /* European Terminator (post/prefix e.g. $ and %) */
119 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
120 "S", /* Segment Separator (TAB) // used only in L1 */
121 "WS", /* White space // used only in L1 */
122 "B", /* Paragraph Separator (aka as PS) */
123 "RLO", /* these are used only in X1-X9 */
124 "RLE",
125 "LRO",
126 "LRE",
127 "PDF",
128 "LRI", /* Isolate formatting characters new with 6.3 */
129 "RLI",
130 "FSI",
131 "PDI",
132 };
133
134 /* HELPER FUNCTIONS */
135
136 static inline void dump_types(const char* header, WORD *types, int start, int end)
137 {
138 int i;
139 TRACE("%s:",header);
140 for (i = start; i< end; i++)
141 TRACE(" %s",debug_type[types[i]]);
142 TRACE("\n");
143 }
144
145 /* Convert the libwine information to the direction enum */
146 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
147 {
148 static const enum directions dir_map[16] =
149 {
150 L, /* unassigned defaults to L */
151 L,
152 R,
153 EN,
154 ES,
155 ET,
156 AN,
157 CS,
158 B,
159 S,
160 WS,
161 ON,
162 AL,
163 NSM,
164 BN,
165 PDF /* also LRE, LRO, RLE, RLO */
166 };
167
168 unsigned i;
169
170 for (i = 0; i < uCount; ++i)
171 {
172 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
173 switch (chartype[i])
174 {
175 case ES:
176 if (!c->fLegacyBidiClass) break;
177 switch (lpString[i])
178 {
179 case '-':
180 case '+': chartype[i] = NI; break;
181 case '/': chartype[i] = CS; break;
182 }
183 break;
184 case PDF:
185 switch (lpString[i])
186 {
187 case 0x202A: chartype[i] = LRE; break;
188 case 0x202B: chartype[i] = RLE; break;
189 case 0x202C: chartype[i] = PDF; break;
190 case 0x202D: chartype[i] = LRO; break;
191 case 0x202E: chartype[i] = RLO; break;
192 case 0x2066: chartype[i] = LRI; break;
193 case 0x2067: chartype[i] = RLI; break;
194 case 0x2068: chartype[i] = FSI; break;
195 case 0x2069: chartype[i] = PDI; break;
196 }
197 break;
198 }
199 }
200 }
201
202 /* RESOLVE EXPLICIT */
203
204 static WORD GreaterEven(int i)
205 {
206 return odd(i) ? i + 1 : i + 2;
207 }
208
209 static WORD GreaterOdd(int i)
210 {
211 return odd(i) ? i + 2 : i + 1;
212 }
213
214 static WORD EmbeddingDirection(int level)
215 {
216 return odd(level) ? R : L;
217 }
218
219 /*------------------------------------------------------------------------
220 Function: resolveExplicit
221
222 Recursively resolves explicit embedding levels and overrides.
223 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
224
225 Input: Base embedding level and direction
226 Character count
227
228 Output: Array of embedding levels
229
230 In/Out: Array of direction classes
231
232
233 Note: The function uses two simple counters to keep track of
234 matching explicit codes and PDF. Use the default argument for
235 the outermost call. The nesting counter counts the recursion
236 depth and not the embedding level.
237 ------------------------------------------------------------------------*/
238 typedef struct tagStackItem {
239 int level;
240 int override;
241 BOOL isolate;
242 } StackItem;
243
244 #define push_stack(l,o,i) \
245 do { stack_top--; \
246 stack[stack_top].level = l; \
247 stack[stack_top].override = o; \
248 stack[stack_top].isolate = i;} while(0)
249
250 #define pop_stack() do { stack_top++; } while(0)
251
252 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
253
254 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, int count)
255 {
256 /* X1 */
257 int overflow_isolate_count = 0;
258 int overflow_embedding_count = 0;
259 int valid_isolate_count = 0;
260 int i;
261
262 StackItem stack[MAX_DEPTH+2];
263 int stack_top = MAX_DEPTH+1;
264
265 stack[stack_top].level = level;
266 stack[stack_top].override = NI;
267 stack[stack_top].isolate = FALSE;
268
269 for (i = 0; i < count; i++)
270 {
271 /* X2 */
272 if (pclass[i] == RLE)
273 {
274 int least_odd = GreaterOdd(stack[stack_top].level);
275 poutLevel[i] = stack[stack_top].level;
276 if (valid_level(least_odd))
277 push_stack(least_odd, NI, FALSE);
278 else if (overflow_isolate_count == 0)
279 overflow_embedding_count++;
280 }
281 /* X3 */
282 else if (pclass[i] == LRE)
283 {
284 int least_even = GreaterEven(stack[stack_top].level);
285 poutLevel[i] = stack[stack_top].level;
286 if (valid_level(least_even))
287 push_stack(least_even, NI, FALSE);
288 else if (overflow_isolate_count == 0)
289 overflow_embedding_count++;
290 }
291 /* X4 */
292 else if (pclass[i] == RLO)
293 {
294 int least_odd = GreaterOdd(stack[stack_top].level);
295 poutLevel[i] = stack[stack_top].level;
296 if (valid_level(least_odd))
297 push_stack(least_odd, R, FALSE);
298 else if (overflow_isolate_count == 0)
299 overflow_embedding_count++;
300 }
301 /* X5 */
302 else if (pclass[i] == LRO)
303 {
304 int least_even = GreaterEven(stack[stack_top].level);
305 poutLevel[i] = stack[stack_top].level;
306 if (valid_level(least_even))
307 push_stack(least_even, L, FALSE);
308 else if (overflow_isolate_count == 0)
309 overflow_embedding_count++;
310 }
311 /* X5a */
312 else if (pclass[i] == RLI)
313 {
314 int least_odd = GreaterOdd(stack[stack_top].level);
315 poutLevel[i] = stack[stack_top].level;
316 if (valid_level(least_odd))
317 {
318 valid_isolate_count++;
319 push_stack(least_odd, NI, TRUE);
320 }
321 else
322 overflow_isolate_count++;
323 }
324 /* X5b */
325 else if (pclass[i] == LRI)
326 {
327 int least_even = GreaterEven(stack[stack_top].level);
328 poutLevel[i] = stack[stack_top].level;
329 if (valid_level(least_even))
330 {
331 valid_isolate_count++;
332 push_stack(least_even, NI, TRUE);
333 }
334 else
335 overflow_isolate_count++;
336 }
337 /* X5c */
338 else if (pclass[i] == FSI)
339 {
340 int j;
341 int new_level = 0;
342 int skipping = 0;
343 poutLevel[i] = stack[stack_top].level;
344 for (j = i+1; j < count; j++)
345 {
346 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
347 {
348 skipping++;
349 continue;
350 }
351 else if (pclass[j] == PDI)
352 {
353 if (skipping)
354 skipping --;
355 else
356 break;
357 continue;
358 }
359
360 if (skipping) continue;
361
362 if (pclass[j] == L)
363 {
364 new_level = 0;
365 break;
366 }
367 else if (pclass[j] == R || pclass[j] == AL)
368 {
369 new_level = 1;
370 break;
371 }
372 }
373 if (odd(new_level))
374 {
375 int least_odd = GreaterOdd(stack[stack_top].level);
376 if (valid_level(least_odd))
377 {
378 valid_isolate_count++;
379 push_stack(least_odd, NI, TRUE);
380 }
381 else
382 overflow_isolate_count++;
383 }
384 else
385 {
386 int least_even = GreaterEven(stack[stack_top].level);
387 if (valid_level(least_even))
388 {
389 valid_isolate_count++;
390 push_stack(least_even, NI, TRUE);
391 }
392 else
393 overflow_isolate_count++;
394 }
395 }
396 /* X6 */
397 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
398 {
399 poutLevel[i] = stack[stack_top].level;
400 if (stack[stack_top].override != NI)
401 pclass[i] = stack[stack_top].override;
402 }
403 /* X6a */
404 else if (pclass[i] == PDI)
405 {
406 if (overflow_isolate_count) overflow_isolate_count--;
407 else if (!valid_isolate_count) {/* do nothing */}
408 else
409 {
410 overflow_embedding_count = 0;
411 while (!stack[stack_top].isolate) pop_stack();
412 pop_stack();
413 valid_isolate_count --;
414 }
415 poutLevel[i] = stack[stack_top].level;
416 }
417 /* X7 */
418 else if (pclass[i] == PDF)
419 {
420 poutLevel[i] = stack[stack_top].level;
421 if (overflow_isolate_count) {/* do nothing */}
422 else if (overflow_embedding_count) overflow_embedding_count--;
423 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
424 pop_stack();
425 }
426 /* X8: Nothing */
427 }
428 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
429 for (i = 0; i < count ; i++)
430 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
431 pclass[i] = BN;
432 }
433
434 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
435 {
436 if (index == -1 || index == back_fence) return index;
437 index --;
438 while (index > back_fence && pcls[index] == BN) index --;
439 return index;
440 }
441
442 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
443 {
444 if (index == front_fence) return index;
445 index ++;
446 while (index < front_fence && pcls[index] == BN) index ++;
447 return index;
448 }
449
450 typedef struct tagRun
451 {
452 int start;
453 int end;
454 WORD e;
455 } Run;
456
457 typedef struct tagIsolatedRun
458 {
459 struct list entry;
460 int length;
461 WORD sos;
462 WORD eos;
463 WORD e;
464
465 WORD *ppcls[1];
466 } IsolatedRun;
467
468 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
469 {
470 if (index >= (iso_run->length-1)) return -1;
471 index ++;
472 while (index < iso_run->length && *iso_run->ppcls[index] == BN) index++;
473 if (index == iso_run->length) return -1;
474 return index;
475 }
476
477 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
478 {
479
480 if (index <= 0) return -1;
481 index --;
482 while (index > -1 && *iso_run->ppcls[index] == BN) index--;
483 return index;
484 }
485
486 static inline int iso_previousChar(IsolatedRun *iso_run, int index)
487 {
488 if (index <= 0) return -1;
489 return index --;
490 }
491
492 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
493 {
494 int i;
495 TRACE("%s:",header);
496 TRACE("[ ");
497 for (i = 0; i < iso_run->length; i++)
498 TRACE(" %s",debug_type[*iso_run->ppcls[i]]);
499 TRACE(" ]\n");
500 }
501
502 /*------------------------------------------------------------------------
503 Function: resolveWeak
504
505 Resolves the directionality of numeric and other weak character types
506
507 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
508
509 Input: Array of embedding levels
510 Character count
511
512 In/Out: Array of directional classes
513
514 Note: On input only these directional classes are expected
515 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
516 ------------------------------------------------------------------------*/
517
518 static void resolveWeak(IsolatedRun * iso_run)
519 {
520 int i;
521
522 /* W1 */
523 for (i=0; i < iso_run->length; i++)
524 {
525 if (*iso_run->ppcls[i] == NSM)
526 {
527 int j = iso_previousValidChar(iso_run, i);
528 if (j == -1)
529 *iso_run->ppcls[i] = iso_run->sos;
530 else if (*iso_run->ppcls[j] >= LRI)
531 *iso_run->ppcls[i] = ON;
532 else
533 *iso_run->ppcls[i] = *iso_run->ppcls[j];
534 }
535 }
536
537 /* W2 */
538 for (i = 0; i < iso_run->length; i++)
539 {
540 if (*iso_run->ppcls[i] == EN)
541 {
542 int j = iso_previousValidChar(iso_run, i);
543 while (j > -1)
544 {
545 if (*iso_run->ppcls[j] == R || *iso_run->ppcls[j] == L || *iso_run->ppcls[j] == AL)
546 {
547 if (*iso_run->ppcls[j] == AL)
548 *iso_run->ppcls[i] = AN;
549 break;
550 }
551 j = iso_previousValidChar(iso_run, j);
552 }
553 }
554 }
555
556 /* W3 */
557 for (i = 0; i < iso_run->length; i++)
558 {
559 if (*iso_run->ppcls[i] == AL)
560 *iso_run->ppcls[i] = R;
561 }
562
563 /* W4 */
564 for (i = 0; i < iso_run->length; i++)
565 {
566 if (*iso_run->ppcls[i] == ES)
567 {
568 int b = iso_previousValidChar(iso_run, i);
569 int f = iso_nextValidChar(iso_run, i);
570
571 if (b > -1 && f > -1 && *iso_run->ppcls[b] == EN && *iso_run->ppcls[f] == EN)
572 *iso_run->ppcls[i] = EN;
573 }
574 else if (*iso_run->ppcls[i] == CS)
575 {
576 int b = iso_previousValidChar(iso_run, i);
577 int f = iso_nextValidChar(iso_run, i);
578
579 if (b > -1 && f > -1 && *iso_run->ppcls[b] == EN && *iso_run->ppcls[f] == EN)
580 *iso_run->ppcls[i] = EN;
581 else if (b > -1 && f > -1 && *iso_run->ppcls[b] == AN && *iso_run->ppcls[f] == AN)
582 *iso_run->ppcls[i] = AN;
583 }
584 }
585
586 /* W5 */
587 for (i = 0; i < iso_run->length; i++)
588 {
589 if (*iso_run->ppcls[i] == ET)
590 {
591 int j;
592 for (j = i-1 ; j > -1; j--)
593 {
594 if (*iso_run->ppcls[j] == BN) continue;
595 if (*iso_run->ppcls[j] == ET) continue;
596 else if (*iso_run->ppcls[j] == EN) *iso_run->ppcls[i] = EN;
597 else break;
598 }
599 if (*iso_run->ppcls[i] == ET)
600 {
601 for (j = i+1; j < iso_run->length; j++)
602 {
603 if (*iso_run->ppcls[j] == BN) continue;
604 if (*iso_run->ppcls[j] == ET) continue;
605 else if (*iso_run->ppcls[j] == EN) *iso_run->ppcls[i] = EN;
606 else break;
607 }
608 }
609 }
610 }
611
612 /* W6 */
613 for (i = 0; i < iso_run->length; i++)
614 {
615 if (*iso_run->ppcls[i] == ET || *iso_run->ppcls[i] == ES || *iso_run->ppcls[i] == CS || *iso_run->ppcls[i] == ON)
616 {
617 int b = i-1;
618 int f = i+1;
619 if (b > -1 && *iso_run->ppcls[b] == BN)
620 *iso_run->ppcls[b] = ON;
621 if (f < iso_run->length && *iso_run->ppcls[f] == BN)
622 *iso_run->ppcls[f] = ON;
623
624 *iso_run->ppcls[i] = ON;
625 }
626 }
627
628 /* W7 */
629 for (i = 0; i < iso_run->length; i++)
630 {
631 if (*iso_run->ppcls[i] == EN)
632 {
633 int j;
634 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
635 if (*iso_run->ppcls[j] == R || *iso_run->ppcls[j] == L)
636 {
637 if (*iso_run->ppcls[j] == L)
638 *iso_run->ppcls[i] = L;
639 break;
640 }
641 if (iso_run->sos == L && j == -1)
642 *iso_run->ppcls[i] = L;
643 }
644 }
645 }
646
647 /*------------------------------------------------------------------------
648 Function: resolveNeutrals
649
650 Resolves the directionality of neutral character types.
651
652 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
653
654 Input: Array of embedding levels
655 Character count
656 Baselevel
657
658 In/Out: Array of directional classes
659
660 Note: On input only these directional classes are expected
661 R, L, NI, AN, EN and BN
662
663 W8 resolves a number of ENs to L
664 ------------------------------------------------------------------------*/
665 static void resolveNeutrals(IsolatedRun *iso_run)
666 {
667 int i;
668
669 /* Translate isolates into NI */
670 for (i = 0; i < iso_run->length; i++)
671 {
672 if (*iso_run->ppcls[i] >= LRI)
673 *iso_run->ppcls[i] = NI;
674
675 switch(*iso_run->ppcls[i])
676 {
677 case B:
678 case S:
679 case WS: *iso_run->ppcls[i] = NI;
680 }
681
682 ASSERT(*iso_run->ppcls[i] < 5 || *iso_run->ppcls[i] == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
683 }
684
685 /* N0: Skipping bracketed pairs for now */
686
687 /* N1 */
688 for (i = 0; i < iso_run->length; i++)
689 {
690 WORD l,r;
691
692 if (*iso_run->ppcls[i] == NI)
693 {
694 int j;
695 int b = iso_previousValidChar(iso_run, i);
696
697 if (b == -1)
698 {
699 l = iso_run->sos;
700 b = 0;
701 }
702 else
703 {
704 if (*iso_run->ppcls[b] == R || *iso_run->ppcls[b] == AN || *iso_run->ppcls[b] == EN)
705 l = R;
706 else if (*iso_run->ppcls[b] == L)
707 l = L;
708 else /* No string type */
709 continue;
710 }
711 j = iso_nextValidChar(iso_run, i);
712 while (j > -1 && *iso_run->ppcls[j] == NI) j = iso_nextValidChar(iso_run, j);
713
714 if (j == -1)
715 {
716 r = iso_run->eos;
717 j = iso_run->length;
718 }
719 else if (*iso_run->ppcls[j] == R || *iso_run->ppcls[j] == AN || *iso_run->ppcls[j] == EN)
720 r = R;
721 else if (*iso_run->ppcls[j] == L)
722 r = L;
723 else /* No string type */
724 continue;
725
726 if (r == l)
727 {
728 for (b = i; b < j && b < iso_run->length; b++)
729 *iso_run->ppcls[b] = r;
730 }
731 }
732 }
733
734 /* N2 */
735 for (i = 0; i < iso_run->length; i++)
736 {
737 if (*iso_run->ppcls[i] == NI)
738 {
739 int b = i-1;
740 int f = i+1;
741 *iso_run->ppcls[i] = EmbeddingDirection(iso_run->e);
742 if (b > -1 && *iso_run->ppcls[b] == BN)
743 *iso_run->ppcls[b] = EmbeddingDirection(iso_run->e);
744 if (f < iso_run->length && *iso_run->ppcls[f] == BN)
745 *iso_run->ppcls[f] = EmbeddingDirection(iso_run->e);
746 }
747 }
748 }
749
750 /*------------------------------------------------------------------------
751 Function: resolveImplicit
752
753 Recursively resolves implicit embedding levels.
754 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
755
756 Input: Array of direction classes
757 Character count
758 Base level
759
760 In/Out: Array of embedding levels
761
762 Note: levels may exceed 15 on output.
763 Accepted subset of direction classes
764 R, L, AN, EN
765 ------------------------------------------------------------------------*/
766 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
767 {
768 int i;
769
770 /* I1/2 */
771 for (i = sos; i <= eos; i++)
772 {
773 if (pcls[i] == BN)
774 continue;
775
776 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
777 ASSERT(pcls[i] < 5); /* "Out of range." */
778
779 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
780 plevel[i]++;
781 else if (!odd(plevel[i]) && pcls[i] == R)
782 plevel[i]++;
783 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
784 plevel[i]+=2;
785 }
786 }
787
788 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
789 {
790 int i;
791
792 /* L1 */
793 for (i = sos; i <= eos; i++)
794 {
795 if (pcls[i] == B || pcls[i] == S)
796 {
797 int j = i -1;
798 while (i > sos && j >= sos &&
799 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
800 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
801 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
802 plevel[j--] = baselevel;
803 plevel[i] = baselevel;
804 }
805 if (i == eos &&
806 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
807 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
808 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
809 {
810 int j = i;
811 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
812 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
813 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
814 plevel[j--] = baselevel;
815 }
816 }
817 }
818
819 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, WORD *pLevel, int uCount, struct list *set)
820 {
821 int run_start, run_end, i;
822 Run runs[uCount];
823 int run_count = 0;
824 IsolatedRun *current_isolated;
825
826 list_init(set);
827
828 /* Build Runs */
829 run_start = 0;
830 while (run_start < uCount)
831 {
832 run_end = nextValidChar(pcls, run_start, uCount);
833 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
834 run_end --;
835 runs[run_count].start = run_start;
836 runs[run_count].end = run_end;
837 runs[run_count].e = pLevel[run_start];
838 run_start = nextValidChar(pcls, run_end, uCount);
839 run_count++;
840 }
841
842 /* Build Isolating Runs */
843 i = 0;
844 while (i < run_count)
845 {
846 int k = i;
847 if (runs[k].start >= 0)
848 {
849 int type_fence, real_end;
850 int j;
851 current_isolated = HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun) + sizeof(WORD*)*uCount);
852
853 run_start = runs[k].start;
854 current_isolated->e = runs[k].e;
855 current_isolated->length = (runs[k].end - runs[k].start)+1;
856
857 for (j = 0; j < current_isolated->length; j++)
858 current_isolated->ppcls[j] = &pcls[runs[k].start+j];
859
860 run_end = runs[k].end;
861
862 TRACE("{ [%i -- %i]",run_start, run_end);
863
864 if (pcls[run_end] == BN)
865 run_end = previousValidChar(pcls, run_end, runs[k].start);
866
867 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
868 {
869 j = k+1;
870 search:
871 while (j < run_count && pcls[runs[j].start] != PDI) j++;
872 if (j < run_count && runs[i].e != runs[j].e)
873 {
874 j++;
875 goto search;
876 }
877
878 if (j != run_count)
879 {
880 int m;
881 int l = current_isolated->length;
882
883 current_isolated->length += (runs[j].end - runs[j].start)+1;
884 for (m = 0; l < current_isolated->length; l++, m++)
885 current_isolated->ppcls[l] = &pcls[runs[j].start+m];
886
887 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
888
889 run_end = runs[j].end;
890 if (pcls[run_end] == BN)
891 run_end = previousValidChar(pcls, run_end, runs[i].start);
892 runs[j].start = -1;
893 k = j;
894 }
895 else
896 {
897 run_end = uCount;
898 break;
899 }
900 }
901
902 type_fence = previousValidChar(pcls, run_start, -1);
903
904 if (type_fence == -1)
905 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
906 else
907 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
908
909 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
910
911 if (run_end == uCount)
912 current_isolated->eos = current_isolated->sos;
913 else
914 {
915 /* eos could be an BN */
916 if ( pcls[run_end] == BN )
917 {
918 real_end = previousValidChar(pcls, run_end, run_start-1);
919 if (real_end < run_start)
920 real_end = run_start;
921 }
922 else
923 real_end = run_end;
924
925 type_fence = nextValidChar(pcls, run_end, uCount);
926 if (type_fence == uCount)
927 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
928 else
929 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
930
931 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
932 }
933
934 list_add_tail(set, &current_isolated->entry);
935 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
936 }
937 i++;
938 }
939 }
940
941 /*************************************************************
942 * BIDI_DeterminLevels
943 */
944 BOOL BIDI_DetermineLevels(
945 LPCWSTR lpString, /* [in] The string for which information is to be returned */
946 INT uCount, /* [in] Number of WCHARs in string. */
947 const SCRIPT_STATE *s,
948 const SCRIPT_CONTROL *c,
949 WORD *lpOutLevels /* [out] final string levels */
950 )
951 {
952 WORD *chartype;
953 unsigned baselevel = 0;
954 struct list IsolatingRuns;
955 IsolatedRun *iso_run, *next;
956
957 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
958
959 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
960 if (!chartype)
961 {
962 WARN("Out of memory\n");
963 return FALSE;
964 }
965
966 baselevel = s->uBidiLevel;
967
968 classify(lpString, chartype, uCount, c);
969 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
970
971 /* resolve explicit */
972 resolveExplicit(baselevel, chartype, lpOutLevels, uCount);
973 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
974
975 /* X10/BD13: Computer Isolating runs */
976 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, uCount, &IsolatingRuns);
977
978 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
979 {
980 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
981
982 /* resolve weak */
983 resolveWeak(iso_run);
984 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
985
986 /* resolve neutrals */
987 resolveNeutrals(iso_run);
988 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
989
990 list_remove(&iso_run->entry);
991 HeapFree(GetProcessHeap(),0,iso_run);
992 }
993
994 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
995 /* resolveImplicit */
996 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
997
998 /* resolveResolvedLevels*/
999 classify(lpString, chartype, uCount, c);
1000 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1001
1002 HeapFree(GetProcessHeap(), 0, chartype);
1003 return TRUE;
1004 }
1005
1006 /* reverse cch indexes */
1007 static void reverse(int *pidx, int cch)
1008 {
1009 int temp;
1010 int ich = 0;
1011 for (; ich < --cch; ich++)
1012 {
1013 temp = pidx[ich];
1014 pidx[ich] = pidx[cch];
1015 pidx[cch] = temp;
1016 }
1017 }
1018
1019
1020 /*------------------------------------------------------------------------
1021 Functions: reorder/reorderLevel
1022
1023 Recursively reorders the display string
1024 "From the highest level down, reverse all characters at that level and
1025 higher, down to the lowest odd level"
1026
1027 Implements rule L2 of the Unicode bidi Algorithm.
1028
1029 Input: Array of embedding levels
1030 Character count
1031 Flag enabling reversal (set to false by initial caller)
1032
1033 In/Out: Text to reorder
1034
1035 Note: levels may exceed 15 resp. 61 on input.
1036
1037 Rule L3 - reorder combining marks is not implemented here
1038 Rule L4 - glyph mirroring is implemented as a display option below
1039
1040 Note: this should be applied a line at a time
1041 -------------------------------------------------------------------------*/
1042 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1043 {
1044 int ich = 0;
1045
1046 /* true as soon as first odd level encountered */
1047 fReverse = fReverse || odd(level);
1048
1049 for (; ich < cch; ich++)
1050 {
1051 if (plevel[ich] < level)
1052 {
1053 break;
1054 }
1055 else if (plevel[ich] > level)
1056 {
1057 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1058 cch - ich, fReverse) - 1;
1059 }
1060 }
1061 if (fReverse)
1062 {
1063 reverse(pIndexs, ich);
1064 }
1065 return ich;
1066 }
1067
1068 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1069 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1070 {
1071 int ich = 0;
1072 int newlevel = -1;
1073
1074 /* true as soon as first odd level encountered */
1075 fReverse = fReverse || odd(level);
1076
1077 for (; ich < cch; ich++)
1078 {
1079 if (plevel[ich] < level)
1080 break;
1081 else if (plevel[ich] > level)
1082 newlevel = ich;
1083 }
1084 if (fReverse)
1085 {
1086 reverse(pIndexs, ich);
1087 }
1088
1089 if (newlevel >= 0)
1090 {
1091 ich = 0;
1092 for (; ich < cch; ich++)
1093 if (plevel[ich] < level)
1094 break;
1095 else if (plevel[ich] > level)
1096 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1097 cch - ich, fReverse) - 1;
1098 }
1099
1100 return ich;
1101 }
1102
1103 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
1104 WORD* lpStrength)
1105 {
1106 int i;
1107 classify(lpString, lpStrength, uCount, c);
1108
1109 for ( i = 0; i < uCount; i++)
1110 {
1111 switch(lpStrength[i])
1112 {
1113 case L:
1114 case LRE:
1115 case LRO:
1116 case R:
1117 case AL:
1118 case RLE:
1119 case RLO:
1120 lpStrength[i] = BIDI_STRONG;
1121 break;
1122 case PDF:
1123 case EN:
1124 case ES:
1125 case ET:
1126 case AN:
1127 case CS:
1128 case BN:
1129 lpStrength[i] = BIDI_WEAK;
1130 break;
1131 case B:
1132 case S:
1133 case WS:
1134 case ON:
1135 default: /* Neutrals and NSM */
1136 lpStrength[i] = BIDI_NEUTRAL;
1137 }
1138 }
1139 return TRUE;
1140 }