Create the AHCI branch for Aman's work
[reactos.git] / dll / win32 / usp10 / bidi.c
1 /*
2 * Uniscribe BiDirectional handling
3 *
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 *
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 *
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 *
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
42 */
43
44 #include <windef.h>
45
46 #include <wine/list.h>
47
48 #include "usp10_internal.h"
49
50 extern const unsigned short bidi_bracket_table[];
51
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
53
54 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
55 #define MAX_DEPTH 125
56
57 /* HELPER FUNCTIONS AND DECLARATIONS */
58
59 /*------------------------------------------------------------------------
60 Bidirectional Character Types
61
62 as defined by the Unicode Bidirectional Algorithm Table 3-7.
63
64 Note:
65
66 The list of bidirectional character types here is not grouped the
67 same way as the table 3-7, since the numberic values for the types
68 are chosen to keep the state and action tables compact.
69 ------------------------------------------------------------------------*/
70 enum directions
71 {
72 /* input types */
73 /* ON MUST be zero, code relies on ON = NI = 0 */
74 ON = 0, /* Other Neutral */
75 L, /* Left Letter */
76 R, /* Right Letter */
77 AN, /* Arabic Number */
78 EN, /* European Number */
79 AL, /* Arabic Letter (Right-to-left) */
80 NSM, /* Non-spacing Mark */
81 CS, /* Common Separator */
82 ES, /* European Separator */
83 ET, /* European Terminator (post/prefix e.g. $ and %) */
84
85 /* resolved types */
86 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
87
88 /* input types, */
89 S, /* Segment Separator (TAB) // used only in L1 */
90 WS, /* White space // used only in L1 */
91 B, /* Paragraph Separator (aka as PS) */
92
93 /* types for explicit controls */
94 RLO, /* these are used only in X1-X9 */
95 RLE,
96 LRO,
97 LRE,
98 PDF,
99
100 LRI, /* Isolate formatting characters new with 6.3 */
101 RLI,
102 FSI,
103 PDI,
104
105 /* resolved types, also resolved directions */
106 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
107 };
108
109 static const char debug_type[][4] =
110 {
111 "ON", /* Other Neutral */
112 "L", /* Left Letter */
113 "R", /* Right Letter */
114 "AN", /* Arabic Number */
115 "EN", /* European Number */
116 "AL", /* Arabic Letter (Right-to-left) */
117 "NSM", /* Non-spacing Mark */
118 "CS", /* Common Separator */
119 "ES", /* European Separator */
120 "ET", /* European Terminator (post/prefix e.g. $ and %) */
121 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
122 "S", /* Segment Separator (TAB) // used only in L1 */
123 "WS", /* White space // used only in L1 */
124 "B", /* Paragraph Separator (aka as PS) */
125 "RLO", /* these are used only in X1-X9 */
126 "RLE",
127 "LRO",
128 "LRE",
129 "PDF",
130 "LRI", /* Isolate formatting characters new with 6.3 */
131 "RLI",
132 "FSI",
133 "PDI",
134 };
135
136 /* HELPER FUNCTIONS */
137
138 static inline void dump_types(const char* header, WORD *types, int start, int end)
139 {
140 int i, len = 0;
141 TRACE("%s:",header);
142 for (i = start; i < end && len < 200; i++)
143 {
144 TRACE(" %s",debug_type[types[i]]);
145 len += strlen(debug_type[types[i]])+1;
146 }
147 if (i != end)
148 TRACE("...");
149 TRACE("\n");
150 }
151
152 /* Convert the libwine information to the direction enum */
153 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
154 {
155 static const enum directions dir_map[16] =
156 {
157 L, /* unassigned defaults to L */
158 L,
159 R,
160 EN,
161 ES,
162 ET,
163 AN,
164 CS,
165 B,
166 S,
167 WS,
168 ON,
169 AL,
170 NSM,
171 BN,
172 PDF /* also LRE, LRO, RLE, RLO */
173 };
174
175 unsigned i;
176
177 for (i = 0; i < uCount; ++i)
178 {
179 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
180 switch (chartype[i])
181 {
182 case ES:
183 if (!c->fLegacyBidiClass) break;
184 switch (lpString[i])
185 {
186 case '-':
187 case '+': chartype[i] = NI; break;
188 case '/': chartype[i] = CS; break;
189 }
190 break;
191 case PDF:
192 switch (lpString[i])
193 {
194 case 0x202A: chartype[i] = LRE; break;
195 case 0x202B: chartype[i] = RLE; break;
196 case 0x202C: chartype[i] = PDF; break;
197 case 0x202D: chartype[i] = LRO; break;
198 case 0x202E: chartype[i] = RLO; break;
199 case 0x2066: chartype[i] = LRI; break;
200 case 0x2067: chartype[i] = RLI; break;
201 case 0x2068: chartype[i] = FSI; break;
202 case 0x2069: chartype[i] = PDI; break;
203 }
204 break;
205 }
206 }
207 }
208
209 /* RESOLVE EXPLICIT */
210
211 static WORD GreaterEven(int i)
212 {
213 return odd(i) ? i + 1 : i + 2;
214 }
215
216 static WORD GreaterOdd(int i)
217 {
218 return odd(i) ? i + 2 : i + 1;
219 }
220
221 static WORD EmbeddingDirection(int level)
222 {
223 return odd(level) ? R : L;
224 }
225
226 /*------------------------------------------------------------------------
227 Function: resolveExplicit
228
229 Recursively resolves explicit embedding levels and overrides.
230 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
231
232 Input: Base embedding level and direction
233 Character count
234
235 Output: Array of embedding levels
236
237 In/Out: Array of direction classes
238
239
240 Note: The function uses two simple counters to keep track of
241 matching explicit codes and PDF. Use the default argument for
242 the outermost call. The nesting counter counts the recursion
243 depth and not the embedding level.
244 ------------------------------------------------------------------------*/
245 typedef struct tagStackItem {
246 int level;
247 int override;
248 BOOL isolate;
249 } StackItem;
250
251 #define push_stack(l,o,i) \
252 do { stack_top--; \
253 stack[stack_top].level = l; \
254 stack[stack_top].override = o; \
255 stack[stack_top].isolate = i;} while(0)
256
257 #define pop_stack() do { stack_top++; } while(0)
258
259 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
260
261 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride)
262 {
263 /* X1 */
264 int overflow_isolate_count = 0;
265 int overflow_embedding_count = 0;
266 int valid_isolate_count = 0;
267 int i;
268
269 StackItem stack[MAX_DEPTH+2];
270 int stack_top = MAX_DEPTH+1;
271
272 stack[stack_top].level = level;
273 stack[stack_top].override = NI;
274 stack[stack_top].isolate = FALSE;
275
276 if (initialOverride)
277 {
278 if (odd(level))
279 push_stack(level, R, FALSE);
280 else
281 push_stack(level, L, FALSE);
282 }
283
284 for (i = 0; i < count; i++)
285 {
286 poutOverrides[i] = stack[stack_top].override;
287
288 /* X2 */
289 if (pclass[i] == RLE)
290 {
291 int least_odd = GreaterOdd(stack[stack_top].level);
292 poutLevel[i] = stack[stack_top].level;
293 if (valid_level(least_odd))
294 push_stack(least_odd, NI, FALSE);
295 else if (overflow_isolate_count == 0)
296 overflow_embedding_count++;
297 }
298 /* X3 */
299 else if (pclass[i] == LRE)
300 {
301 int least_even = GreaterEven(stack[stack_top].level);
302 poutLevel[i] = stack[stack_top].level;
303 if (valid_level(least_even))
304 push_stack(least_even, NI, FALSE);
305 else if (overflow_isolate_count == 0)
306 overflow_embedding_count++;
307 }
308 /* X4 */
309 else if (pclass[i] == RLO)
310 {
311 int least_odd = GreaterOdd(stack[stack_top].level);
312 poutLevel[i] = stack[stack_top].level;
313 if (valid_level(least_odd))
314 push_stack(least_odd, R, FALSE);
315 else if (overflow_isolate_count == 0)
316 overflow_embedding_count++;
317 }
318 /* X5 */
319 else if (pclass[i] == LRO)
320 {
321 int least_even = GreaterEven(stack[stack_top].level);
322 poutLevel[i] = stack[stack_top].level;
323 if (valid_level(least_even))
324 push_stack(least_even, L, FALSE);
325 else if (overflow_isolate_count == 0)
326 overflow_embedding_count++;
327 }
328 /* X5a */
329 else if (pclass[i] == RLI)
330 {
331 int least_odd = GreaterOdd(stack[stack_top].level);
332 poutLevel[i] = stack[stack_top].level;
333 if (valid_level(least_odd))
334 {
335 valid_isolate_count++;
336 push_stack(least_odd, NI, TRUE);
337 }
338 else
339 overflow_isolate_count++;
340 }
341 /* X5b */
342 else if (pclass[i] == LRI)
343 {
344 int least_even = GreaterEven(stack[stack_top].level);
345 poutLevel[i] = stack[stack_top].level;
346 if (valid_level(least_even))
347 {
348 valid_isolate_count++;
349 push_stack(least_even, NI, TRUE);
350 }
351 else
352 overflow_isolate_count++;
353 }
354 /* X5c */
355 else if (pclass[i] == FSI)
356 {
357 int j;
358 int new_level = 0;
359 int skipping = 0;
360 poutLevel[i] = stack[stack_top].level;
361 for (j = i+1; j < count; j++)
362 {
363 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
364 {
365 skipping++;
366 continue;
367 }
368 else if (pclass[j] == PDI)
369 {
370 if (skipping)
371 skipping --;
372 else
373 break;
374 continue;
375 }
376
377 if (skipping) continue;
378
379 if (pclass[j] == L)
380 {
381 new_level = 0;
382 break;
383 }
384 else if (pclass[j] == R || pclass[j] == AL)
385 {
386 new_level = 1;
387 break;
388 }
389 }
390 if (odd(new_level))
391 {
392 int least_odd = GreaterOdd(stack[stack_top].level);
393 if (valid_level(least_odd))
394 {
395 valid_isolate_count++;
396 push_stack(least_odd, NI, TRUE);
397 }
398 else
399 overflow_isolate_count++;
400 }
401 else
402 {
403 int least_even = GreaterEven(stack[stack_top].level);
404 if (valid_level(least_even))
405 {
406 valid_isolate_count++;
407 push_stack(least_even, NI, TRUE);
408 }
409 else
410 overflow_isolate_count++;
411 }
412 }
413 /* X6 */
414 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
415 {
416 poutLevel[i] = stack[stack_top].level;
417 if (stack[stack_top].override != NI)
418 pclass[i] = stack[stack_top].override;
419 }
420 /* X6a */
421 else if (pclass[i] == PDI)
422 {
423 if (overflow_isolate_count) overflow_isolate_count--;
424 else if (!valid_isolate_count) {/* do nothing */}
425 else
426 {
427 overflow_embedding_count = 0;
428 while (!stack[stack_top].isolate) pop_stack();
429 pop_stack();
430 valid_isolate_count --;
431 }
432 poutLevel[i] = stack[stack_top].level;
433 }
434 /* X7 */
435 else if (pclass[i] == PDF)
436 {
437 poutLevel[i] = stack[stack_top].level;
438 if (overflow_isolate_count) {/* do nothing */}
439 else if (overflow_embedding_count) overflow_embedding_count--;
440 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
441 pop_stack();
442 }
443 /* X8: Nothing */
444 }
445 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
446 for (i = 0; i < count ; i++)
447 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
448 pclass[i] = BN;
449 }
450
451 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
452 {
453 if (index == -1 || index == back_fence) return index;
454 index --;
455 while (index > back_fence && pcls[index] == BN) index --;
456 return index;
457 }
458
459 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
460 {
461 if (index == front_fence) return index;
462 index ++;
463 while (index < front_fence && pcls[index] == BN) index ++;
464 return index;
465 }
466
467 typedef struct tagRun
468 {
469 int start;
470 int end;
471 WORD e;
472 } Run;
473
474 typedef struct tagRunChar
475 {
476 WCHAR ch;
477 WORD *pcls;
478 } RunChar;
479
480 typedef struct tagIsolatedRun
481 {
482 struct list entry;
483 int length;
484 WORD sos;
485 WORD eos;
486 WORD e;
487
488 RunChar item[1];
489 } IsolatedRun;
490
491 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
492 {
493 if (index >= (iso_run->length-1)) return -1;
494 index ++;
495 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
496 if (index == iso_run->length) return -1;
497 return index;
498 }
499
500 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
501 {
502
503 if (index <= 0) return -1;
504 index --;
505 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
506 return index;
507 }
508
509 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
510 {
511 int i, len = 0;
512 TRACE("%s:",header);
513 TRACE("[ ");
514 for (i = 0; i < iso_run->length && len < 200; i++)
515 {
516 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
517 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
518 }
519 if (i != iso_run->length)
520 TRACE("...");
521 TRACE(" ]\n");
522 }
523
524 /*------------------------------------------------------------------------
525 Function: resolveWeak
526
527 Resolves the directionality of numeric and other weak character types
528
529 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
530
531 Input: Array of embedding levels
532 Character count
533
534 In/Out: Array of directional classes
535
536 Note: On input only these directional classes are expected
537 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
538 ------------------------------------------------------------------------*/
539
540 static void resolveWeak(IsolatedRun * iso_run)
541 {
542 int i;
543
544 /* W1 */
545 for (i=0; i < iso_run->length; i++)
546 {
547 if (*iso_run->item[i].pcls == NSM)
548 {
549 int j = iso_previousValidChar(iso_run, i);
550 if (j == -1)
551 *iso_run->item[i].pcls = iso_run->sos;
552 else if (*iso_run->item[j].pcls >= LRI)
553 *iso_run->item[i].pcls = ON;
554 else
555 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
556 }
557 }
558
559 /* W2 */
560 for (i = 0; i < iso_run->length; i++)
561 {
562 if (*iso_run->item[i].pcls == EN)
563 {
564 int j = iso_previousValidChar(iso_run, i);
565 while (j > -1)
566 {
567 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
568 {
569 if (*iso_run->item[j].pcls == AL)
570 *iso_run->item[i].pcls = AN;
571 break;
572 }
573 j = iso_previousValidChar(iso_run, j);
574 }
575 }
576 }
577
578 /* W3 */
579 for (i = 0; i < iso_run->length; i++)
580 {
581 if (*iso_run->item[i].pcls == AL)
582 *iso_run->item[i].pcls = R;
583 }
584
585 /* W4 */
586 for (i = 0; i < iso_run->length; i++)
587 {
588 if (*iso_run->item[i].pcls == ES)
589 {
590 int b = iso_previousValidChar(iso_run, i);
591 int f = iso_nextValidChar(iso_run, i);
592
593 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
594 *iso_run->item[i].pcls = EN;
595 }
596 else if (*iso_run->item[i].pcls == CS)
597 {
598 int b = iso_previousValidChar(iso_run, i);
599 int f = iso_nextValidChar(iso_run, i);
600
601 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
602 *iso_run->item[i].pcls = EN;
603 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
604 *iso_run->item[i].pcls = AN;
605 }
606 }
607
608 /* W5 */
609 for (i = 0; i < iso_run->length; i++)
610 {
611 if (*iso_run->item[i].pcls == ET)
612 {
613 int j;
614 for (j = i-1 ; j > -1; j--)
615 {
616 if (*iso_run->item[j].pcls == BN) continue;
617 if (*iso_run->item[j].pcls == ET) continue;
618 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
619 else break;
620 }
621 if (*iso_run->item[i].pcls == ET)
622 {
623 for (j = i+1; j < iso_run->length; j++)
624 {
625 if (*iso_run->item[j].pcls == BN) continue;
626 if (*iso_run->item[j].pcls == ET) continue;
627 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
628 else break;
629 }
630 }
631 }
632 }
633
634 /* W6 */
635 for (i = 0; i < iso_run->length; i++)
636 {
637 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
638 {
639 int b = i-1;
640 int f = i+1;
641 if (b > -1 && *iso_run->item[b].pcls == BN)
642 *iso_run->item[b].pcls = ON;
643 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
644 *iso_run->item[f].pcls = ON;
645
646 *iso_run->item[i].pcls = ON;
647 }
648 }
649
650 /* W7 */
651 for (i = 0; i < iso_run->length; i++)
652 {
653 if (*iso_run->item[i].pcls == EN)
654 {
655 int j;
656 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
657 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
658 {
659 if (*iso_run->item[j].pcls == L)
660 *iso_run->item[i].pcls = L;
661 break;
662 }
663 if (iso_run->sos == L && j == -1)
664 *iso_run->item[i].pcls = L;
665 }
666 }
667 }
668
669 typedef struct tagBracketPair
670 {
671 int start;
672 int end;
673 } BracketPair;
674
675 static int compr(const void *a, const void* b)
676 {
677 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
678 }
679
680 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
681 {
682 WCHAR *open_stack;
683 int *stack_index;
684 int stack_top = iso_run->length;
685 BracketPair *out = NULL;
686 int pair_count = 0;
687 int i;
688
689 open_stack = HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR) * iso_run->length);
690 stack_index = HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run->length);
691
692 for (i = 0; i < iso_run->length; i++)
693 {
694 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
695 if (ubv)
696 {
697 if (!out)
698 {
699 out = HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair));
700 out[0].start = -1;
701 }
702
703 if ((ubv >> 8) == 0)
704 {
705 stack_top --;
706 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
707 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
708 if (open_stack[stack_top] == 0x232A)
709 open_stack[stack_top] = 0x3009;
710 stack_index[stack_top] = i;
711 }
712 else if ((ubv >> 8) == 1)
713 {
714 int j;
715 if (stack_top == iso_run->length) continue;
716 for (j = stack_top; j < iso_run->length; j++)
717 {
718 WCHAR c = iso_run->item[i].ch;
719 if (c == 0x232A) c = 0x3009;
720 if (c == open_stack[j])
721 {
722 out[pair_count].start = stack_index[j];
723 out[pair_count].end = i;
724 pair_count++;
725 out = HeapReAlloc(GetProcessHeap(), 0, out, sizeof(BracketPair) * (pair_count+1));
726 out[pair_count].start = -1;
727 stack_top = j+1;
728 break;
729 }
730 }
731 }
732 }
733 }
734 if (pair_count == 0)
735 {
736 HeapFree(GetProcessHeap(),0,out);
737 out = NULL;
738 }
739 else if (pair_count > 1)
740 qsort(out, pair_count, sizeof(BracketPair), compr);
741
742 HeapFree(GetProcessHeap(), 0, open_stack);
743 HeapFree(GetProcessHeap(), 0, stack_index);
744 return out;
745 }
746
747 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
748
749 /*------------------------------------------------------------------------
750 Function: resolveNeutrals
751
752 Resolves the directionality of neutral character types.
753
754 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
755
756 Input: Array of embedding levels
757 Character count
758 Baselevel
759
760 In/Out: Array of directional classes
761
762 Note: On input only these directional classes are expected
763 R, L, NI, AN, EN and BN
764
765 W8 resolves a number of ENs to L
766 ------------------------------------------------------------------------*/
767 static void resolveNeutrals(IsolatedRun *iso_run)
768 {
769 int i;
770 BracketPair *pairs = NULL;
771
772 /* Translate isolates into NI */
773 for (i = 0; i < iso_run->length; i++)
774 {
775 if (*iso_run->item[i].pcls >= LRI)
776 *iso_run->item[i].pcls = NI;
777
778 switch(*iso_run->item[i].pcls)
779 {
780 case B:
781 case S:
782 case WS: *iso_run->item[i].pcls = NI;
783 }
784
785 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
786 }
787
788 /* N0: Skipping bracketed pairs for now */
789 pairs = computeBracketPairs(iso_run);
790 if (pairs)
791 {
792 BracketPair *p = &pairs[0];
793 int i = 0;
794 while (p->start >= 0)
795 {
796 int j;
797 int e = EmbeddingDirection(iso_run->e);
798 int o = EmbeddingDirection(iso_run->e+1);
799 BOOL flag_o = FALSE;
800 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
801
802 /* N0.b */
803 for (j = p->start+1; j < p->end; j++)
804 {
805 if (N0_TYPE(*iso_run->item[j].pcls) == e)
806 {
807 *iso_run->item[p->start].pcls = e;
808 *iso_run->item[p->end].pcls = e;
809 break;
810 }
811 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
812 flag_o = TRUE;
813 }
814 /* N0.c */
815 if (j == p->end && flag_o)
816 {
817 for (j = p->start; j >= 0; j--)
818 {
819 if (N0_TYPE(*iso_run->item[j].pcls) == o)
820 {
821 *iso_run->item[p->start].pcls = o;
822 *iso_run->item[p->end].pcls = o;
823 break;
824 }
825 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
826 {
827 *iso_run->item[p->start].pcls = e;
828 *iso_run->item[p->end].pcls = e;
829 break;
830 }
831 }
832 if ( j < 0 )
833 {
834 *iso_run->item[p->start].pcls = iso_run->sos;
835 *iso_run->item[p->end].pcls = iso_run->sos;
836 }
837 }
838
839 i++;
840 p = &pairs[i];
841 }
842 HeapFree(GetProcessHeap(),0,pairs);
843 }
844
845 /* N1 */
846 for (i = 0; i < iso_run->length; i++)
847 {
848 WORD l,r;
849
850 if (*iso_run->item[i].pcls == NI)
851 {
852 int j;
853 int b = iso_previousValidChar(iso_run, i);
854
855 if (b == -1)
856 {
857 l = iso_run->sos;
858 b = 0;
859 }
860 else
861 {
862 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
863 l = R;
864 else if (*iso_run->item[b].pcls == L)
865 l = L;
866 else /* No string type */
867 continue;
868 }
869 j = iso_nextValidChar(iso_run, i);
870 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
871
872 if (j == -1)
873 {
874 r = iso_run->eos;
875 j = iso_run->length;
876 }
877 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
878 r = R;
879 else if (*iso_run->item[j].pcls == L)
880 r = L;
881 else /* No string type */
882 continue;
883
884 if (r == l)
885 {
886 for (b = i; b < j && b < iso_run->length; b++)
887 *iso_run->item[b].pcls = r;
888 }
889 }
890 }
891
892 /* N2 */
893 for (i = 0; i < iso_run->length; i++)
894 {
895 if (*iso_run->item[i].pcls == NI)
896 {
897 int b = i-1;
898 int f = i+1;
899 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
900 if (b > -1 && *iso_run->item[b].pcls == BN)
901 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
902 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
903 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
904 }
905 }
906 }
907
908 /*------------------------------------------------------------------------
909 Function: resolveImplicit
910
911 Recursively resolves implicit embedding levels.
912 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
913
914 Input: Array of direction classes
915 Character count
916 Base level
917
918 In/Out: Array of embedding levels
919
920 Note: levels may exceed 15 on output.
921 Accepted subset of direction classes
922 R, L, AN, EN
923 ------------------------------------------------------------------------*/
924 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
925 {
926 int i;
927
928 /* I1/2 */
929 for (i = sos; i <= eos; i++)
930 {
931 if (pcls[i] == BN)
932 continue;
933
934 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
935 ASSERT(pcls[i] < 5); /* "Out of range." */
936
937 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
938 plevel[i]++;
939 else if (!odd(plevel[i]) && pcls[i] == R)
940 plevel[i]++;
941 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
942 plevel[i]+=2;
943 }
944 }
945
946 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
947 {
948 int i;
949
950 /* L1 */
951 for (i = sos; i <= eos; i++)
952 {
953 if (pcls[i] == B || pcls[i] == S)
954 {
955 int j = i -1;
956 while (i > sos && j >= sos &&
957 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
958 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
959 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
960 plevel[j--] = baselevel;
961 plevel[i] = baselevel;
962 }
963 if (i == eos &&
964 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
965 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
966 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
967 {
968 int j = i;
969 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
970 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
971 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
972 plevel[j--] = baselevel;
973 }
974 }
975 }
976
977 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, WORD *pLevel, LPCWSTR lpString, int uCount, struct list *set)
978 {
979 int run_start, run_end, i;
980 int run_count = 0;
981 Run *runs;
982 IsolatedRun *current_isolated;
983
984 runs = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(Run));
985 if (!runs) return;
986
987 list_init(set);
988
989 /* Build Runs */
990 run_start = 0;
991 while (run_start < uCount)
992 {
993 run_end = nextValidChar(pcls, run_start, uCount);
994 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
995 run_end --;
996 runs[run_count].start = run_start;
997 runs[run_count].end = run_end;
998 runs[run_count].e = pLevel[run_start];
999 run_start = nextValidChar(pcls, run_end, uCount);
1000 run_count++;
1001 }
1002
1003 /* Build Isolating Runs */
1004 i = 0;
1005 while (i < run_count)
1006 {
1007 int k = i;
1008 if (runs[k].start >= 0)
1009 {
1010 int type_fence, real_end;
1011 int j;
1012 current_isolated = HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun) + sizeof(RunChar)*uCount);
1013 if (!current_isolated) break;
1014
1015 run_start = runs[k].start;
1016 current_isolated->e = runs[k].e;
1017 current_isolated->length = (runs[k].end - runs[k].start)+1;
1018
1019 for (j = 0; j < current_isolated->length; j++)
1020 {
1021 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1022 current_isolated->item[j].ch = lpString[runs[k].start+j];
1023 }
1024
1025 run_end = runs[k].end;
1026
1027 TRACE("{ [%i -- %i]",run_start, run_end);
1028
1029 if (pcls[run_end] == BN)
1030 run_end = previousValidChar(pcls, run_end, runs[k].start);
1031
1032 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1033 {
1034 j = k+1;
1035 search:
1036 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1037 if (j < run_count && runs[i].e != runs[j].e)
1038 {
1039 j++;
1040 goto search;
1041 }
1042
1043 if (j != run_count)
1044 {
1045 int m;
1046 int l = current_isolated->length;
1047
1048 current_isolated->length += (runs[j].end - runs[j].start)+1;
1049 for (m = 0; l < current_isolated->length; l++, m++)
1050 {
1051 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1052 current_isolated->item[l].ch = lpString[runs[j].start+m];
1053 }
1054
1055 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1056
1057 run_end = runs[j].end;
1058 if (pcls[run_end] == BN)
1059 run_end = previousValidChar(pcls, run_end, runs[i].start);
1060 runs[j].start = -1;
1061 k = j;
1062 }
1063 else
1064 {
1065 run_end = uCount;
1066 break;
1067 }
1068 }
1069
1070 type_fence = previousValidChar(pcls, run_start, -1);
1071
1072 if (type_fence == -1)
1073 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1074 else
1075 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1076
1077 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1078
1079 if (run_end == uCount)
1080 current_isolated->eos = current_isolated->sos;
1081 else
1082 {
1083 /* eos could be an BN */
1084 if ( pcls[run_end] == BN )
1085 {
1086 real_end = previousValidChar(pcls, run_end, run_start-1);
1087 if (real_end < run_start)
1088 real_end = run_start;
1089 }
1090 else
1091 real_end = run_end;
1092
1093 type_fence = nextValidChar(pcls, run_end, uCount);
1094 if (type_fence == uCount)
1095 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1096 else
1097 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1098
1099 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1100 }
1101
1102 list_add_tail(set, &current_isolated->entry);
1103 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1104 }
1105 i++;
1106 }
1107
1108 HeapFree(GetProcessHeap(), 0, runs);
1109 }
1110
1111 /*************************************************************
1112 * BIDI_DeterminLevels
1113 */
1114 BOOL BIDI_DetermineLevels(
1115 LPCWSTR lpString, /* [in] The string for which information is to be returned */
1116 INT uCount, /* [in] Number of WCHARs in string. */
1117 const SCRIPT_STATE *s,
1118 const SCRIPT_CONTROL *c,
1119 WORD *lpOutLevels, /* [out] final string levels */
1120 WORD *lpOutOverrides /* [out] final string overrides */
1121 )
1122 {
1123 WORD *chartype;
1124 unsigned baselevel = 0;
1125 struct list IsolatingRuns;
1126 IsolatedRun *iso_run, *next;
1127
1128 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1129
1130 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
1131 if (!chartype)
1132 {
1133 WARN("Out of memory\n");
1134 return FALSE;
1135 }
1136
1137 baselevel = s->uBidiLevel;
1138
1139 classify(lpString, chartype, uCount, c);
1140 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1141
1142 memset(lpOutOverrides, 0, sizeof(WORD) * uCount);
1143
1144 /* resolve explicit */
1145 resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection);
1146 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1147
1148 /* X10/BD13: Computer Isolating runs */
1149 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1150
1151 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1152 {
1153 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1154
1155 /* resolve weak */
1156 resolveWeak(iso_run);
1157 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1158
1159 /* resolve neutrals */
1160 resolveNeutrals(iso_run);
1161 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1162
1163 list_remove(&iso_run->entry);
1164 HeapFree(GetProcessHeap(),0,iso_run);
1165 }
1166
1167 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1168 /* resolveImplicit */
1169 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1170
1171 /* resolveResolvedLevels*/
1172 classify(lpString, chartype, uCount, c);
1173 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1174
1175 HeapFree(GetProcessHeap(), 0, chartype);
1176 return TRUE;
1177 }
1178
1179 /* reverse cch indexes */
1180 static void reverse(int *pidx, int cch)
1181 {
1182 int temp;
1183 int ich = 0;
1184 for (; ich < --cch; ich++)
1185 {
1186 temp = pidx[ich];
1187 pidx[ich] = pidx[cch];
1188 pidx[cch] = temp;
1189 }
1190 }
1191
1192
1193 /*------------------------------------------------------------------------
1194 Functions: reorder/reorderLevel
1195
1196 Recursively reorders the display string
1197 "From the highest level down, reverse all characters at that level and
1198 higher, down to the lowest odd level"
1199
1200 Implements rule L2 of the Unicode bidi Algorithm.
1201
1202 Input: Array of embedding levels
1203 Character count
1204 Flag enabling reversal (set to false by initial caller)
1205
1206 In/Out: Text to reorder
1207
1208 Note: levels may exceed 15 resp. 61 on input.
1209
1210 Rule L3 - reorder combining marks is not implemented here
1211 Rule L4 - glyph mirroring is implemented as a display option below
1212
1213 Note: this should be applied a line at a time
1214 -------------------------------------------------------------------------*/
1215 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1216 {
1217 int ich = 0;
1218
1219 /* true as soon as first odd level encountered */
1220 fReverse = fReverse || odd(level);
1221
1222 for (; ich < cch; ich++)
1223 {
1224 if (plevel[ich] < level)
1225 {
1226 break;
1227 }
1228 else if (plevel[ich] > level)
1229 {
1230 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1231 cch - ich, fReverse) - 1;
1232 }
1233 }
1234 if (fReverse)
1235 {
1236 reverse(pIndexs, ich);
1237 }
1238 return ich;
1239 }
1240
1241 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1242 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1243 {
1244 int ich = 0;
1245 int newlevel = -1;
1246
1247 /* true as soon as first odd level encountered */
1248 fReverse = fReverse || odd(level);
1249
1250 for (; ich < cch; ich++)
1251 {
1252 if (plevel[ich] < level)
1253 break;
1254 else if (plevel[ich] > level)
1255 newlevel = ich;
1256 }
1257 if (fReverse)
1258 {
1259 reverse(pIndexs, ich);
1260 }
1261
1262 if (newlevel >= 0)
1263 {
1264 ich = 0;
1265 for (; ich < cch; ich++)
1266 if (plevel[ich] < level)
1267 break;
1268 else if (plevel[ich] > level)
1269 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1270 cch - ich, fReverse) - 1;
1271 }
1272
1273 return ich;
1274 }
1275
1276 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
1277 WORD* lpStrength)
1278 {
1279 int i;
1280 classify(lpString, lpStrength, uCount, c);
1281
1282 for ( i = 0; i < uCount; i++)
1283 {
1284 switch(lpStrength[i])
1285 {
1286 case L:
1287 case LRE:
1288 case LRO:
1289 case R:
1290 case AL:
1291 case RLE:
1292 case RLO:
1293 lpStrength[i] = BIDI_STRONG;
1294 break;
1295 case PDF:
1296 case EN:
1297 case ES:
1298 case ET:
1299 case AN:
1300 case CS:
1301 case BN:
1302 lpStrength[i] = BIDI_WEAK;
1303 break;
1304 case B:
1305 case S:
1306 case WS:
1307 case ON:
1308 default: /* Neutrals and NSM */
1309 lpStrength[i] = BIDI_NEUTRAL;
1310 }
1311 }
1312 return TRUE;
1313 }