Sync with trunk r65656.
[reactos.git] / dll / win32 / usp10 / bidi.c
1 /*
2 * Uniscribe BiDirectional handling
3 *
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 *
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 *
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 *
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
42 */
43
44 #include <windef.h>
45
46 #include <wine/list.h>
47
48 #include "usp10_internal.h"
49
50 extern const unsigned short bidi_bracket_table[];
51
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
53
54 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
55 #define MAX_DEPTH 125
56
57 /* HELPER FUNCTIONS AND DECLARATIONS */
58
59 /*------------------------------------------------------------------------
60 Bidirectional Character Types
61
62 as defined by the Unicode Bidirectional Algorithm Table 3-7.
63
64 Note:
65
66 The list of bidirectional character types here is not grouped the
67 same way as the table 3-7, since the numberic values for the types
68 are chosen to keep the state and action tables compact.
69 ------------------------------------------------------------------------*/
70 enum directions
71 {
72 /* input types */
73 /* ON MUST be zero, code relies on ON = NI = 0 */
74 ON = 0, /* Other Neutral */
75 L, /* Left Letter */
76 R, /* Right Letter */
77 AN, /* Arabic Number */
78 EN, /* European Number */
79 AL, /* Arabic Letter (Right-to-left) */
80 NSM, /* Non-spacing Mark */
81 CS, /* Common Separator */
82 ES, /* European Separator */
83 ET, /* European Terminator (post/prefix e.g. $ and %) */
84
85 /* resolved types */
86 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
87
88 /* input types, */
89 S, /* Segment Separator (TAB) // used only in L1 */
90 WS, /* White space // used only in L1 */
91 B, /* Paragraph Separator (aka as PS) */
92
93 /* types for explicit controls */
94 RLO, /* these are used only in X1-X9 */
95 RLE,
96 LRO,
97 LRE,
98 PDF,
99
100 LRI, /* Isolate formatting characters new with 6.3 */
101 RLI,
102 FSI,
103 PDI,
104
105 /* resolved types, also resolved directions */
106 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
107 };
108
109 static const char debug_type[][4] =
110 {
111 "ON", /* Other Neutral */
112 "L", /* Left Letter */
113 "R", /* Right Letter */
114 "AN", /* Arabic Number */
115 "EN", /* European Number */
116 "AL", /* Arabic Letter (Right-to-left) */
117 "NSM", /* Non-spacing Mark */
118 "CS", /* Common Separator */
119 "ES", /* European Separator */
120 "ET", /* European Terminator (post/prefix e.g. $ and %) */
121 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
122 "S", /* Segment Separator (TAB) // used only in L1 */
123 "WS", /* White space // used only in L1 */
124 "B", /* Paragraph Separator (aka as PS) */
125 "RLO", /* these are used only in X1-X9 */
126 "RLE",
127 "LRO",
128 "LRE",
129 "PDF",
130 "LRI", /* Isolate formatting characters new with 6.3 */
131 "RLI",
132 "FSI",
133 "PDI",
134 };
135
136 /* HELPER FUNCTIONS */
137
138 static inline void dump_types(const char* header, WORD *types, int start, int end)
139 {
140 int i, len = 0;
141 TRACE("%s:",header);
142 for (i = start; i < end && len < 200; i++)
143 {
144 TRACE(" %s",debug_type[types[i]]);
145 len += strlen(debug_type[types[i]])+1;
146 }
147 if (i != end)
148 TRACE("...");
149 TRACE("\n");
150 }
151
152 /* Convert the libwine information to the direction enum */
153 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
154 {
155 static const enum directions dir_map[16] =
156 {
157 L, /* unassigned defaults to L */
158 L,
159 R,
160 EN,
161 ES,
162 ET,
163 AN,
164 CS,
165 B,
166 S,
167 WS,
168 ON,
169 AL,
170 NSM,
171 BN,
172 PDF /* also LRE, LRO, RLE, RLO */
173 };
174
175 unsigned i;
176
177 for (i = 0; i < uCount; ++i)
178 {
179 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
180 switch (chartype[i])
181 {
182 case ES:
183 if (!c->fLegacyBidiClass) break;
184 switch (lpString[i])
185 {
186 case '-':
187 case '+': chartype[i] = NI; break;
188 case '/': chartype[i] = CS; break;
189 }
190 break;
191 case PDF:
192 switch (lpString[i])
193 {
194 case 0x202A: chartype[i] = LRE; break;
195 case 0x202B: chartype[i] = RLE; break;
196 case 0x202C: chartype[i] = PDF; break;
197 case 0x202D: chartype[i] = LRO; break;
198 case 0x202E: chartype[i] = RLO; break;
199 case 0x2066: chartype[i] = LRI; break;
200 case 0x2067: chartype[i] = RLI; break;
201 case 0x2068: chartype[i] = FSI; break;
202 case 0x2069: chartype[i] = PDI; break;
203 }
204 break;
205 }
206 }
207 }
208
209 /* RESOLVE EXPLICIT */
210
211 static WORD GreaterEven(int i)
212 {
213 return odd(i) ? i + 1 : i + 2;
214 }
215
216 static WORD GreaterOdd(int i)
217 {
218 return odd(i) ? i + 2 : i + 1;
219 }
220
221 static WORD EmbeddingDirection(int level)
222 {
223 return odd(level) ? R : L;
224 }
225
226 /*------------------------------------------------------------------------
227 Function: resolveExplicit
228
229 Recursively resolves explicit embedding levels and overrides.
230 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
231
232 Input: Base embedding level and direction
233 Character count
234
235 Output: Array of embedding levels
236
237 In/Out: Array of direction classes
238
239
240 Note: The function uses two simple counters to keep track of
241 matching explicit codes and PDF. Use the default argument for
242 the outermost call. The nesting counter counts the recursion
243 depth and not the embedding level.
244 ------------------------------------------------------------------------*/
245 typedef struct tagStackItem {
246 int level;
247 int override;
248 BOOL isolate;
249 } StackItem;
250
251 #define push_stack(l,o,i) \
252 do { stack_top--; \
253 stack[stack_top].level = l; \
254 stack[stack_top].override = o; \
255 stack[stack_top].isolate = i;} while(0)
256
257 #define pop_stack() do { stack_top++; } while(0)
258
259 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
260
261 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, int count)
262 {
263 /* X1 */
264 int overflow_isolate_count = 0;
265 int overflow_embedding_count = 0;
266 int valid_isolate_count = 0;
267 int i;
268
269 StackItem stack[MAX_DEPTH+2];
270 int stack_top = MAX_DEPTH+1;
271
272 stack[stack_top].level = level;
273 stack[stack_top].override = NI;
274 stack[stack_top].isolate = FALSE;
275
276 for (i = 0; i < count; i++)
277 {
278 /* X2 */
279 if (pclass[i] == RLE)
280 {
281 int least_odd = GreaterOdd(stack[stack_top].level);
282 poutLevel[i] = stack[stack_top].level;
283 if (valid_level(least_odd))
284 push_stack(least_odd, NI, FALSE);
285 else if (overflow_isolate_count == 0)
286 overflow_embedding_count++;
287 }
288 /* X3 */
289 else if (pclass[i] == LRE)
290 {
291 int least_even = GreaterEven(stack[stack_top].level);
292 poutLevel[i] = stack[stack_top].level;
293 if (valid_level(least_even))
294 push_stack(least_even, NI, FALSE);
295 else if (overflow_isolate_count == 0)
296 overflow_embedding_count++;
297 }
298 /* X4 */
299 else if (pclass[i] == RLO)
300 {
301 int least_odd = GreaterOdd(stack[stack_top].level);
302 poutLevel[i] = stack[stack_top].level;
303 if (valid_level(least_odd))
304 push_stack(least_odd, R, FALSE);
305 else if (overflow_isolate_count == 0)
306 overflow_embedding_count++;
307 }
308 /* X5 */
309 else if (pclass[i] == LRO)
310 {
311 int least_even = GreaterEven(stack[stack_top].level);
312 poutLevel[i] = stack[stack_top].level;
313 if (valid_level(least_even))
314 push_stack(least_even, L, FALSE);
315 else if (overflow_isolate_count == 0)
316 overflow_embedding_count++;
317 }
318 /* X5a */
319 else if (pclass[i] == RLI)
320 {
321 int least_odd = GreaterOdd(stack[stack_top].level);
322 poutLevel[i] = stack[stack_top].level;
323 if (valid_level(least_odd))
324 {
325 valid_isolate_count++;
326 push_stack(least_odd, NI, TRUE);
327 }
328 else
329 overflow_isolate_count++;
330 }
331 /* X5b */
332 else if (pclass[i] == LRI)
333 {
334 int least_even = GreaterEven(stack[stack_top].level);
335 poutLevel[i] = stack[stack_top].level;
336 if (valid_level(least_even))
337 {
338 valid_isolate_count++;
339 push_stack(least_even, NI, TRUE);
340 }
341 else
342 overflow_isolate_count++;
343 }
344 /* X5c */
345 else if (pclass[i] == FSI)
346 {
347 int j;
348 int new_level = 0;
349 int skipping = 0;
350 poutLevel[i] = stack[stack_top].level;
351 for (j = i+1; j < count; j++)
352 {
353 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
354 {
355 skipping++;
356 continue;
357 }
358 else if (pclass[j] == PDI)
359 {
360 if (skipping)
361 skipping --;
362 else
363 break;
364 continue;
365 }
366
367 if (skipping) continue;
368
369 if (pclass[j] == L)
370 {
371 new_level = 0;
372 break;
373 }
374 else if (pclass[j] == R || pclass[j] == AL)
375 {
376 new_level = 1;
377 break;
378 }
379 }
380 if (odd(new_level))
381 {
382 int least_odd = GreaterOdd(stack[stack_top].level);
383 if (valid_level(least_odd))
384 {
385 valid_isolate_count++;
386 push_stack(least_odd, NI, TRUE);
387 }
388 else
389 overflow_isolate_count++;
390 }
391 else
392 {
393 int least_even = GreaterEven(stack[stack_top].level);
394 if (valid_level(least_even))
395 {
396 valid_isolate_count++;
397 push_stack(least_even, NI, TRUE);
398 }
399 else
400 overflow_isolate_count++;
401 }
402 }
403 /* X6 */
404 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
405 {
406 poutLevel[i] = stack[stack_top].level;
407 if (stack[stack_top].override != NI)
408 pclass[i] = stack[stack_top].override;
409 }
410 /* X6a */
411 else if (pclass[i] == PDI)
412 {
413 if (overflow_isolate_count) overflow_isolate_count--;
414 else if (!valid_isolate_count) {/* do nothing */}
415 else
416 {
417 overflow_embedding_count = 0;
418 while (!stack[stack_top].isolate) pop_stack();
419 pop_stack();
420 valid_isolate_count --;
421 }
422 poutLevel[i] = stack[stack_top].level;
423 }
424 /* X7 */
425 else if (pclass[i] == PDF)
426 {
427 poutLevel[i] = stack[stack_top].level;
428 if (overflow_isolate_count) {/* do nothing */}
429 else if (overflow_embedding_count) overflow_embedding_count--;
430 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
431 pop_stack();
432 }
433 /* X8: Nothing */
434 }
435 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
436 for (i = 0; i < count ; i++)
437 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
438 pclass[i] = BN;
439 }
440
441 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
442 {
443 if (index == -1 || index == back_fence) return index;
444 index --;
445 while (index > back_fence && pcls[index] == BN) index --;
446 return index;
447 }
448
449 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
450 {
451 if (index == front_fence) return index;
452 index ++;
453 while (index < front_fence && pcls[index] == BN) index ++;
454 return index;
455 }
456
457 typedef struct tagRun
458 {
459 int start;
460 int end;
461 WORD e;
462 } Run;
463
464 typedef struct tagRunChar
465 {
466 WCHAR ch;
467 WORD *pcls;
468 } RunChar;
469
470 typedef struct tagIsolatedRun
471 {
472 struct list entry;
473 int length;
474 WORD sos;
475 WORD eos;
476 WORD e;
477
478 RunChar item[1];
479 } IsolatedRun;
480
481 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
482 {
483 if (index >= (iso_run->length-1)) return -1;
484 index ++;
485 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
486 if (index == iso_run->length) return -1;
487 return index;
488 }
489
490 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
491 {
492
493 if (index <= 0) return -1;
494 index --;
495 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
496 return index;
497 }
498
499 static inline int iso_previousChar(IsolatedRun *iso_run, int index)
500 {
501 if (index <= 0) return -1;
502 return index --;
503 }
504
505 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
506 {
507 int i, len = 0;
508 TRACE("%s:",header);
509 TRACE("[ ");
510 for (i = 0; i < iso_run->length && len < 200; i++)
511 {
512 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
513 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
514 }
515 if (i != iso_run->length)
516 TRACE("...");
517 TRACE(" ]\n");
518 }
519
520 /*------------------------------------------------------------------------
521 Function: resolveWeak
522
523 Resolves the directionality of numeric and other weak character types
524
525 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
526
527 Input: Array of embedding levels
528 Character count
529
530 In/Out: Array of directional classes
531
532 Note: On input only these directional classes are expected
533 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
534 ------------------------------------------------------------------------*/
535
536 static void resolveWeak(IsolatedRun * iso_run)
537 {
538 int i;
539
540 /* W1 */
541 for (i=0; i < iso_run->length; i++)
542 {
543 if (*iso_run->item[i].pcls == NSM)
544 {
545 int j = iso_previousValidChar(iso_run, i);
546 if (j == -1)
547 *iso_run->item[i].pcls = iso_run->sos;
548 else if (*iso_run->item[j].pcls >= LRI)
549 *iso_run->item[i].pcls = ON;
550 else
551 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
552 }
553 }
554
555 /* W2 */
556 for (i = 0; i < iso_run->length; i++)
557 {
558 if (*iso_run->item[i].pcls == EN)
559 {
560 int j = iso_previousValidChar(iso_run, i);
561 while (j > -1)
562 {
563 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
564 {
565 if (*iso_run->item[j].pcls == AL)
566 *iso_run->item[i].pcls = AN;
567 break;
568 }
569 j = iso_previousValidChar(iso_run, j);
570 }
571 }
572 }
573
574 /* W3 */
575 for (i = 0; i < iso_run->length; i++)
576 {
577 if (*iso_run->item[i].pcls == AL)
578 *iso_run->item[i].pcls = R;
579 }
580
581 /* W4 */
582 for (i = 0; i < iso_run->length; i++)
583 {
584 if (*iso_run->item[i].pcls == ES)
585 {
586 int b = iso_previousValidChar(iso_run, i);
587 int f = iso_nextValidChar(iso_run, i);
588
589 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
590 *iso_run->item[i].pcls = EN;
591 }
592 else if (*iso_run->item[i].pcls == CS)
593 {
594 int b = iso_previousValidChar(iso_run, i);
595 int f = iso_nextValidChar(iso_run, i);
596
597 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
598 *iso_run->item[i].pcls = EN;
599 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
600 *iso_run->item[i].pcls = AN;
601 }
602 }
603
604 /* W5 */
605 for (i = 0; i < iso_run->length; i++)
606 {
607 if (*iso_run->item[i].pcls == ET)
608 {
609 int j;
610 for (j = i-1 ; j > -1; j--)
611 {
612 if (*iso_run->item[j].pcls == BN) continue;
613 if (*iso_run->item[j].pcls == ET) continue;
614 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
615 else break;
616 }
617 if (*iso_run->item[i].pcls == ET)
618 {
619 for (j = i+1; j < iso_run->length; j++)
620 {
621 if (*iso_run->item[j].pcls == BN) continue;
622 if (*iso_run->item[j].pcls == ET) continue;
623 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
624 else break;
625 }
626 }
627 }
628 }
629
630 /* W6 */
631 for (i = 0; i < iso_run->length; i++)
632 {
633 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
634 {
635 int b = i-1;
636 int f = i+1;
637 if (b > -1 && *iso_run->item[b].pcls == BN)
638 *iso_run->item[b].pcls = ON;
639 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
640 *iso_run->item[f].pcls = ON;
641
642 *iso_run->item[i].pcls = ON;
643 }
644 }
645
646 /* W7 */
647 for (i = 0; i < iso_run->length; i++)
648 {
649 if (*iso_run->item[i].pcls == EN)
650 {
651 int j;
652 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
653 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
654 {
655 if (*iso_run->item[j].pcls == L)
656 *iso_run->item[i].pcls = L;
657 break;
658 }
659 if (iso_run->sos == L && j == -1)
660 *iso_run->item[i].pcls = L;
661 }
662 }
663 }
664
665 typedef struct tagBracketPair
666 {
667 int start;
668 int end;
669 } BracketPair;
670
671 static int compr(const void *a, const void* b)
672 {
673 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
674 }
675
676 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
677 {
678 WCHAR *open_stack;
679 int *stack_index;
680 int stack_top = iso_run->length;
681 BracketPair *out = NULL;
682 int pair_count = 0;
683 int i;
684
685 open_stack = HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR) * iso_run->length);
686 stack_index = HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run->length);
687
688 for (i = 0; i < iso_run->length; i++)
689 {
690 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
691 if (ubv)
692 {
693 if (!out)
694 {
695 out = HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair));
696 out[0].start = -1;
697 }
698
699 if ((ubv >> 8) == 0)
700 {
701 stack_top --;
702 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
703 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
704 if (open_stack[stack_top] == 0x232A)
705 open_stack[stack_top] = 0x3009;
706 stack_index[stack_top] = i;
707 }
708 else if ((ubv >> 8) == 1)
709 {
710 int j;
711 if (stack_top == iso_run->length) continue;
712 for (j = stack_top; j < iso_run->length; j++)
713 {
714 WCHAR c = iso_run->item[i].ch;
715 if (c == 0x232A) c = 0x3009;
716 if (c == open_stack[j])
717 {
718 out[pair_count].start = stack_index[j];
719 out[pair_count].end = i;
720 pair_count++;
721 out = HeapReAlloc(GetProcessHeap(), 0, out, sizeof(BracketPair) * (pair_count+1));
722 out[pair_count].start = -1;
723 stack_top = j+1;
724 break;
725 }
726 }
727 }
728 }
729 }
730 if (pair_count == 0)
731 {
732 HeapFree(GetProcessHeap(),0,out);
733 out = NULL;
734 }
735 else if (pair_count > 1)
736 qsort(out, pair_count, sizeof(BracketPair), compr);
737
738 HeapFree(GetProcessHeap(), 0, open_stack);
739 HeapFree(GetProcessHeap(), 0, stack_index);
740 return out;
741 }
742
743 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
744
745 /*------------------------------------------------------------------------
746 Function: resolveNeutrals
747
748 Resolves the directionality of neutral character types.
749
750 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
751
752 Input: Array of embedding levels
753 Character count
754 Baselevel
755
756 In/Out: Array of directional classes
757
758 Note: On input only these directional classes are expected
759 R, L, NI, AN, EN and BN
760
761 W8 resolves a number of ENs to L
762 ------------------------------------------------------------------------*/
763 static void resolveNeutrals(IsolatedRun *iso_run)
764 {
765 int i;
766 BracketPair *pairs = NULL;
767
768 /* Translate isolates into NI */
769 for (i = 0; i < iso_run->length; i++)
770 {
771 if (*iso_run->item[i].pcls >= LRI)
772 *iso_run->item[i].pcls = NI;
773
774 switch(*iso_run->item[i].pcls)
775 {
776 case B:
777 case S:
778 case WS: *iso_run->item[i].pcls = NI;
779 }
780
781 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
782 }
783
784 /* N0: Skipping bracketed pairs for now */
785 pairs = computeBracketPairs(iso_run);
786 if (pairs)
787 {
788 BracketPair *p = &pairs[0];
789 int i = 0;
790 while (p->start >= 0)
791 {
792 int j;
793 int e = EmbeddingDirection(iso_run->e);
794 int o = EmbeddingDirection(iso_run->e+1);
795 BOOL flag_o = FALSE;
796 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
797
798 /* N0.b */
799 for (j = p->start+1; j < p->end; j++)
800 {
801 if (N0_TYPE(*iso_run->item[j].pcls) == e)
802 {
803 *iso_run->item[p->start].pcls = e;
804 *iso_run->item[p->end].pcls = e;
805 break;
806 }
807 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
808 flag_o = TRUE;
809 }
810 /* N0.c */
811 if (j == p->end && flag_o)
812 {
813 for (j = p->start; j >= 0; j--)
814 {
815 if (N0_TYPE(*iso_run->item[j].pcls) == o)
816 {
817 *iso_run->item[p->start].pcls = o;
818 *iso_run->item[p->end].pcls = o;
819 break;
820 }
821 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
822 {
823 *iso_run->item[p->start].pcls = e;
824 *iso_run->item[p->end].pcls = e;
825 break;
826 }
827 }
828 if ( j < 0 )
829 {
830 *iso_run->item[p->start].pcls = iso_run->sos;
831 *iso_run->item[p->end].pcls = iso_run->sos;
832 }
833 }
834
835 i++;
836 p = &pairs[i];
837 }
838 HeapFree(GetProcessHeap(),0,pairs);
839 }
840
841 /* N1 */
842 for (i = 0; i < iso_run->length; i++)
843 {
844 WORD l,r;
845
846 if (*iso_run->item[i].pcls == NI)
847 {
848 int j;
849 int b = iso_previousValidChar(iso_run, i);
850
851 if (b == -1)
852 {
853 l = iso_run->sos;
854 b = 0;
855 }
856 else
857 {
858 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
859 l = R;
860 else if (*iso_run->item[b].pcls == L)
861 l = L;
862 else /* No string type */
863 continue;
864 }
865 j = iso_nextValidChar(iso_run, i);
866 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
867
868 if (j == -1)
869 {
870 r = iso_run->eos;
871 j = iso_run->length;
872 }
873 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
874 r = R;
875 else if (*iso_run->item[j].pcls == L)
876 r = L;
877 else /* No string type */
878 continue;
879
880 if (r == l)
881 {
882 for (b = i; b < j && b < iso_run->length; b++)
883 *iso_run->item[b].pcls = r;
884 }
885 }
886 }
887
888 /* N2 */
889 for (i = 0; i < iso_run->length; i++)
890 {
891 if (*iso_run->item[i].pcls == NI)
892 {
893 int b = i-1;
894 int f = i+1;
895 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
896 if (b > -1 && *iso_run->item[b].pcls == BN)
897 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
898 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
899 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
900 }
901 }
902 }
903
904 /*------------------------------------------------------------------------
905 Function: resolveImplicit
906
907 Recursively resolves implicit embedding levels.
908 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
909
910 Input: Array of direction classes
911 Character count
912 Base level
913
914 In/Out: Array of embedding levels
915
916 Note: levels may exceed 15 on output.
917 Accepted subset of direction classes
918 R, L, AN, EN
919 ------------------------------------------------------------------------*/
920 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
921 {
922 int i;
923
924 /* I1/2 */
925 for (i = sos; i <= eos; i++)
926 {
927 if (pcls[i] == BN)
928 continue;
929
930 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
931 ASSERT(pcls[i] < 5); /* "Out of range." */
932
933 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
934 plevel[i]++;
935 else if (!odd(plevel[i]) && pcls[i] == R)
936 plevel[i]++;
937 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
938 plevel[i]+=2;
939 }
940 }
941
942 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
943 {
944 int i;
945
946 /* L1 */
947 for (i = sos; i <= eos; i++)
948 {
949 if (pcls[i] == B || pcls[i] == S)
950 {
951 int j = i -1;
952 while (i > sos && j >= sos &&
953 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
954 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
955 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
956 plevel[j--] = baselevel;
957 plevel[i] = baselevel;
958 }
959 if (i == eos &&
960 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
961 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
962 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
963 {
964 int j = i;
965 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
966 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
967 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
968 plevel[j--] = baselevel;
969 }
970 }
971 }
972
973 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, WORD *pLevel, LPCWSTR lpString, int uCount, struct list *set)
974 {
975 int run_start, run_end, i;
976 int run_count = 0;
977 Run *runs;
978 IsolatedRun *current_isolated;
979
980 runs = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(Run));
981 if (!runs) return;
982
983 list_init(set);
984
985 /* Build Runs */
986 run_start = 0;
987 while (run_start < uCount)
988 {
989 run_end = nextValidChar(pcls, run_start, uCount);
990 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
991 run_end --;
992 runs[run_count].start = run_start;
993 runs[run_count].end = run_end;
994 runs[run_count].e = pLevel[run_start];
995 run_start = nextValidChar(pcls, run_end, uCount);
996 run_count++;
997 }
998
999 /* Build Isolating Runs */
1000 i = 0;
1001 while (i < run_count)
1002 {
1003 int k = i;
1004 if (runs[k].start >= 0)
1005 {
1006 int type_fence, real_end;
1007 int j;
1008 current_isolated = HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun) + sizeof(RunChar)*uCount);
1009 if (!current_isolated) break;
1010
1011 run_start = runs[k].start;
1012 current_isolated->e = runs[k].e;
1013 current_isolated->length = (runs[k].end - runs[k].start)+1;
1014
1015 for (j = 0; j < current_isolated->length; j++)
1016 {
1017 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1018 current_isolated->item[j].ch = lpString[runs[k].start+j];
1019 }
1020
1021 run_end = runs[k].end;
1022
1023 TRACE("{ [%i -- %i]",run_start, run_end);
1024
1025 if (pcls[run_end] == BN)
1026 run_end = previousValidChar(pcls, run_end, runs[k].start);
1027
1028 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1029 {
1030 j = k+1;
1031 search:
1032 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1033 if (j < run_count && runs[i].e != runs[j].e)
1034 {
1035 j++;
1036 goto search;
1037 }
1038
1039 if (j != run_count)
1040 {
1041 int m;
1042 int l = current_isolated->length;
1043
1044 current_isolated->length += (runs[j].end - runs[j].start)+1;
1045 for (m = 0; l < current_isolated->length; l++, m++)
1046 {
1047 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1048 current_isolated->item[l].ch = lpString[runs[j].start+m];
1049 }
1050
1051 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1052
1053 run_end = runs[j].end;
1054 if (pcls[run_end] == BN)
1055 run_end = previousValidChar(pcls, run_end, runs[i].start);
1056 runs[j].start = -1;
1057 k = j;
1058 }
1059 else
1060 {
1061 run_end = uCount;
1062 break;
1063 }
1064 }
1065
1066 type_fence = previousValidChar(pcls, run_start, -1);
1067
1068 if (type_fence == -1)
1069 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1070 else
1071 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1072
1073 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1074
1075 if (run_end == uCount)
1076 current_isolated->eos = current_isolated->sos;
1077 else
1078 {
1079 /* eos could be an BN */
1080 if ( pcls[run_end] == BN )
1081 {
1082 real_end = previousValidChar(pcls, run_end, run_start-1);
1083 if (real_end < run_start)
1084 real_end = run_start;
1085 }
1086 else
1087 real_end = run_end;
1088
1089 type_fence = nextValidChar(pcls, run_end, uCount);
1090 if (type_fence == uCount)
1091 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1092 else
1093 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1094
1095 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1096 }
1097
1098 list_add_tail(set, &current_isolated->entry);
1099 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1100 }
1101 i++;
1102 }
1103
1104 HeapFree(GetProcessHeap(), 0, runs);
1105 }
1106
1107 /*************************************************************
1108 * BIDI_DeterminLevels
1109 */
1110 BOOL BIDI_DetermineLevels(
1111 LPCWSTR lpString, /* [in] The string for which information is to be returned */
1112 INT uCount, /* [in] Number of WCHARs in string. */
1113 const SCRIPT_STATE *s,
1114 const SCRIPT_CONTROL *c,
1115 WORD *lpOutLevels /* [out] final string levels */
1116 )
1117 {
1118 WORD *chartype;
1119 unsigned baselevel = 0;
1120 struct list IsolatingRuns;
1121 IsolatedRun *iso_run, *next;
1122
1123 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1124
1125 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
1126 if (!chartype)
1127 {
1128 WARN("Out of memory\n");
1129 return FALSE;
1130 }
1131
1132 baselevel = s->uBidiLevel;
1133
1134 classify(lpString, chartype, uCount, c);
1135 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1136
1137 /* resolve explicit */
1138 resolveExplicit(baselevel, chartype, lpOutLevels, uCount);
1139 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1140
1141 /* X10/BD13: Computer Isolating runs */
1142 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1143
1144 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1145 {
1146 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1147
1148 /* resolve weak */
1149 resolveWeak(iso_run);
1150 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1151
1152 /* resolve neutrals */
1153 resolveNeutrals(iso_run);
1154 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1155
1156 list_remove(&iso_run->entry);
1157 HeapFree(GetProcessHeap(),0,iso_run);
1158 }
1159
1160 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1161 /* resolveImplicit */
1162 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1163
1164 /* resolveResolvedLevels*/
1165 classify(lpString, chartype, uCount, c);
1166 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1167
1168 HeapFree(GetProcessHeap(), 0, chartype);
1169 return TRUE;
1170 }
1171
1172 /* reverse cch indexes */
1173 static void reverse(int *pidx, int cch)
1174 {
1175 int temp;
1176 int ich = 0;
1177 for (; ich < --cch; ich++)
1178 {
1179 temp = pidx[ich];
1180 pidx[ich] = pidx[cch];
1181 pidx[cch] = temp;
1182 }
1183 }
1184
1185
1186 /*------------------------------------------------------------------------
1187 Functions: reorder/reorderLevel
1188
1189 Recursively reorders the display string
1190 "From the highest level down, reverse all characters at that level and
1191 higher, down to the lowest odd level"
1192
1193 Implements rule L2 of the Unicode bidi Algorithm.
1194
1195 Input: Array of embedding levels
1196 Character count
1197 Flag enabling reversal (set to false by initial caller)
1198
1199 In/Out: Text to reorder
1200
1201 Note: levels may exceed 15 resp. 61 on input.
1202
1203 Rule L3 - reorder combining marks is not implemented here
1204 Rule L4 - glyph mirroring is implemented as a display option below
1205
1206 Note: this should be applied a line at a time
1207 -------------------------------------------------------------------------*/
1208 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1209 {
1210 int ich = 0;
1211
1212 /* true as soon as first odd level encountered */
1213 fReverse = fReverse || odd(level);
1214
1215 for (; ich < cch; ich++)
1216 {
1217 if (plevel[ich] < level)
1218 {
1219 break;
1220 }
1221 else if (plevel[ich] > level)
1222 {
1223 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1224 cch - ich, fReverse) - 1;
1225 }
1226 }
1227 if (fReverse)
1228 {
1229 reverse(pIndexs, ich);
1230 }
1231 return ich;
1232 }
1233
1234 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1235 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1236 {
1237 int ich = 0;
1238 int newlevel = -1;
1239
1240 /* true as soon as first odd level encountered */
1241 fReverse = fReverse || odd(level);
1242
1243 for (; ich < cch; ich++)
1244 {
1245 if (plevel[ich] < level)
1246 break;
1247 else if (plevel[ich] > level)
1248 newlevel = ich;
1249 }
1250 if (fReverse)
1251 {
1252 reverse(pIndexs, ich);
1253 }
1254
1255 if (newlevel >= 0)
1256 {
1257 ich = 0;
1258 for (; ich < cch; ich++)
1259 if (plevel[ich] < level)
1260 break;
1261 else if (plevel[ich] > level)
1262 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1263 cch - ich, fReverse) - 1;
1264 }
1265
1266 return ich;
1267 }
1268
1269 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
1270 WORD* lpStrength)
1271 {
1272 int i;
1273 classify(lpString, lpStrength, uCount, c);
1274
1275 for ( i = 0; i < uCount; i++)
1276 {
1277 switch(lpStrength[i])
1278 {
1279 case L:
1280 case LRE:
1281 case LRO:
1282 case R:
1283 case AL:
1284 case RLE:
1285 case RLO:
1286 lpStrength[i] = BIDI_STRONG;
1287 break;
1288 case PDF:
1289 case EN:
1290 case ES:
1291 case ET:
1292 case AN:
1293 case CS:
1294 case BN:
1295 lpStrength[i] = BIDI_WEAK;
1296 break;
1297 case B:
1298 case S:
1299 case WS:
1300 case ON:
1301 default: /* Neutrals and NSM */
1302 lpStrength[i] = BIDI_NEUTRAL;
1303 }
1304 }
1305 return TRUE;
1306 }