[USP10] Sync with Wine Staging 3.3. CORE-14434
[reactos.git] / dll / win32 / usp10 / bidi.c
1 /*
2 * Uniscribe BiDirectional handling
3 *
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 *
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 *
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 *
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
42 */
43
44 #include "config.h"
45
46 #include <stdarg.h>
47 #include <stdlib.h>
48 #include "windef.h"
49 #include "winbase.h"
50 #include "wingdi.h"
51 #include "winnls.h"
52 #include "usp10.h"
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/heap.h"
56 #include "wine/list.h"
57
58 #include "usp10_internal.h"
59
60 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
61
62 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
63
64 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
65 #define MAX_DEPTH 125
66
67 /* HELPER FUNCTIONS AND DECLARATIONS */
68
69 /*------------------------------------------------------------------------
70 Bidirectional Character Types
71
72 as defined by the Unicode Bidirectional Algorithm Table 3-7.
73
74 Note:
75
76 The list of bidirectional character types here is not grouped the
77 same way as the table 3-7, since the numberic values for the types
78 are chosen to keep the state and action tables compact.
79 ------------------------------------------------------------------------*/
80 enum directions
81 {
82 /* input types */
83 /* ON MUST be zero, code relies on ON = NI = 0 */
84 ON = 0, /* Other Neutral */
85 L, /* Left Letter */
86 R, /* Right Letter */
87 AN, /* Arabic Number */
88 EN, /* European Number */
89 AL, /* Arabic Letter (Right-to-left) */
90 NSM, /* Non-spacing Mark */
91 CS, /* Common Separator */
92 ES, /* European Separator */
93 ET, /* European Terminator (post/prefix e.g. $ and %) */
94
95 /* resolved types */
96 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
97
98 /* input types, */
99 S, /* Segment Separator (TAB) // used only in L1 */
100 WS, /* White space // used only in L1 */
101 B, /* Paragraph Separator (aka as PS) */
102
103 /* types for explicit controls */
104 RLO, /* these are used only in X1-X9 */
105 RLE,
106 LRO,
107 LRE,
108 PDF,
109
110 LRI, /* Isolate formatting characters new with 6.3 */
111 RLI,
112 FSI,
113 PDI,
114
115 /* resolved types, also resolved directions */
116 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
117 };
118
119 static const char debug_type[][4] =
120 {
121 "ON", /* Other Neutral */
122 "L", /* Left Letter */
123 "R", /* Right Letter */
124 "AN", /* Arabic Number */
125 "EN", /* European Number */
126 "AL", /* Arabic Letter (Right-to-left) */
127 "NSM", /* Non-spacing Mark */
128 "CS", /* Common Separator */
129 "ES", /* European Separator */
130 "ET", /* European Terminator (post/prefix e.g. $ and %) */
131 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
132 "S", /* Segment Separator (TAB) // used only in L1 */
133 "WS", /* White space // used only in L1 */
134 "B", /* Paragraph Separator (aka as PS) */
135 "RLO", /* these are used only in X1-X9 */
136 "RLE",
137 "LRO",
138 "LRE",
139 "PDF",
140 "LRI", /* Isolate formatting characters new with 6.3 */
141 "RLI",
142 "FSI",
143 "PDI",
144 };
145
146 /* HELPER FUNCTIONS */
147
148 static inline void dump_types(const char* header, WORD *types, int start, int end)
149 {
150 int i, len = 0;
151 TRACE("%s:",header);
152 for (i = start; i < end && len < 200; i++)
153 {
154 TRACE(" %s",debug_type[types[i]]);
155 len += strlen(debug_type[types[i]])+1;
156 }
157 if (i != end)
158 TRACE("...");
159 TRACE("\n");
160 }
161
162 /* Convert the libwine information to the direction enum */
163 static void classify(const WCHAR *string, WORD *chartype, DWORD count, const SCRIPT_CONTROL *c)
164 {
165 static const enum directions dir_map[16] =
166 {
167 L, /* unassigned defaults to L */
168 L,
169 R,
170 EN,
171 ES,
172 ET,
173 AN,
174 CS,
175 B,
176 S,
177 WS,
178 ON,
179 AL,
180 NSM,
181 BN,
182 PDF /* also LRE, LRO, RLE, RLO */
183 };
184
185 unsigned i;
186
187 for (i = 0; i < count; ++i)
188 {
189 chartype[i] = dir_map[get_char_typeW(string[i]) >> 12];
190 switch (chartype[i])
191 {
192 case ES:
193 if (!c->fLegacyBidiClass) break;
194 switch (string[i])
195 {
196 case '-':
197 case '+': chartype[i] = NI; break;
198 case '/': chartype[i] = CS; break;
199 }
200 break;
201 case PDF:
202 switch (string[i])
203 {
204 case 0x202A: chartype[i] = LRE; break;
205 case 0x202B: chartype[i] = RLE; break;
206 case 0x202C: chartype[i] = PDF; break;
207 case 0x202D: chartype[i] = LRO; break;
208 case 0x202E: chartype[i] = RLO; break;
209 case 0x2066: chartype[i] = LRI; break;
210 case 0x2067: chartype[i] = RLI; break;
211 case 0x2068: chartype[i] = FSI; break;
212 case 0x2069: chartype[i] = PDI; break;
213 }
214 break;
215 }
216 }
217 }
218
219 /* RESOLVE EXPLICIT */
220
221 static WORD GreaterEven(int i)
222 {
223 return odd(i) ? i + 1 : i + 2;
224 }
225
226 static WORD GreaterOdd(int i)
227 {
228 return odd(i) ? i + 2 : i + 1;
229 }
230
231 static WORD EmbeddingDirection(int level)
232 {
233 return odd(level) ? R : L;
234 }
235
236 /*------------------------------------------------------------------------
237 Function: resolveExplicit
238
239 Recursively resolves explicit embedding levels and overrides.
240 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
241
242 Input: Base embedding level and direction
243 Character count
244
245 Output: Array of embedding levels
246
247 In/Out: Array of direction classes
248
249
250 Note: The function uses two simple counters to keep track of
251 matching explicit codes and PDF. Use the default argument for
252 the outermost call. The nesting counter counts the recursion
253 depth and not the embedding level.
254 ------------------------------------------------------------------------*/
255 typedef struct tagStackItem {
256 int level;
257 int override;
258 BOOL isolate;
259 } StackItem;
260
261 #define push_stack(l,o,i) \
262 do { stack_top--; \
263 stack[stack_top].level = l; \
264 stack[stack_top].override = o; \
265 stack[stack_top].isolate = i;} while(0)
266
267 #define pop_stack() do { stack_top++; } while(0)
268
269 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
270
271 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride)
272 {
273 /* X1 */
274 int overflow_isolate_count = 0;
275 int overflow_embedding_count = 0;
276 int valid_isolate_count = 0;
277 int i;
278
279 StackItem stack[MAX_DEPTH+2];
280 int stack_top = MAX_DEPTH+1;
281
282 stack[stack_top].level = level;
283 stack[stack_top].override = NI;
284 stack[stack_top].isolate = FALSE;
285
286 if (initialOverride)
287 {
288 if (odd(level))
289 push_stack(level, R, FALSE);
290 else
291 push_stack(level, L, FALSE);
292 }
293
294 for (i = 0; i < count; i++)
295 {
296 poutOverrides[i] = stack[stack_top].override;
297
298 /* X2 */
299 if (pclass[i] == RLE)
300 {
301 int least_odd = GreaterOdd(stack[stack_top].level);
302 poutLevel[i] = stack[stack_top].level;
303 if (valid_level(least_odd))
304 push_stack(least_odd, NI, FALSE);
305 else if (overflow_isolate_count == 0)
306 overflow_embedding_count++;
307 }
308 /* X3 */
309 else if (pclass[i] == LRE)
310 {
311 int least_even = GreaterEven(stack[stack_top].level);
312 poutLevel[i] = stack[stack_top].level;
313 if (valid_level(least_even))
314 push_stack(least_even, NI, FALSE);
315 else if (overflow_isolate_count == 0)
316 overflow_embedding_count++;
317 }
318 /* X4 */
319 else if (pclass[i] == RLO)
320 {
321 int least_odd = GreaterOdd(stack[stack_top].level);
322 poutLevel[i] = stack[stack_top].level;
323 if (valid_level(least_odd))
324 push_stack(least_odd, R, FALSE);
325 else if (overflow_isolate_count == 0)
326 overflow_embedding_count++;
327 }
328 /* X5 */
329 else if (pclass[i] == LRO)
330 {
331 int least_even = GreaterEven(stack[stack_top].level);
332 poutLevel[i] = stack[stack_top].level;
333 if (valid_level(least_even))
334 push_stack(least_even, L, FALSE);
335 else if (overflow_isolate_count == 0)
336 overflow_embedding_count++;
337 }
338 /* X5a */
339 else if (pclass[i] == RLI)
340 {
341 int least_odd = GreaterOdd(stack[stack_top].level);
342 poutLevel[i] = stack[stack_top].level;
343 if (valid_level(least_odd))
344 {
345 valid_isolate_count++;
346 push_stack(least_odd, NI, TRUE);
347 }
348 else
349 overflow_isolate_count++;
350 }
351 /* X5b */
352 else if (pclass[i] == LRI)
353 {
354 int least_even = GreaterEven(stack[stack_top].level);
355 poutLevel[i] = stack[stack_top].level;
356 if (valid_level(least_even))
357 {
358 valid_isolate_count++;
359 push_stack(least_even, NI, TRUE);
360 }
361 else
362 overflow_isolate_count++;
363 }
364 /* X5c */
365 else if (pclass[i] == FSI)
366 {
367 int j;
368 int new_level = 0;
369 int skipping = 0;
370 poutLevel[i] = stack[stack_top].level;
371 for (j = i+1; j < count; j++)
372 {
373 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
374 {
375 skipping++;
376 continue;
377 }
378 else if (pclass[j] == PDI)
379 {
380 if (skipping)
381 skipping --;
382 else
383 break;
384 continue;
385 }
386
387 if (skipping) continue;
388
389 if (pclass[j] == L)
390 {
391 new_level = 0;
392 break;
393 }
394 else if (pclass[j] == R || pclass[j] == AL)
395 {
396 new_level = 1;
397 break;
398 }
399 }
400 if (odd(new_level))
401 {
402 int least_odd = GreaterOdd(stack[stack_top].level);
403 if (valid_level(least_odd))
404 {
405 valid_isolate_count++;
406 push_stack(least_odd, NI, TRUE);
407 }
408 else
409 overflow_isolate_count++;
410 }
411 else
412 {
413 int least_even = GreaterEven(stack[stack_top].level);
414 if (valid_level(least_even))
415 {
416 valid_isolate_count++;
417 push_stack(least_even, NI, TRUE);
418 }
419 else
420 overflow_isolate_count++;
421 }
422 }
423 /* X6 */
424 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
425 {
426 poutLevel[i] = stack[stack_top].level;
427 if (stack[stack_top].override != NI)
428 pclass[i] = stack[stack_top].override;
429 }
430 /* X6a */
431 else if (pclass[i] == PDI)
432 {
433 if (overflow_isolate_count) overflow_isolate_count--;
434 else if (!valid_isolate_count) {/* do nothing */}
435 else
436 {
437 overflow_embedding_count = 0;
438 while (!stack[stack_top].isolate) pop_stack();
439 pop_stack();
440 valid_isolate_count --;
441 }
442 poutLevel[i] = stack[stack_top].level;
443 }
444 /* X7 */
445 else if (pclass[i] == PDF)
446 {
447 poutLevel[i] = stack[stack_top].level;
448 if (overflow_isolate_count) {/* do nothing */}
449 else if (overflow_embedding_count) overflow_embedding_count--;
450 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
451 pop_stack();
452 }
453 /* X8: Nothing */
454 }
455 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
456 for (i = 0; i < count ; i++)
457 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
458 pclass[i] = BN;
459 }
460
461 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
462 {
463 if (index == -1 || index == back_fence) return index;
464 index --;
465 while (index > back_fence && pcls[index] == BN) index --;
466 return index;
467 }
468
469 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
470 {
471 if (index == front_fence) return index;
472 index ++;
473 while (index < front_fence && pcls[index] == BN) index ++;
474 return index;
475 }
476
477 typedef struct tagRun
478 {
479 int start;
480 int end;
481 WORD e;
482 } Run;
483
484 typedef struct tagRunChar
485 {
486 WCHAR ch;
487 WORD *pcls;
488 } RunChar;
489
490 typedef struct tagIsolatedRun
491 {
492 struct list entry;
493 int length;
494 WORD sos;
495 WORD eos;
496 WORD e;
497
498 RunChar item[1];
499 } IsolatedRun;
500
501 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
502 {
503 if (index >= (iso_run->length-1)) return -1;
504 index ++;
505 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
506 if (index == iso_run->length) return -1;
507 return index;
508 }
509
510 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
511 {
512
513 if (index <= 0) return -1;
514 index --;
515 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
516 return index;
517 }
518
519 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
520 {
521 int i, len = 0;
522 TRACE("%s:",header);
523 TRACE("[ ");
524 for (i = 0; i < iso_run->length && len < 200; i++)
525 {
526 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
527 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
528 }
529 if (i != iso_run->length)
530 TRACE("...");
531 TRACE(" ]\n");
532 }
533
534 /*------------------------------------------------------------------------
535 Function: resolveWeak
536
537 Resolves the directionality of numeric and other weak character types
538
539 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
540
541 Input: Array of embedding levels
542 Character count
543
544 In/Out: Array of directional classes
545
546 Note: On input only these directional classes are expected
547 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
548 ------------------------------------------------------------------------*/
549
550 static void resolveWeak(IsolatedRun * iso_run)
551 {
552 int i;
553
554 /* W1 */
555 for (i=0; i < iso_run->length; i++)
556 {
557 if (*iso_run->item[i].pcls == NSM)
558 {
559 int j = iso_previousValidChar(iso_run, i);
560 if (j == -1)
561 *iso_run->item[i].pcls = iso_run->sos;
562 else if (*iso_run->item[j].pcls >= LRI)
563 *iso_run->item[i].pcls = ON;
564 else
565 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
566 }
567 }
568
569 /* W2 */
570 for (i = 0; i < iso_run->length; i++)
571 {
572 if (*iso_run->item[i].pcls == EN)
573 {
574 int j = iso_previousValidChar(iso_run, i);
575 while (j > -1)
576 {
577 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
578 {
579 if (*iso_run->item[j].pcls == AL)
580 *iso_run->item[i].pcls = AN;
581 break;
582 }
583 j = iso_previousValidChar(iso_run, j);
584 }
585 }
586 }
587
588 /* W3 */
589 for (i = 0; i < iso_run->length; i++)
590 {
591 if (*iso_run->item[i].pcls == AL)
592 *iso_run->item[i].pcls = R;
593 }
594
595 /* W4 */
596 for (i = 0; i < iso_run->length; i++)
597 {
598 if (*iso_run->item[i].pcls == ES)
599 {
600 int b = iso_previousValidChar(iso_run, i);
601 int f = iso_nextValidChar(iso_run, i);
602
603 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
604 *iso_run->item[i].pcls = EN;
605 }
606 else if (*iso_run->item[i].pcls == CS)
607 {
608 int b = iso_previousValidChar(iso_run, i);
609 int f = iso_nextValidChar(iso_run, i);
610
611 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
612 *iso_run->item[i].pcls = EN;
613 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
614 *iso_run->item[i].pcls = AN;
615 }
616 }
617
618 /* W5 */
619 for (i = 0; i < iso_run->length; i++)
620 {
621 if (*iso_run->item[i].pcls == ET)
622 {
623 int j;
624 for (j = i-1 ; j > -1; j--)
625 {
626 if (*iso_run->item[j].pcls == BN) continue;
627 if (*iso_run->item[j].pcls == ET) continue;
628 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
629 else break;
630 }
631 if (*iso_run->item[i].pcls == ET)
632 {
633 for (j = i+1; j < iso_run->length; j++)
634 {
635 if (*iso_run->item[j].pcls == BN) continue;
636 if (*iso_run->item[j].pcls == ET) continue;
637 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
638 else break;
639 }
640 }
641 }
642 }
643
644 /* W6 */
645 for (i = 0; i < iso_run->length; i++)
646 {
647 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
648 {
649 int b = i-1;
650 int f = i+1;
651 if (b > -1 && *iso_run->item[b].pcls == BN)
652 *iso_run->item[b].pcls = ON;
653 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
654 *iso_run->item[f].pcls = ON;
655
656 *iso_run->item[i].pcls = ON;
657 }
658 }
659
660 /* W7 */
661 for (i = 0; i < iso_run->length; i++)
662 {
663 if (*iso_run->item[i].pcls == EN)
664 {
665 int j;
666 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
667 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
668 {
669 if (*iso_run->item[j].pcls == L)
670 *iso_run->item[i].pcls = L;
671 break;
672 }
673 if (iso_run->sos == L && j == -1)
674 *iso_run->item[i].pcls = L;
675 }
676 }
677 }
678
679 typedef struct tagBracketPair
680 {
681 int start;
682 int end;
683 } BracketPair;
684
685 static int compr(const void *a, const void* b)
686 {
687 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
688 }
689
690 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
691 {
692 WCHAR *open_stack;
693 int *stack_index;
694 int stack_top = iso_run->length;
695 unsigned int pair_count = 0;
696 BracketPair *out = NULL;
697 SIZE_T out_size = 0;
698 int i;
699
700 open_stack = heap_alloc(iso_run->length * sizeof(*open_stack));
701 stack_index = heap_alloc(iso_run->length * sizeof(*stack_index));
702
703 for (i = 0; i < iso_run->length; i++)
704 {
705 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
706
707 if (!ubv)
708 continue;
709
710 if ((ubv >> 8) == 0)
711 {
712 --stack_top;
713 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
714 /* Deal with canonical equivalent U+2329/232A and U+3008/3009. */
715 if (open_stack[stack_top] == 0x232a)
716 open_stack[stack_top] = 0x3009;
717 stack_index[stack_top] = i;
718 }
719 else if ((ubv >> 8) == 1)
720 {
721 unsigned int j;
722
723 for (j = stack_top; j < iso_run->length; ++j)
724 {
725 WCHAR c = iso_run->item[i].ch;
726
727 if (c == 0x232a)
728 c = 0x3009;
729
730 if (c != open_stack[j])
731 continue;
732
733 if (!(usp10_array_reserve((void **)&out, &out_size, pair_count + 2, sizeof(*out))))
734 ERR("Failed to grow output array.\n");
735
736 out[pair_count].start = stack_index[j];
737 out[pair_count].end = i;
738 ++pair_count;
739
740 out[pair_count].start = -1;
741 stack_top = j + 1;
742 break;
743 }
744 }
745 }
746
747 heap_free(open_stack);
748 heap_free(stack_index);
749
750 if (!pair_count)
751 return NULL;
752
753 qsort(out, pair_count, sizeof(*out), compr);
754
755 return out;
756 }
757
758 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
759
760 /*------------------------------------------------------------------------
761 Function: resolveNeutrals
762
763 Resolves the directionality of neutral character types.
764
765 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
766
767 Input: Array of embedding levels
768 Character count
769 Baselevel
770
771 In/Out: Array of directional classes
772
773 Note: On input only these directional classes are expected
774 R, L, NI, AN, EN and BN
775
776 W8 resolves a number of ENs to L
777 ------------------------------------------------------------------------*/
778 static void resolveNeutrals(IsolatedRun *iso_run)
779 {
780 int i;
781 BracketPair *pairs = NULL;
782
783 /* Translate isolates into NI */
784 for (i = 0; i < iso_run->length; i++)
785 {
786 if (*iso_run->item[i].pcls >= LRI)
787 *iso_run->item[i].pcls = NI;
788
789 switch(*iso_run->item[i].pcls)
790 {
791 case B:
792 case S:
793 case WS: *iso_run->item[i].pcls = NI;
794 }
795
796 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
797 }
798
799 /* N0: Skipping bracketed pairs for now */
800 pairs = computeBracketPairs(iso_run);
801 if (pairs)
802 {
803 BracketPair *p = &pairs[0];
804 int i = 0;
805 while (p->start >= 0)
806 {
807 int j;
808 int e = EmbeddingDirection(iso_run->e);
809 int o = EmbeddingDirection(iso_run->e+1);
810 BOOL flag_o = FALSE;
811 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
812
813 /* N0.b */
814 for (j = p->start+1; j < p->end; j++)
815 {
816 if (N0_TYPE(*iso_run->item[j].pcls) == e)
817 {
818 *iso_run->item[p->start].pcls = e;
819 *iso_run->item[p->end].pcls = e;
820 break;
821 }
822 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
823 flag_o = TRUE;
824 }
825 /* N0.c */
826 if (j == p->end && flag_o)
827 {
828 for (j = p->start; j >= 0; j--)
829 {
830 if (N0_TYPE(*iso_run->item[j].pcls) == o)
831 {
832 *iso_run->item[p->start].pcls = o;
833 *iso_run->item[p->end].pcls = o;
834 break;
835 }
836 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
837 {
838 *iso_run->item[p->start].pcls = e;
839 *iso_run->item[p->end].pcls = e;
840 break;
841 }
842 }
843 if ( j < 0 )
844 {
845 *iso_run->item[p->start].pcls = iso_run->sos;
846 *iso_run->item[p->end].pcls = iso_run->sos;
847 }
848 }
849
850 i++;
851 p = &pairs[i];
852 }
853 heap_free(pairs);
854 }
855
856 /* N1 */
857 for (i = 0; i < iso_run->length; i++)
858 {
859 WORD l,r;
860
861 if (*iso_run->item[i].pcls == NI)
862 {
863 int j;
864 int b = iso_previousValidChar(iso_run, i);
865
866 if (b == -1)
867 {
868 l = iso_run->sos;
869 b = 0;
870 }
871 else
872 {
873 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
874 l = R;
875 else if (*iso_run->item[b].pcls == L)
876 l = L;
877 else /* No string type */
878 continue;
879 }
880 j = iso_nextValidChar(iso_run, i);
881 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
882
883 if (j == -1)
884 {
885 r = iso_run->eos;
886 j = iso_run->length;
887 }
888 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
889 r = R;
890 else if (*iso_run->item[j].pcls == L)
891 r = L;
892 else /* No string type */
893 continue;
894
895 if (r == l)
896 {
897 for (b = i; b < j && b < iso_run->length; b++)
898 *iso_run->item[b].pcls = r;
899 }
900 }
901 }
902
903 /* N2 */
904 for (i = 0; i < iso_run->length; i++)
905 {
906 if (*iso_run->item[i].pcls == NI)
907 {
908 int b = i-1;
909 int f = i+1;
910 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
911 if (b > -1 && *iso_run->item[b].pcls == BN)
912 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
913 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
914 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
915 }
916 }
917 }
918
919 /*------------------------------------------------------------------------
920 Function: resolveImplicit
921
922 Recursively resolves implicit embedding levels.
923 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
924
925 Input: Array of direction classes
926 Character count
927 Base level
928
929 In/Out: Array of embedding levels
930
931 Note: levels may exceed 15 on output.
932 Accepted subset of direction classes
933 R, L, AN, EN
934 ------------------------------------------------------------------------*/
935 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
936 {
937 int i;
938
939 /* I1/2 */
940 for (i = sos; i <= eos; i++)
941 {
942 if (pcls[i] == BN)
943 continue;
944
945 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
946 ASSERT(pcls[i] < 5); /* "Out of range." */
947
948 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
949 plevel[i]++;
950 else if (!odd(plevel[i]) && pcls[i] == R)
951 plevel[i]++;
952 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
953 plevel[i]+=2;
954 }
955 }
956
957 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
958 {
959 int i;
960
961 /* L1 */
962 for (i = sos; i <= eos; i++)
963 {
964 if (pcls[i] == B || pcls[i] == S)
965 {
966 int j = i -1;
967 while (i > sos && j >= sos &&
968 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
969 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
970 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
971 plevel[j--] = baselevel;
972 plevel[i] = baselevel;
973 }
974 else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO ||
975 pcls[i] == PDF || pcls[i] == BN)
976 {
977 plevel[i] = i ? plevel[i - 1] : baselevel;
978 }
979 if (i == eos &&
980 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
981 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
982 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
983 {
984 int j = i;
985 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
986 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
987 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
988 plevel[j--] = baselevel;
989 }
990 }
991 }
992
993 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, const WORD *pLevel,
994 const WCHAR *string, unsigned int uCount, struct list *set)
995 {
996 int run_start, run_end, i;
997 int run_count = 0;
998 Run *runs;
999 IsolatedRun *current_isolated;
1000
1001 if (!(runs = heap_alloc(uCount * sizeof(*runs))))
1002 return;
1003
1004 list_init(set);
1005
1006 /* Build Runs */
1007 run_start = 0;
1008 while (run_start < uCount)
1009 {
1010 run_end = nextValidChar(pcls, run_start, uCount);
1011 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
1012 run_end --;
1013 runs[run_count].start = run_start;
1014 runs[run_count].end = run_end;
1015 runs[run_count].e = pLevel[run_start];
1016 run_start = nextValidChar(pcls, run_end, uCount);
1017 run_count++;
1018 }
1019
1020 /* Build Isolating Runs */
1021 i = 0;
1022 while (i < run_count)
1023 {
1024 int k = i;
1025 if (runs[k].start >= 0)
1026 {
1027 int type_fence, real_end;
1028 int j;
1029
1030 if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount]))))
1031 break;
1032
1033 run_start = runs[k].start;
1034 current_isolated->e = runs[k].e;
1035 current_isolated->length = (runs[k].end - runs[k].start)+1;
1036
1037 for (j = 0; j < current_isolated->length; j++)
1038 {
1039 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1040 current_isolated->item[j].ch = string[runs[k].start + j];
1041 }
1042
1043 run_end = runs[k].end;
1044
1045 TRACE("{ [%i -- %i]",run_start, run_end);
1046
1047 if (pcls[run_end] == BN)
1048 run_end = previousValidChar(pcls, run_end, runs[k].start);
1049
1050 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1051 {
1052 j = k+1;
1053 search:
1054 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1055 if (j < run_count && runs[i].e != runs[j].e)
1056 {
1057 j++;
1058 goto search;
1059 }
1060
1061 if (j != run_count)
1062 {
1063 int m;
1064 int l = current_isolated->length;
1065
1066 current_isolated->length += (runs[j].end - runs[j].start)+1;
1067 for (m = 0; l < current_isolated->length; l++, m++)
1068 {
1069 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1070 current_isolated->item[l].ch = string[runs[j].start + m];
1071 }
1072
1073 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1074
1075 run_end = runs[j].end;
1076 if (pcls[run_end] == BN)
1077 run_end = previousValidChar(pcls, run_end, runs[i].start);
1078 runs[j].start = -1;
1079 k = j;
1080 }
1081 else
1082 {
1083 run_end = uCount;
1084 break;
1085 }
1086 }
1087
1088 type_fence = previousValidChar(pcls, run_start, -1);
1089
1090 if (type_fence == -1)
1091 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1092 else
1093 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1094
1095 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1096
1097 if (run_end == uCount)
1098 current_isolated->eos = current_isolated->sos;
1099 else
1100 {
1101 /* eos could be an BN */
1102 if ( pcls[run_end] == BN )
1103 {
1104 real_end = previousValidChar(pcls, run_end, run_start-1);
1105 if (real_end < run_start)
1106 real_end = run_start;
1107 }
1108 else
1109 real_end = run_end;
1110
1111 type_fence = nextValidChar(pcls, run_end, uCount);
1112 if (type_fence == uCount)
1113 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1114 else
1115 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1116
1117 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1118 }
1119
1120 list_add_tail(set, &current_isolated->entry);
1121 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1122 }
1123 i++;
1124 }
1125
1126 heap_free(runs);
1127 }
1128
1129 /*************************************************************
1130 * BIDI_DeterminLevels
1131 */
1132 BOOL BIDI_DetermineLevels(
1133 const WCHAR *lpString, /* [in] The string for which information is to be returned */
1134 unsigned int uCount, /* [in] Number of WCHARs in string. */
1135 const SCRIPT_STATE *s,
1136 const SCRIPT_CONTROL *c,
1137 WORD *lpOutLevels, /* [out] final string levels */
1138 WORD *lpOutOverrides /* [out] final string overrides */
1139 )
1140 {
1141 WORD *chartype;
1142 unsigned baselevel = 0;
1143 struct list IsolatingRuns;
1144 IsolatedRun *iso_run, *next;
1145
1146 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1147
1148 if (!(chartype = heap_alloc(uCount * sizeof(*chartype))))
1149 {
1150 WARN("Out of memory\n");
1151 return FALSE;
1152 }
1153
1154 baselevel = s->uBidiLevel;
1155
1156 classify(lpString, chartype, uCount, c);
1157 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1158
1159 memset(lpOutOverrides, 0, sizeof(WORD) * uCount);
1160
1161 /* resolve explicit */
1162 resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection);
1163 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1164
1165 /* X10/BD13: Computer Isolating runs */
1166 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1167
1168 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1169 {
1170 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1171
1172 /* resolve weak */
1173 resolveWeak(iso_run);
1174 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1175
1176 /* resolve neutrals */
1177 resolveNeutrals(iso_run);
1178 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1179
1180 list_remove(&iso_run->entry);
1181 heap_free(iso_run);
1182 }
1183
1184 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1185 /* resolveImplicit */
1186 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1187
1188 /* resolveResolvedLevels*/
1189 classify(lpString, chartype, uCount, c);
1190 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1191
1192 heap_free(chartype);
1193 return TRUE;
1194 }
1195
1196 /* reverse cch indexes */
1197 static void reverse(int *pidx, int cch)
1198 {
1199 int temp;
1200 int ich = 0;
1201 for (; ich < --cch; ich++)
1202 {
1203 temp = pidx[ich];
1204 pidx[ich] = pidx[cch];
1205 pidx[cch] = temp;
1206 }
1207 }
1208
1209
1210 /*------------------------------------------------------------------------
1211 Functions: reorder/reorderLevel
1212
1213 Recursively reorders the display string
1214 "From the highest level down, reverse all characters at that level and
1215 higher, down to the lowest odd level"
1216
1217 Implements rule L2 of the Unicode bidi Algorithm.
1218
1219 Input: Array of embedding levels
1220 Character count
1221 Flag enabling reversal (set to false by initial caller)
1222
1223 In/Out: Text to reorder
1224
1225 Note: levels may exceed 15 resp. 61 on input.
1226
1227 Rule L3 - reorder combining marks is not implemented here
1228 Rule L4 - glyph mirroring is implemented as a display option below
1229
1230 Note: this should be applied a line at a time
1231 -------------------------------------------------------------------------*/
1232 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1233 {
1234 int ich = 0;
1235
1236 /* true as soon as first odd level encountered */
1237 fReverse = fReverse || odd(level);
1238
1239 for (; ich < cch; ich++)
1240 {
1241 if (plevel[ich] < level)
1242 {
1243 break;
1244 }
1245 else if (plevel[ich] > level)
1246 {
1247 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1248 cch - ich, fReverse) - 1;
1249 }
1250 }
1251 if (fReverse)
1252 {
1253 reverse(pIndexs, ich);
1254 }
1255 return ich;
1256 }
1257
1258 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1259 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1260 {
1261 int ich = 0;
1262 int newlevel = -1;
1263
1264 /* true as soon as first odd level encountered */
1265 fReverse = fReverse || odd(level);
1266
1267 for (; ich < cch; ich++)
1268 {
1269 if (plevel[ich] < level)
1270 break;
1271 else if (plevel[ich] > level)
1272 newlevel = ich;
1273 }
1274 if (fReverse)
1275 {
1276 reverse(pIndexs, ich);
1277 }
1278
1279 if (newlevel >= 0)
1280 {
1281 ich = 0;
1282 for (; ich < cch; ich++)
1283 if (plevel[ich] < level)
1284 break;
1285 else if (plevel[ich] > level)
1286 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1287 cch - ich, fReverse) - 1;
1288 }
1289
1290 return ich;
1291 }
1292
1293 BOOL BIDI_GetStrengths(const WCHAR *string, unsigned int count, const SCRIPT_CONTROL *c, WORD *strength)
1294 {
1295 unsigned int i;
1296
1297 classify(string, strength, count, c);
1298 for (i = 0; i < count; i++)
1299 {
1300 switch (strength[i])
1301 {
1302 case L:
1303 case LRE:
1304 case LRO:
1305 case R:
1306 case AL:
1307 case RLE:
1308 case RLO:
1309 strength[i] = BIDI_STRONG;
1310 break;
1311 case PDF:
1312 case EN:
1313 case ES:
1314 case ET:
1315 case AN:
1316 case CS:
1317 case BN:
1318 strength[i] = BIDI_WEAK;
1319 break;
1320 case B:
1321 case S:
1322 case WS:
1323 case ON:
1324 default: /* Neutrals and NSM */
1325 strength[i] = BIDI_NEUTRAL;
1326 }
1327 }
1328 return TRUE;
1329 }