[URLMON_WINETEST] Sync with Wine Staging 1.9.4. CORE-10912
[reactos.git] / reactos / dll / win32 / usp10 / bidi.c
1 /*
2 * Uniscribe BiDirectional handling
3 *
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 *
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 *
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 *
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
42 */
43
44 #include <windef.h>
45
46 #include <wine/list.h>
47
48 #include "usp10_internal.h"
49
50 extern const unsigned short bidi_bracket_table[];
51
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
53
54 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
55 #define MAX_DEPTH 125
56
57 /* HELPER FUNCTIONS AND DECLARATIONS */
58
59 /*------------------------------------------------------------------------
60 Bidirectional Character Types
61
62 as defined by the Unicode Bidirectional Algorithm Table 3-7.
63
64 Note:
65
66 The list of bidirectional character types here is not grouped the
67 same way as the table 3-7, since the numberic values for the types
68 are chosen to keep the state and action tables compact.
69 ------------------------------------------------------------------------*/
70 enum directions
71 {
72 /* input types */
73 /* ON MUST be zero, code relies on ON = NI = 0 */
74 ON = 0, /* Other Neutral */
75 L, /* Left Letter */
76 R, /* Right Letter */
77 AN, /* Arabic Number */
78 EN, /* European Number */
79 AL, /* Arabic Letter (Right-to-left) */
80 NSM, /* Non-spacing Mark */
81 CS, /* Common Separator */
82 ES, /* European Separator */
83 ET, /* European Terminator (post/prefix e.g. $ and %) */
84
85 /* resolved types */
86 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
87
88 /* input types, */
89 S, /* Segment Separator (TAB) // used only in L1 */
90 WS, /* White space // used only in L1 */
91 B, /* Paragraph Separator (aka as PS) */
92
93 /* types for explicit controls */
94 RLO, /* these are used only in X1-X9 */
95 RLE,
96 LRO,
97 LRE,
98 PDF,
99
100 LRI, /* Isolate formatting characters new with 6.3 */
101 RLI,
102 FSI,
103 PDI,
104
105 /* resolved types, also resolved directions */
106 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
107 };
108
109 static const char debug_type[][4] =
110 {
111 "ON", /* Other Neutral */
112 "L", /* Left Letter */
113 "R", /* Right Letter */
114 "AN", /* Arabic Number */
115 "EN", /* European Number */
116 "AL", /* Arabic Letter (Right-to-left) */
117 "NSM", /* Non-spacing Mark */
118 "CS", /* Common Separator */
119 "ES", /* European Separator */
120 "ET", /* European Terminator (post/prefix e.g. $ and %) */
121 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
122 "S", /* Segment Separator (TAB) // used only in L1 */
123 "WS", /* White space // used only in L1 */
124 "B", /* Paragraph Separator (aka as PS) */
125 "RLO", /* these are used only in X1-X9 */
126 "RLE",
127 "LRO",
128 "LRE",
129 "PDF",
130 "LRI", /* Isolate formatting characters new with 6.3 */
131 "RLI",
132 "FSI",
133 "PDI",
134 };
135
136 /* HELPER FUNCTIONS */
137
138 static inline void dump_types(const char* header, WORD *types, int start, int end)
139 {
140 int i, len = 0;
141 TRACE("%s:",header);
142 for (i = start; i < end && len < 200; i++)
143 {
144 TRACE(" %s",debug_type[types[i]]);
145 len += strlen(debug_type[types[i]])+1;
146 }
147 if (i != end)
148 TRACE("...");
149 TRACE("\n");
150 }
151
152 /* Convert the libwine information to the direction enum */
153 static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
154 {
155 static const enum directions dir_map[16] =
156 {
157 L, /* unassigned defaults to L */
158 L,
159 R,
160 EN,
161 ES,
162 ET,
163 AN,
164 CS,
165 B,
166 S,
167 WS,
168 ON,
169 AL,
170 NSM,
171 BN,
172 PDF /* also LRE, LRO, RLE, RLO */
173 };
174
175 unsigned i;
176
177 for (i = 0; i < uCount; ++i)
178 {
179 chartype[i] = dir_map[get_char_typeW(lpString[i]) >> 12];
180 switch (chartype[i])
181 {
182 case ES:
183 if (!c->fLegacyBidiClass) break;
184 switch (lpString[i])
185 {
186 case '-':
187 case '+': chartype[i] = NI; break;
188 case '/': chartype[i] = CS; break;
189 }
190 break;
191 case PDF:
192 switch (lpString[i])
193 {
194 case 0x202A: chartype[i] = LRE; break;
195 case 0x202B: chartype[i] = RLE; break;
196 case 0x202C: chartype[i] = PDF; break;
197 case 0x202D: chartype[i] = LRO; break;
198 case 0x202E: chartype[i] = RLO; break;
199 case 0x2066: chartype[i] = LRI; break;
200 case 0x2067: chartype[i] = RLI; break;
201 case 0x2068: chartype[i] = FSI; break;
202 case 0x2069: chartype[i] = PDI; break;
203 }
204 break;
205 }
206 }
207 }
208
209 /* RESOLVE EXPLICIT */
210
211 static WORD GreaterEven(int i)
212 {
213 return odd(i) ? i + 1 : i + 2;
214 }
215
216 static WORD GreaterOdd(int i)
217 {
218 return odd(i) ? i + 2 : i + 1;
219 }
220
221 static WORD EmbeddingDirection(int level)
222 {
223 return odd(level) ? R : L;
224 }
225
226 /*------------------------------------------------------------------------
227 Function: resolveExplicit
228
229 Recursively resolves explicit embedding levels and overrides.
230 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
231
232 Input: Base embedding level and direction
233 Character count
234
235 Output: Array of embedding levels
236
237 In/Out: Array of direction classes
238
239
240 Note: The function uses two simple counters to keep track of
241 matching explicit codes and PDF. Use the default argument for
242 the outermost call. The nesting counter counts the recursion
243 depth and not the embedding level.
244 ------------------------------------------------------------------------*/
245 typedef struct tagStackItem {
246 int level;
247 int override;
248 BOOL isolate;
249 } StackItem;
250
251 #define push_stack(l,o,i) \
252 do { stack_top--; \
253 stack[stack_top].level = l; \
254 stack[stack_top].override = o; \
255 stack[stack_top].isolate = i;} while(0)
256
257 #define pop_stack() do { stack_top++; } while(0)
258
259 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
260
261 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, int count)
262 {
263 /* X1 */
264 int overflow_isolate_count = 0;
265 int overflow_embedding_count = 0;
266 int valid_isolate_count = 0;
267 int i;
268
269 StackItem stack[MAX_DEPTH+2];
270 int stack_top = MAX_DEPTH+1;
271
272 stack[stack_top].level = level;
273 stack[stack_top].override = NI;
274 stack[stack_top].isolate = FALSE;
275
276 for (i = 0; i < count; i++)
277 {
278 /* X2 */
279 if (pclass[i] == RLE)
280 {
281 int least_odd = GreaterOdd(stack[stack_top].level);
282 poutLevel[i] = stack[stack_top].level;
283 if (valid_level(least_odd))
284 push_stack(least_odd, NI, FALSE);
285 else if (overflow_isolate_count == 0)
286 overflow_embedding_count++;
287 }
288 /* X3 */
289 else if (pclass[i] == LRE)
290 {
291 int least_even = GreaterEven(stack[stack_top].level);
292 poutLevel[i] = stack[stack_top].level;
293 if (valid_level(least_even))
294 push_stack(least_even, NI, FALSE);
295 else if (overflow_isolate_count == 0)
296 overflow_embedding_count++;
297 }
298 /* X4 */
299 else if (pclass[i] == RLO)
300 {
301 int least_odd = GreaterOdd(stack[stack_top].level);
302 poutLevel[i] = stack[stack_top].level;
303 if (valid_level(least_odd))
304 push_stack(least_odd, R, FALSE);
305 else if (overflow_isolate_count == 0)
306 overflow_embedding_count++;
307 }
308 /* X5 */
309 else if (pclass[i] == LRO)
310 {
311 int least_even = GreaterEven(stack[stack_top].level);
312 poutLevel[i] = stack[stack_top].level;
313 if (valid_level(least_even))
314 push_stack(least_even, L, FALSE);
315 else if (overflow_isolate_count == 0)
316 overflow_embedding_count++;
317 }
318 /* X5a */
319 else if (pclass[i] == RLI)
320 {
321 int least_odd = GreaterOdd(stack[stack_top].level);
322 poutLevel[i] = stack[stack_top].level;
323 if (valid_level(least_odd))
324 {
325 valid_isolate_count++;
326 push_stack(least_odd, NI, TRUE);
327 }
328 else
329 overflow_isolate_count++;
330 }
331 /* X5b */
332 else if (pclass[i] == LRI)
333 {
334 int least_even = GreaterEven(stack[stack_top].level);
335 poutLevel[i] = stack[stack_top].level;
336 if (valid_level(least_even))
337 {
338 valid_isolate_count++;
339 push_stack(least_even, NI, TRUE);
340 }
341 else
342 overflow_isolate_count++;
343 }
344 /* X5c */
345 else if (pclass[i] == FSI)
346 {
347 int j;
348 int new_level = 0;
349 int skipping = 0;
350 poutLevel[i] = stack[stack_top].level;
351 for (j = i+1; j < count; j++)
352 {
353 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
354 {
355 skipping++;
356 continue;
357 }
358 else if (pclass[j] == PDI)
359 {
360 if (skipping)
361 skipping --;
362 else
363 break;
364 continue;
365 }
366
367 if (skipping) continue;
368
369 if (pclass[j] == L)
370 {
371 new_level = 0;
372 break;
373 }
374 else if (pclass[j] == R || pclass[j] == AL)
375 {
376 new_level = 1;
377 break;
378 }
379 }
380 if (odd(new_level))
381 {
382 int least_odd = GreaterOdd(stack[stack_top].level);
383 if (valid_level(least_odd))
384 {
385 valid_isolate_count++;
386 push_stack(least_odd, NI, TRUE);
387 }
388 else
389 overflow_isolate_count++;
390 }
391 else
392 {
393 int least_even = GreaterEven(stack[stack_top].level);
394 if (valid_level(least_even))
395 {
396 valid_isolate_count++;
397 push_stack(least_even, NI, TRUE);
398 }
399 else
400 overflow_isolate_count++;
401 }
402 }
403 /* X6 */
404 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
405 {
406 poutLevel[i] = stack[stack_top].level;
407 if (stack[stack_top].override != NI)
408 pclass[i] = stack[stack_top].override;
409 }
410 /* X6a */
411 else if (pclass[i] == PDI)
412 {
413 if (overflow_isolate_count) overflow_isolate_count--;
414 else if (!valid_isolate_count) {/* do nothing */}
415 else
416 {
417 overflow_embedding_count = 0;
418 while (!stack[stack_top].isolate) pop_stack();
419 pop_stack();
420 valid_isolate_count --;
421 }
422 poutLevel[i] = stack[stack_top].level;
423 }
424 /* X7 */
425 else if (pclass[i] == PDF)
426 {
427 poutLevel[i] = stack[stack_top].level;
428 if (overflow_isolate_count) {/* do nothing */}
429 else if (overflow_embedding_count) overflow_embedding_count--;
430 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
431 pop_stack();
432 }
433 /* X8: Nothing */
434 }
435 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
436 for (i = 0; i < count ; i++)
437 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
438 pclass[i] = BN;
439 }
440
441 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
442 {
443 if (index == -1 || index == back_fence) return index;
444 index --;
445 while (index > back_fence && pcls[index] == BN) index --;
446 return index;
447 }
448
449 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
450 {
451 if (index == front_fence) return index;
452 index ++;
453 while (index < front_fence && pcls[index] == BN) index ++;
454 return index;
455 }
456
457 typedef struct tagRun
458 {
459 int start;
460 int end;
461 WORD e;
462 } Run;
463
464 typedef struct tagRunChar
465 {
466 WCHAR ch;
467 WORD *pcls;
468 } RunChar;
469
470 typedef struct tagIsolatedRun
471 {
472 struct list entry;
473 int length;
474 WORD sos;
475 WORD eos;
476 WORD e;
477
478 RunChar item[1];
479 } IsolatedRun;
480
481 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
482 {
483 if (index >= (iso_run->length-1)) return -1;
484 index ++;
485 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
486 if (index == iso_run->length) return -1;
487 return index;
488 }
489
490 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
491 {
492
493 if (index <= 0) return -1;
494 index --;
495 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
496 return index;
497 }
498
499 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
500 {
501 int i, len = 0;
502 TRACE("%s:",header);
503 TRACE("[ ");
504 for (i = 0; i < iso_run->length && len < 200; i++)
505 {
506 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
507 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
508 }
509 if (i != iso_run->length)
510 TRACE("...");
511 TRACE(" ]\n");
512 }
513
514 /*------------------------------------------------------------------------
515 Function: resolveWeak
516
517 Resolves the directionality of numeric and other weak character types
518
519 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
520
521 Input: Array of embedding levels
522 Character count
523
524 In/Out: Array of directional classes
525
526 Note: On input only these directional classes are expected
527 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
528 ------------------------------------------------------------------------*/
529
530 static void resolveWeak(IsolatedRun * iso_run)
531 {
532 int i;
533
534 /* W1 */
535 for (i=0; i < iso_run->length; i++)
536 {
537 if (*iso_run->item[i].pcls == NSM)
538 {
539 int j = iso_previousValidChar(iso_run, i);
540 if (j == -1)
541 *iso_run->item[i].pcls = iso_run->sos;
542 else if (*iso_run->item[j].pcls >= LRI)
543 *iso_run->item[i].pcls = ON;
544 else
545 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
546 }
547 }
548
549 /* W2 */
550 for (i = 0; i < iso_run->length; i++)
551 {
552 if (*iso_run->item[i].pcls == EN)
553 {
554 int j = iso_previousValidChar(iso_run, i);
555 while (j > -1)
556 {
557 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
558 {
559 if (*iso_run->item[j].pcls == AL)
560 *iso_run->item[i].pcls = AN;
561 break;
562 }
563 j = iso_previousValidChar(iso_run, j);
564 }
565 }
566 }
567
568 /* W3 */
569 for (i = 0; i < iso_run->length; i++)
570 {
571 if (*iso_run->item[i].pcls == AL)
572 *iso_run->item[i].pcls = R;
573 }
574
575 /* W4 */
576 for (i = 0; i < iso_run->length; i++)
577 {
578 if (*iso_run->item[i].pcls == ES)
579 {
580 int b = iso_previousValidChar(iso_run, i);
581 int f = iso_nextValidChar(iso_run, i);
582
583 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
584 *iso_run->item[i].pcls = EN;
585 }
586 else if (*iso_run->item[i].pcls == CS)
587 {
588 int b = iso_previousValidChar(iso_run, i);
589 int f = iso_nextValidChar(iso_run, i);
590
591 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
592 *iso_run->item[i].pcls = EN;
593 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
594 *iso_run->item[i].pcls = AN;
595 }
596 }
597
598 /* W5 */
599 for (i = 0; i < iso_run->length; i++)
600 {
601 if (*iso_run->item[i].pcls == ET)
602 {
603 int j;
604 for (j = i-1 ; j > -1; j--)
605 {
606 if (*iso_run->item[j].pcls == BN) continue;
607 if (*iso_run->item[j].pcls == ET) continue;
608 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
609 else break;
610 }
611 if (*iso_run->item[i].pcls == ET)
612 {
613 for (j = i+1; j < iso_run->length; j++)
614 {
615 if (*iso_run->item[j].pcls == BN) continue;
616 if (*iso_run->item[j].pcls == ET) continue;
617 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
618 else break;
619 }
620 }
621 }
622 }
623
624 /* W6 */
625 for (i = 0; i < iso_run->length; i++)
626 {
627 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
628 {
629 int b = i-1;
630 int f = i+1;
631 if (b > -1 && *iso_run->item[b].pcls == BN)
632 *iso_run->item[b].pcls = ON;
633 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
634 *iso_run->item[f].pcls = ON;
635
636 *iso_run->item[i].pcls = ON;
637 }
638 }
639
640 /* W7 */
641 for (i = 0; i < iso_run->length; i++)
642 {
643 if (*iso_run->item[i].pcls == EN)
644 {
645 int j;
646 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
647 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
648 {
649 if (*iso_run->item[j].pcls == L)
650 *iso_run->item[i].pcls = L;
651 break;
652 }
653 if (iso_run->sos == L && j == -1)
654 *iso_run->item[i].pcls = L;
655 }
656 }
657 }
658
659 typedef struct tagBracketPair
660 {
661 int start;
662 int end;
663 } BracketPair;
664
665 static int compr(const void *a, const void* b)
666 {
667 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
668 }
669
670 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
671 {
672 WCHAR *open_stack;
673 int *stack_index;
674 int stack_top = iso_run->length;
675 BracketPair *out = NULL;
676 int pair_count = 0;
677 int i;
678
679 open_stack = HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR) * iso_run->length);
680 stack_index = HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run->length);
681
682 for (i = 0; i < iso_run->length; i++)
683 {
684 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
685 if (ubv)
686 {
687 if (!out)
688 {
689 out = HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair));
690 out[0].start = -1;
691 }
692
693 if ((ubv >> 8) == 0)
694 {
695 stack_top --;
696 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
697 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
698 if (open_stack[stack_top] == 0x232A)
699 open_stack[stack_top] = 0x3009;
700 stack_index[stack_top] = i;
701 }
702 else if ((ubv >> 8) == 1)
703 {
704 int j;
705 if (stack_top == iso_run->length) continue;
706 for (j = stack_top; j < iso_run->length; j++)
707 {
708 WCHAR c = iso_run->item[i].ch;
709 if (c == 0x232A) c = 0x3009;
710 if (c == open_stack[j])
711 {
712 out[pair_count].start = stack_index[j];
713 out[pair_count].end = i;
714 pair_count++;
715 out = HeapReAlloc(GetProcessHeap(), 0, out, sizeof(BracketPair) * (pair_count+1));
716 out[pair_count].start = -1;
717 stack_top = j+1;
718 break;
719 }
720 }
721 }
722 }
723 }
724 if (pair_count == 0)
725 {
726 HeapFree(GetProcessHeap(),0,out);
727 out = NULL;
728 }
729 else if (pair_count > 1)
730 qsort(out, pair_count, sizeof(BracketPair), compr);
731
732 HeapFree(GetProcessHeap(), 0, open_stack);
733 HeapFree(GetProcessHeap(), 0, stack_index);
734 return out;
735 }
736
737 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
738
739 /*------------------------------------------------------------------------
740 Function: resolveNeutrals
741
742 Resolves the directionality of neutral character types.
743
744 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
745
746 Input: Array of embedding levels
747 Character count
748 Baselevel
749
750 In/Out: Array of directional classes
751
752 Note: On input only these directional classes are expected
753 R, L, NI, AN, EN and BN
754
755 W8 resolves a number of ENs to L
756 ------------------------------------------------------------------------*/
757 static void resolveNeutrals(IsolatedRun *iso_run)
758 {
759 int i;
760 BracketPair *pairs = NULL;
761
762 /* Translate isolates into NI */
763 for (i = 0; i < iso_run->length; i++)
764 {
765 if (*iso_run->item[i].pcls >= LRI)
766 *iso_run->item[i].pcls = NI;
767
768 switch(*iso_run->item[i].pcls)
769 {
770 case B:
771 case S:
772 case WS: *iso_run->item[i].pcls = NI;
773 }
774
775 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
776 }
777
778 /* N0: Skipping bracketed pairs for now */
779 pairs = computeBracketPairs(iso_run);
780 if (pairs)
781 {
782 BracketPair *p = &pairs[0];
783 int i = 0;
784 while (p->start >= 0)
785 {
786 int j;
787 int e = EmbeddingDirection(iso_run->e);
788 int o = EmbeddingDirection(iso_run->e+1);
789 BOOL flag_o = FALSE;
790 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
791
792 /* N0.b */
793 for (j = p->start+1; j < p->end; j++)
794 {
795 if (N0_TYPE(*iso_run->item[j].pcls) == e)
796 {
797 *iso_run->item[p->start].pcls = e;
798 *iso_run->item[p->end].pcls = e;
799 break;
800 }
801 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
802 flag_o = TRUE;
803 }
804 /* N0.c */
805 if (j == p->end && flag_o)
806 {
807 for (j = p->start; j >= 0; j--)
808 {
809 if (N0_TYPE(*iso_run->item[j].pcls) == o)
810 {
811 *iso_run->item[p->start].pcls = o;
812 *iso_run->item[p->end].pcls = o;
813 break;
814 }
815 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
816 {
817 *iso_run->item[p->start].pcls = e;
818 *iso_run->item[p->end].pcls = e;
819 break;
820 }
821 }
822 if ( j < 0 )
823 {
824 *iso_run->item[p->start].pcls = iso_run->sos;
825 *iso_run->item[p->end].pcls = iso_run->sos;
826 }
827 }
828
829 i++;
830 p = &pairs[i];
831 }
832 HeapFree(GetProcessHeap(),0,pairs);
833 }
834
835 /* N1 */
836 for (i = 0; i < iso_run->length; i++)
837 {
838 WORD l,r;
839
840 if (*iso_run->item[i].pcls == NI)
841 {
842 int j;
843 int b = iso_previousValidChar(iso_run, i);
844
845 if (b == -1)
846 {
847 l = iso_run->sos;
848 b = 0;
849 }
850 else
851 {
852 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
853 l = R;
854 else if (*iso_run->item[b].pcls == L)
855 l = L;
856 else /* No string type */
857 continue;
858 }
859 j = iso_nextValidChar(iso_run, i);
860 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
861
862 if (j == -1)
863 {
864 r = iso_run->eos;
865 j = iso_run->length;
866 }
867 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
868 r = R;
869 else if (*iso_run->item[j].pcls == L)
870 r = L;
871 else /* No string type */
872 continue;
873
874 if (r == l)
875 {
876 for (b = i; b < j && b < iso_run->length; b++)
877 *iso_run->item[b].pcls = r;
878 }
879 }
880 }
881
882 /* N2 */
883 for (i = 0; i < iso_run->length; i++)
884 {
885 if (*iso_run->item[i].pcls == NI)
886 {
887 int b = i-1;
888 int f = i+1;
889 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
890 if (b > -1 && *iso_run->item[b].pcls == BN)
891 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
892 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
893 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
894 }
895 }
896 }
897
898 /*------------------------------------------------------------------------
899 Function: resolveImplicit
900
901 Recursively resolves implicit embedding levels.
902 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
903
904 Input: Array of direction classes
905 Character count
906 Base level
907
908 In/Out: Array of embedding levels
909
910 Note: levels may exceed 15 on output.
911 Accepted subset of direction classes
912 R, L, AN, EN
913 ------------------------------------------------------------------------*/
914 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
915 {
916 int i;
917
918 /* I1/2 */
919 for (i = sos; i <= eos; i++)
920 {
921 if (pcls[i] == BN)
922 continue;
923
924 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
925 ASSERT(pcls[i] < 5); /* "Out of range." */
926
927 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
928 plevel[i]++;
929 else if (!odd(plevel[i]) && pcls[i] == R)
930 plevel[i]++;
931 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
932 plevel[i]+=2;
933 }
934 }
935
936 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
937 {
938 int i;
939
940 /* L1 */
941 for (i = sos; i <= eos; i++)
942 {
943 if (pcls[i] == B || pcls[i] == S)
944 {
945 int j = i -1;
946 while (i > sos && j >= sos &&
947 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
948 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
949 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
950 plevel[j--] = baselevel;
951 plevel[i] = baselevel;
952 }
953 if (i == eos &&
954 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
955 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
956 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
957 {
958 int j = i;
959 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
960 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
961 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
962 plevel[j--] = baselevel;
963 }
964 }
965 }
966
967 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, WORD *pLevel, LPCWSTR lpString, int uCount, struct list *set)
968 {
969 int run_start, run_end, i;
970 int run_count = 0;
971 Run *runs;
972 IsolatedRun *current_isolated;
973
974 runs = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(Run));
975 if (!runs) return;
976
977 list_init(set);
978
979 /* Build Runs */
980 run_start = 0;
981 while (run_start < uCount)
982 {
983 run_end = nextValidChar(pcls, run_start, uCount);
984 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
985 run_end --;
986 runs[run_count].start = run_start;
987 runs[run_count].end = run_end;
988 runs[run_count].e = pLevel[run_start];
989 run_start = nextValidChar(pcls, run_end, uCount);
990 run_count++;
991 }
992
993 /* Build Isolating Runs */
994 i = 0;
995 while (i < run_count)
996 {
997 int k = i;
998 if (runs[k].start >= 0)
999 {
1000 int type_fence, real_end;
1001 int j;
1002 current_isolated = HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun) + sizeof(RunChar)*uCount);
1003 if (!current_isolated) break;
1004
1005 run_start = runs[k].start;
1006 current_isolated->e = runs[k].e;
1007 current_isolated->length = (runs[k].end - runs[k].start)+1;
1008
1009 for (j = 0; j < current_isolated->length; j++)
1010 {
1011 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1012 current_isolated->item[j].ch = lpString[runs[k].start+j];
1013 }
1014
1015 run_end = runs[k].end;
1016
1017 TRACE("{ [%i -- %i]",run_start, run_end);
1018
1019 if (pcls[run_end] == BN)
1020 run_end = previousValidChar(pcls, run_end, runs[k].start);
1021
1022 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1023 {
1024 j = k+1;
1025 search:
1026 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1027 if (j < run_count && runs[i].e != runs[j].e)
1028 {
1029 j++;
1030 goto search;
1031 }
1032
1033 if (j != run_count)
1034 {
1035 int m;
1036 int l = current_isolated->length;
1037
1038 current_isolated->length += (runs[j].end - runs[j].start)+1;
1039 for (m = 0; l < current_isolated->length; l++, m++)
1040 {
1041 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1042 current_isolated->item[l].ch = lpString[runs[j].start+m];
1043 }
1044
1045 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1046
1047 run_end = runs[j].end;
1048 if (pcls[run_end] == BN)
1049 run_end = previousValidChar(pcls, run_end, runs[i].start);
1050 runs[j].start = -1;
1051 k = j;
1052 }
1053 else
1054 {
1055 run_end = uCount;
1056 break;
1057 }
1058 }
1059
1060 type_fence = previousValidChar(pcls, run_start, -1);
1061
1062 if (type_fence == -1)
1063 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1064 else
1065 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1066
1067 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1068
1069 if (run_end == uCount)
1070 current_isolated->eos = current_isolated->sos;
1071 else
1072 {
1073 /* eos could be an BN */
1074 if ( pcls[run_end] == BN )
1075 {
1076 real_end = previousValidChar(pcls, run_end, run_start-1);
1077 if (real_end < run_start)
1078 real_end = run_start;
1079 }
1080 else
1081 real_end = run_end;
1082
1083 type_fence = nextValidChar(pcls, run_end, uCount);
1084 if (type_fence == uCount)
1085 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1086 else
1087 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1088
1089 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1090 }
1091
1092 list_add_tail(set, &current_isolated->entry);
1093 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1094 }
1095 i++;
1096 }
1097
1098 HeapFree(GetProcessHeap(), 0, runs);
1099 }
1100
1101 /*************************************************************
1102 * BIDI_DeterminLevels
1103 */
1104 BOOL BIDI_DetermineLevels(
1105 LPCWSTR lpString, /* [in] The string for which information is to be returned */
1106 INT uCount, /* [in] Number of WCHARs in string. */
1107 const SCRIPT_STATE *s,
1108 const SCRIPT_CONTROL *c,
1109 WORD *lpOutLevels /* [out] final string levels */
1110 )
1111 {
1112 WORD *chartype;
1113 unsigned baselevel = 0;
1114 struct list IsolatingRuns;
1115 IsolatedRun *iso_run, *next;
1116
1117 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1118
1119 chartype = HeapAlloc(GetProcessHeap(), 0, uCount * sizeof(WORD));
1120 if (!chartype)
1121 {
1122 WARN("Out of memory\n");
1123 return FALSE;
1124 }
1125
1126 baselevel = s->uBidiLevel;
1127
1128 classify(lpString, chartype, uCount, c);
1129 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1130
1131 /* resolve explicit */
1132 resolveExplicit(baselevel, chartype, lpOutLevels, uCount);
1133 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1134
1135 /* X10/BD13: Computer Isolating runs */
1136 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1137
1138 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1139 {
1140 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1141
1142 /* resolve weak */
1143 resolveWeak(iso_run);
1144 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1145
1146 /* resolve neutrals */
1147 resolveNeutrals(iso_run);
1148 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1149
1150 list_remove(&iso_run->entry);
1151 HeapFree(GetProcessHeap(),0,iso_run);
1152 }
1153
1154 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1155 /* resolveImplicit */
1156 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1157
1158 /* resolveResolvedLevels*/
1159 classify(lpString, chartype, uCount, c);
1160 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1161
1162 HeapFree(GetProcessHeap(), 0, chartype);
1163 return TRUE;
1164 }
1165
1166 /* reverse cch indexes */
1167 static void reverse(int *pidx, int cch)
1168 {
1169 int temp;
1170 int ich = 0;
1171 for (; ich < --cch; ich++)
1172 {
1173 temp = pidx[ich];
1174 pidx[ich] = pidx[cch];
1175 pidx[cch] = temp;
1176 }
1177 }
1178
1179
1180 /*------------------------------------------------------------------------
1181 Functions: reorder/reorderLevel
1182
1183 Recursively reorders the display string
1184 "From the highest level down, reverse all characters at that level and
1185 higher, down to the lowest odd level"
1186
1187 Implements rule L2 of the Unicode bidi Algorithm.
1188
1189 Input: Array of embedding levels
1190 Character count
1191 Flag enabling reversal (set to false by initial caller)
1192
1193 In/Out: Text to reorder
1194
1195 Note: levels may exceed 15 resp. 61 on input.
1196
1197 Rule L3 - reorder combining marks is not implemented here
1198 Rule L4 - glyph mirroring is implemented as a display option below
1199
1200 Note: this should be applied a line at a time
1201 -------------------------------------------------------------------------*/
1202 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1203 {
1204 int ich = 0;
1205
1206 /* true as soon as first odd level encountered */
1207 fReverse = fReverse || odd(level);
1208
1209 for (; ich < cch; ich++)
1210 {
1211 if (plevel[ich] < level)
1212 {
1213 break;
1214 }
1215 else if (plevel[ich] > level)
1216 {
1217 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1218 cch - ich, fReverse) - 1;
1219 }
1220 }
1221 if (fReverse)
1222 {
1223 reverse(pIndexs, ich);
1224 }
1225 return ich;
1226 }
1227
1228 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1229 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1230 {
1231 int ich = 0;
1232 int newlevel = -1;
1233
1234 /* true as soon as first odd level encountered */
1235 fReverse = fReverse || odd(level);
1236
1237 for (; ich < cch; ich++)
1238 {
1239 if (plevel[ich] < level)
1240 break;
1241 else if (plevel[ich] > level)
1242 newlevel = ich;
1243 }
1244 if (fReverse)
1245 {
1246 reverse(pIndexs, ich);
1247 }
1248
1249 if (newlevel >= 0)
1250 {
1251 ich = 0;
1252 for (; ich < cch; ich++)
1253 if (plevel[ich] < level)
1254 break;
1255 else if (plevel[ich] > level)
1256 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1257 cch - ich, fReverse) - 1;
1258 }
1259
1260 return ich;
1261 }
1262
1263 BOOL BIDI_GetStrengths(LPCWSTR lpString, INT uCount, const SCRIPT_CONTROL *c,
1264 WORD* lpStrength)
1265 {
1266 int i;
1267 classify(lpString, lpStrength, uCount, c);
1268
1269 for ( i = 0; i < uCount; i++)
1270 {
1271 switch(lpStrength[i])
1272 {
1273 case L:
1274 case LRE:
1275 case LRO:
1276 case R:
1277 case AL:
1278 case RLE:
1279 case RLO:
1280 lpStrength[i] = BIDI_STRONG;
1281 break;
1282 case PDF:
1283 case EN:
1284 case ES:
1285 case ET:
1286 case AN:
1287 case CS:
1288 case BN:
1289 lpStrength[i] = BIDI_WEAK;
1290 break;
1291 case B:
1292 case S:
1293 case WS:
1294 case ON:
1295 default: /* Neutrals and NSM */
1296 lpStrength[i] = BIDI_NEUTRAL;
1297 }
1298 }
1299 return TRUE;
1300 }