2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
46 #include <wine/list.h>
48 #include "usp10_internal.h"
50 extern const unsigned short bidi_bracket_table
[] DECLSPEC_HIDDEN
;
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
54 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
57 /* HELPER FUNCTIONS AND DECLARATIONS */
59 /*------------------------------------------------------------------------
60 Bidirectional Character Types
62 as defined by the Unicode Bidirectional Algorithm Table 3-7.
66 The list of bidirectional character types here is not grouped the
67 same way as the table 3-7, since the numberic values for the types
68 are chosen to keep the state and action tables compact.
69 ------------------------------------------------------------------------*/
73 /* ON MUST be zero, code relies on ON = NI = 0 */
74 ON
= 0, /* Other Neutral */
77 AN
, /* Arabic Number */
78 EN
, /* European Number */
79 AL
, /* Arabic Letter (Right-to-left) */
80 NSM
, /* Non-spacing Mark */
81 CS
, /* Common Separator */
82 ES
, /* European Separator */
83 ET
, /* European Terminator (post/prefix e.g. $ and %) */
86 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
89 S
, /* Segment Separator (TAB) // used only in L1 */
90 WS
, /* White space // used only in L1 */
91 B
, /* Paragraph Separator (aka as PS) */
93 /* types for explicit controls */
94 RLO
, /* these are used only in X1-X9 */
100 LRI
, /* Isolate formatting characters new with 6.3 */
105 /* resolved types, also resolved directions */
106 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
109 static const char debug_type
[][4] =
111 "ON", /* Other Neutral */
112 "L", /* Left Letter */
113 "R", /* Right Letter */
114 "AN", /* Arabic Number */
115 "EN", /* European Number */
116 "AL", /* Arabic Letter (Right-to-left) */
117 "NSM", /* Non-spacing Mark */
118 "CS", /* Common Separator */
119 "ES", /* European Separator */
120 "ET", /* European Terminator (post/prefix e.g. $ and %) */
121 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
122 "S", /* Segment Separator (TAB) // used only in L1 */
123 "WS", /* White space // used only in L1 */
124 "B", /* Paragraph Separator (aka as PS) */
125 "RLO", /* these are used only in X1-X9 */
130 "LRI", /* Isolate formatting characters new with 6.3 */
136 /* HELPER FUNCTIONS */
138 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
142 for (i
= start
; i
< end
&& len
< 200; i
++)
144 TRACE(" %s",debug_type
[types
[i
]]);
145 len
+= strlen(debug_type
[types
[i
]])+1;
152 /* Convert the libwine information to the direction enum */
153 static void classify(const WCHAR
*string
, WORD
*chartype
, DWORD count
, const SCRIPT_CONTROL
*c
)
155 static const enum directions dir_map
[16] =
157 L
, /* unassigned defaults to L */
172 PDF
/* also LRE, LRO, RLE, RLO */
177 for (i
= 0; i
< count
; ++i
)
179 chartype
[i
] = dir_map
[get_char_typeW(string
[i
]) >> 12];
183 if (!c
->fLegacyBidiClass
) break;
187 case '+': chartype
[i
] = NI
; break;
188 case '/': chartype
[i
] = CS
; break;
194 case 0x202A: chartype
[i
] = LRE
; break;
195 case 0x202B: chartype
[i
] = RLE
; break;
196 case 0x202C: chartype
[i
] = PDF
; break;
197 case 0x202D: chartype
[i
] = LRO
; break;
198 case 0x202E: chartype
[i
] = RLO
; break;
199 case 0x2066: chartype
[i
] = LRI
; break;
200 case 0x2067: chartype
[i
] = RLI
; break;
201 case 0x2068: chartype
[i
] = FSI
; break;
202 case 0x2069: chartype
[i
] = PDI
; break;
209 /* RESOLVE EXPLICIT */
211 static WORD
GreaterEven(int i
)
213 return odd(i
) ? i
+ 1 : i
+ 2;
216 static WORD
GreaterOdd(int i
)
218 return odd(i
) ? i
+ 2 : i
+ 1;
221 static WORD
EmbeddingDirection(int level
)
223 return odd(level
) ? R
: L
;
226 /*------------------------------------------------------------------------
227 Function: resolveExplicit
229 Recursively resolves explicit embedding levels and overrides.
230 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
232 Input: Base embedding level and direction
235 Output: Array of embedding levels
237 In/Out: Array of direction classes
240 Note: The function uses two simple counters to keep track of
241 matching explicit codes and PDF. Use the default argument for
242 the outermost call. The nesting counter counts the recursion
243 depth and not the embedding level.
244 ------------------------------------------------------------------------*/
245 typedef struct tagStackItem
{
251 #define push_stack(l,o,i) \
253 stack[stack_top].level = l; \
254 stack[stack_top].override = o; \
255 stack[stack_top].isolate = i;} while(0)
257 #define pop_stack() do { stack_top++; } while(0)
259 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
261 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, WORD
*poutOverrides
, int count
, BOOL initialOverride
)
264 int overflow_isolate_count
= 0;
265 int overflow_embedding_count
= 0;
266 int valid_isolate_count
= 0;
269 StackItem stack
[MAX_DEPTH
+2];
270 int stack_top
= MAX_DEPTH
+1;
272 stack
[stack_top
].level
= level
;
273 stack
[stack_top
].override
= NI
;
274 stack
[stack_top
].isolate
= FALSE
;
279 push_stack(level
, R
, FALSE
);
281 push_stack(level
, L
, FALSE
);
284 for (i
= 0; i
< count
; i
++)
286 poutOverrides
[i
] = stack
[stack_top
].override
;
289 if (pclass
[i
] == RLE
)
291 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
292 poutLevel
[i
] = stack
[stack_top
].level
;
293 if (valid_level(least_odd
))
294 push_stack(least_odd
, NI
, FALSE
);
295 else if (overflow_isolate_count
== 0)
296 overflow_embedding_count
++;
299 else if (pclass
[i
] == LRE
)
301 int least_even
= GreaterEven(stack
[stack_top
].level
);
302 poutLevel
[i
] = stack
[stack_top
].level
;
303 if (valid_level(least_even
))
304 push_stack(least_even
, NI
, FALSE
);
305 else if (overflow_isolate_count
== 0)
306 overflow_embedding_count
++;
309 else if (pclass
[i
] == RLO
)
311 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
312 poutLevel
[i
] = stack
[stack_top
].level
;
313 if (valid_level(least_odd
))
314 push_stack(least_odd
, R
, FALSE
);
315 else if (overflow_isolate_count
== 0)
316 overflow_embedding_count
++;
319 else if (pclass
[i
] == LRO
)
321 int least_even
= GreaterEven(stack
[stack_top
].level
);
322 poutLevel
[i
] = stack
[stack_top
].level
;
323 if (valid_level(least_even
))
324 push_stack(least_even
, L
, FALSE
);
325 else if (overflow_isolate_count
== 0)
326 overflow_embedding_count
++;
329 else if (pclass
[i
] == RLI
)
331 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
332 poutLevel
[i
] = stack
[stack_top
].level
;
333 if (valid_level(least_odd
))
335 valid_isolate_count
++;
336 push_stack(least_odd
, NI
, TRUE
);
339 overflow_isolate_count
++;
342 else if (pclass
[i
] == LRI
)
344 int least_even
= GreaterEven(stack
[stack_top
].level
);
345 poutLevel
[i
] = stack
[stack_top
].level
;
346 if (valid_level(least_even
))
348 valid_isolate_count
++;
349 push_stack(least_even
, NI
, TRUE
);
352 overflow_isolate_count
++;
355 else if (pclass
[i
] == FSI
)
360 poutLevel
[i
] = stack
[stack_top
].level
;
361 for (j
= i
+1; j
< count
; j
++)
363 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
368 else if (pclass
[j
] == PDI
)
377 if (skipping
) continue;
384 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
392 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
393 if (valid_level(least_odd
))
395 valid_isolate_count
++;
396 push_stack(least_odd
, NI
, TRUE
);
399 overflow_isolate_count
++;
403 int least_even
= GreaterEven(stack
[stack_top
].level
);
404 if (valid_level(least_even
))
406 valid_isolate_count
++;
407 push_stack(least_even
, NI
, TRUE
);
410 overflow_isolate_count
++;
414 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
416 poutLevel
[i
] = stack
[stack_top
].level
;
417 if (stack
[stack_top
].override
!= NI
)
418 pclass
[i
] = stack
[stack_top
].override
;
421 else if (pclass
[i
] == PDI
)
423 if (overflow_isolate_count
) overflow_isolate_count
--;
424 else if (!valid_isolate_count
) {/* do nothing */}
427 overflow_embedding_count
= 0;
428 while (!stack
[stack_top
].isolate
) pop_stack();
430 valid_isolate_count
--;
432 poutLevel
[i
] = stack
[stack_top
].level
;
435 else if (pclass
[i
] == PDF
)
437 poutLevel
[i
] = stack
[stack_top
].level
;
438 if (overflow_isolate_count
) {/* do nothing */}
439 else if (overflow_embedding_count
) overflow_embedding_count
--;
440 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
445 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
446 for (i
= 0; i
< count
; i
++)
447 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
451 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
453 if (index
== -1 || index
== back_fence
) return index
;
455 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
459 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
461 if (index
== front_fence
) return index
;
463 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
467 typedef struct tagRun
474 typedef struct tagRunChar
480 typedef struct tagIsolatedRun
491 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
493 if (index
>= (iso_run
->length
-1)) return -1;
495 while (index
< iso_run
->length
&& *iso_run
->item
[index
].pcls
== BN
) index
++;
496 if (index
== iso_run
->length
) return -1;
500 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
503 if (index
<= 0) return -1;
505 while (index
> -1 && *iso_run
->item
[index
].pcls
== BN
) index
--;
509 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
514 for (i
= 0; i
< iso_run
->length
&& len
< 200; i
++)
516 TRACE(" %s",debug_type
[*iso_run
->item
[i
].pcls
]);
517 len
+= strlen(debug_type
[*iso_run
->item
[i
].pcls
])+1;
519 if (i
!= iso_run
->length
)
524 /*------------------------------------------------------------------------
525 Function: resolveWeak
527 Resolves the directionality of numeric and other weak character types
529 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
531 Input: Array of embedding levels
534 In/Out: Array of directional classes
536 Note: On input only these directional classes are expected
537 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
538 ------------------------------------------------------------------------*/
540 static void resolveWeak(IsolatedRun
* iso_run
)
545 for (i
=0; i
< iso_run
->length
; i
++)
547 if (*iso_run
->item
[i
].pcls
== NSM
)
549 int j
= iso_previousValidChar(iso_run
, i
);
551 *iso_run
->item
[i
].pcls
= iso_run
->sos
;
552 else if (*iso_run
->item
[j
].pcls
>= LRI
)
553 *iso_run
->item
[i
].pcls
= ON
;
555 *iso_run
->item
[i
].pcls
= *iso_run
->item
[j
].pcls
;
560 for (i
= 0; i
< iso_run
->length
; i
++)
562 if (*iso_run
->item
[i
].pcls
== EN
)
564 int j
= iso_previousValidChar(iso_run
, i
);
567 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
|| *iso_run
->item
[j
].pcls
== AL
)
569 if (*iso_run
->item
[j
].pcls
== AL
)
570 *iso_run
->item
[i
].pcls
= AN
;
573 j
= iso_previousValidChar(iso_run
, j
);
579 for (i
= 0; i
< iso_run
->length
; i
++)
581 if (*iso_run
->item
[i
].pcls
== AL
)
582 *iso_run
->item
[i
].pcls
= R
;
586 for (i
= 0; i
< iso_run
->length
; i
++)
588 if (*iso_run
->item
[i
].pcls
== ES
)
590 int b
= iso_previousValidChar(iso_run
, i
);
591 int f
= iso_nextValidChar(iso_run
, i
);
593 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
594 *iso_run
->item
[i
].pcls
= EN
;
596 else if (*iso_run
->item
[i
].pcls
== CS
)
598 int b
= iso_previousValidChar(iso_run
, i
);
599 int f
= iso_nextValidChar(iso_run
, i
);
601 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
602 *iso_run
->item
[i
].pcls
= EN
;
603 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== AN
&& *iso_run
->item
[f
].pcls
== AN
)
604 *iso_run
->item
[i
].pcls
= AN
;
609 for (i
= 0; i
< iso_run
->length
; i
++)
611 if (*iso_run
->item
[i
].pcls
== ET
)
614 for (j
= i
-1 ; j
> -1; j
--)
616 if (*iso_run
->item
[j
].pcls
== BN
) continue;
617 if (*iso_run
->item
[j
].pcls
== ET
) continue;
618 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
621 if (*iso_run
->item
[i
].pcls
== ET
)
623 for (j
= i
+1; j
< iso_run
->length
; j
++)
625 if (*iso_run
->item
[j
].pcls
== BN
) continue;
626 if (*iso_run
->item
[j
].pcls
== ET
) continue;
627 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
635 for (i
= 0; i
< iso_run
->length
; i
++)
637 if (*iso_run
->item
[i
].pcls
== ET
|| *iso_run
->item
[i
].pcls
== ES
|| *iso_run
->item
[i
].pcls
== CS
|| *iso_run
->item
[i
].pcls
== ON
)
641 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
642 *iso_run
->item
[b
].pcls
= ON
;
643 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
644 *iso_run
->item
[f
].pcls
= ON
;
646 *iso_run
->item
[i
].pcls
= ON
;
651 for (i
= 0; i
< iso_run
->length
; i
++)
653 if (*iso_run
->item
[i
].pcls
== EN
)
656 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
657 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
)
659 if (*iso_run
->item
[j
].pcls
== L
)
660 *iso_run
->item
[i
].pcls
= L
;
663 if (iso_run
->sos
== L
&& j
== -1)
664 *iso_run
->item
[i
].pcls
= L
;
669 typedef struct tagBracketPair
675 static int compr(const void *a
, const void* b
)
677 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
680 static BracketPair
*computeBracketPairs(IsolatedRun
*iso_run
)
684 int stack_top
= iso_run
->length
;
685 BracketPair
*out
= NULL
;
689 open_stack
= heap_alloc(iso_run
->length
* sizeof(*open_stack
));
690 stack_index
= heap_alloc(iso_run
->length
* sizeof(*stack_index
));
692 for (i
= 0; i
< iso_run
->length
; i
++)
694 unsigned short ubv
= get_table_entry(bidi_bracket_table
, iso_run
->item
[i
].ch
);
699 out
= heap_alloc(sizeof(*out
));
706 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
707 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
708 if (open_stack
[stack_top
] == 0x232A)
709 open_stack
[stack_top
] = 0x3009;
710 stack_index
[stack_top
] = i
;
712 else if ((ubv
>> 8) == 1)
715 if (stack_top
== iso_run
->length
) continue;
716 for (j
= stack_top
; j
< iso_run
->length
; j
++)
718 WCHAR c
= iso_run
->item
[i
].ch
;
719 if (c
== 0x232A) c
= 0x3009;
720 if (c
== open_stack
[j
])
722 out
[pair_count
].start
= stack_index
[j
];
723 out
[pair_count
].end
= i
;
725 out
= HeapReAlloc(GetProcessHeap(), 0, out
, sizeof(BracketPair
) * (pair_count
+1));
726 out
[pair_count
].start
= -1;
739 else if (pair_count
> 1)
740 qsort(out
, pair_count
, sizeof(BracketPair
), compr
);
742 heap_free(open_stack
);
743 heap_free(stack_index
);
747 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
749 /*------------------------------------------------------------------------
750 Function: resolveNeutrals
752 Resolves the directionality of neutral character types.
754 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
756 Input: Array of embedding levels
760 In/Out: Array of directional classes
762 Note: On input only these directional classes are expected
763 R, L, NI, AN, EN and BN
765 W8 resolves a number of ENs to L
766 ------------------------------------------------------------------------*/
767 static void resolveNeutrals(IsolatedRun
*iso_run
)
770 BracketPair
*pairs
= NULL
;
772 /* Translate isolates into NI */
773 for (i
= 0; i
< iso_run
->length
; i
++)
775 if (*iso_run
->item
[i
].pcls
>= LRI
)
776 *iso_run
->item
[i
].pcls
= NI
;
778 switch(*iso_run
->item
[i
].pcls
)
782 case WS
: *iso_run
->item
[i
].pcls
= NI
;
785 ASSERT(*iso_run
->item
[i
].pcls
< 5 || *iso_run
->item
[i
].pcls
== BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
788 /* N0: Skipping bracketed pairs for now */
789 pairs
= computeBracketPairs(iso_run
);
792 BracketPair
*p
= &pairs
[0];
794 while (p
->start
>= 0)
797 int e
= EmbeddingDirection(iso_run
->e
);
798 int o
= EmbeddingDirection(iso_run
->e
+1);
800 TRACE("Bracket Pair [%i - %i]\n",p
->start
, p
->end
);
803 for (j
= p
->start
+1; j
< p
->end
; j
++)
805 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
807 *iso_run
->item
[p
->start
].pcls
= e
;
808 *iso_run
->item
[p
->end
].pcls
= e
;
811 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
815 if (j
== p
->end
&& flag_o
)
817 for (j
= p
->start
; j
>= 0; j
--)
819 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
821 *iso_run
->item
[p
->start
].pcls
= o
;
822 *iso_run
->item
[p
->end
].pcls
= o
;
825 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
827 *iso_run
->item
[p
->start
].pcls
= e
;
828 *iso_run
->item
[p
->end
].pcls
= e
;
834 *iso_run
->item
[p
->start
].pcls
= iso_run
->sos
;
835 *iso_run
->item
[p
->end
].pcls
= iso_run
->sos
;
846 for (i
= 0; i
< iso_run
->length
; i
++)
850 if (*iso_run
->item
[i
].pcls
== NI
)
853 int b
= iso_previousValidChar(iso_run
, i
);
862 if (*iso_run
->item
[b
].pcls
== R
|| *iso_run
->item
[b
].pcls
== AN
|| *iso_run
->item
[b
].pcls
== EN
)
864 else if (*iso_run
->item
[b
].pcls
== L
)
866 else /* No string type */
869 j
= iso_nextValidChar(iso_run
, i
);
870 while (j
> -1 && *iso_run
->item
[j
].pcls
== NI
) j
= iso_nextValidChar(iso_run
, j
);
877 else if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== AN
|| *iso_run
->item
[j
].pcls
== EN
)
879 else if (*iso_run
->item
[j
].pcls
== L
)
881 else /* No string type */
886 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
887 *iso_run
->item
[b
].pcls
= r
;
893 for (i
= 0; i
< iso_run
->length
; i
++)
895 if (*iso_run
->item
[i
].pcls
== NI
)
899 *iso_run
->item
[i
].pcls
= EmbeddingDirection(iso_run
->e
);
900 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
901 *iso_run
->item
[b
].pcls
= EmbeddingDirection(iso_run
->e
);
902 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
903 *iso_run
->item
[f
].pcls
= EmbeddingDirection(iso_run
->e
);
908 /*------------------------------------------------------------------------
909 Function: resolveImplicit
911 Recursively resolves implicit embedding levels.
912 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
914 Input: Array of direction classes
918 In/Out: Array of embedding levels
920 Note: levels may exceed 15 on output.
921 Accepted subset of direction classes
923 ------------------------------------------------------------------------*/
924 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
929 for (i
= sos
; i
<= eos
; i
++)
934 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
935 ASSERT(pcls
[i
] < 5); /* "Out of range." */
937 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
939 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
941 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
946 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
951 for (i
= sos
; i
<= eos
; i
++)
953 if (pcls
[i
] == B
|| pcls
[i
] == S
)
956 while (i
> sos
&& j
>= sos
&&
957 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
958 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
959 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
960 plevel
[j
--] = baselevel
;
961 plevel
[i
] = baselevel
;
963 else if (pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
|| pcls
[i
] == RLO
||
964 pcls
[i
] == PDF
|| pcls
[i
] == BN
)
966 plevel
[i
] = i
? plevel
[i
- 1] : baselevel
;
969 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
970 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
971 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
974 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
975 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
976 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
977 plevel
[j
--] = baselevel
;
982 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, const WORD
*pLevel
,
983 const WCHAR
*string
, unsigned int uCount
, struct list
*set
)
985 int run_start
, run_end
, i
;
988 IsolatedRun
*current_isolated
;
990 if (!(runs
= heap_alloc(uCount
* sizeof(*runs
))))
997 while (run_start
< uCount
)
999 run_end
= nextValidChar(pcls
, run_start
, uCount
);
1000 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
1002 runs
[run_count
].start
= run_start
;
1003 runs
[run_count
].end
= run_end
;
1004 runs
[run_count
].e
= pLevel
[run_start
];
1005 run_start
= nextValidChar(pcls
, run_end
, uCount
);
1009 /* Build Isolating Runs */
1011 while (i
< run_count
)
1014 if (runs
[k
].start
>= 0)
1016 int type_fence
, real_end
;
1019 if (!(current_isolated
= heap_alloc(FIELD_OFFSET(IsolatedRun
, item
[uCount
]))))
1022 run_start
= runs
[k
].start
;
1023 current_isolated
->e
= runs
[k
].e
;
1024 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
1026 for (j
= 0; j
< current_isolated
->length
; j
++)
1028 current_isolated
->item
[j
].pcls
= &pcls
[runs
[k
].start
+j
];
1029 current_isolated
->item
[j
].ch
= string
[runs
[k
].start
+ j
];
1032 run_end
= runs
[k
].end
;
1034 TRACE("{ [%i -- %i]",run_start
, run_end
);
1036 if (pcls
[run_end
] == BN
)
1037 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
1039 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
1043 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
1044 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
1053 int l
= current_isolated
->length
;
1055 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1056 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
1058 current_isolated
->item
[l
].pcls
= &pcls
[runs
[j
].start
+m
];
1059 current_isolated
->item
[l
].ch
= string
[runs
[j
].start
+ m
];
1062 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
1064 run_end
= runs
[j
].end
;
1065 if (pcls
[run_end
] == BN
)
1066 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
1077 type_fence
= previousValidChar(pcls
, run_start
, -1);
1079 if (type_fence
== -1)
1080 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
1082 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
1084 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
1086 if (run_end
== uCount
)
1087 current_isolated
->eos
= current_isolated
->sos
;
1090 /* eos could be an BN */
1091 if ( pcls
[run_end
] == BN
)
1093 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
1094 if (real_end
< run_start
)
1095 real_end
= run_start
;
1100 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
1101 if (type_fence
== uCount
)
1102 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
1104 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
1106 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
1109 list_add_tail(set
, ¤t_isolated
->entry
);
1110 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1118 /*************************************************************
1119 * BIDI_DeterminLevels
1121 BOOL
BIDI_DetermineLevels(
1122 const WCHAR
*lpString
, /* [in] The string for which information is to be returned */
1123 unsigned int uCount
, /* [in] Number of WCHARs in string. */
1124 const SCRIPT_STATE
*s
,
1125 const SCRIPT_CONTROL
*c
,
1126 WORD
*lpOutLevels
, /* [out] final string levels */
1127 WORD
*lpOutOverrides
/* [out] final string overrides */
1131 unsigned baselevel
= 0;
1132 struct list IsolatingRuns
;
1133 IsolatedRun
*iso_run
, *next
;
1135 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
1137 if (!(chartype
= heap_alloc(uCount
* sizeof(*chartype
))))
1139 WARN("Out of memory\n");
1143 baselevel
= s
->uBidiLevel
;
1145 classify(lpString
, chartype
, uCount
, c
);
1146 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
1148 memset(lpOutOverrides
, 0, sizeof(WORD
) * uCount
);
1150 /* resolve explicit */
1151 resolveExplicit(baselevel
, chartype
, lpOutLevels
, lpOutOverrides
, uCount
, s
->fOverrideDirection
);
1152 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
1154 /* X10/BD13: Computer Isolating runs */
1155 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, lpString
, uCount
, &IsolatingRuns
);
1157 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1159 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
1162 resolveWeak(iso_run
);
1163 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
1165 /* resolve neutrals */
1166 resolveNeutrals(iso_run
);
1167 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
1169 list_remove(&iso_run
->entry
);
1173 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1174 /* resolveImplicit */
1175 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1177 /* resolveResolvedLevels*/
1178 classify(lpString
, chartype
, uCount
, c
);
1179 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1181 heap_free(chartype
);
1185 /* reverse cch indexes */
1186 static void reverse(int *pidx
, int cch
)
1190 for (; ich
< --cch
; ich
++)
1193 pidx
[ich
] = pidx
[cch
];
1199 /*------------------------------------------------------------------------
1200 Functions: reorder/reorderLevel
1202 Recursively reorders the display string
1203 "From the highest level down, reverse all characters at that level and
1204 higher, down to the lowest odd level"
1206 Implements rule L2 of the Unicode bidi Algorithm.
1208 Input: Array of embedding levels
1210 Flag enabling reversal (set to false by initial caller)
1212 In/Out: Text to reorder
1214 Note: levels may exceed 15 resp. 61 on input.
1216 Rule L3 - reorder combining marks is not implemented here
1217 Rule L4 - glyph mirroring is implemented as a display option below
1219 Note: this should be applied a line at a time
1220 -------------------------------------------------------------------------*/
1221 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1225 /* true as soon as first odd level encountered */
1226 fReverse
= fReverse
|| odd(level
);
1228 for (; ich
< cch
; ich
++)
1230 if (plevel
[ich
] < level
)
1234 else if (plevel
[ich
] > level
)
1236 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1237 cch
- ich
, fReverse
) - 1;
1242 reverse(pIndexs
, ich
);
1247 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1248 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1253 /* true as soon as first odd level encountered */
1254 fReverse
= fReverse
|| odd(level
);
1256 for (; ich
< cch
; ich
++)
1258 if (plevel
[ich
] < level
)
1260 else if (plevel
[ich
] > level
)
1265 reverse(pIndexs
, ich
);
1271 for (; ich
< cch
; ich
++)
1272 if (plevel
[ich
] < level
)
1274 else if (plevel
[ich
] > level
)
1275 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1276 cch
- ich
, fReverse
) - 1;
1282 BOOL
BIDI_GetStrengths(const WCHAR
*string
, unsigned int count
, const SCRIPT_CONTROL
*c
, WORD
*strength
)
1286 classify(string
, strength
, count
, c
);
1287 for (i
= 0; i
< count
; i
++)
1289 switch (strength
[i
])
1298 strength
[i
] = BIDI_STRONG
;
1307 strength
[i
] = BIDI_WEAK
;
1313 default: /* Neutrals and NSM */
1314 strength
[i
] = BIDI_NEUTRAL
;