2 * Uniscribe BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
46 #include <wine/list.h>
48 #include "usp10_internal.h"
50 extern const unsigned short bidi_bracket_table
[];
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
54 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
57 /* HELPER FUNCTIONS AND DECLARATIONS */
59 /*------------------------------------------------------------------------
60 Bidirectional Character Types
62 as defined by the Unicode Bidirectional Algorithm Table 3-7.
66 The list of bidirectional character types here is not grouped the
67 same way as the table 3-7, since the numberic values for the types
68 are chosen to keep the state and action tables compact.
69 ------------------------------------------------------------------------*/
73 /* ON MUST be zero, code relies on ON = NI = 0 */
74 ON
= 0, /* Other Neutral */
77 AN
, /* Arabic Number */
78 EN
, /* European Number */
79 AL
, /* Arabic Letter (Right-to-left) */
80 NSM
, /* Non-spacing Mark */
81 CS
, /* Common Separator */
82 ES
, /* European Separator */
83 ET
, /* European Terminator (post/prefix e.g. $ and %) */
86 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
89 S
, /* Segment Separator (TAB) // used only in L1 */
90 WS
, /* White space // used only in L1 */
91 B
, /* Paragraph Separator (aka as PS) */
93 /* types for explicit controls */
94 RLO
, /* these are used only in X1-X9 */
100 LRI
, /* Isolate formatting characters new with 6.3 */
105 /* resolved types, also resolved directions */
106 NI
= ON
, /* alias, where ON, WS, S and Isolates are treated the same */
109 static const char debug_type
[][4] =
111 "ON", /* Other Neutral */
112 "L", /* Left Letter */
113 "R", /* Right Letter */
114 "AN", /* Arabic Number */
115 "EN", /* European Number */
116 "AL", /* Arabic Letter (Right-to-left) */
117 "NSM", /* Non-spacing Mark */
118 "CS", /* Common Separator */
119 "ES", /* European Separator */
120 "ET", /* European Terminator (post/prefix e.g. $ and %) */
121 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
122 "S", /* Segment Separator (TAB) // used only in L1 */
123 "WS", /* White space // used only in L1 */
124 "B", /* Paragraph Separator (aka as PS) */
125 "RLO", /* these are used only in X1-X9 */
130 "LRI", /* Isolate formatting characters new with 6.3 */
136 /* HELPER FUNCTIONS */
138 static inline void dump_types(const char* header
, WORD
*types
, int start
, int end
)
142 for (i
= start
; i
< end
&& len
< 200; i
++)
144 TRACE(" %s",debug_type
[types
[i
]]);
145 len
+= strlen(debug_type
[types
[i
]])+1;
152 /* Convert the libwine information to the direction enum */
153 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
, const SCRIPT_CONTROL
*c
)
155 static const enum directions dir_map
[16] =
157 L
, /* unassigned defaults to L */
172 PDF
/* also LRE, LRO, RLE, RLO */
177 for (i
= 0; i
< uCount
; ++i
)
179 chartype
[i
] = dir_map
[get_char_typeW(lpString
[i
]) >> 12];
183 if (!c
->fLegacyBidiClass
) break;
187 case '+': chartype
[i
] = NI
; break;
188 case '/': chartype
[i
] = CS
; break;
194 case 0x202A: chartype
[i
] = LRE
; break;
195 case 0x202B: chartype
[i
] = RLE
; break;
196 case 0x202C: chartype
[i
] = PDF
; break;
197 case 0x202D: chartype
[i
] = LRO
; break;
198 case 0x202E: chartype
[i
] = RLO
; break;
199 case 0x2066: chartype
[i
] = LRI
; break;
200 case 0x2067: chartype
[i
] = RLI
; break;
201 case 0x2068: chartype
[i
] = FSI
; break;
202 case 0x2069: chartype
[i
] = PDI
; break;
209 /* RESOLVE EXPLICIT */
211 static WORD
GreaterEven(int i
)
213 return odd(i
) ? i
+ 1 : i
+ 2;
216 static WORD
GreaterOdd(int i
)
218 return odd(i
) ? i
+ 2 : i
+ 1;
221 static WORD
EmbeddingDirection(int level
)
223 return odd(level
) ? R
: L
;
226 /*------------------------------------------------------------------------
227 Function: resolveExplicit
229 Recursively resolves explicit embedding levels and overrides.
230 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
232 Input: Base embedding level and direction
235 Output: Array of embedding levels
237 In/Out: Array of direction classes
240 Note: The function uses two simple counters to keep track of
241 matching explicit codes and PDF. Use the default argument for
242 the outermost call. The nesting counter counts the recursion
243 depth and not the embedding level.
244 ------------------------------------------------------------------------*/
245 typedef struct tagStackItem
{
251 #define push_stack(l,o,i) \
253 stack[stack_top].level = l; \
254 stack[stack_top].override = o; \
255 stack[stack_top].isolate = i;} while(0)
257 #define pop_stack() do { stack_top++; } while(0)
259 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
261 static void resolveExplicit(int level
, WORD
*pclass
, WORD
*poutLevel
, WORD
*poutOverrides
, int count
, BOOL initialOverride
)
264 int overflow_isolate_count
= 0;
265 int overflow_embedding_count
= 0;
266 int valid_isolate_count
= 0;
269 StackItem stack
[MAX_DEPTH
+2];
270 int stack_top
= MAX_DEPTH
+1;
272 stack
[stack_top
].level
= level
;
273 stack
[stack_top
].override
= NI
;
274 stack
[stack_top
].isolate
= FALSE
;
279 push_stack(level
, R
, FALSE
);
281 push_stack(level
, L
, FALSE
);
284 for (i
= 0; i
< count
; i
++)
286 poutOverrides
[i
] = stack
[stack_top
].override
;
289 if (pclass
[i
] == RLE
)
291 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
292 poutLevel
[i
] = stack
[stack_top
].level
;
293 if (valid_level(least_odd
))
294 push_stack(least_odd
, NI
, FALSE
);
295 else if (overflow_isolate_count
== 0)
296 overflow_embedding_count
++;
299 else if (pclass
[i
] == LRE
)
301 int least_even
= GreaterEven(stack
[stack_top
].level
);
302 poutLevel
[i
] = stack
[stack_top
].level
;
303 if (valid_level(least_even
))
304 push_stack(least_even
, NI
, FALSE
);
305 else if (overflow_isolate_count
== 0)
306 overflow_embedding_count
++;
309 else if (pclass
[i
] == RLO
)
311 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
312 poutLevel
[i
] = stack
[stack_top
].level
;
313 if (valid_level(least_odd
))
314 push_stack(least_odd
, R
, FALSE
);
315 else if (overflow_isolate_count
== 0)
316 overflow_embedding_count
++;
319 else if (pclass
[i
] == LRO
)
321 int least_even
= GreaterEven(stack
[stack_top
].level
);
322 poutLevel
[i
] = stack
[stack_top
].level
;
323 if (valid_level(least_even
))
324 push_stack(least_even
, L
, FALSE
);
325 else if (overflow_isolate_count
== 0)
326 overflow_embedding_count
++;
329 else if (pclass
[i
] == RLI
)
331 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
332 poutLevel
[i
] = stack
[stack_top
].level
;
333 if (valid_level(least_odd
))
335 valid_isolate_count
++;
336 push_stack(least_odd
, NI
, TRUE
);
339 overflow_isolate_count
++;
342 else if (pclass
[i
] == LRI
)
344 int least_even
= GreaterEven(stack
[stack_top
].level
);
345 poutLevel
[i
] = stack
[stack_top
].level
;
346 if (valid_level(least_even
))
348 valid_isolate_count
++;
349 push_stack(least_even
, NI
, TRUE
);
352 overflow_isolate_count
++;
355 else if (pclass
[i
] == FSI
)
360 poutLevel
[i
] = stack
[stack_top
].level
;
361 for (j
= i
+1; j
< count
; j
++)
363 if (pclass
[j
] == LRI
|| pclass
[j
] == RLI
|| pclass
[j
] == FSI
)
368 else if (pclass
[j
] == PDI
)
377 if (skipping
) continue;
384 else if (pclass
[j
] == R
|| pclass
[j
] == AL
)
392 int least_odd
= GreaterOdd(stack
[stack_top
].level
);
393 if (valid_level(least_odd
))
395 valid_isolate_count
++;
396 push_stack(least_odd
, NI
, TRUE
);
399 overflow_isolate_count
++;
403 int least_even
= GreaterEven(stack
[stack_top
].level
);
404 if (valid_level(least_even
))
406 valid_isolate_count
++;
407 push_stack(least_even
, NI
, TRUE
);
410 overflow_isolate_count
++;
414 else if (pclass
[i
] != B
&& pclass
[i
] != BN
&& pclass
[i
] != PDI
&& pclass
[i
] != PDF
)
416 poutLevel
[i
] = stack
[stack_top
].level
;
417 if (stack
[stack_top
].override
!= NI
)
418 pclass
[i
] = stack
[stack_top
].override
;
421 else if (pclass
[i
] == PDI
)
423 if (overflow_isolate_count
) overflow_isolate_count
--;
424 else if (!valid_isolate_count
) {/* do nothing */}
427 overflow_embedding_count
= 0;
428 while (!stack
[stack_top
].isolate
) pop_stack();
430 valid_isolate_count
--;
432 poutLevel
[i
] = stack
[stack_top
].level
;
435 else if (pclass
[i
] == PDF
)
437 poutLevel
[i
] = stack
[stack_top
].level
;
438 if (overflow_isolate_count
) {/* do nothing */}
439 else if (overflow_embedding_count
) overflow_embedding_count
--;
440 else if (!stack
[stack_top
].isolate
&& stack_top
< (MAX_DEPTH
+1))
445 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
446 for (i
= 0; i
< count
; i
++)
447 if (pclass
[i
] == RLE
|| pclass
[i
] == LRE
|| pclass
[i
] == RLO
|| pclass
[i
] == LRO
|| pclass
[i
] == PDF
)
451 static inline int previousValidChar(const WORD
*pcls
, int index
, int back_fence
)
453 if (index
== -1 || index
== back_fence
) return index
;
455 while (index
> back_fence
&& pcls
[index
] == BN
) index
--;
459 static inline int nextValidChar(const WORD
*pcls
, int index
, int front_fence
)
461 if (index
== front_fence
) return index
;
463 while (index
< front_fence
&& pcls
[index
] == BN
) index
++;
467 typedef struct tagRun
474 typedef struct tagRunChar
480 typedef struct tagIsolatedRun
491 static inline int iso_nextValidChar(IsolatedRun
*iso_run
, int index
)
493 if (index
>= (iso_run
->length
-1)) return -1;
495 while (index
< iso_run
->length
&& *iso_run
->item
[index
].pcls
== BN
) index
++;
496 if (index
== iso_run
->length
) return -1;
500 static inline int iso_previousValidChar(IsolatedRun
*iso_run
, int index
)
503 if (index
<= 0) return -1;
505 while (index
> -1 && *iso_run
->item
[index
].pcls
== BN
) index
--;
509 static inline void iso_dump_types(const char* header
, IsolatedRun
*iso_run
)
514 for (i
= 0; i
< iso_run
->length
&& len
< 200; i
++)
516 TRACE(" %s",debug_type
[*iso_run
->item
[i
].pcls
]);
517 len
+= strlen(debug_type
[*iso_run
->item
[i
].pcls
])+1;
519 if (i
!= iso_run
->length
)
524 /*------------------------------------------------------------------------
525 Function: resolveWeak
527 Resolves the directionality of numeric and other weak character types
529 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
531 Input: Array of embedding levels
534 In/Out: Array of directional classes
536 Note: On input only these directional classes are expected
537 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
538 ------------------------------------------------------------------------*/
540 static void resolveWeak(IsolatedRun
* iso_run
)
545 for (i
=0; i
< iso_run
->length
; i
++)
547 if (*iso_run
->item
[i
].pcls
== NSM
)
549 int j
= iso_previousValidChar(iso_run
, i
);
551 *iso_run
->item
[i
].pcls
= iso_run
->sos
;
552 else if (*iso_run
->item
[j
].pcls
>= LRI
)
553 *iso_run
->item
[i
].pcls
= ON
;
555 *iso_run
->item
[i
].pcls
= *iso_run
->item
[j
].pcls
;
560 for (i
= 0; i
< iso_run
->length
; i
++)
562 if (*iso_run
->item
[i
].pcls
== EN
)
564 int j
= iso_previousValidChar(iso_run
, i
);
567 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
|| *iso_run
->item
[j
].pcls
== AL
)
569 if (*iso_run
->item
[j
].pcls
== AL
)
570 *iso_run
->item
[i
].pcls
= AN
;
573 j
= iso_previousValidChar(iso_run
, j
);
579 for (i
= 0; i
< iso_run
->length
; i
++)
581 if (*iso_run
->item
[i
].pcls
== AL
)
582 *iso_run
->item
[i
].pcls
= R
;
586 for (i
= 0; i
< iso_run
->length
; i
++)
588 if (*iso_run
->item
[i
].pcls
== ES
)
590 int b
= iso_previousValidChar(iso_run
, i
);
591 int f
= iso_nextValidChar(iso_run
, i
);
593 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
594 *iso_run
->item
[i
].pcls
= EN
;
596 else if (*iso_run
->item
[i
].pcls
== CS
)
598 int b
= iso_previousValidChar(iso_run
, i
);
599 int f
= iso_nextValidChar(iso_run
, i
);
601 if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== EN
&& *iso_run
->item
[f
].pcls
== EN
)
602 *iso_run
->item
[i
].pcls
= EN
;
603 else if (b
> -1 && f
> -1 && *iso_run
->item
[b
].pcls
== AN
&& *iso_run
->item
[f
].pcls
== AN
)
604 *iso_run
->item
[i
].pcls
= AN
;
609 for (i
= 0; i
< iso_run
->length
; i
++)
611 if (*iso_run
->item
[i
].pcls
== ET
)
614 for (j
= i
-1 ; j
> -1; j
--)
616 if (*iso_run
->item
[j
].pcls
== BN
) continue;
617 if (*iso_run
->item
[j
].pcls
== ET
) continue;
618 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
621 if (*iso_run
->item
[i
].pcls
== ET
)
623 for (j
= i
+1; j
< iso_run
->length
; j
++)
625 if (*iso_run
->item
[j
].pcls
== BN
) continue;
626 if (*iso_run
->item
[j
].pcls
== ET
) continue;
627 else if (*iso_run
->item
[j
].pcls
== EN
) *iso_run
->item
[i
].pcls
= EN
;
635 for (i
= 0; i
< iso_run
->length
; i
++)
637 if (*iso_run
->item
[i
].pcls
== ET
|| *iso_run
->item
[i
].pcls
== ES
|| *iso_run
->item
[i
].pcls
== CS
|| *iso_run
->item
[i
].pcls
== ON
)
641 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
642 *iso_run
->item
[b
].pcls
= ON
;
643 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
644 *iso_run
->item
[f
].pcls
= ON
;
646 *iso_run
->item
[i
].pcls
= ON
;
651 for (i
= 0; i
< iso_run
->length
; i
++)
653 if (*iso_run
->item
[i
].pcls
== EN
)
656 for (j
= iso_previousValidChar(iso_run
, i
); j
> -1; j
= iso_previousValidChar(iso_run
, j
))
657 if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== L
)
659 if (*iso_run
->item
[j
].pcls
== L
)
660 *iso_run
->item
[i
].pcls
= L
;
663 if (iso_run
->sos
== L
&& j
== -1)
664 *iso_run
->item
[i
].pcls
= L
;
669 typedef struct tagBracketPair
675 static int compr(const void *a
, const void* b
)
677 return ((BracketPair
*)a
)->start
- ((BracketPair
*)b
)->start
;
680 static BracketPair
*computeBracketPairs(IsolatedRun
*iso_run
)
684 int stack_top
= iso_run
->length
;
685 BracketPair
*out
= NULL
;
689 open_stack
= HeapAlloc(GetProcessHeap(), 0, sizeof(WCHAR
) * iso_run
->length
);
690 stack_index
= HeapAlloc(GetProcessHeap(), 0, sizeof(int) * iso_run
->length
);
692 for (i
= 0; i
< iso_run
->length
; i
++)
694 unsigned short ubv
= get_table_entry(bidi_bracket_table
, iso_run
->item
[i
].ch
);
699 out
= HeapAlloc(GetProcessHeap(),0,sizeof(BracketPair
));
706 open_stack
[stack_top
] = iso_run
->item
[i
].ch
+ (signed char)(ubv
& 0xff);
707 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
708 if (open_stack
[stack_top
] == 0x232A)
709 open_stack
[stack_top
] = 0x3009;
710 stack_index
[stack_top
] = i
;
712 else if ((ubv
>> 8) == 1)
715 if (stack_top
== iso_run
->length
) continue;
716 for (j
= stack_top
; j
< iso_run
->length
; j
++)
718 WCHAR c
= iso_run
->item
[i
].ch
;
719 if (c
== 0x232A) c
= 0x3009;
720 if (c
== open_stack
[j
])
722 out
[pair_count
].start
= stack_index
[j
];
723 out
[pair_count
].end
= i
;
725 out
= HeapReAlloc(GetProcessHeap(), 0, out
, sizeof(BracketPair
) * (pair_count
+1));
726 out
[pair_count
].start
= -1;
736 HeapFree(GetProcessHeap(),0,out
);
739 else if (pair_count
> 1)
740 qsort(out
, pair_count
, sizeof(BracketPair
), compr
);
742 HeapFree(GetProcessHeap(), 0, open_stack
);
743 HeapFree(GetProcessHeap(), 0, stack_index
);
747 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
749 /*------------------------------------------------------------------------
750 Function: resolveNeutrals
752 Resolves the directionality of neutral character types.
754 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
756 Input: Array of embedding levels
760 In/Out: Array of directional classes
762 Note: On input only these directional classes are expected
763 R, L, NI, AN, EN and BN
765 W8 resolves a number of ENs to L
766 ------------------------------------------------------------------------*/
767 static void resolveNeutrals(IsolatedRun
*iso_run
)
770 BracketPair
*pairs
= NULL
;
772 /* Translate isolates into NI */
773 for (i
= 0; i
< iso_run
->length
; i
++)
775 if (*iso_run
->item
[i
].pcls
>= LRI
)
776 *iso_run
->item
[i
].pcls
= NI
;
778 switch(*iso_run
->item
[i
].pcls
)
782 case WS
: *iso_run
->item
[i
].pcls
= NI
;
785 ASSERT(*iso_run
->item
[i
].pcls
< 5 || *iso_run
->item
[i
].pcls
== BN
); /* "Only NI, L, R, AN, EN and BN are allowed" */
788 /* N0: Skipping bracketed pairs for now */
789 pairs
= computeBracketPairs(iso_run
);
792 BracketPair
*p
= &pairs
[0];
794 while (p
->start
>= 0)
797 int e
= EmbeddingDirection(iso_run
->e
);
798 int o
= EmbeddingDirection(iso_run
->e
+1);
800 TRACE("Bracket Pair [%i - %i]\n",p
->start
, p
->end
);
803 for (j
= p
->start
+1; j
< p
->end
; j
++)
805 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
807 *iso_run
->item
[p
->start
].pcls
= e
;
808 *iso_run
->item
[p
->end
].pcls
= e
;
811 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
815 if (j
== p
->end
&& flag_o
)
817 for (j
= p
->start
; j
>= 0; j
--)
819 if (N0_TYPE(*iso_run
->item
[j
].pcls
) == o
)
821 *iso_run
->item
[p
->start
].pcls
= o
;
822 *iso_run
->item
[p
->end
].pcls
= o
;
825 else if (N0_TYPE(*iso_run
->item
[j
].pcls
) == e
)
827 *iso_run
->item
[p
->start
].pcls
= e
;
828 *iso_run
->item
[p
->end
].pcls
= e
;
834 *iso_run
->item
[p
->start
].pcls
= iso_run
->sos
;
835 *iso_run
->item
[p
->end
].pcls
= iso_run
->sos
;
842 HeapFree(GetProcessHeap(),0,pairs
);
846 for (i
= 0; i
< iso_run
->length
; i
++)
850 if (*iso_run
->item
[i
].pcls
== NI
)
853 int b
= iso_previousValidChar(iso_run
, i
);
862 if (*iso_run
->item
[b
].pcls
== R
|| *iso_run
->item
[b
].pcls
== AN
|| *iso_run
->item
[b
].pcls
== EN
)
864 else if (*iso_run
->item
[b
].pcls
== L
)
866 else /* No string type */
869 j
= iso_nextValidChar(iso_run
, i
);
870 while (j
> -1 && *iso_run
->item
[j
].pcls
== NI
) j
= iso_nextValidChar(iso_run
, j
);
877 else if (*iso_run
->item
[j
].pcls
== R
|| *iso_run
->item
[j
].pcls
== AN
|| *iso_run
->item
[j
].pcls
== EN
)
879 else if (*iso_run
->item
[j
].pcls
== L
)
881 else /* No string type */
886 for (b
= i
; b
< j
&& b
< iso_run
->length
; b
++)
887 *iso_run
->item
[b
].pcls
= r
;
893 for (i
= 0; i
< iso_run
->length
; i
++)
895 if (*iso_run
->item
[i
].pcls
== NI
)
899 *iso_run
->item
[i
].pcls
= EmbeddingDirection(iso_run
->e
);
900 if (b
> -1 && *iso_run
->item
[b
].pcls
== BN
)
901 *iso_run
->item
[b
].pcls
= EmbeddingDirection(iso_run
->e
);
902 if (f
< iso_run
->length
&& *iso_run
->item
[f
].pcls
== BN
)
903 *iso_run
->item
[f
].pcls
= EmbeddingDirection(iso_run
->e
);
908 /*------------------------------------------------------------------------
909 Function: resolveImplicit
911 Recursively resolves implicit embedding levels.
912 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
914 Input: Array of direction classes
918 In/Out: Array of embedding levels
920 Note: levels may exceed 15 on output.
921 Accepted subset of direction classes
923 ------------------------------------------------------------------------*/
924 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
929 for (i
= sos
; i
<= eos
; i
++)
934 ASSERT(pcls
[i
] > 0); /* "No Neutrals allowed to survive here." */
935 ASSERT(pcls
[i
] < 5); /* "Out of range." */
937 if (odd(plevel
[i
]) && (pcls
[i
] == L
|| pcls
[i
] == EN
|| pcls
[i
] == AN
))
939 else if (!odd(plevel
[i
]) && pcls
[i
] == R
)
941 else if (!odd(plevel
[i
]) && (pcls
[i
] == EN
|| pcls
[i
] == AN
))
946 static void resolveResolved(unsigned baselevel
, const WORD
* pcls
, WORD
*plevel
, int sos
, int eos
)
951 for (i
= sos
; i
<= eos
; i
++)
953 if (pcls
[i
] == B
|| pcls
[i
] == S
)
956 while (i
> sos
&& j
>= sos
&&
957 (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
958 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
959 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
960 plevel
[j
--] = baselevel
;
961 plevel
[i
] = baselevel
;
964 (pcls
[i
] == WS
|| pcls
[i
] == FSI
|| pcls
[i
] == LRI
|| pcls
[i
] == RLI
||
965 pcls
[i
] == PDI
|| pcls
[i
] == LRE
|| pcls
[i
] == RLE
|| pcls
[i
] == LRO
||
966 pcls
[i
] == RLO
|| pcls
[i
] == PDF
|| pcls
[i
] == BN
))
969 while (j
>= sos
&& (pcls
[j
] == WS
|| pcls
[j
] == FSI
|| pcls
[j
] == LRI
|| pcls
[j
] == RLI
||
970 pcls
[j
] == PDI
|| pcls
[j
] == LRE
|| pcls
[j
] == RLE
|| pcls
[j
] == LRO
||
971 pcls
[j
] == RLO
|| pcls
[j
] == PDF
|| pcls
[j
] == BN
))
972 plevel
[j
--] = baselevel
;
977 static void computeIsolatingRunsSet(unsigned baselevel
, WORD
*pcls
, WORD
*pLevel
, LPCWSTR lpString
, int uCount
, struct list
*set
)
979 int run_start
, run_end
, i
;
982 IsolatedRun
*current_isolated
;
984 runs
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(Run
));
991 while (run_start
< uCount
)
993 run_end
= nextValidChar(pcls
, run_start
, uCount
);
994 while (run_end
< uCount
&& pLevel
[run_end
] == pLevel
[run_start
]) run_end
= nextValidChar(pcls
, run_end
, uCount
);
996 runs
[run_count
].start
= run_start
;
997 runs
[run_count
].end
= run_end
;
998 runs
[run_count
].e
= pLevel
[run_start
];
999 run_start
= nextValidChar(pcls
, run_end
, uCount
);
1003 /* Build Isolating Runs */
1005 while (i
< run_count
)
1008 if (runs
[k
].start
>= 0)
1010 int type_fence
, real_end
;
1012 current_isolated
= HeapAlloc(GetProcessHeap(), 0, sizeof(IsolatedRun
) + sizeof(RunChar
)*uCount
);
1013 if (!current_isolated
) break;
1015 run_start
= runs
[k
].start
;
1016 current_isolated
->e
= runs
[k
].e
;
1017 current_isolated
->length
= (runs
[k
].end
- runs
[k
].start
)+1;
1019 for (j
= 0; j
< current_isolated
->length
; j
++)
1021 current_isolated
->item
[j
].pcls
= &pcls
[runs
[k
].start
+j
];
1022 current_isolated
->item
[j
].ch
= lpString
[runs
[k
].start
+j
];
1025 run_end
= runs
[k
].end
;
1027 TRACE("{ [%i -- %i]",run_start
, run_end
);
1029 if (pcls
[run_end
] == BN
)
1030 run_end
= previousValidChar(pcls
, run_end
, runs
[k
].start
);
1032 while (run_end
< uCount
&& (pcls
[run_end
] == RLI
|| pcls
[run_end
] == LRI
|| pcls
[run_end
] == FSI
))
1036 while (j
< run_count
&& pcls
[runs
[j
].start
] != PDI
) j
++;
1037 if (j
< run_count
&& runs
[i
].e
!= runs
[j
].e
)
1046 int l
= current_isolated
->length
;
1048 current_isolated
->length
+= (runs
[j
].end
- runs
[j
].start
)+1;
1049 for (m
= 0; l
< current_isolated
->length
; l
++, m
++)
1051 current_isolated
->item
[l
].pcls
= &pcls
[runs
[j
].start
+m
];
1052 current_isolated
->item
[l
].ch
= lpString
[runs
[j
].start
+m
];
1055 TRACE("[%i -- %i]",runs
[j
].start
, runs
[j
].end
);
1057 run_end
= runs
[j
].end
;
1058 if (pcls
[run_end
] == BN
)
1059 run_end
= previousValidChar(pcls
, run_end
, runs
[i
].start
);
1070 type_fence
= previousValidChar(pcls
, run_start
, -1);
1072 if (type_fence
== -1)
1073 current_isolated
->sos
= (baselevel
> pLevel
[run_start
])?baselevel
:pLevel
[run_start
];
1075 current_isolated
->sos
= (pLevel
[type_fence
] > pLevel
[run_start
])?pLevel
[type_fence
]:pLevel
[run_start
];
1077 current_isolated
->sos
= EmbeddingDirection(current_isolated
->sos
);
1079 if (run_end
== uCount
)
1080 current_isolated
->eos
= current_isolated
->sos
;
1083 /* eos could be an BN */
1084 if ( pcls
[run_end
] == BN
)
1086 real_end
= previousValidChar(pcls
, run_end
, run_start
-1);
1087 if (real_end
< run_start
)
1088 real_end
= run_start
;
1093 type_fence
= nextValidChar(pcls
, run_end
, uCount
);
1094 if (type_fence
== uCount
)
1095 current_isolated
->eos
= (baselevel
> pLevel
[real_end
])?baselevel
:pLevel
[real_end
];
1097 current_isolated
->eos
= (pLevel
[type_fence
] > pLevel
[real_end
])?pLevel
[type_fence
]:pLevel
[real_end
];
1099 current_isolated
->eos
= EmbeddingDirection(current_isolated
->eos
);
1102 list_add_tail(set
, ¤t_isolated
->entry
);
1103 TRACE(" } level %i {%s <--> %s}\n",current_isolated
->e
, debug_type
[current_isolated
->sos
], debug_type
[current_isolated
->eos
]);
1108 HeapFree(GetProcessHeap(), 0, runs
);
1111 /*************************************************************
1112 * BIDI_DeterminLevels
1114 BOOL
BIDI_DetermineLevels(
1115 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
1116 INT uCount
, /* [in] Number of WCHARs in string. */
1117 const SCRIPT_STATE
*s
,
1118 const SCRIPT_CONTROL
*c
,
1119 WORD
*lpOutLevels
, /* [out] final string levels */
1120 WORD
*lpOutOverrides
/* [out] final string overrides */
1124 unsigned baselevel
= 0;
1125 struct list IsolatingRuns
;
1126 IsolatedRun
*iso_run
, *next
;
1128 TRACE("%s, %d\n", debugstr_wn(lpString
, uCount
), uCount
);
1130 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* sizeof(WORD
));
1133 WARN("Out of memory\n");
1137 baselevel
= s
->uBidiLevel
;
1139 classify(lpString
, chartype
, uCount
, c
);
1140 if (TRACE_ON(bidi
)) dump_types("Start ", chartype
, 0, uCount
);
1142 memset(lpOutOverrides
, 0, sizeof(WORD
) * uCount
);
1144 /* resolve explicit */
1145 resolveExplicit(baselevel
, chartype
, lpOutLevels
, lpOutOverrides
, uCount
, s
->fOverrideDirection
);
1146 if (TRACE_ON(bidi
)) dump_types("After Explicit", chartype
, 0, uCount
);
1148 /* X10/BD13: Computer Isolating runs */
1149 computeIsolatingRunsSet(baselevel
, chartype
, lpOutLevels
, lpString
, uCount
, &IsolatingRuns
);
1151 LIST_FOR_EACH_ENTRY_SAFE(iso_run
, next
, &IsolatingRuns
, IsolatedRun
, entry
)
1153 if (TRACE_ON(bidi
)) iso_dump_types("Run", iso_run
);
1156 resolveWeak(iso_run
);
1157 if (TRACE_ON(bidi
)) iso_dump_types("After Weak", iso_run
);
1159 /* resolve neutrals */
1160 resolveNeutrals(iso_run
);
1161 if (TRACE_ON(bidi
)) iso_dump_types("After Neutrals", iso_run
);
1163 list_remove(&iso_run
->entry
);
1164 HeapFree(GetProcessHeap(),0,iso_run
);
1167 if (TRACE_ON(bidi
)) dump_types("Before Implicit", chartype
, 0, uCount
);
1168 /* resolveImplicit */
1169 resolveImplicit(chartype
, lpOutLevels
, 0, uCount
-1);
1171 /* resolveResolvedLevels*/
1172 classify(lpString
, chartype
, uCount
, c
);
1173 resolveResolved(baselevel
, chartype
, lpOutLevels
, 0, uCount
-1);
1175 HeapFree(GetProcessHeap(), 0, chartype
);
1179 /* reverse cch indexes */
1180 static void reverse(int *pidx
, int cch
)
1184 for (; ich
< --cch
; ich
++)
1187 pidx
[ich
] = pidx
[cch
];
1193 /*------------------------------------------------------------------------
1194 Functions: reorder/reorderLevel
1196 Recursively reorders the display string
1197 "From the highest level down, reverse all characters at that level and
1198 higher, down to the lowest odd level"
1200 Implements rule L2 of the Unicode bidi Algorithm.
1202 Input: Array of embedding levels
1204 Flag enabling reversal (set to false by initial caller)
1206 In/Out: Text to reorder
1208 Note: levels may exceed 15 resp. 61 on input.
1210 Rule L3 - reorder combining marks is not implemented here
1211 Rule L4 - glyph mirroring is implemented as a display option below
1213 Note: this should be applied a line at a time
1214 -------------------------------------------------------------------------*/
1215 int BIDI_ReorderV2lLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1219 /* true as soon as first odd level encountered */
1220 fReverse
= fReverse
|| odd(level
);
1222 for (; ich
< cch
; ich
++)
1224 if (plevel
[ich
] < level
)
1228 else if (plevel
[ich
] > level
)
1230 ich
+= BIDI_ReorderV2lLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1231 cch
- ich
, fReverse
) - 1;
1236 reverse(pIndexs
, ich
);
1241 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1242 int BIDI_ReorderL2vLevel(int level
, int *pIndexs
, const BYTE
* plevel
, int cch
, BOOL fReverse
)
1247 /* true as soon as first odd level encountered */
1248 fReverse
= fReverse
|| odd(level
);
1250 for (; ich
< cch
; ich
++)
1252 if (plevel
[ich
] < level
)
1254 else if (plevel
[ich
] > level
)
1259 reverse(pIndexs
, ich
);
1265 for (; ich
< cch
; ich
++)
1266 if (plevel
[ich
] < level
)
1268 else if (plevel
[ich
] > level
)
1269 ich
+= BIDI_ReorderL2vLevel(level
+ 1, pIndexs
+ ich
, plevel
+ ich
,
1270 cch
- ich
, fReverse
) - 1;
1276 BOOL
BIDI_GetStrengths(LPCWSTR lpString
, INT uCount
, const SCRIPT_CONTROL
*c
,
1280 classify(lpString
, lpStrength
, uCount
, c
);
1282 for ( i
= 0; i
< uCount
; i
++)
1284 switch(lpStrength
[i
])
1293 lpStrength
[i
] = BIDI_STRONG
;
1302 lpStrength
[i
] = BIDI_WEAK
;
1308 default: /* Neutrals and NSM */
1309 lpStrength
[i
] = BIDI_NEUTRAL
;