Git conversion: Make reactos the root directory, move rosapps, rostests, wallpapers...
[reactos.git] / dll / win32 / usp10 / indic.c
1 /*
2 * Implementation of Indic Syllables for the Uniscribe Script Processor
3 *
4 * Copyright 2011 CodeWeavers, Aric Stewart
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 *
20 */
21
22 #include "usp10_internal.h"
23
24 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
25
26 static void debug_output_string(const WCHAR *str, unsigned int char_count, lexical_function f)
27 {
28 int i;
29 if (TRACE_ON(uniscribe))
30 {
31 for (i = 0; i < char_count; ++i)
32 {
33 switch (f(str[i]))
34 {
35 case lex_Consonant: TRACE("C"); break;
36 case lex_Ra: TRACE("Ra"); break;
37 case lex_Vowel: TRACE("V"); break;
38 case lex_Nukta: TRACE("N"); break;
39 case lex_Halant: TRACE("H"); break;
40 case lex_ZWNJ: TRACE("Zwnj"); break;
41 case lex_ZWJ: TRACE("Zwj"); break;
42 case lex_Matra_post: TRACE("Mp");break;
43 case lex_Matra_above: TRACE("Ma");break;
44 case lex_Matra_below: TRACE("Mb");break;
45 case lex_Matra_pre: TRACE("Mm");break;
46 case lex_Modifier: TRACE("Sm"); break;
47 case lex_Vedic: TRACE("Vd"); break;
48 case lex_Anudatta: TRACE("A"); break;
49 case lex_Composed_Vowel: TRACE("t"); break;
50 default:
51 TRACE("X"); break;
52 }
53 }
54 TRACE("\n");
55 }
56 }
57
58 static inline BOOL is_matra( int type )
59 {
60 return (type == lex_Matra_above || type == lex_Matra_below ||
61 type == lex_Matra_pre || type == lex_Matra_post ||
62 type == lex_Composed_Vowel);
63 }
64
65 static inline BOOL is_joiner( int type )
66 {
67 return (type == lex_ZWJ || type == lex_ZWNJ);
68 }
69
70 static int consonant_header(const WCHAR *input, unsigned int cChar,
71 unsigned int start, unsigned int next, lexical_function lex)
72 {
73 if (!is_consonant( lex(input[next]) )) return -1;
74 next++;
75 if ((next < cChar) && lex(input[next]) == lex_Nukta)
76 next++;
77 if ((next < cChar) && lex(input[next])==lex_Halant)
78 {
79 next++;
80 if((next < cChar) && is_joiner( lex(input[next]) ))
81 next++;
82 if ((next < cChar) && is_consonant( lex(input[next]) ))
83 return next;
84 }
85 else if ((next < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant)
86 {
87 next+=2;
88 if ((next < cChar) && is_consonant( lex(input[next]) ))
89 return next;
90 }
91 return -1;
92 }
93
94 static int parse_consonant_syllable(const WCHAR *input, unsigned int cChar,
95 unsigned int start, unsigned int *main, unsigned int next, lexical_function lex)
96 {
97 int check;
98 int headers = 0;
99 do
100 {
101 check = consonant_header(input,cChar,start,next,lex);
102 if (check != -1)
103 {
104 next = check;
105 headers++;
106 }
107 } while (check != -1);
108 if (headers || is_consonant( lex(input[next]) ))
109 {
110 *main = next;
111 next++;
112 }
113 else
114 return -1;
115 if ((next < cChar) && lex(input[next]) == lex_Nukta)
116 next++;
117 if ((next < cChar) && lex(input[next]) == lex_Anudatta)
118 next++;
119
120 if ((next < cChar) && lex(input[next]) == lex_Halant)
121 {
122 next++;
123 if((next < cChar) && is_joiner( lex(input[next]) ))
124 next++;
125 }
126 else if (next < cChar)
127 {
128 while((next < cChar) && is_matra( lex(input[next]) ))
129 next++;
130 if ((next < cChar) && lex(input[next]) == lex_Nukta)
131 next++;
132 if ((next < cChar) && lex(input[next]) == lex_Halant)
133 next++;
134 }
135 if ((next < cChar) && lex(input[next]) == lex_Modifier)
136 next++;
137 if ((next < cChar) && lex(input[next]) == lex_Vedic)
138 next++;
139 return next;
140 }
141
142 static int parse_vowel_syllable(const WCHAR *input, unsigned int cChar,
143 unsigned int start, unsigned int next, lexical_function lex)
144 {
145 if ((next < cChar) && lex(input[next]) == lex_Nukta)
146 next++;
147 if ((next < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant && is_consonant( lex(input[next+2]) ))
148 next+=3;
149 else if ((next < cChar) && lex(input[next])==lex_Halant && is_consonant( lex(input[next+1]) ))
150 next+=2;
151 else if ((next < cChar) && lex(input[next])==lex_ZWJ && is_consonant( lex(input[next+1]) ))
152 next+=2;
153
154 if ((next < cChar) && is_matra( lex(input[next]) ))
155 {
156 while((next < cChar) && is_matra( lex(input[next]) ))
157 next++;
158 if ((next < cChar) && lex(input[next]) == lex_Nukta)
159 next++;
160 if ((next < cChar) && lex(input[next]) == lex_Halant)
161 next++;
162 }
163
164 if ((next < cChar) && lex(input[next]) == lex_Modifier)
165 next++;
166 if ((next < cChar) && lex(input[next]) == lex_Vedic)
167 next++;
168 return next;
169 }
170
171 static int Indic_process_next_syllable(const WCHAR *input, unsigned int cChar,
172 unsigned int start, unsigned int *main, unsigned int next, lexical_function lex)
173 {
174 if (lex(input[next])==lex_Vowel)
175 {
176 *main = next;
177 return parse_vowel_syllable(input, cChar, start, next+1, lex);
178 }
179 else if ((cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_Vowel)
180 {
181 *main = next+2;
182 return parse_vowel_syllable(input, cChar, start, next+3, lex);
183 }
184
185 else if (start == next && lex(input[next])==lex_NBSP)
186 {
187 *main = next;
188 return parse_vowel_syllable(input, cChar, start, next+1, lex);
189 }
190 else if (start == next && (cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_NBSP)
191 {
192 *main = next+2;
193 return parse_vowel_syllable(input, cChar, start, next+3, lex);
194 }
195
196 return parse_consonant_syllable(input, cChar, start, main, next, lex);
197 }
198
199 static BOOL Consonant_is_post_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
200 const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
201 {
202 if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
203 {
204 if (modern)
205 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "pstf") > 0);
206 else
207 {
208 WCHAR cc[2];
209 cc[0] = pwChar[s->base];
210 cc[1] = pwChar[s->base-1];
211 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "pstf") > 0);
212 }
213 }
214 return FALSE;
215 }
216
217 static BOOL Consonant_is_below_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
218 const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
219 {
220 if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
221 {
222 if (modern)
223 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "blwf") > 0);
224 else
225 {
226 WCHAR cc[2];
227 cc[0] = pwChar[s->base];
228 cc[1] = pwChar[s->base-1];
229 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "blwf") > 0);
230 }
231 }
232 return FALSE;
233 }
234
235 static BOOL Consonant_is_pre_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
236 const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
237 {
238 if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
239 {
240 if (modern)
241 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "pref") > 0);
242 else
243 {
244 WCHAR cc[2];
245 cc[0] = pwChar[s->base];
246 cc[1] = pwChar[s->base-1];
247 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "pref") > 0);
248 }
249 }
250 return FALSE;
251 }
252
253 static BOOL Consonant_is_ralf(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
254 const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical)
255 {
256 if ((lexical(pwChar[s->start])==lex_Ra) && s->end > s->start && lexical(pwChar[s->start+1]) == lex_Halant)
257 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->start], 1, 2, "rphf") > 0);
258 return FALSE;
259 }
260
261 static int FindBaseConsonant(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
262 const WCHAR *input, IndicSyllable *s, lexical_function lex, BOOL modern)
263 {
264 int i;
265 BOOL blwf = FALSE;
266 BOOL pref = FALSE;
267
268 /* remove ralf from consideration */
269 if (Consonant_is_ralf(hdc, psa, psc, input, s, lex))
270 {
271 s->ralf = s->start;
272 s->start+=2;
273 }
274
275 /* try to find a base consonant */
276 if (!is_consonant( lex(input[s->base]) ))
277 {
278 for (i = s->end; i >= s->start; i--)
279 if (is_consonant( lex(input[i]) ))
280 {
281 s->base = i;
282 break;
283 }
284 }
285
286 while ((blwf = Consonant_is_below_base_form(hdc, psa, psc, input, s, lex, modern)) || Consonant_is_post_base_form(hdc, psa, psc, input, s, lex, modern) || (pref = Consonant_is_pre_base_form(hdc, psa, psc, input, s, lex, modern)))
287 {
288 if (blwf && s->blwf == -1)
289 s->blwf = s->base - 1;
290 if (pref && s->pref == -1)
291 s->pref = s->base - 1;
292
293 for (i = s->base-1; i >= s->start; i--)
294 if (is_consonant( lex(input[i]) ))
295 {
296 s->base = i;
297 break;
298 }
299 }
300
301 if (s->ralf >= 0)
302 s->start = s->ralf;
303
304 if (s->ralf == s->base)
305 s->ralf = -1;
306
307 return s->base;
308 }
309
310 void Indic_ParseSyllables(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, const WCHAR *input, unsigned int cChar,
311 IndicSyllable **syllables, int *syllable_count, lexical_function lex, BOOL modern)
312 {
313 unsigned int center = 0;
314 int index = 0;
315 int next = 0;
316
317 *syllable_count = 0;
318
319 if (!lex)
320 {
321 ERR("Failure to have required functions\n");
322 return;
323 }
324
325 debug_output_string(input, cChar, lex);
326 while (next != -1)
327 {
328 while((next < cChar) && lex(input[next]) == lex_Generic)
329 next++;
330 index = next;
331 if (next >= cChar)
332 break;
333 next = Indic_process_next_syllable(input, cChar, 0, &center, index, lex);
334 if (next != -1)
335 {
336 if (*syllable_count)
337 *syllables = HeapReAlloc(GetProcessHeap(),0,*syllables, sizeof(IndicSyllable)*(*syllable_count+1));
338 else
339 *syllables = heap_alloc(sizeof(**syllables));
340 (*syllables)[*syllable_count].start = index;
341 (*syllables)[*syllable_count].base = center;
342 (*syllables)[*syllable_count].ralf = -1;
343 (*syllables)[*syllable_count].blwf = -1;
344 (*syllables)[*syllable_count].pref = -1;
345 (*syllables)[*syllable_count].end = next-1;
346 FindBaseConsonant(hdc, psa, psc, input, &(*syllables)[*syllable_count], lex, modern);
347 index = next;
348 *syllable_count = (*syllable_count)+1;
349 }
350 else if (index < cChar)
351 {
352 TRACE("Processing failed at %i\n",index);
353 next = ++index;
354 }
355 }
356 TRACE("Processed %i of %i characters into %i syllables\n",index,cChar,*syllable_count);
357 }
358
359 void Indic_ReorderCharacters(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, WCHAR *input, unsigned int cChar,
360 IndicSyllable **syllables, int *syllable_count, lexical_function lex, reorder_function reorder_f, BOOL modern)
361 {
362 int i;
363
364 if (!reorder_f)
365 {
366 ERR("Failure to have required functions\n");
367 return;
368 }
369
370 Indic_ParseSyllables(hdc, psa, psc, input, cChar, syllables, syllable_count, lex, modern);
371 for (i = 0; i < *syllable_count; i++)
372 reorder_f(input, &(*syllables)[i], lex);
373 }