4a1bf9538efc7732919461519345e6162f0ba45a
[reactos.git] / reactos / dll / win32 / usp10 / indic.c
1 /*
2 * Implementation of Indic Syllables for the Uniscribe Script Processor
3 *
4 * Copyright 2011 CodeWeavers, Aric Stewart
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 *
20 */
21 #include <config.h>
22 #include <stdarg.h>
23 //#include <stdio.h>
24 //#include <stdlib.h>
25
26 #include <windef.h>
27 #include <winbase.h>
28 //#include "winuser.h"
29 #include <wingdi.h>
30 //#include "winnls.h"
31 #include <usp10.h>
32 //#include "winternl.h"
33
34 #include <wine/debug.h>
35 #include "usp10_internal.h"
36
37 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
38
39 static void debug_output_string(LPCWSTR str, int cChar, lexical_function f)
40 {
41 int i;
42 if (TRACE_ON(uniscribe))
43 {
44 for (i = 0; i < cChar; i++)
45 {
46 switch (f(str[i]))
47 {
48 case lex_Consonant: TRACE("C"); break;
49 case lex_Ra: TRACE("Ra"); break;
50 case lex_Vowel: TRACE("V"); break;
51 case lex_Nukta: TRACE("N"); break;
52 case lex_Halant: TRACE("H"); break;
53 case lex_ZWNJ: TRACE("Zwnj"); break;
54 case lex_ZWJ: TRACE("Zwj"); break;
55 case lex_Matra_post: TRACE("Mp");break;
56 case lex_Matra_above: TRACE("Ma");break;
57 case lex_Matra_below: TRACE("Mb");break;
58 case lex_Matra_pre: TRACE("Mm");break;
59 case lex_Modifier: TRACE("Sm"); break;
60 case lex_Vedic: TRACE("Vd"); break;
61 case lex_Anudatta: TRACE("A"); break;
62 case lex_Composed_Vowel: TRACE("t"); break;
63 default:
64 TRACE("X"); break;
65 }
66 }
67 TRACE("\n");
68 }
69 }
70
71 static inline BOOL is_matra( int type )
72 {
73 return (type == lex_Matra_above || type == lex_Matra_below ||
74 type == lex_Matra_pre || type == lex_Matra_post ||
75 type == lex_Composed_Vowel);
76 }
77
78 static inline BOOL is_joiner( int type )
79 {
80 return (type == lex_ZWJ || type == lex_ZWNJ);
81 }
82
83 static INT consonant_header(LPCWSTR input, INT cChar, INT start, INT next,
84 lexical_function lex)
85 {
86 if (!is_consonant( lex(input[next]) )) return -1;
87 next++;
88 if ((next < cChar) && lex(input[next]) == lex_Nukta)
89 next++;
90 if ((next < cChar) && lex(input[next])==lex_Halant)
91 {
92 next++;
93 if((next < cChar) && is_joiner( lex(input[next]) ))
94 next++;
95 if ((next < cChar) && is_consonant( lex(input[next]) ))
96 return next;
97 }
98 else if ((next < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant)
99 {
100 next+=2;
101 if ((next < cChar) && is_consonant( lex(input[next]) ))
102 return next;
103 }
104 return -1;
105 }
106
107 static INT parse_consonant_syllable(LPCWSTR input, INT cChar, INT start,
108 INT *main, INT next, lexical_function lex)
109 {
110 int check;
111 int headers = 0;
112 do
113 {
114 check = consonant_header(input,cChar,start,next,lex);
115 if (check != -1)
116 {
117 next = check;
118 headers++;
119 }
120 } while (check != -1);
121 if (headers || is_consonant( lex(input[next]) ))
122 {
123 *main = next;
124 next++;
125 }
126 else
127 return -1;
128 if ((next < cChar) && lex(input[next]) == lex_Nukta)
129 next++;
130 if ((next < cChar) && lex(input[next]) == lex_Anudatta)
131 next++;
132
133 if ((next < cChar) && lex(input[next]) == lex_Halant)
134 {
135 next++;
136 if((next < cChar) && is_joiner( lex(input[next]) ))
137 next++;
138 }
139 else if (next < cChar)
140 {
141 while((next < cChar) && is_matra( lex(input[next]) ))
142 next++;
143 if ((next < cChar) && lex(input[next]) == lex_Nukta)
144 next++;
145 if ((next < cChar) && lex(input[next]) == lex_Halant)
146 next++;
147 }
148 if ((next < cChar) && lex(input[next]) == lex_Modifier)
149 next++;
150 if ((next < cChar) && lex(input[next]) == lex_Vedic)
151 next++;
152 return next;
153 }
154
155 static INT parse_vowel_syllable(LPCWSTR input, INT cChar, INT start,
156 INT next, lexical_function lex)
157 {
158 if ((next < cChar) && lex(input[next]) == lex_Nukta)
159 next++;
160 if ((next < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant && is_consonant( lex(input[next+2]) ))
161 next+=3;
162 else if ((next < cChar) && lex(input[next])==lex_Halant && is_consonant( lex(input[next+1]) ))
163 next+=2;
164 else if ((next < cChar) && lex(input[next])==lex_ZWJ && is_consonant( lex(input[next+1]) ))
165 next+=2;
166
167 if ((next < cChar) && is_matra( lex(input[next]) ))
168 {
169 while((next < cChar) && is_matra( lex(input[next]) ))
170 next++;
171 if ((next < cChar) && lex(input[next]) == lex_Nukta)
172 next++;
173 if ((next < cChar) && lex(input[next]) == lex_Halant)
174 next++;
175 }
176
177 if ((next < cChar) && lex(input[next]) == lex_Modifier)
178 next++;
179 if ((next < cChar) && lex(input[next]) == lex_Vedic)
180 next++;
181 return next;
182 }
183
184 static INT Indic_process_next_syllable( LPCWSTR input, INT cChar, INT start, INT* main, INT next, lexical_function lex )
185 {
186 if (lex(input[next])==lex_Vowel)
187 {
188 *main = next;
189 return parse_vowel_syllable(input, cChar, start, next+1, lex);
190 }
191 else if ((cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_Vowel)
192 {
193 *main = next+2;
194 return parse_vowel_syllable(input, cChar, start, next+3, lex);
195 }
196
197 else if (start == next && lex(input[next])==lex_NBSP)
198 {
199 *main = next;
200 return parse_vowel_syllable(input, cChar, start, next+1, lex);
201 }
202 else if (start == next && (cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_NBSP)
203 {
204 *main = next+2;
205 return parse_vowel_syllable(input, cChar, start, next+3, lex);
206 }
207
208 return parse_consonant_syllable(input, cChar, start, main, next, lex);
209 }
210
211 static BOOL Consonant_is_post_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPCWSTR pwChar, IndicSyllable *s, lexical_function lexical, BOOL modern)
212 {
213 if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
214 {
215 if (modern)
216 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "pstf") > 0);
217 else
218 {
219 WCHAR cc[2];
220 cc[0] = pwChar[s->base];
221 cc[1] = pwChar[s->base-1];
222 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "pstf") > 0);
223 }
224 }
225 return FALSE;
226 }
227
228 static BOOL Consonant_is_below_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPCWSTR pwChar, IndicSyllable *s, lexical_function lexical, BOOL modern)
229 {
230 if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
231 {
232 if (modern)
233 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "blwf") > 0);
234 else
235 {
236 WCHAR cc[2];
237 cc[0] = pwChar[s->base];
238 cc[1] = pwChar[s->base-1];
239 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "blwf") > 0);
240 }
241 }
242 return FALSE;
243 }
244
245 static BOOL Consonant_is_pre_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPCWSTR pwChar, IndicSyllable *s, lexical_function lexical, BOOL modern)
246 {
247 if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
248 {
249 if (modern)
250 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "pref") > 0);
251 else
252 {
253 WCHAR cc[2];
254 cc[0] = pwChar[s->base];
255 cc[1] = pwChar[s->base-1];
256 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "pref") > 0);
257 }
258 }
259 return FALSE;
260 }
261
262 static BOOL Consonant_is_ralf(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPCWSTR pwChar, IndicSyllable *s, lexical_function lexical)
263 {
264 if ((lexical(pwChar[s->start])==lex_Ra) && s->end > s->start && lexical(pwChar[s->start+1]) == lex_Halant)
265 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->start], 1, 2, "rphf") > 0);
266 return FALSE;
267 }
268
269 static int FindBaseConsonant(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPCWSTR input, IndicSyllable *s, lexical_function lex, BOOL modern)
270 {
271 int i;
272 BOOL blwf = FALSE;
273 BOOL pref = FALSE;
274
275 /* remove ralf from consideration */
276 if (Consonant_is_ralf(hdc, psa, psc, input, s, lex))
277 {
278 s->ralf = s->start;
279 s->start+=2;
280 }
281
282 /* try to find a base consonant */
283 if (!is_consonant( lex(input[s->base]) ))
284 {
285 for (i = s->end; i >= s->start; i--)
286 if (is_consonant( lex(input[i]) ))
287 {
288 s->base = i;
289 break;
290 }
291 }
292
293 while ((blwf = Consonant_is_below_base_form(hdc, psa, psc, input, s, lex, modern)) || Consonant_is_post_base_form(hdc, psa, psc, input, s, lex, modern) || (pref = Consonant_is_pre_base_form(hdc, psa, psc, input, s, lex, modern)))
294 {
295 if (blwf && s->blwf == -1)
296 s->blwf = s->base - 1;
297 if (pref && s->pref == -1)
298 s->pref = s->base - 1;
299
300 for (i = s->base-1; i >= s->start; i--)
301 if (is_consonant( lex(input[i]) ))
302 {
303 s->base = i;
304 break;
305 }
306 }
307
308 if (s->ralf >= 0)
309 s->start = s->ralf;
310
311 if (s->ralf == s->base)
312 s->ralf = -1;
313
314 return s->base;
315 }
316
317 void Indic_ParseSyllables( HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPCWSTR input, const int cChar, IndicSyllable **syllables, int *syllable_count, lexical_function lex, BOOL modern)
318 {
319 int index = 0;
320 int next = 0;
321 int center = 0;
322
323 *syllable_count = 0;
324
325 if (!lex)
326 {
327 ERR("Failure to have required functions\n");
328 return;
329 }
330
331 debug_output_string(input, cChar, lex);
332 while (next != -1)
333 {
334 while((next < cChar) && lex(input[next]) == lex_Generic)
335 next++;
336 index = next;
337 if (next >= cChar)
338 break;
339 next = Indic_process_next_syllable(input, cChar, 0, &center, index, lex);
340 if (next != -1)
341 {
342 if (*syllable_count)
343 *syllables = HeapReAlloc(GetProcessHeap(),0,*syllables, sizeof(IndicSyllable)*(*syllable_count+1));
344 else
345 *syllables = HeapAlloc(GetProcessHeap(),0,sizeof(IndicSyllable));
346 (*syllables)[*syllable_count].start = index;
347 (*syllables)[*syllable_count].base = center;
348 (*syllables)[*syllable_count].ralf = -1;
349 (*syllables)[*syllable_count].blwf = -1;
350 (*syllables)[*syllable_count].pref = -1;
351 (*syllables)[*syllable_count].end = next-1;
352 FindBaseConsonant(hdc, psa, psc, input, &(*syllables)[*syllable_count], lex, modern);
353 index = next;
354 *syllable_count = (*syllable_count)+1;
355 }
356 else if (index < cChar)
357 {
358 TRACE("Processing failed at %i\n",index);
359 next = ++index;
360 }
361 }
362 TRACE("Processed %i of %i characters into %i syllables\n",index,cChar,*syllable_count);
363 }
364
365 void Indic_ReorderCharacters( HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPWSTR input, int cChar, IndicSyllable **syllables, int *syllable_count, lexical_function lex, reorder_function reorder_f, BOOL modern)
366 {
367 int i;
368
369 if (!reorder_f)
370 {
371 ERR("Failure to have required functions\n");
372 return;
373 }
374
375 Indic_ParseSyllables(hdc, psa, psc, input, cChar, syllables, syllable_count, lex, modern);
376 for (i = 0; i < *syllable_count; i++)
377 reorder_f(input, &(*syllables)[i], lex);
378 }