[USP10]
[reactos.git] / reactos / dll / win32 / usp10 / indic.c
1 /*
2 * Implementation of Indic Syllables for the Uniscribe Script Processor
3 *
4 * Copyright 2011 CodeWeavers, Aric Stewart
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 *
20 */
21
22 #include "usp10_internal.h"
23
24 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
25
26 static void debug_output_string(LPCWSTR str, int cChar, lexical_function f)
27 {
28 int i;
29 if (TRACE_ON(uniscribe))
30 {
31 for (i = 0; i < cChar; i++)
32 {
33 switch (f(str[i]))
34 {
35 case lex_Consonant: TRACE("C"); break;
36 case lex_Ra: TRACE("Ra"); break;
37 case lex_Vowel: TRACE("V"); break;
38 case lex_Nukta: TRACE("N"); break;
39 case lex_Halant: TRACE("H"); break;
40 case lex_ZWNJ: TRACE("Zwnj"); break;
41 case lex_ZWJ: TRACE("Zwj"); break;
42 case lex_Matra_post: TRACE("Mp");break;
43 case lex_Matra_above: TRACE("Ma");break;
44 case lex_Matra_below: TRACE("Mb");break;
45 case lex_Matra_pre: TRACE("Mm");break;
46 case lex_Modifier: TRACE("Sm"); break;
47 case lex_Vedic: TRACE("Vd"); break;
48 case lex_Anudatta: TRACE("A"); break;
49 case lex_Composed_Vowel: TRACE("t"); break;
50 default:
51 TRACE("X"); break;
52 }
53 }
54 TRACE("\n");
55 }
56 }
57
58 static inline BOOL is_matra( int type )
59 {
60 return (type == lex_Matra_above || type == lex_Matra_below ||
61 type == lex_Matra_pre || type == lex_Matra_post ||
62 type == lex_Composed_Vowel);
63 }
64
65 static inline BOOL is_joiner( int type )
66 {
67 return (type == lex_ZWJ || type == lex_ZWNJ);
68 }
69
70 static INT consonant_header(LPCWSTR input, INT cChar, INT start, INT next,
71 lexical_function lex)
72 {
73 if (!is_consonant( lex(input[next]) )) return -1;
74 next++;
75 if ((next < cChar) && lex(input[next]) == lex_Nukta)
76 next++;
77 if ((next < cChar) && lex(input[next])==lex_Halant)
78 {
79 next++;
80 if((next < cChar) && is_joiner( lex(input[next]) ))
81 next++;
82 if ((next < cChar) && is_consonant( lex(input[next]) ))
83 return next;
84 }
85 else if ((next < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant)
86 {
87 next+=2;
88 if ((next < cChar) && is_consonant( lex(input[next]) ))
89 return next;
90 }
91 return -1;
92 }
93
94 static INT parse_consonant_syllable(LPCWSTR input, INT cChar, INT start,
95 INT *main, INT next, lexical_function lex)
96 {
97 int check;
98 int headers = 0;
99 do
100 {
101 check = consonant_header(input,cChar,start,next,lex);
102 if (check != -1)
103 {
104 next = check;
105 headers++;
106 }
107 } while (check != -1);
108 if (headers || is_consonant( lex(input[next]) ))
109 {
110 *main = next;
111 next++;
112 }
113 else
114 return -1;
115 if ((next < cChar) && lex(input[next]) == lex_Nukta)
116 next++;
117 if ((next < cChar) && lex(input[next]) == lex_Anudatta)
118 next++;
119
120 if ((next < cChar) && lex(input[next]) == lex_Halant)
121 {
122 next++;
123 if((next < cChar) && is_joiner( lex(input[next]) ))
124 next++;
125 }
126 else if (next < cChar)
127 {
128 while((next < cChar) && is_matra( lex(input[next]) ))
129 next++;
130 if ((next < cChar) && lex(input[next]) == lex_Nukta)
131 next++;
132 if ((next < cChar) && lex(input[next]) == lex_Halant)
133 next++;
134 }
135 if ((next < cChar) && lex(input[next]) == lex_Modifier)
136 next++;
137 if ((next < cChar) && lex(input[next]) == lex_Vedic)
138 next++;
139 return next;
140 }
141
142 static INT parse_vowel_syllable(LPCWSTR input, INT cChar, INT start,
143 INT next, lexical_function lex)
144 {
145 if ((next < cChar) && lex(input[next]) == lex_Nukta)
146 next++;
147 if ((next < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant && is_consonant( lex(input[next+2]) ))
148 next+=3;
149 else if ((next < cChar) && lex(input[next])==lex_Halant && is_consonant( lex(input[next+1]) ))
150 next+=2;
151 else if ((next < cChar) && lex(input[next])==lex_ZWJ && is_consonant( lex(input[next+1]) ))
152 next+=2;
153
154 if ((next < cChar) && is_matra( lex(input[next]) ))
155 {
156 while((next < cChar) && is_matra( lex(input[next]) ))
157 next++;
158 if ((next < cChar) && lex(input[next]) == lex_Nukta)
159 next++;
160 if ((next < cChar) && lex(input[next]) == lex_Halant)
161 next++;
162 }
163
164 if ((next < cChar) && lex(input[next]) == lex_Modifier)
165 next++;
166 if ((next < cChar) && lex(input[next]) == lex_Vedic)
167 next++;
168 return next;
169 }
170
171 static INT Indic_process_next_syllable( LPCWSTR input, INT cChar, INT start, INT* main, INT next, lexical_function lex )
172 {
173 if (lex(input[next])==lex_Vowel)
174 {
175 *main = next;
176 return parse_vowel_syllable(input, cChar, start, next+1, lex);
177 }
178 else if ((cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_Vowel)
179 {
180 *main = next+2;
181 return parse_vowel_syllable(input, cChar, start, next+3, lex);
182 }
183
184 else if (start == next && lex(input[next])==lex_NBSP)
185 {
186 *main = next;
187 return parse_vowel_syllable(input, cChar, start, next+1, lex);
188 }
189 else if (start == next && (cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_NBSP)
190 {
191 *main = next+2;
192 return parse_vowel_syllable(input, cChar, start, next+3, lex);
193 }
194
195 return parse_consonant_syllable(input, cChar, start, main, next, lex);
196 }
197
198 static BOOL Consonant_is_post_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPCWSTR pwChar, IndicSyllable *s, lexical_function lexical, BOOL modern)
199 {
200 if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
201 {
202 if (modern)
203 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "pstf") > 0);
204 else
205 {
206 WCHAR cc[2];
207 cc[0] = pwChar[s->base];
208 cc[1] = pwChar[s->base-1];
209 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "pstf") > 0);
210 }
211 }
212 return FALSE;
213 }
214
215 static BOOL Consonant_is_below_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPCWSTR pwChar, IndicSyllable *s, lexical_function lexical, BOOL modern)
216 {
217 if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
218 {
219 if (modern)
220 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "blwf") > 0);
221 else
222 {
223 WCHAR cc[2];
224 cc[0] = pwChar[s->base];
225 cc[1] = pwChar[s->base-1];
226 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "blwf") > 0);
227 }
228 }
229 return FALSE;
230 }
231
232 static BOOL Consonant_is_pre_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPCWSTR pwChar, IndicSyllable *s, lexical_function lexical, BOOL modern)
233 {
234 if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
235 {
236 if (modern)
237 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "pref") > 0);
238 else
239 {
240 WCHAR cc[2];
241 cc[0] = pwChar[s->base];
242 cc[1] = pwChar[s->base-1];
243 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "pref") > 0);
244 }
245 }
246 return FALSE;
247 }
248
249 static BOOL Consonant_is_ralf(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPCWSTR pwChar, IndicSyllable *s, lexical_function lexical)
250 {
251 if ((lexical(pwChar[s->start])==lex_Ra) && s->end > s->start && lexical(pwChar[s->start+1]) == lex_Halant)
252 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->start], 1, 2, "rphf") > 0);
253 return FALSE;
254 }
255
256 static int FindBaseConsonant(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPCWSTR input, IndicSyllable *s, lexical_function lex, BOOL modern)
257 {
258 int i;
259 BOOL blwf = FALSE;
260 BOOL pref = FALSE;
261
262 /* remove ralf from consideration */
263 if (Consonant_is_ralf(hdc, psa, psc, input, s, lex))
264 {
265 s->ralf = s->start;
266 s->start+=2;
267 }
268
269 /* try to find a base consonant */
270 if (!is_consonant( lex(input[s->base]) ))
271 {
272 for (i = s->end; i >= s->start; i--)
273 if (is_consonant( lex(input[i]) ))
274 {
275 s->base = i;
276 break;
277 }
278 }
279
280 while ((blwf = Consonant_is_below_base_form(hdc, psa, psc, input, s, lex, modern)) || Consonant_is_post_base_form(hdc, psa, psc, input, s, lex, modern) || (pref = Consonant_is_pre_base_form(hdc, psa, psc, input, s, lex, modern)))
281 {
282 if (blwf && s->blwf == -1)
283 s->blwf = s->base - 1;
284 if (pref && s->pref == -1)
285 s->pref = s->base - 1;
286
287 for (i = s->base-1; i >= s->start; i--)
288 if (is_consonant( lex(input[i]) ))
289 {
290 s->base = i;
291 break;
292 }
293 }
294
295 if (s->ralf >= 0)
296 s->start = s->ralf;
297
298 if (s->ralf == s->base)
299 s->ralf = -1;
300
301 return s->base;
302 }
303
304 void Indic_ParseSyllables( HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPCWSTR input, const int cChar, IndicSyllable **syllables, int *syllable_count, lexical_function lex, BOOL modern)
305 {
306 int index = 0;
307 int next = 0;
308 int center = 0;
309
310 *syllable_count = 0;
311
312 if (!lex)
313 {
314 ERR("Failure to have required functions\n");
315 return;
316 }
317
318 debug_output_string(input, cChar, lex);
319 while (next != -1)
320 {
321 while((next < cChar) && lex(input[next]) == lex_Generic)
322 next++;
323 index = next;
324 if (next >= cChar)
325 break;
326 next = Indic_process_next_syllable(input, cChar, 0, &center, index, lex);
327 if (next != -1)
328 {
329 if (*syllable_count)
330 *syllables = HeapReAlloc(GetProcessHeap(),0,*syllables, sizeof(IndicSyllable)*(*syllable_count+1));
331 else
332 *syllables = HeapAlloc(GetProcessHeap(),0,sizeof(IndicSyllable));
333 (*syllables)[*syllable_count].start = index;
334 (*syllables)[*syllable_count].base = center;
335 (*syllables)[*syllable_count].ralf = -1;
336 (*syllables)[*syllable_count].blwf = -1;
337 (*syllables)[*syllable_count].pref = -1;
338 (*syllables)[*syllable_count].end = next-1;
339 FindBaseConsonant(hdc, psa, psc, input, &(*syllables)[*syllable_count], lex, modern);
340 index = next;
341 *syllable_count = (*syllable_count)+1;
342 }
343 else if (index < cChar)
344 {
345 TRACE("Processing failed at %i\n",index);
346 next = ++index;
347 }
348 }
349 TRACE("Processed %i of %i characters into %i syllables\n",index,cChar,*syllable_count);
350 }
351
352 void Indic_ReorderCharacters( HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache* psc, LPWSTR input, int cChar, IndicSyllable **syllables, int *syllable_count, lexical_function lex, reorder_function reorder_f, BOOL modern)
353 {
354 int i;
355
356 if (!reorder_f)
357 {
358 ERR("Failure to have required functions\n");
359 return;
360 }
361
362 Indic_ParseSyllables(hdc, psa, psc, input, cChar, syllables, syllable_count, lex, modern);
363 for (i = 0; i < *syllable_count; i++)
364 reorder_f(input, &(*syllables)[i], lex);
365 }