* Sync up to trunk head (r60691).
[reactos.git] / dll / win32 / usp10 / breaking.c
1 /*
2 * Implementation of line breaking algorithm for the Uniscribe Script Processor
3 *
4 * Copyright 2011 CodeWeavers, Aric Stewart
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 *
20 */
21 #include <config.h>
22 #include <stdarg.h>
23 //#include <stdio.h>
24 //#include <stdlib.h>
25
26 #include <windef.h>
27 #include <winbase.h>
28 //#include "winuser.h"
29 #include <wingdi.h>
30 //#include "winnls.h"
31 #include <usp10.h>
32 //#include "winternl.h"
33
34 #include <wine/debug.h>
35 #include "usp10_internal.h"
36
/* Declares "uniscribe" as this file's default debug channel; the TRACE()
 * calls and the TRACE_ON(uniscribe) check below refer to it. */
37 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
38
/* Lookup table defined outside this file; BREAK_line() reads a per-character
 * value from it through get_table_entry(). */
39 extern const unsigned short wine_linebreak_table[];
40
/*
 * Per-character classes as stored in wine_linebreak_table.  The enumerator
 * suffixes mirror the two-letter class abbreviations used by the LBnn rule
 * comments in BREAK_line().  Numbering starts at 1 and the order is
 * significant: the values must match what the table contains.
 */
enum breaking_types
{
    b_BK = 1,
    b_CR,
    b_LF,
    b_CM,
    b_SG,
    b_GL,
    b_CB,
    b_SP,
    b_ZW,
    b_NL,
    b_WJ,
    b_JL,
    b_JV,
    b_JT,
    b_H2,
    b_H3,
    b_XX,
    b_OP,
    b_CL,
    b_CP,
    b_QU,
    b_NS,
    b_EX,
    b_SY,
    b_IS,
    b_PR,
    b_PO,
    b_NU,
    b_AL,
    b_ID,
    b_IN,
    b_HY,
    b_BB,
    b_BA,
    b_SA,
    b_AI,
    b_B2,
    b_HL,
    b_CJ,
    b_RI
};
42
/*
 * Decision recorded for the position in front of a character.  A value of 0
 * (no enumerator) means "not decided yet"; see else_break().
 */
enum breaking_class
{
    b_r = 1,    /* traced as "!" */
    b_s,        /* traced as "+" */
    b_x         /* traced as "x"; the only value that suppresses fSoftBreak */
};
44
45 static void debug_output_breaks(const short* breaks, int count)
46 {
47 if (TRACE_ON(uniscribe))
48 {
49 int i;
50 TRACE("[");
51 for (i = 0; i < count && i < 200; i++)
52 {
53 switch (breaks[i])
54 {
55 case b_x: TRACE("x"); break;
56 case b_r: TRACE("!"); break;
57 case b_s: TRACE("+"); break;
58 default: TRACE("*");
59 }
60 }
61 if (i == 200)
62 TRACE("...");
63 TRACE("]\n");
64 }
65 }
66
/*
 * Record a break decision unless one has already been made.
 *
 * before: slot holding the decision for one position; 0 means undecided.
 * class:  decision to store when the slot is still undecided.
 *
 * Earlier rules therefore win over later ones.
 */
static inline void else_break(short* before, short class)
{
    if (*before != 0)
        return;

    *before = class;
}
71
72 void BREAK_line(const WCHAR *chars, int count, const SCRIPT_ANALYSIS *sa, SCRIPT_LOGATTR *la)
73 {
74 int i,j;
75 short *break_class;
76 short *break_before;
77
78 TRACE("In %s\n",debugstr_wn(chars,count));
79
80 break_class = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
81 break_before = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
82
83 for (i = 0; i < count; i++)
84 {
85 break_class[i] = get_table_entry( wine_linebreak_table, chars[i] );
86 break_before[i] = 0;
87
88 memset(&la[i],0,sizeof(SCRIPT_LOGATTR));
89
90 la[i].fCharStop = TRUE;
91 switch (break_class[i])
92 {
93 case b_BK:
94 case b_ZW:
95 case b_SP:
96 la[i].fWhiteSpace = TRUE;
97 break;
98 case b_CM:
99 la[i].fCharStop = FALSE;
100 break;
101 }
102 }
103
104 /* LB1 */
105 /* TODO: Have outside algorithms for these scripts */
106 for (i = 0; i < count; i++)
107 {
108 switch(break_class[i])
109 {
110 case b_AI:
111 case b_SA:
112 case b_SG:
113 case b_XX:
114 break_class[i] = b_AL;
115 break;
116 case b_CJ:
117 break_class[i] = b_NS;
118 break;
119 }
120 }
121
122 /* LB2 - LB3 */
123 break_before[0] = b_x;
124 for (i = 0; i < count; i++)
125 {
126 switch(break_class[i])
127 {
128 /* LB4 - LB6 */
129 case b_CR:
130 if (i < count-1 && break_class[i+1] == b_LF)
131 {
132 else_break(&break_before[i],b_x);
133 else_break(&break_before[i+1],b_x);
134 break;
135 }
136 case b_LF:
137 case b_NL:
138 case b_BK:
139 if (i < count-1) else_break(&break_before[i+1],b_r);
140 else_break(&break_before[i],b_x);
141 break;
142 /* LB7 */
143 case b_SP:
144 else_break(&break_before[i],b_x);
145 break;
146 case b_ZW:
147 else_break(&break_before[i],b_x);
148 /* LB8 */
149 while (i < count-1 && break_class[i+1] == b_SP)
150 i++;
151 else_break(&break_before[i],b_s);
152 break;
153 }
154 }
155
156 debug_output_breaks(break_before,count);
157
158 /* LB9 - LB10 */
159 for (i = 0; i < count; i++)
160 {
161 if (break_class[i] == b_CM)
162 {
163 if (i > 0)
164 {
165 switch (break_class[i-1])
166 {
167 case b_SP:
168 case b_BK:
169 case b_CR:
170 case b_LF:
171 case b_NL:
172 case b_ZW:
173 break_class[i] = b_AL;
174 break;
175 default:
176 break_class[i] = break_class[i-1];
177 }
178 }
179 else break_class[i] = b_AL;
180 }
181 }
182
183 for (i = 0; i < count; i++)
184 {
185 switch(break_class[i])
186 {
187 /* LB11 */
188 case b_WJ:
189 else_break(&break_before[i],b_x);
190 if (i < count-1)
191 else_break(&break_before[i+1],b_x);
192 break;
193 /* LB12 */
194 case b_GL:
195 if (i < count-1)
196 else_break(&break_before[i+1],b_x);
197 /* LB12a */
198 if (i > 0)
199 {
200 if (break_class[i-1] != b_SP &&
201 break_class[i-1] != b_BA &&
202 break_class[i-1] != b_HY)
203 else_break(&break_before[i],b_x);
204 }
205 break;
206 /* LB13 */
207 case b_CL:
208 case b_CP:
209 case b_EX:
210 case b_IS:
211 case b_SY:
212 else_break(&break_before[i],b_x);
213 break;
214 /* LB14 */
215 case b_OP:
216 while (i < count-1 && break_class[i+1] == b_SP)
217 {
218 else_break(&break_before[i+1],b_x);
219 i++;
220 }
221 else_break(&break_before[i+1],b_x);
222 break;
223 /* LB15 */
224 case b_QU:
225 j = i+1;
226 while (j < count-1 && break_class[j] == b_SP)
227 j++;
228 if (break_class[j] == b_OP)
229 {
230 for (; j > i; j--)
231 else_break(&break_before[j],b_x);
232 }
233 break;
234 /* LB16 */
235 case b_NS:
236 j = i-1;
237 while(j > 0 && break_class[j] == b_SP)
238 j--;
239 if (break_class[j] == b_CL || break_class[j] == b_CP)
240 {
241 for (j++; j <= i; j++)
242 else_break(&break_before[j],b_x);
243 }
244 break;
245 /* LB17 */
246 case b_B2:
247 j = i+1;
248 while (j < count && break_class[j] == b_SP)
249 j++;
250 if (break_class[j] == b_B2)
251 {
252 for (; j > i; j--)
253 else_break(&break_before[j],b_x);
254 }
255 break;
256 }
257 }
258
259 debug_output_breaks(break_before,count);
260
261 for (i = 0; i < count; i++)
262 {
263 switch(break_class[i])
264 {
265 /* LB18 */
266 case b_SP:
267 if (i < count-1)
268 else_break(&break_before[i+1],b_s);
269 break;
270 /* LB19 */
271 case b_QU:
272 else_break(&break_before[i],b_x);
273 if (i < count-1)
274 else_break(&break_before[i+1],b_x);
275 break;
276 /* LB20 */
277 case b_CB:
278 else_break(&break_before[i],b_s);
279 if (i < count-1)
280 else_break(&break_before[i+1],b_s);
281 /* LB21 */
282 case b_BA:
283 case b_HY:
284 case b_NS:
285 else_break(&break_before[i],b_x);
286 break;
287 case b_BB:
288 if (i < count-1)
289 else_break(&break_before[i+1],b_x);
290 break;
291 /* LB21a */
292 case b_HL:
293 if (i < count-2)
294 switch (break_class[i+1])
295 {
296 case b_HY:
297 case b_BA:
298 else_break(&break_before[i+2], b_x);
299 }
300 break;
301 /* LB22 */
302 case b_IN:
303 if (i > 0)
304 {
305 switch (break_class[i-1])
306 {
307 case b_AL:
308 case b_HL:
309 case b_ID:
310 case b_IN:
311 case b_NU:
312 else_break(&break_before[i], b_x);
313 }
314 }
315 break;
316 }
317
318 if (i < count-1)
319 {
320 /* LB23 */
321 if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
322 (break_class[i] == b_AL && break_class[i+1] == b_NU) ||
323 (break_class[i] == b_HL && break_class[i+1] == b_NU) ||
324 (break_class[i] == b_NU && break_class[i+1] == b_AL) ||
325 (break_class[i] == b_NU && break_class[i+1] == b_HL))
326 else_break(&break_before[i+1],b_x);
327 /* LB24 */
328 if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
329 (break_class[i] == b_PR && break_class[i+1] == b_AL) ||
330 (break_class[i] == b_PR && break_class[i+1] == b_HL) ||
331 (break_class[i] == b_PO && break_class[i+1] == b_AL) ||
332 (break_class[i] == b_PO && break_class[i+1] == b_HL))
333 else_break(&break_before[i+1],b_x);
334
335 /* LB25 */
336 if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
337 (break_class[i] == b_CP && break_class[i+1] == b_PO) ||
338 (break_class[i] == b_CL && break_class[i+1] == b_PR) ||
339 (break_class[i] == b_CP && break_class[i+1] == b_PR) ||
340 (break_class[i] == b_NU && break_class[i+1] == b_PO) ||
341 (break_class[i] == b_NU && break_class[i+1] == b_PR) ||
342 (break_class[i] == b_PO && break_class[i+1] == b_OP) ||
343 (break_class[i] == b_PO && break_class[i+1] == b_NU) ||
344 (break_class[i] == b_PR && break_class[i+1] == b_OP) ||
345 (break_class[i] == b_PR && break_class[i+1] == b_NU) ||
346 (break_class[i] == b_HY && break_class[i+1] == b_NU) ||
347 (break_class[i] == b_IS && break_class[i+1] == b_NU) ||
348 (break_class[i] == b_NU && break_class[i+1] == b_NU) ||
349 (break_class[i] == b_SY && break_class[i+1] == b_NU))
350 else_break(&break_before[i+1],b_x);
351
352 /* LB26 */
353 if (break_class[i] == b_JL)
354 {
355 switch (break_class[i+1])
356 {
357 case b_JL:
358 case b_JV:
359 case b_H2:
360 case b_H3:
361 else_break(&break_before[i+1],b_x);
362 }
363 }
364 if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
365 (break_class[i+1] == b_JV || break_class[i+1] == b_JT))
366 else_break(&break_before[i+1],b_x);
367 if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
368 break_class[i+1] == b_JT)
369 else_break(&break_before[i+1],b_x);
370
371 /* LB27 */
372 switch (break_class[i])
373 {
374 case b_JL:
375 case b_JV:
376 case b_JT:
377 case b_H2:
378 case b_H3:
379 if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
380 else_break(&break_before[i+1],b_x);
381 }
382 if (break_class[i] == b_PO)
383 {
384 switch (break_class[i+1])
385 {
386 case b_JL:
387 case b_JV:
388 case b_JT:
389 case b_H2:
390 case b_H3:
391 else_break(&break_before[i+1],b_x);
392 }
393 }
394
395 /* LB28 */
396 if ((break_class[i] == b_AL && break_class[i+1] == b_AL) ||
397 (break_class[i] == b_AL && break_class[i+1] == b_HL) ||
398 (break_class[i] == b_HL && break_class[i+1] == b_AL) ||
399 (break_class[i] == b_HL && break_class[i+1] == b_HL))
400 else_break(&break_before[i+1],b_x);
401
402 /* LB29 */
403 if ((break_class[i] == b_IS && break_class[i+1] == b_AL) ||
404 (break_class[i] == b_IS && break_class[i+1] == b_HL))
405 else_break(&break_before[i+1],b_x);
406
407 /* LB30 */
408 if ((break_class[i] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU) &&
409 break_class[i+1] == b_OP)
410 else_break(&break_before[i+1],b_x);
411 if (break_class[i] == b_CP &&
412 (break_class[i+1] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU))
413 else_break(&break_before[i+1],b_x);
414
415 /* LB30a */
416 if (break_class[i] == b_RI && break_class[i+1] == b_RI)
417 else_break(&break_before[i+1],b_x);
418 }
419 }
420 debug_output_breaks(break_before,count);
421
422 /* LB31 */
423 for (i = 0; i < count-1; i++)
424 else_break(&break_before[i+1],b_s);
425
426 debug_output_breaks(break_before,count);
427 for (i = 0; i < count; i++)
428 {
429 if (break_before[i] != b_x)
430 {
431 la[i].fSoftBreak = TRUE;
432 la[i].fWordStop = TRUE;
433 }
434 }
435
436 HeapFree(GetProcessHeap(), 0, break_before);
437 HeapFree(GetProcessHeap(), 0, break_class);
438 }