* Sync up to trunk head (r64921).
[reactos.git] / dll / win32 / usp10 / breaking.c
1 /*
2 * Implementation of line breaking algorithm for the Uniscribe Script Processor
3 *
4 * Copyright 2011 CodeWeavers, Aric Stewart
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 *
20 */
21
22 #include "usp10_internal.h"
23
24 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
25
26 extern const unsigned short wine_linebreak_table[];
27
/*
 * Line break classes.  BREAK_line compares these against the entries it
 * reads from wine_linebreak_table, so the numeric values (1..40, in this
 * exact order) must not change.
 */
enum breaking_types {
    /*  1 ..  8 */ b_BK = 1, b_CR, b_LF, b_CM, b_SG, b_GL, b_CB, b_SP,
    /*  9 .. 16 */ b_ZW, b_NL, b_WJ, b_JL, b_JV, b_JT, b_H2, b_H3,
    /* 17 .. 24 */ b_XX, b_OP, b_CL, b_CP, b_QU, b_NS, b_EX, b_SY,
    /* 25 .. 32 */ b_IS, b_PR, b_PO, b_NU, b_AL, b_ID, b_IN, b_HY,
    /* 33 .. 40 */ b_BB, b_BA, b_SA, b_AI, b_B2, b_HL, b_CJ, b_RI
};
29
/*
 * Decision recorded for the position *before* each character.
 * A value of 0 (none of the below) means "not decided yet".
 */
enum breaking_class {
    b_r = 1,    /* assigned to the position following BK, LF, NL or a lone CR */
    b_s,        /* a break is allowed here */
    b_x         /* no break here: fSoftBreak/fWordStop are left clear */
};
31
32 static void debug_output_breaks(const short* breaks, int count)
33 {
34 if (TRACE_ON(uniscribe))
35 {
36 int i;
37 TRACE("[");
38 for (i = 0; i < count && i < 200; i++)
39 {
40 switch (breaks[i])
41 {
42 case b_x: TRACE("x"); break;
43 case b_r: TRACE("!"); break;
44 case b_s: TRACE("+"); break;
45 default: TRACE("*");
46 }
47 }
48 if (i == 200)
49 TRACE("...");
50 TRACE("]\n");
51 }
52 }
53
/*
 * Record a break decision for one position unless an earlier rule has
 * already decided it.
 *
 * before: slot holding the current decision; 0 means undecided.
 * class:  decision to store when the slot is still undecided.
 */
static inline void else_break(short *before, short class)
{
    if (*before != 0)
        return;     /* an earlier rule already decided; keep its result */

    *before = class;
}
58
59 void BREAK_line(const WCHAR *chars, int count, const SCRIPT_ANALYSIS *sa, SCRIPT_LOGATTR *la)
60 {
61 int i,j;
62 short *break_class;
63 short *break_before;
64
65 TRACE("In %s\n",debugstr_wn(chars,count));
66
67 break_class = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
68 break_before = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
69
70 for (i = 0; i < count; i++)
71 {
72 break_class[i] = get_table_entry( wine_linebreak_table, chars[i] );
73 break_before[i] = 0;
74
75 memset(&la[i],0,sizeof(SCRIPT_LOGATTR));
76
77 la[i].fCharStop = TRUE;
78 switch (break_class[i])
79 {
80 case b_BK:
81 case b_ZW:
82 case b_SP:
83 la[i].fWhiteSpace = TRUE;
84 break;
85 case b_CM:
86 la[i].fCharStop = FALSE;
87 break;
88 }
89 }
90
91 /* LB1 */
92 /* TODO: Have outside algorithms for these scripts */
93 for (i = 0; i < count; i++)
94 {
95 switch(break_class[i])
96 {
97 case b_AI:
98 case b_SA:
99 case b_SG:
100 case b_XX:
101 break_class[i] = b_AL;
102 break;
103 case b_CJ:
104 break_class[i] = b_NS;
105 break;
106 }
107 }
108
109 /* LB2 - LB3 */
110 break_before[0] = b_x;
111 for (i = 0; i < count; i++)
112 {
113 switch(break_class[i])
114 {
115 /* LB4 - LB6 */
116 case b_CR:
117 if (i < count-1 && break_class[i+1] == b_LF)
118 {
119 else_break(&break_before[i],b_x);
120 else_break(&break_before[i+1],b_x);
121 break;
122 }
123 case b_LF:
124 case b_NL:
125 case b_BK:
126 if (i < count-1) else_break(&break_before[i+1],b_r);
127 else_break(&break_before[i],b_x);
128 break;
129 /* LB7 */
130 case b_SP:
131 else_break(&break_before[i],b_x);
132 break;
133 case b_ZW:
134 else_break(&break_before[i],b_x);
135 /* LB8 */
136 while (i < count-1 && break_class[i+1] == b_SP)
137 i++;
138 else_break(&break_before[i],b_s);
139 break;
140 }
141 }
142
143 debug_output_breaks(break_before,count);
144
145 /* LB9 - LB10 */
146 for (i = 0; i < count; i++)
147 {
148 if (break_class[i] == b_CM)
149 {
150 if (i > 0)
151 {
152 switch (break_class[i-1])
153 {
154 case b_SP:
155 case b_BK:
156 case b_CR:
157 case b_LF:
158 case b_NL:
159 case b_ZW:
160 break_class[i] = b_AL;
161 break;
162 default:
163 break_class[i] = break_class[i-1];
164 }
165 }
166 else break_class[i] = b_AL;
167 }
168 }
169
170 for (i = 0; i < count; i++)
171 {
172 switch(break_class[i])
173 {
174 /* LB11 */
175 case b_WJ:
176 else_break(&break_before[i],b_x);
177 if (i < count-1)
178 else_break(&break_before[i+1],b_x);
179 break;
180 /* LB12 */
181 case b_GL:
182 if (i < count-1)
183 else_break(&break_before[i+1],b_x);
184 /* LB12a */
185 if (i > 0)
186 {
187 if (break_class[i-1] != b_SP &&
188 break_class[i-1] != b_BA &&
189 break_class[i-1] != b_HY)
190 else_break(&break_before[i],b_x);
191 }
192 break;
193 /* LB13 */
194 case b_CL:
195 case b_CP:
196 case b_EX:
197 case b_IS:
198 case b_SY:
199 else_break(&break_before[i],b_x);
200 break;
201 /* LB14 */
202 case b_OP:
203 while (i < count-1 && break_class[i+1] == b_SP)
204 {
205 else_break(&break_before[i+1],b_x);
206 i++;
207 }
208 else_break(&break_before[i+1],b_x);
209 break;
210 /* LB15 */
211 case b_QU:
212 j = i+1;
213 while (j < count-1 && break_class[j] == b_SP)
214 j++;
215 if (break_class[j] == b_OP)
216 {
217 for (; j > i; j--)
218 else_break(&break_before[j],b_x);
219 }
220 break;
221 /* LB16 */
222 case b_NS:
223 j = i-1;
224 while(j > 0 && break_class[j] == b_SP)
225 j--;
226 if (break_class[j] == b_CL || break_class[j] == b_CP)
227 {
228 for (j++; j <= i; j++)
229 else_break(&break_before[j],b_x);
230 }
231 break;
232 /* LB17 */
233 case b_B2:
234 j = i+1;
235 while (j < count && break_class[j] == b_SP)
236 j++;
237 if (break_class[j] == b_B2)
238 {
239 for (; j > i; j--)
240 else_break(&break_before[j],b_x);
241 }
242 break;
243 }
244 }
245
246 debug_output_breaks(break_before,count);
247
248 for (i = 0; i < count; i++)
249 {
250 switch(break_class[i])
251 {
252 /* LB18 */
253 case b_SP:
254 if (i < count-1)
255 else_break(&break_before[i+1],b_s);
256 break;
257 /* LB19 */
258 case b_QU:
259 else_break(&break_before[i],b_x);
260 if (i < count-1)
261 else_break(&break_before[i+1],b_x);
262 break;
263 /* LB20 */
264 case b_CB:
265 else_break(&break_before[i],b_s);
266 if (i < count-1)
267 else_break(&break_before[i+1],b_s);
268 break;
269 /* LB21 */
270 case b_BA:
271 case b_HY:
272 case b_NS:
273 else_break(&break_before[i],b_x);
274 break;
275 case b_BB:
276 if (i < count-1)
277 else_break(&break_before[i+1],b_x);
278 break;
279 /* LB21a */
280 case b_HL:
281 if (i < count-2)
282 switch (break_class[i+1])
283 {
284 case b_HY:
285 case b_BA:
286 else_break(&break_before[i+2], b_x);
287 }
288 break;
289 /* LB22 */
290 case b_IN:
291 if (i > 0)
292 {
293 switch (break_class[i-1])
294 {
295 case b_AL:
296 case b_HL:
297 case b_ID:
298 case b_IN:
299 case b_NU:
300 else_break(&break_before[i], b_x);
301 }
302 }
303 break;
304 }
305
306 if (i < count-1)
307 {
308 /* LB23 */
309 if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
310 (break_class[i] == b_AL && break_class[i+1] == b_NU) ||
311 (break_class[i] == b_HL && break_class[i+1] == b_NU) ||
312 (break_class[i] == b_NU && break_class[i+1] == b_AL) ||
313 (break_class[i] == b_NU && break_class[i+1] == b_HL))
314 else_break(&break_before[i+1],b_x);
315 /* LB24 */
316 if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
317 (break_class[i] == b_PR && break_class[i+1] == b_AL) ||
318 (break_class[i] == b_PR && break_class[i+1] == b_HL) ||
319 (break_class[i] == b_PO && break_class[i+1] == b_AL) ||
320 (break_class[i] == b_PO && break_class[i+1] == b_HL))
321 else_break(&break_before[i+1],b_x);
322
323 /* LB25 */
324 if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
325 (break_class[i] == b_CP && break_class[i+1] == b_PO) ||
326 (break_class[i] == b_CL && break_class[i+1] == b_PR) ||
327 (break_class[i] == b_CP && break_class[i+1] == b_PR) ||
328 (break_class[i] == b_NU && break_class[i+1] == b_PO) ||
329 (break_class[i] == b_NU && break_class[i+1] == b_PR) ||
330 (break_class[i] == b_PO && break_class[i+1] == b_OP) ||
331 (break_class[i] == b_PO && break_class[i+1] == b_NU) ||
332 (break_class[i] == b_PR && break_class[i+1] == b_OP) ||
333 (break_class[i] == b_PR && break_class[i+1] == b_NU) ||
334 (break_class[i] == b_HY && break_class[i+1] == b_NU) ||
335 (break_class[i] == b_IS && break_class[i+1] == b_NU) ||
336 (break_class[i] == b_NU && break_class[i+1] == b_NU) ||
337 (break_class[i] == b_SY && break_class[i+1] == b_NU))
338 else_break(&break_before[i+1],b_x);
339
340 /* LB26 */
341 if (break_class[i] == b_JL)
342 {
343 switch (break_class[i+1])
344 {
345 case b_JL:
346 case b_JV:
347 case b_H2:
348 case b_H3:
349 else_break(&break_before[i+1],b_x);
350 }
351 }
352 if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
353 (break_class[i+1] == b_JV || break_class[i+1] == b_JT))
354 else_break(&break_before[i+1],b_x);
355 if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
356 break_class[i+1] == b_JT)
357 else_break(&break_before[i+1],b_x);
358
359 /* LB27 */
360 switch (break_class[i])
361 {
362 case b_JL:
363 case b_JV:
364 case b_JT:
365 case b_H2:
366 case b_H3:
367 if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
368 else_break(&break_before[i+1],b_x);
369 }
370 if (break_class[i] == b_PO)
371 {
372 switch (break_class[i+1])
373 {
374 case b_JL:
375 case b_JV:
376 case b_JT:
377 case b_H2:
378 case b_H3:
379 else_break(&break_before[i+1],b_x);
380 }
381 }
382
383 /* LB28 */
384 if ((break_class[i] == b_AL && break_class[i+1] == b_AL) ||
385 (break_class[i] == b_AL && break_class[i+1] == b_HL) ||
386 (break_class[i] == b_HL && break_class[i+1] == b_AL) ||
387 (break_class[i] == b_HL && break_class[i+1] == b_HL))
388 else_break(&break_before[i+1],b_x);
389
390 /* LB29 */
391 if ((break_class[i] == b_IS && break_class[i+1] == b_AL) ||
392 (break_class[i] == b_IS && break_class[i+1] == b_HL))
393 else_break(&break_before[i+1],b_x);
394
395 /* LB30 */
396 if ((break_class[i] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU) &&
397 break_class[i+1] == b_OP)
398 else_break(&break_before[i+1],b_x);
399 if (break_class[i] == b_CP &&
400 (break_class[i+1] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU))
401 else_break(&break_before[i+1],b_x);
402
403 /* LB30a */
404 if (break_class[i] == b_RI && break_class[i+1] == b_RI)
405 else_break(&break_before[i+1],b_x);
406 }
407 }
408 debug_output_breaks(break_before,count);
409
410 /* LB31 */
411 for (i = 0; i < count-1; i++)
412 else_break(&break_before[i+1],b_s);
413
414 debug_output_breaks(break_before,count);
415 for (i = 0; i < count; i++)
416 {
417 if (break_before[i] != b_x)
418 {
419 la[i].fSoftBreak = TRUE;
420 la[i].fWordStop = TRUE;
421 }
422 }
423
424 HeapFree(GetProcessHeap(), 0, break_before);
425 HeapFree(GetProcessHeap(), 0, break_class);
426 }