Sync to trunk revision 63922.
[reactos.git] / dll / win32 / usp10 / breaking.c
1 /*
2 * Implementation of line breaking algorithm for the Uniscribe Script Processor
3 *
4 * Copyright 2011 CodeWeavers, Aric Stewart
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 *
20 */
21
22 #include "usp10_internal.h"
23
24 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
25
26 extern const unsigned short wine_linebreak_table[];
27
/*
 * Per-character classes stored in break_class[] by BREAK_line.  The values
 * are what get_table_entry() yields for wine_linebreak_table, so the numeric
 * assignments must not change.  The two-letter suffixes appear to follow the
 * class abbreviations of the Unicode line breaking algorithm (the LBnn rule
 * comments in BREAK_line refer to the same scheme).
 */
enum breaking_types
{
    b_BK = 1,  b_CR = 2,  b_LF = 3,  b_CM = 4,  b_SG = 5,
    b_GL = 6,  b_CB = 7,  b_SP = 8,  b_ZW = 9,  b_NL = 10,
    b_WJ = 11, b_JL = 12, b_JV = 13, b_JT = 14, b_H2 = 15,
    b_H3 = 16, b_XX = 17, b_OP = 18, b_CL = 19, b_CP = 20,
    b_QU = 21, b_NS = 22, b_EX = 23, b_SY = 24, b_IS = 25,
    b_PR = 26, b_PO = 27, b_NU = 28, b_AL = 29, b_ID = 30,
    b_IN = 31, b_HY = 32, b_BB = 33, b_BA = 34, b_SA = 35,
    b_AI = 36, b_B2 = 37, b_HL = 38, b_CJ = 39, b_RI = 40
};
29
/*
 * Decision recorded for the position in front of each character
 * (break_before[] in BREAK_line).  Zero means "not decided yet".
 */
enum breaking_class
{
    b_r = 1,    /* set after LF/NL/BK (and a lone CR); traced as '!' */
    b_s = 2,    /* break opportunity; traced as '+' */
    b_x = 3     /* no break at this position; traced as 'x' */
};
31
32 static void debug_output_breaks(const short* breaks, int count)
33 {
34 if (TRACE_ON(uniscribe))
35 {
36 int i;
37 TRACE("[");
38 for (i = 0; i < count && i < 200; i++)
39 {
40 switch (breaks[i])
41 {
42 case b_x: TRACE("x"); break;
43 case b_r: TRACE("!"); break;
44 case b_s: TRACE("+"); break;
45 default: TRACE("*");
46 }
47 }
48 if (i == 200)
49 TRACE("...");
50 TRACE("]\n");
51 }
52 }
53
/*
 * Record a break decision for one position unless an earlier rule has
 * already decided it: *before is only overwritten while it is still 0.
 *
 * before  slot holding the decision for one position
 * class   decision to store when the slot is undecided
 */
static inline void else_break(short* before, short class)
{
    if (*before != 0)
        return;

    *before = class;
}
58
59 void BREAK_line(const WCHAR *chars, int count, const SCRIPT_ANALYSIS *sa, SCRIPT_LOGATTR *la)
60 {
61 int i,j;
62 short *break_class;
63 short *break_before;
64
65 TRACE("In %s\n",debugstr_wn(chars,count));
66
67 break_class = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
68 break_before = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
69
70 for (i = 0; i < count; i++)
71 {
72 break_class[i] = get_table_entry( wine_linebreak_table, chars[i] );
73 break_before[i] = 0;
74
75 memset(&la[i],0,sizeof(SCRIPT_LOGATTR));
76
77 la[i].fCharStop = TRUE;
78 switch (break_class[i])
79 {
80 case b_BK:
81 case b_ZW:
82 case b_SP:
83 la[i].fWhiteSpace = TRUE;
84 break;
85 case b_CM:
86 la[i].fCharStop = FALSE;
87 break;
88 }
89 }
90
91 /* LB1 */
92 /* TODO: Have outside algorithms for these scripts */
93 for (i = 0; i < count; i++)
94 {
95 switch(break_class[i])
96 {
97 case b_AI:
98 case b_SA:
99 case b_SG:
100 case b_XX:
101 break_class[i] = b_AL;
102 break;
103 case b_CJ:
104 break_class[i] = b_NS;
105 break;
106 }
107 }
108
109 /* LB2 - LB3 */
110 break_before[0] = b_x;
111 for (i = 0; i < count; i++)
112 {
113 switch(break_class[i])
114 {
115 /* LB4 - LB6 */
116 case b_CR:
117 if (i < count-1 && break_class[i+1] == b_LF)
118 {
119 else_break(&break_before[i],b_x);
120 else_break(&break_before[i+1],b_x);
121 break;
122 }
123 case b_LF:
124 case b_NL:
125 case b_BK:
126 if (i < count-1) else_break(&break_before[i+1],b_r);
127 else_break(&break_before[i],b_x);
128 break;
129 /* LB7 */
130 case b_SP:
131 else_break(&break_before[i],b_x);
132 break;
133 case b_ZW:
134 else_break(&break_before[i],b_x);
135 /* LB8 */
136 while (i < count-1 && break_class[i+1] == b_SP)
137 i++;
138 else_break(&break_before[i],b_s);
139 break;
140 }
141 }
142
143 debug_output_breaks(break_before,count);
144
145 /* LB9 - LB10 */
146 for (i = 0; i < count; i++)
147 {
148 if (break_class[i] == b_CM)
149 {
150 if (i > 0)
151 {
152 switch (break_class[i-1])
153 {
154 case b_SP:
155 case b_BK:
156 case b_CR:
157 case b_LF:
158 case b_NL:
159 case b_ZW:
160 break_class[i] = b_AL;
161 break;
162 default:
163 break_class[i] = break_class[i-1];
164 }
165 }
166 else break_class[i] = b_AL;
167 }
168 }
169
170 for (i = 0; i < count; i++)
171 {
172 switch(break_class[i])
173 {
174 /* LB11 */
175 case b_WJ:
176 else_break(&break_before[i],b_x);
177 if (i < count-1)
178 else_break(&break_before[i+1],b_x);
179 break;
180 /* LB12 */
181 case b_GL:
182 if (i < count-1)
183 else_break(&break_before[i+1],b_x);
184 /* LB12a */
185 if (i > 0)
186 {
187 if (break_class[i-1] != b_SP &&
188 break_class[i-1] != b_BA &&
189 break_class[i-1] != b_HY)
190 else_break(&break_before[i],b_x);
191 }
192 break;
193 /* LB13 */
194 case b_CL:
195 case b_CP:
196 case b_EX:
197 case b_IS:
198 case b_SY:
199 else_break(&break_before[i],b_x);
200 break;
201 /* LB14 */
202 case b_OP:
203 while (i < count-1 && break_class[i+1] == b_SP)
204 {
205 else_break(&break_before[i+1],b_x);
206 i++;
207 }
208 else_break(&break_before[i+1],b_x);
209 break;
210 /* LB15 */
211 case b_QU:
212 j = i+1;
213 while (j < count-1 && break_class[j] == b_SP)
214 j++;
215 if (break_class[j] == b_OP)
216 {
217 for (; j > i; j--)
218 else_break(&break_before[j],b_x);
219 }
220 break;
221 /* LB16 */
222 case b_NS:
223 j = i-1;
224 while(j > 0 && break_class[j] == b_SP)
225 j--;
226 if (break_class[j] == b_CL || break_class[j] == b_CP)
227 {
228 for (j++; j <= i; j++)
229 else_break(&break_before[j],b_x);
230 }
231 break;
232 /* LB17 */
233 case b_B2:
234 j = i+1;
235 while (j < count && break_class[j] == b_SP)
236 j++;
237 if (break_class[j] == b_B2)
238 {
239 for (; j > i; j--)
240 else_break(&break_before[j],b_x);
241 }
242 break;
243 }
244 }
245
246 debug_output_breaks(break_before,count);
247
248 for (i = 0; i < count; i++)
249 {
250 switch(break_class[i])
251 {
252 /* LB18 */
253 case b_SP:
254 if (i < count-1)
255 else_break(&break_before[i+1],b_s);
256 break;
257 /* LB19 */
258 case b_QU:
259 else_break(&break_before[i],b_x);
260 if (i < count-1)
261 else_break(&break_before[i+1],b_x);
262 break;
263 /* LB20 */
264 case b_CB:
265 else_break(&break_before[i],b_s);
266 if (i < count-1)
267 else_break(&break_before[i+1],b_s);
268 /* LB21 */
269 case b_BA:
270 case b_HY:
271 case b_NS:
272 else_break(&break_before[i],b_x);
273 break;
274 case b_BB:
275 if (i < count-1)
276 else_break(&break_before[i+1],b_x);
277 break;
278 /* LB21a */
279 case b_HL:
280 if (i < count-2)
281 switch (break_class[i+1])
282 {
283 case b_HY:
284 case b_BA:
285 else_break(&break_before[i+2], b_x);
286 }
287 break;
288 /* LB22 */
289 case b_IN:
290 if (i > 0)
291 {
292 switch (break_class[i-1])
293 {
294 case b_AL:
295 case b_HL:
296 case b_ID:
297 case b_IN:
298 case b_NU:
299 else_break(&break_before[i], b_x);
300 }
301 }
302 break;
303 }
304
305 if (i < count-1)
306 {
307 /* LB23 */
308 if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
309 (break_class[i] == b_AL && break_class[i+1] == b_NU) ||
310 (break_class[i] == b_HL && break_class[i+1] == b_NU) ||
311 (break_class[i] == b_NU && break_class[i+1] == b_AL) ||
312 (break_class[i] == b_NU && break_class[i+1] == b_HL))
313 else_break(&break_before[i+1],b_x);
314 /* LB24 */
315 if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
316 (break_class[i] == b_PR && break_class[i+1] == b_AL) ||
317 (break_class[i] == b_PR && break_class[i+1] == b_HL) ||
318 (break_class[i] == b_PO && break_class[i+1] == b_AL) ||
319 (break_class[i] == b_PO && break_class[i+1] == b_HL))
320 else_break(&break_before[i+1],b_x);
321
322 /* LB25 */
323 if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
324 (break_class[i] == b_CP && break_class[i+1] == b_PO) ||
325 (break_class[i] == b_CL && break_class[i+1] == b_PR) ||
326 (break_class[i] == b_CP && break_class[i+1] == b_PR) ||
327 (break_class[i] == b_NU && break_class[i+1] == b_PO) ||
328 (break_class[i] == b_NU && break_class[i+1] == b_PR) ||
329 (break_class[i] == b_PO && break_class[i+1] == b_OP) ||
330 (break_class[i] == b_PO && break_class[i+1] == b_NU) ||
331 (break_class[i] == b_PR && break_class[i+1] == b_OP) ||
332 (break_class[i] == b_PR && break_class[i+1] == b_NU) ||
333 (break_class[i] == b_HY && break_class[i+1] == b_NU) ||
334 (break_class[i] == b_IS && break_class[i+1] == b_NU) ||
335 (break_class[i] == b_NU && break_class[i+1] == b_NU) ||
336 (break_class[i] == b_SY && break_class[i+1] == b_NU))
337 else_break(&break_before[i+1],b_x);
338
339 /* LB26 */
340 if (break_class[i] == b_JL)
341 {
342 switch (break_class[i+1])
343 {
344 case b_JL:
345 case b_JV:
346 case b_H2:
347 case b_H3:
348 else_break(&break_before[i+1],b_x);
349 }
350 }
351 if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
352 (break_class[i+1] == b_JV || break_class[i+1] == b_JT))
353 else_break(&break_before[i+1],b_x);
354 if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
355 break_class[i+1] == b_JT)
356 else_break(&break_before[i+1],b_x);
357
358 /* LB27 */
359 switch (break_class[i])
360 {
361 case b_JL:
362 case b_JV:
363 case b_JT:
364 case b_H2:
365 case b_H3:
366 if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
367 else_break(&break_before[i+1],b_x);
368 }
369 if (break_class[i] == b_PO)
370 {
371 switch (break_class[i+1])
372 {
373 case b_JL:
374 case b_JV:
375 case b_JT:
376 case b_H2:
377 case b_H3:
378 else_break(&break_before[i+1],b_x);
379 }
380 }
381
382 /* LB28 */
383 if ((break_class[i] == b_AL && break_class[i+1] == b_AL) ||
384 (break_class[i] == b_AL && break_class[i+1] == b_HL) ||
385 (break_class[i] == b_HL && break_class[i+1] == b_AL) ||
386 (break_class[i] == b_HL && break_class[i+1] == b_HL))
387 else_break(&break_before[i+1],b_x);
388
389 /* LB29 */
390 if ((break_class[i] == b_IS && break_class[i+1] == b_AL) ||
391 (break_class[i] == b_IS && break_class[i+1] == b_HL))
392 else_break(&break_before[i+1],b_x);
393
394 /* LB30 */
395 if ((break_class[i] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU) &&
396 break_class[i+1] == b_OP)
397 else_break(&break_before[i+1],b_x);
398 if (break_class[i] == b_CP &&
399 (break_class[i+1] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU))
400 else_break(&break_before[i+1],b_x);
401
402 /* LB30a */
403 if (break_class[i] == b_RI && break_class[i+1] == b_RI)
404 else_break(&break_before[i+1],b_x);
405 }
406 }
407 debug_output_breaks(break_before,count);
408
409 /* LB31 */
410 for (i = 0; i < count-1; i++)
411 else_break(&break_before[i+1],b_s);
412
413 debug_output_breaks(break_before,count);
414 for (i = 0; i < count; i++)
415 {
416 if (break_before[i] != b_x)
417 {
418 la[i].fSoftBreak = TRUE;
419 la[i].fWordStop = TRUE;
420 }
421 }
422
423 HeapFree(GetProcessHeap(), 0, break_before);
424 HeapFree(GetProcessHeap(), 0, break_class);
425 }