Sync with trunk r58687.
[reactos.git] / dll / win32 / usp10 / breaking.c
1 /*
2 * Implementation of line breaking algorithm for the Uniscribe Script Processor
3 *
4 * Copyright 2011 CodeWeavers, Aric Stewart
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 *
20 */
21 #include <config.h>
22 #include <stdarg.h>
23 //#include <stdio.h>
24 //#include <stdlib.h>
25
26 #include <windef.h>
27 #include <winbase.h>
28 //#include "winuser.h"
29 #include <wingdi.h>
30 //#include "winnls.h"
31 #include <usp10.h>
32 //#include "winternl.h"
33
34 #include <wine/debug.h>
35 #include "usp10_internal.h"
36
37 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
38
39 extern const unsigned short wine_linebreak_table[];
40
/*
 * Line breaking classes.  BREAK_line() compares the values it reads from
 * wine_linebreak_table against these constants, so the numbering (starting
 * at 1, strictly sequential) must not change.
 * The two-letter suffixes presumably mirror the class abbreviations of
 * Unicode UAX #14, which the LBn rule comments in BREAK_line() refer to.
 */
enum breaking_types
{
    b_BK = 1, b_CR, b_LF, b_CM, b_SG, b_GL, b_CB, b_SP,
    b_ZW, b_NL, b_WJ,
    b_JL, b_JV, b_JT, b_H2, b_H3,
    b_XX,
    b_OP, b_CL, b_CP, b_QU, b_NS, b_EX, b_SY, b_IS,
    b_PR, b_PO, b_NU, b_AL, b_ID, b_IN,
    b_HY, b_BB, b_BA,
    b_SA, b_AI, b_B2
};
42
/*
 * Decision stored for the position in front of each character.
 * A value of 0 (none of these) means that no rule has decided yet.
 */
enum breaking_class
{
    b_r = 1,    /* set after BK/CR/LF/NL characters; traced as "!" */
    b_s = 2,    /* break opportunity; traced as "+" */
    b_x = 3     /* no break allowed here; traced as "x" */
};
44
45 static void debug_output_breaks(const short* breaks, int count)
46 {
47 if (TRACE_ON(uniscribe))
48 {
49 int i;
50 TRACE("[");
51 for (i = 0; i < count && i < 200; i++)
52 {
53 switch (breaks[i])
54 {
55 case b_x: TRACE("x"); break;
56 case b_r: TRACE("!"); break;
57 case b_s: TRACE("+"); break;
58 default: TRACE("*");
59 }
60 }
61 if (i == 200)
62 TRACE("...");
63 TRACE("]\n");
64 }
65 }
66
/*
 * Stores class into *before, but only if that slot is still undecided (0).
 * A slot that already holds a decision is left untouched, so the first
 * rule that writes a position wins.
 */
static inline void else_break(short* before, short class)
{
    if (*before != 0)
        return;

    *before = class;
}
71
72 void BREAK_line(const WCHAR *chars, int count, const SCRIPT_ANALYSIS *sa, SCRIPT_LOGATTR *la)
73 {
74 int i,j;
75 short *break_class;
76 short *break_before;
77
78 TRACE("In %s\n",debugstr_wn(chars,count));
79
80 break_class = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
81 break_before = HeapAlloc(GetProcessHeap(),0, count * sizeof(short));
82
83 for (i = 0; i < count; i++)
84 {
85 break_class[i] = get_table_entry( wine_linebreak_table, chars[i] );
86 break_before[i] = 0;
87
88 memset(&la[i],0,sizeof(SCRIPT_LOGATTR));
89
90 la[i].fCharStop = TRUE;
91 switch (break_class[i])
92 {
93 case b_BK:
94 case b_ZW:
95 case b_SP:
96 la[i].fWhiteSpace = TRUE;
97 break;
98 case b_CM:
99 la[i].fCharStop = FALSE;
100 }
101 }
102
103 /* LB1 */
104 /* TODO: Have outside algorithms for these scripts */
105 for (i = 0; i < count; i++)
106 {
107 switch(break_class[i])
108 {
109 case b_AI:
110 case b_SA:
111 case b_SG:
112 case b_XX:
113 break_class[i] = b_AL;
114 }
115 }
116
117 /* LB2 - LB3 */
118 break_before[0] = b_x;
119 for (i = 0; i < count; i++)
120 {
121 switch(break_class[i])
122 {
123 /* LB4 - LB6 */
124 case b_CR:
125 if (i < count-1 && break_class[i+1] == b_LF)
126 {
127 else_break(&break_before[i],b_x);
128 else_break(&break_before[i+1],b_x);
129 break;
130 }
131 case b_LF:
132 case b_NL:
133 case b_BK:
134 if (i < count-1) else_break(&break_before[i+1],b_r);
135 else_break(&break_before[i],b_x);
136 break;
137 /* LB7 */
138 case b_SP:
139 else_break(&break_before[i],b_x);
140 break;
141 case b_ZW:
142 else_break(&break_before[i],b_x);
143 /* LB8 */
144 while (i < count-1 && break_class[i+1] == b_SP)
145 i++;
146 else_break(&break_before[i],b_s);
147 break;
148 }
149 }
150
151 debug_output_breaks(break_before,count);
152
153 /* LB9 - LB10 */
154 for (i = 0; i < count; i++)
155 {
156 if (break_class[i] == b_CM)
157 {
158 if (i > 0)
159 {
160 switch (break_class[i-1])
161 {
162 case b_SP:
163 case b_BK:
164 case b_CR:
165 case b_LF:
166 case b_NL:
167 case b_ZW:
168 break_class[i] = b_AL;
169 break;
170 default:
171 break_class[i] = break_class[i-1];
172 }
173 }
174 else break_class[i] = b_AL;
175 }
176 }
177
178 for (i = 0; i < count; i++)
179 {
180 switch(break_class[i])
181 {
182 /* LB11 */
183 case b_WJ:
184 else_break(&break_before[i],b_x);
185 if (i < count-1)
186 else_break(&break_before[i+1],b_x);
187 break;
188 /* LB12 */
189 case b_GL:
190 if (i < count-1)
191 else_break(&break_before[i+1],b_x);
192 /* LB12a */
193 if (i > 0)
194 {
195 if (break_class[i-1] != b_SP &&
196 break_class[i-1] != b_BA &&
197 break_class[i-1] != b_HY)
198 else_break(&break_before[i],b_x);
199 }
200 break;
201 /* LB13 */
202 case b_CL:
203 case b_CP:
204 case b_EX:
205 case b_IS:
206 case b_SY:
207 else_break(&break_before[i],b_x);
208 break;
209 /* LB14 */
210 case b_OP:
211 while (i < count-1 && break_class[i+1] == b_SP)
212 {
213 else_break(&break_before[i+1],b_x);
214 i++;
215 }
216 else_break(&break_before[i+1],b_x);
217 break;
218 /* LB15 */
219 case b_QU:
220 j = i+1;
221 while (j < count-1 && break_class[j] == b_SP)
222 j++;
223 if (break_class[j] == b_OP)
224 {
225 for (; j > i; j--)
226 else_break(&break_before[j],b_x);
227 }
228 break;
229 /* LB16 */
230 case b_NS:
231 j = i-1;
232 while(j > 0 && break_class[j] == b_SP)
233 j--;
234 if (break_class[j] == b_CL || break_class[j] == b_CP)
235 {
236 for (j++; j <= i; j++)
237 else_break(&break_before[j],b_x);
238 }
239 break;
240 /* LB17 */
241 case b_B2:
242 j = i+1;
243 while (j < count && break_class[j] == b_SP)
244 j++;
245 if (break_class[j] == b_B2)
246 {
247 for (; j > i; j--)
248 else_break(&break_before[j],b_x);
249 }
250 break;
251 }
252 }
253
254 debug_output_breaks(break_before,count);
255
256 for (i = 0; i < count; i++)
257 {
258 switch(break_class[i])
259 {
260 /* LB18 */
261 case b_SP:
262 if (i < count-1)
263 else_break(&break_before[i+1],b_s);
264 break;
265 /* LB19 */
266 case b_QU:
267 else_break(&break_before[i],b_x);
268 if (i < count-1)
269 else_break(&break_before[i+1],b_x);
270 break;
271 /* LB20 */
272 case b_CB:
273 else_break(&break_before[i],b_s);
274 if (i < count-1)
275 else_break(&break_before[i+1],b_s);
276 /* LB21 */
277 case b_BA:
278 case b_HY:
279 case b_NS:
280 else_break(&break_before[i],b_x);
281 break;
282 case b_BB:
283 if (i < count-1)
284 else_break(&break_before[i+1],b_x);
285 break;
286 /* LB22 */
287 case b_IN:
288 if (i > 0)
289 {
290 switch (break_class[i-1])
291 {
292 case b_AL:
293 case b_ID:
294 case b_IN:
295 case b_NU:
296 else_break(&break_before[i], b_x);
297 }
298 }
299 break;
300 }
301
302 if (i < count-1)
303 {
304 /* LB23 */
305 if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
306 (break_class[i] == b_AL && break_class[i+1] == b_NU) ||
307 (break_class[i] == b_NU && break_class[i+1] == b_AL))
308 else_break(&break_before[i+1],b_x);
309 /* LB24 */
310 if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
311 (break_class[i] == b_PR && break_class[i+1] == b_AL) ||
312 (break_class[i] == b_PO && break_class[i+1] == b_AL))
313 else_break(&break_before[i+1],b_x);
314
315 /* LB25 */
316 if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
317 (break_class[i] == b_CP && break_class[i+1] == b_PO) ||
318 (break_class[i] == b_CL && break_class[i+1] == b_PR) ||
319 (break_class[i] == b_CP && break_class[i+1] == b_PR) ||
320 (break_class[i] == b_NU && break_class[i+1] == b_PO) ||
321 (break_class[i] == b_NU && break_class[i+1] == b_PR) ||
322 (break_class[i] == b_PO && break_class[i+1] == b_OP) ||
323 (break_class[i] == b_PO && break_class[i+1] == b_NU) ||
324 (break_class[i] == b_PR && break_class[i+1] == b_OP) ||
325 (break_class[i] == b_PR && break_class[i+1] == b_NU) ||
326 (break_class[i] == b_HY && break_class[i+1] == b_NU) ||
327 (break_class[i] == b_IS && break_class[i+1] == b_NU) ||
328 (break_class[i] == b_NU && break_class[i+1] == b_NU) ||
329 (break_class[i] == b_SY && break_class[i+1] == b_NU))
330 else_break(&break_before[i+1],b_x);
331
332 /* LB26 */
333 if (break_class[i] == b_JL)
334 {
335 switch (break_class[i+1])
336 {
337 case b_JL:
338 case b_JV:
339 case b_H2:
340 case b_H3:
341 else_break(&break_before[i+1],b_x);
342 }
343 }
344 if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
345 (break_class[i+1] == b_JV || break_class[i+1] == b_JT))
346 else_break(&break_before[i+1],b_x);
347 if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
348 break_class[i+1] == b_JT)
349 else_break(&break_before[i+1],b_x);
350
351 /* LB27 */
352 switch (break_class[i])
353 {
354 case b_JL:
355 case b_JV:
356 case b_JT:
357 case b_H2:
358 case b_H3:
359 if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
360 else_break(&break_before[i+1],b_x);
361 }
362 if (break_class[i] == b_PO)
363 {
364 switch (break_class[i+1])
365 {
366 case b_JL:
367 case b_JV:
368 case b_JT:
369 case b_H2:
370 case b_H3:
371 else_break(&break_before[i+1],b_x);
372 }
373 }
374
375 /* LB28 */
376 if (break_class[i] == b_AL && break_class[i+1] == b_AL)
377 else_break(&break_before[i+1],b_x);
378
379 /* LB29 */
380 if (break_class[i] == b_IS && break_class[i+1] == b_AL)
381 else_break(&break_before[i+1],b_x);
382
383 /* LB30 */
384 if ((break_class[i] == b_AL || break_class[i] == b_NU) &&
385 break_class[i+1] == b_OP)
386 else_break(&break_before[i+1],b_x);
387 if (break_class[i] == b_CP &&
388 (break_class[i+1] == b_AL || break_class[i] == b_NU))
389 else_break(&break_before[i+1],b_x);
390 }
391 }
392 debug_output_breaks(break_before,count);
393
394 /* LB31 */
395 for (i = 0; i < count-1; i++)
396 else_break(&break_before[i+1],b_s);
397
398 debug_output_breaks(break_before,count);
399 for (i = 0; i < count; i++)
400 {
401 if (break_before[i] != b_x)
402 {
403 la[i].fSoftBreak = TRUE;
404 la[i].fWordStop = TRUE;
405 }
406 }
407
408 HeapFree(GetProcessHeap(), 0, break_before);
409 HeapFree(GetProcessHeap(), 0, break_class);
410 }