[REACTOS]
[reactos.git] / reactos / dll / 3rdparty / libxslt / xsltlocale.c
1 /*
2 * xsltlocale.c: locale handling
3 *
4 * Reference:
5 * RFC 3066: Tags for the Identification of Languages
6 * http://www.ietf.org/rfc/rfc3066.txt
7 * ISO 639-1, ISO 3166-1
8 *
9 * Author: Nick Wellnhofer
10 * winapi port: Roumen Petrov
11 */
12
13 #define IN_LIBXSLT
14 #include "libxslt.h"
15
16 #include <string.h>
17 #include <libxml/xmlmemory.h>
18
19 #include "xsltlocale.h"
20 #include "xsltutils.h"
21
22 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2
23 #define newlocale __newlocale
24 #define freelocale __freelocale
25 #define strxfrm_l __strxfrm_l
26 #define LC_COLLATE_MASK (1 << LC_COLLATE)
27 #endif
28
/*
 * ASCII-only case helpers. They work by clearing/setting bit 0x20, so
 * TOUPPER/TOLOWER are only meaningful when applied to ASCII letters.
 * The argument is parenthesized so that expressions such as a conditional
 * can be passed safely; it is still evaluated exactly once.
 */
#define TOUPPER(c) ((c) & ~0x20)
#define TOLOWER(c) ((c) | 0x20)
#define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26)
32
/*
 * Upper bounds on the pieces of a language tag. None of these lengths
 * counts the terminating null character.
 */
#define XSLTMAX_ISO639LANGLEN   8   /* language part */
#define XSLTMAX_ISO3166CNTRYLEN 8   /* country part */
/* complete tag: <lang>-<cntry> */
#define XSLTMAX_LANGTAGLEN \
    (XSLTMAX_ISO639LANGLEN + 1 + XSLTMAX_ISO3166CNTRYLEN)
38
39 static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
40
41 #ifdef XSLT_LOCALE_WINAPI
42 xmlRMutexPtr xsltLocaleMutex = NULL;
43
44 struct xsltRFC1766Info_s {
45 /*note typedef unsigned char xmlChar !*/
46 xmlChar tag[XSLTMAX_LANGTAGLEN+1];
47 /*note typedef LCID xsltLocale !*/
48 xsltLocale lcid;
49 };
50 typedef struct xsltRFC1766Info_s xsltRFC1766Info;
51
52 static int xsltLocaleListSize = 0;
53 static xsltRFC1766Info *xsltLocaleList = NULL;
54
55
56 static xsltLocale
57 xslt_locale_WINAPI(const xmlChar *languageTag) {
58 int k;
59 xsltRFC1766Info *p = xsltLocaleList;
60
61 for (k=0; k<xsltLocaleListSize; k++, p++)
62 if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
63 return((xsltLocale)0);
64 }
65
66 static void xsltEnumSupportedLocales(void);
67 #endif
68
/**
 * xsltFreeLocales:
 *
 * Cleanup function for the locale support on shutdown.
 *
 * In the WINAPI build this releases the table of supported locales while
 * holding xsltLocaleMutex and leaves the table pointer set to NULL.
 * In the other builds there is nothing to release.
 */
void
xsltFreeLocales(void) {
#ifdef XSLT_LOCALE_WINAPI
    xsltRFC1766Info *table;

    xmlRMutexLock(xsltLocaleMutex);
    /* detach the table first, then release it */
    table = xsltLocaleList;
    xsltLocaleList = NULL;
    xmlFree(table);
    xmlRMutexUnlock(xsltLocaleMutex);
#endif
}
83
84 /**
85 * xsltNewLocale:
86 * @languageTag: RFC 3066 language tag
87 *
88 * Creates a new locale of an opaque system dependent type based on the
89 * language tag.
90 *
91 * Returns the locale or NULL on error or if no matching locale was found
92 */
93 xsltLocale
94 xsltNewLocale(const xmlChar *languageTag) {
95 #ifdef XSLT_LOCALE_XLOCALE
96 xsltLocale locale;
97 char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
98 const xmlChar *p = languageTag;
99 const char *region = NULL;
100 char *q = localeName;
101 int i, llen;
102
103 /* Convert something like "pt-br" to "pt_BR.utf8" */
104
105 if (languageTag == NULL)
106 return(NULL);
107
108 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
109 *q++ = TOLOWER(*p++);
110
111 if (i == 0)
112 return(NULL);
113
114 llen = i;
115
116 if (*p) {
117 if (*p++ != '-')
118 return(NULL);
119 *q++ = '_';
120
121 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
122 *q++ = TOUPPER(*p++);
123
124 if (i == 0 || *p)
125 return(NULL);
126
127 memcpy(q, ".utf8", 6);
128 locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
129 if (locale != NULL)
130 return(locale);
131
132 /* Continue without using country code */
133
134 q = localeName + llen;
135 }
136
137 /* Try locale without territory, e.g. for Esperanto (eo) */
138
139 memcpy(q, ".utf8", 6);
140 locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
141 if (locale != NULL)
142 return(locale);
143
144 /* Try to find most common country for language */
145
146 if (llen != 2)
147 return(NULL);
148
149 region = (char *)xsltDefaultRegion((xmlChar *)localeName);
150 if (region == NULL)
151 return(NULL);
152
153 q = localeName + llen;
154 *q++ = '_';
155 *q++ = region[0];
156 *q++ = region[1];
157 memcpy(q, ".utf8", 6);
158 locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
159
160 return(locale);
161 #endif
162
163 #ifdef XSLT_LOCALE_WINAPI
164 {
165 xsltLocale locale = (xsltLocale)0;
166 xmlChar localeName[XSLTMAX_LANGTAGLEN+1];
167 xmlChar *q = localeName;
168 const xmlChar *p = languageTag;
169 int i, llen;
170 const xmlChar *region = NULL;
171
172 if (languageTag == NULL) goto end;
173
174 xsltEnumSupportedLocales();
175
176 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
177 *q++ = TOLOWER(*p++);
178 if (i == 0) goto end;
179
180 llen = i;
181 *q++ = '-';
182 if (*p) { /*if country tag is given*/
183 if (*p++ != '-') goto end;
184
185 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
186 *q++ = TOUPPER(*p++);
187 if (i == 0 || *p) goto end;
188
189 *q = '\0';
190 locale = xslt_locale_WINAPI(localeName);
191 if (locale != (xsltLocale)0) goto end;
192 }
193 /* Try to find most common country for language */
194 region = xsltDefaultRegion(localeName);
195 if (region == NULL) goto end;
196
197 strcpy(localeName + llen + 1, region);
198 locale = xslt_locale_WINAPI(localeName);
199 end:
200 return(locale);
201 }
202 #endif
203
204 #ifdef XSLT_LOCALE_NONE
205 return(NULL);
206 #endif
207 }
208
209 static const xmlChar*
210 xsltDefaultRegion(const xmlChar *localeName) {
211 xmlChar c;
212 /* region should be xmlChar, but gcc warns on all string assignments */
213 const char *region = NULL;
214
215 c = localeName[1];
216 /* This is based on the locales from glibc 2.3.3 */
217
218 switch (localeName[0]) {
219 case 'a':
220 if (c == 'a' || c == 'm') region = "ET";
221 else if (c == 'f') region = "ZA";
222 else if (c == 'n') region = "ES";
223 else if (c == 'r') region = "AE";
224 else if (c == 'z') region = "AZ";
225 break;
226 case 'b':
227 if (c == 'e') region = "BY";
228 else if (c == 'g') region = "BG";
229 else if (c == 'n') region = "BD";
230 else if (c == 'r') region = "FR";
231 else if (c == 's') region = "BA";
232 break;
233 case 'c':
234 if (c == 'a') region = "ES";
235 else if (c == 's') region = "CZ";
236 else if (c == 'y') region = "GB";
237 break;
238 case 'd':
239 if (c == 'a') region = "DK";
240 else if (c == 'e') region = "DE";
241 break;
242 case 'e':
243 if (c == 'l') region = "GR";
244 else if (c == 'n' || c == 'o') region = "US";
245 else if (c == 's' || c == 'u') region = "ES";
246 else if (c == 't') region = "EE";
247 break;
248 case 'f':
249 if (c == 'a') region = "IR";
250 else if (c == 'i') region = "FI";
251 else if (c == 'o') region = "FO";
252 else if (c == 'r') region = "FR";
253 break;
254 case 'g':
255 if (c == 'a') region = "IE";
256 else if (c == 'l') region = "ES";
257 else if (c == 'v') region = "GB";
258 break;
259 case 'h':
260 if (c == 'e') region = "IL";
261 else if (c == 'i') region = "IN";
262 else if (c == 'r') region = "HT";
263 else if (c == 'u') region = "HU";
264 break;
265 case 'i':
266 if (c == 'd') region = "ID";
267 else if (c == 's') region = "IS";
268 else if (c == 't') region = "IT";
269 else if (c == 'w') region = "IL";
270 break;
271 case 'j':
272 if (c == 'a') region = "JP";
273 break;
274 case 'k':
275 if (c == 'l') region = "GL";
276 else if (c == 'o') region = "KR";
277 else if (c == 'w') region = "GB";
278 break;
279 case 'l':
280 if (c == 't') region = "LT";
281 else if (c == 'v') region = "LV";
282 break;
283 case 'm':
284 if (c == 'k') region = "MK";
285 else if (c == 'l' || c == 'r') region = "IN";
286 else if (c == 'n') region = "MN";
287 else if (c == 's') region = "MY";
288 else if (c == 't') region = "MT";
289 break;
290 case 'n':
291 if (c == 'b' || c == 'n' || c == 'o') region = "NO";
292 else if (c == 'e') region = "NP";
293 else if (c == 'l') region = "NL";
294 break;
295 case 'o':
296 if (c == 'm') region = "ET";
297 break;
298 case 'p':
299 if (c == 'a') region = "IN";
300 else if (c == 'l') region = "PL";
301 else if (c == 't') region = "PT";
302 break;
303 case 'r':
304 if (c == 'o') region = "RO";
305 else if (c == 'u') region = "RU";
306 break;
307 case 's':
308 switch (c) {
309 case 'e': region = "NO"; break;
310 case 'h': region = "YU"; break;
311 case 'k': region = "SK"; break;
312 case 'l': region = "SI"; break;
313 case 'o': region = "ET"; break;
314 case 'q': region = "AL"; break;
315 case 't': region = "ZA"; break;
316 case 'v': region = "SE"; break;
317 }
318 break;
319 case 't':
320 if (c == 'a' || c == 'e') region = "IN";
321 else if (c == 'h') region = "TH";
322 else if (c == 'i') region = "ER";
323 else if (c == 'r') region = "TR";
324 else if (c == 't') region = "RU";
325 break;
326 case 'u':
327 if (c == 'k') region = "UA";
328 else if (c == 'r') region = "PK";
329 break;
330 case 'v':
331 if (c == 'i') region = "VN";
332 break;
333 case 'w':
334 if (c == 'a') region = "BE";
335 break;
336 case 'x':
337 if (c == 'h') region = "ZA";
338 break;
339 case 'z':
340 if (c == 'h') region = "CN";
341 else if (c == 'u') region = "ZA";
342 break;
343 }
344 return((xmlChar *)region);
345 }
346
347 /**
348 * xsltFreeLocale:
349 * @locale: the locale to free
350 *
351 * Frees a locale created with xsltNewLocale
352 */
353 void
354 xsltFreeLocale(xsltLocale locale) {
355 #ifdef XSLT_LOCALE_XLOCALE
356 freelocale(locale);
357 #endif
358 }
359
360 /**
361 * xsltStrxfrm:
362 * @locale: locale created with xsltNewLocale
363 * @string: UTF-8 string to transform
364 *
365 * Transforms a string according to locale. The transformed string must then be
366 * compared with xsltLocaleStrcmp and freed with xmlFree.
367 *
368 * Returns the transformed string or NULL on error
369 */
370 xsltLocaleChar *
371 xsltStrxfrm(xsltLocale locale, const xmlChar *string)
372 {
373 #ifdef XSLT_LOCALE_NONE
374 return(NULL);
375 #else
376 size_t xstrlen, r;
377 xsltLocaleChar *xstr;
378
379 #ifdef XSLT_LOCALE_XLOCALE
380 xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
381 xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
382 if (xstr == NULL) {
383 xsltTransformError(NULL, NULL, NULL,
384 "xsltStrxfrm : out of memory error\n");
385 return(NULL);
386 }
387
388 r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
389 #endif
390
391 #ifdef XSLT_LOCALE_WINAPI
392 xstrlen = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0);
393 if (xstrlen == 0) {
394 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
395 return(NULL);
396 }
397 xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
398 if (xstr == NULL) {
399 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
400 return(NULL);
401 }
402 r = MultiByteToWideChar(CP_UTF8, 0, string, -1, xstr, xstrlen);
403 if (r == 0) {
404 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
405 xmlFree(xstr);
406 return(NULL);
407 }
408 return(xstr);
409 #endif /* XSLT_LOCALE_WINAPI */
410
411 if (r >= xstrlen) {
412 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
413 xmlFree(xstr);
414 return(NULL);
415 }
416
417 return(xstr);
418 #endif /* XSLT_LOCALE_NONE */
419 }
420
421 /**
422 * xsltLocaleStrcmp:
423 * @locale: a locale identifier
424 * @str1: a string transformed with xsltStrxfrm
425 * @str2: a string transformed with xsltStrxfrm
426 *
427 * Compares two strings transformed with xsltStrxfrm
428 *
429 * Returns a value < 0 if str1 sorts before str2,
430 * a value > 0 if str1 sorts after str2,
431 * 0 if str1 and str2 are equal wrt sorting
432 */
433 int
434 xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
435 (void)locale;
436 #ifdef XSLT_LOCALE_WINAPI
437 {
438 int ret;
439 if (str1 == str2) return(0);
440 if (str1 == NULL) return(-1);
441 if (str2 == NULL) return(1);
442 ret = CompareStringW(locale, 0, str1, -1, str2, -1);
443 if (ret == 0) {
444 xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
445 return(0);
446 }
447 return(ret - 2);
448 }
449 #else
450 return(xmlStrcmp(str1, str2));
451 #endif
452 }
453
454 #ifdef XSLT_LOCALE_WINAPI
455 /**
456 * xsltCountSupportedLocales:
457 * @lcid: not used
458 *
459 * callback used to count locales
460 *
461 * Returns TRUE
462 */
463 BOOL CALLBACK
464 xsltCountSupportedLocales(LPSTR lcid) {
465 (void) lcid;
466 ++xsltLocaleListSize;
467 return(TRUE);
468 }
469
470 /**
471 * xsltIterateSupportedLocales:
472 * @lcid: not used
473 *
474 * callback used to track locales
475 *
476 * Returns TRUE if not at the end of the array
477 */
478 BOOL CALLBACK
479 xsltIterateSupportedLocales(LPSTR lcid) {
480 static int count = 0;
481 xmlChar iso639lang [XSLTMAX_ISO639LANGLEN +1];
482 xmlChar iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
483 int k, l;
484 xsltRFC1766Info *p = xsltLocaleList + count;
485
486 k = sscanf(lcid, "%lx", (long*)&p->lcid);
487 if (k < 1) goto end;
488 /*don't count terminating null character*/
489 k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME , iso639lang , sizeof(iso639lang ));
490 if (--k < 1) goto end;
491 l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, iso3136ctry, sizeof(iso3136ctry));
492 if (--l < 1) goto end;
493
494 { /*fill results*/
495 xmlChar *q = p->tag;
496 memcpy(q, iso639lang, k);
497 q += k;
498 *q++ = '-';
499 memcpy(q, iso3136ctry, l);
500 q += l;
501 *q = '\0';
502 }
503 ++count;
504 end:
505 return((count < xsltLocaleListSize) ? TRUE : FALSE);
506 }
507
508
509 static void
510 xsltEnumSupportedLocales(void) {
511 xmlRMutexLock(xsltLocaleMutex);
512 if (xsltLocaleListSize <= 0) {
513 size_t len;
514
515 EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
516
517 len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
518 xsltLocaleList = xmlMalloc(len);
519 memset(xsltLocaleList, 0, len);
520 EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
521 }
522 xmlRMutexUnlock(xsltLocaleMutex);
523 }
524
525 #endif /*def XSLT_LOCALE_WINAPI*/