modified dll/win32/kernel32/misc/lang.c
[reactos.git] / reactos / lib / 3rdparty / icu4ros / icu / source / common / locmap.c
1 /*
2 **********************************************************************
3 * Copyright (C) 1996-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * Provides functionality for mapping between
8 * LCID and Posix IDs or ICU locale to codepage
9 *
10 * Note: All classes and code in this file are
11 * intended for internal use only.
12 *
13 * Methods of interest:
14 * unsigned long convertToLCID(const char*);
15 * const char* convertToPosix(unsigned long);
16 *
17 * Kathleen Wilson, 4/30/96
18 *
19 * Date Name Description
20 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
21 * setId() method and safety check against
22 * MAX_ID_LENGTH.
23 * 04/23/99 stephen Added C wrapper for convertToPosix.
24 * 09/18/00 george Removed the memory leaks.
25 * 08/23/01 george Convert to C
26 */
27
28 #include "locmap.h"
29 #include "cstring.h"
30
31 /*
32 * Note:
33 * The mapping from Win32 locale ID numbers to POSIX locale strings should
34 * be the faster one.
35 *
36 * Many LCID values come from winnt.h
37 * Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
38 */
39
40 /*
41 ////////////////////////////////////////////////
42 //
43 // Internal Classes for LCID <--> POSIX Mapping
44 //
45 /////////////////////////////////////////////////
46 */
47
/*
 * One row of a per-language mapping table: pairs a Windows locale
 * identifier with the POSIX-style locale name it corresponds to.
 */
struct ILcidPosixElement
{
    const uint32_t hostID;        /* Windows LCID (or bare language ID for a table's first row) */
    const char * const posixID;   /* NUL-terminated POSIX locale name, e.g. "en_US" */
};
typedef struct ILcidPosixElement ILcidPosixElement;
53
/*
 * Descriptor for one language's table of ILcidPosixElement rows:
 * the number of rows and a pointer to the first row.
 */
struct ILcidPosixMap
{
    const uint32_t numRegions;                          /* number of rows in regionMaps */
    const struct ILcidPosixElement* const regionMaps;   /* first row of the table */
};
typedef struct ILcidPosixMap ILcidPosixMap;
59
60
61 /*
62 /////////////////////////////////////////////////
63 //
64 // Easy macros to make the LCID <--> POSIX Mapping
65 //
66 /////////////////////////////////////////////////
67 */
68
/*
 Defines the two-row table used for a language that maps to a single
 country. The table is named after the language (langName).
   Row 0: the language on its own (LANGUAGE_LCID applied to lcid,
          paired with the language name as a string).
   Row 1: the language together with its country (lcid itself,
          paired with the full locale name as a string).
 The language row must come first.
*/
#define ILCID_POSIX_ELEMENT_ARRAY(lcid, langName, localeName) \
static const ILcidPosixElement langName[] = { \
    {LANGUAGE_LCID(lcid), #langName}, /* parent locale */ \
    {lcid, #localeName}, \
};
79
/*
 Builds the ILcidPosixMap initializer for one language table: the row
 count followed by the table itself. The argument must be the name of
 an ILcidPosixElement array (not a pointer), and that array's first row
 is expected to be the language-only entry.
*/
#define ILCID_POSIX_MAP(table) \
    {sizeof(table)/sizeof((table)[0]), table}
87
88 /*
89 ////////////////////////////////////////////
90 //
91 // Create the table of LCID to POSIX Mapping
92 // None of it should be dynamically created.
93 //
94 // Keep static locale variables inside the function so that
95 // it can be created properly during static init.
96 //
97 ////////////////////////////////////////////
98 */
99
100 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
101
102 static const ILcidPosixElement ar[] = {
103 {0x01, "ar"},
104 {0x3801, "ar_AE"},
105 {0x3c01, "ar_BH"},
106 {0x1401, "ar_DZ"},
107 {0x0c01, "ar_EG"},
108 {0x0801, "ar_IQ"},
109 {0x2c01, "ar_JO"},
110 {0x3401, "ar_KW"},
111 {0x3001, "ar_LB"},
112 {0x1001, "ar_LY"},
113 {0x1801, "ar_MA"},
114 {0x2001, "ar_OM"},
115 {0x4001, "ar_QA"},
116 {0x0401, "ar_SA"},
117 {0x2801, "ar_SY"},
118 {0x1c01, "ar_TN"},
119 {0x2401, "ar_YE"}
120 };
121
122 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
123 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
124 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
125
126 static const ILcidPosixElement az[] = {
127 {0x2c, "az"},
128 {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
129 {0x082c, "az_Cyrl"}, /* Cyrillic based */
130 {0x042c, "az_Latn_AZ"}, /* Latin based */
131 {0x042c, "az_Latn"}, /* Latin based */
132 {0x042c, "az_AZ"} /* Latin based */
133 };
134
135 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
136 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
137
138 static const ILcidPosixElement ber[] = {
139 {0x5f, "ber"},
140 {0x045f, "ber_Arab_DZ"},
141 {0x045f, "ber_Arab"},
142 {0x085f, "ber_Latn_DZ"},
143 {0x085f, "ber_Latn"}
144 };
145
146 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
147
148 static const ILcidPosixElement bn[] = {
149 {0x45, "bn"},
150 {0x0845, "bn_BD"},
151 {0x0445, "bn_IN"}
152 };
153
154 static const ILcidPosixElement bo[] = {
155 {0x51, "bo"},
156 {0x0851, "bo_BT"},
157 {0x0451, "bo_CN"}
158 };
159
160 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
161 ILCID_POSIX_ELEMENT_ARRAY(0x0403, ca, ca_ES)
162 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
163 ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
164
165 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
166 static const ILcidPosixElement cs_CZ[] = {
167 {0x05, "cs"},
168 {0x0405, "cs_CZ"},
169 };
170
171 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
172 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
173
174 static const ILcidPosixElement de[] = {
175 {0x07, "de"},
176 {0x0c07, "de_AT"},
177 {0x0807, "de_CH"},
178 {0x0407, "de_DE"},
179 {0x1407, "de_LI"},
180 {0x1007, "de_LU"},
181 {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
182 {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
183 };
184
185 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
186 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
187
188 static const ILcidPosixElement en[] = {
189 {0x09, "en"},
190 {0x0c09, "en_AU"},
191 {0x2809, "en_BZ"},
192 {0x1009, "en_CA"},
193 {0x0809, "en_GB"},
194 {0x1809, "en_IE"},
195 {0x4009, "en_IN"},
196 {0x2009, "en_JM"},
197 {0x4409, "en_MY"},
198 {0x1409, "en_NZ"},
199 {0x3409, "en_PH"},
200 {0x4809, "en_SG"},
201 {0x2C09, "en_TT"},
202 {0x0409, "en_US"},
203 {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
204 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). */
205 {0x1c09, "en_ZA"},
206 {0x3009, "en_ZW"},
207 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. */
208 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. */
209 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. */
210 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. */
211 {0x0409, "en_UM"} /* Alias for en_US. Leave last. */
212 };
213
214 static const ILcidPosixElement en_US_POSIX[] = {
215 {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
216 };
217
218 static const ILcidPosixElement es[] = {
219 {0x0a, "es"},
220 {0x2c0a, "es_AR"},
221 {0x400a, "es_BO"},
222 {0x340a, "es_CL"},
223 {0x240a, "es_CO"},
224 {0x140a, "es_CR"},
225 {0x1c0a, "es_DO"},
226 {0x300a, "es_EC"},
227 {0x0c0a, "es_ES"}, /*Modern sort.*/
228 {0x100a, "es_GT"},
229 {0x480a, "es_HN"},
230 {0x080a, "es_MX"},
231 {0x4c0a, "es_NI"},
232 {0x180a, "es_PA"},
233 {0x280a, "es_PE"},
234 {0x500a, "es_PR"},
235 {0x3c0a, "es_PY"},
236 {0x440a, "es_SV"},
237 {0x540a, "es_US"},
238 {0x380a, "es_UY"},
239 {0x200a, "es_VE"},
240 {0x040a, "es_ES@collation=traditional"},
241 {0x040a, "es@collation=traditional"}
242 };
243
244 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
245 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
246
247 /* ISO-639 doesn't distinguish between Persian and Dari.*/
248 static const ILcidPosixElement fa[] = {
249 {0x29, "fa"},
250 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
251 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
252 };
253
254 /* duplicate for roundtripping */
255 static const ILcidPosixElement fa_AF[] = {
256 {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
257 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
258 };
259
260 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
261 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
262 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
263
264 static const ILcidPosixElement fr[] = {
265 {0x0c, "fr"},
266 {0x080c, "fr_BE"},
267 {0x0c0c, "fr_CA"},
268 {0x240c, "fr_CD"},
269 {0x100c, "fr_CH"},
270 {0x300c, "fr_CI"},
271 {0x2c0c, "fr_CM"},
272 {0x040c, "fr_FR"},
273 {0x3c0c, "fr_HT"},
274 {0x140c, "fr_LU"},
275 {0x380c, "fr_MA"},
276 {0x180c, "fr_MC"},
277 {0x340c, "fr_ML"},
278 {0x200c, "fr_RE"},
279 {0x280c, "fr_SN"}
280 };
281
282 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
283
284 /* This LCID is really two different locales.*/
285 static const ILcidPosixElement ga[] = {
286 {0x3c, "ga"},
287 {0x3c, "gd"},
288 {0x083c, "ga_IE"}, /* Gaelic (Ireland) */
289 {0x043c, "gd_GB"} /* Gaelic (Scotland) */
290 };
291
292 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
293 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
294 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
295 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
296 ILCID_POSIX_ELEMENT_ARRAY(0x0468, ha, ha_NG) /* ha_Latn_NG? */
297 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
298 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
299 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
300
301 /* This LCID is really four different locales.*/
302 static const ILcidPosixElement hr[] = {
303 {0x1a, "hr"},
304 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
305 {0x141a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
306 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
307 {0x141a, "bs"}, /* Bosnian */
308 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
309 {0x201a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
310 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
311 {0x041a, "hr_HR"}, /* Croatian*/
312 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
313 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
314 {0x081a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
315 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
316 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
317 {0x0c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
318 {0x0c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
319 };
320
321 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
322 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
323 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
324 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
325 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
326 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
327
328 static const ILcidPosixElement it[] = {
329 {0x10, "it"},
330 {0x0810, "it_CH"},
331 {0x0410, "it_IT"}
332 };
333
334 static const ILcidPosixElement iu[] = {
335 {0x5d, "iu"},
336 {0x045d, "iu_Cans_CA"},
337 {0x045d, "iu_Cans"},
338 {0x085d, "iu_Latn_CA"},
339 {0x085d, "iu_Latn"}
340 };
341
342 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
343 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
344 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
345 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
346 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
347 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
348 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
349
350 static const ILcidPosixElement ko[] = {
351 {0x12, "ko"},
352 {0x0812, "ko_KP"},
353 {0x0412, "ko_KR"}
354 };
355
356 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
357 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
358
359 static const ILcidPosixElement ks[] = { /* We could add PK and CN too */
360 {0x60, "ks"},
361 {0x0860, "ks_IN"}, /* Documentation doesn't mention script */
362 {0x0460, "ks_Arab_IN"}
363 };
364
365 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
366 ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */
367 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
368 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
369 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
370 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
371 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
372 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
373 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
374
375 static const ILcidPosixElement mn[] = {
376 {0x50, "mn"},
377 {0x0850, "mn_CN"},
378 {0x0450, "mn_MN"}
379 };
380
381 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
382 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
383 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
384
385 static const ILcidPosixElement ms[] = {
386 {0x3e, "ms"},
387 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
388 {0x043e, "ms_MY"} /* Malaysia*/
389 };
390
391 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
392 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
393
394 static const ILcidPosixElement ne[] = {
395 {0x61, "ne"},
396 {0x0861, "ne_IN"}, /* India*/
397 {0x0461, "ne_NP"} /* Nepal*/
398 };
399
400 static const ILcidPosixElement nl[] = {
401 {0x13, "nl"},
402 {0x0813, "nl_BE"},
403 {0x0413, "nl_NL"}
404 };
405
406 /* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
407 static const ILcidPosixElement no[] = {
408 {0x14, "nb"}, /* really nb */
409 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
410 {0x0414, "no"}, /* really nb_NO */
411 {0x0414, "no_NO"}, /* really nb_NO */
412 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
413 {0x0814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
414 {0x0814, "no_NO_NY"}/* really nn_NO */
415 };
416
417 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
418 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
419 ILCID_POSIX_ELEMENT_ARRAY(0x0472, om, om_ET) /* TODO: Verify the country */
420
421 /* Declared as or_IN to get around compiler errors*/
422 static const ILcidPosixElement or_IN[] = {
423 {0x48, "or"},
424 {0x0448, "or_IN"},
425 };
426
427 static const ILcidPosixElement pa[] = {
428 {0x46, "pa"},
429 {0x0446, "pa_IN"},
430 {0x0846, "pa_PK"}
431 };
432
433 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
434 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
435
436 static const ILcidPosixElement pt[] = {
437 {0x16, "pt"},
438 {0x0416, "pt_BR"},
439 {0x0816, "pt_PT"}
440 };
441
442 static const ILcidPosixElement qu[] = {
443 {0x6b, "qu"},
444 {0x046b, "qu_BO"},
445 {0x086b, "qu_EC"},
446 {0x0C6b, "qu_PE"}
447 };
448
449 ILCID_POSIX_ELEMENT_ARRAY(0x0486, qut, qut_GT) /* qut is an ISO-639-3 code */
450 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
451 ILCID_POSIX_ELEMENT_ARRAY(0x0418, ro, ro_RO)
452
453 static const ILcidPosixElement root[] = {
454 {0x00, "root"}
455 };
456
457 ILCID_POSIX_ELEMENT_ARRAY(0x0419, ru, ru_RU)
458 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
459 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
460 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
461
462 static const ILcidPosixElement sd[] = {
463 {0x59, "sd"},
464 {0x0459, "sd_IN"},
465 {0x0859, "sd_PK"}
466 };
467
468 static const ILcidPosixElement se[] = {
469 {0x3b, "se"},
470 {0x0c3b, "se_FI"},
471 {0x043b, "se_NO"},
472 {0x083b, "se_SE"},
473 {0x183b, "sma_NO"},
474 {0x1c3b, "sma_SE"},
475 {0x103b, "smj_NO"},
476 {0x143b, "smj_SE"},
477 {0x243b, "smn_FI"},
478 {0x203b, "sms_FI"},
479 };
480
481 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
482 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
483 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
484 ILCID_POSIX_ELEMENT_ARRAY(0x0477, so, so_ET) /* TODO: Verify the country */
485 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
486
487 static const ILcidPosixElement sv[] = {
488 {0x1d, "sv"},
489 {0x081d, "sv_FI"},
490 {0x041d, "sv_SE"}
491 };
492
493 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
494 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
495 ILCID_POSIX_ELEMENT_ARRAY(0x0449, ta, ta_IN)
496 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
497 ILCID_POSIX_ELEMENT_ARRAY(0x0428, tg, tg_TJ) /* Cyrillic based by default */
498 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
499
500 static const ILcidPosixElement ti[] = {
501 {0x73, "ti"},
502 {0x0873, "ti_ER"},
503 {0x0473, "ti_ET"}
504 };
505
506 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
507 ILCID_POSIX_ELEMENT_ARRAY(0x0432, tn, tn_BW)
508 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
509 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
510 ILCID_POSIX_ELEMENT_ARRAY(0x0480, ug, ug_CN)
511 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
512
513 static const ILcidPosixElement ur[] = {
514 {0x20, "ur"},
515 {0x0820, "ur_IN"},
516 {0x0420, "ur_PK"}
517 };
518
519 static const ILcidPosixElement uz[] = {
520 {0x43, "uz"},
521 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
522 {0x0843, "uz_Cyrl"}, /* Cyrillic based */
523 {0x0843, "uz_UZ"}, /* Cyrillic based */
524 {0x0443, "uz_Latn_UZ"}, /* Latin based */
525 {0x0443, "uz_Latn"} /* Latin based */
526 };
527
528 ILCID_POSIX_ELEMENT_ARRAY(0x0433, ve, ve_ZA) /* TODO: Verify the country */
529 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
530
531 static const ILcidPosixElement wen[] = {
532 {0x2E, "wen"},
533 {0x042E, "wen_DE"},
534 {0x042E, "hsb_DE"},
535 {0x082E, "dsb_DE"}
536 };
537
538 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
539 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
540 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
541
542 static const ILcidPosixElement zh[] = {
543 {0x04, "zh"},
544 {0x0804, "zh_Hans_CN"},
545 {0x0804, "zh_Hans"},
546 {0x0804, "zh_CN"},
547 {0x0c04, "zh_Hant_HK"},
548 {0x0c04, "zh_HK"},
549 {0x1404, "zh_Hant_MO"},
550 {0x1404, "zh_MO"},
551 {0x1004, "zh_Hans_SG"},
552 {0x1004, "zh_SG"},
553 {0x0404, "zh_Hant_TW"},
554 {0x0404, "zh_Hant"},
555 {0x0404, "zh_TW"},
556 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
557 {0x30404,"zh_TW"}, /* Bopomofo order */
558 {0x20404,"zh_Hant_TW@collation=stroke"},
559 {0x20404,"zh_TW@collation=stroke"},
560 {0x20804,"zh_Hans_CN@collation=stroke"},
561 {0x20804,"zh_CN@collation=stroke"}
562 };
563
564 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
565
566 /* This must be static and grouped by LCID. */
567
568 /* non-existent ISO-639-2 codes */
569 /*
570 0x466 Edo
571 0x467 Fulfulde - Nigeria
572 0x486 K'iche - Guatemala
573 0x430 Sutu
574 */
575 static const ILcidPosixMap gPosixIDmap[] = {
576 ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
577 ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
578 ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
579 ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
580 ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
581 ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
582 ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
583 ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
584 ILCID_POSIX_MAP(ber), /* ber Berber/Tamazight 0x5f */
585 ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
586 ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
587 ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
588 ILCID_POSIX_MAP(br), /* br Breton 0x7e */
589 ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
590 ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
591 ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
592 ILCID_POSIX_MAP(cs_CZ), /* cs Czech 0x05 */
593 ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
594 ILCID_POSIX_MAP(da), /* da Danish 0x06 */
595 ILCID_POSIX_MAP(de), /* de German 0x07 */
596 ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
597 ILCID_POSIX_MAP(el), /* el Greek 0x08 */
598 ILCID_POSIX_MAP(en), /* en English 0x09 */
599 ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
600 ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
601 ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
602 ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
603 ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
604 ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
605 ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
606 ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
607 ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
608 ILCID_POSIX_MAP(fr), /* fr French 0x0c */
609 ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
610 ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
611 ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
612 ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
613 ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
614 ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
615 ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
616 ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
617 ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
618 ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
619 ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
620 ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
621 ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
622 ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
623 ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
624 ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
625 ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
626 ILCID_POSIX_MAP(it), /* it Italian 0x10 */
627 ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
628 ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
629 ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
630 ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
631 ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
632 ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
633 ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
634 ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
635 ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
636 ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
637 ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
638 ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
639 ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
640 ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
641 ILCID_POSIX_MAP(la), /* la Latin 0x76 */
642 ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
643 ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
644 ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
645 ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
646 ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
647 ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
648 ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
649 ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
650 ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
651 ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
652 ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
653 ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
654 ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
655 /* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
656 ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
657 ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
658 /* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
659 ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
660 ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
661 ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
662 ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
663 ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
664 ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
665 ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
666 ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
667 ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
668 ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
669 ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
670 ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
671 ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
672 ILCID_POSIX_MAP(root), /* root 0x00 */
673 ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
674 ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
675 ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
676 ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
677 ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
678 ILCID_POSIX_MAP(se), /* se Sami 0x3b */
679 /* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
680 ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
681 ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
682 ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
683 ILCID_POSIX_MAP(so), /* so Somali 0x77 */
684 ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
685 /* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
686 ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
687 ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
688 ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
689 ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
690 ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
691 ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
692 ILCID_POSIX_MAP(th), /* th Thai 0x1e */
693 ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
694 ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
695 ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
696 ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
697 ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
698 ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
699 ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
700 ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
701 ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
702 ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
703 ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
704 ILCID_POSIX_MAP(wen), /* wen Sorbian 0x2e */
705 ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
706 ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
707 ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
708 ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
709 ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
710 };
711
712 static const uint32_t gLocaleCount = sizeof(gPosixIDmap)/sizeof(ILcidPosixMap);
713
/**
 * Internal helper for getHostID; not meant to be called from elsewhere.
 *
 * Counts how many leading characters two NUL-terminated IDs have in
 * common. Counting stops at the first differing character or at the
 * end of id1, whichever comes first.
 *
 * @param id1 first ID
 * @param id2 second ID
 * @return the length of the common prefix of id1 and id2
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched;

    for (matched = 0; id1[matched] != 0 && id1[matched] == id2[matched]; ++matched) {
        /* keep advancing while the characters agree */
    }
    return matched;
}
730
731 /**
732 * Searches for a Windows LCID
733 *
734 * @param posixid the Posix style locale id.
735 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
736 * no equivalent Windows LCID.
737 * @return the LCID
738 */
739 static uint32_t
740 getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
741 {
742 int32_t bestIdx = 0;
743 int32_t bestIdxDiff = 0;
744 int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
745 uint32_t idx;
746
747 for (idx = 0; idx < this_0->numRegions; idx++ ) {
748 int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
749 if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
750 if (posixIDlen == sameChars) {
751 /* Exact match */
752 return this_0->regionMaps[idx].hostID;
753 }
754 bestIdxDiff = sameChars;
755 bestIdx = idx;
756 }
757 }
758 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
759 /* We also have to make sure that sid and si and similar string subsets don't match. */
760 if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
761 && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
762 {
763 *status = U_USING_FALLBACK_WARNING;
764 return this_0->regionMaps[bestIdx].hostID;
765 }
766
767 /*no match found */
768 *status = U_ILLEGAL_ARGUMENT_ERROR;
769 return this_0->regionMaps->hostID;
770 }
771
772 static const char*
773 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
774 {
775 uint32_t i;
776 for (i = 0; i <= this_0->numRegions; i++)
777 {
778 if (this_0->regionMaps[i].hostID == hostID)
779 {
780 return this_0->regionMaps[i].posixID;
781 }
782 }
783
784 /* If you get here, then no matching region was found,
785 so return the language id with the wild card region. */
786 return this_0->regionMaps[0].posixID;
787 }
788
789 /*
790 //////////////////////////////////////
791 //
792 // LCID --> POSIX
793 //
794 /////////////////////////////////////
795 */
796
797 U_CAPI const char *
798 uprv_convertToPosix(uint32_t hostid, UErrorCode* status)
799 {
800 uint16_t langID = LANGUAGE_LCID(hostid);
801 uint32_t index;
802
803 for (index = 0; index < gLocaleCount; index++)
804 {
805 if (langID == gPosixIDmap[index].regionMaps->hostID)
806 {
807 return getPosixID(&gPosixIDmap[index], hostid);
808 }
809 }
810
811 /* no match found */
812 *status = U_ILLEGAL_ARGUMENT_ERROR;
813 return NULL;
814 }
815
816 /*
817 //////////////////////////////////////
818 //
819 // POSIX --> LCID
820 // This should only be called from uloc_getLCID.
821 // The locale ID must be in canonical form.
822 // langID is separate so that this file doesn't depend on the uloc_* API.
823 //
824 /////////////////////////////////////
825 */
826
827 U_CAPI uint32_t
828 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
829 {
830
831 uint32_t low = 0;
832 uint32_t high = gLocaleCount;
833 uint32_t mid = high;
834 uint32_t oldmid = 0;
835 int32_t compVal;
836
837 uint32_t value = 0;
838 uint32_t fallbackValue = (uint32_t)-1;
839 UErrorCode myStatus;
840 uint32_t idx;
841
842 /* Check for incomplete id. */
843 if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
844 return 0;
845 }
846
847 /*Binary search for the map entry for normal cases */
848
849 while (high > low) /*binary search*/{
850
851 mid = (high+low) >> 1; /*Finds median*/
852
853 if (mid == oldmid)
854 break;
855
856 compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
857 if (compVal < 0){
858 high = mid;
859 }
860 else if (compVal > 0){
861 low = mid;
862 }
863 else /*we found it*/{
864 return getHostID(&gPosixIDmap[mid], posixID, status);
865 }
866 oldmid = mid;
867 }
868
869 /*
870 * Sometimes we can't do a binary search on posixID because some LCIDs
871 * go to different locales. We hit one of those special cases.
872 */
873 for (idx = 0; idx < gLocaleCount; idx++ ) {
874 myStatus = U_ZERO_ERROR;
875 value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
876 if (myStatus == U_ZERO_ERROR) {
877 return value;
878 }
879 else if (myStatus == U_USING_FALLBACK_WARNING) {
880 fallbackValue = value;
881 }
882 }
883
884 if (fallbackValue != (uint32_t)-1) {
885 *status = U_USING_FALLBACK_WARNING;
886 return fallbackValue;
887 }
888
889 /* no match found */
890 *status = U_ILLEGAL_ARGUMENT_ERROR;
891 return 0; /* return international (root) */
892 }
893