2 **********************************************************************
3 * Copyright (C) 1996-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 * Provides functionality for mapping between
8 * LCID and Posix IDs or ICU locale to codepage
10 * Note: All classes and code in this file are
11 * intended for internal use only.
13 * Methods of interest:
14 * unsigned long convertToLCID(const char*);
15 * const char* convertToPosix(unsigned long);
17 * Kathleen Wilson, 4/30/96
19 * Date Name Description
20 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
21 * setId() method and safety check against
23 * 04/23/99 stephen Added C wrapper for convertToPosix.
24 * 09/18/00 george Removed the memory leaks.
25 * 08/23/01 george Convert to C
33 * The mapping from Win32 locale ID numbers to POSIX locale strings should
36 * Many LCID values come from winnt.h
37 * Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
41 ////////////////////////////////////////////////
43 // Internal Classes for LCID <--> POSIX Mapping
45 /////////////////////////////////////////////////
48 typedef struct ILcidPosixElement
50 const uint32_t hostID
;
51 const char * const posixID
;
54 typedef struct ILcidPosixMap
56 const uint32_t numRegions
;
57 const struct ILcidPosixElement
* const regionMaps
;
62 /////////////////////////////////////////////////
64 // Easy macros to make the LCID <--> POSIX Mapping
66 /////////////////////////////////////////////////
70 The standard one language/one country mapping for LCID.
71 The first element must be the language, and the following
72 elements are the language with the country.
74 #define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
75 static const ILcidPosixElement languageID[] = { \
76 {LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \
81 Create the map for the posixID. This macro supposes that the language string
82 name is the same as the global variable name, and that the first element
83 in the ILcidPosixElement is just the language.
85 #define ILCID_POSIX_MAP(_posixID) \
86 {sizeof(_posixID)/sizeof(ILcidPosixElement), _posixID}
89 ////////////////////////////////////////////
91 // Create the table of LCID to POSIX Mapping
92 // None of it should be dynamically created.
94 // Keep static locale variables inside the function so that
95 // it can be created properly during static init.
97 ////////////////////////////////////////////
100 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af
, af_ZA
)
102 static const ILcidPosixElement ar
[] = {
122 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as
, as_IN
)
123 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am
, am_ET
)
124 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn
,arn_CL
)
126 static const ILcidPosixElement az
[] = {
128 {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
129 {0x082c, "az_Cyrl"}, /* Cyrillic based */
130 {0x042c, "az_Latn_AZ"}, /* Latin based */
131 {0x042c, "az_Latn"}, /* Latin based */
132 {0x042c, "az_AZ"} /* Latin based */
135 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba
, ba_RU
)
136 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be
, be_BY
)
138 static const ILcidPosixElement ber
[] = {
140 {0x045f, "ber_Arab_DZ"},
141 {0x045f, "ber_Arab"},
142 {0x085f, "ber_Latn_DZ"},
146 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg
, bg_BG
)
148 static const ILcidPosixElement bn
[] = {
154 static const ILcidPosixElement bo
[] = {
160 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br
, br_FR
)
161 ILCID_POSIX_ELEMENT_ARRAY(0x0403, ca
, ca_ES
)
162 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co
, co_FR
)
163 ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr
,chr_US
)
165 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
166 static const ILcidPosixElement cs_CZ
[] = {
171 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy
, cy_GB
)
172 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da
, da_DK
)
174 static const ILcidPosixElement de
[] = {
181 {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
182 {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
185 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv
, dv_MV
)
186 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el
, el_GR
)
188 static const ILcidPosixElement en
[] = {
203 {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
204 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). */
207 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. */
208 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. */
209 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. */
210 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. */
211 {0x0409, "en_UM"} /* Alias for en_US. Leave last. */
214 static const ILcidPosixElement en_US_POSIX
[] = {
215 {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
218 static const ILcidPosixElement es
[] = {
227 {0x0c0a, "es_ES"}, /*Modern sort.*/
240 {0x040a, "es_ES@collation=traditional"},
241 {0x040a, "es@collation=traditional"}
244 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et
, et_EE
)
245 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu
, eu_ES
)
247 /* ISO-639 doesn't distinguish between Persian and Dari.*/
248 static const ILcidPosixElement fa
[] = {
250 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
251 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
254 /* duplicate for roundtripping */
255 static const ILcidPosixElement fa_AF
[] = {
256 {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
257 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
260 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi
, fi_FI
)
261 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil
,fil_PH
)
262 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo
, fo_FO
)
264 static const ILcidPosixElement fr
[] = {
282 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy
, fy_NL
)
284 /* This LCID is really two different locales.*/
285 static const ILcidPosixElement ga
[] = {
288 {0x083c, "ga_IE"}, /* Gaelic (Ireland) */
289 {0x043c, "gd_GB"} /* Gaelic (Scotland) */
292 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl
, gl_ES
)
293 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu
, gu_IN
)
294 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn
, gn_PY
)
295 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw
,gsw_FR
)
296 ILCID_POSIX_ELEMENT_ARRAY(0x0468, ha
, ha_NG
) /* ha_Latn_NG? */
297 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw
,haw_US
)
298 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he
, he_IL
)
299 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi
, hi_IN
)
301 /* This LCID is really four different locales.*/
302 static const ILcidPosixElement hr
[] = {
304 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
305 {0x141a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
306 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
307 {0x141a, "bs"}, /* Bosnian */
308 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
309 {0x201a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
310 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
311 {0x041a, "hr_HR"}, /* Croatian*/
312 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
313 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
314 {0x081a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
315 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
316 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
317 {0x0c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
318 {0x0c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
321 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu
, hu_HU
)
322 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy
, hy_AM
)
323 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id
, id_ID
)
324 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig
, ig_NG
)
325 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii
, ii_CN
)
326 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is
, is_IS
)
328 static const ILcidPosixElement it
[] = {
334 static const ILcidPosixElement iu
[] = {
336 {0x045d, "iu_Cans_CA"},
338 {0x085d, "iu_Latn_CA"},
342 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw
, iw_IL
) /*Left in for compatibility*/
343 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja
, ja_JP
)
344 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka
, ka_GE
)
345 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk
, kk_KZ
)
346 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl
, kl_GL
)
347 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km
, km_KH
)
348 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn
, kn_IN
)
350 static const ILcidPosixElement ko
[] = {
356 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok
, kok_IN
)
357 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr
, kr_NG
)
359 static const ILcidPosixElement ks
[] = { /* We could add PK and CN too */
361 {0x0860, "ks_IN"}, /* Documentation doesn't mention script */
362 {0x0460, "ks_Arab_IN"}
365 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky
, ky_KG
) /* Kyrgyz is spoken in Kyrgyzstan */
366 ILCID_POSIX_ELEMENT_ARRAY(0x0476, la
, la_IT
) /* TODO: Verify the country */
367 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb
, lb_LU
)
368 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo
, lo_LA
)
369 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt
, lt_LT
)
370 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv
, lv_LV
)
371 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi
, mi_NZ
)
372 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk
, mk_MK
)
373 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml
, ml_IN
)
375 static const ILcidPosixElement mn
[] = {
381 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni
,mni_IN
)
382 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh
,moh_CA
)
383 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr
, mr_IN
)
385 static const ILcidPosixElement ms
[] = {
387 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
388 {0x043e, "ms_MY"} /* Malaysia*/
391 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt
, mt_MT
)
392 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my
, my_MM
)
394 static const ILcidPosixElement ne
[] = {
396 {0x0861, "ne_IN"}, /* India*/
397 {0x0461, "ne_NP"} /* Nepal*/
400 static const ILcidPosixElement nl
[] = {
406 /* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
407 static const ILcidPosixElement no
[] = {
408 {0x14, "nb"}, /* really nb */
409 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
410 {0x0414, "no"}, /* really nb_NO */
411 {0x0414, "no_NO"}, /* really nb_NO */
412 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
413 {0x0814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
414 {0x0814, "no_NO_NY"}/* really nn_NO */
417 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso
,nso_ZA
) /* TODO: Verify the ISO-639 code */
418 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc
, oc_FR
)
419 ILCID_POSIX_ELEMENT_ARRAY(0x0472, om
, om_ET
) /* TODO: Verify the country */
421 /* Declared as or_IN to get around compiler errors*/
422 static const ILcidPosixElement or_IN
[] = {
427 static const ILcidPosixElement pa
[] = {
433 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl
, pl_PL
)
434 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps
, ps_AF
)
436 static const ILcidPosixElement pt
[] = {
442 static const ILcidPosixElement qu
[] = {
449 ILCID_POSIX_ELEMENT_ARRAY(0x0486, qut
, qut_GT
) /* qut is an ISO-639-3 code */
450 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm
, rm_CH
)
451 ILCID_POSIX_ELEMENT_ARRAY(0x0418, ro
, ro_RO
)
453 static const ILcidPosixElement root
[] = {
457 ILCID_POSIX_ELEMENT_ARRAY(0x0419, ru
, ru_RU
)
458 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw
, rw_RW
)
459 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa
, sa_IN
)
460 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah
,sah_RU
)
462 static const ILcidPosixElement sd
[] = {
468 static const ILcidPosixElement se
[] = {
481 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si
, si_LK
)
482 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk
, sk_SK
)
483 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl
, sl_SI
)
484 ILCID_POSIX_ELEMENT_ARRAY(0x0477, so
, so_ET
) /* TODO: Verify the country */
485 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq
, sq_AL
)
487 static const ILcidPosixElement sv
[] = {
493 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw
, sw_KE
)
494 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr
, syr_SY
)
495 ILCID_POSIX_ELEMENT_ARRAY(0x0449, ta
, ta_IN
)
496 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te
, te_IN
)
497 ILCID_POSIX_ELEMENT_ARRAY(0x0428, tg
, tg_TJ
) /* Cyrillic based by default */
498 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th
, th_TH
)
500 static const ILcidPosixElement ti
[] = {
506 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk
, tk_TM
)
507 ILCID_POSIX_ELEMENT_ARRAY(0x0432, tn
, tn_BW
)
508 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr
, tr_TR
)
509 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt
, tt_RU
)
510 ILCID_POSIX_ELEMENT_ARRAY(0x0480, ug
, ug_CN
)
511 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk
, uk_UA
)
513 static const ILcidPosixElement ur
[] = {
519 static const ILcidPosixElement uz
[] = {
521 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
522 {0x0843, "uz_Cyrl"}, /* Cyrillic based */
523 {0x0843, "uz_UZ"}, /* Cyrillic based */
524 {0x0443, "uz_Latn_UZ"}, /* Latin based */
525 {0x0443, "uz_Latn"} /* Latin based */
528 ILCID_POSIX_ELEMENT_ARRAY(0x0433, ve
, ve_ZA
) /* TODO: Verify the country */
529 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi
, vi_VN
)
531 static const ILcidPosixElement wen
[] = {
538 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo
, wo_SN
)
539 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh
, xh_ZA
)
540 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo
, yo_NG
)
542 static const ILcidPosixElement zh
[] = {
544 {0x0804, "zh_Hans_CN"},
547 {0x0c04, "zh_Hant_HK"},
549 {0x1404, "zh_Hant_MO"},
551 {0x1004, "zh_Hans_SG"},
553 {0x0404, "zh_Hant_TW"},
556 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
557 {0x30404,"zh_TW"}, /* Bopomofo order */
558 {0x20404,"zh_Hant_TW@collation=stroke"},
559 {0x20404,"zh_TW@collation=stroke"},
560 {0x20804,"zh_Hans_CN@collation=stroke"},
561 {0x20804,"zh_CN@collation=stroke"}
564 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu
, zu_ZA
)
566 /* This must be static and grouped by LCID. */
568 /* non-existent ISO-639-2 codes */
571 0x467 Fulfulde - Nigeria
572 0x486 K'iche - Guatemala
575 static const ILcidPosixMap gPosixIDmap
[] = {
576 ILCID_POSIX_MAP(af
), /* af Afrikaans 0x36 */
577 ILCID_POSIX_MAP(am
), /* am Amharic 0x5e */
578 ILCID_POSIX_MAP(ar
), /* ar Arabic 0x01 */
579 ILCID_POSIX_MAP(arn
), /* arn Araucanian/Mapudungun 0x7a */
580 ILCID_POSIX_MAP(as
), /* as Assamese 0x4d */
581 ILCID_POSIX_MAP(az
), /* az Azerbaijani 0x2c */
582 ILCID_POSIX_MAP(ba
), /* ba Bashkir 0x6d */
583 ILCID_POSIX_MAP(be
), /* be Belarusian 0x23 */
584 ILCID_POSIX_MAP(ber
), /* ber Berber/Tamazight 0x5f */
585 ILCID_POSIX_MAP(bg
), /* bg Bulgarian 0x02 */
586 ILCID_POSIX_MAP(bn
), /* bn Bengali; Bangla 0x45 */
587 ILCID_POSIX_MAP(bo
), /* bo Tibetan 0x51 */
588 ILCID_POSIX_MAP(br
), /* br Breton 0x7e */
589 ILCID_POSIX_MAP(ca
), /* ca Catalan 0x03 */
590 ILCID_POSIX_MAP(chr
), /* chr Cherokee 0x5c */
591 ILCID_POSIX_MAP(co
), /* co Corsican 0x83 */
592 ILCID_POSIX_MAP(cs_CZ
), /* cs Czech 0x05 */
593 ILCID_POSIX_MAP(cy
), /* cy Welsh 0x52 */
594 ILCID_POSIX_MAP(da
), /* da Danish 0x06 */
595 ILCID_POSIX_MAP(de
), /* de German 0x07 */
596 ILCID_POSIX_MAP(dv
), /* dv Divehi 0x65 */
597 ILCID_POSIX_MAP(el
), /* el Greek 0x08 */
598 ILCID_POSIX_MAP(en
), /* en English 0x09 */
599 ILCID_POSIX_MAP(en_US_POSIX
), /* invariant 0x7f */
600 ILCID_POSIX_MAP(es
), /* es Spanish 0x0a */
601 ILCID_POSIX_MAP(et
), /* et Estonian 0x25 */
602 ILCID_POSIX_MAP(eu
), /* eu Basque 0x2d */
603 ILCID_POSIX_MAP(fa
), /* fa Persian/Farsi 0x29 */
604 ILCID_POSIX_MAP(fa_AF
), /* fa Persian/Dari 0x8c */
605 ILCID_POSIX_MAP(fi
), /* fi Finnish 0x0b */
606 ILCID_POSIX_MAP(fil
), /* fil Filipino 0x64 */
607 ILCID_POSIX_MAP(fo
), /* fo Faroese 0x38 */
608 ILCID_POSIX_MAP(fr
), /* fr French 0x0c */
609 ILCID_POSIX_MAP(fy
), /* fy Frisian 0x62 */
610 ILCID_POSIX_MAP(ga
), /* * Gaelic (Ireland,Scotland) 0x3c */
611 ILCID_POSIX_MAP(gl
), /* gl Galician 0x56 */
612 ILCID_POSIX_MAP(gn
), /* gn Guarani 0x74 */
613 ILCID_POSIX_MAP(gsw
), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
614 ILCID_POSIX_MAP(gu
), /* gu Gujarati 0x47 */
615 ILCID_POSIX_MAP(ha
), /* ha Hausa 0x68 */
616 ILCID_POSIX_MAP(haw
), /* haw Hawaiian 0x75 */
617 ILCID_POSIX_MAP(he
), /* he Hebrew (formerly iw) 0x0d */
618 ILCID_POSIX_MAP(hi
), /* hi Hindi 0x39 */
619 ILCID_POSIX_MAP(hr
), /* * Croatian and others 0x1a */
620 ILCID_POSIX_MAP(hu
), /* hu Hungarian 0x0e */
621 ILCID_POSIX_MAP(hy
), /* hy Armenian 0x2b */
622 ILCID_POSIX_MAP(id
), /* id Indonesian (formerly in) 0x21 */
623 ILCID_POSIX_MAP(ig
), /* ig Igbo 0x70 */
624 ILCID_POSIX_MAP(ii
), /* ii Sichuan Yi 0x78 */
625 ILCID_POSIX_MAP(is
), /* is Icelandic 0x0f */
626 ILCID_POSIX_MAP(it
), /* it Italian 0x10 */
627 ILCID_POSIX_MAP(iu
), /* iu Inuktitut 0x5d */
628 ILCID_POSIX_MAP(iw
), /* iw Hebrew 0x0d */
629 ILCID_POSIX_MAP(ja
), /* ja Japanese 0x11 */
630 ILCID_POSIX_MAP(ka
), /* ka Georgian 0x37 */
631 ILCID_POSIX_MAP(kk
), /* kk Kazakh 0x3f */
632 ILCID_POSIX_MAP(kl
), /* kl Kalaallisut 0x6f */
633 ILCID_POSIX_MAP(km
), /* km Khmer 0x53 */
634 ILCID_POSIX_MAP(kn
), /* kn Kannada 0x4b */
635 ILCID_POSIX_MAP(ko
), /* ko Korean 0x12 */
636 ILCID_POSIX_MAP(kok
), /* kok Konkani 0x57 */
637 ILCID_POSIX_MAP(kr
), /* kr Kanuri 0x71 */
638 ILCID_POSIX_MAP(ks
), /* ks Kashmiri 0x60 */
639 ILCID_POSIX_MAP(ky
), /* ky Kyrgyz 0x40 */
640 ILCID_POSIX_MAP(lb
), /* lb Luxembourgish 0x6e */
641 ILCID_POSIX_MAP(la
), /* la Latin 0x76 */
642 ILCID_POSIX_MAP(lo
), /* lo Lao 0x54 */
643 ILCID_POSIX_MAP(lt
), /* lt Lithuanian 0x27 */
644 ILCID_POSIX_MAP(lv
), /* lv Latvian, Lettish 0x26 */
645 ILCID_POSIX_MAP(mi
), /* mi Maori 0x81 */
646 ILCID_POSIX_MAP(mk
), /* mk Macedonian 0x2f */
647 ILCID_POSIX_MAP(ml
), /* ml Malayalam 0x4c */
648 ILCID_POSIX_MAP(mn
), /* mn Mongolian 0x50 */
649 ILCID_POSIX_MAP(mni
), /* mni Manipuri 0x58 */
650 ILCID_POSIX_MAP(moh
), /* moh Mohawk 0x7c */
651 ILCID_POSIX_MAP(mr
), /* mr Marathi 0x4e */
652 ILCID_POSIX_MAP(ms
), /* ms Malay 0x3e */
653 ILCID_POSIX_MAP(mt
), /* mt Maltese 0x3a */
654 ILCID_POSIX_MAP(my
), /* my Burmese 0x55 */
655 /* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
656 ILCID_POSIX_MAP(ne
), /* ne Nepali 0x61 */
657 ILCID_POSIX_MAP(nl
), /* nl Dutch 0x13 */
658 /* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
659 ILCID_POSIX_MAP(no
), /* * Norwegian 0x14 */
660 ILCID_POSIX_MAP(nso
), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
661 ILCID_POSIX_MAP(oc
), /* oc Occitan 0x82 */
662 ILCID_POSIX_MAP(om
), /* om Oromo 0x72 */
663 ILCID_POSIX_MAP(or_IN
), /* or Oriya 0x48 */
664 ILCID_POSIX_MAP(pa
), /* pa Punjabi 0x46 */
665 ILCID_POSIX_MAP(pl
), /* pl Polish 0x15 */
666 ILCID_POSIX_MAP(ps
), /* ps Pashto 0x63 */
667 ILCID_POSIX_MAP(pt
), /* pt Portuguese 0x16 */
668 ILCID_POSIX_MAP(qu
), /* qu Quechua 0x6B */
669 ILCID_POSIX_MAP(qut
), /* qut K'iche 0x86 */
670 ILCID_POSIX_MAP(rm
), /* rm Raeto-Romance/Romansh 0x17 */
671 ILCID_POSIX_MAP(ro
), /* ro Romanian 0x18 */
672 ILCID_POSIX_MAP(root
), /* root 0x00 */
673 ILCID_POSIX_MAP(ru
), /* ru Russian 0x19 */
674 ILCID_POSIX_MAP(rw
), /* rw Kinyarwanda 0x87 */
675 ILCID_POSIX_MAP(sa
), /* sa Sanskrit 0x4f */
676 ILCID_POSIX_MAP(sah
), /* sah Yakut 0x85 */
677 ILCID_POSIX_MAP(sd
), /* sd Sindhi 0x59 */
678 ILCID_POSIX_MAP(se
), /* se Sami 0x3b */
679 /* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
680 ILCID_POSIX_MAP(si
), /* si Sinhalese 0x5b */
681 ILCID_POSIX_MAP(sk
), /* sk Slovak 0x1b */
682 ILCID_POSIX_MAP(sl
), /* sl Slovenian 0x24 */
683 ILCID_POSIX_MAP(so
), /* so Somali 0x77 */
684 ILCID_POSIX_MAP(sq
), /* sq Albanian 0x1c */
685 /* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
686 ILCID_POSIX_MAP(sv
), /* sv Swedish 0x1d */
687 ILCID_POSIX_MAP(sw
), /* sw Swahili 0x41 */
688 ILCID_POSIX_MAP(syr
), /* syr Syriac 0x5A */
689 ILCID_POSIX_MAP(ta
), /* ta Tamil 0x49 */
690 ILCID_POSIX_MAP(te
), /* te Telugu 0x4a */
691 ILCID_POSIX_MAP(tg
), /* tg Tajik 0x28 */
692 ILCID_POSIX_MAP(th
), /* th Thai 0x1e */
693 ILCID_POSIX_MAP(ti
), /* ti Tigrigna 0x73 */
694 ILCID_POSIX_MAP(tk
), /* tk Turkmen 0x42 */
695 ILCID_POSIX_MAP(tn
), /* tn Tswana 0x32 */
696 ILCID_POSIX_MAP(tr
), /* tr Turkish 0x1f */
697 ILCID_POSIX_MAP(tt
), /* tt Tatar 0x44 */
698 ILCID_POSIX_MAP(ug
), /* ug Uighur 0x80 */
699 ILCID_POSIX_MAP(uk
), /* uk Ukrainian 0x22 */
700 ILCID_POSIX_MAP(ur
), /* ur Urdu 0x20 */
701 ILCID_POSIX_MAP(uz
), /* uz Uzbek 0x43 */
702 ILCID_POSIX_MAP(ve
), /* ve Venda 0x33 */
703 ILCID_POSIX_MAP(vi
), /* vi Vietnamese 0x2a */
704 ILCID_POSIX_MAP(wen
), /* wen Sorbian 0x2e */
705 ILCID_POSIX_MAP(wo
), /* wo Wolof 0x88 */
706 ILCID_POSIX_MAP(xh
), /* xh Xhosa 0x34 */
707 ILCID_POSIX_MAP(yo
), /* yo Yoruba 0x6a */
708 ILCID_POSIX_MAP(zh
), /* zh Chinese 0x04 */
709 ILCID_POSIX_MAP(zu
), /* zu Zulu 0x35 */
712 static const uint32_t gLocaleCount
= sizeof(gPosixIDmap
)/sizeof(ILcidPosixMap
);
715 * Do not call this function directly. It is called by getHostID.
716 * The function is not private because this struct must stay as a C struct,
717 * and this is an internal class.
720 idCmp(const char* id1
, const char* id2
)
723 while (*id1
== *id2
&& *id1
!= 0) {
732 * Searches for a Windows LCID
734 * @param posixid the Posix style locale id.
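735 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
736 * no equivalent Windows LCID, or to U_USING_FALLBACK_WARNING when only a prefix of the Posix ID (ending before a '_' or '@') matches a table entry.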
740 getHostID(const ILcidPosixMap
*this_0
, const char* posixID
, UErrorCode
* status
)
743 int32_t bestIdxDiff
= 0;
744 int32_t posixIDlen
= (int32_t)uprv_strlen(posixID
);
747 for (idx
= 0; idx
< this_0
->numRegions
; idx
++ ) {
748 int32_t sameChars
= idCmp(posixID
, this_0
->regionMaps
[idx
].posixID
);
749 if (sameChars
> bestIdxDiff
&& this_0
->regionMaps
[idx
].posixID
[sameChars
] == 0) {
750 if (posixIDlen
== sameChars
) {
752 return this_0
->regionMaps
[idx
].hostID
;
754 bestIdxDiff
= sameChars
;
758 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
759 /* We also have to make sure that sid and si and similar string subsets don't match. */
760 if ((posixID
[bestIdxDiff
] == '_' || posixID
[bestIdxDiff
] == '@')
761 && this_0
->regionMaps
[bestIdx
].posixID
[bestIdxDiff
] == 0)
763 *status
= U_USING_FALLBACK_WARNING
;
764 return this_0
->regionMaps
[bestIdx
].hostID
;
768 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
769 return this_0
->regionMaps
->hostID
;
773 getPosixID(const ILcidPosixMap
*this_0
, uint32_t hostID
)
776 for (i
= 0; i
<= this_0
->numRegions
; i
++)
778 if (this_0
->regionMaps
[i
].hostID
== hostID
)
780 return this_0
->regionMaps
[i
].posixID
;
784 /* If you get here, then no matching region was found,
785 so return the language id with the wild card region. */
786 return this_0
->regionMaps
[0].posixID
;
790 //////////////////////////////////////
794 /////////////////////////////////////
798 uprv_convertToPosix(uint32_t hostid
, UErrorCode
* status
)
800 uint16_t langID
= LANGUAGE_LCID(hostid
);
803 for (index
= 0; index
< gLocaleCount
; index
++)
805 if (langID
== gPosixIDmap
[index
].regionMaps
->hostID
)
807 return getPosixID(&gPosixIDmap
[index
], hostid
);
812 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
817 //////////////////////////////////////
820 // This should only be called from uloc_getLCID.
821 // The locale ID must be in canonical form.
822 // langID is separate so that this file doesn't depend on the uloc_* API.
824 /////////////////////////////////////
828 uprv_convertToLCID(const char *langID
, const char* posixID
, UErrorCode
* status
)
832 uint32_t high
= gLocaleCount
;
838 uint32_t fallbackValue
= (uint32_t)-1;
842 /* Check for incomplete id. */
843 if (!langID
|| !posixID
|| uprv_strlen(langID
) < 2 || uprv_strlen(posixID
) < 2) {
847 /*Binary search for the map entry for normal cases */
849 while (high
> low
) /*binary search*/{
851 mid
= (high
+low
) >> 1; /*Finds median*/
856 compVal
= uprv_strcmp(langID
, gPosixIDmap
[mid
].regionMaps
->posixID
);
860 else if (compVal
> 0){
863 else /*we found it*/{
864 return getHostID(&gPosixIDmap
[mid
], posixID
, status
);
870 * Sometimes we can't do a binary search on posixID because some LCIDs
871 * go to different locales. We hit one of those special cases.
873 for (idx
= 0; idx
< gLocaleCount
; idx
++ ) {
874 myStatus
= U_ZERO_ERROR
;
875 value
= getHostID(&gPosixIDmap
[idx
], posixID
, &myStatus
);
876 if (myStatus
== U_ZERO_ERROR
) {
879 else if (myStatus
== U_USING_FALLBACK_WARNING
) {
880 fallbackValue
= value
;
884 if (fallbackValue
!= (uint32_t)-1) {
885 *status
= U_USING_FALLBACK_WARNING
;
886 return fallbackValue
;
890 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
891 return 0; /* return international (root) */