2 * Copyright (c) 2008, KJK::Hyperion
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * - Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
15 * - Neither the name of the ReactOS Foundation nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
33 #include <unicode/putil.h>
34 #include <unicode/uchar.h>
35 #include <unicode/uloc.h>
36 #include <unicode/ures.h>
37 #include <unicode/uscript.h>
56 chars
[0] = toupper(chars
[0]);
57 chars
[1] = tolower(chars
[1]);
58 chars
[2] = tolower(chars
[2]);
59 chars
[3] = tolower(chars
[3]);
65 bool operator<(const script_name
& Y
) const { return strncmp(chars
, Y
.chars
, 4) < 0; }
66 bool operator>(const script_name
& Y
) const { return strncmp(chars
, Y
.chars
, 4) > 0; }
67 bool operator==(const script_name
& Y
) const { return strncmp(chars
, Y
.chars
, 4) == 0; }
68 bool operator!=(const script_name
& Y
) const { return strncmp(chars
, Y
.chars
, 4) != 0; }
69 bool operator<=(const script_name
& Y
) const { return strncmp(chars
, Y
.chars
, 4) <= 0; }
70 bool operator>=(const script_name
& Y
) const { return strncmp(chars
, Y
.chars
, 4) >= 0; }
72 script_name(): chars() { }
73 script_name(const script_name
& Y
) { memcpy(chars
, Y
.chars
, sizeof(chars
)); }
// Copy assignment: overwrites this object's character storage with a
// byte-for-byte copy of Y's (sizeof(chars) bytes). Declared to return a
// const reference to script_name.
75 const script_name
& operator=(const script_name
& Y
)
77 memcpy(chars
, Y
.chars
, sizeof(chars
));
// Builds a script code from a narrow C string. Explicit, so a const char *
// never converts silently. pChars must be a NUL-terminated string of exactly
// four characters; this is only enforced by the assert below.
81 explicit script_name(const char * pChars
)
84 assert(strlen(pChars
) == 4);
// Builds a script code from an ICU UChar string. pChars must be a
// NUL-terminated string of exactly four UChars; this is only enforced by the
// assert below.
92 explicit script_name(const UChar
* pChars
)
95 assert(u_strlen(pChars
) == 4);
// Narrow the four UChars into the char storage.
96 u_UCharsToChars(pChars
, chars
, 4);
/**
 * Strict weak ordering for locale identifiers that ignores letter case, so
 * that e.g. "en-US" and "EN-us" are treated as the same key.
 *
 * The adaptable-functor typedefs are spelled out by hand because
 * std::binary_function was removed in C++17, and the comparison is written
 * with tolower() because stricmp() is not a standard function.
 */
struct lessLocaleId
{
	typedef std::string first_argument_type;
	typedef std::string second_argument_type;
	typedef bool result_type;

	/**
	 * @param x left-hand locale id
	 * @param y right-hand locale id
	 * @return true when x sorts strictly before y, comparing characters after
	 *         lower-casing them; a proper prefix sorts before the longer string
	 */
	result_type operator()(const first_argument_type & x, const second_argument_type & y) const
	{
		const std::string::size_type common = x.size() < y.size() ? x.size() : y.size();

		for(std::string::size_type i = 0; i < common; ++ i)
		{
			// tolower() requires a value representable as unsigned char
			const int cx = tolower(static_cast<unsigned char>(x[i]));
			const int cy = tolower(static_cast<unsigned char>(y[i]));

			if(cx != cy)
				return cx < cy;
		}

		// All shared positions are equal: the shorter string (if any) comes first
		return x.size() < y.size();
	}
};
// Turns an ICU locale name (a C string) into the std::string form used as a
// key by the tables below. The visible code only copies the input into a
// std::string; the FIXME marks the conversion as unfinished.
109 std::string
convertLocale(const char * locale
)
111 std::string
s(locale
); // FIXME!!!
// Checks whether id is usable as an identifier: it must be non-empty and its
// first character must satisfy ICU's u_isIDStart; the remaining characters
// are then visited one by one.
// NOTE(review): each char of the std::string is handed to ICU as if it were
// a code point -- presumably only ASCII input is expected; confirm.
115 bool validId(const std::string
& id
)
117 std::string::const_iterator p
= id
.begin();
// Reject the empty string and anything that does not start like an identifier.
119 if(p
== id
.end() || !u_isIDStart(*p
))
// Scan the rest of the string, continuing from the current position.
124 for(; p
!= id
.end(); ++ p
)
// Produces the text of a source-code literal for a locale id; callers print
// it right after an 'L' prefix. The locale text is appended verbatim: as the
// FIXME notes, no escaping of special characters is performed.
131 std::string
getLocaleLiteral(const std::string
& locale
)
135 lit
+= locale
; // FIXME!!! escapes
// Returns the text that represents script code s in the generated source;
// callers print the result right after an 'L' prefix.
140 std::string
getScriptLiteral(const script_name
& s
)
// Builds an identifier for script code s; the identifier starts with the
// fixed prefix "IDNDL_Script_".
152 std::string
getScriptId(const script_name
& s
)
154 std::string
id("IDNDL_Script_");
// Builds an identifier for a whole set of script codes. The identifier starts
// with the fixed prefix "IDNDL_ScriptSet_" and is derived by visiting the
// members of s in the set's iteration order.
163 std::string
getScriptSetId(const std::set
<script_name
>& s
)
165 std::string
id("IDNDL_ScriptSet_");
// Visit every script code in the set, in sorted order.
167 for(std::set
<script_name
>::const_iterator p
= s
.begin(); p
!= s
.end(); ++ p
)
// ICU error state shared by the calls below; starts out as "no error".
181 UErrorCode status
= U_ZERO_ERROR
;
183 /* Locale -> scripts table */
// Number of locales ICU reports as available.
184 int32_t localeCount
= uloc_countAvailable();
// Locale id -> set of script codes. Keys are ordered with lessLocaleId
// rather than the default std::string ordering.
186 typedef std::map
<std::string
, std::set
<script_name
>, lessLocaleId
> LocalesScripts
;
187 LocalesScripts localesScripts
;
// Walk every available locale and collect the script codes listed in its
// "LocaleScript" resource into localesScripts.
189 for(int32_t i
= 0; i
< localeCount
; ++ i
)
191 const char * locale
= uloc_getAvailable(i
);
// Failing to open a locale's bundle is recorded in the function-wide status.
192 UResourceBundle
* localeRes
= ures_open(NULL
, locale
, &status
);
194 if(U_SUCCESS(status
))
// Problems confined to this one locale are tracked in localStatus, so that a
// missing or unreadable "LocaleScript" entry only produces a warning. The
// lookup used to write to the function-wide status instead: the warning
// below then reported the wrong (untouched) error code, and the stale
// failure made every later ICU call return early.
196 UErrorCode localStatus
= U_ZERO_ERROR
;
197 UResourceBundle
* scriptsRes
= ures_getByKey(localeRes
, "LocaleScript", NULL
, &localStatus
);
199 if(U_SUCCESS(localStatus
))
// Scripts of this locale only; merged into the global table further down.
201 std::set
<script_name
> localeScripts
;
203 while(ures_hasNext(scriptsRes
))
205 int32_t scriptLen
= 0;
206 const UChar
* script
= ures_getNextString(scriptsRes
, &scriptLen
, NULL
, &localStatus
);
208 if(U_SUCCESS(localStatus
))
209 localeScripts
.insert(script_name(script
));
212 fprintf(stderr
, "warning: failed reading scripts for locale %s: %s\n", locale
, u_errorName(localStatus
));
// Locales without any script are left out of the table altogether.
217 if(localeScripts
.size())
218 localesScripts
[convertLocale(locale
)].insert(localeScripts
.begin(), localeScripts
.end());
220 ures_close(scriptsRes
);
223 fprintf(stderr
, "warning: failed reading scripts for locale %s: %s\n", locale
, u_errorName(localStatus
));
225 ures_close(localeRes
);
// Only failures recorded in the function-wide status are reported as errors.
231 if(!U_SUCCESS(status
))
233 fprintf(stderr
, "error: failed enumerating locale scripts: %s\n", u_errorName(status
));
237 typedef std::set
<std::set
<script_name
> > UniqueScriptSets
;
238 UniqueScriptSets uniqueScriptSets
;
240 for(LocalesScripts::const_iterator p
= localesScripts
.begin(); p
!= localesScripts
.end(); ++ p
)
241 uniqueScriptSets
.insert(p
->second
);
243 typedef std::map
<std::string
, UniqueScriptSets::const_iterator
> LocalesScriptsFolded
;
244 LocalesScriptsFolded localesScriptsFolded
;
246 for(LocalesScripts::const_iterator p
= localesScripts
.begin(); p
!= localesScripts
.end(); ++ p
)
247 localesScriptsFolded
.insert(std::make_pair(p
->first
, uniqueScriptSets
.find(p
->second
)));
249 // Unique script sets
250 printf("struct %s { wchar_t const * scripts; int length; };\n", "IDNDL_ScriptSet");
252 for(UniqueScriptSets::const_iterator p
= uniqueScriptSets
.begin(); p
!= uniqueScriptSets
.end(); ++ p
)
254 printf("static const %s %s = {", "IDNDL_ScriptSet", getScriptSetId(*p
).c_str());
256 for(std::set
<script_name
>::const_iterator pScript
= p
->begin(); pScript
!= p
->end(); ++ pScript
)
257 printf(" L%s L\";\"", getScriptLiteral(*pScript
).c_str());
259 printf(", %d };\n", static_cast<int>(p
->size() * (4 + 1) + 1));
// Emits the array of locale id literals, one per line. Entries come out in
// the iteration order of localesScriptsFolded, i.e. the default std::string
// ordering of that map.
// NOTE(review): localesScripts is keyed with lessLocaleId, while this
// map uses the default ordering -- confirm which order consumers of the table
// expect.
262 // Sorted table of locale ids
263 printf("static wchar_t const * const %s [] = {\n", "IDNDL_Locales");
265 for(LocalesScriptsFolded::const_iterator p
= localesScriptsFolded
.begin(); p
!= localesScriptsFolded
.end(); ++ p
)
266 printf("L%s,\n", getLocaleLiteral(p
->first
).c_str());
// Emits an array of pointers to the script-set constants. It iterates
// localesScriptsFolded exactly like the locale id table does, so entry N here
// belongs to entry N of that table.
270 // Locale id index -> script set
271 printf("static %s const * const %s [] = {\n", "IDNDL_ScriptSet", "IDNDL_ScriptSets");
273 for(LocalesScriptsFolded::const_iterator p
= localesScriptsFolded
.begin(); p
!= localesScriptsFolded
.end(); ++ p
)
// p->second is an iterator into uniqueScriptSets; dereference it to name the set.
274 printf("&%s,\n", getScriptSetId(*p
->second
).c_str());
// Emits an array indexed by ICU script code (0 up to, but not including,
// USCRIPT_CODE_LIMIT) holding the short name of each script.
// NOTE(review): script_name(const char *) asserts a 4-character string, so
// this relies on uscript_getShortName returning such a name for every code in
// the range -- confirm.
278 /* Codepoint -> script table */
279 // Script code -> script name table
280 printf("static const wchar_t * const %s[] = {\n", "IDNDL_ScriptNames");
282 for(int script
= 0; script
< USCRIPT_CODE_LIMIT
; ++ script
)
283 printf("L%s,\n", getScriptLiteral(script_name(uscript_getShortName(static_cast<UScriptCode
>(script
)))).c_str());
// Emits a table of { first code point, last code point, script code } rows by
// scanning the whole code point range and starting a new row whenever the
// script changes. Ranges whose script is USCRIPT_UNKNOWN are not emitted.
287 // Codepoint range -> script code
288 printf("struct %s { int lbound; int ubound; int code; };\n", "IDNDL_CharRangeScript");
289 printf("static const %s %s[] = {\n", "IDNDL_CharRangeScript", "IDNDL_CharRangeScripts");
// Start of the range currently being accumulated, and its script.
291 int lbound
= UCHAR_MIN_VALUE
;
292 UScriptCode lastScript
= uscript_getScript(UCHAR_MIN_VALUE
, &status
);
// Lookup failures and negative codes are both folded into USCRIPT_UNKNOWN.
294 if(!U_SUCCESS(status
) || lastScript
< 0)
295 lastScript
= USCRIPT_UNKNOWN
;
297 for(UChar32 c
= UCHAR_MIN_VALUE
+ 1; c
<= UCHAR_MAX_VALUE
; ++ c
)
// NOTE(review): status is shared with the earlier calls and is not reset in
// the visible code -- confirm that an earlier failure cannot leak in here.
299 UScriptCode script
= uscript_getScript(c
, &status
);
301 if(!U_SUCCESS(status
) || script
< 0)
302 script
= USCRIPT_UNKNOWN
;
304 assert(script
>= 0 && script
< USCRIPT_CODE_LIMIT
);
// The script changed: the previous range ended at c - 1.
306 if(script
!= lastScript
)
308 if(lastScript
!= USCRIPT_UNKNOWN
)
309 printf("{ %d, %d, %d },\n", lbound
, c
- 1, static_cast<int>(lastScript
));
// Flush the range that is still open once the scan reaches the last code point.
316 if(lastScript
!= USCRIPT_UNKNOWN
)
317 printf("{ %d, %d, %d },\n", lbound
, UCHAR_MAX_VALUE
, static_cast<int>(lastScript
));