Sync with trunk head (part 1 of 2)
[reactos.git] / dll / nls / idndl_redist / data / genidldndata.cpp
1 /*
2 * Copyright (c) 2008, KJK::Hyperion
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * - Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * - Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * - Neither the name of the ReactOS Foundation nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 */
32
33 #include <unicode/putil.h>
34 #include <unicode/uchar.h>
35 #include <unicode/uloc.h>
36 #include <unicode/ures.h>
37 #include <unicode/uscript.h>
38
39 #include <assert.h>
40 #include <ctype.h>
41 #include <stdio.h>
42 #include <string.h>
43
44 #include <functional>
45 #include <map>
46 #include <set>
47 #include <string>
48 #include <utility>
49 #include <vector>
50
51 struct script_name
52 {
53 private:
54 void normalize()
55 {
56 chars[0] = toupper(chars[0]);
57 chars[1] = tolower(chars[1]);
58 chars[2] = tolower(chars[2]);
59 chars[3] = tolower(chars[3]);
60 }
61
62 public:
63 char chars[4];
64
65 bool operator<(const script_name& Y) const { return strncmp(chars, Y.chars, 4) < 0; }
66 bool operator>(const script_name& Y) const { return strncmp(chars, Y.chars, 4) > 0; }
67 bool operator==(const script_name& Y) const { return strncmp(chars, Y.chars, 4) == 0; }
68 bool operator!=(const script_name& Y) const { return strncmp(chars, Y.chars, 4) != 0; }
69 bool operator<=(const script_name& Y) const { return strncmp(chars, Y.chars, 4) <= 0; }
70 bool operator>=(const script_name& Y) const { return strncmp(chars, Y.chars, 4) >= 0; }
71
72 script_name(): chars() { }
73 script_name(const script_name& Y) { memcpy(chars, Y.chars, sizeof(chars)); }
74
75 const script_name& operator=(const script_name& Y)
76 {
77 memcpy(chars, Y.chars, sizeof(chars));
78 return *this;
79 }
80
81 explicit script_name(const char * pChars)
82 {
83 assert(pChars);
84 assert(strlen(pChars) == 4);
85 chars[0] = pChars[0];
86 chars[1] = pChars[1];
87 chars[2] = pChars[2];
88 chars[3] = pChars[3];
89 normalize();
90 }
91
92 explicit script_name(const UChar * pChars)
93 {
94 assert(pChars);
95 assert(u_strlen(pChars) == 4);
96 u_UCharsToChars(pChars, chars, 4);
97 normalize();
98 }
99 };
100
/*
 * Strict weak ordering of locale ids that ignores letter case.
 *
 * Characters are folded with tolower before being compared, and a string
 * that is a proper prefix of another one sorts first. The comparison is
 * written out by hand because stricmp is not a standard function, and the
 * adaptor typedefs are declared directly because std::binary_function no
 * longer exists in C++17.
 */
struct lessLocaleId
{
	typedef std::string first_argument_type;
	typedef std::string second_argument_type;
	typedef bool result_type;

	result_type operator()(const first_argument_type& x, const second_argument_type& y) const
	{
		const std::string::size_type common = x.size() < y.size() ? x.size() : y.size();

		for(std::string::size_type i = 0; i < common; ++ i)
		{
			// tolower is undefined for negative values, hence unsigned char
			const int cx = tolower(static_cast<unsigned char>(x[i]));
			const int cy = tolower(static_cast<unsigned char>(y[i]));

			if(cx != cy)
				return cx < cy;
		}

		// Equal over the common length: the shorter string comes first
		return x.size() < y.size();
	}
};
108
/*
 * Returns the locale id to emit for the given ICU locale name.
 * Currently the name is returned unchanged.
 * FIXME!!! no conversion is performed yet
 */
std::string convertLocale(const char * locale)
{
	return std::string(locale);
}
114
115 bool validId(const std::string& id)
116 {
117 std::string::const_iterator p = id.begin();
118
119 if(p == id.end() || !u_isIDStart(*p))
120 return false;
121
122 ++ p;
123
124 for(; p != id.end(); ++ p)
125 if(!u_isIDPart(*p))
126 return false;
127
128 return true;
129 }
130
/*
 * Returns locale wrapped in double quotes, ready to be emitted as the body
 * of a string literal in the generated source.
 *
 * Double quotes and backslashes are escaped with a backslash; control
 * characters (below 0x20, and 0x7F) are written as octal escapes. All other
 * characters are copied unchanged.
 */
std::string getLocaleLiteral(const std::string& locale)
{
	std::string lit;
	lit.reserve(locale.size() + 2);
	lit += '"';

	for(std::string::const_iterator p = locale.begin(); p != locale.end(); ++ p)
	{
		const unsigned char c = static_cast<unsigned char>(*p);

		if(c == '"' || c == '\\')
		{
			lit += '\\';
			lit += *p;
		}
		else if(c < 0x20 || c == 0x7F)
		{
			// Always three octal digits, so that a digit following in the
			// text cannot be absorbed into the escape sequence
			lit += '\\';
			lit += static_cast<char>('0' + ((c >> 6) & 7));
			lit += static_cast<char>('0' + ((c >> 3) & 7));
			lit += static_cast<char>('0' + (c & 7));
		}
		else
			lit += *p;
	}

	lit += '"';
	return lit;
}
139
140 std::string getScriptLiteral(const script_name& s)
141 {
142 std::string lit;
143 lit += '"';
144 lit += s.chars[0];
145 lit += s.chars[1];
146 lit += s.chars[2];
147 lit += s.chars[3];
148 lit += '"';
149 return lit;
150 }
151
152 std::string getScriptId(const script_name& s)
153 {
154 std::string id("IDNDL_Script_");
155 id += s.chars[0];
156 id += s.chars[1];
157 id += s.chars[2];
158 id += s.chars[3];
159 assert(validId(id));
160 return id;
161 }
162
163 std::string getScriptSetId(const std::set<script_name>& s)
164 {
165 std::string id("IDNDL_ScriptSet_");
166
167 for(std::set<script_name>::const_iterator p = s.begin(); p != s.end(); ++ p)
168 {
169 id += p->chars[0];
170 id += p->chars[1];
171 id += p->chars[2];
172 id += p->chars[3];
173 }
174
175 assert(validId(id));
176 return id;
177 }
178
179 int main()
180 {
181 UErrorCode status = U_ZERO_ERROR;
182
183 /* Locale -> scripts table */
184 int32_t localeCount = uloc_countAvailable();
185
186 typedef std::map<std::string, std::set<script_name>, lessLocaleId> LocalesScripts;
187 LocalesScripts localesScripts;
188
189 for(int32_t i = 0; i < localeCount; ++ i)
190 {
191 const char * locale = uloc_getAvailable(i);
192 UResourceBundle * localeRes = ures_open(NULL, locale, &status);
193
194 if(U_SUCCESS(status))
195 {
196 UErrorCode localStatus = U_ZERO_ERROR;
197 UResourceBundle * scriptsRes = ures_getByKey(localeRes, "LocaleScript", NULL, &status);
198
199 if(U_SUCCESS(status))
200 {
201 std::set<script_name> localeScripts;
202
203 while(ures_hasNext(scriptsRes))
204 {
205 int32_t scriptLen = 0;
206 const UChar * script = ures_getNextString(scriptsRes, &scriptLen, NULL, &localStatus);
207
208 if(U_SUCCESS(localStatus))
209 localeScripts.insert(script_name(script));
210 else
211 {
212 fprintf(stderr, "warning: failed reading scripts for locale %s: %s\n", locale, u_errorName(localStatus));
213 break;
214 }
215 }
216
217 if(localeScripts.size())
218 localesScripts[convertLocale(locale)].insert(localeScripts.begin(), localeScripts.end());
219
220 ures_close(scriptsRes);
221 }
222 else
223 fprintf(stderr, "warning: failed reading scripts for locale %s: %s\n", locale, u_errorName(localStatus));
224
225 ures_close(localeRes);
226 }
227 else
228 break;
229 }
230
231 if(!U_SUCCESS(status))
232 {
233 fprintf(stderr, "error: failed enumerating locale scripts: %s\n", u_errorName(status));
234 return 1;
235 }
236
237 typedef std::set<std::set<script_name> > UniqueScriptSets;
238 UniqueScriptSets uniqueScriptSets;
239
240 for(LocalesScripts::const_iterator p = localesScripts.begin(); p != localesScripts.end(); ++ p)
241 uniqueScriptSets.insert(p->second);
242
243 typedef std::map<std::string, UniqueScriptSets::const_iterator> LocalesScriptsFolded;
244 LocalesScriptsFolded localesScriptsFolded;
245
246 for(LocalesScripts::const_iterator p = localesScripts.begin(); p != localesScripts.end(); ++ p)
247 localesScriptsFolded.insert(std::make_pair(p->first, uniqueScriptSets.find(p->second)));
248
249 // Unique script sets
250 printf("struct %s { wchar_t const * scripts; int length; };\n", "IDNDL_ScriptSet");
251
252 for(UniqueScriptSets::const_iterator p = uniqueScriptSets.begin(); p != uniqueScriptSets.end(); ++ p)
253 {
254 printf("static const %s %s = {", "IDNDL_ScriptSet", getScriptSetId(*p).c_str());
255
256 for(std::set<script_name>::const_iterator pScript = p->begin(); pScript != p->end(); ++ pScript)
257 printf(" L%s L\";\"", getScriptLiteral(*pScript).c_str());
258
259 printf(", %d };\n", static_cast<int>(p->size() * (4 + 1) + 1));
260 }
261
262 // Sorted table of locale ids
263 printf("static wchar_t const * const %s [] = {\n", "IDNDL_Locales");
264
265 for(LocalesScriptsFolded::const_iterator p = localesScriptsFolded.begin(); p != localesScriptsFolded.end(); ++ p)
266 printf("L%s,\n", getLocaleLiteral(p->first).c_str());
267
268 printf("};\n");
269
270 // Locale id index -> script set
271 printf("static %s const * const %s [] = {\n", "IDNDL_ScriptSet", "IDNDL_ScriptSets");
272
273 for(LocalesScriptsFolded::const_iterator p = localesScriptsFolded.begin(); p != localesScriptsFolded.end(); ++ p)
274 printf("&%s,\n", getScriptSetId(*p->second).c_str());
275
276 printf("};\n");
277
278 /* Codepoint -> script table */
279 // Script code -> script name table
280 printf("static const wchar_t * const %s[] = {\n", "IDNDL_ScriptNames");
281
282 for(int script = 0; script < USCRIPT_CODE_LIMIT; ++ script)
283 printf("L%s,\n", getScriptLiteral(script_name(uscript_getShortName(static_cast<UScriptCode>(script)))).c_str());
284
285 printf("};\n");
286
287 // Codepoint range -> script code
288 printf("struct %s { int lbound; int ubound; int code; };\n", "IDNDL_CharRangeScript");
289 printf("static const %s %s[] = {\n", "IDNDL_CharRangeScript", "IDNDL_CharRangeScripts");
290
291 int lbound = UCHAR_MIN_VALUE;
292 UScriptCode lastScript = uscript_getScript(UCHAR_MIN_VALUE, &status);
293
294 if(!U_SUCCESS(status) || lastScript < 0)
295 lastScript = USCRIPT_UNKNOWN;
296
297 for(UChar32 c = UCHAR_MIN_VALUE + 1; c <= UCHAR_MAX_VALUE; ++ c)
298 {
299 UScriptCode script = uscript_getScript(c, &status);
300
301 if(!U_SUCCESS(status) || script < 0)
302 script = USCRIPT_UNKNOWN;
303
304 assert(script >= 0 && script < USCRIPT_CODE_LIMIT);
305
306 if(script != lastScript)
307 {
308 if(lastScript != USCRIPT_UNKNOWN)
309 printf("{ %d, %d, %d },\n", lbound, c - 1, static_cast<int>(lastScript));
310
311 lbound = c;
312 lastScript = script;
313 }
314 }
315
316 if(lastScript != USCRIPT_UNKNOWN)
317 printf("{ %d, %d, %d },\n", lbound, UCHAR_MAX_VALUE, static_cast<int>(lastScript));
318
319 printf("};\n");
320 }
321
322 // EOF