2 * Tool for creating NT-like NLS files for Unicode <-> Codepage conversions.
3 * Tool for creating NT-like l_intl.nls file for case mapping of unicode
5 * Copyright 2000 Timoshkov Dmitry
6 * Copyright 2001 Matei Alexandru
8 * Sources of information:
9 * Andrew Kozin's YAW project http://www.chat.ru/~stanson/yaw_en.html
10 * Ove Kåven's investigations http://www.ping.uio.no/~ovehk/nls
/*
 * File-scope declarations of two 256-entry arrays of pointers to constant
 * WCHAR data. Neither array is referenced by the code visible in this
 * section.
 */
19 static const WCHAR
* const uprtable
[256];
20 static const WCHAR
* const lwrtable
[256];
/*
 * NLSDIR: directory prefix used when composing the output file names
 *         ("%s/c_%03d.nls" and NLSDIR "/l_intl.nls").
 * LIBDIR: prefix prepended to the input mapping-table paths and to
 *         "UnicodeData.txt".
 */
22 #define NLSDIR "../../media/nls"
23 #define LIBDIR "unicode.org/"
/*
 * Members of the packed NLS_FILE_HEADER type. create_nls_file() fills
 * fields with these names (wSize, MaxCharSize, DefaultChar,
 * UnicodeDefaultChar, LeadByte) on its local `nls` object and writes that
 * object at the start of the output file.
 * NOTE(review): the opening line of the struct and some members are not
 * shown in this excerpt (create_nls_file() also assigns a CodePage field);
 * confirm the full layout against the complete file.
 */
26 WORD wSize
; /* in words 0x000D */
28 WORD MaxCharSize
; /* 1 or 2 */
29 BYTE DefaultChar
[MAX_DEFAULTCHAR
];
30 WCHAR UnicodeDefaultChar
;
33 BYTE LeadByte
[MAX_LEADBYTES
];
34 } __attribute__((packed
)) NLS_FILE_HEADER
;
37 Support for translation from the multiple unicode chars
38 to the single code page char.
40 002D;HYPHEN-MINUS;Pd;0;ET;;;;;N;;;;;
41 00AD;SOFT HYPHEN;Pd;0;ON;;;;;N;;;;;
42 2010;HYPHEN;Pd;0;ON;;;;;N;;;;;
43 2011;NON-BREAKING HYPHEN;Pd;0;ON;<noBreak> 2010;;;;N;;;;;
44 2013;EN DASH;Pd;0;ON;;;;;N;;;;;
45 2014;EM DASH;Pd;0;ON;;;;;N;;;;;
46 2015;HORIZONTAL BAR;Pd;0;ON;;;;;N;QUOTATION DASH;;;;
/*
 * Alias data consumed by patch_aliases(): each u2cp_alias entry pairs one
 * code page character (read as .cp_char) with a zero-terminated list of
 * Unicode code points (.alias) that should map to it. The only entry shown
 * maps the listed hyphen/dash code points to 0x002D.
 * NOTE(review): the struct declaration lines around the `alias` member are
 * only partly visible in this excerpt.
 */
49 /* HYPHEN-MINUS aliases */
50 static WCHAR hyphen_aliases
[] = {0x00AD,0x2010,0x2011,0x2013,0x2014,0x2015,0};
54 WCHAR
*alias
; /* must be 0 terminated */
56 /* HYPHEN-MINUS aliases */
57 {0x002D, hyphen_aliases
}
/*
 * patch_aliases: give alias code points a real mapping in a
 * Unicode -> code page table.
 *
 * u2cp  the table to patch, indexed by Unicode value 0..65535; it is read
 *       and written through `wc` (WCHAR entries) when cpi->MaxCharSize is 2
 *       and through `c` (byte entries) in the second loop
 * cpi   supplies MaxCharSize and the DefaultChar value that marks an
 *       entry as unmapped
 *
 * An entry is overwritten with u2cp_alias[j].cp_char only when its index
 * equals the current alias code point and it still holds the default
 * character, so mappings already present are left alone.
 * NOTE(review): this excerpt omits lines of the function (local
 * declarations, the assignments of wc/c, and whatever advances `alias`
 * through its zero-terminated list); confirm against the full file.
 */
60 static void patch_aliases(void *u2cp
, CPINFOEXA
*cpi
)
66 if(cpi
->MaxCharSize
== 2) {
68 for(i
= 0; i
< 65536; i
++) {
69 for(j
= 0; j
< sizeof(u2cp_alias
)/sizeof(u2cp_alias
[0]); j
++) {
70 alias
= u2cp_alias
[j
].alias
;
72 if(*alias
== i
&& wc
[i
] == *(WCHAR
*)cpi
->DefaultChar
) {
73 wc
[i
] = u2cp_alias
[j
].cp_char
;
/*
 * Byte-entry variant of the same scan: the default is compared as
 * DefaultChar[0] and only replacement characters below 256 are stored.
 */
82 for(i
= 0; i
< 65536; i
++) {
83 for(j
= 0; j
< sizeof(u2cp_alias
)/sizeof(u2cp_alias
[0]); j
++) {
84 alias
= u2cp_alias
[j
].alias
;
86 if(*alias
== i
&& c
[i
] == cpi
->DefaultChar
[0] && u2cp_alias
[j
].cp_char
< 256) {
87 c
[i
] = (BYTE
)u2cp_alias
[j
].cp_char
;
/*
 * write_unicode2cp_table: build the reverse (Unicode -> code page) table
 * from `table` and write it to `out`.
 *
 * out    destination stream
 * cpi    supplies MaxCharSize (entry width in bytes) and DefaultChar
 * table  code page -> Unicode table
 *
 * A buffer of cpi->MaxCharSize * 65536 bytes is allocated and every entry
 * is first set to the default character. The table is then inverted: all
 * 65536 source entries are used when MaxCharSize is 2, only the first 256
 * in the other loop. patch_aliases() is applied before the buffer is
 * written, and the fwrite() count is compared with the expected size.
 * NOTE(review): return statements, the allocation-failure test, any
 * per-entry condition inside the inversion loops and the release of the
 * buffer are not shown in this excerpt.
 */
96 static BOOL
write_unicode2cp_table(FILE *out
, CPINFOEXA
*cpi
, WCHAR
*table
)
104 u2cp
= malloc(cpi
->MaxCharSize
* 65536);
106 printf("Not enough memory for Unicode to Codepage table\n");
/* Two-byte entries: default-fill, then invert all 65536 code page values. */
110 if(cpi
->MaxCharSize
== 2) {
112 for(i
= 0; i
< 65536; i
++)
113 wc
[i
] = *(WCHAR
*)cpi
->DefaultChar
;
115 for(i
= 0; i
< 65536; i
++)
117 wc
[table
[i
]] = (WCHAR
)i
;
/* One-byte entries: default-fill, then invert the first 256 values. */
121 for(i
= 0; i
< 65536; i
++)
122 c
[i
] = cpi
->DefaultChar
[0];
124 for(i
= 0; i
< 256; i
++)
126 c
[table
[i
]] = (CHAR
)i
;
129 patch_aliases(u2cp
, cpi
);
131 if(fwrite(u2cp
, 1, cpi
->MaxCharSize
* 65536, out
) != cpi
->MaxCharSize
* 65536)
/*
 * write_lb_ranges: write the lead byte offset table followed by one
 * 256-entry sub table per lead byte.
 *
 * out    destination stream
 * cpi    supplies the LeadByte array, read as (first, last) pairs
 * table  code page -> Unicode table indexed by the full code page value
 *
 * Pass 1 zeroes offsets[] and stores the running `offset` for every lead
 * byte inside a range, then writes the whole offsets array. Pass 2 visits
 * the same lead bytes and, for each, copies table[MAKEWORD(0, range)] to
 * table[MAKEWORD(0xFF, range)] into sub_table and writes it.
 * A pair whose first byte is 0 contributes nothing (range != 0 test).
 * NOTE(review): the initial value and update of `offset`, the types of
 * i/j/range, and the return statements are not shown in this excerpt.
 */
139 static BOOL
write_lb_ranges(FILE *out
, CPINFOEXA
*cpi
, WCHAR
*table
)
141 WCHAR sub_table
[256];
142 WORD offset
, offsets
[256];
145 memset(offsets
, 0, sizeof(offsets
));
149 for(i
= 0; i
< MAX_LEADBYTES
; i
+= 2) {
150 for(range
= cpi
->LeadByte
[i
]; range
!= 0 && range
<= cpi
->LeadByte
[i
+ 1]; range
++) {
152 offsets
[range
] = offset
;
156 if(fwrite(offsets
, 1, sizeof(offsets
), out
) != sizeof(offsets
))
159 for(i
= 0; i
< MAX_LEADBYTES
; i
+= 2) {
160 for(range
= cpi
->LeadByte
[i
]; range
!= 0 && range
<= cpi
->LeadByte
[i
+ 1]; range
++) {
161 /*printf("Writing sub table for LeadByte %02X\n", range);*/
162 for(j
= MAKEWORD(0, range
); j
<= MAKEWORD(0xFF, range
); j
++) {
163 sub_table
[j
- MAKEWORD(0, range
)] = table
[j
];
166 if(fwrite(sub_table
, 1, sizeof(sub_table
), out
) != sizeof(sub_table
))
/*
 * create_nls_file: write one code page NLS file.
 *
 * name      path of the file to create (opened with "wb")
 * cpi       code page information copied into the file header
 * table     code page -> Unicode table; its first 256 entries are written
 *           as the primary table, and it is also handed to
 *           write_lb_ranges() and write_unicode2cp_table()
 * oemtable  OEM glyph table, or NULL when there is none
 *
 * Write order as visible here: header, offset word, primary table, OEM
 * table size word, OEM table, number of lead byte ranges, lead byte
 * offsets and sub tables (only when at least one range exists), one more
 * word, then the Unicode -> code page table.
 * A lead byte pair is counted only when its first byte is non-zero and the
 * second byte is greater than the first.
 * NOTE(review): the fwrite() calls for the offset word, the OEM size word,
 * the range count and the word written just before the Unicode table do
 * not check their result, unlike the neighbouring table writes.
 * NOTE(review): return statements, stream cleanup, the guard in front of
 * the OEM table write, the tail of the wValue expression and the value of
 * the last word are on lines not shown in this excerpt.
 */
174 static BOOL
create_nls_file(char *name
, CPINFOEXA
*cpi
, WCHAR
*table
, WCHAR
*oemtable
)
178 WORD wValue
, number_of_lb_ranges
, number_of_lb_subtables
, i
;
180 printf("Creating NLS table \"%s\"\n", name
);
182 if(!(out
= fopen(name
, "wb"))) {
183 printf("Could not create file \"%s\"\n", name
);
187 memset(&nls
, 0, sizeof(nls
));
189 nls
.wSize
= sizeof(nls
) / sizeof(WORD
);
190 nls
.CodePage
= cpi
->CodePage
;
191 nls
.MaxCharSize
= cpi
->MaxCharSize
;
192 memcpy(nls
.DefaultChar
, cpi
->DefaultChar
, MAX_DEFAULTCHAR
);
193 nls
.UnicodeDefaultChar
= cpi
->UnicodeDefaultChar
;
196 memcpy(nls
.LeadByte
, cpi
->LeadByte
, MAX_LEADBYTES
);
198 if(fwrite(&nls
, 1, sizeof(nls
), out
) != sizeof(nls
)) {
200 printf("Could not write to file \"%s\"\n", name
);
204 number_of_lb_ranges
= 0;
205 number_of_lb_subtables
= 0;
207 for(i
= 0; i
< MAX_LEADBYTES
; i
+= 2) {
208 if(cpi
->LeadByte
[i
] != 0 && cpi
->LeadByte
[i
+ 1] > cpi
->LeadByte
[i
]) {
209 number_of_lb_ranges
++;
210 number_of_lb_subtables
+= cpi
->LeadByte
[i
+ 1] - cpi
->LeadByte
[i
] + 1;
214 /*printf("Number of LeadByte ranges %d\n", number_of_lb_ranges);*/
215 /*printf("Number of LeadByte subtables %d\n", number_of_lb_subtables);*/
217 /* Calculate offset to Unicode to CP table in words:
218 * 1. (256 * sizeof(WORD)) primary CP to Unicode table +
219 * 2. (WORD) optional OEM glyph table size in words +
220 * 3. OEM glyph table size in words * sizeof(WORD) +
221 * 4. (WORD) Number of DBCS LeadByte ranges +
222 * 5. if (Number of DBCS LeadByte ranges != 0) 256 * sizeof(WORD) offsets of lead byte sub tables
223 * 6. (Number of DBCS LeadByte sub tables * 256 * sizeof(WORD)) LeadByte sub tables +
224 * 7. (WORD) Unknown flag
227 wValue
= (256 * sizeof(WORD
) + /* 1 */
228 sizeof(WORD
) + /* 2 */
229 ((oemtable
!=NULL
) ? (256 * sizeof(WORD
)) : 0) + /* 3 */
230 sizeof(WORD
) + /* 4 */
231 ((number_of_lb_subtables
!= 0) ? 256 * sizeof(WORD
) : 0) + /* 5 */
232 number_of_lb_subtables
* 256 * sizeof(WORD
) + /* 6 */
236 /* offset of Unicode to CP table in words */
237 fwrite(&wValue
, 1, sizeof(wValue
), out
);
239 /* primary CP to Unicode table */
240 if(fwrite(table
, 1, 256 * sizeof(WCHAR
), out
) != 256 * sizeof(WCHAR
)) {
242 printf("Could not write to file \"%s\"\n", name
);
246 /* optional OEM glyph table size in words */
247 wValue
= (oemtable
!= NULL
) ? (256 * sizeof(WORD
)) : 0;
248 fwrite(&wValue
, 1, sizeof(wValue
), out
);
250 /* optional OEM to Unicode table */
252 if(fwrite(oemtable
, 1, 256 * sizeof(WCHAR
), out
) != 256 * sizeof(WCHAR
)) {
254 printf("Could not write to file \"%s\"\n", name
);
259 /* Number of DBCS LeadByte ranges */
260 fwrite(&number_of_lb_ranges
, 1, sizeof(number_of_lb_ranges
), out
);
262 /* offsets of lead byte sub tables and lead byte sub tables */
263 if(number_of_lb_ranges
> 0) {
264 if(!write_lb_ranges(out
, cpi
, table
)) {
266 printf("Could not write to file \"%s\"\n", name
);
273 fwrite(&wValue
, 1, sizeof(wValue
), out
);
275 if(!write_unicode2cp_table(out
, cpi
, table
)) {
277 printf("Could not write to file \"%s\"\n", name
);
/*
 * patch_codepage_info: hook for adjusting the defaults stored in *cpi.
 * Load_CP2Unicode_Table() calls it right after initialising cpi; per the
 * comment in the body it currently does nothing.
 */
285 /* correct the codepage information such as default chars */
286 static void patch_codepage_info(CPINFOEXA
*cpi
)
288 /* currently nothing */
/*
 * Load_CP2Unicode_Table: read a text mapping file into a 65536-entry
 * code page -> Unicode table and fill *cpi along the way.
 *
 * table_name  path of the mapping file (opened with "r")
 * cp          code page number (its use is on lines not shown here)
 * cpi         output; zeroed, then given DefaultChar '?', MaxCharSize 1
 *             and UnicodeDefaultChar '?' before parsing starts
 *
 * Returns the table allocated with malloc().
 *
 * Per input line: leading whitespace is skipped, empty lines and lines
 * starting with '#' are ignored, the code page value `n` and the Unicode
 * `value` are parsed with strtol() using base 0, and table[n] is set.
 * The first entry above 0xFF switches cpi->MaxCharSize to 2. A trailing
 * comment containing "DBCS LEAD BYTE" marks `n` as a lead byte; runs of
 * such entries are recorded as (first, last) pairs in cpi->LeadByte, with
 * at most MAX_LEADBYTES/2 ranges.
 * NOTE(review): range checks on n/value, the initialisation of entries
 * 0..255, error returns, fclose() and the lines that advance
 * lb_ranges/line are not shown in this excerpt.
 * NOTE(review): isspace() is called on a plain char read from the file;
 * confirm whether bytes above 0x7F can reach it.
 */
291 static WCHAR
*Load_CP2Unicode_Table(char *table_name
, UINT cp
, CPINFOEXA
*cpi
)
298 int lb_ranges
, lb_range_started
, line
;
300 printf("Loading translation table \"%s\"\n", table_name
);
302 /* Init to default values */
303 memset(cpi
, 0, sizeof(CPINFOEXA
));
305 *(WCHAR
*)cpi
->DefaultChar
= '?';
306 cpi
->MaxCharSize
= 1;
307 cpi
->UnicodeDefaultChar
= '?';
309 patch_codepage_info(cpi
);
311 table
= (WCHAR
*)malloc(sizeof(WCHAR
) * 65536);
313 printf("Not enough memory for Codepage to Unicode table\n");
317 for(n
= 0; n
< 256; n
++)
320 for(n
= 256; n
< 65536; n
++)
321 table
[n
] = cpi
->UnicodeDefaultChar
;
323 file
= fopen(table_name
, "r");
331 lb_range_started
= 0;
333 while(fgets(buf
, sizeof(buf
), file
)) {
336 while(isspace(*p
)) p
++;
338 if(!*p
|| p
[0] == '#')
341 n
= strtol(p
, &p
, 0);
343 printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line
, n
, table_name
);
347 if(n
> 0xFF && cpi
->MaxCharSize
!= 2) {
348 /*printf("Line %d: Entry 0x%04lX: Switching to DBCS\n", line, n);*/
349 cpi
->MaxCharSize
= 2;
352 while(isspace(*p
)) p
++;
354 if(!*p
|| p
[0] == '#') {
355 /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
358 value
= strtol(p
, &p
, 0);
360 printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line
, n
, table_name
);
362 table
[n
] = (WCHAR
)value
;
365 /* wait for comment */
366 while(*p
&& *p
!= '#') p
++;
368 if(*p
== '#' && strstr(p
, "DBCS LEAD BYTE")) {
369 /*printf("Line %d, entry 0x%02lX DBCS LEAD BYTE\n", line, n);*/
371 printf("Line %d: Entry 0x%04lX: Error: DBCS lead byte overflowed\n", line
, n
);
377 if(lb_range_started
) {
378 cpi
->LeadByte
[(lb_ranges
- 1) * 2 + 1] = (BYTE
)n
;
381 /*printf("Line %d: Starting new DBCS lead byte range, entry 0x%02lX\n", line, n);*/
382 if(lb_ranges
< MAX_LEADBYTES
/2) {
384 lb_range_started
= 1;
385 cpi
->LeadByte
[(lb_ranges
- 1) * 2] = (BYTE
)n
;
388 printf("Line %d: Error: could not start new lead byte range\n", line
);
393 lb_range_started
= 0;
/*
 * Load_OEM2Unicode_Table: build an OEM glyph table by overlaying entries
 * read from `table_name` on a copy of `def_table`.
 *
 * table_name  path of the glyph mapping file (opened with "r")
 * def_table   65536-entry table copied into the new table first
 * cp          code page number (no use of it is visible in this excerpt)
 * cpi         only cpi->CodePage is read here, to special-case 864
 *
 * Returns the table allocated with malloc().
 *
 * Per input line: the first hexadecimal number is stored in `value`, the
 * second in `n`, and table[n] is set to value. For code page 864 a further
 * hexadecimal column, when present and not starting with '#' or '-',
 * replaces `n` as the index.
 * NOTE(review): bounds checks, error returns, line counting and fclose()
 * are not shown in this excerpt.
 */
402 static WCHAR
*Load_OEM2Unicode_Table(char *table_name
, WCHAR
*def_table
, UINT cp
, CPINFOEXA
*cpi
)
411 printf("Loading oem glyph table \"%s\"\n", table_name
);
413 table
= (WCHAR
*)malloc(sizeof(WCHAR
) * 65536);
415 printf("Not enough memory for Codepage to Unicode table\n");
419 memcpy(table
, def_table
, 65536 * sizeof(WCHAR
));
421 file
= fopen(table_name
, "r");
429 while(fgets(buf
, sizeof(buf
), file
)) {
432 while(isspace(*p
)) p
++;
434 if(!*p
|| p
[0] == '#')
437 value
= strtol(p
, &p
, 16);
439 printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line
, value
, table_name
);
443 while(isspace(*p
)) p
++;
445 if(!*p
|| p
[0] == '#') {
446 /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
450 n
= strtol(p
, &p
, 16);
452 printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line
, value
, table_name
);
/* Code page 864 only: an extra column supplies the index. */
457 if (cpi
->CodePage
== 864) {
458 while(isspace(*p
)) p
++;
460 if(!*p
|| p
[0] == '#' || p
[0] == '-') {
461 /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
465 n
= strtol(p
, &p
, 16);
467 printf("Line %d: Entry 0x%06lX oem value: File \"%s\" corrupted\n", line
, value
, table_name
);
473 table
[n
] = (WCHAR
)value
;
/*
 * write_nls_files: generate one c_<codepage>.nls file for every entry of
 * the local `pages` list.
 *
 * Each entry holds a code page number (.cp), a flag (.oem), the mapping
 * file path (.table_filename) and a description (.comment) used in
 * messages. For every entry the mapping file is loaded with
 * Load_CP2Unicode_Table(), the IBMGRAPH.TXT glyph table is loaded with
 * Load_OEM2Unicode_Table(), the output name is formatted as
 * NLSDIR "/c_%03d.nls" from cpi.CodePage, and create_nls_file() is called
 * with the OEM table only when .oem is set.
 * Most paths are prefixed with LIBDIR; the 20871 entry uses a
 * "ReactOS/..." path without that prefix. Entries 878 and 20866 share the
 * same KOI8-R mapping file.
 * NOTE(review): the declaration of `pages`, the conditions around the
 * error messages and the OEM load, memory release and the return value
 * are not shown in this excerpt.
 */
481 int write_nls_files()
485 char nls_filename
[256];
491 char *table_filename
;
494 {37, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT", "IBM EBCDIC US Canada"},
495 {424, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MISC/CP424.TXT", "IBM EBCDIC Hebrew"},
496 {437, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT", "OEM United States"},
497 {500, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT", "IBM EBCDIC International"},
498 /*{708, FALSE, "", "Arabic ASMO"},*/
499 /*{720, FALSE, "", "Arabic Transparent ASMO"},*/
500 {737, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT", "OEM Greek 437G"},
501 {775, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT", "OEM Baltic"},
502 {850, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT", "OEM Multilingual Latin 1"},
503 {852, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT", "OEM Slovak Latin 2"},
504 {855, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT", "OEM Cyrillic" },
505 {856, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MISC/CP856.TXT", "Hebrew PC"},
506 {857, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT", "OEM Turkish"},
507 {860, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT", "OEM Portuguese"},
508 {861, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT", "OEM Icelandic"},
509 {862, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT", "OEM Hebrew"},
510 {863, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT", "OEM Canadian French"},
511 {864, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT", "OEM Arabic"},
512 {865, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT", "OEM Nordic"},
513 {866, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT", "OEM Russian"},
514 {869, TRUE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT", "OEM Greek"},
515 /*{870, FALSE, "", "IBM EBCDIC Multilingual/ROECE (Latin 2)"},*/
516 {874, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/PC/CP874.TXT", "ANSI/OEM Thai"},
517 {875, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT", "IBM EBCDIC Greek"},
518 {878, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
519 {932, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT", "ANSI/OEM Japanese Shift-JIS"},
520 {936, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT", "ANSI/OEM Simplified Chinese GBK"},
521 {949, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT", "ANSI/OEM Korean Unified Hangul"},
522 {950, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT", "ANSI/OEM Traditional Chinese Big5"},
523 {1006, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MISC/CP1006.TXT", "IBM Arabic"},
524 {1026, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT", "IBM EBCDIC Latin 5 Turkish"},
525 {1250, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT", "ANSI Eastern Europe"},
526 {1251, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT", "ANSI Cyrillic"},
527 {1252, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT", "ANSI Latin 1"},
528 {1253, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT", "ANSI Greek"},
529 {1254, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT", "ANSI Turkish"},
530 {1255, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT", "ANSI Hebrew"},
531 {1256, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT", "ANSI Arabic"},
532 {1257, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT", "ANSI Baltic"},
533 {1258, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT", "ANSI/OEM Viet Nam"},
534 {10000, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT", "Mac Roman"},
535 {10006, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT", "Mac Greek"},
536 {10007, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT", "Mac Cyrillic"},
537 {10029, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT", "Mac Latin 2"},
538 {10079, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT", "Mac Icelandic"},
539 {10081, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT", "Mac Turkish"},
540 /*{20000, FALSE, "", "CNS Taiwan"},*/
541 /*{20001, FALSE, "", "TCA Taiwan"},*/
542 /*{20002, FALSE, "", "Eten Taiwan"},*/
543 /*{20003, FALSE, "", "IBM5550 Taiwan"},*/
544 /*{20004, FALSE, "", "TeleText Taiwan"},*/
545 /*{20005, FALSE, "", "Wang Taiwan"},*/
546 /*{20105, FALSE, "", "IA5 IRV International Alphabet No.5"},*/
547 /*{20106, FALSE, "", "IA5 German"},*/
548 /*{20107, FALSE, "", "IA5 Swedish"},*/
549 /*{20108, FALSE, "", "IA5 Norwegian"},*/
550 /*{20127, FALSE, "", "US ASCII"}, */
551 /*{20261, FALSE, "", "T.61"},*/
552 /*{20269, FALSE, "", "ISO 6937 NonSpacing Accent"},*/
553 /*{20273, FALSE, "", "IBM EBCDIC Germany"},*/
554 /*{20277, FALSE, "", "IBM EBCDIC Denmark/Norway"},*/
555 /*{20278, FALSE, "", "IBM EBCDIC Finland/Sweden"},*/
556 /*{20280, FALSE, "", "IBM EBCDIC Italy"},*/
557 /*{20284, FALSE, "", "IBM EBCDIC Latin America/Spain"},*/
558 /*{20285, FALSE, "", "IBM EBCDIC United Kingdom"},*/
559 /*{20290, FALSE, "", "IBM EBCDIC Japanese Katakana Extended"},*/
560 /*{20297, FALSE, "", "IBM EBCDIC France"},*/
561 /*{20420, FALSE, "", "IBM EBCDIC Arabic"},*/
562 /*{20423, FALSE, "IBM869.TXT", "IBM EBCDIC Greek"},*/
563 /*{20424, FALSE, "", "IBM EBCDIC Hebrew"},*/
564 /*{20833, FALSE, "", "IBM EBCDIC Korean Extended"},*/
565 /*{20838, FALSE, "", "IBM EBCDIC Thai"},*/
566 {20871, FALSE
, "ReactOS/IBMCP861.TXT", "IBM EBCDIC Icelandic"},
567 /*{20880, FALSE, "", "IBM EBCDIC Cyrillic (Russian)"},*/
568 {20866, FALSE
, LIBDIR
"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
569 /*{20905, FALSE, "", "IBM EBCDIC Turkish"},*/
570 /*{21025, FALSE, "", "IBM EBCDIC Cyrillic (Serbian, Bulgarian)"},*/
571 /*{21027, FALSE, "", "Ext Alpha Lowercase"},*/
572 {28591, FALSE
, LIBDIR
"MAPPINGS/ISO8859/8859-1.TXT", "ISO 8859-1 Latin 1"},
573 {28592, FALSE
, LIBDIR
"MAPPINGS/ISO8859/8859-2.TXT", "ISO 8859-2 Eastern Europe"},
574 {28593, FALSE
, LIBDIR
"MAPPINGS/ISO8859/8859-3.TXT", "ISO 8859-3 Turkish"},
575 {28594, FALSE
, LIBDIR
"MAPPINGS/ISO8859/8859-4.TXT", "ISO 8859-4 Baltic"},
576 {28595, FALSE
, LIBDIR
"MAPPINGS/ISO8859/8859-5.TXT", "ISO 8859-5 Cyrillic"},
577 {28596, FALSE
, LIBDIR
"MAPPINGS/ISO8859/8859-6.TXT", "ISO 8859-6 Arabic"},
578 {28597, FALSE
, LIBDIR
"MAPPINGS/ISO8859/8859-7.TXT", "ISO 8859-7 Greek"},
579 {28598, FALSE
, LIBDIR
"MAPPINGS/ISO8859/8859-8.TXT", "ISO 8859-8 Hebrew"},
580 {28599, FALSE
, LIBDIR
"MAPPINGS/ISO8859/8859-9.TXT", "ISO 8859-9 Latin 5"}
/* Process every configured code page in list order. */
583 for(i
= 0; i
< sizeof(pages
)/sizeof(pages
[0]); i
++) {
584 table
= Load_CP2Unicode_Table(pages
[i
].table_filename
, pages
[i
].cp
, &cpi
);
586 printf("Could not load \"%s\" (%s)\n", pages
[i
].table_filename
, pages
[i
].comment
);
591 oemtable
= Load_OEM2Unicode_Table(LIBDIR
"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", table
, pages
[i
].cp
, &cpi
);
593 printf("Could not load \"%s\" (%s)\n", LIBDIR
"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", "IBM OEM glyph table");
598 sprintf(nls_filename
, "%s/c_%03d.nls", NLSDIR
, cpi
.CodePage
);
599 if(!create_nls_file(nls_filename
, &cpi
, table
, pages
[i
].oem
? oemtable
: NULL
)) {
600 printf("Could not write \"%s\" (%s)\n", nls_filename
, pages
[i
].comment
);
/*
 * File-scope state for the case-mapping tables.
 *
 * to_upper_org / to_lower_org  65536-entry tables allocated in
 *     CreateCaseDiff(); each entry holds (mapped code - code) in WORD
 *     arithmetic
 * diffs / number_of_diffs      list scanned by find_diff()
 * subtables_with_diffs         pointers to distinct 16-entry slices of the
 *     table being packed, with number_of_subtables_with_diffs as the count
 * subtables_with_offsets       copies of distinct 16-entry index sub
 *     tables, with number_of_subtables_with_offsets as the count
 *
 * Both sub table stores hold at most 4096 entries; the find_subtable_*
 * helpers print a "storage is full" message at that limit.
 */
614 static WORD
*to_upper_org
= NULL
, *to_lower_org
= NULL
;
617 static WORD diffs
[256];
618 static int number_of_diffs
;
621 static WORD number_of_subtables_with_diffs
;
622 /* pointers to subtables with 16 elements in each to the main table */
623 static WORD
*subtables_with_diffs
[4096];
625 static WORD number_of_subtables_with_offsets
;
626 /* subtables with 16 elements */
627 static WORD subtables_with_offsets
[4096 * 16];
/*
 * test_packed_table: apply a packed case table to a fixed sample string.
 *
 * table  packed table as produced by pack_table()
 *
 * For every character, including the terminating 0 (the loop runs to
 * len + 1), the difference is fetched through three chained lookups:
 * high byte, then the upper 4 bits of the low byte, then the lower 4 bits.
 * That difference is added to the character in place.
 * The trailing lines format a "text%02d.dat" name and write the converted
 * string (len WCHARs) to that file.
 * NOTE(review): the fopen() result is used without a visible check, and
 * the declarations of name/n/file are not shown in this excerpt; confirm
 * whether the file dump is active in the full file.
 */
629 static void test_packed_table(WCHAR
*table
)
631 WCHAR test_str
[] = L
"This is an English text. \x0CF\x0EE-\x0F0\x0F3\x0F1\x0F1\x0EA\x0E8 \x0FF \x0EF\x0E8\x0F1\x0E0\x0F2\x0FC \x0F3\x0EC\x0E5\x0FE \x0ED\x0E5\x0EC\x0ED\x0EE\x0E6\x0EA\x0EE. 1234567890";
636 len
= lstrlenW(test_str
);
638 for(i
= 0; i
< len
+ 1; i
++) {
639 /*off = table[HIBYTE(test_str[i])];
641 sub_table = table + off;
642 off = sub_table[LOBYTE(test_str[i]) >> 4];
644 sub_table = table + off;
645 off = LOBYTE(test_str[i]) & 0x0F;
647 diff = sub_table[off];
649 test_str[i] += diff;*/
650 test_str
[i
] += table
[table
[table
[HIBYTE(test_str
[i
])] + (LOBYTE(test_str
[i
]) >> 4)] + (LOBYTE(test_str
[i
]) & 0x0F)];
658 sprintf(name, "text%02d.dat", n++);
659 file = fopen(name, "wb");
660 fwrite(test_str, len * sizeof(WCHAR), 1, file);
/*
 * CreateCaseDiff: fill to_upper_org and to_lower_org from a
 * semicolon-separated character data file.
 *
 * table_name  path of the data file (opened with "r"); the caller in this
 *             file passes LIBDIR "UnicodeData.txt"
 *
 * Both tables are allocated zero-filled with 65536 WORD entries. For each
 * input line the leading hexadecimal code is parsed, the following fields
 * are skipped one ';' at a time, and fields 12 (uppercase) and 13
 * (lowercase) are parsed as hexadecimal. The stored value is
 * (mapping - code), truncated to WORD.
 * NOTE(review): the tests that guard empty mapping fields, the steps over
 * each ';', failure returns and fclose() are on lines not shown in this
 * excerpt.
 * NOTE(review): `code` is cast to WORD, so a code point above 0xFFFF would
 * wrap unless it is filtered on a line not shown here.
 */
665 static BOOL
CreateCaseDiff(char *table_name
)
669 WORD code
, case_mapping
;
673 to_upper_org
= (WORD
*)calloc(65536, sizeof(WORD
));
675 printf("Not enough memory for to upper table\n");
679 to_lower_org
= (WORD
*)calloc(65536, sizeof(WORD
));
681 printf("Not enough memory for to lower table\n");
685 file
= fopen(table_name
, "r");
687 printf("Could not open file \"%s\"\n", table_name
);
693 while(fgets(buf
, sizeof(buf
), file
)) {
696 while(*p
&& isspace(*p
)) p
++;
702 code
= (WORD
)strtol(p
, &p
, 16);
704 //if(code != 0x9A0 && code != 0xBA0)
707 while(*p
&& *p
!= ';') p
++;
712 /* 1. Character name */
713 while(*p
&& *p
!= ';') p
++;
718 /* 2. General Category */
719 while(*p
&& *p
!= ';') p
++;
724 /* 3. Canonical Combining Classes */
725 while(*p
&& *p
!= ';') p
++;
730 /* 4. Bidirectional Category */
731 while(*p
&& *p
!= ';') p
++;
736 /* 5. Character Decomposition Mapping */
737 while(*p
&& *p
!= ';') p
++;
742 /* 6. Decimal digit value */
743 while(*p
&& *p
!= ';') p
++;
749 while(*p
&& *p
!= ';') p
++;
754 /* 8. Numeric value */
755 while(*p
&& *p
!= ';') p
++;
761 while(*p
&& *p
!= ';') p
++;
766 /* 10. Unicode 1.0 Name */
767 while(*p
&& *p
!= ';') p
++;
772 /* 11. 10646 comment field */
773 while(*p
&& *p
!= ';') p
++;
778 /* 12. Uppercase Mapping */
779 while(*p
&& isspace(*p
)) p
++;
782 case_mapping
= (WORD
)strtol(p
, &p
, 16);
783 to_upper_org
[code
] = case_mapping
- code
;
784 while(*p
&& *p
!= ';') p
++;
789 /* 13. Lowercase Mapping */
790 while(*p
&& isspace(*p
)) p
++;
793 case_mapping
= (WORD
)strtol(p
, &p
, 16);
794 to_lower_org
[code
] = case_mapping
- code
;
795 while(*p
&& *p
!= ';') p
++;
800 /* 14. Titlecase Mapping */
801 while(*p
&& *p
!= ';') p
++;
/*
 * find_diff: iterate over the first number_of_diffs entries for `diff`.
 * NOTE(review): only the signature and the loop header are visible in this
 * excerpt. The comparison (presumably against diffs[i]) and the values
 * returned for found / not found must be confirmed in the full file.
 */
813 static int find_diff(WORD diff
)
817 for(i
= 0; i
< number_of_diffs
; i
++) {
/*
 * find_subtable_with_diffs: look up a 16-entry slice among the slices
 * already registered in subtables_with_diffs, registering it when new.
 *
 * table     table being packed (no use of it is visible in this excerpt)
 * subtable  pointer to 16 consecutive WORD entries
 *
 * Slices are compared by content with memcmp(). A new slice is stored by
 * pointer, not copied, so the underlying table has to stay valid while the
 * registry is in use. When 4096 slices are already stored a "storage is
 * full" message is printed.
 * NOTE(review): the returned values (presumably the slice index) and the
 * action taken after the "full" message are not shown in this excerpt.
 */
826 static WORD
find_subtable_with_diffs(WORD
*table
, WORD
*subtable
)
830 for(index
= 0; index
< number_of_subtables_with_diffs
; index
++) {
831 if(memcmp(subtables_with_diffs
[index
], subtable
, 16 * sizeof(WORD
)) == 0) {
836 if(number_of_subtables_with_diffs
>= 4096) {
837 printf("Could not add new subtable with diffs, storage is full\n");
841 subtables_with_diffs
[number_of_subtables_with_diffs
] = subtable
;
842 number_of_subtables_with_diffs
++;
/*
 * find_subtable_with_offsets: look up a 16-entry index sub table among
 * those stored in subtables_with_offsets, appending it when new.
 *
 * subtable  pointer to 16 WORD entries
 *
 * Stored sub tables are compared by content with memcmp(). A new one is
 * copied into the next free 16-entry slot, so the caller may reuse its
 * buffer afterwards. When 4096 sub tables are already stored a "storage is
 * full" message is printed.
 * NOTE(review): the returned values (presumably the sub table index) and
 * the action taken after the "full" message are not shown in this excerpt.
 */
847 static WORD
find_subtable_with_offsets(WORD
*subtable
)
851 for(index
= 0; index
< number_of_subtables_with_offsets
; index
++) {
852 if(memcmp(&subtables_with_offsets
[index
* 16], subtable
, 16 * sizeof(WORD
)) == 0) {
857 if(number_of_subtables_with_offsets
>= 4096) {
858 printf("Could not add new subtable with offsets, storage is full\n");
862 memcpy(&subtables_with_offsets
[number_of_subtables_with_offsets
* 16], subtable
, 16 * sizeof(WORD
));
863 number_of_subtables_with_offsets
++;
/*
 * pack_table: compress a 65536-entry difference table into a three-level
 * lookup structure.
 *
 * table                 65536-entry source table
 * packed_size_in_words  output; receives the packed size in WORDs
 *
 * Both sub table registries are reset first. For each high byte, the 16
 * slices of 16 entries are registered with find_subtable_with_diffs() and
 * their indices collected in temp_subtable, which is then registered with
 * find_subtable_with_offsets(); that result goes into main_index[high].
 *
 * Layout of the calloc()ed result, in WORDs:
 *   [0, 0x100)           main index: offset of the index sub table
 *   next offsets*16      index sub tables: offsets of the diff slices
 *   next diffs*16        the distinct diff slices themselves
 * All stored offsets are relative to the start of the packed table.
 * test_packed_table() is called on the result.
 * NOTE(review): the calloc() result check and the return statement are not
 * shown in this excerpt. The size is accumulated in a WORD, so it relies
 * on the total staying below 65536.
 */
868 static WORD
*pack_table(WORD
*table
, WORD
*packed_size_in_words
)
870 WORD high
, low4
, index
;
871 WORD main_index
[256];
872 WORD temp_subtable
[16];
874 WORD
*subtable_src
, *subtable_dst
;
876 memset(subtables_with_diffs
, 0, sizeof(subtables_with_diffs
));
877 number_of_subtables_with_diffs
= 0;
879 memset(subtables_with_offsets
, 0, sizeof(subtables_with_offsets
));
880 number_of_subtables_with_offsets
= 0;
882 for(high
= 0; high
< 256; high
++) {
883 for(low4
= 0; low4
< 256; low4
+= 16) {
884 index
= find_subtable_with_diffs(table
, &table
[MAKEWORD(low4
, high
)]);
886 temp_subtable
[low4
>> 4] = index
;
889 index
= find_subtable_with_offsets(temp_subtable
);
890 main_index
[high
] = index
;
893 *packed_size_in_words
= 0x100 + number_of_subtables_with_offsets
* 16 + number_of_subtables_with_diffs
* 16;
894 packed_table
= calloc(*packed_size_in_words
, sizeof(WORD
));
896 /* fill main index according to the subtables_with_offsets */
897 for(high
= 0; high
< 256; high
++) {
898 packed_table
[high
] = 0x100 + main_index
[high
] * 16;
901 //memcpy(sub_table, subtables_with_offsets, number_of_subtables_with_offsets * 16);
903 /* fill subtable index according to the subtables_with_diffs */
904 for(index
= 0; index
< number_of_subtables_with_offsets
; index
++) {
905 subtable_dst
= packed_table
+ 0x100 + index
* 16;
906 subtable_src
= &subtables_with_offsets
[index
* 16];
908 for(low4
= 0; low4
< 16; low4
++) {
909 subtable_dst
[low4
] = 0x100 + number_of_subtables_with_offsets
* 16 + subtable_src
[low4
] * 16;
/* Append the distinct 16-entry diff slices after the index sub tables. */
914 for(index
= 0; index
< number_of_subtables_with_diffs
; index
++) {
915 subtable_dst
= packed_table
+ 0x100 + number_of_subtables_with_offsets
* 16 + index
* 16;
916 memcpy(subtable_dst
, subtables_with_diffs
[index
], 16 * sizeof(WORD
));
921 test_packed_table(packed_table
);
/*
 * write_casemap_file: build the case tables from LIBDIR "UnicodeData.txt"
 * and write them, packed, to NLSDIR "/l_intl.nls".
 *
 * File contents in write order: one WORD described as the version number,
 * then for the upper case table and again for the lower case table a WORD
 * equal to the packed size + 1 followed by the packed table itself.
 * NOTE(review): the value of the version word, the fopen() result check,
 * the release of the packed tables, fclose() and the return values are on
 * lines not shown in this excerpt. None of the fwrite() results visible
 * here is checked.
 */
926 int write_casemap_file(void)
928 WORD packed_size_in_words
, offset_to_next_table_in_words
;
929 WORD
*packed_table
, value
;
932 if(!CreateCaseDiff(LIBDIR
"UnicodeData.txt"))
935 file
= fopen(NLSDIR
"/l_intl.nls", "wb");
937 /* write version number */
939 fwrite(&value
, 1, sizeof(WORD
), file
);
941 /* pack upper case table */
942 packed_table
= pack_table(to_upper_org
, &packed_size_in_words
);
943 offset_to_next_table_in_words
= packed_size_in_words
+ 1;
944 fwrite(&offset_to_next_table_in_words
, 1, sizeof(WORD
), file
);
945 /* write packed upper case table */
946 fwrite(packed_table
, sizeof(WORD
), packed_size_in_words
, file
);
949 /* pack lower case table */
950 packed_table
= pack_table(to_lower_org
, &packed_size_in_words
);
951 offset_to_next_table_in_words
= packed_size_in_words
+ 1;
952 fwrite(&offset_to_next_table_in_words
, 1, sizeof(WORD
), file
);
953 /* write packed lower case table */
954 fwrite(packed_table
, sizeof(WORD
), packed_size_in_words
, file
);
968 write_casemap_file();